obi clean: first version
This commit is contained in:
94
python/obitools3/commands/clean.pyx
Normal file
94
python/obitools3/commands/clean.pyx
Normal file
@ -0,0 +1,94 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes
|
||||
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
|
||||
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||
|
||||
|
||||
def addOptions(parser):
|
||||
|
||||
addSequenceInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi clean specific options')
|
||||
|
||||
group.add_argument('--distance', '-d',
|
||||
action="store", dest="clean:distance",
|
||||
metavar='<DISTANCE>',
|
||||
default=1.0,
|
||||
type=float,
|
||||
help="Maximum numbers of errors between two variant sequences. Default: 1.")
|
||||
|
||||
group.add_argument('--sample-tag', '-s',
|
||||
action="store",
|
||||
dest="clean:sample-tag-name",
|
||||
metavar="<SAMPLE TAG NAME>",
|
||||
type=str,
|
||||
default="merged_sample",
|
||||
help="Name of the tag where sample counts are kept.")
|
||||
|
||||
group.add_argument('--ratio', '-r',
|
||||
action="store", dest="clean:ratio",
|
||||
metavar='<RATIO>',
|
||||
default=0.5,
|
||||
type=float,
|
||||
help="Maximum ratio between the counts of two sequences so that the less abundant one can be considered"
|
||||
" a variant of the more abundant one. Default: 0.5.")
|
||||
|
||||
group.add_argument('--heads-only', '-H',
|
||||
action="store_true",
|
||||
dest="clean:heads-only",
|
||||
default=False,
|
||||
help="Only sequences labeled as heads are kept in the output. Default: False")
|
||||
|
||||
group.add_argument('--cluster-tags', '-C',
|
||||
action="store_true",
|
||||
dest="clean:cluster-tags",
|
||||
default=False,
|
||||
help="Adds tags for each sequence giving its cluster's head and weight for each sample.")
|
||||
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info", "obi clean")
|
||||
|
||||
# Open DMS
|
||||
dms_name = config['obi']['inputURI'].split('/')[0]
|
||||
dms = open_uri(dms_name)[0]
|
||||
|
||||
# Read the name of the input view
|
||||
uri_i = config['obi']['inputURI'].split('/')
|
||||
i_view_name = uri_i[1]
|
||||
|
||||
# Read the name of the output view
|
||||
uri_o = config['obi']['outputURI'].split('/')
|
||||
if len(uri_o)==2:
|
||||
# Check that input and output DMS are the same (predicate, to discuss)
|
||||
if dms_name != uri_o[0]:
|
||||
raise Exception("Input and output DMS must be the same")
|
||||
o_view_name = uri_o[1]
|
||||
else:
|
||||
o_view_name = uri_o[0]
|
||||
|
||||
if obi_clean(tobytes(dms_name), tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), b"obiclean", \
|
||||
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], 1) < 0:
|
||||
raise Exception("Error running obiclean")
|
||||
|
||||
print("\n")
|
||||
print(repr(dms[o_view_name]))
|
||||
|
||||
dms.close()
|
||||
|
17
python/obitools3/dms/capi/obiclean.pxd
Normal file
17
python/obitools3/dms/capi/obiclean.pxd
Normal file
@ -0,0 +1,17 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||
|
||||
|
||||
cdef extern from "obi_clean.h" nogil:
|
||||
|
||||
int obi_clean(const char* dms_name,
|
||||
const char* i_view_name,
|
||||
const char* sample_column_name,
|
||||
const char* o_view_name,
|
||||
const char* o_view_comments,
|
||||
double threshold,
|
||||
double max_ratio,
|
||||
bint heads_only,
|
||||
int thread_count)
|
||||
|
@ -8,6 +8,7 @@
|
||||
../../../src/linked_list.c
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obi_align.c
|
||||
../../../src/obi_clean.c
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.c
|
||||
|
@ -8,6 +8,7 @@
|
||||
../../src/linked_list.c
|
||||
../../src/murmurhash2.c
|
||||
../../src/obi_align.c
|
||||
../../src/obi_clean.c
|
||||
../../src/obiavl.c
|
||||
../../src/obiblob_indexer.c
|
||||
../../src/obiblob.c
|
||||
|
463
src/obi_clean.c
Normal file
463
src/obi_clean.c
Normal file
@ -0,0 +1,463 @@
|
||||
/****************************************************************************
|
||||
* Tags a set of sequences for PCR/sequencing errors identification *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obi_clean.c
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date April 9th 2018
|
||||
* @brief Functions tagging a set of sequences for PCR/sequencing errors identification.
|
||||
*/
|
||||
|
||||
//#define OMP_SUPPORT // TODO
|
||||
#ifdef OMP_SUPPORT
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
#include <search.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "obi_clean.h"
|
||||
#include "obidms.h"
|
||||
#include "obidebug.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h"
|
||||
#include "obiview.h"
|
||||
#include "sse_banded_LCS_alignment.h"
|
||||
#include "upperband.h"
|
||||
#include "obiblob.h"
|
||||
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
// TODO
|
||||
static int create_output_columns(Obiview_p o_view,
|
||||
Obiview_p i_view,
|
||||
OBIDMS_column_p sample_column,
|
||||
int sample_count);
|
||||
|
||||
|
||||
static inline int idxcmp(const void* idx1, const void* idx2);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
*
|
||||
************************************************************************/
|
||||
|
||||
static int create_output_columns(Obiview_p o_view,
|
||||
Obiview_p i_view,
|
||||
OBIDMS_column_p sample_column,
|
||||
int sample_count)
|
||||
{
|
||||
// Status column
|
||||
if (obi_view_add_column(o_view, CLEAN_STATUS_COLUMN_NAME, -1, NULL, OBI_CHAR, 0, sample_count, (sample_column->header)->elements_names, true, false, false, NULL, NULL, -1, CLEAN_STATUS_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_STATUS_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Head column
|
||||
if (obi_view_add_column(o_view, CLEAN_HEAD_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, CLEAN_HEAD_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_HEAD_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Sample count column
|
||||
if (obi_view_add_column(o_view, CLEAN_SAMPLECOUNT_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, CLEAN_SAMPLECOUNT_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_SAMPLECOUNT_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Head count column
|
||||
if (obi_view_add_column(o_view, CLEAN_HEADCOUNT_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, CLEAN_HEADCOUNT_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_HEADCOUNT_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Internal count column
|
||||
if (obi_view_add_column(o_view, CLEAN_INTERNALCOUNT_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, CLEAN_INTERNALCOUNT_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_INTERNALCOUNT_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Singleton count column
|
||||
if (obi_view_add_column(o_view, CLEAN_SINGLETONCOUNT_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, CLEAN_SINGLETONCOUNT_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the %s column", CLEAN_SINGLETONCOUNT_COLUMN_NAME);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline int idxcmp(const void* idx1, const void* idx2)
|
||||
{
|
||||
if (*(index_t*) idx1 < *(index_t*) idx2)
|
||||
return -1;
|
||||
if (*(index_t*) idx1 > *(index_t*) idx2)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
// TODO comment
|
||||
int obi_clean(const char* dms_name,
|
||||
const char* i_view_name,
|
||||
const char* sample_column_name,
|
||||
const char* o_view_name,
|
||||
const char* o_view_comments,
|
||||
double threshold,
|
||||
double max_ratio,
|
||||
bool heads_only,
|
||||
int thread_count)
|
||||
{
|
||||
float p;
|
||||
index_t i, j;
|
||||
index_t seq_count;
|
||||
index_t* index_array;
|
||||
double score;
|
||||
bool above_threshold;
|
||||
int lcs_length;
|
||||
int ali_length;
|
||||
Kmer_table_p ktable;
|
||||
Obi_blob_p blob1;
|
||||
Obi_blob_p blob2;
|
||||
int lcs_min;
|
||||
int sample_count;
|
||||
int sample;
|
||||
int s1_count;
|
||||
int s2_count;
|
||||
bool head;
|
||||
int head_count;
|
||||
int internal_count;
|
||||
int singleton_count;
|
||||
int ind_sample_count;
|
||||
char status;
|
||||
|
||||
void** yes_trees;
|
||||
void** no_trees;
|
||||
|
||||
OBIDMS_p dms = NULL;
|
||||
Obiview_p i_view = NULL;
|
||||
Obiview_p o_view = NULL;
|
||||
OBIDMS_column_p iseq_column = NULL;
|
||||
OBIDMS_column_p sample_column = NULL;
|
||||
OBIDMS_column_p status_column = NULL;
|
||||
OBIDMS_column_p head_column = NULL;
|
||||
OBIDMS_column_p internalcount_column = NULL;
|
||||
OBIDMS_column_p headcount_column = NULL;
|
||||
OBIDMS_column_p singletoncount_column = NULL;
|
||||
OBIDMS_column_p samplecount_column = NULL;
|
||||
|
||||
// TODO other columns
|
||||
|
||||
void* no;
|
||||
void* yes;
|
||||
void* key_p;
|
||||
|
||||
bool normalize = false;
|
||||
int reference = 0;
|
||||
bool similarity_mode = false;
|
||||
|
||||
char* seq1;
|
||||
char* seq2;
|
||||
|
||||
// Open DMS
|
||||
dms = obi_dms(dms_name);
|
||||
|
||||
// Open input view
|
||||
i_view = obi_open_view(dms, i_view_name);
|
||||
if (i_view == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening the input view to clean");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open the sequence column
|
||||
if (strcmp((i_view->infos)->view_type, VIEW_TYPE_NUC_SEQS) == 0)
|
||||
iseq_column = obi_view_get_column(i_view, NUC_SEQUENCE_COLUMN);
|
||||
else
|
||||
{
|
||||
obi_set_errno(OBI_CLEAN_ERROR);
|
||||
obidebug(1, "\nError: no sequence column");
|
||||
return -1;
|
||||
}
|
||||
if (iseq_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the sequence column");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open the sample column if there is one
|
||||
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
||||
sample_column = NULL;
|
||||
else
|
||||
{
|
||||
sample_column = obi_view_get_column(i_view, sample_column_name);
|
||||
if (sample_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the sample column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the output view
|
||||
o_view = obi_clone_view(dms, i_view, o_view_name, NULL, o_view_comments);
|
||||
if (o_view == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating the output view");
|
||||
return -1;
|
||||
}
|
||||
|
||||
sample_count = (sample_column->header)->nb_elements_per_line;
|
||||
|
||||
// Create the output columns
|
||||
if (create_output_columns(o_view, i_view, sample_column, sample_count) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
|
||||
// Get the created output columns
|
||||
status_column = obi_view_get_column(o_view, CLEAN_STATUS_COLUMN_NAME);
|
||||
head_column = obi_view_get_column(o_view, CLEAN_HEAD_COLUMN_NAME);
|
||||
internalcount_column = obi_view_get_column(o_view, CLEAN_INTERNALCOUNT_COLUMN_NAME);
|
||||
headcount_column = obi_view_get_column(o_view, CLEAN_HEADCOUNT_COLUMN_NAME);
|
||||
singletoncount_column = obi_view_get_column(o_view, CLEAN_SINGLETONCOUNT_COLUMN_NAME);
|
||||
samplecount_column = obi_view_get_column(o_view, CLEAN_SAMPLECOUNT_COLUMN_NAME);
|
||||
|
||||
// Build kmer tables
|
||||
ktable = hash_seq_column(i_view, iseq_column, 0);
|
||||
if (ktable == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_CLEAN_ERROR);
|
||||
obidebug(1, "\nError building kmer tables before aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
seq_count = (i_view->infos)->line_count;
|
||||
|
||||
// Allocate arrays of pointers to binary trees
|
||||
yes_trees = (void**) calloc(seq_count, sizeof(void*));
|
||||
no_trees = (void**) calloc(seq_count, sizeof(void*));
|
||||
|
||||
// Allocate and fill index array for the binary trees to reference (they don't copy the data)
|
||||
index_array = (index_t*) malloc(seq_count * sizeof(index_t));
|
||||
for (i=0; i < seq_count; i++)
|
||||
index_array[i]=i;
|
||||
|
||||
// Initialize all sequences to singletons or NA if no sequences in that sample
|
||||
for (i=0; i < (seq_count - 1); i++)
|
||||
{
|
||||
for (sample=0; sample < sample_count; sample++)
|
||||
{
|
||||
if (obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample) != OBIInt_NA)
|
||||
{
|
||||
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 's') < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (sample=0; sample < sample_count; sample++)
|
||||
{
|
||||
for (i=0; i < (seq_count - 1); i++)
|
||||
{
|
||||
if (i%1000 == 0)
|
||||
{
|
||||
p = ((i+(sample*(seq_count)))/(float)(seq_count*sample_count))*100;
|
||||
fprintf(stderr,"\rDone : %f %% ",p);
|
||||
}
|
||||
|
||||
// Get first sequence
|
||||
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0);
|
||||
if (blob1 == NULL)
|
||||
{
|
||||
obidebug(1, "\nError retrieving sequences to align");
|
||||
return -1;
|
||||
}
|
||||
|
||||
seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0);
|
||||
|
||||
// Get count for this sample
|
||||
s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample);
|
||||
|
||||
for (j=i+1; j < seq_count; j++) // TODO parallelize this loop?
|
||||
{
|
||||
// Get second sequence
|
||||
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0);
|
||||
if (blob2 == NULL)
|
||||
{
|
||||
obidebug(1, "\nError retrieving sequences to align");
|
||||
return -1;
|
||||
}
|
||||
|
||||
seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0);
|
||||
|
||||
// Get count for this sample
|
||||
s2_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, j, sample);
|
||||
|
||||
// Checking ratio
|
||||
if (((s1_count!=OBIInt_NA && s2_count!=OBIInt_NA) && (s1_count>0 && s2_count>0)) &&
|
||||
((((s1_count >= s2_count) && (((double) s2_count / (double) s1_count) <= max_ratio))) ||
|
||||
(((s2_count >= s1_count) && (((double) s1_count / (double) s2_count) <= max_ratio)))))
|
||||
{
|
||||
yes=NULL;
|
||||
no=NULL;
|
||||
no = tfind(index_array+j, &(no_trees[i]), idxcmp);
|
||||
if (no == NULL)
|
||||
yes = tfind(index_array+j, &(yes_trees[i]), idxcmp);
|
||||
|
||||
above_threshold = false;
|
||||
if ((no == NULL) && (yes == NULL)) //never compared
|
||||
{
|
||||
// Check if the sequences are identical in a quick way (same index in the same indexer)
|
||||
if (obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0) == obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0))
|
||||
above_threshold = true;
|
||||
else // the sequences aren't identical
|
||||
{
|
||||
// kmer filter
|
||||
align_filters(ktable, blob1, blob2, i, j, threshold, normalize, reference, similarity_mode, &score, &lcs_min, false);
|
||||
|
||||
// Compute alignment score if filter passed
|
||||
if (score == -1.0)
|
||||
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
|
||||
|
||||
above_threshold = ((score >= 0) && (score <= threshold));
|
||||
}
|
||||
}
|
||||
|
||||
if (yes || above_threshold)
|
||||
{
|
||||
if (yes == NULL)
|
||||
// put seqs in their 'yes' trees
|
||||
key_p = tsearch(index_array+j, &(yes_trees[i]), idxcmp); // TODO error checking
|
||||
// label as head or internal
|
||||
if (s1_count >= s2_count)
|
||||
{
|
||||
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton
|
||||
obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h');
|
||||
// Otherwise it's an internal
|
||||
obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i');
|
||||
}
|
||||
else
|
||||
{
|
||||
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton
|
||||
obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h');
|
||||
obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i');
|
||||
}
|
||||
}
|
||||
else if (no == NULL)
|
||||
// put in 'no' tree of both sequences // TODO why just one then???
|
||||
key_p = tsearch(index_array+j, &(no_trees[i]), idxcmp); // TODO error checking
|
||||
}
|
||||
free(seq2);
|
||||
}
|
||||
free(seq1);
|
||||
}
|
||||
}
|
||||
|
||||
free_kmer_tables(ktable, seq_count);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
for (i=0; i < (seq_count - 1); i++)
|
||||
{
|
||||
if (i%1000 == 0)
|
||||
{
|
||||
p = (i/(float)(seq_count))*100;
|
||||
fprintf(stderr, "\rAnnotating : %f %% ",p);
|
||||
}
|
||||
|
||||
head = false;
|
||||
head_count = 0;
|
||||
internal_count = 0;
|
||||
singleton_count = 0;
|
||||
ind_sample_count = 0;
|
||||
|
||||
for (sample=0; sample < sample_count; sample++)
|
||||
{
|
||||
// Check if head or singleton in at least one sample
|
||||
status = obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample);
|
||||
if ((!head) && ((status == 'h') || (status == 's')))
|
||||
head = true;
|
||||
|
||||
if (status == 's')
|
||||
{
|
||||
singleton_count++;
|
||||
ind_sample_count++;
|
||||
}
|
||||
else if (status == 'i')
|
||||
{
|
||||
internal_count++;
|
||||
ind_sample_count++;
|
||||
}
|
||||
else if (status == 'h')
|
||||
{
|
||||
head_count++;
|
||||
ind_sample_count++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (obi_set_bool_with_elt_idx_and_col_p_in_view(o_view, head_column, i, 0, head) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(o_view, singletoncount_column, i, 0, singleton_count) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(o_view, internalcount_column, i, 0, internal_count) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(o_view, headcount_column, i, 0, head_count) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(o_view, samplecount_column, i, 0, ind_sample_count) < 0)
|
||||
// TODO
|
||||
return -1;
|
||||
|
||||
}
|
||||
|
||||
// Close views
|
||||
if (obi_save_and_close_view(i_view) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing the input view after aligning");
|
||||
return -1;
|
||||
}
|
||||
if (obi_save_and_close_view(o_view) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing the output view after aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stderr,"\rDone : 100 %% ");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
85
src/obi_clean.h
Normal file
85
src/obi_clean.h
Normal file
@ -0,0 +1,85 @@
|
||||
/*************************************************************************************************
|
||||
* Header file for functions tagging a set of sequences for PCR/sequencing errors identification *
|
||||
*************************************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obi_clean.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date April 9th 2018
|
||||
* @brief Header file for the functions tagging a set of sequences for PCR/sequencing errors identification.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBI_CLEAN_H_
|
||||
#define OBI_CLEAN_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obiview.h"
|
||||
#include "obidmscolumn.h"
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* @brief Names and comments of columns automatically created in the output view when aligning.
|
||||
*
|
||||
* @since April 2018
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define CLEAN_STATUS_COLUMN_NAME "obiclean_status"
|
||||
#define CLEAN_HEAD_COLUMN_NAME "obiclean_head"
|
||||
#define CLEAN_SAMPLECOUNT_COLUMN_NAME "obiclean_samplecount"
|
||||
#define CLEAN_HEADCOUNT_COLUMN_NAME "obiclean_headcount"
|
||||
#define CLEAN_INTERNALCOUNT_COLUMN_NAME "obiclean_internalcount"
|
||||
#define CLEAN_SINGLETONCOUNT_COLUMN_NAME "obiclean_singletoncount"
|
||||
|
||||
#define CLEAN_STATUS_COLUMN_COMMENTS ""
|
||||
#define CLEAN_HEAD_COLUMN_COMMENTS ""
|
||||
#define CLEAN_SAMPLECOUNT_COLUMN_COMMENTS ""
|
||||
#define CLEAN_HEADCOUNT_COLUMN_COMMENTS ""
|
||||
#define CLEAN_INTERNALCOUNT_COLUMN_COMMENTS ""
|
||||
#define CLEAN_SINGLETONCOUNT_COLUMN_COMMENTS ""
|
||||
|
||||
|
||||
/**
|
||||
* @brief Tags a set of sequences for PCR/sequencing errors identification
|
||||
*
|
||||
* Note: The columns where the results are written are automatically named and created.
|
||||
*
|
||||
* @param dms A pointer on an OBIDMS.
|
||||
* @param i_view_name The name of the input view.
|
||||
* @param sample_column_name The name of the OBI_STR column in the input view where the sample information is kept.
|
||||
* NULL or "" (empty string) if there is no sample information.
|
||||
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
||||
* @param o_view_comments The comments that should be associated with the output view.
|
||||
* @param threshold Similarity threshold expressed as a number of differences.
|
||||
* Only sequence pairs with a similarity above the threshold are clustered.
|
||||
* @param max_ratio Maximum ratio between the counts of two sequences so that the less abundant one can be considered
|
||||
* as a variant of the more abundant one.
|
||||
* @param heads_only If true, only cluster heads are printed to the output view.
|
||||
* @param thread_count Number of threads to use (Not available yet) TODO
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_clean(const char* dms_name,
|
||||
const char* i_view_name,
|
||||
const char* sample_column_name,
|
||||
const char* o_view_name,
|
||||
const char* o_view_comments,
|
||||
double threshold,
|
||||
double max_ratio,
|
||||
bool heads_only,
|
||||
int thread_count);
|
||||
|
||||
|
||||
#endif /* OBI_CLEAN_H_ */
|
||||
|
@ -122,6 +122,8 @@ extern int obi_errno;
|
||||
*/
|
||||
#define OBI_ELT_IDX_ERROR (31) /** Error setting or getting a value at a non-existent element index or with a non-existent element name
|
||||
*/
|
||||
#define OBI_CLEAN_ERROR (32) /** Error while cleaning sequences
|
||||
*/
|
||||
|
||||
/**@}*/
|
||||
|
||||
|
Reference in New Issue
Block a user