obi clean: first version

This commit is contained in:
Celine Mercier
2018-05-18 14:26:54 +02:00
parent f0c147c252
commit 6911bf4d70
7 changed files with 663 additions and 0 deletions

View File

@ -0,0 +1,94 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiclean cimport obi_clean
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
# One-line, human-readable summary of what the `obi clean` command does.
# NOTE(review): presumably picked up by the obi command framework for its
# help output -- confirm where `__title__` is read.
__title__ = (
    "Tag a set of sequences for PCR and "
    "sequencing errors identification"
)
def addOptions(parser):
    """Register the command-line options of ``obi clean`` on *parser*.

    The shared sequence-input and minimal-output option groups are added
    first, then a dedicated 'obi clean specific options' group. Every
    option of that group stores its value under a ``clean:<name>``
    destination.

    Args:
        parser: argparse-style parser (must expose ``add_argument_group``);
            it is extended in place.
    """
    # Option groups shared with the other obi commands.
    addSequenceInputOption(parser)
    addMinimalOutputOption(parser)

    # Declarative table of (flags, add_argument keyword arguments), listed
    # in the order in which the options are registered.
    clean_options = (
        (('--distance', '-d'),
         dict(action="store",
              dest="clean:distance",
              metavar='<DISTANCE>',
              default=1.0,
              type=float,
              help="Maximum numbers of errors between two variant sequences. Default: 1.")),

        (('--sample-tag', '-s'),
         dict(action="store",
              dest="clean:sample-tag-name",
              metavar="<SAMPLE TAG NAME>",
              type=str,
              default="merged_sample",
              help="Name of the tag where sample counts are kept.")),

        (('--ratio', '-r'),
         dict(action="store",
              dest="clean:ratio",
              metavar='<RATIO>',
              default=0.5,
              type=float,
              help="Maximum ratio between the counts of two sequences so that the less abundant one can be considered"
                   " a variant of the more abundant one. Default: 0.5.")),

        (('--heads-only', '-H'),
         dict(action="store_true",
              dest="clean:heads-only",
              default=False,
              help="Only sequences labeled as heads are kept in the output. Default: False")),

        (('--cluster-tags', '-C'),
         dict(action="store_true",
              dest="clean:cluster-tags",
              default=False,
              help="Adds tags for each sequence giving its cluster's head and weight for each sample.")),
    )

    clean_group = parser.add_argument_group('obi clean specific options')
    for flags, settings in clean_options:
        clean_group.add_argument(*flags, **settings)
def run(config):
    """Run the ``obi clean`` command.

    Parses the input and output URIs found in ``config['obi']``, calls the
    ``obi_clean`` C function with the parameters stored in
    ``config['clean']`` and prints the representation of the output view.

    Args:
        config: nested mapping providing ``config['obi']['inputURI']``
            (``<DMS>/<view>``), ``config['obi']['outputURI']``
            (``<DMS>/<view>`` or just ``<view>``) and the ``clean`` entries
            ``sample-tag-name``, ``distance``, ``ratio`` and ``heads-only``.

    Raises:
        IndexError: if the input URI has no view component.
        Exception: if the input and output DMS differ, or if ``obi_clean``
            returns a negative status.
    """
    DMS.obi_atexit()

    logger("info", "obi clean")

    # Parse and validate both URIs *before* opening the DMS, so that a
    # malformed URI cannot leave an opened DMS behind.
    uri_i = config['obi']['inputURI'].split('/')
    if len(uri_i) < 2:
        # Same exception type as the former bare `uri_i[1]` lookup, but
        # with a message that tells the user what is wrong.
        raise IndexError("Input URI must have the form <DMS>/<view>")
    dms_name = uri_i[0]
    i_view_name = uri_i[1]

    # Read the name of the output view
    uri_o = config['obi']['outputURI'].split('/')
    if len(uri_o) == 2:
        # Check that input and output DMS are the same (predicate, to discuss)
        if dms_name != uri_o[0]:
            raise Exception("Input and output DMS must be the same")
        o_view_name = uri_o[1]
    else:
        o_view_name = uri_o[0]

    # Open DMS
    dms = open_uri(dms_name)[0]
    try:
        # NOTE(review): b"obiclean" and the trailing 1 are passed through
        # unchanged; presumably the output view comment and the thread
        # count -- confirm against the C signature of obi_clean.
        # NOTE(review): config['clean']['cluster-tags'] is never forwarded.
        status = obi_clean(tobytes(dms_name),
                           tobytes(i_view_name),
                           tobytes(config['clean']['sample-tag-name']),
                           tobytes(o_view_name),
                           b"obiclean",
                           config['clean']['distance'],
                           config['clean']['ratio'],
                           config['clean']['heads-only'],
                           1)
        if status < 0:
            raise Exception("Error running obiclean")

        print("\n")
        print(repr(dms[o_view_name]))
    finally:
        # Always release the DMS, including when obi_clean or the view
        # lookup fails.
        dms.close()