# Extracted from patch 7a88ca619ab9c1bc8a34496d42fbb3ad1b59cf10
# Author:  Celine Mercier
# Date:    Fri, 15 Apr 2016 17:00:08 +0200
# Subject: First obi import (doesn't import tags yet because NA values
#          aren't handled)
# Target:  python/obitools3/commands/import.pyx (new file)

from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
from obitools3.files.universalopener cimport uopen
from obitools3.parsers.fasta import fastaIterator
from obitools3.parsers.fastq import fastqIterator
from obitools3.obidms._obidms import OBIDMS


import time

__title__ = "Imports sequences from a sequence file into a DMS view"


# Default values for the 'import' section of the configuration.
default_config = {'destview': None,
                  'skip': 0,
                  'only': None,
                  'skiperror': False,
                  'seqinformat': None,
                  'moltype': 'nuc',
                  'filename': None
                  }


def addOptions(parser):
    """Register the command line options of the import command on *parser*.

    Option destinations use the ``section:key`` form (``import:...`` and
    ``obi:...``).

    :param parser: an argparse-style parser providing ``add_argument`` and
                   ``add_argument_group``.
    """
    # Explicit metavars so that --help shows what value each option expects.
    parser.add_argument(dest='import:filename',
                        metavar='<FILENAME>',
                        nargs='?',
                        default=None,
                        help='sequence file name to be imported')

    group = parser.add_argument_group('obi import specific options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data")

    group.add_argument('--destination-view', '-v',
                       action="store", dest="import:destview",
                       metavar='<VIEW NAME>',
                       default=None,
                       type=str,
                       required=True,
                       help="Name of the destination view receiving the imported sequences")

    group = parser.add_argument_group('obi import specific options')

    group.add_argument('--skip',
                       action="store", dest="import:skip",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="skip the N first sequences")

    group.add_argument('--only',
                       action="store", dest="import:only",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="treat only N sequences")

    group.add_argument('--skip-on-error',
                       action="store_true", dest="import:skiperror",
                       default=None,
                       help="Skip sequence entries with parse error")

    # --fasta and --fastq share one destination: the last one given wins.
    group.add_argument('--fasta',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fasta',
                       help="Input file is in fasta nucleic format (including obitools fasta extentions)")

    group.add_argument('--fastq',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fastq',
                       help="Input file is in sanger fastq nucleic format (standard fastq)")

    # --nuc and --prot share one destination as well.
    group.add_argument('--nuc',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='nuc',
                       help="Input file contains nucleic sequences")

    group.add_argument('--prot',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='pep',
                       help="Input file contains protein sequences")


def run(config):
    """Import the sequences of a fasta/fastq file into a new view of a DMS.

    Reads ``config['import']['filename']`` with the parser selected by
    ``config['import']['seqinformat']`` and stores the id, definition and
    sequence of every entry in a new view named
    ``config['import']['destview']`` of the DMS
    ``config['obi']['defaultdms']``.

    The ``skip``, ``only``, ``skiperror`` and ``moltype`` settings are not
    read by this function yet.

    :param config: nested mapping holding the ``'import'`` and ``'obi'``
                   sections.
    :raises RuntimeError: if the sequence format is neither ``'fasta'`` nor
                          ``'fastq'``.
    """
    # Progress reporting is disabled for now:
    # pb = ProgressBar(1000, config, seconde=1)

    print(config)

    import_config = config['import']

    # Validate the requested format before opening anything, so that a
    # missing format does not leave an opened input file behind.
    iterators = {'fasta': fastaIterator,
                 'fastq': fastqIterator}
    make_iterator = iterators.get(import_config['seqinformat'])
    if make_iterator is None:
        raise RuntimeError('No file format specified')

    # Both supported formats are stored in the same kind of view.
    view_type = "NUC_SEQS_VIEW"

    inputs = uopen(import_config['filename'])
    iseq = make_iterator(inputs)

    # Create DMS
    d = OBIDMS(config['obi']['defaultdms'])

    # Create view
    view = d.new_view(import_config['destview'], view_type=view_type)

    for i, seq in enumerate(iseq):
        # pb(i)
        view[i].set_id(seq['id'])
        view[i].set_definition(seq['definition'])
        view[i].set_sequence(seq['sequence'])
        # TODO: also copy seq['tags'] (view[i][tag] = seq['tags'][tag]);
        # left out for now because NA values are not handled.

    print(view)
    print(repr(view))

    view.save_and_close()
    d.close()