from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
from obitools3.obidms._obidms import OBIDMS  # TODO cimport doesn't work
from obitools3.utils cimport bytes2str

import time
import re
import sys

__title__="Export a NUC_SEQS view to a fasta file"


default_config = {'inputview': None,
                  }

# Maximum number of sequence characters written per line in FASTA output.
_FASTA_LINE_WIDTH = 60

# Values accepted for the 'export:format' option.
_SUPPORTED_FORMATS = ("fasta", "fastq")


def addOptions(parser):
    """Register the command-line options of the export command on *parser*.

    Two argument groups are added: one for the DMS/view selection
    (``--default-dms``, ``--input-view``) and one for the export specific
    options (``--format``).
    """

    # TODO put this common group somewhere else but I don't know where
    group = parser.add_argument_group('DMS and view options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data.")

    group.add_argument('--input-view', '-i',
                       action="store", dest="obi:inputview",
                       metavar='',
                       default=None,
                       type=str,
                       help="Name of the input view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    group = parser.add_argument_group('obi export specific options')

    group.add_argument('--format', '-f',
                       action="store", dest="export:format",
                       metavar='',
                       default="fasta",
                       type=str,
                       help="Export in the format , 'fasta' or 'fastq'. "
                            "Default: 'fasta'.")


def _wrap_sequence(nucseq, width=_FASTA_LINE_WIDTH):
    """Return *nucseq* split into newline-separated chunks of at most *width* characters.

    No trailing newline is produced, even when the length of *nucseq* is an
    exact multiple of *width*, and an empty sequence yields an empty string.
    """
    return "\n".join([nucseq[start:start + width]
                      for start in range(0, len(nucseq), width)])


def run(config):
    """Print every sequence of the input view on stdout as FASTA or FASTQ.

    *config* is indexed as ``config['obi']['defaultdms']``,
    ``config['obi']['inputview']`` and ``config['export']['format']``.

    The header line holds the sequence id, then every column other than the
    id, sequence, definition and quality columns as ``name=value; `` pairs,
    then the definition when the view has one.

    Raises ValueError when the requested format is neither 'fasta' nor 'fastq'.
    """

    # TODO import doesn't work
    NUC_SEQUENCE_COLUMN = "NUC_SEQ"
    ID_COLUMN = "ID"
    DEFINITION_COLUMN = "DEFINITION"
    QUALITY_COLUMN = "QUALITY"

    # Columns that are rendered explicitly and must not be repeated as tags.
    reserved_columns = (NUC_SEQUENCE_COLUMN, ID_COLUMN,
                        DEFINITION_COLUMN, QUALITY_COLUMN)

    # Reject unknown formats before touching the DMS, instead of silently
    # printing header-only records.
    export_format = config['export']['format']
    if export_format not in _SUPPORTED_FORMATS:
        raise ValueError("Unsupported export format: " + repr(export_format)
                         + " (expected 'fasta' or 'fastq')")
    is_fastq = export_format == "fastq"

    # FASTQ records start with '@', FASTA records with '>'.
    header_prefix = "@" if is_fastq else ">"

    # Open DMS
    d = OBIDMS(config['obi']['defaultdms'])
    try:
        # Open input view
        iview = d.open_view(config['obi']['inputview'])
        try:
            # TODO the progress bar is currently disabled:
            # pb = ProgressBar(len(iview), config, seconde=5)

            for seq in iview:
                parts = [header_prefix, seq.get_id(), " "]
                for col_name in seq:
                    if col_name not in reserved_columns:
                        parts.append(col_name + "=" + str(seq[col_name]) + "; ")
                if DEFINITION_COLUMN in seq:
                    parts.append(seq.get_definition())

                nucseq = bytes2str(seq.get_sequence())

                if is_fastq:
                    parts.extend(["\n", nucseq,
                                  "\n", "+",
                                  "\n", seq.get_str_quality()])
                else:
                    parts.extend(["\n", _wrap_sequence(nucseq)])

                print("".join(parts))
        finally:
            # Same close call as the normal path, now also run on errors.
            iview.save_and_close()
    finally:
        d.close()

    # Status goes to stderr so it never ends up inside the exported records.
    sys.stderr.write("Done.\n")