Basic `obi export` command that exports a view to FASTA or FASTQ format,
for testing purposes
This commit is contained in:
93
python/obitools3/commands/export.pyx
Normal file
93
python/obitools3/commands/export.pyx
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.obidms._obidms import OBIDMS # TODO cimport doesn't work
|
||||||
|
from obitools3.utils cimport bytes2str
|
||||||
|
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Short human-readable description of this command. The command can write
# both fasta and fastq (see the --format option), so the title names both.
__title__ = "Export a NUC_SEQS view to a fasta or fastq file"


# Default configuration values contributed by this command.
default_config = {'inputview': None,
                  }
|
||||||
|
|
||||||
|
def addOptions(parser):
    """Register the command-line options of the export command on *parser*.

    Two argument groups are added: one for selecting the DMS and the input
    view, and one for the export-specific options. Every option is stored
    under a ``section:key`` destination name (for example
    ``obi:inputview`` or ``export:format``).

    :param parser: an argparse-style parser exposing ``add_argument_group``.
    """

    # TODO put this common group somewhere else but I don't know where
    group = parser.add_argument_group('DMS and view options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data.")

    group.add_argument('--input-view', '-i',
                       action="store", dest="obi:inputview",
                       metavar='<INPUT VIEW NAME>',
                       default=None,
                       type=str,
                       help="Name of the input view, either raw if the view is in the default DMS,"
                            " or in the form 'dms:view' if it is in another DMS.")

    group = parser.add_argument_group('obi export specific options')

    # Restricting the accepted values makes a typo fail at parse time with a
    # clear message instead of silently producing an incomplete export.
    group.add_argument('--format', '-f',
                       action="store", dest="export:format",
                       metavar='<FORMAT>',
                       default="fasta",
                       type=str,
                       choices=("fasta", "fastq"),
                       help="Export in the format <FORMAT>, 'fasta' or 'fastq'. Default: 'fasta'.")
|
||||||
|
|
||||||
|
def _wrap_sequence(nucseq, width=60):
    """Return *nucseq* split into lines of at most *width* characters.

    Unlike a regex substitution that appends a newline after every full
    chunk, this never leaves a trailing newline when the sequence length is
    an exact multiple of *width*.
    """
    chunks = [nucseq[start:start + width]
              for start in range(0, len(nucseq), width)]
    return "\n".join(chunks)


def run(config):
    """Export every sequence of the input view to standard output.

    The DMS named by ``config['obi']['defaultdms']`` is opened, then the view
    named by ``config['obi']['inputview']``. Each entry of the view is printed
    as one record in the format given by ``config['export']['format']``:

    * ``"fasta"``: ``>`` header line, then the sequence wrapped at 60
      characters per line;
    * ``"fastq"``: ``@`` header line, the sequence, a ``+`` line and the
      quality string.

    The header is made of the sequence id followed by ``name=value; `` for
    every column other than the id, sequence, definition and quality columns,
    and finally the definition when the entry has one.

    :param config: nested mapping holding the ``obi`` and ``export`` sections.
    :raises ValueError: if the requested format is neither ``"fasta"`` nor
        ``"fastq"``.
    """
    # Local import so that this block does not depend on the module's
    # top-level import list.
    import sys

    # TODO import doesn't work
    NUC_SEQUENCE_COLUMN = "NUC_SEQ"
    ID_COLUMN = "ID"
    DEFINITION_COLUMN = "DEFINITION"
    QUALITY_COLUMN = "QUALITY"

    # Columns that have a dedicated place in a record and therefore must not
    # be repeated as "name=value" tags in the header.
    reserved_columns = (NUC_SEQUENCE_COLUMN, ID_COLUMN,
                        DEFINITION_COLUMN, QUALITY_COLUMN)

    # Validate the format before opening anything, so that a bad value fails
    # fast instead of silently printing header-only records.
    export_format = config['export']['format']
    if export_format not in ("fasta", "fastq"):
        raise ValueError("Unknown export format '%s': expected 'fasta' or 'fastq'."
                         % export_format)
    is_fastq = export_format == "fastq"

    # FASTQ records start with '@'; only FASTA records start with '>'.
    header_prefix = "@" if is_fastq else ">"

    # Open DMS
    d = OBIDMS(config['obi']['defaultdms'])
    try:
        # Open input view
        iview = d.open_view(config['obi']['inputview'])
        try:
            # TODO progress bar: ProgressBar(len(iview), config, seconde=5)
            for seq in iview:
                header = [header_prefix, seq.get_id(), " "]
                for col_name in seq:
                    if col_name not in reserved_columns:
                        header.append(col_name + "=" + str(seq[col_name]) + "; ")
                if DEFINITION_COLUMN in seq:
                    header.append(seq.get_definition())

                nucseq = bytes2str(seq.get_sequence())

                if is_fastq:
                    record = ["".join(header), nucseq, "+", seq.get_str_quality()]
                else:
                    record = ["".join(header), _wrap_sequence(nucseq, 60)]

                print("\n".join(record))
        finally:
            # NOTE(review): save_and_close() is the only closing call visible
            # here; it is now also run on the error path so the view is not
            # left open -- confirm that saving an input view is harmless.
            iview.save_and_close()
    finally:
        d.close()

    # Status message goes to stderr: stdout carries the exported records and
    # must stay a valid fasta/fastq stream.
    sys.stderr.write("Done.\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user