135 lines
4.5 KiB
Cython
135 lines
4.5 KiB
Cython
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|
from obitools3.files.universalopener cimport uopen
|
|
from obitools3.parsers.fasta import fastaIterator
|
|
from obitools3.parsers.fastq import fastqIterator
|
|
from obitools3.obidms._obidms import OBIDMS
|
|
|
|
|
|
import time
|
|
|
|
# One-line description of this module: run() loads a fasta/fastq file into a
# view of a DMS. (How the title is displayed is decided by the caller, which
# is not visible in this file.)
__title__ = "Imports sequences from a file into a DMS view"
|
|
|
|
|
|
# Default values for the ``import`` options. Each key is the part that follows
# ``import:`` in the option destinations declared by addOptions().
default_config = dict(
    destview=None,      # --destination-view
    skip=0,             # --skip
    only=None,          # --only
    skiperror=False,    # --skip-on-error
    seqinformat=None,   # --fasta / --fastq
    moltype='nuc',      # --nuc / --prot
    filename=None,      # positional <FILENAME>
)
|
|
|
|
def addOptions(parser):
    """Declare the command-line options of the import command on *parser*.

    Every option is stored under a ``section:key`` destination
    (``import:...`` or ``obi:...``). All options default to ``None`` here,
    so a value of ``None`` after parsing means "not given on the command
    line".

    Parameters
    ----------
    parser
        An ``argparse.ArgumentParser`` (or compatible object). It is
        modified in place; nothing is returned.
    """
    # Optional positional argument: the sequence file to import.
    parser.add_argument(dest='import:filename',
                        metavar='<FILENAME>',
                        nargs='?',
                        default=None,
                        help='sequence file name to be imported')

    # A single group holds all the options, so the help output shows one
    # 'obi import specific options' section instead of two identical ones.
    group = parser.add_argument_group('obi import specific options')

    group.add_argument('--default-dms', '-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data")

    group.add_argument('--destination-view', '-v',
                       action="store", dest="import:destview",
                       metavar='<VIEW NAME>',
                       default=None,
                       type=str,
                       required=True,
                       help="Name of the view in which the sequences are imported")

    group.add_argument('--skip',
                       action="store", dest="import:skip",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="skip the N first sequences")

    group.add_argument('--only',
                       action="store", dest="import:only",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="treat only N sequences")

    group.add_argument('--skip-on-error',
                       action="store_true", dest="import:skiperror",
                       default=None,
                       help="Skip sequence entries with parse error")

    # --fasta and --fastq write to the same destination: the last one given
    # on the command line wins.
    group.add_argument('--fasta',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fasta',
                       help="Input file is in fasta nucleic format (including obitools fasta extensions)")

    group.add_argument('--fastq',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fastq',
                       help="Input file is in sanger fastq nucleic format (standard fastq)")

    # Likewise --nuc and --prot share one destination.
    group.add_argument('--nuc',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='nuc',
                       help="Input file contains nucleic sequences")

    group.add_argument('--prot',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='pep',
                       help="Input file contains protein sequences")
|
|
|
|
|
|
|
|
def run(config):
    """Import the sequences of a fasta or fastq file into a new DMS view.

    Reads ``config['import']['filename']`` with the parser selected by
    ``config['import']['seqinformat']`` and stores every entry (id,
    definition, sequence and all tags) in a new view named
    ``config['import']['destview']`` of the DMS
    ``config['obi']['defaultdms']``. The view is printed, then saved and
    closed, and the DMS is closed.

    The ``skip``, ``only``, ``skiperror`` and ``moltype`` options are not
    read by this function: every entry is imported and the view type is
    always ``"NUC_SEQS_VIEW"``.

    Parameters
    ----------
    config
        Nested mapping holding at least the ``'import'`` and ``'obi'``
        sections described above.

    Raises
    ------
    RuntimeError
        If the sequence format is neither ``'fasta'`` nor ``'fastq'``.
    """
    import_config = config['import']

    # Check the format before touching anything else, so that a missing
    # format does not leave an opened input file behind.
    seq_format = import_config['seqinformat']
    if seq_format not in ('fasta', 'fastq'):
        raise RuntimeError('No file format specified')

    # NOTE(review): 35000000 is a hard-coded progress-bar argument, presumably
    # the expected maximum number of entries -- confirm against ProgressBar.
    pb = ProgressBar(35000000, config, seconde=5)

    inputs = uopen(import_config['filename'])

    if seq_format == 'fasta':
        iseq = fastaIterator(inputs)
    else:
        iseq = fastqIterator(inputs)

    # Both supported formats are stored in the same kind of view.
    view_type = "NUC_SEQS_VIEW"

    # Create DMS
    d = OBIDMS(config['obi']['defaultdms'])

    # Create view
    view = d.new_view(import_config['destview'], view_type=view_type)

    for i, seq in enumerate(iseq):
        pb(i)
        view[i].set_id(seq['id'])
        view[i].set_definition(seq['definition'])
        view[i].set_sequence(seq['sequence'])

        # Tags are copied verbatim; no special handling of NA values is done.
        tags = seq['tags']
        for tag in tags:
            view[i][tag] = tags[tag]

    print(view)

    view.save_and_close()
    d.close()
|
|
|
|
|