Add documentation for the new options and an option to manage the ecotag
cache size
This commit is contained in:
@ -3,6 +3,24 @@ Options to specify input format
|
|||||||
|
|
||||||
.. program:: obitools
|
.. program:: obitools
|
||||||
|
|
||||||
|
|
||||||
|
Restrict the analysis to a sub-part of the input file
|
||||||
|
.....................................................
|
||||||
|
|
||||||
|
.. cmdoption:: --skip <N>
|
||||||
|
|
||||||
|
The first N sequence records of the file are discarded from the analysis and
|
||||||
|
not reported to the output file.
|
||||||
|
|
||||||
|
|
||||||
|
.. cmdoption:: --only <N>
|
||||||
|
|
||||||
|
Only the next N sequence records of the file are analyzed. The following sequences
|
||||||
|
in the file are neither analyzed nor reported to the output file.
|
||||||
|
This option can be used in conjunction with the `--skip` option.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Sequence annotated format
|
Sequence annotated format
|
||||||
.........................
|
.........................
|
||||||
|
|
||||||
|
@ -9,10 +9,15 @@
|
|||||||
|
|
||||||
.. cmdoption:: -m FLOAT, --minimum-identity=FLOAT
|
.. cmdoption:: -m FLOAT, --minimum-identity=FLOAT
|
||||||
|
|
||||||
When sequence identity is less than FLOAT, the taxonomic
|
When the best match with the reference database presents an identity
|
||||||
assignment for the sequence record is not indicated in ``ecotag``'s
|
level below FLOAT, the taxonomic assignment for the sequence record
|
||||||
output. FLOAT is included in a [0,1] interval.
|
is not computed. The sequence record is nevertheless included in the
|
||||||
(This option doesn't seem to work).
|
output file. FLOAT is included in a [0,1] interval.
|
||||||
|
|
||||||
|
.. cmdoption:: --minimum-circle=FLOAT
|
||||||
|
|
||||||
|
Minimum identity considered for the assignment circle.
|
||||||
|
FLOAT is included in a [0,1] interval.
|
||||||
|
|
||||||
.. cmdoption:: -x RANK, --explain=RANK
|
.. cmdoption:: -x RANK, --explain=RANK
|
||||||
|
|
||||||
@ -40,8 +45,18 @@
|
|||||||
is expected to be assigned to the wrong taxon, for example because of
|
is expected to be assigned to the wrong taxon, for example because of
|
||||||
taxonomic misidentification. FLOAT is included in a [0,1] interval.
|
taxonomic misidentification. FLOAT is included in a [0,1] interval.
|
||||||
|
|
||||||
|
.. cmdoption:: --cache-size=INTEGER
|
||||||
|
|
||||||
|
A cache for computed similarities is maintained by `ecotag`. The default
|
||||||
|
size for this cache is 1,000,000 scores. This option allows changing
|
||||||
|
the cache size.
|
||||||
|
|
||||||
.. include:: ../optionsSet/taxonomyDB.txt
|
.. include:: ../optionsSet/taxonomyDB.txt
|
||||||
|
|
||||||
|
.. include:: ../optionsSet/inputformat.txt
|
||||||
|
|
||||||
|
.. include:: ../optionsSet/outputformat.txt
|
||||||
|
|
||||||
.. include:: ../optionsSet/defaultoptions.txt
|
.. include:: ../optionsSet/defaultoptions.txt
|
||||||
|
|
||||||
:py:mod:`ecotag` added sequence attributes
|
:py:mod:`ecotag` added sequence attributes
|
||||||
@ -65,4 +80,3 @@
|
|||||||
- :doc:`species_list <../attributes/species_list>`
|
- :doc:`species_list <../attributes/species_list>`
|
||||||
- :doc:`species_name <../attributes/species_name>`
|
- :doc:`species_name <../attributes/species_name>`
|
||||||
- :doc:`taxid <../attributes/taxid>`
|
- :doc:`taxid <../attributes/taxid>`
|
||||||
|
|
||||||
|
@ -148,6 +148,13 @@ def addSearchOptions(optionManager):
|
|||||||
default=0.0,
|
default=0.0,
|
||||||
help='Tolerated rate of wrong assignation')
|
help='Tolerated rate of wrong assignation')
|
||||||
|
|
||||||
|
optionManager.add_option('--cache-size',
|
||||||
|
action='store',dest='cache',
|
||||||
|
type='int',
|
||||||
|
metavar='<SIZE>',
|
||||||
|
default=1000000,
|
||||||
|
help='Cache size for the aligment score')
|
||||||
|
|
||||||
|
|
||||||
def count(data):
|
def count(data):
|
||||||
rep = {}
|
rep = {}
|
||||||
@ -203,6 +210,7 @@ def cachedLenLCS(s1,s2,minid,normalized,reference):
|
|||||||
global __LCSCache__
|
global __LCSCache__
|
||||||
global __INCache__
|
global __INCache__
|
||||||
global __OUTCache__
|
global __OUTCache__
|
||||||
|
global __CACHE_SIZE__
|
||||||
|
|
||||||
pair=frozenset((s1.id,s2.id))
|
pair=frozenset((s1.id,s2.id))
|
||||||
|
|
||||||
@ -217,7 +225,7 @@ def cachedLenLCS(s1,s2,minid,normalized,reference):
|
|||||||
|
|
||||||
__LCSCache__[pair]=rep
|
__LCSCache__[pair]=rep
|
||||||
|
|
||||||
if len(__LCSCache__) > 1000000:
|
if len(__LCSCache__) > __CACHE_SIZE__:
|
||||||
__LCSCache__.popitem(0)
|
__LCSCache__.popitem(0)
|
||||||
return rep
|
return rep
|
||||||
|
|
||||||
@ -292,10 +300,16 @@ if __name__=='__main__':
|
|||||||
__INCache__=1.0
|
__INCache__=1.0
|
||||||
__OUTCache__=1.0
|
__OUTCache__=1.0
|
||||||
|
|
||||||
|
|
||||||
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
|
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
|
||||||
|
|
||||||
(options, entries) = optionParser()
|
(options, entries) = optionParser()
|
||||||
|
|
||||||
|
__CACHE_SIZE__=options.cache
|
||||||
|
|
||||||
|
if __CACHE_SIZE__ < 10:
|
||||||
|
__CACHE_SIZE__=10
|
||||||
|
|
||||||
taxonomy = loadTaxonomyDatabase(options)
|
taxonomy = loadTaxonomyDatabase(options)
|
||||||
writer = sequenceWriterGenerator(options)
|
writer = sequenceWriterGenerator(options)
|
||||||
|
|
||||||
@ -338,6 +352,8 @@ if __name__=='__main__':
|
|||||||
|
|
||||||
search = lcsIteratorSelf(entries,db,options)
|
search = lcsIteratorSelf(entries,db,options)
|
||||||
|
|
||||||
|
print >>sys.stderr,'\nCache size : %d\n'
|
||||||
|
|
||||||
|
|
||||||
for seq,best,match in search:
|
for seq,best,match in search:
|
||||||
try:
|
try:
|
||||||
@ -424,9 +440,9 @@ if __name__=='__main__':
|
|||||||
else:
|
else:
|
||||||
seq['species_name']=None
|
seq['species_name']=None
|
||||||
|
|
||||||
print >>sys.stderr,'\rCache size : %5.3f ' % (__INCache__/__OUTCache__),
|
|
||||||
writer(seq)
|
writer(seq)
|
||||||
|
print >>sys.stderr,'\n%5.3f% of the alignments was cached' % (__INCache__/(__INCache__+__OUTCache__)*100)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user