Add a --minimum-circle option to ecotag and a cache of up to
1000000 pairwise scores for the self-alignment step
This commit is contained in:
2
setup.py
2
setup.py
@ -19,7 +19,7 @@ from os import path
|
||||
|
||||
|
||||
PACKAGE = "OBITools"
|
||||
VERSION = "1.1.18"
|
||||
VERSION = "1.1.19"
|
||||
AUTHOR = 'Eric Coissac'
|
||||
EMAIL = 'eric@coissac.eu'
|
||||
URL = 'metabarcoding.org/obitools'
|
||||
|
@ -46,6 +46,8 @@ from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDat
|
||||
from obitools.options import getOptionManager
|
||||
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import sys
|
||||
import math
|
||||
import os.path
|
||||
@ -76,6 +78,13 @@ def addSearchOptions(optionManager):
|
||||
default=0.0,
|
||||
help="minimum identity to consider.")
|
||||
|
||||
optionManager.add_option('--minimum-circle',
|
||||
action="store", dest="circle",
|
||||
metavar="identity",
|
||||
type="float",
|
||||
default=1.0,
|
||||
help="minimum identity considered for the assignment circle.")
|
||||
|
||||
# optionManager.add_option('-S','--normalized-smallest',
|
||||
# action="store_false", dest="large",
|
||||
# default=True,
|
||||
@ -190,6 +199,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
|
||||
return lcs, lali
|
||||
|
||||
|
||||
def cachedLenLCS(s1,s2,minid,normalized,reference):
    """
    Return lenlcs(s1,s2,minid,normalized,reference), served from a
    bounded cache when the same request was already computed.

    The cache is the module-level ordered mapping ``__LCSCache__``. Entries
    are kept in order of last use: a hit is moved to the most recent end,
    and once the cache holds more than 1000000 entries the oldest one is
    dropped.

    @param s1: first sequence; only its ``id`` attribute is read here
    @param s2: second sequence; only its ``id`` attribute is read here
    @param minid: forwarded unchanged to ``lenlcs``
    @param normalized: forwarded unchanged to ``lenlcs``
    @param reference: forwarded unchanged to ``lenlcs``

    @return: whatever ``lenlcs`` returns for these arguments (the caller
             in this file unpacks it as ``lcs,lali``)
    """
    # The id pair is stored as a frozenset so that (s1,s2) and (s2,s1)
    # share a single entry. The scoring parameters are part of the key as
    # well: they are passed to lenlcs, so a result computed for one
    # combination must not be returned for another.
    # NOTE(review): assumes minid, normalized and reference are hashable
    # (numbers / booleans / constants) -- confirm against the callers.
    key=(frozenset((s1.id,s2.id)),minid,normalized,reference)

    try:
        # Removing the entry on a hit lets the re-insertion below place
        # it at the most recently used end of the ordered mapping.
        rep=__LCSCache__.pop(key)
    except KeyError:
        rep=lenlcs(s1,s2,minid,normalized,reference)

    __LCSCache__[key]=rep

    # Bound the memory used by the cache: discard the least recently
    # used entry, which sits at the front of the ordered mapping.
    if len(__LCSCache__) > 1000000:
        __LCSCache__.popitem(last=False)

    return rep
|
||||
|
||||
|
||||
|
||||
#def lcsIterator(entries,db,options):
|
||||
#
|
||||
# for seq in entries:
|
||||
@ -233,7 +262,7 @@ def lcsIteratorSelf(entries,db,options):
|
||||
maxid = ([],0.0)
|
||||
minid = options.minimum
|
||||
for d in db:
|
||||
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
||||
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) # @UnusedVariable
|
||||
if lcs > maxid[1] and lcs > options.minimum:
|
||||
maxid = ([d],lcs)
|
||||
minid = maxid[1]
|
||||
@ -241,11 +270,13 @@ def lcsIteratorSelf(entries,db,options):
|
||||
maxid[0].append(d)
|
||||
|
||||
if maxid[0]:
|
||||
if maxid[1] > options.circle:
|
||||
maxid[1]=options.circle
|
||||
results.extend([(s,maxid[1]) for s in maxid[0]])
|
||||
for d in db:
|
||||
for s in maxid[0]:
|
||||
if d.id != s.id:
|
||||
lcs,lali = lenlcs(s,d,maxid[1],normalized=True,reference=ALILEN)
|
||||
lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN) # @UnusedVariable
|
||||
if lcs >= maxid[1]:
|
||||
results.append((d,lcs))
|
||||
|
||||
@ -253,6 +284,8 @@ def lcsIteratorSelf(entries,db,options):
|
||||
|
||||
if __name__=='__main__':
|
||||
|
||||
__LCSCache__=OrderedDict()
|
||||
|
||||
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
|
||||
|
||||
(options, entries) = optionParser()
|
||||
|
@ -1,5 +1,5 @@
|
||||
major = 1
|
||||
minor = 1
|
||||
serial= '18'
|
||||
serial= '19'
|
||||
|
||||
version = "%2d.%02d %s" % (major,minor,serial)
|
||||
|
Reference in New Issue
Block a user