Add a --minimum-circle option to ecotag, and a cache of the
self-alignment scores holding up to 1000000 pairwise scores
This commit is contained in:
2
setup.py
2
setup.py
@ -19,7 +19,7 @@ from os import path
|
|||||||
|
|
||||||
|
|
||||||
PACKAGE = "OBITools"
|
PACKAGE = "OBITools"
|
||||||
VERSION = "1.1.18"
|
VERSION = "1.1.19"
|
||||||
AUTHOR = 'Eric Coissac'
|
AUTHOR = 'Eric Coissac'
|
||||||
EMAIL = 'eric@coissac.eu'
|
EMAIL = 'eric@coissac.eu'
|
||||||
URL = 'metabarcoding.org/obitools'
|
URL = 'metabarcoding.org/obitools'
|
||||||
|
@ -46,6 +46,8 @@ from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDat
|
|||||||
from obitools.options import getOptionManager
|
from obitools.options import getOptionManager
|
||||||
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
|
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import math
|
import math
|
||||||
import os.path
|
import os.path
|
||||||
@ -76,6 +78,13 @@ def addSearchOptions(optionManager):
|
|||||||
default=0.0,
|
default=0.0,
|
||||||
help="minimum identity to consider.")
|
help="minimum identity to consider.")
|
||||||
|
|
||||||
|
optionManager.add_option('--minimum-circle',
|
||||||
|
action="store", dest="circle",
|
||||||
|
metavar="identity",
|
||||||
|
type="float",
|
||||||
|
default=1.0,
|
||||||
|
help="minimum identity considered for the assignment circle.")
|
||||||
|
|
||||||
# optionManager.add_option('-S','--normalized-smallest',
|
# optionManager.add_option('-S','--normalized-smallest',
|
||||||
# action="store_false", dest="large",
|
# action="store_false", dest="large",
|
||||||
# default=True,
|
# default=True,
|
||||||
@ -190,6 +199,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
|
|||||||
return lcs, lali
|
return lcs, lali
|
||||||
|
|
||||||
|
|
||||||
|
def cachedLenLCS(s1,s2,minid,normalized,reference):
    """
    Return ``lenlcs(s1,s2,minid,normalized,reference)``, memoised in the
    module-level ``__LCSCache__`` ordered dictionary.

    The cache key is the unordered pair of sequence ids
    (``frozenset((s1.id,s2.id))``), so ``(a,b)`` and ``(b,a)`` share one
    entry.  Every call, hit or miss, (re)inserts the entry at the end of
    the dictionary; once more than 1000000 entries are stored the entry
    at the front (the least recently used one) is dropped.

    NOTE(review): ``minid``, ``normalized`` and ``reference`` are not part
    of the key, so a cached value is returned even when these arguments
    differ from the call that computed it -- confirm this is acceptable
    for ``lenlcs``.

    @param s1: first sequence; must expose an ``id`` attribute
    @param s2: second sequence; must expose an ``id`` attribute
    @param minid: forwarded unchanged to ``lenlcs`` on a cache miss
    @param normalized: forwarded unchanged to ``lenlcs`` on a cache miss
    @param reference: forwarded unchanged to ``lenlcs`` on a cache miss

    @return: whatever ``lenlcs`` returned for this pair of ids
    """
    global __LCSCache__

    key = frozenset([s1.id, s2.id])

    try:
        # Remove the entry on a hit so that the insertion below moves it
        # to the most-recently-used end of the ordered dictionary.
        score = __LCSCache__.pop(key)
    except KeyError:
        score = lenlcs(s1, s2, minid, normalized, reference)

    __LCSCache__[key] = score

    # Keep the cache bounded: evict from the front (oldest entry).
    if len(__LCSCache__) > 1000000:
        __LCSCache__.popitem(last=False)

    return score
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#def lcsIterator(entries,db,options):
|
#def lcsIterator(entries,db,options):
|
||||||
#
|
#
|
||||||
# for seq in entries:
|
# for seq in entries:
|
||||||
@ -233,7 +262,7 @@ def lcsIteratorSelf(entries,db,options):
|
|||||||
maxid = ([],0.0)
|
maxid = ([],0.0)
|
||||||
minid = options.minimum
|
minid = options.minimum
|
||||||
for d in db:
|
for d in db:
|
||||||
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) # @UnusedVariable
|
||||||
if lcs > maxid[1] and lcs > options.minimum:
|
if lcs > maxid[1] and lcs > options.minimum:
|
||||||
maxid = ([d],lcs)
|
maxid = ([d],lcs)
|
||||||
minid = maxid[1]
|
minid = maxid[1]
|
||||||
@ -241,11 +270,13 @@ def lcsIteratorSelf(entries,db,options):
|
|||||||
maxid[0].append(d)
|
maxid[0].append(d)
|
||||||
|
|
||||||
if maxid[0]:
|
if maxid[0]:
|
||||||
|
if maxid[1] > options.circle:
|
||||||
|
maxid[1]=options.circle
|
||||||
results.extend([(s,maxid[1]) for s in maxid[0]])
|
results.extend([(s,maxid[1]) for s in maxid[0]])
|
||||||
for d in db:
|
for d in db:
|
||||||
for s in maxid[0]:
|
for s in maxid[0]:
|
||||||
if d.id != s.id:
|
if d.id != s.id:
|
||||||
lcs,lali = lenlcs(s,d,maxid[1],normalized=True,reference=ALILEN)
|
lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN) # @UnusedVariable
|
||||||
if lcs >= maxid[1]:
|
if lcs >= maxid[1]:
|
||||||
results.append((d,lcs))
|
results.append((d,lcs))
|
||||||
|
|
||||||
@ -253,6 +284,8 @@ def lcsIteratorSelf(entries,db,options):
|
|||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
|
|
||||||
|
__LCSCache__=OrderedDict()
|
||||||
|
|
||||||
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
|
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
|
||||||
|
|
||||||
(options, entries) = optionParser()
|
(options, entries) = optionParser()
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 1
|
major = 1
|
||||||
minor = 1
|
minor = 1
|
||||||
serial= '18'
|
serial= '19'
|
||||||
|
|
||||||
version = "%2d.%02d %s" % (major,minor,serial)
|
version = "%2d.%02d %s" % (major,minor,serial)
|
||||||
|
Reference in New Issue
Block a user