Add a --minimum-circle option to ecotag and a cache of self-alignment scores

The cache holds up to 1000000 pairwise alignment scores.
This commit is contained in:
2015-07-02 16:14:22 +02:00
parent b99881817a
commit f38ccae698
3 changed files with 37 additions and 4 deletions

View File

@ -19,7 +19,7 @@ from os import path
PACKAGE = "OBITools" PACKAGE = "OBITools"
VERSION = "1.1.18" VERSION = "1.1.19"
AUTHOR = 'Eric Coissac' AUTHOR = 'Eric Coissac'
EMAIL = 'eric@coissac.eu' EMAIL = 'eric@coissac.eu'
URL = 'metabarcoding.org/obitools' URL = 'metabarcoding.org/obitools'

View File

@ -46,6 +46,8 @@ from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDat
from obitools.options import getOptionManager from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, sequenceWriterGenerator from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from collections import OrderedDict
import sys import sys
import math import math
import os.path import os.path
@ -76,6 +78,13 @@ def addSearchOptions(optionManager):
default=0.0, default=0.0,
help="minimum identity to consider.") help="minimum identity to consider.")
optionManager.add_option('--minimum-circle',
action="store", dest="circle",
metavar="identity",
type="float",
default=1.0,
help="minimum identity considered for the assignment circle.")
# optionManager.add_option('-S','--normalized-smallest', # optionManager.add_option('-S','--normalized-smallest',
# action="store_false", dest="large", # action="store_false", dest="large",
# default=True, # default=True,
@ -190,6 +199,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
return lcs, lali return lcs, lali
def cachedLenLCS(s1,s2,minid,normalized,reference,maxsize=1000000):
    """
    Return the result of ``lenlcs(s1,s2,minid,normalized,reference)``
    through a bounded, least-recently-used cache.

    Results are stored in the module-level ``__LCSCache__`` ordered
    dictionary, keyed by the unordered pair of sequence ids, so
    ``(s1,s2)`` and ``(s2,s1)`` share a single entry.

    @param s1: first sequence; only its ``id`` attribute is read here
    @param s2: second sequence; only its ``id`` attribute is read here
    @param minid: forwarded unchanged to ``lenlcs`` on a cache miss
    @param normalized: forwarded unchanged to ``lenlcs`` on a cache miss
    @param reference: forwarded unchanged to ``lenlcs`` on a cache miss
    @param maxsize: maximum number of pairs kept in the cache; the oldest
                    entries are evicted once this bound is exceeded

    @return: whatever ``lenlcs`` returned for this pair of ids, either
             freshly computed or taken from the cache

    NOTE(review): the cache key ignores ``minid``, ``normalized`` and
    ``reference``. A score cached for a pair is returned as is even when a
    later call uses different values for these arguments - confirm that
    the result of ``lenlcs`` does not depend on them for the callers of
    this function.
    """
    global __LCSCache__

    try:
        cache = __LCSCache__
    except NameError:
        # The script entry point is the only other place creating the cache;
        # build it on demand so the function also works once imported.
        cache = __LCSCache__ = OrderedDict()

    pair = frozenset((s1.id, s2.id))

    if pair in cache:
        # Remove the entry so that the insertion below puts it back at the
        # most recently used end of the ordered dictionary.
        rep = cache.pop(pair)
    else:
        rep = lenlcs(s1, s2, minid, normalized, reference)

    cache[pair] = rep

    # Evict from the least recently used end until the bound is respected.
    while len(cache) > maxsize:
        cache.popitem(last=False)

    return rep
#def lcsIterator(entries,db,options): #def lcsIterator(entries,db,options):
# #
# for seq in entries: # for seq in entries:
@ -233,7 +262,7 @@ def lcsIteratorSelf(entries,db,options):
maxid = ([],0.0) maxid = ([],0.0)
minid = options.minimum minid = options.minimum
for d in db: for d in db:
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) # @UnusedVariable
if lcs > maxid[1] and lcs > options.minimum: if lcs > maxid[1] and lcs > options.minimum:
maxid = ([d],lcs) maxid = ([d],lcs)
minid = maxid[1] minid = maxid[1]
@ -241,11 +270,13 @@ def lcsIteratorSelf(entries,db,options):
maxid[0].append(d) maxid[0].append(d)
if maxid[0]: if maxid[0]:
if maxid[1] > options.circle:
maxid[1]=options.circle
results.extend([(s,maxid[1]) for s in maxid[0]]) results.extend([(s,maxid[1]) for s in maxid[0]])
for d in db: for d in db:
for s in maxid[0]: for s in maxid[0]:
if d.id != s.id: if d.id != s.id:
lcs,lali = lenlcs(s,d,maxid[1],normalized=True,reference=ALILEN) lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN) # @UnusedVariable
if lcs >= maxid[1]: if lcs >= maxid[1]:
results.append((d,lcs)) results.append((d,lcs))
@ -253,6 +284,8 @@ def lcsIteratorSelf(entries,db,options):
if __name__=='__main__': if __name__=='__main__':
__LCSCache__=OrderedDict()
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__) optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
(options, entries) = optionParser() (options, entries) = optionParser()

View File

@ -1,5 +1,5 @@
major = 1 major = 1
minor = 1 minor = 1
serial= '18' serial= '19'
version = "%2d.%02d %s" % (major,minor,serial) version = "%2d.%02d %s" % (major,minor,serial)