Add a --minimum-circle option to ecotag and a cache on the self

alignment scores, holding up to 1000000 pairwise scores
This commit is contained in:
2015-07-02 16:14:22 +02:00
parent b99881817a
commit f38ccae698
3 changed files with 37 additions and 4 deletions

View File

@ -19,7 +19,7 @@ from os import path
PACKAGE = "OBITools"
VERSION = "1.1.18"
VERSION = "1.1.19"
AUTHOR = 'Eric Coissac'
EMAIL = 'eric@coissac.eu'
URL = 'metabarcoding.org/obitools'

View File

@ -46,6 +46,8 @@ from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDat
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from collections import OrderedDict
import sys
import math
import os.path
@ -76,6 +78,13 @@ def addSearchOptions(optionManager):
default=0.0,
help="minimum identity to consider.")
optionManager.add_option('--minimum-circle',
action="store", dest="circle",
metavar="identity",
type="float",
default=1.0,
help="minimum identity considered for the assignment circle.")
# optionManager.add_option('-S','--normalized-smallest',
# action="store_false", dest="large",
# default=True,
@ -190,6 +199,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
return lcs, lali
def cachedLenLCS(s1,s2,minid,normalized,reference):
    """Return ``lenlcs(s1, s2, minid, normalized, reference)`` through a bounded cache.

    Results are stored in the module-level ``__LCSCache__`` ordered
    dictionary, keyed by the unordered pair of sequence ids
    ``frozenset((s1.id, s2.id))``, so ``(a, b)`` and ``(b, a)`` share one
    entry.  A hit is moved to the most-recent end of the dictionary; once
    more than 1000000 entries are stored the oldest one is evicted.

    :param s1: first sequence; only its ``id`` attribute is read here.
    :param s2: second sequence; only its ``id`` attribute is read here.
    :param minid: forwarded unchanged to ``lenlcs`` on a cache miss.
    :param normalized: forwarded unchanged to ``lenlcs`` on a cache miss.
    :param reference: forwarded unchanged to ``lenlcs`` on a cache miss.
    :return: whatever ``lenlcs`` returned for this pair (fresh or cached).

    NOTE(review): the key does not include ``minid``, ``normalized`` or
    ``reference``.  If the value returned by ``lenlcs`` depends on them, a
    cached value computed with other settings may be returned -- confirm
    against ``lenlcs`` and the callers.
    """
    global __LCSCache__

    try:
        cache = __LCSCache__
    except NameError:
        # The cache is otherwise only created by the script entry point;
        # build it on first use so the function also works after an import.
        cache = __LCSCache__ = OrderedDict()

    pair = frozenset((s1.id, s2.id))

    if pair in cache:
        # Hit: take the entry out so the re-insertion below puts it at the
        # most-recent end (OrderedDict keeps insertion order).
        rep = cache.pop(pair)
    else:
        rep = lenlcs(s1, s2, minid, normalized, reference)

    cache[pair] = rep

    # Bound memory use: drop the oldest entry once the limit is exceeded.
    if len(cache) > 1000000:
        cache.popitem(last=False)

    return rep
#def lcsIterator(entries,db,options):
#
# for seq in entries:
@ -233,7 +262,7 @@ def lcsIteratorSelf(entries,db,options):
maxid = ([],0.0)
minid = options.minimum
for d in db:
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) # @UnusedVariable
if lcs > maxid[1] and lcs > options.minimum:
maxid = ([d],lcs)
minid = maxid[1]
@ -241,11 +270,13 @@ def lcsIteratorSelf(entries,db,options):
maxid[0].append(d)
if maxid[0]:
if maxid[1] > options.circle:
maxid[1]=options.circle
results.extend([(s,maxid[1]) for s in maxid[0]])
for d in db:
for s in maxid[0]:
if d.id != s.id:
lcs,lali = lenlcs(s,d,maxid[1],normalized=True,reference=ALILEN)
lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN) # @UnusedVariable
if lcs >= maxid[1]:
results.append((d,lcs))
@ -253,6 +284,8 @@ def lcsIteratorSelf(entries,db,options):
if __name__=='__main__':
__LCSCache__=OrderedDict()
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
(options, entries) = optionParser()

View File

@ -1,5 +1,5 @@
major = 1
minor = 1
serial= '18'
serial= '19'
version = "%2d.%02d %s" % (major,minor,serial)