Test of a new version of the ecoTag sequence matcher

This commit is contained in:
2012-04-17 11:35:37 +00:00
parent 7b0033a1b6
commit 53f6640bba

View File

@ -92,8 +92,10 @@ def addSearchOptions(optionManager):
default=False,
help='Add an extra column in the output with the query sequence')
optionManager.add_option('--self-matches',
action='store_true',dest='selfmatches',
default=False,
help='Switch to the new match algorithm')
def count(data):
@ -135,6 +137,27 @@ def lcsIterator(entries,db,options):
results = [x for x in results if x[1]>=minid]
yield seq,maxid,results
def lcsIteratorSelf(entries,db,options):
    """Yield, for each query, its best reference and the references matching it.

    For every sequence of `entries` two passes are made over `db`:

    1. the query is compared to each reference with `lenlcs` and the
       reference with the strictly highest score is kept as the best match;
    2. the best reference is itself compared to every other reference
       (those whose `id` differs from its own) and the ones scoring at
       least as high as the query/best score are collected.

    `db` is iterated twice per query, so it must be re-iterable (a list,
    not a one-shot iterator).

    Parameters:
        entries -- iterable of query sequences.
        db      -- re-iterable collection of reference sequences; each one
                   must expose an `id` attribute.
        options -- object providing `minimum` (initial threshold handed to
                   `lenlcs`) and `shape` (exponent applied to the best
                   score to derive the next threshold).

    Yields:
        (seq, maxid, results) where `maxid` is `(best_reference, score)`,
        or `(None, 0.0)` when no reference scored above 0.0, and `results`
        is a list of `(reference, score)` pairs from the second pass.  The
        best reference itself is not included in `results`.
    """
    for seq in entries:
        results = []
        maxid = (None,0.0)
        minid = options.minimum

        # First pass: look for the reference closest to the query.
        # presumably the third argument of lenlcs is a cut-off below which
        # the score is not worth computing -- confirm against its definition.
        for d in db:
            lcs,_lali = lenlcs(seq,d,minid,normalized=True)
            if lcs > maxid[1]:
                maxid = (d,lcs)
                # NOTE(review): the indentation of this statement was lost
                # in the reviewed copy; it is placed inside the `if` so the
                # threshold follows the best score found so far -- confirm.
                minid = maxid[1] ** options.shape

        best,bestscore = maxid

        # Second pass: only possible when a best reference exists.  Without
        # this guard an unmatched query fails on `None.id` as soon as the
        # database is not empty.
        if best is not None:
            for d in db:
                if d.id != best.id:
                    lcs,_lali = lenlcs(best,d,bestscore,normalized=True)
                    if lcs >= bestscore:
                        results.append((d,lcs))

        yield seq,maxid,results
if __name__=='__main__':
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions],
@ -146,6 +169,7 @@ if __name__=='__main__':
if options.explain is not None:
options.table=True
taxonomy = loadTaxonomyDatabase(options)
print >>sys.stderr,"Reading reference DB ...",
@ -177,7 +201,12 @@ if __name__=='__main__':
if options.sort is not None:
entries = sortSequence(entries, options.sort, options.reverse)
search = lcsIterator(entries,db,options)
matcher = lcsIterator
if options.selfmatches:
matcher= lcsIteratorSelf
search = matcher(entries,db,options)
for seq,best,match in search: