ADD: new option --fieldcutname to make ecoTag work with paired-end sequences (experimental)
This commit is contained in:
@ -102,6 +102,12 @@ def addSearchOptions(optionManager):
|
||||
default=0.0,
|
||||
help='Tolarated rate of wrong assignation')
|
||||
|
||||
optionManager.add_option('-F','--fieldcutname',
|
||||
action='store',dest='fieldcutname',
|
||||
default=None,
|
||||
help="""Turn Unassembled match mode 'ON' when 'fieldcutname' is a tag of the sequence.
|
||||
This tag gives the position of the junction of the two reads in the sequence (on (Celine + Fred) parle trop bien anglais !)""")
|
||||
|
||||
|
||||
def count(data):
|
||||
rep = {}
|
||||
@ -121,6 +127,34 @@ def count(data):
|
||||
rep[k]=rep.get(k,default)+v
|
||||
return rep
|
||||
|
||||
|
||||
from obitools.location import SimpleLocation
|
||||
def myLenlcs(s1, s2, fieldCutName, minid, normalized, reference):
|
||||
|
||||
if (fieldCutName is not None) and (s1.hasKey(fieldCutName)) :
|
||||
|
||||
cutPos = s1[fieldCutName]
|
||||
locs1_5P = SimpleLocation(1,cutPos)
|
||||
locs1_3P = SimpleLocation(cutPos+1,len(s1))
|
||||
|
||||
locs2_5P = SimpleLocation(1, min(cutPos, len(s2)))
|
||||
locs2_3P = SimpleLocation(max(1, len(s2)-(len(s1)-cutPos)+1), len(s2))
|
||||
|
||||
overlap = min(0,len(s1) - len(s2))
|
||||
|
||||
lcs5P, lali5P = lenlcs(s1.getSubSeq(locs1_5P),s2.getSubSeq(locs2_5P),minid,False)
|
||||
lcs3P, lali3P = lenlcs(s1.getSubSeq(locs1_3P),s2.getSubSeq(locs2_3P),minid,False)
|
||||
|
||||
raw_lcs = lcs5P + lcs3P - overlap
|
||||
lali = lali5P + lali3P - overlap
|
||||
lcs = raw_lcs / float(lali)
|
||||
|
||||
else:
|
||||
lcs, lali = lenlcs(s1,s2,minid,normalized,reference)
|
||||
|
||||
return lcs, lali
|
||||
|
||||
|
||||
def lcsIterator(entries,db,options):
|
||||
|
||||
for seq in entries:
|
||||
@ -128,7 +162,7 @@ def lcsIterator(entries,db,options):
|
||||
maxid = (None,0.0)
|
||||
minid = options.minimum
|
||||
for d in db:
|
||||
lcs,lali = lenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
||||
lcs,lali = myLenlcs(seq, d, options.fieldcutname, minid,normalized=True,reference=ALILEN)
|
||||
if lcs > maxid[1]:
|
||||
maxid = (d,lcs)
|
||||
minid = maxid[1] ** options.shape
|
||||
@ -145,7 +179,7 @@ def lcsIteratorSelf(entries,db,options):
|
||||
maxid = ([],0.0)
|
||||
minid = options.minimum
|
||||
for d in db:
|
||||
lcs,lali = lenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
||||
lcs,lali = myLenlcs(seq,d,options.fieldcutname,minid,normalized=True,reference=ALILEN)
|
||||
if lcs > maxid[1]:
|
||||
maxid = ([d],lcs)
|
||||
minid = maxid[1]
|
||||
|
Reference in New Issue
Block a user