ADD: new option --fieldcutname to make ecoTag work with paired-end sequences (experimental)
This commit is contained in:
@ -102,6 +102,12 @@ def addSearchOptions(optionManager):
|
|||||||
default=0.0,
|
default=0.0,
|
||||||
help='Tolarated rate of wrong assignation')
|
help='Tolarated rate of wrong assignation')
|
||||||
|
|
||||||
|
optionManager.add_option('-F','--fieldcutname',
|
||||||
|
action='store',dest='fieldcutname',
|
||||||
|
default=None,
|
||||||
|
help="""Turn Unassembled match mode 'ON' when 'fieldcutname' is a tag of the sequence.
|
||||||
|
This tag gives the position of the junction of the two reads in the sequence (on (Celine + Fred) parle trop bien anglais !)""")
|
||||||
|
|
||||||
|
|
||||||
def count(data):
|
def count(data):
|
||||||
rep = {}
|
rep = {}
|
||||||
@ -121,6 +127,34 @@ def count(data):
|
|||||||
rep[k]=rep.get(k,default)+v
|
rep[k]=rep.get(k,default)+v
|
||||||
return rep
|
return rep
|
||||||
|
|
||||||
|
|
||||||
|
from obitools.location import SimpleLocation
|
||||||
|
def myLenlcs(s1, s2, fieldCutName, minid, normalized, reference):
|
||||||
|
|
||||||
|
if (fieldCutName is not None) and (s1.hasKey(fieldCutName)) :
|
||||||
|
|
||||||
|
cutPos = s1[fieldCutName]
|
||||||
|
locs1_5P = SimpleLocation(1,cutPos)
|
||||||
|
locs1_3P = SimpleLocation(cutPos+1,len(s1))
|
||||||
|
|
||||||
|
locs2_5P = SimpleLocation(1, min(cutPos, len(s2)))
|
||||||
|
locs2_3P = SimpleLocation(max(1, len(s2)-(len(s1)-cutPos)+1), len(s2))
|
||||||
|
|
||||||
|
overlap = min(0,len(s1) - len(s2))
|
||||||
|
|
||||||
|
lcs5P, lali5P = lenlcs(s1.getSubSeq(locs1_5P),s2.getSubSeq(locs2_5P),minid,False)
|
||||||
|
lcs3P, lali3P = lenlcs(s1.getSubSeq(locs1_3P),s2.getSubSeq(locs2_3P),minid,False)
|
||||||
|
|
||||||
|
raw_lcs = lcs5P + lcs3P - overlap
|
||||||
|
lali = lali5P + lali3P - overlap
|
||||||
|
lcs = raw_lcs / float(lali)
|
||||||
|
|
||||||
|
else:
|
||||||
|
lcs, lali = lenlcs(s1,s2,minid,normalized,reference)
|
||||||
|
|
||||||
|
return lcs, lali
|
||||||
|
|
||||||
|
|
||||||
def lcsIterator(entries,db,options):
|
def lcsIterator(entries,db,options):
|
||||||
|
|
||||||
for seq in entries:
|
for seq in entries:
|
||||||
@ -128,7 +162,7 @@ def lcsIterator(entries,db,options):
|
|||||||
maxid = (None,0.0)
|
maxid = (None,0.0)
|
||||||
minid = options.minimum
|
minid = options.minimum
|
||||||
for d in db:
|
for d in db:
|
||||||
lcs,lali = lenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
lcs,lali = myLenlcs(seq, d, options.fieldcutname, minid,normalized=True,reference=ALILEN)
|
||||||
if lcs > maxid[1]:
|
if lcs > maxid[1]:
|
||||||
maxid = (d,lcs)
|
maxid = (d,lcs)
|
||||||
minid = maxid[1] ** options.shape
|
minid = maxid[1] ** options.shape
|
||||||
@ -145,7 +179,7 @@ def lcsIteratorSelf(entries,db,options):
|
|||||||
maxid = ([],0.0)
|
maxid = ([],0.0)
|
||||||
minid = options.minimum
|
minid = options.minimum
|
||||||
for d in db:
|
for d in db:
|
||||||
lcs,lali = lenlcs(seq,d,minid,normalized=True,reference=ALILEN)
|
lcs,lali = myLenlcs(seq,d,options.fieldcutname,minid,normalized=True,reference=ALILEN)
|
||||||
if lcs > maxid[1]:
|
if lcs > maxid[1]:
|
||||||
maxid = ([d],lcs)
|
maxid = ([d],lcs)
|
||||||
minid = maxid[1]
|
minid = maxid[1]
|
||||||
|
Reference in New Issue
Block a user