A new set of cleaned proteins for the CDS detector, prepared using the

clusterizecore.sh script from the detectors/cds/lib folder.

The CDS detector is now modified to use the clean.fst files.


Former-commit-id: e30a53b5b6b658388af4b2640b30e6765c729894
Former-commit-id: 3015ad50d25248fb117ab00e816b00fde1f9ba1d
This commit is contained in:
2016-10-05 08:01:30 -03:00
parent 3a8860aaf7
commit d4da1d01fd
86 changed files with 202819 additions and 14 deletions

View File

@ -1,6 +1,8 @@
#!/usr/bin/env python
import sys
from math import lgamma
from math import log
# Input file named by the first command-line argument (default read mode).
data = open(sys.argv[1])
# Input file named by the second command-line argument (default read mode).
repeats = open(sys.argv[2])
@ -8,6 +10,10 @@ repeats = open(sys.argv[2])
# One initially empty list per key 'LSC' and 'SSC'
# (presumably the large and small single-copy regions -- TODO confirm).
chloro = {'LSC' : [], 'SSC' : [] }
# Starts at zero; presumably accumulates a sequence length -- verify against
# the code that updates it.
chlorosize =0
def lpbinom(x,n,p):
    """Return the natural log of the binomial probability mass P(X = x).

    X follows a binomial distribution with ``n`` trials and per-trial
    success probability ``p``.  The binomial coefficient is evaluated
    through ``lgamma`` so that large ``n`` does not overflow.

    Args:
        x: number of successes.
        n: number of trials.
        p: success probability of a single trial.

    Returns:
        The log-probability as a float; ``float("-inf")`` when the outcome
        is impossible because ``p`` is exactly 0 or exactly 1.

    Raises:
        ValueError: propagated from ``log``/``lgamma`` when ``p`` lies
            outside [0, 1] or ``x`` lies outside the range 0..n.
    """
    # log(0) is a domain error, yet the distribution is well defined at the
    # two degenerate probabilities: all the mass sits on a single outcome.
    if p == 0:
        return 0.0 if x == 0 else float("-inf")
    if p == 1:
        return 0.0 if x == n else float("-inf")

    # x*log(p) + (n-x)*log(1-p) + log(C(n, x)), with
    # log(C(n, x)) = lgamma(n+1) - lgamma(x+1) - lgamma(n-x+1).
    lprob = log(p) * x + log(1-p) * (n-x) + lgamma(n+1) - lgamma(x+1) - lgamma(n-x+1)
    return lprob
# We scan the blast matches:
# We build a vector with one position per base pair counting the matches
@ -102,6 +108,7 @@ for line in repeats:
o_ssc /= o
score = ((c_lsc - c_ssc) ** 2 + (o_lsc - o_ssc) ** 2) / 2.0
# pvalue=
# print >>sys.stderr,"c.lsc = %f c.ssc = %f o.lsc = %f o.ssc = %f score = %6.4f (len=%d)" % (c_lsc,c_ssc,o_lsc,o_ssc,score,len1)
if (score >= scoreMax) and ((len1 > len1Max) or (len2 > len2Max)):