test a new obitools script

This commit is contained in:
2012-06-11 17:51:11 +00:00
parent c20b9d33a9
commit ea18b7f7ed

View File

@ -95,25 +95,27 @@ def indexSequence(seq,w2s,s2w,options):
s2w[seq]=ws
def addSeq2Pattern(seqs,words,options):
    """Collect the words of several sequences and rebuild the word index.

    For every sequence in `seqs`, a window of `options.length` characters
    is moved over ``str(seq)`` and over ``str(seq.complement())`` in steps
    of `options.step`; every window content is added to `words`.

    Parameters:
        seqs    -- iterable of sequence objects; each must support ``str()``
                   and provide a ``complement()`` method.
        words   -- set of already known words; it is updated IN PLACE.
        options -- object exposing the integer attributes ``length``
                   (word size) and ``step`` (window stride).

    Returns:
        ``(idx, words)`` when at least one new word was added, where `idx`
        is a freshly built ``Index`` holding every word of `words`;
        ``None`` when `seqs` contributed no new word.  Callers must test
        for ``None`` before unpacking the result.
    """
    # Remember the initial size so that we can tell whether anything new
    # was learned from `seqs`.
    lw = len(words)

    for seq in seqs:
        s = str(seq)
        cs = str(seq.complement())
        # Number of window start positions that still yield a full word.
        ls = len(s) - options.length + 1
        for wp in xrange(0, ls, options.step):
            # `words` is a set: adding a known word is a no-op, so no
            # membership test is needed beforehand.
            words.add(s[wp:wp + options.length])
            words.add(cs[wp:wp + options.length])

    added = len(words) - lw
    if added <= 0:
        # Nothing new: signal the caller that the current index is still
        # valid and does not need to be replaced.
        return None

    print >>sys.stderr,"\nAdd %d new words to automata" % added

    # The index is rebuilt from scratch with the complete word set and
    # finalised with fix() before being handed back.
    idx = Index()
    for w in words:
        idx.enter(w)
    idx.fix()
    return idx, words
def lookforseq(seq,w2s,s2w,options):
s = str(seq)
@ -174,10 +176,8 @@ if __name__ == '__main__':
word2seq = {}
seq2word = {}
for rs in reference:
words,wordlist=addSeq2Pattern(rs, wordlist, options)
words,wordlist=addSeq2Pattern((rs for rs in reference), wordlist, options)
writer = sequenceWriterGenerator(options)
nbseq=0
for seq in sequences:
@ -190,9 +190,12 @@ if __name__ == '__main__':
if r:
# writer(seq)
nidx=addSeq2Pattern(seq, wordlist, options)
if nidx is not None:
words,wordlist=nidx
for s in seq:
words,wordlist=addSeq2Pattern(s, wordlist, options)
st = lookforseq(s, word2seq, seq2word, options)
nbseq+=len(st)
ldict=len(wordlist)