test a new obitools script

This commit is contained in:
2012-06-11 17:51:11 +00:00
parent c20b9d33a9
commit ea18b7f7ed

View File

@ -95,25 +95,27 @@ def indexSequence(seq,w2s,s2w,options):
s2w[seq]=ws s2w[seq]=ws
def addSeq2Pattern(seqs,words,options):
    """Extend `words` with the words of `seqs` and rebuild the index if needed.

    For every sequence in `seqs`, the string form of the sequence and the
    string form of `seq.complement()` are cut into windows of
    `options.length` characters, taking one window every `options.step`
    positions, and each window is added to `words` (mutated in place;
    it must support `add()` and `len()`).

    Returns:
        `(idx, words)` when at least one new word was added, where `idx`
        is a fresh `Index` into which every word of `words` has been
        entered and on which `fix()` has been called;
        `None` when `words` did not grow, so callers must test the result
        before unpacking it.
    """
    known = len(words)

    for record in seqs:
        direct = str(record)
        comp = str(record.complement())
        width = options.length
        # Number of window start positions that still leave a full-width word.
        nstarts = len(direct) - width + 1
        for start in xrange(0,nstarts,options.step):
            stop = start + width
            words.add(direct[start:stop])
            words.add(comp[start:stop])

    added = len(words) - known
    if added <= 0:
        # Nothing new: signal that the existing index is still valid.
        return None

    print >>sys.stderr,"\nAdd %d new words to automata" % added

    # The index is rebuilt from scratch over the complete word collection.
    automaton = Index()
    for word in words:
        automaton.enter(word)
    automaton.fix()
    return automaton,words
def lookforseq(seq,w2s,s2w,options): def lookforseq(seq,w2s,s2w,options):
s = str(seq) s = str(seq)
@ -174,9 +176,7 @@ if __name__ == '__main__':
word2seq = {} word2seq = {}
seq2word = {} seq2word = {}
for rs in reference: words,wordlist=addSeq2Pattern((rs for rs in reference), wordlist, options)
words,wordlist=addSeq2Pattern(rs, wordlist, options)
writer = sequenceWriterGenerator(options) writer = sequenceWriterGenerator(options)
nbseq=0 nbseq=0
@ -191,8 +191,11 @@ if __name__ == '__main__':
if r: if r:
# writer(seq) # writer(seq)
nidx=addSeq2Pattern(seq, wordlist, options)
if nidx is not None:
words,wordlist=nidx
for s in seq: for s in seq:
words,wordlist=addSeq2Pattern(s, wordlist, options)
st = lookforseq(s, word2seq, seq2word, options) st = lookforseq(s, word2seq, seq2word, options)
nbseq+=len(st) nbseq+=len(st)
ldict=len(wordlist) ldict=len(wordlist)