test a new obitools script
This commit is contained in:
@ -95,25 +95,27 @@ def indexSequence(seq,w2s,s2w,options):
|
|||||||
|
|
||||||
s2w[seq]=ws
|
s2w[seq]=ws
|
||||||
|
|
||||||
def addSeq2Pattern(seq,words,options):
|
def addSeq2Pattern(seqs,words,options):
|
||||||
idx=Index()
|
lw=len(words)
|
||||||
for w in words:
|
for seq in seqs:
|
||||||
idx.enter(w)
|
s = str(seq)
|
||||||
s = str(seq)
|
cs= str(seq.complement())
|
||||||
cs= str(seq.complement())
|
ls = len(s) - options.length + 1
|
||||||
ls = len(s) - options.length + 1
|
for wp in xrange(0,ls,options.step):
|
||||||
for wp in xrange(0,ls,options.step):
|
w=s[wp:wp+options.length]
|
||||||
w=s[wp:wp+options.length]
|
|
||||||
if w not in words:
|
|
||||||
words.add(w)
|
words.add(w)
|
||||||
idx.enter(w)
|
w=cs[wp:wp+options.length]
|
||||||
w=cs[wp:wp+options.length]
|
|
||||||
if w not in words:
|
|
||||||
words.add(w)
|
words.add(w)
|
||||||
idx.enter(w)
|
|
||||||
idx.fix()
|
|
||||||
return idx,words
|
|
||||||
|
|
||||||
|
if len(words)>lw:
|
||||||
|
print >>sys.stderr,"\nAdd %d new words to automata" % (len(words)-lw)
|
||||||
|
idx=Index()
|
||||||
|
for w in words:
|
||||||
|
idx.enter(w)
|
||||||
|
idx.fix()
|
||||||
|
return idx,words
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
def lookforseq(seq,w2s,s2w,options):
|
def lookforseq(seq,w2s,s2w,options):
|
||||||
s = str(seq)
|
s = str(seq)
|
||||||
@ -174,9 +176,7 @@ if __name__ == '__main__':
|
|||||||
word2seq = {}
|
word2seq = {}
|
||||||
seq2word = {}
|
seq2word = {}
|
||||||
|
|
||||||
for rs in reference:
|
words,wordlist=addSeq2Pattern((rs for rs in reference), wordlist, options)
|
||||||
words,wordlist=addSeq2Pattern(rs, wordlist, options)
|
|
||||||
|
|
||||||
|
|
||||||
writer = sequenceWriterGenerator(options)
|
writer = sequenceWriterGenerator(options)
|
||||||
nbseq=0
|
nbseq=0
|
||||||
@ -191,8 +191,11 @@ if __name__ == '__main__':
|
|||||||
if r:
|
if r:
|
||||||
# writer(seq)
|
# writer(seq)
|
||||||
|
|
||||||
|
nidx=addSeq2Pattern(seq, wordlist, options)
|
||||||
|
if nidx is not None:
|
||||||
|
words,wordlist=nidx
|
||||||
|
|
||||||
for s in seq:
|
for s in seq:
|
||||||
words,wordlist=addSeq2Pattern(s, wordlist, options)
|
|
||||||
st = lookforseq(s, word2seq, seq2word, options)
|
st = lookforseq(s, word2seq, seq2word, options)
|
||||||
nbseq+=len(st)
|
nbseq+=len(st)
|
||||||
ldict=len(wordlist)
|
ldict=len(wordlist)
|
||||||
|
Reference in New Issue
Block a user