This commit is contained in:
@ -1,73 +0,0 @@
|
||||
from itertools import imap
|
||||
|
||||
|
||||
_dna = 'acgt'


def wordIterator(size, _prefix=''):
    '''
    Iterate through the list of all DNA words of size `size`.

    Words are produced in lexicographic order with respect to the
    alphabet order 'a', 'c', 'g', 't' (the first letter varies the
    slowest). A size of 0 yields a single word: `_prefix` itself.

    @param size: size of the DNA word
    @type size: int
    @param _prefix: string prepended to every generated word. It was
                    the recursion accumulator of the former recursive
                    implementation and is kept for compatibility.
    @type _prefix: string

    @return: an iterator on DNA word (str)
    @rtype: iterator

    @raise ValueError: if `size` is negative. The error is raised when
                       the iterator is first advanced.
    '''
    # Local import: the module only imports `imap` from itertools.
    from itertools import product

    # A negative size used to recurse without ever reaching the base
    # case; reject it explicitly instead.
    if size < 0:
        raise ValueError("size must be a non-negative integer, got %r" % (size,))

    for letters in product(_dna, repeat=size):
        yield _prefix + ''.join(letters)
|
||||
|
||||
def wordSelector(words, accept=None, reject=None):
    '''
    Filter over a DNA word iterator.

    A word is kept when every predicate of `accept` is true for it
    and no predicate of `reject` is true for it. Predicates are
    evaluated in list order and evaluation stops as soon as the
    outcome is known: `reject` predicates are not called for a word
    already refused by an `accept` predicate.

    @param words: an iterable object over a list of DNA words
    @type words: an iterator
    @param accept: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value. None means no constraint.
    @type accept: list
    @param reject: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value. None means no constraint.
    @type reject: list

    @return: an iterator on DNA word (str)
    @rtype: iterator
    '''
    if accept is None:
        accept = ()
    if reject is None:
        reject = ()

    for w in words:
        # all()/any() short-circuit and are builtins in every Python
        # version, unlike reduce().
        if all(p(w) for p in accept) and not any(p(w) for p in reject):
            yield w
|
||||
|
||||
def wordDist(w1, w2):
    '''
    Compute the Hamming distance between two words of the same size.

    Letters are compared position by position. If the words differ in
    length, only the positions present in both are compared. Two
    empty words are at distance 0.

    @param w1: the first word
    @type w1: str
    @param w2: the second word
    @type w2: str

    @return: the count of differences between the two words
    @rtype: int
    '''
    # sum() over zip() returns 0 for empty input, where reduce()
    # without an initial value raised TypeError.
    return sum(1 for a, b in zip(w1, w2) if a != b)
|
@ -1,103 +0,0 @@
|
||||
from logging import debug,root,DEBUG
|
||||
|
||||
|
||||
|
||||
from obitools.oligo import wordSelector,wordIterator
|
||||
from obitools.oligo import predicat
|
||||
|
||||
|
||||
|
||||
def _acceptedOptionCallback(options, opt, value, parser):
    '''
    Option parser callback recording one accepted oligonucleotide pattern.

    The pattern `value` is turned into a predicate with
    `predicat.rePredicatGenerator` and appended to the
    `acceptedOligo` list of `parser.values`. The list is created
    the first time the callback runs.

    @param options: first callback argument (unused here)
    @param opt: second callback argument (unused here)
    @param value: the regular pattern given on the command line
    @type value: str
    @param parser: the parser whose `values` object stores the list
    '''
    values = parser.values
    try:
        predicates = values.acceptedOligo
    except AttributeError:
        # First use of the option: create the list.
        predicates = values.acceptedOligo = []
    predicates.append(predicat.rePredicatGenerator(value))
|
||||
|
||||
def _rejectedOptionCallback(options, opt, value, parser):
    '''
    Option parser callback recording one rejected oligonucleotide pattern.

    The pattern `value` is logged at debug level, turned into a
    predicate with `predicat.rePredicatGenerator` and appended to
    the `rejectedOligo` list of `parser.values`. The list is
    created the first time the callback runs.

    @param options: first callback argument (unused here)
    @param opt: second callback argument (unused here)
    @param value: the regular pattern given on the command line
    @type value: str
    @param parser: the parser whose `values` object stores the list
    '''
    debug(value)
    values = parser.values
    try:
        predicates = values.rejectedOligo
    except AttributeError:
        # First use of the option: create the list.
        predicates = values.rejectedOligo = []
    predicates.append(predicat.rePredicatGenerator(value))
|
||||
|
||||
|
||||
|
||||
def addOligoOptions(optionManager):
    '''
    Declare the oligonucleotide related options on `optionManager`.

    The options are registered, in this order, through
    `optionManager.add_option`:

      - -s / --oligo-size      (int, dest oligoSize)
      - -f / --family-size     (int, dest familySize)
      - -d / --distance        (int, dest oligoDist, default 1)
      - -g / --gc-max          (int, dest gcMax, default 0)
      - -a / --accepted        (pattern, handled by _acceptedOptionCallback)
      - -r / --rejected        (pattern, handled by _rejectedOptionCallback)
      - -p / --homopolymere    (int, dest homopolymere, default 0)
      - -P / --homopolymere-min (int, dest homopolymere_min, default 0)

    @param optionManager: object providing an `add_option` method
    '''
    register = optionManager.add_option

    def intOption(shortFlag, longFlag, dest, helpText, **extra):
        # Every integer option shares the same action, metavar and type.
        register(shortFlag, longFlag,
                 action="store", dest=dest,
                 metavar="<###>",
                 type="int",
                 help=helpText,
                 **extra)

    def patternOption(shortFlag, longFlag, callback, helpText):
        # Pattern options may be repeated; the callback accumulates them.
        register(shortFlag, longFlag,
                 action="callback", callback=callback,
                 metavar="<regular pattern>",
                 type="str",
                 help=helpText)

    intOption('-s', '--oligo-size', "oligoSize",
              "Size of oligonucleotide to generate")

    intOption('-f', '--family-size', "familySize",
              "Size of oligonucleotide family to generate")

    intOption('-d', '--distance', "oligoDist",
              "minimal distance between two oligonucleotides",
              default=1)

    intOption('-g', '--gc-max', "gcMax",
              "maximum count of G or C nucleotide acceptable in a word",
              default=0)

    patternOption('-a', '--accepted', _acceptedOptionCallback,
                  "pattern of accepted oligonucleotide")

    patternOption('-r', '--rejected', _rejectedOptionCallback,
                  "pattern of rejected oligonucleotide")

    intOption('-p', '--homopolymere', "homopolymere",
              "reject oligo with homopolymere longer than.",
              default=0)

    intOption('-P', '--homopolymere-min', "homopolymere_min",
              "accept only oligo with homopolymere longer than.",
              default=0)
|
||||
|
||||
def dnaWordIterator(options):
    '''
    Build the iterator over the DNA words selected by `options`.

    All the words of size `options.oligoSize` are enumerated and
    filtered with `wordSelector`. The filters are the predicates
    already stored in `options.acceptedOligo` and
    `options.rejectedOligo` (the lists are created if missing),
    extended in place with:

      - accept: the minimal distance predicate built by
                `predicat.distMinGenerator` from a seed word made of
                'a' only and `options.oligoDist`;
      - reject: the `predicat.homoPolymerGenerator` predicate for
                `options.homopolymere`, when non zero;
      - accept: the `predicat.homoPolymerGenerator` predicate for
                `options.homopolymere_min`, when non zero;
      - accept: the `predicat.gcUpperBondGenerator` predicate for
                `options.gcMax`, when non zero.

    @param options: the parsed command line options
    @return: an iterator on DNA word (str)
    @rtype: iterator

    @raise AssertionError: if the oligo size, family size or distance
                           option is None
    '''
    assert options.oligoSize is not None, "option -s or --oligo-size must be specified"
    assert options.familySize is not None, "option -f or --family-size must be specified"
    assert options.oligoDist is not None, "option -d or --distance must be specified"

    words = wordIterator(options.oligoSize)
    seed = 'a' * options.oligoSize

    if not hasattr(options, "acceptedOligo"):
        options.acceptedOligo = []

    if not hasattr(options, "rejectedOligo"):
        options.rejectedOligo = []

    options.acceptedOligo.append(predicat.distMinGenerator(seed, options.oligoDist))

    if options.homopolymere:
        options.rejectedOligo.append(predicat.homoPolymerGenerator(options.homopolymere))

    if options.homopolymere_min:
        options.acceptedOligo.append(predicat.homoPolymerGenerator(options.homopolymere_min))

    if options.gcMax:
        # The predicate is true when the GC count is within the bound,
        # so it belongs to the accept list. As a reject predicate it
        # discarded exactly the words --gc-max declares acceptable.
        options.acceptedOligo.append(predicat.gcUpperBondGenerator(options.gcMax))

    return wordSelector(words, options.acceptedOligo, options.rejectedOligo)
|
@ -1,23 +0,0 @@
|
||||
import re
|
||||
from obitools.oligo import wordDist
|
||||
|
||||
def rePredicatGenerator(regex):
    '''
    Build a predicate testing a word against a regular pattern.

    The pattern is compiled once, ignoring case. The returned
    predicate is true when the pattern is found anywhere in the word.

    @param regex: the regular pattern
    @type regex: str

    @return: a function accepting one str and returning a boolean
    @rtype: function
    '''
    search = re.compile(regex, re.IGNORECASE).search

    def predicat(w):
        return search(w) is not None

    return predicat
|
||||
|
||||
def gcUpperBondGenerator(count):
    '''
    Build a predicate testing the G+C content of a word.

    The returned predicate is true when the number of 'g' and 'c'
    letters of the word (lower case only) is at most `count`.

    @param count: the maximum number of g or c
    @type count: int

    @return: a function accepting one str and returning a boolean
    @rtype: function
    '''
    def predicat(w):
        gcCount = sum(w.count(base) for base in 'gc')
        return gcCount <= count

    return predicat
|
||||
|
||||
def homoPolymerGenerator(count):
    '''
    Build a predicate detecting a homopolymer in a word.

    The pattern is one captured character followed by `count` - 1
    back references to it, i.e. a run of `count` identical
    characters. The predicate itself is built by
    `rePredicatGenerator`.

    @param count: length of the homopolymer to look for
    @type count: int

    @return: a function accepting one str and returning a boolean
    @rtype: function
    '''
    backReferences = ['\\1'] * (count - 1)
    pattern = ''.join(['(.)'] + backReferences)
    return rePredicatGenerator(pattern)
|
||||
|
||||
def distMinGenerator(word, dmin):
    '''
    Build a predicate testing the distance of a word to `word`.

    The returned predicate is true when the tested word is equal to
    `word`, or when `wordDist` between both is at least `dmin`.

    @param word: the reference word
    @type word: str
    @param dmin: the minimal distance to the reference word
    @type dmin: int

    @return: a function accepting one str and returning a boolean
    @rtype: function
    '''
    def predicat(w):
        # The reference word itself is always accepted.
        if w == word:
            return True
        return wordDist(w, word) >= dmin

    return predicat
|
Reference in New Issue
Block a user