This commit is contained in:
@ -1,73 +0,0 @@
|
|||||||
from itertools import imap
|
|
||||||
|
|
||||||
|
|
||||||
# Alphabet used to build the DNA words; its order fixes the enumeration order.
_dna='acgt'


def wordIterator(size,_prefix=''):
    '''
    Iterate through the list of all DNA words of size `size`.

    Words are produced in the order induced by the alphabet
    'a', 'c', 'g', 't' (first letter varying slowest). A size of 0
    yields a single word: the prefix itself.

    @param size: size of the DNA word
    @type size: int
    @param _prefix: internal parameter kept for backward compatibility;
                    it is prepended to every generated word
    @type _prefix: string

    @return: an iterator on DNA words (str)
    @rtype: iterator

    @raise ValueError: if `size` is negative (raised when the iterator
                       is advanced for the first time)
    '''
    # Function-scope import: the module header only imports `imap`.
    from itertools import product

    # A negative size used to recurse without ever reaching the stop
    # condition; report it explicitly instead.
    if size < 0:
        raise ValueError("size must be a non-negative integer")

    for letters in product(_dna, repeat=size):
        yield _prefix + ''.join(letters)
|
|
||||||
|
|
||||||
def wordSelector(words,accept=None,reject=None):
    '''
    Filter over a DNA word iterator.

    A word is yielded when every predicate of `accept` returns a true
    value for it and no predicate of `reject` does. With no predicate
    at all, every word is yielded.

    @param words: an iterable object over a list of DNA words
    @type words: an iterator
    @param accept: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type accept: list
    @param reject: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type reject: list

    @return: an iterator on DNA words (str)
    @rtype: iterator
    '''
    if accept is None:
        accept=[]
    if reject is None:
        reject=[]
    for w in words:
        # all()/any() stop at the first deciding predicate, so later
        # predicates are not evaluated once the outcome is known.
        if all(p(w) for p in accept) and not any(p(w) for p in reject):
            yield w
|
|
||||||
|
|
||||||
def wordDist(w1,w2):
    '''
    Compute the Hamming distance between two words of the same size.

    Letters are compared position by position. If the words differ in
    length, positions beyond the end of the shorter word are ignored.
    Two empty words are at distance 0.

    @param w1: the first word
    @type w1: str
    @param w2: the second word
    @type w2: str

    @return: the count of differences between the two words
    @rtype: int
    '''
    return sum(1 for a, b in zip(w1, w2) if a != b)
|
|
@ -1,103 +0,0 @@
|
|||||||
from logging import debug,root,DEBUG
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from obitools.oligo import wordSelector,wordIterator
|
|
||||||
from obitools.oligo import predicat
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _acceptedOptionCallback(options,opt,value,parser):
    '''
    Option callback storing one more "accepted" pattern.

    The pattern given on the command line is turned into a predicate
    by `predicat.rePredicatGenerator` and appended to the
    `acceptedOligo` list of `parser.values`; the list is created the
    first time the callback runs.

    @param options: unused by this callback
    @param opt: unused by this callback
    @param value: the pattern to accept
    @type value: str
    @param parser: object whose `values` attribute holds the parsed
                   options (presumably an optparse parser - to be
                   confirmed against the caller)
    '''
    values = parser.values
    if not hasattr(values, 'acceptedOligo'):
        values.acceptedOligo = []
    values.acceptedOligo.append(predicat.rePredicatGenerator(value))
|
|
||||||
|
|
||||||
def _rejectedOptionCallback(options,opt,value,parser):
    '''
    Option callback storing one more "rejected" pattern.

    The raw pattern is first sent to the debug log, then turned into a
    predicate by `predicat.rePredicatGenerator` and appended to the
    `rejectedOligo` list of `parser.values`; the list is created the
    first time the callback runs.

    @param options: unused by this callback
    @param opt: unused by this callback
    @param value: the pattern to reject
    @type value: str
    @param parser: object whose `values` attribute holds the parsed
                   options (presumably an optparse parser - to be
                   confirmed against the caller)
    '''
    debug(value)
    values = parser.values
    if not hasattr(values, 'rejectedOligo'):
        values.rejectedOligo = []
    values.rejectedOligo.append(predicat.rePredicatGenerator(value))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def addOligoOptions(optionManager):
    '''
    Declare the oligonucleotide related command line options.

    The options are registered through the `add_option` method of
    `optionManager`, in this order: -s, -f, -d, -g, -a, -r, -p, -P.

    @param optionManager: object exposing an `add_option` method
                          (presumably an optparse option parser or
                          group - to be confirmed against the caller)
    '''
    def addIntOption(shortFlag, longFlag, dest, helpText, **extra):
        # Integer valued option stored under `dest`; `extra` carries
        # the optional default value.
        optionManager.add_option(shortFlag, longFlag,
                                 action="store", dest=dest,
                                 metavar="<###>",
                                 type="int",
                                 help=helpText,
                                 **extra)

    def addPatternOption(shortFlag, longFlag, callback, helpText):
        # Pattern valued option handled by a callback.
        optionManager.add_option(shortFlag, longFlag,
                                 action="callback", callback=callback,
                                 metavar="<regular pattern>",
                                 type="str",
                                 help=helpText)

    addIntOption('-s', '--oligo-size', "oligoSize",
                 "Size of oligonucleotide to generate")

    addIntOption('-f', '--family-size', "familySize",
                 "Size of oligonucleotide family to generate")

    addIntOption('-d', '--distance', "oligoDist",
                 "minimal distance between two oligonucleotides",
                 default=1)

    addIntOption('-g', '--gc-max', "gcMax",
                 "maximum count of G or C nucleotide acceptable in a word",
                 default=0)

    addPatternOption('-a', '--accepted', _acceptedOptionCallback,
                     "pattern of accepted oligonucleotide")

    addPatternOption('-r', '--rejected', _rejectedOptionCallback,
                     "pattern of rejected oligonucleotide")

    addIntOption('-p', '--homopolymere', "homopolymere",
                 "reject oligo with homopolymere longer than.",
                 default=0)

    addIntOption('-P', '--homopolymere-min', "homopolymere_min",
                 "accept only oligo with homopolymere longer than.",
                 default=0)
|
|
||||||
|
|
||||||
def dnaWordIterator(options):
    '''
    Build the iterator over the DNA words selected by `options`.

    All the words of size `options.oligoSize` are enumerated and
    filtered with the predicates already stored in
    `options.acceptedOligo` / `options.rejectedOligo` (both lists are
    created on `options` when missing), completed by:

      - a minimal distance (`options.oligoDist`) to the seed word made
        only of 'a' (accept list);
      - a homopolymer predicate built from `options.homopolymere`
        (reject list), when this option is set;
      - a homopolymer predicate built from `options.homopolymere_min`
        (accept list), when this option is set;
      - a G+C upper bound built from `options.gcMax` (accept list),
        when this option is set.

    Note that the predicate lists stored on `options` are extended in
    place.

    @param options: the parsed command line options, as declared by
                    addOligoOptions

    @return: an iterator on the selected DNA words (str)
    @rtype: iterator

    @raise AssertionError: if the oligo size, family size or distance
                           option is None
    '''
    assert options.oligoSize is not None,"option -s or --oligo-size must be specified"
    assert options.familySize is not None,"option -f or --family-size must be specified"
    assert options.oligoDist is not None,"option -d or --distance must be specified"

    words = wordIterator(options.oligoSize)
    seed = 'a' * options.oligoSize

    if not hasattr(options, "acceptedOligo"):
        options.acceptedOligo=[]

    if not hasattr(options, "rejectedOligo"):
        options.rejectedOligo=[]

    options.acceptedOligo.append(predicat.distMinGenerator(seed, options.oligoDist))

    if options.homopolymere:
        options.rejectedOligo.append(predicat.homoPolymerGenerator(options.homopolymere))

    if options.homopolymere_min:
        options.acceptedOligo.append(predicat.homoPolymerGenerator(options.homopolymere_min))

    if options.gcMax:
        # The predicate is true when the G+C count is within the bound,
        # so it must be an *accept* filter: as a reject filter it threw
        # away exactly the words that --gc-max is meant to keep.
        options.acceptedOligo.append(predicat.gcUpperBondGenerator(options.gcMax))

    return wordSelector(words, options.acceptedOligo, options.rejectedOligo)
|
|
@ -1,23 +0,0 @@
|
|||||||
import re
|
|
||||||
from obitools.oligo import wordDist
|
|
||||||
|
|
||||||
def rePredicatGenerator(regex):
    '''
    Build a predicate testing words against a regular expression.

    The expression is compiled once, case-insensitively, and the
    predicate reports whether it matches anywhere in the word.

    @param regex: the regular expression to look for
    @type regex: str

    @return: a function taking a word (str) and returning a boolean
    @rtype: function
    '''
    search = re.compile(regex, re.I).search

    def predicat(w):
        return search(w) is not None

    return predicat
|
|
||||||
|
|
||||||
def gcUpperBondGenerator(count):
    '''
    Build a predicate checking the G+C content of a word against an
    upper bound.

    The predicate is true when the word contains at most `count`
    letters G or C. Letters are counted whatever their case, in line
    with the case-insensitive pattern predicates of this module.

    @param count: maximum number of G or C letters
    @type count: int

    @return: a function taking a word (str) and returning a boolean
    @rtype: function
    '''
    def predicat(w):
        # Normalise the case first: counting only 'g' and 'c' made
        # every upper-case word look free of G and C.
        lowered = w.lower()
        c = lowered.count('g') + lowered.count('c')
        return c <= count
    return predicat
|
|
||||||
|
|
||||||
def homoPolymerGenerator(count):
    '''
    Build a predicate detecting a run of `count` identical letters.

    The pattern is one captured character followed by `count` - 1
    back-references to it; when `count` is lower than 2 the pattern is
    reduced to the captured character alone. The matching itself is
    delegated to rePredicatGenerator.

    @param count: length of the homopolymer to look for
    @type count: int

    @return: a function taking a word (str) and returning a boolean
    @rtype: function
    '''
    backReferences = ['\\1'] * (count - 1)
    return rePredicatGenerator(''.join(['(.)'] + backReferences))
|
|
||||||
|
|
||||||
def distMinGenerator(word,dmin):
    '''
    Build a predicate keeping the words far enough from `word`.

    The predicate is true for `word` itself and for every word whose
    distance to `word`, as computed by wordDist, is at least `dmin`.

    @param word: the reference word
    @type word: str
    @param dmin: the minimal distance to the reference word
    @type dmin: int

    @return: a function taking a word (str) and returning a boolean
    @rtype: function
    '''
    def predicat(w):
        # The reference word is always kept, whatever the distance.
        if w == word:
            return True
        return wordDist(w, word) >= dmin

    return predicat
|
|
Reference in New Issue
Block a user