This commit is contained in:
2008-02-01 09:35:10 +00:00
parent 6ee13ba008
commit 072c7e0acc
3 changed files with 0 additions and 199 deletions

View File

@ -1,73 +0,0 @@
from itertools import imap
from itertools import product
_dna='acgt'
def wordIterator(size,_prefix=''):
    '''
    Iterate through the list of all DNA words of
    size `size`.

    Words are produced in lexicographic order with respect to the
    `_dna` alphabet ('a' < 'c' < 'g' < 't'). A size of 0 yields the
    single word `_prefix`.

    @param size: size of the DNA word
    @type size: int
    @param _prefix: string prepended to every generated word (kept for
                    backward compatibility with the former recursive
                    implementation)
    @type _prefix: string
    @return: an iterator on DNA word (str)
    @rtype: iterator
    @raise ValueError: if `size` is negative
    '''
    # The former recursive version never terminated on a negative size;
    # fail explicitly instead.
    if size < 0:
        raise ValueError("size must be a non negative integer")
    # product() enumerates the letter tuples in the same order as the
    # nested recursion did, without stacking one generator per position.
    for letters in product(_dna, repeat=size):
        yield _prefix + ''.join(letters)
def wordSelector(words,accept=None,reject=None):
    '''
    Filter over a DNA word iterator.

    A word is yielded when every predicate of `accept` returns a true
    value for it and no predicate of `reject` does. Predicates are
    evaluated lazily: evaluation stops at the first failing `accept`
    predicate or the first matching `reject` predicate.

    @param words: an iterable object over a list of DNA words
    @type words: an iterator
    @param accept: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value. None is treated as an empty list.
    @type accept: list
    @param reject: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value. None is treated as an empty list.
    @type reject: list
    @return: an iterator on DNA word (str)
    @rtype: iterator
    '''
    if accept is None:
        accept=[]
    if reject is None:
        reject=[]
    for w in words:
        # all()/any() short-circuit, unlike the former reduce() folds
        # which always called every predicate on every word.
        if all(p(w) for p in accept) and not any(p(w) for p in reject):
            yield w
def wordDist(w1,w2):
    '''
    Compute the Hamming distance between two words of the same size.

    Positions are compared pairwise; if the words differ in length,
    only the positions present in both words are compared. Two empty
    words are at distance 0.

    @param w1: the first word
    @type w1: str
    @param w2: the second word
    @type w2: str
    @return: the count of differences between the two words
    @rtype: int
    '''
    # sum() starts from 0, so empty words no longer raise the TypeError
    # that reduce() without an initial value produced.
    return sum(1 for a, b in zip(w1, w2) if a != b)

View File

@ -1,103 +0,0 @@
from logging import debug,root,DEBUG
from obitools.oligo import wordSelector,wordIterator
from obitools.oligo import predicat
def _acceptedOptionCallback(options,opt,value,parser):
    '''
    Option callback storing an "accepted" pattern predicate.

    The pattern given on the command line is turned into a predicate
    with `predicat.rePredicatGenerator` and appended to the
    `acceptedOligo` list of `parser.values`; the list is created on
    first use.

    @param options: first positional argument supplied by the option
                    parser (unused)
    @param opt: option string as supplied by the option parser (unused)
    @param value: the pattern given for the option
    @type value: str
    @param parser: the option parser; its `values` attribute receives
                   the predicate
    '''
    parsedValues = parser.values
    try:
        predicates = parsedValues.acceptedOligo
    except AttributeError:
        # First occurrence of the option: create the list.
        predicates = parsedValues.acceptedOligo = []
    predicates.append(predicat.rePredicatGenerator(value))
def _rejectedOptionCallback(options,opt,value,parser):
    '''
    Option callback storing a "rejected" pattern predicate.

    The pattern is logged at debug level, then turned into a predicate
    with `predicat.rePredicatGenerator` and appended to the
    `rejectedOligo` list of `parser.values`; the list is created on
    first use.

    @param options: first positional argument supplied by the option
                    parser (unused)
    @param opt: option string as supplied by the option parser (unused)
    @param value: the pattern given for the option
    @type value: str
    @param parser: the option parser; its `values` attribute receives
                   the predicate
    '''
    debug(value)
    parsedValues = parser.values
    try:
        predicates = parsedValues.rejectedOligo
    except AttributeError:
        # First occurrence of the option: create the list.
        predicates = parsedValues.rejectedOligo = []
    predicates.append(predicat.rePredicatGenerator(value))
def addOligoOptions(optionManager):
    '''
    Register the oligonucleotide related options on `optionManager`.

    The options are added in this order: -s/--oligo-size,
    -f/--family-size, -d/--distance, -g/--gc-max, -a/--accepted,
    -r/--rejected, -p/--homopolymere and -P/--homopolymere-min.

    @param optionManager: object exposing an `add_option` method taking
                          the option flags followed by keyword settings
                          (presumably an optparse.OptionParser; verify
                          against the caller)
    '''
    # One (flags, keyword settings) entry per option, in registration order.
    optionSpecs = (
        (('-s', '--oligo-size'),
         dict(action="store", dest="oligoSize",
              metavar="<###>",
              type="int",
              help="Size of oligonucleotide to generate")),
        (('-f', '--family-size'),
         dict(action="store", dest="familySize",
              metavar="<###>",
              type="int",
              help="Size of oligonucleotide family to generate")),
        (('-d', '--distance'),
         dict(action="store", dest="oligoDist",
              metavar="<###>",
              type="int",
              default=1,
              help="minimal distance between two oligonucleotides")),
        (('-g', '--gc-max'),
         dict(action="store", dest="gcMax",
              metavar="<###>",
              type="int",
              default=0,
              help="maximum count of G or C nucleotide acceptable in a word")),
        (('-a', '--accepted'),
         dict(action="callback", callback=_acceptedOptionCallback,
              metavar="<regular pattern>",
              type="str",
              help="pattern of accepted oligonucleotide")),
        (('-r', '--rejected'),
         dict(action="callback", callback=_rejectedOptionCallback,
              metavar="<regular pattern>",
              type="str",
              help="pattern of rejected oligonucleotide")),
        (('-p', '--homopolymere'),
         dict(action="store", dest="homopolymere",
              metavar="<###>",
              type="int",
              default=0,
              help="reject oligo with homopolymere longer than.")),
        (('-P', '--homopolymere-min'),
         dict(action="store", dest="homopolymere_min",
              metavar="<###>",
              type="int",
              default=0,
              help="accept only oligo with homopolymere longer than.")),
    )
    for flags, settings in optionSpecs:
        optionManager.add_option(*flags, **settings)
def dnaWordIterator(options):
    '''
    Build the iterator over the DNA words selected by `options`.

    All words of size `options.oligoSize` are generated and filtered
    with `wordSelector` using:
      - the user supplied predicates already stored in
        `options.acceptedOligo` / `options.rejectedOligo` (if any),
      - a minimal distance of `options.oligoDist` to the seed word
        made only of 'a' (the seed itself is accepted),
      - rejection of words matching the `options.homopolymere`
        homopolymer predicate, when non zero,
      - acceptance restricted to words matching the
        `options.homopolymere_min` homopolymer predicate, when non zero,
      - acceptance restricted to words having at most `options.gcMax`
        'g' or 'c', when non zero.

    The generated predicates are appended in place to
    `options.acceptedOligo` and `options.rejectedOligo`, which are
    created on `options` when missing.

    @param options: the parsed option values (attributes oligoSize,
                    familySize, oligoDist, homopolymere,
                    homopolymere_min and gcMax are read)
    @return: an iterator on the selected DNA words (str)
    @rtype: iterator
    @raise AssertionError: if oligoSize, familySize or oligoDist is None
    '''
    # NOTE: familySize is only checked here, it is not used to build the
    # iterator.
    assert options.oligoSize is not None,"option -s or --oligo-size must be specified"
    assert options.familySize is not None,"option -f or --family-size must be specified"
    assert options.oligoDist is not None,"option -d or --distance must be specified"

    words = wordIterator(options.oligoSize)
    seed = 'a' * options.oligoSize

    # The lists only exist when -a / -r were given on the command line.
    if not hasattr(options, "acceptedOligo"):
        options.acceptedOligo=[]
    if not hasattr(options, "rejectedOligo"):
        options.rejectedOligo=[]

    options.acceptedOligo.append(predicat.distMinGenerator(seed, options.oligoDist))

    if options.homopolymere:
        options.rejectedOligo.append(predicat.homoPolymerGenerator(options.homopolymere))

    if options.homopolymere_min:
        options.acceptedOligo.append(predicat.homoPolymerGenerator(options.homopolymere_min))

    if options.gcMax:
        # gcUpperBondGenerator returns True when the G+C count is <= gcMax,
        # i.e. when the word is acceptable: it is an *accept* predicate.
        # It used to be registered as a reject predicate, which discarded
        # exactly the words respecting the bound.
        options.acceptedOligo.append(predicat.gcUpperBondGenerator(options.gcMax))

    return wordSelector(words, options.acceptedOligo, options.rejectedOligo)

View File

@ -1,23 +0,0 @@
import re
from obitools.oligo import wordDist
def rePredicatGenerator(regex):
    '''
    Build a predicate testing a word against a regular expression.

    The pattern is compiled once, case-insensitively, and searched for
    anywhere in the word.

    @param regex: the regular expression pattern
    @type regex: str
    @return: a function taking a word and returning True when the
             pattern is found in it, False otherwise
    @rtype: function
    '''
    compiled = re.compile(regex, re.I)

    def predicat(w):
        # search() returns None when nothing matches.
        return compiled.search(w) is not None

    return predicat
def gcUpperBondGenerator(count):
    '''
    Build a predicate checking an upper bound on the G+C content.

    Only lower case 'g' and 'c' are counted.

    @param count: maximum number of 'g' or 'c' allowed in a word
    @type count: int
    @return: a function taking a word and returning True when the word
             contains at most `count` 'g' or 'c'
    @rtype: function
    '''
    def predicat(w):
        gcTotal = sum(w.count(base) for base in ('g', 'c'))
        return gcTotal <= count

    return predicat
def homoPolymerGenerator(count):
    '''
    Build a predicate detecting homopolymers.

    The pattern is one captured character followed by `count` - 1
    back-references to it, so it matches a run of `count` identical
    consecutive characters. Matching is delegated to
    `rePredicatGenerator`.

    @param count: length of the homopolymer to detect
    @type count: int
    @return: a function taking a word and returning True when the word
             contains such a run
    @rtype: function
    '''
    backReferences = '\\1' * (count -1)
    return rePredicatGenerator('(.)' + backReferences)
def distMinGenerator(word,dmin):
    '''
    Build a predicate checking a minimal distance to a reference word.

    @param word: the reference word
    @type word: str
    @param dmin: the minimal distance, as computed by `wordDist`
    @type dmin: int
    @return: a function taking a word and returning True when the word
             is the reference word itself or lies at a distance of at
             least `dmin` from it
    @rtype: function
    '''
    def predicat(w):
        # The reference word is always accepted.
        if w == word:
            return True
        return wordDist(w, word) >= dmin

    return predicat