diff --git a/obitools/SVGdraw.py b/obitools/SVGdraw.py
deleted file mode 100644
index 521f750..0000000
--- a/obitools/SVGdraw.py
+++ /dev/null
@@ -1,1054 +0,0 @@
-#!/usr/bin/env python
-##Copyright (c) 2002, Fedor Baart & Hans de Wit (Stichting Farmaceutische Kengetallen)
-##All rights reserved.
-##
-##Redistribution and use in source and binary forms, with or without modification,
-##are permitted provided that the following conditions are met:
-##
-##Redistributions of source code must retain the above copyright notice, this
-##list of conditions and the following disclaimer.
-##
-##Redistributions in binary form must reproduce the above copyright notice,
-##this list of conditions and the following disclaimer in the documentation and/or
-##other materials provided with the distribution.
-##
-##Neither the name of the Stichting Farmaceutische Kengetallen nor the names of
-##its contributors may be used to endorse or promote products derived from this
-##software without specific prior written permission.
-##
-##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-##AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-##IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-##DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-##FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-##DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-##SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-##CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-##OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-##OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-##Thanks to Gerald Rosennfellner for his help and useful comments.
-
-__doc__="""Use SVGdraw to generate your SVG drawings.
-
-SVGdraw uses an object model and a toXml method to create SVG graphics
-using easy-to-use classes and methods. Usually you start by creating a drawing, e.g.
-
-    d=drawing()
-    #then you create a SVG root element
-    s=svg()
-    #then you add some elements, e.g. a circle, and add it to the svg root element
-    c=circle()
-    #you can supply attributes by using named arguments.
-    c=circle(fill='red',stroke='blue')
-    #or by updating the attributes attribute:
-    c.attributes['stroke-width']=1
-    s.addElement(c)
-    #then you add the svg root element to the drawing
-    d.setSVG(s)
-    #and finally you xmlify the drawing
-    d.toXml()
-
-
-This results in the SVG source of the drawing, which consists of a circle
-on a white background. It's as easy as that ;)
-This module was created using the SVG specification of www.w3c.org and the
-O'Reilly (www.oreilly.com) python books as information sources. An SVG viewer
-is available from www.adobe.com"""
-
-__version__="1.0"
-
-# There are two possibilities to generate svg:
-# via a dom implementation, or directly using text strings.
-# The latter is way faster (and shorter in coding);
-# the former is only used in debugging svg programs
-# and maybe it will be removed altogether after a while.
-# With the following variable you indicate whether to use the dom implementation.
-# Note that PyXML is required for using the dom implementation.
-# It is also possible to use the standard minidom, but I didn't try that one.
-# Anyway the text based approach is about 60 times faster than using the full dom implementation.
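-
-# A compact end-to-end sketch of the workflow described above (illustrative
-# only, not from the original file; it uses only names defined in this
-# module: drawing, svg, circle):
-#
-#     d=drawing()
-#     s=svg(viewBox=(0,0,100,100))
-#     c=circle(50,50,20,fill='red',stroke='blue',stroke_width=1)
-#     s.addElement(c)
-#     d.setSVG(s)
-#     print d.toXml()    # returns the SVG source as a string when no filename is given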
-use_dom_implementation=0
-
-
-import exceptions
-if use_dom_implementation<>0:
-    try:
-        from xml.dom import implementation
-        from xml.dom.ext import PrettyPrint
-    except:
-        raise exceptions.ImportError, "PyXML is required for using the dom implementation"
-#The implementation is used for creating the XML document.
-#The prettyprint module is used for converting the xml document object to a xml file
-
-import sys
-assert sys.version_info[0]>=2
-if sys.version_info[1]<2:
-    True=1
-    False=0
-    file=open
-
-sys.setrecursionlimit=50
-#The recursion limit is set conservatively so mistakes like s=svg(); s.addElement(s)
-#won't eat up too much processor time.
-
-#the following code is pasted from xml.sax.saxutils
-#it makes it possible to run the code without the xml sax package installed
-#To make it possible to have < and > in your text elements, it is necessary to escape the texts
-def _escape(data, entities={}):
-    """Escape &, <, and > in a string of data.
-
-    You can escape other strings of data by passing a dictionary as
-    the optional entities parameter. The keys and values must all be
-    strings; each key will be replaced with its corresponding value.
-    """
-    data = data.replace("&", "&amp;")
-    data = data.replace("<", "&lt;")
-    data = data.replace(">", "&gt;")
-    for chars, entity in entities.items():
-        data = data.replace(chars, entity)
-    return data
-
-def _quoteattr(data, entities={}):
-    """Escape and quote an attribute value.
-
-    Escape &, <, and > in a string of data, then quote it for use as
-    an attribute value. The \" character will be escaped as well, if
-    necessary.
-
-    You can escape other strings of data by passing a dictionary as
-    the optional entities parameter. The keys and values must all be
-    strings; each key will be replaced with its corresponding value.
-    """
-    data = _escape(data, entities)
-    if '"' in data:
-        if "'" in data:
-            data = '"%s"' % data.replace('"', "&quot;")
-        else:
-            data = "'%s'" % data
-    else:
-        data = '"%s"' % data
-    return data
-
-
-
-def _xypointlist(a):
-    """formats a list of xy pairs"""
-    s=''
-    for e in a: #this could be done more elegantly
-        s+=str(e)[1:-1] +' '
-    return s
-
-def _viewboxlist(a):
-    """formats a tuple"""
-    s=''
-    for e in a:
-        s+=str(e)+' '
-    return s
-
-def _pointlist(a):
-    """formats a list of numbers"""
-    return str(a)[1:-1]
-
-class pathdata:
-    """class used to create a pathdata object which can be used for a path.
-    although most methods are pretty straightforward it might be useful to look at the SVG specification."""
-    #I didn't test the methods below.
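-    # An illustrative sketch (not part of the original file): each method just
-    # appends one SVG path command string, so a closed triangle can be built as
-    #
-    #     pd=pathdata(0,0)      # path starts with 'M 0 0'
-    #     pd.line(100,0)        # appends 'L 100 0'
-    #     pd.line(50,87)        # appends 'L 50 87'
-    #     pd.closepath()        # appends 'z'
-    #     str(pd)               # -> 'M 0 0 L 100 0 L 50 87 z'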
- def __init__(self,x=None,y=None): - self.path=[] - if x is not None and y is not None: - self.path.append('M '+str(x)+' '+str(y)) - def closepath(self): - """ends the path""" - self.path.append('z') - def move(self,x,y): - """move to absolute""" - self.path.append('M '+str(x)+' '+str(y)) - def relmove(self,x,y): - """move to relative""" - self.path.append('m '+str(x)+' '+str(y)) - def line(self,x,y): - """line to absolute""" - self.path.append('L '+str(x)+' '+str(y)) - def relline(self,x,y): - """line to relative""" - self.path.append('l '+str(x)+' '+str(y)) - def hline(self,x): - """horizontal line to absolute""" - self.path.append('H'+str(x)) - def relhline(self,x): - """horizontal line to relative""" - self.path.append('h'+str(x)) - def vline(self,y): - """verical line to absolute""" - self.path.append('V'+str(y)) - def relvline(self,y): - """vertical line to relative""" - self.path.append('v'+str(y)) - def bezier(self,x1,y1,x2,y2,x,y): - """bezier with xy1 and xy2 to xy absolut""" - self.path.append('C'+str(x1)+','+str(y1)+' '+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) - def relbezier(self,x1,y1,x2,y2,x,y): - """bezier with xy1 and xy2 to xy relative""" - self.path.append('c'+str(x1)+','+str(y1)+' '+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) - def smbezier(self,x2,y2,x,y): - """smooth bezier with xy2 to xy absolut""" - self.path.append('S'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) - def relsmbezier(self,x2,y2,x,y): - """smooth bezier with xy2 to xy relative""" - self.path.append('s'+str(x2)+','+str(y2)+' '+str(x)+','+str(y)) - def qbezier(self,x1,y1,x,y): - """quadratic bezier with xy1 to xy absolut""" - self.path.append('Q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) - def relqbezier(self,x1,y1,x,y): - """quadratic bezier with xy1 to xy relative""" - self.path.append('q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y)) - def smqbezier(self,x,y): - """smooth quadratic bezier to xy absolut""" - self.path.append('T'+str(x)+','+str(y)) - def relsmqbezier(self,x,y): - """smooth quadratic bezier to xy relative""" - self.path.append('t'+str(x)+','+str(y)) - def ellarc(self,rx,ry,xrot,laf,sf,x,y): - """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy absolut""" - self.path.append('A'+str(rx)+','+str(ry)+' '+str(xrot)+' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) - def relellarc(self,rx,ry,xrot,laf,sf,x,y): - """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy relative""" - self.path.append('a'+str(rx)+','+str(ry)+' '+str(xrot)+' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y)) - def __repr__(self): - return ' '.join(self.path) - - - - -class SVGelement: - """SVGelement(type,attributes,elements,text,namespace,**args) - Creates a arbitrary svg element and is intended to be subclassed not used on its own. - This element is the base of every svg element it defines a class which resembles - a xml-element. The main advantage of this kind of implementation is that you don't - have to create a toXML method for every different graph object. Every element - consists of a type, attribute, optional subelements, optional text and an optional - namespace. Note the elements==None, if elements = None:self.elements=[] construction. 
-    This is done because if you default to elements=[] every object has a reference
-    to the same empty list."""
-    def __init__(self,type='',attributes=None,elements=None,text='',namespace='',cdata=None,**args):
-        self.type=type
-        if attributes==None:
-            self.attributes={}
-        else:
-            self.attributes=attributes
-        if elements==None:
-            self.elements=[]
-        else:
-            self.elements=elements
-        self.text=text
-        self.namespace=namespace
-        self.cdata=cdata
-        for arg in args.keys():
-            self.attributes[arg]=args[arg]
-    def addElement(self,SVGelement):
-        """adds an element to a SVGelement
-
-        SVGelement.addElement(SVGelement)
-        """
-        self.elements.append(SVGelement)
-
-    #def toXml(self,level,f, preserveWhitespace=False):
-    def toXml(self,level,f, **kwargs):
-        preserve = kwargs.get("preserveWhitespace", False)
-        if preserve:
-            #print "PRESERVING"
-            NEWLINE = ""
-            TAB = ""
-        else:
-            #print "NOT PRESE"
-            NEWLINE = "\n"
-            TAB = "\t"
-        f.write(TAB*level)
-        f.write('<'+self.type)
-        for attkey in self.attributes.keys():
-            f.write(' '+_escape(str(attkey))+'='+_quoteattr(str(self.attributes[attkey])))
-        if self.namespace:
-            f.write(' xmlns="'+ _escape(str(self.namespace))+'" ')
-        if self.elements or self.text or self.cdata:
-            f.write('>')
-            if self.elements:
-                f.write(NEWLINE)
-            for element in self.elements:
-                element.toXml(level+1,f, preserveWhitespace=preserve)
-            if self.cdata:
-                f.write(NEWLINE+TAB*(level+1)+'<![CDATA['+self.cdata+']]>'+NEWLINE)
-            if self.text:
-                if type(self.text)==type(''): #If the text is only text
-                    f.write(_escape(str(self.text)))
-                else: #If the text is a spannedtext class
-                    f.write(str(self.text))
-            if self.elements:
-                f.write(TAB*level+'</'+self.type+'>'+NEWLINE)
-            elif self.text:
-                f.write('</'+self.type+'>'+NEWLINE)
-            elif self.cdata:
-                f.write(TAB*level+'</'+self.type+'>'+NEWLINE)
-        else:
-            f.write('/>'+NEWLINE)
-
-class tspan(SVGelement):
-    """ts=tspan(text='',**args)
-
-    a tspan element can be used for applying formatting to a textsection
-    usage:
-    ts=tspan('this text is bold')
-    ts.attributes['font-weight']='bold'
-    st=spannedtext()
-    st.addtspan(ts)
-    t=text(3,5,st)
-    """
-    def __init__(self,text=None,**args):
-        SVGelement.__init__(self,'tspan',**args)
-        if text<>None:
-            self.text=text
-    def __repr__(self):
-        s="<tspan"
-        for key,value in self.attributes.items():
-            s+= ' %s="%s"' % (key,value)
-        s+='>'
-        s+=self.text
-        s+='</tspan>'
-        return s
-
-class tref(SVGelement):
-    """tr=tref(link='',**args)
-
-    a tref element can be used for referencing text by a link to its id.
-    usage:
-    tr=tref('#linktotext')
-    st=spannedtext()
-    st.addtref(tr)
-    t=text(3,5,st)
-    """
-    def __init__(self,link,**args):
-        SVGelement.__init__(self,'tref',{'xlink:href':link},**args)
-    def __repr__(self):
-        s="<tref"
-        for key,value in self.attributes.items():
-            s+= ' %s="%s"' % (key,value)
-        s+='/>'
-        return s
-
-class spannedtext:
-    """st=spannedtext(textlist=[])
-
-    a spannedtext can be used for text which consists of text, tspan's and tref's
-    You can use it to add to a text element or path element. Don't add it directly
-    to a svg or a group element.
-    usage:
-
-    ts=tspan('this text is bold')
-    ts.attributes['font-weight']='bold'
-    tr=tref('#linktotext')
-    tr.attributes['fill']='red'
-    st=spannedtext()
-    st.addtspan(ts)
-    st.addtref(tr)
-    st.addtext('This text is not bold')
-    t=text(3,5,st)
-    """
-    def __init__(self,textlist=None):
-        if textlist==None:
-            self.textlist=[]
-        else:
-            self.textlist=textlist
-    def addtext(self,text=''):
-        self.textlist.append(text)
-    def addtspan(self,tspan):
-        self.textlist.append(tspan)
-    def addtref(self,tref):
-        self.textlist.append(tref)
-    def __repr__(self):
-        s=""
-        for element in self.textlist:
-            s+=str(element)
-        return s
-
-class rect(SVGelement):
-    """r=rect(width,height,x,y,fill,stroke,stroke_width,**args)
-
-    a rectangle is defined by a width and height and a xy pair
-    """
-    def __init__(self,x=None,y=None,width=None,height=None,fill=None,stroke=None,stroke_width=None,**args):
-        if width==None or height==None:
-            if width<>None:
-                raise ValueError, 'height is required'
-            if height<>None:
-                raise ValueError, 'width is required'
-            else:
-                raise ValueError, 'both height and width are required'
-        SVGelement.__init__(self,'rect',{'width':width,'height':height},**args)
-        if x<>None:
-            self.attributes['x']=x
-        if y<>None:
-            self.attributes['y']=y
-        if fill<>None:
-            self.attributes['fill']=fill
-        if stroke<>None:
-            self.attributes['stroke']=stroke
-        if stroke_width<>None:
-            self.attributes['stroke-width']=stroke_width
-
-class ellipse(SVGelement):
-    """e=ellipse(rx,ry,x,y,fill,stroke,stroke_width,**args)
-
-    an ellipse is defined as a center and an x and y radius.
- """ - def __init__(self,cx=None,cy=None,rx=None,ry=None,fill=None,stroke=None,stroke_width=None,**args): - if rx==None or ry== None: - if rx<>None: - raise ValueError, 'rx is required' - if ry<>None: - raise ValueError, 'ry is required' - else: - raise ValueError, 'both rx and ry are required' - SVGelement.__init__(self,'ellipse',{'rx':rx,'ry':ry},**args) - if cx<>None: - self.attributes['cx']=cx - if cy<>None: - self.attributes['cy']=cy - if fill<>None: - self.attributes['fill']=fill - if stroke<>None: - self.attributes['stroke']=stroke - if stroke_width<>None: - self.attributes['stroke-width']=stroke_width - - -class circle(SVGelement): - """c=circle(x,y,radius,fill,stroke,stroke_width,**args) - - The circle creates an element using a x, y and radius values eg - """ - def __init__(self,cx=None,cy=None,r=None,fill=None,stroke=None,stroke_width=None,**args): - if r==None: - raise ValueError, 'r is required' - SVGelement.__init__(self,'circle',{'r':r},**args) - if cx<>None: - self.attributes['cx']=cx - if cy<>None: - self.attributes['cy']=cy - if fill<>None: - self.attributes['fill']=fill - if stroke<>None: - self.attributes['stroke']=stroke - if stroke_width<>None: - self.attributes['stroke-width']=stroke_width - -class point(circle): - """p=point(x,y,color) - - A point is defined as a circle with a size 1 radius. It may be more efficient to use a - very small rectangle if you use many points because a circle is difficult to render. - """ - def __init__(self,x,y,fill='black',**args): - circle.__init__(self,x,y,1,fill,**args) - -class line(SVGelement): - """l=line(x1,y1,x2,y2,stroke,stroke_width,**args) - - A line is defined by a begin x,y pair and an end x,y pair - """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self,'line',**args) - if x1<>None: - self.attributes['x1']=x1 - if y1<>None: - self.attributes['y1']=y1 - if x2<>None: - self.attributes['x2']=x2 - if y2<>None: - self.attributes['y2']=y2 - if stroke_width<>None: - self.attributes['stroke-width']=stroke_width - if stroke<>None: - self.attributes['stroke']=stroke - -class polyline(SVGelement): - """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) - - a polyline is defined by a list of xy pairs - """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self,'polyline',{'points':_xypointlist(points)},**args) - if fill<>None: - self.attributes['fill']=fill - if stroke_width<>None: - self.attributes['stroke-width']=stroke_width - if stroke<>None: - self.attributes['stroke']=stroke - -class polygon(SVGelement): - """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args) - - a polygon is defined by a list of xy pairs - """ - def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args): - SVGelement.__init__(self,'polygon',{'points':_xypointlist(points)},**args) - if fill<>None: - self.attributes['fill']=fill - if stroke_width<>None: - self.attributes['stroke-width']=stroke_width - if stroke<>None: - self.attributes['stroke']=stroke - -class path(SVGelement): - """p=path(path,fill,stroke,stroke_width,**args) - - a path is defined by a path object and optional width, stroke and fillcolor - """ - def __init__(self,pathdata,fill=None,stroke=None,stroke_width=None,id=None,**args): - SVGelement.__init__(self,'path',{'d':str(pathdata)},**args) - if stroke<>None: - self.attributes['stroke']=stroke - if fill<>None: - self.attributes['fill']=fill - if stroke_width<>None: - 
self.attributes['stroke-width']=stroke_width - if id<>None: - self.attributes['id']=id - - -class text(SVGelement): - """t=text(x,y,text,font_size,font_family,**args) - - a text element can bge used for displaying text on the screen - """ - def __init__(self,x=None,y=None,text=None,font_size=None,font_family=None,text_anchor=None,**args): - SVGelement.__init__(self,'text',**args) - if x<>None: - self.attributes['x']=x - if y<>None: - self.attributes['y']=y - if font_size<>None: - self.attributes['font-size']=font_size - if font_family<>None: - self.attributes['font-family']=font_family - if text<>None: - self.text=text - if text_anchor<>None: - self.attributes['text-anchor']=text_anchor - - def toXml(self,level,f, **kwargs): - preserve = self.attributes.get("xml:space", None) - if preserve == "preserve": - #print "FOO PRE" - SVGelement.toXml(self,level, f, preserveWhitespace=True) - else: - #print "FOO NOT" - SVGelement.toXml(self, level, f, preserveWhitespace=False) - -class textpath(SVGelement): - """tp=textpath(text,link,**args) - - a textpath places a text on a path which is referenced by a link. - """ - def __init__(self,link,text=None,**args): - SVGelement.__init__(self,'textPath',{'xlink:href':link},**args) - if text<>None: - self.text=text - -class pattern(SVGelement): - """p=pattern(x,y,width,height,patternUnits,**args) - - A pattern is used to fill or stroke an object using a pre-defined - graphic object which can be replicated ("tiled") at fixed intervals - in x and y to cover the areas to be painted. - """ - def __init__(self,x=None,y=None,width=None,height=None,patternUnits=None,**args): - SVGelement.__init__(self,'pattern',**args) - if x<>None: - self.attributes['x']=x - if y<>None: - self.attributes['y']=y - if width<>None: - self.attributes['width']=width - if height<>None: - self.attributes['height']=height - if patternUnits<>None: - self.attributes['patternUnits']=patternUnits - -class title(SVGelement): - """t=title(text,**args) - - a title is a text element. The text is displayed in the title bar - add at least one to the root svg element - """ - def __init__(self,text=None,**args): - SVGelement.__init__(self,'title',**args) - if text<>None: - self.text=text - -class description(SVGelement): - """d=description(text,**args) - - a description can be added to any element and is used for a tooltip - Add this element before adding other elements. - """ - def __init__(self,text=None,**args): - SVGelement.__init__(self,'desc',**args) - if text<>None: - self.text=text - -class lineargradient(SVGelement): - """lg=lineargradient(x1,y1,x2,y2,id,**args) - - defines a lineargradient using two xy pairs. - stop elements van be added to define the gradient colors. - """ - def __init__(self,x1=None,y1=None,x2=None,y2=None,id=None,**args): - SVGelement.__init__(self,'linearGradient',**args) - if x1<>None: - self.attributes['x1']=x1 - if y1<>None: - self.attributes['y1']=y1 - if x2<>None: - self.attributes['x2']=x2 - if y2<>None: - self.attributes['y2']=y2 - if id<>None: - self.attributes['id']=id - -class radialgradient(SVGelement): - """rg=radialgradient(cx,cy,r,fx,fy,id,**args) - - defines a radial gradient using a outer circle which are defined by a cx,cy and r and by using a focalpoint. - stop elements van be added to define the gradient colors. 
- """ - def __init__(self,cx=None,cy=None,r=None,fx=None,fy=None,id=None,**args): - SVGelement.__init__(self,'radialGradient',**args) - if cx<>None: - self.attributes['cx']=cx - if cy<>None: - self.attributes['cy']=cy - if r<>None: - self.attributes['r']=r - if fx<>None: - self.attributes['fx']=fx - if fy<>None: - self.attributes['fy']=fy - if id<>None: - self.attributes['id']=id - -class stop(SVGelement): - """st=stop(offset,stop_color,**args) - - Puts a stop color at the specified radius - """ - def __init__(self,offset,stop_color=None,**args): - SVGelement.__init__(self,'stop',{'offset':offset},**args) - if stop_color<>None: - self.attributes['stop-color']=stop_color - -class style(SVGelement): - """st=style(type,cdata=None,**args) - - Add a CDATA element to this element for defing in line stylesheets etc.. - """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self,'style',{'type':type},cdata=cdata, **args) - - -class image(SVGelement): - """im=image(url,width,height,x,y,**args) - - adds an image to the drawing. Supported formats are .png, .jpg and .svg. - """ - def __init__(self,url,x=None,y=None,width=None,height=None,**args): - if width==None or height==None: - if width<>None: - raise ValueError, 'height is required' - if height<>None: - raise ValueError, 'width is required' - else: - raise ValueError, 'both height and width are required' - SVGelement.__init__(self,'image',{'xlink:href':url,'width':width,'height':height},**args) - if x<>None: - self.attributes['x']=x - if y<>None: - self.attributes['y']=y - -class cursor(SVGelement): - """c=cursor(url,**args) - - defines a custom cursor for a element or a drawing - """ - def __init__(self,url,**args): - SVGelement.__init__(self,'cursor',{'xlink:href':url},**args) - - -class marker(SVGelement): - """m=marker(id,viewbox,refX,refY,markerWidth,markerHeight,**args) - - defines a marker which can be used as an endpoint for a line or other pathtypes - add an element to it which should be used as a marker. - """ - def __init__(self,id=None,viewBox=None,refx=None,refy=None,markerWidth=None,markerHeight=None,**args): - SVGelement.__init__(self,'marker',**args) - if id<>None: - self.attributes['id']=id - if viewBox<>None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if refx<>None: - self.attributes['refX']=refx - if refy<>None: - self.attributes['refY']=refy - if markerWidth<>None: - self.attributes['markerWidth']=markerWidth - if markerHeight<>None: - self.attributes['markerHeight']=markerHeight - -class group(SVGelement): - """g=group(id,**args) - - a group is defined by an id and is used to contain elements - g.addElement(SVGelement) - """ - def __init__(self,id=None,**args): - SVGelement.__init__(self,'g',**args) - if id<>None: - self.attributes['id']=id - -class symbol(SVGelement): - """sy=symbol(id,viewbox,**args) - - defines a symbol which can be used on different places in your graph using - the use element. A symbol is not rendered but you can use 'use' elements to - display it by referencing its id. 
- sy.addElement(SVGelement) - """ - - def __init__(self,id=None,viewBox=None,**args): - SVGelement.__init__(self,'symbol',**args) - if id<>None: - self.attributes['id']=id - if viewBox<>None: - self.attributes['viewBox']=_viewboxlist(viewBox) - -class defs(SVGelement): - """d=defs(**args) - - container for defining elements - """ - def __init__(self,**args): - SVGelement.__init__(self,'defs',**args) - -class switch(SVGelement): - """sw=switch(**args) - - Elements added to a switch element which are "switched" by the attributes - requiredFeatures, requiredExtensions and systemLanguage. - Refer to the SVG specification for details. - """ - def __init__(self,**args): - SVGelement.__init__(self,'switch',**args) - - -class use(SVGelement): - """u=use(link,x,y,width,height,**args) - - references a symbol by linking to its id and its position, height and width - """ - def __init__(self,link,x=None,y=None,width=None,height=None,**args): - SVGelement.__init__(self,'use',{'xlink:href':link},**args) - if x<>None: - self.attributes['x']=x - if y<>None: - self.attributes['y']=y - - if width<>None: - self.attributes['width']=width - if height<>None: - self.attributes['height']=height - - -class link(SVGelement): - """a=link(url,**args) - - a link is defined by a hyperlink. add elements which have to be linked - a.addElement(SVGelement) - """ - def __init__(self,link='',**args): - SVGelement.__init__(self,'a',{'xlink:href':link},**args) - -class view(SVGelement): - """v=view(id,**args) - - a view can be used to create a view with different attributes""" - def __init__(self,id=None,**args): - SVGelement.__init__(self,'view',**args) - if id<>None: - self.attributes['id']=id - -class script(SVGelement): - """sc=script(type,type,cdata,**args) - - adds a script element which contains CDATA to the SVG drawing - - """ - def __init__(self,type,cdata=None,**args): - SVGelement.__init__(self,'script',{'type':type},cdata=cdata,**args) - -class animate(SVGelement): - """an=animate(attribute,from,to,during,**args) - - animates an attribute. - """ - def __init__(self,attribute,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self,'animate',{'attributeName':attribute},**args) - if fr<>None: - self.attributes['from']=fr - if to<>None: - self.attributes['to']=to - if dur<>None: - self.attributes['dur']=dur - -class animateMotion(SVGelement): - """an=animateMotion(pathdata,dur,**args) - - animates a SVGelement over the given path in dur seconds - """ - def __init__(self,pathdata,dur,**args): - SVGelement.__init__(self,'animateMotion',**args) - if pathdata<>None: - self.attributes['path']=str(pathdata) - if dur<>None: - self.attributes['dur']=dur - -class animateTransform(SVGelement): - """antr=animateTransform(type,from,to,dur,**args) - - transform an element from and to a value. 
- """ - def __init__(self,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self,'animateTransform',{'attributeName':'transform'},**args) - #As far as I know the attributeName is always transform - if type<>None: - self.attributes['type']=type - if fr<>None: - self.attributes['from']=fr - if to<>None: - self.attributes['to']=to - if dur<>None: - self.attributes['dur']=dur -class animateColor(SVGelement): - """ac=animateColor(attribute,type,from,to,dur,**args) - - Animates the color of a element - """ - def __init__(self,attribute,type=None,fr=None,to=None,dur=None,**args): - SVGelement.__init__(self,'animateColor',{'attributeName':attribute},**args) - if type<>None: - self.attributes['type']=type - if fr<>None: - self.attributes['from']=fr - if to<>None: - self.attributes['to']=to - if dur<>None: - self.attributes['dur']=dur -class set(SVGelement): - """st=set(attribute,to,during,**args) - - sets an attribute to a value for a - """ - def __init__(self,attribute,to=None,dur=None,**args): - SVGelement.__init__(self,'set',{'attributeName':attribute},**args) - if to<>None: - self.attributes['to']=to - if dur<>None: - self.attributes['dur']=dur - - - -class svg(SVGelement): - """s=svg(viewbox,width,height,**args) - - a svg or element is the root of a drawing add all elements to a svg element. - You can have different svg elements in one svg file - s.addElement(SVGelement) - - eg - d=drawing() - s=svg((0,0,100,100),'100%','100%') - c=circle(50,50,20) - s.addElement(c) - d.setSVG(s) - d.toXml() - """ - def __init__(self,viewBox=None, width=None, height=None,**args): - SVGelement.__init__(self,'svg',**args) - if viewBox<>None: - self.attributes['viewBox']=_viewboxlist(viewBox) - if width<>None: - self.attributes['width']=width - if height<>None: - self.attributes['height']=height - self.namespace="http://www.w3.org/2000/svg" - -class drawing: - """d=drawing() - - this is the actual SVG document. It needs a svg element as a root. - Use the addSVG method to set the svg to the root. Use the toXml method to write the SVG - source to the screen or to a file - d=drawing() - d.addSVG(svg) - d.toXml(optionalfilename) - """ - - def __init__(self): - self.svg=None - def setSVG(self,svg): - self.svg=svg - #Voeg een element toe aan de grafiek toe. - if use_dom_implementation==0: - def toXml(self, filename='',compress=False): - import cStringIO - xml=cStringIO.StringIO() - xml.write('\n') - xml.write("""]>\n""") - self.svg.toXml(0,xml) - if not filename: - if compress: - import gzip - f=cStringIO.StringIO() - zf=gzip.GzipFile(fileobj=f,mode='wb') - zf.write(xml.getvalue()) - zf.close() - f.seek(0) - return f.read() - else: - return xml.getvalue() - else: - if filename[-4:]=='svgz': - import gzip - f=gzip.GzipFile(filename=filename,mode="wb", compresslevel=9) - f.write(xml.getvalue()) - f.close() - else: - f=file(filename,'w') - f.write(xml.getvalue()) - f.close() - - else: - def toXml(self,filename='',compress=False): - """drawing.toXml() ---->to the screen - drawing.toXml(filename)---->to the file - writes a svg drawing to the screen or to a file - compresses if filename ends with svgz or if compress is true - """ - doctype = implementation.createDocumentType('svg',"-//W3C//DTD SVG 1.0//EN""",'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ') - - global root - #root is defined global so it can be used by the appender. Its also possible to use it as an arugument but - #that is a bit messy. - root=implementation.createDocument(None,None,doctype) - #Create the xml document. 
- global appender - def appender(element,elementroot): - """This recursive function appends elements to an element and sets the attributes - and type. It stops when alle elements have been appended""" - if element.namespace: - e=root.createElementNS(element.namespace,element.type) - else: - e=root.createElement(element.type) - if element.text: - textnode=root.createTextNode(element.text) - e.appendChild(textnode) - for attribute in element.attributes.keys(): #in element.attributes is supported from python 2.2 - e.setAttribute(attribute,str(element.attributes[attribute])) - if element.elements: - for el in element.elements: - e=appender(el,e) - elementroot.appendChild(e) - return elementroot - root=appender(self.svg,root) - if not filename: - import cStringIO - xml=cStringIO.StringIO() - PrettyPrint(root,xml) - if compress: - import gzip - f=cStringIO.StringIO() - zf=gzip.GzipFile(fileobj=f,mode='wb') - zf.write(xml.getvalue()) - zf.close() - f.seek(0) - return f.read() - else: - return xml.getvalue() - else: - try: - if filename[-4:]=='svgz': - import gzip - import cStringIO - xml=cStringIO.StringIO() - PrettyPrint(root,xml) - f=gzip.GzipFile(filename=filename,mode='wb',compresslevel=9) - f.write(xml.getvalue()) - f.close() - else: - f=open(filename,'w') - PrettyPrint(root,f) - f.close() - except: - print "Cannot write SVG file: " + filename - def validate(self): - try: - import xml.parsers.xmlproc.xmlval - except: - raise exceptions.ImportError,'PyXml is required for validating SVG' - svg=self.toXml() - xv=xml.parsers.xmlproc.xmlval.XMLValidator() - try: - xv.feed(svg) - except: - raise "SVG is not well formed, see messages above" - else: - print "SVG well formed" -if __name__=='__main__': - - - d=drawing() - s=svg((0,0,100,100)) - r=rect(-100,-100,300,300,'cyan') - s.addElement(r) - - t=title('SVGdraw Demo') - s.addElement(t) - g=group('animations') - e=ellipse(0,0,5,2) - g.addElement(e) - c=circle(0,0,1,'red') - g.addElement(c) - pd=pathdata(0,-10) - for i in range(6): - pd.relsmbezier(10,5,0,10) - pd.relsmbezier(-10,5,0,10) - an=animateMotion(pd,10) - an.attributes['rotate']='auto-reverse' - an.attributes['repeatCount']="indefinite" - g.addElement(an) - s.addElement(g) - for i in range(20,120,20): - u=use('#animations',i,0) - s.addElement(u) - for i in range(0,120,20): - for j in range(5,105,10): - c=circle(i,j,1,'red','black',.5) - s.addElement(c) - d.setSVG(s) - - print d.toXml() - diff --git a/obitools/__init__.py b/obitools/__init__.py deleted file mode 100644 index 3063d78..0000000 --- a/obitools/__init__.py +++ /dev/null @@ -1,711 +0,0 @@ -''' -**obitools** main module ------------------------- - -.. codeauthor:: Eric Coissac - - - -obitools module provides base class for sequence manipulation. - -All biological sequences must be subclass of :py:class:`obitools.BioSequence`. -Some biological sequences are defined as transformation of other -biological sequences. For example Reversed complemented sequences -are a transformation of a :py:class:`obitools.NucSequence`. This particular -type of sequences are subclasses of the :py:class:`obitools.WrappedBioSequence`. - -.. 
inheritance-diagram:: BioSequence NucSequence AASequence WrappedBioSequence SubSequence DNAComplementSequence - :parts: 1 - - -''' - -from weakref import ref - -from obitools.utils.iterator import uniqueChain -from itertools import chain -import re - -_default_raw_parser = " %s *= *([^;]*);" - -try: - from functools import partial -except: - # - # Add for compatibility purpose with Python < 2.5 - # - def partial(func, *args, **keywords): - def newfunc(*fargs, **fkeywords): - newkeywords = keywords.copy() - newkeywords.update(fkeywords) - return func(*(args + fargs), **newkeywords) - newfunc.func = func - newfunc.args = args - newfunc.keywords = keywords - return newfunc - - -from obitools.sequenceencoder import DNAComplementEncoder -from obitools.location import Location - -class WrapperSetIterator(object): - def __init__(self,s): - self._i = set.__iter__(s) - def next(self): - return self._i.next()() - def __iter__(self): - return self - -class WrapperSet(set): - def __iter__(self): - return WrapperSetIterator(self) - - -class BioSequence(object): - ''' - BioSequence class is the base class for biological - sequence representation. - - It provides storage of : - - - the sequence itself, - - an identifier, - - a definition an manage - - a set of complementary information on a key / value principle. - - .. warning:: - - :py:class:`obitools.BioSequence` is an abstract class, this constructor - can only be called by a subclass constructor. - ''' - - def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info): - ''' - - :param id: sequence identifier - :type id: `str` - - :param seq: the sequence - :type seq: `str` - - :param definition: sequence definition (optional) - :type definition: `str` - - :param rawinfo: a text containing a set of key=value; patterns - :type definition: `str` - - :param rawparser: a text describing a regular patterns template - used to parse rawinfo - :type definition: `str` - - :param info: extra named parameters can be added to associate complementary - data to the sequence - - ''' - - assert type(self)!=BioSequence,"obitools.BioSequence is an abstract class" - - self._seq=str(seq).lower() - self._info = dict(info) - if rawinfo is not None: - self._rawinfo=' ' + rawinfo - else: - self._rawinfo=None - self._rawparser=rawparser - self.definition=definition - self.id=id - self._hasTaxid=None - - def get_seq(self): - return self.__seq - - - def set_seq(self, value): - if not isinstance(value, str): - value=str(value) - self.__seq = value - self.__len = len(value) - - - def clone(self): - seq = type(self)(self.id, - str(self), - definition=self.definition - ) - seq._info=dict(self.getTags()) - seq._rawinfo=self._rawinfo - seq._rawparser=self._rawparser - seq._hasTaxid=self._hasTaxid - return seq - - def getDefinition(self): - ''' - Sequence definition getter. - - :return: the sequence definition - :rtype: str - - ''' - return self._definition - - def setDefinition(self, value): - ''' - Sequence definition setter. - - :param value: the new sequence definition - :type value: C{str} - :return: C{None} - ''' - self._definition = value - - def getId(self): - ''' - Sequence identifier getter - - :return: the sequence identifier - :rtype: C{str} - ''' - return self._id - - def setId(self, value): - ''' - Sequence identifier setter. 
- - :param value: the new sequence identifier - :type value: C{str} - :return: C{None} - ''' - self._id = value - - def getStr(self): - ''' - Return the sequence as a string - - :return: the string representation of the sequence - :rtype: str - ''' - return self._seq - - def getSymbolAt(self,position): - ''' - Return the symbole at C{position} in the sequence - - :param position: the desired position. Position start from 0 - if position is < 0 then they are considered - to reference the end of the sequence. - :type position: `int` - - :return: a one letter string - :rtype: `str` - ''' - return str(self)[position] - - def getSubSeq(self,location): - ''' - return a subsequence as described by C{location}. - - The C{location} parametter can be a L{obitools.location.Location} instance, - an interger or a python C{slice} instance. If C{location} - is an iterger this method is equivalent to L{getSymbolAt}. - - :param location: the positions of the subsequence to return - :type location: C{Location} or C{int} or C{slice} - :return: the subsequence - :rtype: a single character as a C{str} is C{location} is an integer, - a L{obitools.SubSequence} instance otherwise. - - ''' - if isinstance(location,Location): - return location.extractSequence(self) - elif isinstance(location, int): - return self.getSymbolAt(location) - elif isinstance(location, slice): - return SubSequence(self,location) - - raise TypeError,'key must be a Location, an integer or a slice' - - def getKey(self,key): - if key not in self._info: - if self._rawinfo is None: - if key=='count': - return 1 - else: - raise KeyError,key - p = re.compile(self._rawparser % key) - m = p.search(self._rawinfo) - if m is not None: - v=m.group(1) - self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):] - try: - v = eval(v) - except: - pass - self._info[key]=v - else: - if key=='count': - v=1 - else: - raise KeyError,key - else: - v=self._info[key] - return v - - def extractTaxon(self): - ''' - Extract Taxonomy information from the sequence header. - This method by default return None. It should be subclassed - if necessary as in L{obitools.seqdb.AnnotatedSequence}. - - :return: None - ''' - self._hasTaxid=self.hasKey('taxid') - return None - - def __str__(self): - return self.getStr() - - def __getitem__(self,key): - if isinstance(key, str): - if key=='taxid' and self._hasTaxid is None: - self.extractTaxon() - return self.getKey(key) - else: - return self.getSubSeq(key) - - def __setitem__(self,key,value): - self._info[key]=value - if key=='taxid': - self._hasTaxid=value is not None - - def __delitem__(self,key): - if isinstance(key, str): - if key in self: - del self._info[key] - else: - raise KeyError,key - - if key=='taxid': - self._hasTaxid=False - else: - raise TypeError,key - - def __iter__(self): - ''' - Iterate through the sequence symbols - ''' - return iter(str(self)) - - def __len__(self): - return self.__len - - def hasKey(self,key): - rep = key in self._info - - if not rep and self._rawinfo is not None: - p = re.compile(self._rawparser % key) - m = p.search(self._rawinfo) - if m is not None: - v=m.group(1) - self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):] - try: - v = eval(v) - except: - pass - self._info[key]=v - rep=True - - return rep - - def __contains__(self,key): - ''' - methods allowing to use the C{in} operator on a C{BioSequence}. - - The C{in} operator test if the C{key} value is defined for this - sequence. 
- - :param key: the name of the checked value - :type key: str - :return: C{True} if the value is defined, {False} otherwise. - :rtype: C{bool} - ''' - if key=='taxid' and self._hasTaxid is None: - self.extractTaxon() - return self.hasKey(key) - - def rawiteritems(self): - return self._info.iteritems() - - def iteritems(self): - ''' - iterate other items dictionary storing the values - associated to the sequence. It works similarly to - the iteritems function of C{dict}. - - :return: an iterator over the items (key,value) - link to a sequence - :rtype: iterator over tuple - :see: L{items} - ''' - if self._rawinfo is not None: - p = re.compile(self._rawparser % "([a-zA-Z]\w*)") - for k,v in p.findall(self._rawinfo): - try: - self._info[k]=eval(v) - except: - self._info[k]=v - self._rawinfo=None - return self._info.iteritems() - - def items(self): - return [x for x in self.iteritems()] - - def iterkeys(self): - return (k for k,v in self.iteritems()) - - def keys(self): - return [x for x in self.iterkeys()] - - def getTags(self): - self.iteritems() - return self._info - - def getRoot(self): - return self - - def getWrappers(self): - if not hasattr(self, '_wrappers'): - self._wrappers=WrapperSet() - return self._wrappers - - def register(self,wrapper): - self.wrappers.add(ref(wrapper,self._unregister)) - - def _unregister(self,ref): - self.wrappers.remove(ref) - - wrappers = property(getWrappers,None,None,'') - - definition = property(getDefinition, setDefinition, None, "Sequence Definition") - - id = property(getId, setId, None, 'Sequence identifier') - - def _getTaxid(self): - return self['taxid'] - - def _setTaxid(self,taxid): - self['taxid']=taxid - - taxid = property(_getTaxid,_setTaxid,None,'NCBI Taxonomy identifier') - _seq = property(get_seq, set_seq, None, None) - -class NucSequence(BioSequence): - """ - :py:class:`NucSequence` specialize the :py:class:`BioSequence` class for storing DNA - sequences. - - The constructor is identical to the :py:class:`BioSequence` constructor. - """ - - def complement(self): - """ - :return: The reverse complemented sequence as an instance of :py:class:`DNAComplementSequence` - :rtype: :py:class:`DNAComplementSequence` - """ - return DNAComplementSequence(self) - - def isNucleotide(self): - return True - - -class AASequence(BioSequence): - """ - :py:class:`AASequence` specialize the :py:class:`BioSequence` class for storing protein - sequences. - - The constructor is identical to the :py:class:`BioSequence` constructor. - """ - - - def isNucleotide(self): - return False - - -class WrappedBioSequence(BioSequence): - """ - .. warning:: - - :py:class:`obitools.WrappedBioSequence` is an abstract class, this constructor - can only be called by a subclass constructor. 
- """ - - - def __init__(self,reference,id=None,definition=None,**info): - - assert type(self)!=WrappedBioSequence,"obitools.WrappedBioSequence is an abstract class" - - self._wrapped = reference - reference.register(self) - self._id=id - self.definition=definition - self._info=info - - def clone(self): - seq = type(self)(self.wrapped, - id=self._id, - definition=self._definition - ) - seq._info=dict(self._info) - - return seq - - def getWrapped(self): - return self._wrapped - - def getDefinition(self): - d = self._definition or self.wrapped.definition - return d - - def getId(self): - d = self._id or self.wrapped.id - return d - - def isNucleotide(self): - return self.wrapped.isNucleotide() - - - def iterkeys(self): - return uniqueChain(self._info.iterkeys(), - self.wrapped.iterkeys()) - - def rawiteritems(self): - return chain(self._info.iteritems(), - (x for x in self.wrapped.rawiteritems() - if x[0] not in self._info)) - - def iteritems(self): - for x in self.iterkeys(): - yield (x,self[x]) - - def getKey(self,key): - if key in self._info: - return self._info[key] - else: - return self.wrapped.getKey(key) - - def hasKey(self,key): - return key in self._info or self.wrapped.hasKey(key) - - def getSymbolAt(self,position): - return self.wrapped.getSymbolAt(self.posInWrapped(position)) - - def posInWrapped(self,position,reference=None): - if reference is None or reference is self.wrapped: - return self._posInWrapped(position) - else: - return self.wrapped.posInWrapped(self._posInWrapped(position),reference) - - - def getStr(self): - return str(self.wrapped) - - def getRoot(self): - return self.wrapped.getRoot() - - def complement(self): - """ - The :py:meth:`complement` method of the :py:class:`WrappedBioSequence` class - raises an exception :py:exc:`AttributeError` if the method is called and the cut - sequence does not corresponds to a nucleic acid sequence. 
- """ - - if self.wrapped.isNucleotide(): - return DNAComplementSequence(self) - raise AttributeError - - - def _posInWrapped(self,position): - return position - - - definition = property(getDefinition,BioSequence.setDefinition, None) - id = property(getId,BioSequence.setId, None) - - wrapped = property(getWrapped, None, None, "A pointer to the wrapped sequence") - - def _getWrappedRawInfo(self): - return self.wrapped._rawinfo - - _rawinfo = property(_getWrappedRawInfo) - - -class SubSequence(WrappedBioSequence): - """ - """ - - - @staticmethod - def _sign(x): - if x == 0: - return 0 - elif x < 0: - return -1 - return 1 - - def __init__(self,reference, - location=None, - start=None,stop=None, - id=None,definition=None, - **info): - WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info) - - if isinstance(location, slice): - self._location = location - else: - step = 1 - if not isinstance(start, int): - start = 0; - if not isinstance(stop,int): - stop = len(reference) - self._location=slice(start,stop,step) - - self._indices=self._location.indices(len(self.wrapped)) - self._xrange=xrange(*self._indices) - - self._info['cut']='[%d,%d,%s]' % self._indices - - if hasattr(reference,'quality'): - self.quality = reference.quality[self._location] - - def getId(self): - d = self._id or ("%s_SUB" % self.wrapped.id) - return d - - - def clone(self): - seq = WrappedBioSequence.clone(self) - seq._location=self._location - seq._indices=seq._location.indices(len(seq.wrapped)) - seq._xrange=xrange(*seq._indices) - return seq - - - def __len__(self): - return len(self._xrange) - - def getStr(self): - return ''.join([x for x in self]) - - def __iter__(self): - return (self.wrapped.getSymbolAt(x) for x in self._xrange) - - def _posInWrapped(self,position): - return self._xrange[position] - - - id = property(getId,BioSequence.setId, None) - - - -class DNAComplementSequence(WrappedBioSequence): - """ - Class used to represent a reverse complemented DNA sequence. Usually instances - of this class are produced by using the :py:meth:`NucSequence.complement` method. - """ - - - _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a', - 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k', - 's': 's', 'w': 'w', 'b': 'v', 'd': 'h', - 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a', - '-': '-'} - - - def __init__(self,reference, - id=None,definition=None,**info): - WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info) - assert reference.isNucleotide() - self._info['complemented']=True - if hasattr(reference,'quality'): - self.quality = reference.quality[::-1] - - - def getId(self): - d = self._id or ("%s_CMP" % self.wrapped.id) - return d - - def __len__(self): - return len(self._wrapped) - - def getStr(self): - return ''.join([x for x in self]) - - def __iter__(self): - return (self.getSymbolAt(x) for x in xrange(len(self))) - - def _posInWrapped(self,position): - return -(position+1) - - def getSymbolAt(self,position): - return DNAComplementSequence._comp[self.wrapped.getSymbolAt(self.posInWrapped(position))] - - def complement(self): - """ - The :py:meth:`complement` method of the :py:class:`DNAComplementSequence` class actually - returns the wrapped sequenced. Effectively the reversed complemented sequence of a reversed - complemented sequence is the initial sequence. 
- """ - return self.wrapped - - id = property(getId,BioSequence.setId, None) - - -def _isNucSeq(text): - acgt = 0 - notnuc = 0 - ltot = len(text) * 0.8 - for c in text.lower(): - if c in 'acgt-': - acgt+=1 - if c not in DNAComplementEncoder._comp: - notnuc+=1 - return notnuc==0 and float(acgt) > ltot - - -def bioSeqGenerator(id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info): - """ - Generate automagically the good class instance between : - - - :py:class:`NucSequence` - - :py:class:`AASequence` - - Build a new sequence instance. Sequences are instancied as :py:class:`NucSequence` if the - `seq` attribute contains more than 80% of *A*, *C*, *G*, *T* or *-* symbols - in upper or lower cases. Conversely, the new sequence instance is instancied as - :py:class:`AASequence`. - - - - :param id: sequence identifier - :type id: `str` - - :param seq: the sequence - :type seq: `str` - - :param definition: sequence definition (optional) - :type definition: `str` - - :param rawinfo: a text containing a set of key=value; patterns - :type definition: `str` - - :param rawparser: a text describing a regular patterns template - used to parse rawinfo - :type definition: `str` - - :param info: extra named parameters can be added to associate complementary - data to the sequence - """ - if _isNucSeq(seq): - return NucSequence(id,seq,definition,rawinfo,rawparser,**info) - else: - return AASequence(id,seq,definition,rawinfo,rawparser,**info) - diff --git a/obitools/__init__.pyc b/obitools/__init__.pyc deleted file mode 100644 index 3cc2111..0000000 Binary files a/obitools/__init__.pyc and /dev/null differ diff --git a/obitools/align/__init__.py b/obitools/align/__init__.py deleted file mode 100644 index 54cca7d..0000000 --- a/obitools/align/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ - - -from _nws import NWS -from _upperbond import indexSequences -from _lcs import LCS,lenlcs -from _assemble import DirectAssemble, ReverseAssemble -from _qsassemble import QSolexaDirectAssemble,QSolexaReverseAssemble -from _rassemble import RightDirectAssemble as RightReverseAssemble -from _qsrassemble import QSolexaRightDirectAssemble,QSolexaRightReverseAssemble -from _freeendgap import FreeEndGap -from _freeendgapfm import FreeEndGapFullMatch -from _upperbond import isLCSReachable - diff --git a/obitools/align/_assemble.so b/obitools/align/_assemble.so deleted file mode 100755 index dbc2139..0000000 Binary files a/obitools/align/_assemble.so and /dev/null differ diff --git a/obitools/align/_dynamic.so b/obitools/align/_dynamic.so deleted file mode 100755 index 2f93d3a..0000000 Binary files a/obitools/align/_dynamic.so and /dev/null differ diff --git a/obitools/align/_freeendgap.so b/obitools/align/_freeendgap.so deleted file mode 100755 index 53cd9c0..0000000 Binary files a/obitools/align/_freeendgap.so and /dev/null differ diff --git a/obitools/align/_freeendgapfm.so b/obitools/align/_freeendgapfm.so deleted file mode 100755 index f88c07b..0000000 Binary files a/obitools/align/_freeendgapfm.so and /dev/null differ diff --git a/obitools/align/_lcs.so b/obitools/align/_lcs.so deleted file mode 100755 index 555a2a2..0000000 Binary files a/obitools/align/_lcs.so and /dev/null differ diff --git a/obitools/align/_nws.so b/obitools/align/_nws.so deleted file mode 100755 index af7e849..0000000 Binary files a/obitools/align/_nws.so and /dev/null differ diff --git a/obitools/align/_profilenws.so b/obitools/align/_profilenws.so deleted file mode 100755 index baa8eda..0000000 Binary files 
a/obitools/align/_profilenws.so and /dev/null differ diff --git a/obitools/align/_qsassemble.so b/obitools/align/_qsassemble.so deleted file mode 100755 index 3bc83e9..0000000 Binary files a/obitools/align/_qsassemble.so and /dev/null differ diff --git a/obitools/align/_qsrassemble.so b/obitools/align/_qsrassemble.so deleted file mode 100755 index 75b98aa..0000000 Binary files a/obitools/align/_qsrassemble.so and /dev/null differ diff --git a/obitools/align/_rassemble.so b/obitools/align/_rassemble.so deleted file mode 100755 index e2a063c..0000000 Binary files a/obitools/align/_rassemble.so and /dev/null differ diff --git a/obitools/align/_upperbond.so b/obitools/align/_upperbond.so deleted file mode 100755 index 5f2b1fe..0000000 Binary files a/obitools/align/_upperbond.so and /dev/null differ diff --git a/obitools/align/homopolymere.py b/obitools/align/homopolymere.py deleted file mode 100644 index 5efcbff..0000000 --- a/obitools/align/homopolymere.py +++ /dev/null @@ -1,56 +0,0 @@ -''' -Created on 14 mai 2009 - -@author: coissac -''' - -from obitools import WrappedBioSequence - -class HomoNucBioSeq(WrappedBioSequence): - ''' - classdocs - ''' - - - def __init__(self,reference,id=None,definition=None,**info): - ''' - Constructor - ''' - assert reference.isNucleotide(),"reference must be a nucleic sequence" - WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info) - self.__cleanHomopolymer() - - def __cleanHomopolymer(self): - s = [] - c = [] - old=None - nc=0 - for n in self._wrapped: - if old is not None and n!=old: - s.append(old) - c.append(nc) - nc=0 - old=n - nc+=1 - self._cached=''.join(s) - self['homopolymer']=c - self._cumulative=[] - sum=0 - for c in self._count: - sum+=c - self._cumulative.append(sum) - - def __len__(self): - return len(self._cached) - - def getStr(self): - return self._cached - - def __iter__(self): - return iter(self._cached) - - def _posInWrapped(self,position): - return self._cumulative[position] - - - \ No newline at end of file diff --git a/obitools/align/ssearch.py b/obitools/align/ssearch.py deleted file mode 100644 index 55a74ce..0000000 --- a/obitools/align/ssearch.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import re - -from obitools.fasta import formatFasta - -class SsearchParser(object): - - _matchQuery = re.compile("^Query:.+\n.+?>+([^ ]+)", re.MULTILINE) - _matchLQuery = re.compile("^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE) - _matchProp = re.compile("^The best scores are:.*\n(.+?)>>>", re.DOTALL+re.MULTILINE) - def __init__(self,file): - if isinstance(file,str): - file = open(file,'rU') - self.data = file.read() - self.query= SsearchParser._matchQuery.search(self.data).group(1) - self.queryLength= int(SsearchParser._matchLQuery.search(self.data).group(1)) - props = SsearchParser._matchProp.search(self.data) - if props: - props=props.group(0).split('\n')[1:-2] - self.props=[] - for line in props: - subject,tab = line.split('\t') - tab=tab.split() - ssp = subject.split() - ac = ssp[0] - dbl= int(ssp[-5][:-1]) - ident = float(tab[0]) - matchlen = abs(int(tab[5]) - int(tab[4])) +1 - self.props.append({"ac" :ac, - "identity" :ident, - "subjectlength":dbl, - 'matchlength' : matchlen}) - -def run(seq,database,program='fasta35',opts=''): - ssearchin,ssearchout,ssearcherr = os.popen3("%s %s %s" % (program,opts,database)) - print >>ssearchin,formatFasta(seq) - ssearchin.close() - result = SsearchParser(ssearchout) - - return seq,result - -def ssearchIterator(sequenceIterator,database,program='ssearch35',opts=''): - for seq in 
sequenceIterator: - yield run(seq,database,program,opts) - - diff --git a/obitools/alignment/__init__.py b/obitools/alignment/__init__.py deleted file mode 100644 index a89793a..0000000 --- a/obitools/alignment/__init__.py +++ /dev/null @@ -1,175 +0,0 @@ -from obitools import BioSequence -from obitools import WrappedBioSequence -from copy import deepcopy - -class GappedPositionException(Exception): - pass - -class AlignedSequence(WrappedBioSequence): - - def __init__(self,reference, - id=None,definition=None,**info): - WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info) - self._length=len(reference) - self._gaps=[[self._length,0]] - - def clone(self): - seq = WrappedBioSequence.clone(self) - seq._gaps=deepcopy(self._gaps) - seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0) - return seq - - def setGaps(self, value): - ''' - Set gap vector to an AlignedSequence. - - Gap vector describes the gap positions on a sequence. - It is a gap of couple. The first couple member is the count - of sequence letter, the second one is the gap length. - @param value: a list of length 2 list describing gap positions - @type value: list of couple - ''' - assert isinstance(value, list),'Gap vector must be a list' - assert reduce(lambda x,y: x and y, - (isinstance(z, list) and len(z)==2 for z in value), - True),"Value must be a list of length 2 list" - - lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0) - assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence" - self._gaps = value - self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0) - - def getGaps(self): - return tuple(self._gaps) - gaps = property(getGaps, setGaps, None, "Gaps's Docstring") - - def _getIndice(self,pos): - i=0 - cpos=0 - for s,g in self._gaps: - cpos+=s - if cpos>pos: - return i,pos-cpos+s - cpos+=g - if cpos>pos: - return i,-pos+cpos-g-1 - i+=1 - raise IndexError - - def getId(self): - d = self._id or ("%s_ALN" % self.wrapped.id) - return d - - def __len__(self): - return self._length - - def getStr(self): - return ''.join([x for x in self]) - - def __iter__(self): - def isymb(): - cpos=0 - for s,g in self._gaps: - for x in xrange(s): - yield self.wrapped[cpos+x] - for x in xrange(g): - yield '-' - cpos+=s - return isymb() - - def _posInWrapped(self,position): - i,s=self._getIndice(position) - if s<0: - raise GappedPositionException - value=self._gaps - p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s - return p - - def getSymbolAt(self,position): - try: - return self.wrapped.getSymbolAt(self.posInWrapped(position)) - except GappedPositionException: - return '-' - - def insertGap(self,position,count=1): - if position==self._length: - idx=len(self._gaps)-1 - p=-1 - else: - idx,p = self._getIndice(position) - - if p >= 0: - self._gaps.insert(idx, [p,count]) - self._gaps[idx+1][0]-=p - else: - self._gaps[idx][1]+=count - self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0) - - - id = property(getId,BioSequence.setId, None, "Sequence Identifier") - - -class Alignment(list): - - def _assertData(self,data): - assert isinstance(data, BioSequence),'You must only add bioseq to an alignement' - if hasattr(self, '_alignlen'): - assert self._alignlen==len(data),'All aligned sequences must have the same length' - else: - self._alignlen=len(data) - return data - - def clone(self): - ali = Alignment(x.clone() for x in self) - return ali - - def append(self,data): - data = self._assertData(data) - list.append(self,data) - - def 
__setitem__(self,index,data): - - data = self._assertData(data) - list.__setitem__(self,index,data) - - def getSite(self,key): - if isinstance(key,int): - return [x[key] for x in self] - - def insertGap(self,position,count=1): - for s in self: - s.insertGap(position,count) - - def isFullGapSite(self,key): - return reduce(lambda x,y: x and y,(z=='-' for z in self.getSite(key)),True) - - def isGappedSite(self,key): - return '-' in self.getSite(key) - - def __str__(self): - l = len(self[0]) - rep="" - idmax = max(len(x.id) for x in self)+2 - template= "%%-%ds %%-60s" % idmax - for p in xrange(0,l,60): - for s in self: - rep+= (template % (s.id,s[p:p+60])).strip() + '\n' - rep+="\n" - return rep - -def alignmentReader(file,sequenceIterator): - seqs = sequenceIterator(file) - alignement = Alignment() - for seq in seqs: - alignement.append(seq) - return alignement - - - - - -def columnIterator(alignment): - lali = len(alignment[0]) - for p in xrange(lali): - c = [x[p] for x in alignment] - yield c \ No newline at end of file diff --git a/obitools/alignment/ace.py b/obitools/alignment/ace.py deleted file mode 100644 index 59cc8f6..0000000 --- a/obitools/alignment/ace.py +++ /dev/null @@ -1,47 +0,0 @@ -from obitools.format.genericparser import GenericParser -from obitools.utils import universalOpen -from obitools.fasta import parseFastaDescription -from obitools import NucSequence - - -import sys - -_contigIterator=GenericParser('^CO ') - -_contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)') -_contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n') -_contigIterator.addParseAction('DS', '\nDS +(.+)') -_contigIterator.addParseAction('CO', '^CO (\S+)') - -def contigIterator(file): - file = universalOpen(file) - for entry in _contigIterator(file): - contig=[] - for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']): - id = rd[0] - shift = int(af[2]) - if shift < 0: - print >> sys.stderr,"Sequence %s in contig %s has a negative paddng value %d : skipped" % (id,entry['CO'][0],shift) - #continue - - definition,info = parseFastaDescription(ds) - info['shift']=shift - seq = rd[4].replace('\n','').replace('*','-').strip() - contig.append(NucSequence(id,seq,definition,**info)) - - maxlen = max(len(x)+x['shift'] for x in contig) - minshift=min(x['shift'] for x in contig) - rep = [] - - for s in contig: - info = s.getTags() - info['shift']-=minshift-1 - head = '-' * (info['shift']-1) - - tail = (maxlen + minshift - len(s) - info['shift'] - 1) - info['tail']=tail - newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info) - rep.append(newseq) - - yield entry['CO'][0],rep - \ No newline at end of file diff --git a/obitools/barcodecoverage/__init__.py b/obitools/barcodecoverage/__init__.py deleted file mode 100644 index 09e542e..0000000 --- a/obitools/barcodecoverage/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -''' - -@author: merciece -Creates the tree representing the coverage of 2 primers from an ecoPCR output file and an ecoPCR database. - - -''' \ No newline at end of file diff --git a/obitools/barcodecoverage/calcBc.py b/obitools/barcodecoverage/calcBc.py deleted file mode 100644 index 13b0401..0000000 --- a/obitools/barcodecoverage/calcBc.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 24 nov. 
2011 - -@author: merciece -''' - - -def main(amplifiedSeqs, seqsFromDB, keptRanks, errors, tax) : - ''' - error threshold is set to 3 - ''' - - listtaxabygroupinDB = {} - - for seq in seqsFromDB : - taxid = seq['taxid'] - p = [a for a in tax.parentalTreeIterator(taxid)] - for a in p : - if a != p[0] : - if a[1] in keptRanks : - group = a[0] - if group in listtaxabygroupinDB and taxid not in listtaxabygroupinDB[group] : - listtaxabygroupinDB[group].add(taxid) - elif group not in listtaxabygroupinDB : - listtaxabygroupinDB[group]=set([taxid]) - - taxabygroup = dict((x,len(listtaxabygroupinDB[x])) for x in listtaxabygroupinDB) - - listamplifiedtaxabygroup = {} - - for seq in amplifiedSeqs : - if errors[seq.id][2] <= 3 : - taxid = seq['taxid'] - p = [a for a in tax.parentalTreeIterator(taxid)] - for a in p : - if a != p[0] : - if a[1] in keptRanks : - group = a[0] - if group in listamplifiedtaxabygroup and taxid not in listamplifiedtaxabygroup[group] : - listamplifiedtaxabygroup[group].add(taxid) - elif group not in listamplifiedtaxabygroup : - listamplifiedtaxabygroup[group]=set([taxid]) - - amplifiedtaxabygroup = dict((x,len(listamplifiedtaxabygroup[x])) for x in listamplifiedtaxabygroup) - - BcValues = {} - - groups = [g for g in taxabygroup.keys()] - - for g in groups : - if g in amplifiedtaxabygroup : - BcValues[g] = float(amplifiedtaxabygroup[g])/taxabygroup[g]*100 - BcValues[g] = round(BcValues[g], 2) - else : - BcValues[g] = 0.0 - - return BcValues - - - - diff --git a/obitools/barcodecoverage/calculateBc.py b/obitools/barcodecoverage/calculateBc.py deleted file mode 100644 index c5edb8a..0000000 --- a/obitools/barcodecoverage/calculateBc.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 24 nov. 2011 - -@author: merciece -''' - -import sys - - -def main(amplifiedSeqs, seqsFromDB, keptRanks, tax) : - - BcValues = {} - - #speciesid = tax.findRankByName('species') - #subspeciesid = tax.findRankByName('subspecies') - - listtaxonbygroup = {} - - for seq in seqsFromDB : - taxid = seq['taxid'] - p = [a for a in tax.parentalTreeIterator(taxid)] - for a in p : - if a != p[0] : - if a[1] in keptRanks : - group = a - if group in listtaxonbygroup: - listtaxonbygroup[group].add(taxid) - else: - listtaxonbygroup[group]=set([taxid]) - - #stats = dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup) - - print>>sys.stderr, listtaxonbygroup - - listtaxonbygroup = {} - - for seq in amplifiedSeqs : - taxid = seq['taxid'] - p = [a for a in tax.parentalTreeIterator(taxid)] - for a in p : - if a != p[0] : - if a[1] in keptRanks : - group = a - if group in listtaxonbygroup: - listtaxonbygroup[group].add(taxid) - else: - listtaxonbygroup[group]=set([taxid]) - - print>>sys.stderr, listtaxonbygroup - - return BcValues - -# dbstats= dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup) -# -# ranks = [r for r in keptRanks] -# ranks.sort() -# -# print '%-20s\t%10s\t%10s\t%7s' % ('rank','ecopcr','db','percent') -# -# print>>sys.stderr, stats -# print>>sys.stderr, dbstats -# print>>sys.stderr, ranks -# -# for r in ranks: -# if r in dbstats and dbstats[r]: -# print '%-20s\t%10d\t%10d\t%8.2f' % (r,dbstats[r],stats[r],float(dbstats[r])/stats[r]*100) - - - - - diff --git a/obitools/barcodecoverage/drawBcTree.py b/obitools/barcodecoverage/drawBcTree.py deleted file mode 100644 index 9b1e215..0000000 --- a/obitools/barcodecoverage/drawBcTree.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 25 nov. 
2011 - -@author: merciece -''' - -from obitools.graph.rootedtree import nexusFormat - - -figtree="""\ -begin figtree; - set appearance.backgroundColorAttribute="User Selection"; - set appearance.backgroundColour=#-1; - set appearance.branchColorAttribute="bc"; - set appearance.branchLineWidth=2.0; - set appearance.foregroundColour=#-16777216; - set appearance.selectionColour=#-2144520576; - set branchLabels.colorAttribute="User Selection"; - set branchLabels.displayAttribute="errors"; - set branchLabels.fontName="sansserif"; - set branchLabels.fontSize=10; - set branchLabels.fontStyle=0; - set branchLabels.isShown=true; - set branchLabels.significantDigits=4; - set layout.expansion=2000; - set layout.layoutType="RECTILINEAR"; - set layout.zoom=0; - set nodeBars.barWidth=4.0; - set nodeLabels.colorAttribute="User Selection"; - set nodeLabels.displayAttribute="label"; - set nodeLabels.fontName="sansserif"; - set nodeLabels.fontSize=10; - set nodeLabels.fontStyle=0; - set nodeLabels.isShown=true; - set nodeLabels.significantDigits=4; - set polarLayout.alignTipLabels=false; - set polarLayout.angularRange=0; - set polarLayout.rootAngle=0; - set polarLayout.rootLength=100; - set polarLayout.showRoot=true; - set radialLayout.spread=0.0; - set rectilinearLayout.alignTipLabels=false; - set rectilinearLayout.curvature=0; - set rectilinearLayout.rootLength=100; - set scale.offsetAge=0.0; - set scale.rootAge=1.0; - set scale.scaleFactor=1.0; - set scale.scaleRoot=false; - set scaleAxis.automaticScale=true; - set scaleAxis.fontSize=8.0; - set scaleAxis.isShown=false; - set scaleAxis.lineWidth=2.0; - set scaleAxis.majorTicks=1.0; - set scaleAxis.origin=0.0; - set scaleAxis.reverseAxis=false; - set scaleAxis.showGrid=true; - set scaleAxis.significantDigits=4; - set scaleBar.automaticScale=true; - set scaleBar.fontSize=10.0; - set scaleBar.isShown=true; - set scaleBar.lineWidth=1.0; - set scaleBar.scaleRange=0.0; - set scaleBar.significantDigits=4; - set tipLabels.colorAttribute="User Selection"; - set tipLabels.displayAttribute="Names"; - set tipLabels.fontName="sansserif"; - set tipLabels.fontSize=10; - set tipLabels.fontStyle=0; - set tipLabels.isShown=true; - set tipLabels.significantDigits=4; - set trees.order=false; - set trees.orderType="increasing"; - set trees.rooting=false; - set trees.rootingType="User Selection"; - set trees.transform=false; - set trees.transformType="cladogram"; -end; -""" - - -def cartoonRankGenerator(rank): - def cartoon(node): - return 'rank' in node and node['rank']==rank - - return cartoon - - -def collapseBcGenerator(Bclimit): - def collapse(node): - return 'bc' in node and node['bc']<=Bclimit - return collapse - - -def label(node): - if 'bc' in node: - return "(%+3.1f) %s" % (node['bc'],node['name']) - else: - return " %s" % node['name'] - - -def main(coverageTree) : - print nexusFormat(coverageTree, - label=label, - blocks=figtree, - cartoon=cartoonRankGenerator('family')) - #collapse=collapseBcGenerator(70)) - diff --git a/obitools/barcodecoverage/findErrors.py b/obitools/barcodecoverage/findErrors.py deleted file mode 100644 index dae20a0..0000000 --- a/obitools/barcodecoverage/findErrors.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 24 nov. 
2011 - -@author: merciece -''' - - -def main(seqs, keptRanks, tax): - errorsBySeq = getErrorsOnLeaves(seqs) - errorsByTaxon = propagateErrors(errorsBySeq, keptRanks, tax) - return errorsBySeq, errorsByTaxon - - -def getErrorsOnLeaves(seqs) : - errors = {} - for s in seqs : - taxid = s['taxid'] - forErrs = s['forward_error'] - revErrs = s['reverse_error'] - total = forErrs + revErrs - seqNb = 1 - errors[s.id] = [forErrs,revErrs,total,seqNb,taxid] - return errors - - -def propagateErrors(errorsOnLeaves, keptRanks, tax) : - allErrors = {} - for seq in errorsOnLeaves : - taxid = errorsOnLeaves[seq][4] - p = [a for a in tax.parentalTreeIterator(taxid)] - for a in p : - if a[1] in keptRanks : - group = a[0] - if group in allErrors : - allErrors[group][0] += errorsOnLeaves[seq][0] - allErrors[group][1] += errorsOnLeaves[seq][1] - allErrors[group][2] += errorsOnLeaves[seq][2] - allErrors[group][3] += 1 - else : - allErrors[group] = errorsOnLeaves[seq] - - for group in allErrors : - allErrors[group][0] /= float(allErrors[group][3]) - allErrors[group][1] /= float(allErrors[group][3]) - allErrors[group][2] /= float(allErrors[group][3]) - - allErrors[group][0] = round(allErrors[group][0], 2) - allErrors[group][1] = round(allErrors[group][1], 2) - allErrors[group][2] = round(allErrors[group][2], 2) - - return allErrors - - - - diff --git a/obitools/barcodecoverage/readFiles.py b/obitools/barcodecoverage/readFiles.py deleted file mode 100644 index b03e72a..0000000 --- a/obitools/barcodecoverage/readFiles.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 23 nov. 2011 - -@author: merciece -''' - -from obitools.ecopcr import sequence -from obitools.ecopcr import taxonomy - - -def main(entries,options): - filteredDataFromDB = ecoPCRDatabaseReader(options) - filteredData = ecoPCRFileReader(entries,filteredDataFromDB) - return filteredDataFromDB,filteredData - - -def ecoPCRDatabaseReader(options): - - tax = taxonomy.EcoTaxonomyDB(options.taxonomy) - seqs = sequence.EcoPCRDBSequenceIterator(options.taxonomy,taxonomy=tax) - - norankid = tax.findRankByName('no rank') - speciesid = tax.findRankByName('species') - genusid = tax.findRankByName('genus') - familyid = tax.findRankByName('family') - - minrankseq = set([speciesid,genusid,familyid]) - - usedrankid = {} - - ingroup = {} - outgroup= {} - - for s in seqs : - if 'taxid' in s : - taxid = s['taxid'] - allrank = set() - for p in tax.parentalTreeIterator(taxid): - if p[1]!=norankid: - allrank.add(p[1]) - if len(minrankseq & allrank) == 3: - for r in allrank: - usedrankid[r]=usedrankid.get(r,0) + 1 - - if tax.isAncestor(options.ingroup,taxid): - ingroup[s.id] = s - else: - outgroup[s.id] = s - - keptranks = set(r for r in usedrankid - if float(usedrankid[r])/float(len(ingroup)) > options.rankthresold) - - return { 'ingroup' : ingroup, - 'outgroup': outgroup, - 'ranks' : keptranks, - 'taxonomy': tax - } - - -def ecoPCRFileReader(entries,filteredDataFromDB) : - filteredData = [] - for s in entries : - if 'taxid' in s : - seqId = s.id - if seqId in filteredDataFromDB['ingroup'] : - filteredData.append(s) - return filteredData - diff --git a/obitools/barcodecoverage/writeBcTree.py b/obitools/barcodecoverage/writeBcTree.py deleted file mode 100644 index 7c8243e..0000000 --- a/obitools/barcodecoverage/writeBcTree.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/local/bin/python -''' -Created on 25 nov. 
2011 - -@author: merciece -''' - -from obitools.graph.rootedtree import RootedTree - - -def main(BcValues,errors,tax) : - - tree = RootedTree() - tset = set(BcValues) - - for taxon in BcValues: - if taxon in errors : - forErr = errors[taxon][0] - revErr = errors[taxon][1] - totErr = errors[taxon][2] - else : - forErr = -1.0 - revErr = -1.0 - totErr = -1.0 - - tree.addNode(taxon, rank=tax.getRank(taxon), - name=tax.getScientificName(taxon), - bc = BcValues[taxon], - errors = str(forErr)+' '+str(revErr), - totError = totErr - ) - - for taxon in BcValues: - piter = tax.parentalTreeIterator(taxon) - taxon = piter.next() - for parent in piter: - if taxon[0] in tset and parent[0] in BcValues: - tset.remove(taxon[0]) - tree.addEdge(parent[0], taxon[0]) - taxon=parent - - return tree diff --git a/obitools/blast/__init__.py b/obitools/blast/__init__.py deleted file mode 100644 index 11b5274..0000000 --- a/obitools/blast/__init__.py +++ /dev/null @@ -1,207 +0,0 @@ -from os import popen2 -from itertools import imap,count - -from obitools.table import iTableIterator,TableRow,Table,SelectionIterator -from obitools.utils import ColumnFile -from obitools.location import SimpleLocation -from obitools.fasta import formatFasta -import sys - -class Blast(object): - ''' - Run blast - ''' - - def __init__(self,mode,db,program='blastall',**options): - self._mode = mode - self._db = db - self._program = program - self._options = options - - def getMode(self): - return self._mode - - - def getDb(self): - return self._db - - - def getProgram(self): - return self._program - - def _blastcmd(self): - tmp = """%(program)s \\ - -p %(mode)s \\ - -d %(db)s \\ - -m 8 \\ - %(options)s \\ - """ - options = ' '.join(['-%s %s' % (x[0],str(x[1])) - for x in self._options.iteritems()]) - data = { - 'program' : self.program, - 'db' : self.db, - 'mode' : self.mode, - 'options' : options - } - - return tmp % data - - def __call__(self,sequence): - ''' - Run blast with one sequence object - @param sequence: - @type sequence: - ''' - cmd = self._blastcmd() - - (blast_in,blast_out) = popen2(cmd) - - print >>blast_in,formatFasta(sequence) - blast_in.close() - - blast = BlastResultIterator(blast_out) - - return blast - - mode = property(getMode, None, None, "Mode's Docstring") - - db = property(getDb, None, None, "Db's Docstring") - - program = property(getProgram, None, None, "Program's Docstring") - - -class NetBlast(Blast): - ''' - Run blast on ncbi servers - ''' - - def __init__(self,mode,db,**options): - ''' - - @param mode: - @param db: - ''' - Blast.__init__(self, mode, db, 'blastcl3',**options) - - -class BlastResultIterator(iTableIterator): - - def __init__(self,blastoutput,query=None): - ''' - - @param blastoutput: - @type blastoutput: - ''' - self._blast = ColumnFile(blastoutput, - strip=True, - skip="#", - sep="\t", - types=self.types - ) - self._query = query - self._hindex = dict((k,i) for i,k in imap(None,count(),self._getHeaders())) - - def _getHeaders(self): - return ('Query id','Subject id', - '% identity','alignment length', - 'mismatches', 'gap openings', - 'q. start', 'q. end', - 's. start', 's. 
end', - 'e-value', 'bit score') - - def _getTypes(self): - return (str,str, - float,int, - int,int, - int,int, - int,int, - float,float) - - def _getRowFactory(self): - return BlastMatch - - def _getSubrowFactory(self): - return TableRow - - def _getQuery(self): - return self._query - - - headers = property(_getHeaders,None,None) - types = property(_getTypes,None,None) - rowFactory = property(_getRowFactory,None,None) - subrowFactory = property(_getSubrowFactory,None,None) - query = property(_getQuery,None,None) - - def next(self): - ''' - Return the next blast match as a row object. - ''' - value = self._blast.next() - return self.rowFactory(self,value) - - - -class BlastResult(Table): - ''' - Results of a blast run - ''' - -class BlastMatch(TableRow): - ''' - Blast high scoring pair between two sequences - ''' - - def getQueryLocation(self): - l = SimpleLocation(self[6], self[7]) - return l - - def getSubjectLocation(self): - l = SimpleLocation(self[8], self[9]) - return l - - def getSubjectSequence(self,database): - return database[self[1]] - - def queryCov(self,query=None): - ''' - Compute coverage of match on query sequence. - - @param query: the query sequence. Default is None. - In this case the query sequence associated - to this blast result is used. - @type query: L{obitools.BioSequence} - - @return: coverage fraction - @rtype: float - ''' - if query is None: - query = self.table.query - assert query is not None - return float(self[7]-self[6]+1)/float(len(query)) - - def __getitem__(self,key): - if key=='query coverage' and self.table.query is not None: - return self.queryCov() - else: - return TableRow.__getitem__(self,key) - -class BlastCovMinFilter(SelectionIterator): - - def __init__(self,blastiterator,covmin,query=None,**conditions): - if query is None: - query = blastiterator.table.query - assert query is not None - SelectionIterator.__init__(self,blastiterator,**conditions) - self._query = query - self._covmin=covmin - - def _covMinPredicat(self,row): - return row.queryCov(self._query)>=self._covmin - - def _checkCondition(self,row): - return self._covMinPredicat(row) and SelectionIterator._checkCondition(self, row) - - - \ No newline at end of file
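As a quick check of the coverage arithmetic in BlastMatch.queryCov above (a standalone sketch; columns 6 and 7 of the m8 tabular output are q. start and q. end, and the numbers below are hypothetical):

def query_coverage(q_start, q_end, query_length):
    # Fraction of the query spanned by the HSP, as in BlastMatch.queryCov.
    return float(q_end - q_start + 1) / float(query_length)

assert query_coverage(1, 50, 100) == 0.5  # 50 aligned positions out of 100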
diff --git a/obitools/carto/__init__.py b/obitools/carto/__init__.py deleted file mode 100644 index b7ac176..0000000 --- a/obitools/carto/__init__.py +++ /dev/null @@ -1,376 +0,0 @@ -# -*- coding: latin1 -*- - - - -from obitools import SVGdraw -import math - -class Map(object): - """ - Map represents an instance of a physical genetic map. - Such a map is defined by the length of the sequence - associated with it. - - A map is associated with a number of levels (Level), - themselves divided into sublevels (SubLevel). - The sublevels contain the features. - """ - def __init__(self,name,seqlength,scale=1): - """ - Constructor of a new map - - *Param*: - - name - name of the map - - seqlength - length of the sequence associated with the map - - scale - scale of the map indicating how many pixels - correspond to one map unit - """ - self.name = name - self.seqlength = seqlength - self.scale = scale - self.levels = {} - self.basicHSize = 10 - - def __str__(self): - return '<%s>' % self.name - - def __getitem__(self,level): - """ - returns level *level* of the map, - creating it if it does not exist - """ - if not isinstance(level,int): - raise TypeError('level must be a non-zero integer value') - elif level==0: - raise AssertionError('Level cannot be set to 0') - try: - return self.levels[level] - except KeyError: - self.levels[level] = Level(level,self) - return self.levels[level] - - def getBasicHSize(self): - """ - returns the base height of a map element, - expressed in pixels - """ - return self.basicHSize - - def getScale(self): - """ - Returns the scale of the map as the number of pixels - per physical unit of the map - """ - return self.scale - - - - def getNegativeBase(self): - return reduce(lambda x,y:x-y,[self.levels[z].getHeight() - for z in self.levels - if z < 0],self.getHeight()) - - def getPositiveBase(self): - return self.getNegativeBase() - 3 * self.getBasicHSize() - - def getHeight(self): - return reduce(lambda x,y:x+y,[z.getHeight() for z in self.levels.values()],0) \ - + 4 * self.getBasicHSize() - - def toXML(self,file=None,begin=0,end=None): - dessin = SVGdraw.drawing() - if end==None: - end = self.seqlength - hauteur= self.getHeight() - largeur=(end-begin+1)*self.scale - svg = SVGdraw.svg((begin*self.scale,0,largeur,hauteur), - '%fpx' % (self.seqlength * self.scale), - '%dpx' % hauteur) - - centre = self.getPositiveBase() + (1 + 1/4) * self.getBasicHSize() - svg.addElement(SVGdraw.rect(0,centre,self.seqlength * self.scale,self.getBasicHSize()/2)) - for e in self.levels.values(): - svg.addElement(e.getElement()) - dessin.setSVG(svg) - return dessin.toXml(file) - -class Feature(object): - pass -
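As a quick check of the scale arithmetic used by Map.toXML above (a standalone sketch; the values are hypothetical):

def drawn_width(begin, end, scale):
    # toXML computes the drawing width as largeur = (end - begin + 1) * scale,
    # with scale expressed in pixels per map unit.
    return (end - begin + 1) * scale

assert drawn_width(0, 19999, 0.5) == 10000.0  # a 20000-unit map at half scale
assert drawn_width(100, 300, 0.5) == 100.5    # footprint of a small feature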
class Level(object): - - def __init__(self,level,map): - if not isinstance(map,Map): - raise AssertionError('map is not an instance of class Map') - if level in map.levels: - raise AssertionError('Level %d already defined for map %s' % (level,map)) - else: - map.levels[level] = self - self.map = map - self.level = level - self.sublevels = {} - - def __getitem__(self,sublevel): - """ - returns sublevel *sublevel* of this level, - creating it if it does not exist - """ - if not isinstance(sublevel,int): - raise TypeError('sublevel must be a positive integer value') - elif sublevel<0: - raise AssertionError('Level cannot be negative') - try: - return self.sublevels[sublevel] - except KeyError: - self.sublevels[sublevel] = SubLevel(sublevel,self) - return self.sublevels[sublevel] - - def getBase(self): - if self.level < 0: - base = self.map.getNegativeBase() - base += reduce(lambda x,y:x+y,[self.map.levels[z].getHeight() - for z in self.map.levels - if z <0 and z >= self.level],0) - return base - else: - base = self.map.getPositiveBase() - base -= reduce(lambda x,y:x+y,[self.map.levels[z].getHeight() - for z in self.map.levels - if z >0 and z < self.level],0) - return base - - def getElement(self): - objet = SVGdraw.group('level%d' % self.level) - for e in self.sublevels.values(): - objet.addElement(e.getElement()) - return objet - - - - def getHeight(self): - return reduce(lambda x,y:x+y,[z.getHeight() for z in self.sublevels.values()],0) \ - + 2 * self.map.getBasicHSize() - -class SubLevel(object): - - def __init__(self,sublevel,level): - if not isinstance(level,Level): - raise AssertionError('level is not an instance of class Level') - if level in level.sublevels: - raise AssertionError('Sublevel %d already defined for level %s' % (sublevel,level)) - else: - level.sublevels[sublevel] = self - self.level = level - self.sublevel = sublevel - self.features = {} - - def getHeight(self): - return max([x.getHeight() for x in self.features.values()]+[0]) + 4 * self.level.map.getBasicHSize() - - def getBase(self): - base = self.level.getBase() - if self.level.level < 0: - base -= self.level.getHeight() - 2 * self.level.map.getBasicHSize() - base += reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight() - for z in self.level.sublevels - if z <= self.sublevel],0) - base -= 2* self.level.map.getBasicHSize() - else: - base -= reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight() - for z in self.level.sublevels - if z < self.sublevel],0) - base -= self.level.map.getBasicHSize() - return base - - def getElement(self): - base = self.getBase() - objet = SVGdraw.group('sublevel%d' % self.sublevel) - for e in self.features.values(): - objet.addElement(e.getElement(base)) - return objet - - def add(self,feature): - if not isinstance(feature,Feature): - raise TypeError('feature must be an instance of Feature') - if feature.name in self.features: - raise AssertionError('A feature with the same name (%s) has already been inserted in this sublevel' - % feature.name) - self.features[feature.name]=feature - feature.sublevel=self - -class SimpleFeature(Feature): - - def __init__(self,name,begin,end,visiblename=False,color=0): - self.begin = begin - self.end = end - self.name = name - self.color = color - self.sublevel = None - self.visiblename=visiblename - - def getHeight(self): - if not self.sublevel: - raise AssertionError('Not affected Simple feature') - if self.visiblename: - return self.sublevel.level.map.getBasicHSize() * 2 - else: - return self.sublevel.level.map.getBasicHSize() - - def getElement(self,base): - scale = self.sublevel.level.map.getScale() - y = base - self.sublevel.level.map.getBasicHSize() - x = self.begin * scale - width = (self.end - self.begin + 1) * scale - height = self.sublevel.level.map.getBasicHSize() - - objet = SVGdraw.rect(x,y,width,height,stroke=self.color) - objet.addElement(SVGdraw.description(self.name)) - - return objet - -class BoxFeature(SimpleFeature): - - def getHeight(self): - if not self.sublevel: - raise AssertionError('Not affected Box feature') - if self.visiblename: - return self.sublevel.level.map.getBasicHSize() * 4 - else: - return self.sublevel.level.map.getBasicHSize() * 3 - - def getElement(self,base): - scale = self.sublevel.level.map.getScale() - y = base - self.sublevel.level.map.getBasicHSize() * 2 - x = self.begin * scale - width = (self.end - self.begin + 1) * scale - height = self.sublevel.level.map.getBasicHSize() * 3 - - objet = SVGdraw.rect(x,y,width,height,stroke=self.color,fill="none") - objet.addElement(SVGdraw.description(self.name)) - - return objet - -class MultiPartFeature(Feature): - - def __init__(self,name,*args,**kargs): - self.limits = args - self.name = name - try: - self.color = kargs['color'] - except KeyError: - self.color = "black" - - try: -
self.visiblename=kargs['visiblename'] - except KeyError: - self.visiblename=None - - try: - self.flatlink=kargs['flatlink'] - except KeyError: - self.flatlink=False - - try: - self.roundlink=kargs['roundlink'] - except KeyError: - self.roundlink=False - - self.sublevel = None - - - def getHeight(self): - if not self.sublevel: - raise AssertionError('Not affected Simple feature') - if self.visiblename: - return self.sublevel.level.map.getBasicHSize() * 3 - else: - return self.sublevel.level.map.getBasicHSize() * 2 - - def getElement(self,base): - scale = self.sublevel.level.map.getScale() - - y = base - self.sublevel.level.map.getBasicHSize() - height = self.sublevel.level.map.getBasicHSize() - objet = SVGdraw.group(self.name) - for (debut,fin) in self.limits: - x = debut * scale - width = (fin - debut + 1) * scale - part = SVGdraw.rect(x,y,width,height,fill=self.color) - objet.addElement(part) - - debut = self.limits[0][1] - for (fin,next) in self.limits[1:]: - debut*=scale - fin*=scale - path = SVGdraw.pathdata(debut,y + height / 2) - delta = height / 2 - if self.roundlink: - path.qbezier((debut+fin)/2, y - delta,fin,y + height / 2) - else: - if self.flatlink: - delta = - height / 2 - path.line((debut+fin)/2, y - delta) - path.line(fin,y + height / 2) - path = SVGdraw.path(path,fill="none",stroke=self.color) - objet.addElement(path) - debut = next - - objet.addElement(SVGdraw.description(self.name)) - - return objet - -class TagFeature(Feature): - - def __init__(self,name,begin,length,ratio,visiblename=False,color=0): - self.begin = begin - self.length = length - self.ratio = ratio - self.name = name - self.color = color - self.sublevel = None - self.visiblename=visiblename - - def getHeight(self): - if not self.sublevel: - raise AssertionError('Not affected Tag feature') - - return self.sublevel.level.map.getBasicHSize()*11 - - def getElement(self,base): - scale = self.sublevel.level.map.getScale() - height = math.floor(max(1,self.sublevel.level.map.getBasicHSize()* 10 * self.ratio)) - y = base + self.sublevel.level.map.getBasicHSize() - height - x = self.begin * scale - width = self.length * scale - objet = SVGdraw.rect(x,y,width,height,stroke=self.color) - objet.addElement(SVGdraw.description(self.name)) - - return objet - -if __name__ == '__main__': - carte = Map('essai',20000,scale=0.5) - carte[-1][0].add(SimpleFeature('toto',100,300)) - carte[1][0].add(SimpleFeature('toto',100,300)) - carte[1][1].add(SimpleFeature('toto',200,1000)) - - carte[1][0].add(MultiPartFeature('bout',(1400,1450),(1470,1550),(1650,1800),color='red',flatlink=True)) - carte[1][0].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='red',flatlink=True)) - carte[-1][1].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='green')) - carte[-1][2].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='purple',roundlink=True)) - - carte[-1][1].add(BoxFeature('tutu',390,810,color='purple')) - carte[1][0].add(BoxFeature('tutu',390,810,color='red')) - carte[2][0].add(TagFeature('t1',1400,20,0.8)) - carte[2][0].add(TagFeature('t2',1600,20,0.2)) - carte.basicHSize=6 - print carte.toXML('truc.svg',begin=0,end=1000) - print carte.toXML('truc2.svg',begin=460,end=2000) - - - diff --git a/obitools/decorator.py b/obitools/decorator.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/distances/__init__.py b/obitools/distances/__init__.py deleted file mode 100644 index 1542fa9..0000000 --- a/obitools/distances/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -class 
DistanceMatrix(object): - - def __init__(self,alignment): - ''' - DistanceMatrix constructor. - - @param alignment: alignment used to compute the distance matrix - @type alignment: obitools.align.Alignment - ''' - self.aligment = alignment - self.matrix = [[None] * (x+1) for x in xrange(len(alignment))] - - def evaluateDist(self,x,y): - raise NotImplementedError - - def __getitem__(self,key): - assert isinstance(key,(tuple,list)) and len(key)==2, \ - 'key must be a tuple or a list of two integers' - x,y = key - if y < x: - z=x - x=y - y=z - rep = self.matrix[y][x] - if rep is None: - rep = self.evaluateDist(x,y) - self.matrix[y][x] = rep - - return rep \ No newline at end of file diff --git a/obitools/distances/observed.py b/obitools/distances/observed.py deleted file mode 100644 index 8828d92..0000000 --- a/obitools/distances/observed.py +++ /dev/null @@ -1,77 +0,0 @@ -''' -Module dedicated to computing observed divergences from -an alignment. No distance correction is applied at all -''' - -from itertools import imap - -from obitools.distances import DistanceMatrix - -class PairewiseGapRemoval(DistanceMatrix): - ''' - Observed divergence matrix from an alignment. - Gaps are removed from the alignment on a pairwise - sequence basis - ''' - - def evaluateDist(self,x,y): - ''' - Compute the observed divergence between two sequences - of an alignment. - - @attention: For performance purposes this method should - not be called directly. Use instead the __getitem__ - method from DistanceMatrix. - - @see: L{__getitem__} - - @param x: index of the first sequence in the alignment - @type x: int - @param y: index of the second sequence in the alignment - @type y: int - - - ''' - - seq1 = self.aligment[x] - seq2 = self.aligment[y] - - diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1), - (z[0]!=z[1] for z in imap(None,seq1,seq2) - if '-' not in z),(0,0)) - return float(diff)/tot - - -
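The computation above boils down to counting differing columns after discarding any column that contains a gap. A standalone sketch of the same logic (illustrative only, independent of the DistanceMatrix machinery):

def observed_divergence(seq1, seq2):
    # Keep only columns without gaps, as PairewiseGapRemoval.evaluateDist does,
    # then return the fraction of those columns where the two sequences differ.
    pairs = [(a, b) for a, b in zip(seq1, seq2) if '-' not in (a, b)]
    diff = sum(1 for a, b in pairs if a != b)
    return float(diff) / len(pairs)

assert observed_divergence('AC-GT', 'ATCGT') == 0.25  # 1 mismatch over 4 ungapped columns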
class Pairewise(DistanceMatrix): - ''' - Observed divergence matrix from an alignment. - Gaps are kept in the alignment - ''' - - def evaluateDist(self,x,y): - ''' - Compute the observed divergence between two sequences - of an alignment. - - @attention: For performance purposes this method should - not be called directly. Use instead the __getitem__ - method from DistanceMatrix. - - @see: L{__getitem__} - - @param x: index of the first sequence in the alignment - @type x: int - @param y: index of the second sequence in the alignment - @type y: int - - - ''' - - seq1 = self.aligment[x] - seq2 = self.aligment[y] - - diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1), - (z[0]!=z[1] for z in imap(None,seq1,seq2)), - (0,0)) - return float(diff)/tot - \ No newline at end of file diff --git a/obitools/distances/phylip.py b/obitools/distances/phylip.py deleted file mode 100644 index e2043fa..0000000 --- a/obitools/distances/phylip.py +++ /dev/null @@ -1,35 +0,0 @@ -import sys - -from itertools import imap,count - -def writePhylipMatrix(matrix): - names = [x.id for x in matrix.aligment] - pnames= [x[:10] for x in names] - unicity={} - redundent=[] - for n in pnames: - unicity[n]=unicity.get(n,0)+1 - redundent.append(unicity[n]) - - for i,n,r in imap(None,count(),pnames,redundent): - alternate = n - if r > 1: - while alternate in pnames: - lcut = 9 - len(str(r)) - alternate = n[:lcut]+ '_%d' % r - r+=1 - pnames[i]='%-10s' % alternate - - firstline = '%5d' % len(matrix.aligment) - rep = [firstline] - for i,n in imap(None,count(),pnames): - line = [n] - for j in xrange(i): - line.append('%5.4f' % matrix[(j,i)]) - rep.append(' '.join(line)) - return '\n'.join(rep) - - - - - \ No newline at end of file diff --git a/obitools/distances/r.py b/obitools/distances/r.py deleted file mode 100644 index f674a4c..0000000 --- a/obitools/distances/r.py +++ /dev/null @@ -1,25 +0,0 @@ -import sys - -from itertools import imap,count - -def writeRMatrix(matrix): - names = [x.id for x in matrix.aligment] - lmax = max(max(len(x) for x in names),5) - lali = len(matrix.aligment) - - nformat = '%%-%ds' % lmax - dformat = '%%%d.4f' % lmax - - pnames=[nformat % x for x in names] - - rep = [' '.join(pnames)] - - for i in xrange(lali): - line=[] - for j in xrange(lali): - line.append('%5.4f' % matrix[(j,i)]) - rep.append(' '.join(line)) - return '\n'.join(rep) - - - \ No newline at end of file diff --git a/obitools/dnahash/__init__.py b/obitools/dnahash/__init__.py deleted file mode 100644 index ca02e35..0000000 --- a/obitools/dnahash/__init__.py +++ /dev/null @@ -1,100 +0,0 @@ -_A=[0] -_C=[1] -_G=[2] -_T=[3] -_R= _A + _G -_Y= _C + _T -_M= _C + _A -_K= _T + _G -_W= _T + _A -_S= _C + _G -_B= _C + _G + _T -_D= _A + _G + _T -_H= _A + _C + _T -_V= _A + _C + _G -_N= _A + _C + _G + _T - -_dnahash={'a':_A, - 'c':_C, - 'g':_G, - 't':_T, - 'r':_R, - 'y':_Y, - 'm':_M, - 'k':_K, - 'w':_W, - 's':_S, - 'b':_B, - 'd':_D, - 'h':_H, - 'v':_V, - 'n':_N, - } - -def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0): - errors = 0 - emask = [0] * wsize - epointer = 0 - size = 0 - position = offset - hashs = set([0]) - hashmask = 0 - for i in xrange(wsize): - hashmask <<= 2 - hashmask +=3 - - for l in sequence: - l = l.lower() - hl = _dnahash[l] - - if emask[epointer]: - errors-=1 - emask[epointer]=0 - - if len(hl) > 1: - errors +=1 - emask[epointer]=1 - - epointer+=1 - epointer%=wsize - - if errors > degeneratemax: - hl=set([hl[0]]) - - hashs=set((((hc<<2) | cl) & hashmask) - for hc in hashs - for cl in hl) - - if size < wsize: - size+=1 - - if size==wsize: - if errors <= degeneratemax: - yield (position,hashs,errors) - position+=1 - -def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None): - if hashs is None: - hashs=[[] for x in xrange(4**wsize)] - - for pos,keys,errors in hashCodeIterator(sequence, wsize, degeneratemax, offset): - for k in keys: - hashs[k].append(pos) - - return hashs - -def 
hashSequences(sequences,wsize,maxpos,degeneratemax=0): - hashs=None - offsets=[] - offset=0 - for s in sequences: - offsets.append(offset) - hashSequence(s,wsize,degeneratemax=degeneratemax,offset=offset,hashs=hashs) - offset+=len(s) - - return hashs,offsets - - - - - \ No newline at end of file diff --git a/obitools/ecobarcode/__init__.py b/obitools/ecobarcode/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/ecobarcode/databases.py b/obitools/ecobarcode/databases.py deleted file mode 100644 index 70d2319..0000000 --- a/obitools/ecobarcode/databases.py +++ /dev/null @@ -1,32 +0,0 @@ -''' -Created on 25 sept. 2010 - -@author: coissac -''' -from obitools import NucSequence - -def referenceDBIterator(options): - - cursor = options.ecobarcodedb.cursor() - - cursor.execute("select id from databases.database where name='%s'" % options.database) - options.dbid = cursor.fetchone()[0] - - cursor.execute(''' - select s.accession,r.id,r.taxid,r.sequence - from databases.database d, - databases.reference r, - databases.relatedsequences s - where r.database = d.id - and s.reference= r.id - and s.mainac - and d.name = '%s' - ''' % options.database - ) - - for ac,id,taxid,sequence in cursor: - s = NucSequence(ac,sequence) - s['taxid']=taxid - s['refdbid']=id - yield s - \ No newline at end of file diff --git a/obitools/ecobarcode/ecotag.py b/obitools/ecobarcode/ecotag.py deleted file mode 100644 index 2ebd3fb..0000000 --- a/obitools/ecobarcode/ecotag.py +++ /dev/null @@ -1,50 +0,0 @@ -''' -Created on 25 sept. 2010 - -@author: coissac -''' - -def alreadyIdentified(seqid,options): - cursor = options.ecobarcodedb.cursor() - cursor.execute(''' - select count(*) - from ecotag.identification - where sequence=%s - and database=%s - ''',(int(seqid),int(options.dbid))) - - return int(cursor.fetchone()[0]) > 0; - -def storeIdentification(seqid, - idstatus,taxid, - matches, - options - ): - - cursor = options.ecobarcodedb.cursor() - - if not options.updatedb: - cursor.execute(''' - delete from ecotag.identification where sequence=%s and database=%s - ''',(int(seqid),int(options.dbid))) - - cursor.execute(''' - insert into ecotag.identification (sequence,database,idstatus,taxid) - values (%s,%s,%s,%s) - returning id - ''' , (int(seqid),int(options.dbid),idstatus,int(taxid))) - - idid = cursor.fetchone()[0] - - for seq,identity in matches.iteritems(): - cursor.execute(''' - insert into ecotag.evidence (identification,reference,identity) - values (%s, - %s, - %s) - ''',(idid,seq,identity)) - - - cursor.close() - - options.ecobarcodedb.commit() diff --git a/obitools/ecobarcode/options.py b/obitools/ecobarcode/options.py deleted file mode 100644 index 6086423..0000000 --- a/obitools/ecobarcode/options.py +++ /dev/null @@ -1,64 +0,0 @@ -''' -Created on 23 sept. 
2010 - -@author: coissac -''' -import psycopg2 - -from obitools.ecobarcode.taxonomy import EcoTaxonomyDB - -def addEcoBarcodeDBOption(optionManager): - optionManager.add_option('--dbname', - action="store", dest="ecobarcodedb", - type='str', - default=None, - help="Specify the name of the ecobarcode database") - - optionManager.add_option('--server', - action="store", dest="dbserver", - type='str', - default="localhost", - help="Specify the adress of the ecobarcode database server") - - optionManager.add_option('--user', - action="store", dest="dbuser", - type='str', - default='postgres', - help="Specify the user of the ecobarcode database") - - optionManager.add_option('--port', - action="store", dest="dbport", - type='str', - default=5432, - help="Specify the port of the ecobarcode database") - - optionManager.add_option('--passwd', - action="store", dest="dbpasswd", - type='str', - default='', - help="Specify the passwd of the ecobarcode database") - - optionManager.add_option('--primer', - action="store", dest="primer", - type='str', - default=None, - help="Specify the primer used for amplification") - - -def ecobarcodeDatabaseConnection(options): - if options.ecobarcodedb is not None: - connection = psycopg2.connect(database=options.ecobarcodedb, - user=options.dbuser, - password=options.dbpasswd, - host=options.dbserver, - port=options.dbport) - options.dbname=options.ecobarcodedb - else: - connection=None - if connection is not None: - options.ecobarcodedb=connection - taxonomy = EcoTaxonomyDB(connection) - else: - taxonomy=None - return taxonomy - diff --git a/obitools/ecobarcode/rawdata.py b/obitools/ecobarcode/rawdata.py deleted file mode 100644 index a5f58cf..0000000 --- a/obitools/ecobarcode/rawdata.py +++ /dev/null @@ -1,38 +0,0 @@ -''' -Created on 25 sept. 2010 - -@author: coissac -''' - -from obitools import NucSequence -from obitools.utils import progressBar -from obitools.ecobarcode.ecotag import alreadyIdentified - -import sys - -def sequenceIterator(options): - cursor = options.ecobarcodedb.cursor() - - cursor.execute(''' - select s.id,sum(o.count),s.sequence - from rawdata.sequence s, - rawdata.occurrences o - where o.sequence= s.id - and s.primers = '%s' - group by s.id,s.sequence - ''' % options.primer - ) - - nbseq = cursor.rowcount - progressBar(1, nbseq, True, head=options.dbname) - for id,count,sequence in cursor: - progressBar(cursor.rownumber+1, nbseq, head=options.dbname) - if not options.updatedb or not alreadyIdentified(id,options): - s = NucSequence(id,sequence) - s['count']=count - print >>sys.stderr,' +', cursor.rownumber+1, - yield s - else: - print >>sys.stderr,' @', cursor.rownumber+1, - - print >>sys.stderr diff --git a/obitools/ecobarcode/taxonomy.py b/obitools/ecobarcode/taxonomy.py deleted file mode 100644 index c7d0185..0000000 --- a/obitools/ecobarcode/taxonomy.py +++ /dev/null @@ -1,120 +0,0 @@ -''' -Created on 24 sept. 2010 - -@author: coissac -''' - -from obitools.ecopcr.taxonomy import TaxonomyDump -from obitools.ecopcr.taxonomy import Taxonomy -import sys - -class EcoTaxonomyDB(TaxonomyDump) : - - def __init__(self,dbconnect): - self._dbconnect=dbconnect - - print >> sys.stderr,"Reading ecobarcode taxonomy database..." - - self._readNodeTable() - print >> sys.stderr," ok" - - print >>sys.stderr,"Adding scientific name..." 
- - self._name=[] - for taxid,name,classname in self._nameIterator(): - self._name.append((name,classname,self._index[taxid])) - if classname == 'scientific name': - self._taxonomy[self._index[taxid]].append(name) - - print >>sys.stderr,"Adding taxid alias..." - for taxid,current in self._mergedNodeIterator(): - self._index[taxid]=self._index[current] - - print >>sys.stderr,"Adding deleted taxid..." - for taxid in self._deletedNodeIterator(): - self._index[taxid]=None - - - Taxonomy.__init__(self) - - ##### - # - # Iterator functions - # - ##### - - def _readNodeTable(self): - - cursor = self._dbconnect.cursor() - - cursor.execute(""" - select taxid,rank,parent - from ncbitaxonomy.nodes - """) - - print >>sys.stderr,"Reading taxonomy nodes..." - taxonomy=[list(n) for n in cursor] - - print >>sys.stderr,"List all taxonomy rank..." - ranks =list(set(x[1] for x in taxonomy)) - ranks.sort() - rankidx = dict(map(None,ranks,xrange(len(ranks)))) - - print >>sys.stderr,"Sorting taxons..." - taxonomy.sort(TaxonomyDump._taxonCmp) - - self._taxonomy=taxonomy - - print >>sys.stderr,"Indexing taxonomy..." - index = {} - for t in self._taxonomy: - index[t[0]]=self._bsearchTaxon(t[0]) - - print >>sys.stderr,"Indexing parent and rank..." - for t in self._taxonomy: - t[1]=rankidx[t[1]] - t[2]=index[t[2]] - - self._ranks=ranks - self._index=index - - cursor.close() - - def _nameIterator(self): - cursor = self._dbconnect.cursor() - - cursor.execute(""" - select taxid,name,nameclass - from ncbitaxonomy.names - """) - - for taxid,name,nameclass in cursor: - yield taxid,name,nameclass - - cursor.close() - - def _mergedNodeIterator(self): - cursor = self._dbconnect.cursor() - - cursor.execute(""" - select oldtaxid,newtaxid - from ncbitaxonomy.merged - """) - - for oldtaxid,newtaxid in cursor: - yield oldtaxid,newtaxid - - cursor.close() - - def _deletedNodeIterator(self): - cursor = self._dbconnect.cursor() - - cursor.execute(""" - select taxid - from ncbitaxonomy.delnodes - """) - - for taxid in cursor: - yield taxid[0] - - cursor.close() diff --git a/obitools/ecopcr/__init__.py b/obitools/ecopcr/__init__.py deleted file mode 100644 index 10a90e5..0000000 --- a/obitools/ecopcr/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -from obitools import utils -from obitools import NucSequence -from obitools.utils import universalOpen, universalTell, fileSize, progressBar -import struct -import sys - - -class EcoPCRFile(utils.ColumnFile): - def __init__(self,stream): - utils.ColumnFile.__init__(self, - stream, '|', True, - (str,int,int, - str,int,str, - int,str,int, - str,int,str, - str,str,int,float, - str,int,float, - int, - str,str), "#") - - - def next(self): - data = utils.ColumnFile.next(self) - seq = NucSequence(data[0],data[20],data[21]) - seq['seq_length_ori']=data[1] - seq['taxid']=data[2] - seq['rank']=data[3] - seq['species']=data[4] - seq['species_sn']=data[5] - seq['genus']=data[6] - seq['genus_sn']=data[7] - seq['family']=data[8] - seq['family_sn']=data[9] - seq['strand']=data[12] - seq['forward_primer']=data[13] - seq['forward_error']=data[14] - seq['forward_tm']=data[15] - seq['reverse_primer']=data[16] - seq['reverse_error']=data[17] - seq['reverse_tm']=data[18] - - return seq - - - -class EcoPCRDBFile(object): - - def _ecoRecordIterator(self,file): - file = universalOpen(file) - (recordCount,) = struct.unpack('> I',file.read(4)) - self._recover=False - - if recordCount: - for i in xrange(recordCount): - (recordSize,)=struct.unpack('>I',file.read(4)) - record = file.read(recordSize) - yield record - else: - 
print >> sys.stderr,"\n\n WARNING : EcoPCRDB reading set into recover data mode\n" - self._recover=True - ok=True - while(ok): - try: - (recordSize,)=struct.unpack('>I',file.read(4)) - record = file.read(recordSize) - yield record - except: - ok=False - \ No newline at end of file diff --git a/obitools/ecopcr/annotation.py b/obitools/ecopcr/annotation.py deleted file mode 100644 index 7c76fb2..0000000 --- a/obitools/ecopcr/annotation.py +++ /dev/null @@ -1,104 +0,0 @@ -import struct - -class EcoPCRDBAnnotationWriter(object): - ''' - Class used to write Annotation description in EcoPCRDB format. - - EcoPCRDBAnnotationWriter is often called through the EcoPCRDBSequenceWriter class - - @see: L{ecopcr.sequence.EcoPCRDBSequenceWriter} - ''' - - def __init__(self,dbname,id,fileidx=1,type=('CDS',),definition=None): - ''' - class constructor - - @param dbname: name of ecoPCR database - @type dbname: C{str} - @param id: name of the qualifier used as feature id - @type id: C{str} - @param fileidx: - @type fileidx: C{int} - @param type: - @type type: C{list} or C{tuple} - @param definition: - @type definition: C{str} - ''' - self._type = type - self._definition = definition - self._id = id - self._filename="%s_%03d.adx" % (dbname,fileidx) - self._file = open(self._filename,'wb') - self._sequenceIdx=0 - - - ftname ="%s.fdx" % (dbname) - ft = open(ftname,'wb') - - self._fttypeidx=dict(map(None,type,xrange(len(type)))) - - ft.write(struct.pack('> I',len(type))) - - for t in type: - ft.write(self._ecoFtTypePacker(t)) - - ft.close() - - self._annotationCount=0 - self._file.write(struct.pack('> I',self._annotationCount)) - - - def _ecoFtTypePacker(self,type): - totalSize = len(type) - packed = struct.pack('> I %ds' % totalSize,totalSize,type) - - assert len(packed) == totalSize+4, "error in feature type packing" - - return packed - - def _ecoAnnotationPacker(self,feature,seqidx): - begin = feature.begin-1 - end = feature.end - type = self._fttypeidx[feature.ftType] - strand = feature.isDirect() - id = feature[self._id][0] - if self._definition in feature: - definition = feature[self._definition][0] - else: - definition = '' - - assert strand is not None,"Only strand defined features can be stored" - - deflength = len(definition) - - totalSize = 4 + 4 + 4 + 4 + 4 + 20 + 4 + deflength - - packed = struct.pack('> I I I I I 20s I %ds' % (deflength), - totalSize, - seqidx, - begin, - end, - type, - int(strand), - id, - deflength, - definition) - - assert len(packed) == totalSize+4, "error in annotation packing" - - return packed - - - def put(self,sequence,seqidx=None): - if seqidx is None: - seqidx = self._sequenceIdx - self._sequenceIdx+=1 - for feature in sequence.getFeatureTable(): - if feature.ftType in self._type: - self._annotationCount+=1 - self._file.write(self._ecoAnnotationPacker(feature,seqidx)) - - def __del__(self): - self._file.seek(0,0) - self._file.write(struct.pack('> I',self._annotationCount)) - self._file.close()
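All EcoPCRDB files written above share one binary layout: a big-endian record count, then each record as a size-prefixed payload. A minimal standalone reader matching _ecoRecordIterator (the file name below is hypothetical):

import struct

def iter_records(path):
    # '> I' header: number of records; then a '> I' payload size before each record.
    with open(path, 'rb') as f:
        (count,) = struct.unpack('> I', f.read(4))
        for _ in xrange(count):
            (size,) = struct.unpack('> I', f.read(4))
            yield f.read(size)

# for record in iter_records('mydb_001.adx'): ...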
diff --git a/obitools/ecopcr/options.py b/obitools/ecopcr/options.py deleted file mode 100644 index 03663cd..0000000 --- a/obitools/ecopcr/options.py +++ /dev/null @@ -1,129 +0,0 @@ -''' -Created on 13 fevr. 2011 - -@author: coissac -''' - -from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter - -try: - from obitools.ecobarcode.options import addEcoBarcodeDBOption,ecobarcodeDatabaseConnection -except ImportError: - def addEcoBarcodeDBOption(optionmanager): - pass - def ecobarcodeDatabaseConnection(options): - return None - -def addTaxonomyDBOptions(optionManager): - addEcoBarcodeDBOption(optionManager) - optionManager.add_option('-d','--database', - action="store", dest="taxonomy", - metavar="<FILENAME>", - type="string", - help="ecoPCR taxonomy Database " - "name") - optionManager.add_option('-t','--taxonomy-dump', - action="store", dest="taxdump", - metavar="<FILENAME>", - type="string", - help="NCBI Taxonomy dump repository " - "name") - - -def addTaxonomyFilterOptions(optionManager): - addTaxonomyDBOptions(optionManager) - optionManager.add_option('--require-rank', - action="append", - dest='requiredRank', - metavar="<RANK_NAME>", - type="string", - default=[], - help="select sequence with taxid tag containing " - "a parent of rank <RANK_NAME>") - - optionManager.add_option('-r','--required', - action="append", - dest='required', - metavar="<TAXID>", - type="int", - default=[], - help="required taxid") - - optionManager.add_option('-i','--ignore', - action="append", - dest='ignored', - metavar="<TAXID>", - type="int", - default=[], - help="ignored taxid") - -def loadTaxonomyDatabase(options): - if isinstance(options.taxonomy, Taxonomy): - return options.taxonomy - taxonomy = ecobarcodeDatabaseConnection(options) - if (taxonomy is not None or - options.taxonomy is not None or - options.taxdump is not None): - if options.taxdump is not None: - taxonomy = TaxonomyDump(options.taxdump) - if taxonomy is not None and isinstance(options.taxonomy, str): - ecoTaxonomyWriter(options.taxonomy,taxonomy) - options.ecodb=options.taxonomy - if isinstance(options.taxonomy, Taxonomy): - taxonomy = options.taxonomy - if taxonomy is None and isinstance(options.taxonomy, str): - taxonomy = EcoTaxonomyDB(options.taxonomy) - options.ecodb=options.taxonomy - options.taxonomy=taxonomy - return options.taxonomy - -def taxonomyFilterGenerator(options): - loadTaxonomyDatabase(options) - if options.taxonomy is not None: - taxonomy=options.taxonomy - def taxonomyFilter(seq): - def annotateAtRank(seq,rank): - if 'taxid' in seq and seq['taxid'] is not None: - rtaxid= taxonomy.getTaxonAtRank(seq['taxid'],rank) - return rtaxid - return None - good = True - if 'taxid' in seq: - taxid = seq['taxid'] -# print taxid, - if options.requiredRank: - taxonatrank = reduce(lambda x,y: x and y, - (annotateAtRank(seq,rank) is not None - for rank in options.requiredRank),True) - good = good and taxonatrank -# print >>sys.stderr, " Has rank : ",good, - if options.required: - good = good and reduce(lambda x,y: x or y, - (taxonomy.isAncestor(r,taxid) for r in options.required), - False) -# print " Required : ",good, - if options.ignored: - good = good and not reduce(lambda x,y: x or y, - (taxonomy.isAncestor(r,taxid) for r in options.ignored), - False) -# print " Ignored : ",good, -# print " Global : ",good - - return good - - - else: - def taxonomyFilter(seq): - return True - - return taxonomyFilter - -def taxonomyFilterIteratorGenerator(options): - taxonomyFilter = taxonomyFilterGenerator(options) - - def filterIterator(seqiterator): - for seq in seqiterator: - if taxonomyFilter(seq): - yield seq - - return filterIterator \ No newline at end of file
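The filter above composes three tests: every required rank must resolve for the sequence's taxid, some required taxid must be an ancestor, and no ignored taxid may be. A standalone sketch of that composition (toy is_ancestor and hypothetical taxids; the real predicate comes from the taxonomy object):

def make_filter(required, ignored, is_ancestor):
    # Mirrors the required/ignored logic of taxonomyFilterGenerator.
    def keep(taxid):
        good = True
        if required:
            good = good and any(is_ancestor(r, taxid) for r in required)
        if ignored:
            good = good and not any(is_ancestor(r, taxid) for r in ignored)
        return good
    return keep

is_anc = lambda parent, taxid: parent == 1 or parent == taxid  # toy ancestry: 1 is the root
keep = make_filter([1], [42], is_anc)
assert keep(7) and not keep(42)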
diff --git a/obitools/ecopcr/sequence.py b/obitools/ecopcr/sequence.py deleted file mode 100644 index 1465e69..0000000 --- a/obitools/ecopcr/sequence.py +++ /dev/null @@ -1,133 +0,0 @@ -from obitools import NucSequence -from obitools.ecopcr import EcoPCRDBFile -from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter -from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter -from obitools.utils import universalOpen -from glob import glob -import struct -import gzip -import sys - - -class EcoPCRDBSequenceIterator(EcoPCRDBFile): - ''' - Build an iterator over the sequences included in a sequence database - formatted for ecoPCR - ''' - - def __init__(self,path,taxonomy=None): - ''' - ecoPCR data iterator constructor - - @param path: path to the ecoPCR database including the database prefix name - @type path: C{str} - @param taxonomy: a taxonomy can be given to the reader to decode the taxonomic data - associated to the sequences. If no taxonomy is provided, it will be read - before the sequence database files using the same path. - @type taxonomy: L{obitools.ecopcr.taxonomy.Taxonomy} - ''' - self._path = path - - if taxonomy is not None: - self._taxonomy=taxonomy - else: - self._taxonomy=EcoTaxonomyDB(path) - - self._seqfilesFiles = glob('%s_???.sdx' % self._path) - self._seqfilesFiles.sort() - - def __ecoSequenceIterator(self,file): - for record in self._ecoRecordIterator(file): - lrecord = len(record) - lnames = lrecord - (4*4+20) - (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record) - seqid=seqid.strip('\x00') - de = string[:deflength] - seq = gzip.zlib.decompress(string[deflength:]) - bioseq = NucSequence(seqid,seq,de,taxidx=taxid,taxid=self._taxonomy._taxonomy[taxid][0]) - yield bioseq - - def __iter__(self): - for seqfile in self._seqfilesFiles: - for seq in self.__ecoSequenceIterator(seqfile): - yield seq - -class EcoPCRDBSequenceWriter(object): - - def __init__(self,dbname,fileidx=1,taxonomy=None,ftid=None,type=None,definition=None,append=False): - self._taxonomy=taxonomy - self._filename="%s_%03d.sdx" % (dbname,fileidx) - if append: - mode ='r+b' - f = universalOpen(self._filename) - (recordCount,) = struct.unpack('> I',f.read(4)) - self._sequenceCount=recordCount - del f - self._file = open(self._filename,mode) - self._file.seek(0,0) - self._file.write(struct.pack('> I',0)) - self._file.seek(0,2) - else: - self._sequenceCount=0 - mode = 'wb' - self._file = open(self._filename,mode) - self._file.write(struct.pack('> I',self._sequenceCount)) - - if self._taxonomy is not None: - print >> sys.stderr,"Writing the taxonomy file...", - ecoTaxonomyWriter(dbname,self._taxonomy) - print >> sys.stderr,"Ok" - - if type is not None: - assert ftid is not None,"You must specify an id attribute for features" - self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition) - else: - self._annotation = None - - def _ecoSeqPacker(self,seq): - - compactseq = gzip.zlib.compress(str(seq).upper(),9) - cptseqlength = len(compactseq) - delength = len(seq.definition) - - totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength - - if self._taxonomy is None or 'taxid' not in seq: - taxon=-1 - else: - taxon=self._taxonomy.findIndex(seq['taxid']) - - try: - packed = struct.pack('> I i 20s I I I %ds %ds' % (delength,cptseqlength), - totalSize, - taxon, - seq.id, - delength, - len(seq), - cptseqlength, - seq.definition, - compactseq) - except struct.error as e: - print >>sys.stderr,"\n\n============\n\nError on sequence : %s\n\n" % seq.id - raise e - - assert len(packed) == totalSize+4, "error in sequence packing" - - 
return packed - - - def put(self,sequence): - if self._taxonomy is not None: - if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'): - sequence.extractTaxon() - self._file.write(self._ecoSeqPacker(sequence)) - if self._annotation is not None: - self._annotation.put(sequence, self._sequenceCount) - self._sequenceCount+=1 - - def __del__(self): - self._file.seek(0,0) - self._file.write(struct.pack('> I',self._sequenceCount)) - self._file.close() - - diff --git a/obitools/ecopcr/taxonomy.py b/obitools/ecopcr/taxonomy.py deleted file mode 100644 index bb2ec4e..0000000 --- a/obitools/ecopcr/taxonomy.py +++ /dev/null @@ -1,630 +0,0 @@ -import struct -import sys - -from itertools import count,imap - -from obitools.ecopcr import EcoPCRDBFile -from obitools.utils import universalOpen -from obitools.utils import ColumnFile - -class Taxonomy(object): - def __init__(self): - ''' - The taxonomy database constructor - ''' - - self._ranks.append('obi') - - self._speciesidx = self._ranks.index('species') - self._genusidx = self._ranks.index('genus') - self._familyidx = self._ranks.index('family') - self._orderidx = self._ranks.index('order') - self._nameidx=dict((x[0],x[2]) for x in self._name) - self._nameidx.update(dict((x[0],x[2]) for x in self._preferedName)) - self._preferedidx=dict((x[2],x[1]) for x in self._preferedName) - - self._bigestTaxid = max(x[0] for x in self._taxonomy) - - - def findTaxonByIdx(self,idx): - if idx is None: - return None - return self._taxonomy[idx] - - def findIndex(self,taxid): - if taxid is None: - return None - return self._index[taxid] - - def findTaxonByTaxid(self,taxid): - return self.findTaxonByIdx(self.findIndex(taxid)) - - def findTaxonByName(self,name): - return self._taxonomy[self._nameidx[name]] - - def findRankByName(self,rank): - try: - return self._ranks.index(rank) - except ValueError: - return None - - def __contains__(self,taxid): - return self.findTaxonByTaxid(taxid) is not None - - - - - ##### - # - # PUBLIC METHODS - # - ##### - - - def subTreeIterator(self, taxid): - "return the subtree for the given taxonomic id " - idx = self.findIndex(taxid) - yield self._taxonomy[idx] - for t in self._taxonomy: - if t[2] == idx: - for subt in self.subTreeIterator(t[0]): - yield subt - - def parentalTreeIterator(self, taxid): - """ - return the parental tree for the given taxonomic id, starting from - the first ancestor up to the root. - """ - taxon=self.findTaxonByTaxid(taxid) - if taxon is not None: - while taxon[2]!= 0: - yield taxon - taxon = self._taxonomy[taxon[2]] - yield self._taxonomy[0] - else: - raise StopIteration - - def isAncestor(self,parent,taxid): - return parent in [x[0] for x in self.parentalTreeIterator(taxid)] - - def lastCommonTaxon(self,*taxids): - if not taxids: - return None - if len(taxids)==1: - return taxids[0] - - if len(taxids)==2: - t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])] - t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])] - t1.reverse() - t2.reverse() - - count = min(len(t1),len(t2)) - i=0 - while(i < count and t1[i]==t2[i]): - i+=1 - i-=1 - - return t1[i] - - ancetre = taxids[0] - for taxon in taxids[1:]: - ancetre = self.lastCommonTaxon(ancetre,taxon) - - return ancetre - -
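lastCommonTaxon above intersects the two root-ward paths produced by parentalTreeIterator. The same idea on a toy parent table (a standalone sketch, not the ecoPCR data layout):

def lca(parent, a, b):
    def path(n):
        p = [n]
        while parent[n] is not None:
            n = parent[n]
            p.append(n)
        return p[::-1]  # root first, as after the reverse() calls above
    last = None
    for x, y in zip(path(a), path(b)):
        if x != y:
            break
        last = x
    return last

parent = {1: None, 2: 1, 3: 1, 4: 2, 5: 2}
assert lca(parent, 4, 5) == 2 and lca(parent, 4, 3) == 1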
- """ - taxon=self.findTaxonByTaxid(taxid) - if taxon is not None: - while taxon[2]!= 0: - yield taxon - taxon = self._taxonomy[taxon[2]] - yield self._taxonomy[0] - else: - raise StopIteration - - def isAncestor(self,parent,taxid): - return parent in [x[0] for x in self.parentalTreeIterator(taxid)] - - def lastCommonTaxon(self,*taxids): - if not taxids: - return None - if len(taxids)==1: - return taxids[0] - - if len(taxids)==2: - t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])] - t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])] - t1.reverse() - t2.reverse() - - count = min(len(t1),len(t2)) - i=0 - while(i < count and t1[i]==t2[i]): - i+=1 - i-=1 - - return t1[i] - - ancetre = taxids[0] - for taxon in taxids[1:]: - ancetre = self.lastCommonTaxon(ancetre,taxon) - - return ancetre - - def betterCommonTaxon(self,error=1,*taxids): - lca = self.lastCommonTaxon(*taxids) - idx = self._index[lca] - sublca = [t[0] for t in self._taxonomy if t[2]==idx] - return sublca - - - def getPreferedName(self,taxid): - idx = self.findIndex(taxid) - return self._preferedidx.get(idx,self._taxonomy[idx][3]) - - - def getScientificName(self,taxid): - return self.findTaxonByTaxid(taxid)[3] - - def getRankId(self,taxid): - return self.findTaxonByTaxid(taxid)[1] - - def getRank(self,taxid): - return self._ranks[self.getRankId(taxid)] - - def getTaxonAtRank(self,taxid,rankid): - if isinstance(rankid, str): - rankid=self._ranks.index(rankid) - try: - return [x[0] for x in self.parentalTreeIterator(taxid) - if x[1]==rankid][0] - except IndexError: - return None - - def getSpecies(self,taxid): - return self.getTaxonAtRank(taxid, self._speciesidx) - - def getGenus(self,taxid): - return self.getTaxonAtRank(taxid, self._genusidx) - - def getFamily(self,taxid): - return self.getTaxonAtRank(taxid, self._familyidx) - - def getOrder(self,taxid): - return self.getTaxonAtRank(taxid, self._orderidx) - - def rankIterator(self): - for x in imap(None,self._ranks,xrange(len(self._ranks))): - yield x - - def groupTaxa(self,taxa,groupname): - t=[self.findTaxonByTaxid(x) for x in taxa] - a=set(x[2] for x in t) - assert len(a)==1,"All taxa must have the same parent" - newtaxid=max([2999999]+[x[0] for x in self._taxonomy if x[0]>=3000000 and x[0]<4000000])+1 - newidx=len(self._taxonomy) - if 'GROUP' not in self._ranks: - self._ranks.append('GROUP') - rankid=self._ranks.index('GROUP') - self._taxonomy.append((newtaxid,rankid,a.pop(),groupname)) - for x in t: - x[2]=newidx - - def addLocalTaxon(self,name,rank,parent,minimaltaxid=10000000): - newtaxid = minimaltaxid if (self._bigestTaxid < minimaltaxid) else self._bigestTaxid+1 - - rankid=self.findRankByName(rank) - parentidx = self.findIndex(int(parent)) - tx = (newtaxid,rankid,parentidx,name,'local') - self._taxonomy.append(tx) - newidx=len(self._taxonomy)-1 - self._name.append((name,'scientific name',newidx)) - self._nameidx[name]=newidx - self._index[newtaxid]=newidx - - self._bigestTaxid=newtaxid - - return newtaxid - - def removeLocalTaxon(self,taxid): - raise NotImplemented - txidx = self.findIndex(taxid) - taxon = self.findTaxonByIdx(txidx) - - assert txidx >= self._localtaxon,"Only local taxon can be deleted" - - for t in self._taxonomy: - if t[2] == txidx: - self.removeLocalTaxon(t[0]) - - - - - return taxon - - def addPreferedName(self,taxid,name): - idx = self.findIndex(taxid) - self._preferedName.append(name,'obi',idx) - self._preferedidx[idx]=name - return taxid - -class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile): - ''' - A taxonomy database class - ''' - - - def 
__init__(self,path): - ''' - The taxonomy database constructor - - @param path: path to the ecoPCR database including the database prefix name - @type path: C{str} - ''' - self._path = path - self._taxonFile = "%s.tdx" % self._path - self._localTaxonFile = "%s.ldx" % self._path - self._ranksFile = "%s.rdx" % self._path - self._namesFile = "%s.ndx" % self._path - self._preferedNamesFile = "%s.pdx" % self._path - self._aliasFile = "%s.adx" % self._path - - print >> sys.stderr,"Reading binary taxonomy database...", - - self.__readNodeTable() - - print >> sys.stderr," ok" - - Taxonomy.__init__(self) - - - ##### - # - # Iterator functions - # - ##### - - def __ecoNameIterator(self,file): - for record in self._ecoRecordIterator(file): - lrecord = len(record) - lnames = lrecord - 16 - (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record) - name=names[:namelength] - classname=names[namelength:] - yield (name,classname,indextaxid) - - - def __ecoTaxonomicIterator(self): - for record in self._ecoRecordIterator(self._taxonFile): - lrecord = len(record) - lnames = lrecord - 16 - (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record) - yield (taxid,rankid,parentidx,name,'ncbi') - - try : - lt=0 - for record in self._ecoRecordIterator(self._localTaxonFile): - lrecord = len(record) - lnames = lrecord - 16 - (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record) - lt+=1 - yield (taxid,rankid,parentidx,name,'local') - print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt - except: - print >> sys.stderr, " [INFO : Local taxon file not found] " - - def __ecoRankIterator(self): - for record in self._ecoRecordIterator(self._ranksFile): - yield record - - def __ecoAliasIterator(self): - for record in self._ecoRecordIterator(self._aliasFile): - (taxid,index) = struct.unpack('> I i',record) - yield taxid,index - - ##### - # - # Indexes - # - ##### - - def __ecoNameIndex(self): - indexName = [x for x in self.__ecoNameIterator(self._namesFile)] - return indexName - - def __ecoRankIndex(self): - rank = [r for r in self.__ecoRankIterator()] - return rank - - def __ecoTaxonomyIndex(self): - taxonomy = [] - - try : - index = dict(self.__ecoAliasIterator()) - print >> sys.stderr, " [INFO : Taxon alias file found] " - buildIndex=False - except: - print >> sys.stderr, " [INFO : Taxon alias file not found] " - index={} - i = 0; - buildIndex=True - - localtaxon=0 - i=0 - for x in self.__ecoTaxonomicIterator(): - taxonomy.append(x) - if x[4]=='ncbi': - localtaxon+=1 - - if buildIndex or x[4]!='ncbi': - index[x[0]] = i - i+=1 - - - print >> sys.stderr,"Taxonomical tree read", - return taxonomy, index,localtaxon - - def __readNodeTable(self): - self._taxonomy, self._index, self._localtaxon= self.__ecoTaxonomyIndex() - self._ranks = self.__ecoRankIndex() - self._name = self.__ecoNameIndex() - - # Add local taxon tame to the name index - i=self._localtaxon - for t in self._taxonomy[self._localtaxon:]: - self._name.append((t[3],'scientific name',i)) - i+=1 - - try : - self._preferedName = [(x[0],'obi',x[2]) - for x in self.__ecoNameIterator(self._preferedNamesFile)] - print >> sys.stderr, " [INFO : Prefered taxon name file found] : %d added taxa" % len(self._preferedName) - except: - print >> sys.stderr, " [INFO : Prefered taxon name file not found]" - self._preferedName = [] - - - - -class TaxonomyDump(Taxonomy): - - def __init__(self,taxdir): - - self._path=taxdir - 
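-        # The NCBI taxdump directory is expected to contain the four
-        # classical flat files loaded below: nodes.dmp (tree structure),
-        # names.dmp (taxon names), merged.dmp (taxid aliases) and
-        # delnodes.dmp (deleted taxids).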
self._readNodeTable('%s/nodes.dmp' % taxdir) - - print >>sys.stderr,"Adding scientific name..." - - self._name=[] - for taxid,name,classname in self._nameIterator('%s/names.dmp' % taxdir): - self._name.append((name,classname,self._index[taxid])) - if classname == 'scientific name': - self._taxonomy[self._index[taxid]].extend([name,'ncbi']) - - print >>sys.stderr,"Adding taxid alias..." - for taxid,current in self._mergedNodeIterator('%s/merged.dmp' % taxdir): - self._index[taxid]=self._index[current] - - print >>sys.stderr,"Adding deleted taxid..." - for taxid in self._deletedNodeIterator('%s/delnodes.dmp' % taxdir): - self._index[taxid]=None - - self._nameidx=dict((x[0],x[2]) for x in self._name) - - - def _taxonCmp(t1,t2): - if t1[0] < t2[0]: - return -1 - elif t1[0] > t2[0]: - return +1 - return 0 - - _taxonCmp=staticmethod(_taxonCmp) - - def _bsearchTaxon(self,taxid): - taxCount = len(self._taxonomy) - begin = 0 - end = taxCount - oldcheck=taxCount - check = begin + end / 2 - while check != oldcheck and self._taxonomy[check][0]!=taxid : - if self._taxonomy[check][0] < taxid: - begin=check - else: - end=check - oldcheck=check - check = (begin + end) / 2 - - - if self._taxonomy[check][0]==taxid: - return check - else: - return None - - - - def _readNodeTable(self,file): - - file = universalOpen(file) - - nodes = ColumnFile(file, - sep='|', - types=(int,int,str, - str,str,bool, - int,bool,int, - bool,bool,bool,str)) - print >>sys.stderr,"Reading taxonomy dump file..." - # (taxid,rank,parent) - taxonomy=[[n[0],n[2],n[1]] for n in nodes] - print >>sys.stderr,"List all taxonomy rank..." - ranks =list(set(x[1] for x in taxonomy)) - ranks.sort() - rankidx = dict(map(None,ranks,xrange(len(ranks)))) - - print >>sys.stderr,"Sorting taxons..." - taxonomy.sort(TaxonomyDump._taxonCmp) - - self._taxonomy=taxonomy - self._localtaxon=len(taxonomy) - - print >>sys.stderr,"Indexing taxonomy..." - index = {} - for t in self._taxonomy: - index[t[0]]=self._bsearchTaxon(t[0]) - - print >>sys.stderr,"Indexing parent and rank..." 
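-        # The loop below rewrites each taxon in place: the textual rank
-        # is replaced by its position in the sorted rank list, and the
-        # parent taxid by the parent's row number in the sorted taxon
-        # table, so that later lookups are plain list indexing.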
- for t in self._taxonomy: - t[1]=rankidx[t[1]] - t[2]=index[t[2]] - - self._ranks=ranks - self._index=index - self._preferedName = [] - - def _nameIterator(self,file): - file = universalOpen(file) - names = ColumnFile(file, - sep='|', - types=(int,str, - str,str)) - for taxid,name,unique,classname,white in names: - yield taxid,name,classname - - def _mergedNodeIterator(self,file): - file = universalOpen(file) - merged = ColumnFile(file, - sep='|', - types=(int,int,str)) - for taxid,current,white in merged: - yield taxid,current - - def _deletedNodeIterator(self,file): - file = universalOpen(file) - deleted = ColumnFile(file, - sep='|', - types=(int,str)) - for taxid,white in deleted: - yield taxid - -##### -# -# -# Binary writer -# -# -##### - -def ecoTaxonomyWriter(prefix, taxonomy,onlyLocal=False): - - def ecoTaxPacker(tx): - - namelength = len(tx[3]) - - totalSize = 4 + 4 + 4 + 4 + namelength - - packed = struct.pack('> I I I I I %ds' % namelength, - totalSize, - tx[0], - tx[1], - tx[2], - namelength, - tx[3]) - - return packed - - def ecoRankPacker(rank): - - namelength = len(rank) - - packed = struct.pack('> I %ds' % namelength, - namelength, - rank) - - return packed - - def ecoAliasPacker(taxid,index): - - totalSize = 4 + 4 - try: - packed = struct.pack('> I I i', - totalSize, - taxid, - index) - except struct.error,e: - print >>sys.stderr,(totalSize,taxid,index) - print >>sys.stderr,"Total size : %d taxid : %d index : %d" %(totalSize,taxid,index) - raise e - - return packed - - def ecoNamePacker(name): - - namelength = len(name[0]) - classlength= len(name[1]) - totalSize = namelength + classlength + 4 + 4 + 4 + 4 - - packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength), - totalSize, - int(name[1]=='scientific name'), - namelength, - classlength, - name[2], - name[0], - name[1]) - - return packed - - - def ecoTaxWriter(file,taxonomy): - output = open(file,'wb') - nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]=='ncbi'),0) - - output.write(struct.pack('> I',nbtaxon)) - - for tx in taxonomy: - if tx[4]=='ncbi': - output.write(ecoTaxPacker(tx)) - - output.close() - return nbtaxon < len(taxonomy) - - def ecoLocalTaxWriter(file,taxonomy): - nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]!='ncbi'),0) - - if nbtaxon: - output = open(file,'wb') - - output.write(struct.pack('> I',nbtaxon)) - - for tx in taxonomy: - if tx[4]!='ncbi': - output.write(ecoTaxPacker(tx)) - - output.close() - - - def ecoRankWriter(file,ranks): - output = open(file,'wb') - output.write(struct.pack('> I',len(ranks))) - - for rank in ranks: - output.write(ecoRankPacker(rank)) - - output.close() - - def ecoAliasWriter(file,index): - output = open(file,'wb') - output.write(struct.pack('> I',len(index))) - - for taxid in index: - i=index[taxid] - if i is None: - i=-1 - output.write(ecoAliasPacker(taxid, i)) - - output.close() - - def nameCmp(n1,n2): - name1=n1[0].upper() - name2=n2[0].upper() - if name1 < name2: - return -1 - elif name1 > name2: - return 1 - return 0 - - - def ecoNameWriter(file,names): - output = open(file,'wb') - output.write(struct.pack('> I',len(names))) - - names.sort(nameCmp) - - for name in names: - output.write(ecoNamePacker(name)) - - output.close() - - def ecoPreferedNameWriter(file,names): - output = open(file,'wb') - output.write(struct.pack('> I',len(names))) - for name in names: - output.write(ecoNamePacker(name)) - - output.close() - - localtaxon=True - if not onlyLocal: - ecoRankWriter('%s.rdx' % prefix, taxonomy._ranks) - localtaxon = 
ecoTaxWriter('%s.tdx' % prefix, taxonomy._taxonomy) - ecoNameWriter('%s.ndx' % prefix, [x for x in taxonomy._name if x[2] < taxonomy._localtaxon]) - ecoAliasWriter('%s.adx' % prefix, taxonomy._index) - if localtaxon: - ecoLocalTaxWriter('%s.ldx' % prefix, taxonomy._taxonomy) - if taxonomy._preferedName: - ecoNameWriter('%s.pdx' % prefix, taxonomy._preferedName) diff --git a/obitools/ecotag/__init__.py b/obitools/ecotag/__init__.py deleted file mode 100644 index 26c94d3..0000000 --- a/obitools/ecotag/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -class EcoTagResult(dict): - pass \ No newline at end of file diff --git a/obitools/ecotag/parser.py b/obitools/ecotag/parser.py deleted file mode 100644 index f431e34..0000000 --- a/obitools/ecotag/parser.py +++ /dev/null @@ -1,150 +0,0 @@ -from itertools import imap -from obitools import utils - -from obitools.ecotag import EcoTagResult - -class EcoTagFileIterator(utils.ColumnFile): - - @staticmethod - def taxid(x): - x = int(x) - if x < 0: - return None - else: - return x - - @staticmethod - def scientificName(x): - if x=='--': - return None - else: - return x - - @staticmethod - def value(x): - if x=='--': - return None - else: - return float(x) - - @staticmethod - def count(x): - if x=='--': - return None - else: - return int(x) - - - def __init__(self,stream): - utils.ColumnFile.__init__(self, - stream, '\t', True, - (str,str,str, - EcoTagFileIterator.value, - EcoTagFileIterator.value, - EcoTagFileIterator.value, - EcoTagFileIterator.count, - EcoTagFileIterator.count, - EcoTagFileIterator.taxid, - EcoTagFileIterator.scientificName, - str, - EcoTagFileIterator.taxid, - EcoTagFileIterator.scientificName, - EcoTagFileIterator.taxid, - EcoTagFileIterator.scientificName, - EcoTagFileIterator.taxid, - EcoTagFileIterator.scientificName, - str - )) - self._memory=None - - _colname = ['identification', - 'seqid', - 'best_match_ac', - 'max_identity', - 'min_identity', - 'theorical_min_identity', - 'count', - 'match_count', - 'taxid', - 'scientific_name', - 'rank', - 'order_taxid', - 'order_sn', - 'family_taxid', - 'family_sn', - 'genus_taxid', - 'genus_sn', - 'species_taxid', - 'species_sn', - 'sequence'] - - def next(self): - if self._memory is not None: - data=self._memory - self._memory=None - else: - data = utils.ColumnFile.next(self) - data = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(data)],data)) - - if data['identification']=='ID': - data.cd=[] - try: - nextone = utils.ColumnFile.next(self) - nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone)) - except StopIteration: - nextone = None - while nextone is not None and nextone['identification']=='CD': - data.cd.append(nextone) - try: - nextone = utils.ColumnFile.next(self) - nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone)) - except StopIteration: - nextone = None - self._memory=nextone - - return data - -def ecoTagIdentifiedFilter(ecoTagIterator): - for x in ecoTagIterator: - if x['identification']=='ID': - yield x - - -class EcoTagAbstractIterator(utils.ColumnFile): - - _colname = ['scientific_name', - 'taxid', - 'rank', - 'count', - 'max_identity', - 'min_identity'] - - - @staticmethod - def taxid(x): - x = int(x) - if x < 0: - return None - else: - return x - - def __init__(self,stream): - utils.ColumnFile.__init__(self, - stream, '\t', True, - (str, - EcoTagFileIterator.taxid, - str, - int, - float,float,float)) - - def next(self): - data = utils.ColumnFile.next(self) - data = 
dict(imap(None,EcoTagAbstractIterator._colname,data)) - - return data - -def ecoTagAbstractFilter(ecoTagAbsIterator): - for x in ecoTagAbsIterator: - if x['taxid'] is not None: - yield x - \ No newline at end of file diff --git a/obitools/eutils/__init__.py b/obitools/eutils/__init__.py deleted file mode 100644 index 1e7d3b2..0000000 --- a/obitools/eutils/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -import time -from urllib2 import urlopen -import shelve -from threading import Lock -import sys - -class EUtils(object): - ''' - - ''' - - _last_request=0 - _interval=3 - - def __init__(self): - self._lock = Lock() - - def wait(self): - now=time.time() - delta = now - EUtils._last_request - while delta < EUtils._interval: - time.sleep(delta) - now=time.time() - delta = now - EUtils._last_request - - def _sendRequest(self,url): - self.wait() - EUtils._last_request=time.time() - t = EUtils._last_request - print >>sys.stderr,"Sending request to NCBI @ %f" % t - data = urlopen(url).read() - print >>sys.stderr,"Data red from NCBI @ %f (%f)" % (t,time.time()-t) - return data - - def setInterval(self,seconde): - EUtils._interval=seconde - - -class EFetch(EUtils): - ''' - - ''' - def __init__(self,db,tool='OBITools', - retmode='text',rettype="native", - server='eutils.ncbi.nlm.nih.gov'): - EUtils.__init__(self) - self._url = "http://%s/entrez/eutils/efetch.fcgi?db=%s&tool=%s&retmode=%s&rettype=%s" - self._url = self._url % (server,db,tool,retmode,rettype) - - - def get(self,**args): - key = "&".join(['%s=%s' % x for x in args.items()]) - return self._sendRequest(self._url +"&" + key) - diff --git a/obitools/fast.py b/obitools/fast.py deleted file mode 100644 index 760f493..0000000 --- a/obitools/fast.py +++ /dev/null @@ -1,56 +0,0 @@ -""" - implement fastn/fastp sililarity search algorithm for BioSequence. -""" - -class Fast(object): - - def __init__(self,seq,kup=2): - ''' - @param seq: sequence to hash - @type seq: BioSequence - @param kup: word size used for hashing process - @type kup: int - ''' - hash={} - seq = str(seq) - for word,pos in ((seq[i:i+kup].upper(),i) for i in xrange(len(seq)-kup)): - if word in hash: - hash[word].append(pos) - else: - hash[word]=[pos] - - self._kup = kup - self._hash= hash - self._seq = seq - - def __call__(self,seq): - ''' - Align one sequence with the fast hash table. - - @param seq: the sequence to align - @type seq: BioSequence - - @return: where smax is the - score of the largest diagonal and pmax the - associated shift - @rtype: a int tuple (smax,pmax) - ''' - histo={} - seq = str(seq).upper() - hash= self._hash - kup = self._kup - - for word,pos in ((seq[i:i+kup],i) for i in xrange(len(seq)-kup)): - matchedpos = hash.get(word,[]) - for p in matchedpos: - delta = pos - p - histo[delta]=histo.get(delta,0) + 1 - smax = max(histo.values()) - pmax = [x for x in histo if histo[x]==smax] - return smax,pmax - - def __len__(self): - return len(self._seq) - - - diff --git a/obitools/fasta/__init__.py b/obitools/fasta/__init__.py deleted file mode 100644 index d5b90c5..0000000 --- a/obitools/fasta/__init__.py +++ /dev/null @@ -1,384 +0,0 @@ -""" -fasta module provides functions to read and write sequences in fasta format. 
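-
-A minimal usage sketch, assuming an existing fasta file whose name,
-C{seqs.fasta}, is purely illustrative::
-
-    from obitools.fasta import fastaIterator, formatFasta
-
-    for seq in fastaIterator('seqs.fasta'):
-        print formatFasta(seq)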
- - -""" - -#from obitools.format.genericparser import fastGenericEntryIteratorGenerator -from obitools.format.genericparser import genericEntryIteratorGenerator -from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence -from obitools import _default_raw_parser - -#from obitools.alignment import alignmentReader -#from obitools.utils import universalOpen - -import re -from obitools.ecopcr.options import loadTaxonomyDatabase -from obitools.format import SequenceFileIterator - -#from _fasta import parseFastaDescription,fastaParser -#from _fasta import _fastaJoinSeq -#from _fasta import _parseFastaTag - - -#fastaEntryIterator=fastGenericEntryIteratorGenerator(startEntry='>') -fastaEntryIterator=genericEntryIteratorGenerator(startEntry='>') -rawFastaEntryIterator=genericEntryIteratorGenerator(startEntry='\s*>') - -def _fastaJoinSeq(seqarray): - return ''.join([x.strip() for x in seqarray]) - - -def parseFastaDescription(ds,tagparser): - - m = tagparser.search(' '+ds) - if m is not None: - info=m.group(0) - definition = ds[m.end(0):].strip() - else: - info=None - definition=ds - - return definition,info - -def fastaParser(seq,bioseqfactory,tagparser,rawparser,joinseq=_fastaJoinSeq): - ''' - Parse a fasta record. - - @attention: internal purpose function - - @param seq: a sequence object containing all lines corresponding - to one fasta sequence - @type seq: C{list} or C{tuple} of C{str} - - @param bioseqfactory: a callable object return a BioSequence - instance. - @type bioseqfactory: a callable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. - @type tagparser: regex instance - - @return: a C{BioSequence} instance - ''' - seq = seq.split('\n') - title = seq[0].strip()[1:].split(None,1) - id=title[0] - if len(title) == 2: - definition,info=parseFastaDescription(title[1], tagparser) - else: - info= None - definition=None - - seq=joinseq(seq[1:]) - return bioseqfactory(id, seq, definition,info,rawparser) - - -def fastaNucParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq): - return fastaParser(seq,NucSequence,tagparser=tagparser,joinseq=_fastaJoinSeq) - -def fastaAAParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq): - return fastaParser(seq,AASequence,tagparser=tagparser,joinseq=_fastaJoinSeq) - -def fastaIterator(file,bioseqfactory=bioSeqGenerator, - tagparser=_default_raw_parser, - joinseq=_fastaJoinSeq): - ''' - iterate through a fasta file sequence by sequence. - Returned sequences by this iterator will be BioSequence - instances - - @param file: a line iterator containing fasta data or a filename - @type file: an iterable object or str - @param bioseqfactory: a callable object return a BioSequence - instance. - @type bioseqfactory: a callable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. 
-    @type tagparser: regex instance
-
-    @return: an iterator on C{BioSequence} instance
-
-    @see: L{fastaNucIterator}
-    @see: L{fastaAAIterator}
-
-    >>> from obitools.format.sequence.fasta import fastaIterator
-    >>> f = fastaIterator('monfichier')
-    >>> s = f.next()
-    >>> print s
-    gctagctagcatgctagcatgcta
-    >>>
-    '''
-    rawparser=tagparser
-    allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
-    tagparser = re.compile('( *%s)+' % allparser)
-
-    for entry in fastaEntryIterator(file):
-        yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
-
-def rawFastaIterator(file,bioseqfactory=bioSeqGenerator,
-                     tagparser=_default_raw_parser,
-                     joinseq=_fastaJoinSeq):
-
-    rawparser=tagparser
-    allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
-    tagparser = re.compile('( *%s)+' % allparser)
-
-    for entry in rawFastaEntryIterator(file):
-        entry=entry.strip()
-        yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
-
-def fastaNucIterator(file,tagparser=_default_raw_parser):
-    '''
-    iterate through a fasta file sequence by sequence.
-    Sequences returned by this iterator will be NucSequence
-    instances
-
-    @param file: a line iterator containing fasta data
-    @type file: an iterable object
-
-    @param tagparser: a compiled regular expression usable
-                      to identify key, value couples from
-                      title line.
-    @type tagparser: regex instance
-
-    @return: an iterator on C{NucBioSequence} instance
-    @rtype: a generator object
-
-    @see: L{fastaIterator}
-    @see: L{fastaAAIterator}
-    '''
-    return fastaIterator(file, NucSequence,tagparser)
-
-def fastaAAIterator(file,tagparser=_default_raw_parser):
-    '''
-    iterate through a fasta file sequence by sequence.
-    Sequences returned by this iterator will be AASequence
-    instances
-
-    @param file: a line iterator containing fasta data
-    @type file: an iterable object
-
-    @param tagparser: a compiled regular expression usable
-                      to identify key, value couples from
-                      title line.
-    @type tagparser: regex instance
-
-    @return: an iterator on C{AABioSequence} instance
-
-    @see: L{fastaIterator}
-    @see: L{fastaNucIterator}
-    '''
-    return fastaIterator(file, AASequence,tagparser)
-
-def formatFasta(data,gbmode=False,upper=False,restrict=None):
-    '''
-    Convert a sequence or a set of sequences into a
-    string following the fasta format
-
-    @param data: sequence or a set of sequences
-    @type data: BioSequence instance or an iterable object
-                on BioSequence instances
-
-    @param gbmode: if set to C{True} the identifier part of the title
-                   line follows recommendations from NCBI to allow
-                   sequence indexing with the blast formatdb command.
-    @type gbmode: bool
-
-    @param restrict: a set of key names that will be printed in the
-                     formatted output. If restrict is set to C{None}
-                     (default) then all keys are formatted.
- @type restrict: any iterable value or None - - @return: a fasta formated string - @rtype: str - ''' - if isinstance(data, BioSequence): - data = [data] - - if restrict is not None and not isinstance(restrict, set): - restrict = set(restrict) - - rep = [] - for sequence in data: - seq = str(sequence) - if sequence.definition is None: - definition='' - else: - definition=sequence.definition - if upper: - frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)]) - else: - frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)]) - info='; '.join(['%s=%s' % x - for x in sequence.rawiteritems() - if restrict is None or x[0] in restrict]) - if info: - info=info+';' - if sequence._rawinfo is not None and sequence._rawinfo: - info+=" " + sequence._rawinfo.strip() - - id = sequence.id - if gbmode: - if 'gi' in sequence: - id = "gi|%s|%s" % (sequence['gi'],id) - else: - id = "lcl|%s|" % (id) - title='>%s %s %s' %(id,info,definition) - rep.append("%s\n%s" % (title,frgseq)) - return '\n'.join(rep) - -def formatSAPFastaGenerator(options): - loadTaxonomyDatabase(options) - - taxonomy=None - if options.taxonomy is not None: - taxonomy=options.taxonomy - - assert taxonomy is not None,"SAP formating require indication of a taxonomy database" - - ranks = ('superkingdom', 'kingdom', 'subkingdom', 'superphylum', - 'phylum', 'subphylum', 'superclass', 'class', 'subclass', - 'infraclass', 'superorder', 'order', 'suborder', 'infraorder', - 'parvorder', 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe', - 'subtribe', 'supergenus', 'genus', 'subgenus', 'species group', - 'species subgroup', 'species', 'subspecies') - - trank=set(taxonomy._ranks) - ranks = [taxonomy._ranks.index(x) for x in ranks if x in trank] - - strict= options.strictsap - - def formatSAPFasta(data,gbmode=False,upper=False,restrict=None): - ''' - Convert a seqence or a set of sequences in a - string following the fasta format as recommended for the SAP - software - - http://ib.berkeley.edu/labs/slatkin/munch/StatisticalAssignmentPackage.html - - @param data: sequence or a set of sequences - @type data: BioSequence instance or an iterable object - on BioSequence instances - - @param gbmode: if set to C{True} identifier part of the title - line follows recommendation from nbci to allow - sequence indexing with the blast formatdb command. - @type gbmode: bool - - @param restrict: a set of key name that will be print in the formated - output. If restrict is set to C{None} (default) then - all keys are formated. 
- @type restrict: any iterable value or None - - @return: a fasta formated string - @rtype: str - ''' - if isinstance(data, BioSequence): - data = [data] - - if restrict is not None and not isinstance(restrict, set): - restrict = set(restrict) - - rep = [] - for sequence in data: - seq = str(sequence) - - if upper: - frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)]) - else: - frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)]) - - try: - taxid = sequence["taxid"] - except KeyError: - if strict: - raise AssertionError('All sequence must have a taxid') - else: - continue - - definition=' ;' - - for r in ranks: - taxon = taxonomy.getTaxonAtRank(taxid,r) - if taxon is not None: - definition+=' %s: %s,' % (taxonomy._ranks[r],taxonomy.getPreferedName(taxon)) - - definition='%s ; %s' % (definition[0:-1],taxonomy.getPreferedName(taxid)) - - id = sequence.id - if gbmode: - if 'gi' in sequence: - id = "gi|%s|%s" % (sequence['gi'],id) - else: - id = "lcl|%s|" % (id) - title='>%s%s' %(id,definition) - rep.append("%s\n%s" % (title,frgseq)) - return '\n'.join(rep) - - return formatSAPFasta - -class FastaIterator(SequenceFileIterator): - - - entryIterator = genericEntryIteratorGenerator(startEntry='>') - classmethod(entryIterator) - - def __init__(self,inputfile,bioseqfactory=bioSeqGenerator, - tagparser=_default_raw_parser, - joinseq=_fastaJoinSeq): - - SequenceFileIterator.__init__(self, inputfile, bioseqfactory) - - self.__file = FastaIterator.entryIterator(self._inputfile) - - self._tagparser = tagparser - self._joinseq = joinseq - - def get_tagparser(self): - return self.__tagparser - - - def set_tagparser(self, value): - self._rawparser = value - allparser = value % '[a-zA-Z][a-zA-Z0-9_]*' - self.__tagparser = re.compile('( *%s)+' % allparser) - - def _parseFastaDescription(self,ds): - - m = self._tagparser.search(' '+ds) - if m is not None: - info=m.group(0) - definition = ds[m.end(0):].strip() - else: - info=None - definition=ds - - return definition,info - - - def _parser(self): - ''' - Parse a fasta record. 
- - @attention: internal purpose function - - @return: a C{BioSequence} instance - ''' - seq = self._seq.split('\n') - title = seq[0].strip()[1:].split(None,1) - id=title[0] - if len(title) == 2: - definition,info=self._parseFastaDescription(title[1]) - else: - info= None - definition=None - - seq=self._joinseq(seq[1:]) - - return self._bioseqfactory(id, seq, definition,info,self._rawparser) - - _tagparser = property(get_tagparser, set_tagparser, None, "_tagparser's docstring") diff --git a/obitools/fasta/_fasta.so b/obitools/fasta/_fasta.so deleted file mode 100755 index de300ce..0000000 Binary files a/obitools/fasta/_fasta.so and /dev/null differ diff --git a/obitools/fastq/__init__.py b/obitools/fastq/__init__.py deleted file mode 100644 index 1cf3535..0000000 --- a/obitools/fastq/__init__.py +++ /dev/null @@ -1,190 +0,0 @@ -''' -Created on 29 aout 2009 - -@author: coissac -''' - -from obitools import BioSequence -from obitools import _default_raw_parser -from obitools.format.genericparser import genericEntryIteratorGenerator -from obitools import bioSeqGenerator,AASequence,NucSequence -from obitools.fasta import parseFastaDescription -from _fastq import fastqQualitySangerDecoder,fastqQualitySolexaDecoder -from _fastq import qualityToSangerError,qualityToSolexaError -from _fastq import errorToSangerFastQStr -from _fastq import formatFastq -from _fastq import fastqParserGenetator -from obitools.utils import universalOpen - -import re - -fastqEntryIterator=genericEntryIteratorGenerator(startEntry='^@',endEntry="^\+",strip=True,join=False) - -#def fastqParserGenetator(fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_parseFastaTag): -# -# qualityDecoder,errorDecoder = {'sanger' : (fastqQualitySangerDecoder,qualityToSangerError), -# 'solexa' : (fastqQualitySolexaDecoder,qualityToSolexaError), -# 'illumina' : (fastqQualitySolexaDecoder,qualityToSangerError)}[fastqvariant] -# -# def fastqParser(seq): -# ''' -# Parse a fasta record. -# -# @attention: internal purpose function -# -# @param seq: a sequence object containing all lines corresponding -# to one fasta sequence -# @type seq: C{list} or C{tuple} of C{str} -# -# @param bioseqfactory: a callable object return a BioSequence -# instance. -# @type bioseqfactory: a callable object -# -# @param tagparser: a compiled regular expression usable -# to identify key, value couples from -# title line. -# @type tagparser: regex instance -# -# @return: a C{BioSequence} instance -# ''' -# -# title = seq[0][1:].split(None,1) -# id=title[0] -# if len(title) == 2: -# definition,info=parseFastaDescription(title[1], tagparser) -# else: -# info= {} -# definition=None -# -# quality=errorDecoder(qualityDecoder(seq[3])) -# -# seq=seq[1] -# -# seq = bioseqfactory(id, seq, definition,False,**info) -# seq.quality = quality -# -# return seq -# -# return fastqParser - - -def fastqIterator(file,fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_default_raw_parser): - ''' - iterate through a fasta file sequence by sequence. - Returned sequences by this iterator will be BioSequence - instances - - @param file: a line iterator containing fasta data or a filename - @type file: an iterable object or str - @param bioseqfactory: a callable object return a BioSequence - instance. - @type bioseqfactory: a callable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. 
- @type tagparser: regex instance - - @return: an iterator on C{BioSequence} instance - - @see: L{fastaNucIterator} - @see: L{fastaAAIterator} - - ''' - fastqParser=fastqParserGenetator(fastqvariant, bioseqfactory, tagparser) - file = universalOpen(file) - for entry in fastqEntryIterator(file): - title=entry[0] - seq="".join(entry[1:-1]) - quality='' - lenseq=len(seq) - while (len(quality) < lenseq): - quality+=file.next().strip() - - yield fastqParser([title,seq,'+',quality]) - -def fastqSangerIterator(file,tagparser=_default_raw_parser): - ''' - iterate through a fastq file sequence by sequence. - Returned sequences by this iterator will be NucSequence - instances - - @param file: a line iterator containint fasta data - @type file: an iterable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. - @type tagparser: regex instance - - @return: an iterator on C{NucBioSequence} instance - - @see: L{fastqIterator} - @see: L{fastqAAIterator} - ''' - return fastqIterator(file,'sanger',NucSequence,tagparser) - -def fastqSolexaIterator(file,tagparser=_default_raw_parser): - ''' - iterate through a fastq file sequence by sequence. - Returned sequences by this iterator will be NucSequence - instances - - @param file: a line iterator containint fasta data - @type file: an iterable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. - @type tagparser: regex instance - - @return: an iterator on C{NucBioSequence} instance - - @see: L{fastqIterator} - @see: L{fastqAAIterator} - ''' - return fastqIterator(file,'solexa',NucSequence,tagparser) - -def fastqIlluminaIterator(file,tagparser=_default_raw_parser): - ''' - iterate through a fastq file sequence by sequence. - Returned sequences by this iterator will be NucSequence - instances - - @param file: a line iterator containint fasta data - @type file: an iterable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. - @type tagparser: regex instance - - @return: an iterator on C{NucBioSequence} instance - - @see: L{fastqIterator} - @see: L{fastqAAIterator} - ''' - return fastqIterator(file,'illumina',NucSequence,tagparser) - -def fastqAAIterator(file,tagparser=_default_raw_parser): - ''' - iterate through a fastq file sequence by sequence. - Returned sequences by this iterator will be AASequence - instances - - @param file: a line iterator containing fasta data - @type file: an iterable object - - @param tagparser: a compiled regular expression usable - to identify key, value couples from - title line. 
- @type tagparser: regex instance - - @return: an iterator on C{AABioSequence} instance - - @see: L{fastqIterator} - @see: L{fastqNucIterator} - ''' - return fastqIterator(file,'sanger',AASequence,tagparser) - - diff --git a/obitools/fastq/_fastq.so b/obitools/fastq/_fastq.so deleted file mode 100755 index 4e3b942..0000000 Binary files a/obitools/fastq/_fastq.so and /dev/null differ diff --git a/obitools/fnaqual/__init__.py b/obitools/fnaqual/__init__.py deleted file mode 100644 index 384eb96..0000000 --- a/obitools/fnaqual/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -fnaTag=' %s *= *([^\s]+)' diff --git a/obitools/fnaqual/fasta.py b/obitools/fnaqual/fasta.py deleted file mode 100644 index 102a13e..0000000 --- a/obitools/fnaqual/fasta.py +++ /dev/null @@ -1,8 +0,0 @@ -from obitools.fasta import fastaNucIterator -from obitools.fnaqual import fnaTag - -def fnaFastaIterator(file): - - x = fastaNucIterator(file, fnaTag) - - return x \ No newline at end of file diff --git a/obitools/fnaqual/quality.py b/obitools/fnaqual/quality.py deleted file mode 100644 index 092f610..0000000 --- a/obitools/fnaqual/quality.py +++ /dev/null @@ -1,137 +0,0 @@ -""" - - -""" - -from obitools import _default_raw_parser -from obitools.fasta import fastaIterator -from obitools.fnaqual import fnaTag -from obitools.location import Location - -import re - - -class QualitySequence(list): - - def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info): - ''' - - @param id: - @param seq: - @param definition: - ''' - list.__init__(self,seq) - self._info = info - self.definition=definition - self.id=id - self._rawinfo=' ' + rawinfo - self._rawparser=rawparser - - def getDefinition(self): - ''' - Sequence definition getter - - @return: the sequence definition - @rtype: str - - ''' - return self._definition - - def setDefinition(self, value): - self._definition = value - - def getId(self): - return self._id - - def setId(self, value): - self._id = value - - def getKey(self,key): - if key not in self._info: - p = re.compile(self._rawparser % key) - m = p.search(self._rawinfo) - if m is not None: - v=m.group(1) - self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):] - try: - v = eval(v) - except: - pass - self._info[key]=v - else: - raise KeyError,key - else: - v=self._info[key] - return v - - def __getitem__(self,key): - if isinstance(key,Location): - return key.extractSequence(self) - elif isinstance(key, str): - return self._getKey(key) - elif isinstance(key, int): - return list.__getitem__(self,key) - elif isinstance(key, slice): - subseq=list.__getitem__(self,key) - info = dict(self._info) - if key.start is not None: - start = key.start +1 - else: - start = 1 - if key.stop is not None: - stop = key.stop+1 - else: - stop = len(self) - if key.step is not None: - step = key.step - else: - step = 1 - - info['cut']='[%d,%d,%s]' % (start,stop,step) - return QualitySequence(self.id, subseq, self.definition,self._rawinfo,self._rawparser,**info) - - raise TypeError,'key must be an integer, a str or a slice' - - def __setitem__(self,key,value): - self._info[key]=value - - def __delitem__(self,key): - if isinstance(key, str): - del self._info[key] - else: - raise TypeError,key - - def __iter__(self): - return list.__iter__(self) - - def __contains__(self,key): - return key in self._info - - def getTags(self): - return self._info - - def complement(self): - ''' - - ''' - cseq = self[::-1] - rep = QualitySequence(self.id,cseq,self.definition,self._rawinfo,self._rawparser,**self._info) 
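-        # A quality 'complement' is simply the reversal of the per-base
-        # scores; the flag toggled below records that the object now
-        # describes the complementary strand.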
-        rep._info['complemented']=not rep._info.get('complemented',False)
-        return rep
-
-
-    definition = property(getDefinition, setDefinition, None, "Sequence Definition")
-
-    id = property(getId, setId, None, 'Sequence identifier')
-
-
-def _qualityJoinSeq(seqarray):
-    text = ' '.join([x.strip() for x in seqarray])
-    return [int(x) for x in text.split()]
-
-def qualityIterator(file):
-    for q in fastaIterator(file, QualitySequence, fnaTag, _qualityJoinSeq):
-        yield q
-
-
\ No newline at end of file
diff --git a/obitools/format/__init__.py b/obitools/format/__init__.py
deleted file mode 100644
index a680505..0000000
--- a/obitools/format/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from obitools import bioSeqGenerator
-from obitools.utils import universalOpen
-
-
-class SequenceFileIterator:
-
-    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator):
-        self._inputfile = universalOpen(inputfile)
-        self._bioseqfactory = bioseqfactory
-
-    def get_inputfile(self):
-        return self.__file
-
-    def get_bioseqfactory(self):
-        return self.__bioseqfactory
-
-    def next(self):
-        entry = self._inputfile.next()
-        return self._parser(entry)
-
-    def __iter__(self):
-        return self
-
-    _inputfile = property(get_inputfile, None, None, "_inputfile's docstring")
-    _bioseqfactory = property(get_bioseqfactory, None, None, "_bioseqfactory's docstring")
-
-
\ No newline at end of file
diff --git a/obitools/format/_format.so b/obitools/format/_format.so
deleted file mode 100755
index 92e460d..0000000
Binary files a/obitools/format/_format.so and /dev/null differ
diff --git a/obitools/format/genericparser/__init__.py b/obitools/format/genericparser/__init__.py
deleted file mode 100644
index fecc72f..0000000
--- a/obitools/format/genericparser/__init__.py
+++ /dev/null
@@ -1,217 +0,0 @@
-"""
-G{packagetree format}
-"""
-import re
-
-from obitools.utils import universalOpen
-
-def genericEntryIteratorGenerator(startEntry=None,endEntry=None,
-                                  head=False,tail=False,
-                                  strip=False,join=True):
-    '''
-    Transform a text line iterator into an entry-oriented iterator.
-
-    This converted iterator is useful to implement the first stage
-    of flat file parsing.
-
-    @param startEntry: a regular pattern matching the beginning of
-                       an entry
-    @type startEntry: C{str} or None
-    @param endEntry: a regular pattern matching the end of
-                     an entry
-    @type endEntry: C{str} or None
-    @param head: indicate whether a header is present before
-                 the first entry (as in many original genbank
-                 files)
-    @type head: C{bool}
-    @param tail: indicate whether some extra information is present
-                 after the last entry.
-    @type tail: C{bool}
-
-    @return: an iterator on entries in text format
-    @rtype: an iterator on C{str}
-    '''
-
-    def isBeginning(line):
-        return startEntry is None or startEntry.match(line) is not None
-
-    def isEnding(line):
-        return ((endEntry is not None and endEntry.match(line) is not None) or
-                (endEntry is None and startEntry is not None and startEntry.match(line) is not None))
-
-    def transparentIteratorEntry(file):
-        file = universalOpen(file)
-        return file
-
-    def genericEntryIterator(file):
-        file = universalOpen(file)
-        entry = []
-        line = file.next()
-        started = head or isBeginning(line)
-
-        try:
-            while 1:
-                while not started:
-                    line = file.next()
-                    started = isBeginning(line)
-
-                if endEntry is None:
-                    entry.append(line)
-                    line = file.next()
-
-                while started:
-                    end = isEnding(line)
-                    if end:
-                        if endEntry is not None:
-                            entry.append(line)
-                        if join:
-                            e = ''.join(entry)
-                            if strip:
-                                e=e.strip()
-                        else:
-                            e=entry
-                            if strip:
-                                e=[x.strip() for x in e]
-                        entry=[]
-                        yield e
-                        started=False
-                        if endEntry is not None:
-                            line = file.next()
-                    else:
-                        entry.append(line)
-                        line = file.next()
-
-                started = isBeginning(line)
-
-        except StopIteration:
-            if entry and (endEntry is None or tail):
-                if join:
-                    e = ''.join(entry)
-                    if strip:
-                        e=e.strip()
-                else:
-                    e=entry
-                    if strip:
-                        e=[x.strip() for x in e]
-                yield e
-
-
-    if startEntry is not None:
-        startEntry = re.compile(startEntry)
-    if endEntry is not None:
-        endEntry = re.compile(endEntry)
-
-    if startEntry is None and endEntry is None:
-        return transparentIteratorEntry
-
-    return genericEntryIterator
-
-
-class GenericParser(object):
-
-    def __init__(self,
-                 startEntry=None,
-                 endEntry=None,
-                 head=False,
-                 tail=False,
-                 strip=False,
-                 **parseAction):
-        """
-        @param startEntry: a regular pattern matching the beginning of
-                           an entry
-        @type startEntry: C{str} or None
-        @param endEntry: a regular pattern matching the end of
-                         an entry
-        @type endEntry: C{str} or None
-        @param head: indicate whether a header is present before
-                     the first entry (as in many original genbank
-                     files)
-        @type head: C{bool}
-        @param tail: indicate whether some extra information is present
-                     after the last entry.
-        @type tail: C{bool}
-
-        @param parseAction:
-
-        """
-        self.flatiterator= genericEntryIteratorGenerator(startEntry,
-                                                         endEntry,
-                                                         head,
-                                                         tail,
-                                                         strip)
-
-        self.action={}
-
-        for k in parseAction:
-            self.addParseAction(k,*parseAction[k])
-
-    def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
-        '''
-        Add a parse action to the generic parser. A parse action
-        extracts one piece of information from an entry. A parse
-        action is defined by a name and a method to extract this
-        information from the full text entry.
-
-        A parse action can be defined in two ways:
-
-            - via regular expression patterns
-
-            - via a dedicated function.
-
-        In the first case, you have to provide at least the
-        dataMatcher regular pattern. This pattern should match exactly
-        the data part you want to retrieve. If extra characters need
-        to be cleaned out, the second pattern, dataCleaner, can be
-        used to specify these characters.
-
-        In the second case, you must provide a callable object (function)
-        that extracts and cleans data from the text entry. This function
-        should return a list containing all retrieved data, even if
-        no data or only a single item is retrieved.
-
-        @summary: Add a parse action to the generic parser.
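-
-        A minimal usage sketch (the pattern, tag name and file name are
-        illustrative, not from the original documentation)::
-
-            parser = GenericParser('^ID', ac=('\nAC +(\S+)',))
-            for entry in parser('mydb.flat'):
-                print entry['ac']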
- - @param name: name of the data extracted - @type name: C{str} - @param dataMatcher: a regular pattern matching the data - or a callable object parsing the - entry and returning a list of marched data - @type dataMatcher: C{str} or C{SRE_Pattern} instance or a callable - object - @param dataCleaner: a regular pattern matching part of the data - to suppress. - @type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None} - @param cleanSub: string used to replace dataCleaner matches. - Default is an empty string - @type cleanSub: C{str} - - ''' - if callable(dataMatcher): - self.action[name]=dataMatcher - else : - if isinstance(dataMatcher, str): - dataMatcher=re.compile(dataMatcher) - if isinstance(dataCleaner, str): - dataCleaner=re.compile(dataCleaner) - self.action[name]=self._buildREParser(dataMatcher, - dataCleaner, - cleanSub) - - def _buildREParser(self,dataMatcher,dataCleaner,cleanSub): - def parser(data): - x = dataMatcher.findall(data) - if dataCleaner is not None: - x = [dataCleaner.sub(cleanSub,y) for y in x] - return x - return parser - - def __call__(self,file): - for e in self.flatiterator(file): - pe = {'fullentry':e} - for k in self.action: - pe[k]=self.action[k](e) - yield pe - - - \ No newline at end of file diff --git a/obitools/format/ontology/__init__.py b/obitools/format/ontology/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/format/ontology/go_obo.py b/obitools/format/ontology/go_obo.py deleted file mode 100644 index cd1d87e..0000000 --- a/obitools/format/ontology/go_obo.py +++ /dev/null @@ -1,274 +0,0 @@ -__docformat__ = 'restructuredtext' - -import re -import string -import textwrap - - -from obitools.obo.go.parser import GOEntryIterator -from obitools.obo.go.parser import GOTerm -from obitools.obo.go.parser import GOEntry - -""" -go_obo.py : gene_ontology_edit.obo file parser: ----------------------------------------------------- - -- OBOFile class: open a flat file and return an entry. - -""" -class OBOFile(object): - """ - Iterator over all entries of an OBO file - """ - - def __init__(self,_path): - self.file = GOEntryIterator(_path) - - def __iter__(self): - return self - - def next(self): - fiche = self.file.next() - - if isinstance(fiche, GOTerm): - self.isaterm=True - return Term(fiche) - elif isinstance(fiche, GOEntry): - self.isaterm=False - return Entry(fiche) - else: - self.isaterm=False - return Header(fiche) - - -############# tout le reste doit descendre a l'etage obitools/ogo/go/parser.py ########## - -# define an XRef into a go_obo.py script in the microbi pylib -class Xref(object): - """ - Class Xref - Xref.db Xref database - Xref.id Xref identifier - """ - - def __init__(self,description): - data = description.split(':') - self.db = data[0].strip() - self.id = data[1].strip() - -# define a RelatedTerm into a go_obo.py script in the microbi pylib -class RelatedTerm(object): - """ - Class RelatedTerm - RelatedTerm.relation RelatedTerm relation - RelatedTerm.related_term RelatedTerm GO identifier - RelatedTerm.comment all terms have 0 or 1 comment - """ - - def __init__(self,relation,value,comment): - self.relation = relation - self.related_term = value.strip('GO:') - self.comment = comment - - -# define into a go_obo.py script in the microbi pylib -#class Term(object): -# """ -# class representing an OBO term (entry). 
-# """ -# -# def __init__(self): -# raise RuntimeError('biodb.go_obo is an abstract class') -# -# def __checkEntry__(self): -# minimum=(hasattr(self,'goid') ) -# if not minimum: -# raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_']) - -class Term(object): - """ - Class Term - representing a GO term. - """ - - def __init__(self,data=None): - """ - """ - self.data=data - self.isaterm = True - - if data: - self.__filtreGoid__() - self.__filtreName__() - self.__filtreComment__() - self.__filtreSynonyms__() - self.__filtreDef__() - self.__filtreParents__() - self.__filtreRelationships__() - self.__filtreRelation__() - self.__filtreObsolete__() - self.__filtreAltIds__() - self.__filtreXRefs__() - self.__filtreSubsets__() - - # check if all required attributes were valued - self.__checkEntry__() - - - def __checkEntry__(self): - minimum=(hasattr(self,'goid') ) - if not minimum: - raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_']) - - - def __filtreGoid__(self): - """ - Extract GO id. - """ - self.goid = self.data.id.value.strip('GO:') - - def __filtreName__(self): - """ - Extract GO name. - """ - self.name = self.data.name.value - - def __filtreSynonyms__(self): - """ - Extract GO synonym(s). - """ - self.list_synonyms = {} - if self.data.synonyms: - for y in self.data.synonyms: - self.list_synonyms[y.value] = y.scope - - - def __filtreComment__(self): - """ - manage None comments - """ - if self.data.comment != None: - self.comment = self.data.comment.value - else: - self.comment = "" - - def __filtreDef__(self): - """ - Extract GO definition. - """ - if self.data.definition != None: - self.definition = self.data.definition.value - else: - self.definition = "" - - def __filtreParents__(self): - """ - To make the is_a hierarchy - """ - if self.data.is_a != None: - self.is_a = set([isa.value.strip('GO:') for isa in self.data.is_a]) - else: - self.is_a = set() - - def __filtreRelation__(self): - """ - To make the part_of hierarchy - """ - self.part_of = set() - self.regulates = set() - self.negatively_regulates = set() - self.positively_regulates = set() - - if self.data.relationship != None: - for rel in self.data.relationship: - if rel.relationship == "part_of": - self.part_of.add(rel.value.strip('GO:')) - elif rel.relationship == "regulates": - self.regulates.add(rel.value.strip('GO:')) - elif rel.relationship == "negatively_regulates": - self.negatively_regulates.add(rel.value.strip('GO:')) - elif rel.relationship == "positively_regulates": - self.positively_regulates.add(rel.value.strip('GO:')) - - - def __filtreRelationships__(self): - """ - Relation list with other GO Terms (is_a, part_of or some regulates relation) - """ - self.related_term =[] - if self.data.relationship != None: - for x in self.data.relationship: - self.related_term.append(RelatedTerm(x.relationship,x.value,x.__doc__)) - #self.related_term.append(RelatedTerm(x.relationship,x.value,x.comment)) - if self.data.is_a != None: - for x in self.data.is_a: - self.related_term.append(RelatedTerm('is_a',x.value,x.__doc__)) - #self.related_term.append(RelatedTerm('is_a',x.value,x.comment)) - - - - def __filtreObsolete__(self): - """ - for each obsolete terms corresponds a set of GO Identifiers - so that this GO term is consider as others GO Terms - """ - self.considers = set() - self.replaces = set() - self.is_obsolete = self.data.is_obsolete - if self.data.is_obsolete: - if self.data.consider: - self.considers = 
set([considered.value.strip('GO:') for considered in self.data.consider]) - if self.data.replaced_by: - self.replaces = set([replaced.value.strip('GO:') for replaced in self.data.replaced_by]) - - - def __filtreAltIds__(self): - """ - alternate(s) id(s) for this term (= alias in the geneontology schema model!) - """ - if self.data.alt_ids: - self.alt_ids = set([x.value.strip('GO:') for x in self.data.alt_ids]) - else: - self.alt_ids = set() - - def __filtreXRefs__(self): - """ - cross references to other databases - """ - self.xrefs = set() - if self.data.xrefs: - self.xrefs = set([Xref(x.value.reference) for x in self.data.xrefs]) - - - def __filtreSubsets__(self): - """ - subset label to make smaller sets of GO Terms - """ - self.subsets = set() - if self.data.subsets: - self.subsets = set([x.value for x in self.data.subsets]) - - -class Entry(object): - """ - a Stanza entry, like [Typedef] for example - """ - def __init__(self,data=None): - self.data=data - self.isaterm=False - self.isanentry=True - - -class Header(object): - """ - class representing a GO header. - """ - - def __init__(self,data=None): - """ - """ - self.data=data - self.isaterm = False - - - diff --git a/obitools/format/options.py b/obitools/format/options.py deleted file mode 100644 index c42a23f..0000000 --- a/obitools/format/options.py +++ /dev/null @@ -1,284 +0,0 @@ -''' -Created on 13 oct. 2009 - -@author: coissac -''' - -from obitools.format.sequence.embl import emblIterator -from obitools.format.sequence.genbank import genbankIterator -from obitools.format.sequence.fnaqual import fnaFastaIterator -from obitools.format.sequence.fasta import fastaAAIterator,fastaNucIterator,fastaIterator -from obitools.format.sequence.fastq import fastqIlluminaIterator,fastqSolexaIterator -from obitools.fastq import fastqSangerIterator -from obitools.fnaqual.quality import qualityIterator -from obitools.fasta import formatFasta, rawFastaIterator,\ - formatSAPFastaGenerator -from obitools.fastq import formatFastq - -from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter -from obitools.ecopcr.options import loadTaxonomyDatabase - -#from obitools.format._format import printOutput - -from array import array -from itertools import chain -import sys - -import re -from obitools.ecopcr import EcoPCRFile - - -def addInputFormatOption(optionManager): -# optionManager.add_option('--rank', -# action="store_true", dest='addrank', -# default=False, -# help="add a rank attribute to the sequence " -# "indicating the sequence position in the input data") - optionManager.add_option('--genbank', - action="store_const", dest="seqinformat", - default=None, - const='genbank', - help="input file is in genbank format") - optionManager.add_option('--embl', - action="store_const", dest="seqinformat", - default=None, - const='embl', - help="input file is in embl format") - - optionManager.add_option('--fasta', - action="store_const", dest="seqinformat", - default=None, - const='fasta', - help="input file is in fasta nucleic format (including obitools fasta extentions)") - - optionManager.add_option('--ecopcr', - action="store_const", dest="seqinformat", - default=None, - const='ecopcr', - help="input file is in fasta nucleic format (including obitools fasta extentions)") - - optionManager.add_option('--raw-fasta', - action="store_const", dest="seqinformat", - default=None, - const='rawfasta', - help="input file is in fasta format (but more tolerant to format variant)") - - optionManager.add_option('--fna', - action="store_const", dest="seqinformat", - 
default=None, - const='fna', - help="input file is in fasta nucleic format produced by 454 sequencer pipeline") - - optionManager.add_option('--qual', - action="store", dest="withqualfile", - type='str', - default=None, - help="Specify the name of a quality file produced by 454 sequencer pipeline") - - optionManager.add_option('--sanger', - action="store_const", dest="seqinformat", - default=None, - const='sanger', - help="input file is in sanger fastq nucleic format (standard fastq)") - - optionManager.add_option('--solexa', - action="store_const", dest="seqinformat", - default=None, - const='solexa', - help="input file is in fastq nucleic format produced by solexa sequencer") - - optionManager.add_option('--illumina', - action="store_const", dest="seqinformat", - default=None, - const='illumina', - help="input file is in fastq nucleic format produced by old solexa sequencer") - - optionManager.add_option('--nuc', - action="store_const", dest="moltype", - default=None, - const='nuc', - help="input file is nucleic sequences") - optionManager.add_option('--prot', - action="store_const", dest="moltype", - default=None, - const='pep', - help="input file is protein sequences") - - -def addOutputFormatOption(optionManager): - optionManager.add_option('--fastq-output', - action="store_const", dest="output", - default=None, - const=formatFastq, - help="output sequences in sanger fastq format") - optionManager.add_option('--fasta-output', - action="store_const", dest="output", - default=None, - const=formatFasta, - help="output sequences in obitools fasta format") - optionManager.add_option('--sap-output', - action="store_const", dest="output", - default=None, - const=formatSAPFastaGenerator, - help="output sequences in sap fasta format") - optionManager.add_option('--strict-sap', - action='store_true',dest='strictsap', - default=False, - help="Print sequences in upper case (defualt is lower case)") - optionManager.add_option('--ecopcr-output', - action="store", dest="ecopcroutput", - default=None, - help="output sequences in obitools ecopcr format") - optionManager.add_option('--uppercase', - action='store_true',dest='uppercase', - default=False, - help="Print sequences in upper case (defualt is lower case)") - - - -def addInOutputOption(optionManager): - addInputFormatOption(optionManager) - addOutputFormatOption(optionManager) - - - - - -def autoEntriesIterator(options): - options.outputFormater=formatFasta - options.outputFormat="fasta" - - ecopcr_pattern = re.compile('^[^ ]+ +| +[0-9]+ +| + [0-9]+ + | +') - - def annotatedIterator(formatIterator): - options.outputFormater=formatFasta - options.outputFormat="fasta" - def iterator(lineiterator): - for s in formatIterator(lineiterator): - s.extractTaxon() - yield s - - return iterator - - def withQualIterator(qualityfile): - options.outputFormater=formatFastq - options.outputFormat="fastq" - def iterator(lineiterator): - for s in fnaFastaIterator(lineiterator): - q = qualityfile.next() - quality = array('d',(10.**(-x/10.) 
for x in q)) - s.quality=quality - yield s - - return iterator - - def autoSequenceIterator(lineiterator): - options.outputFormater=formatFasta - options.outputFormat="fasta" - first = lineiterator.next() - if first[0]==">": - if options.withqualfile is not None: - qualfile=qualityIterator(options.withqualfile) - reader=withQualIterator(qualfile) - options.outputFormater=formatFastq - options.outputFormat="fastq" - elif options.moltype=='nuc': - reader=fastaNucIterator - elif options.moltype=='pep': - reader=fastaAAIterator - else: - reader=fastaIterator - elif first[0]=='@': - reader=fastqSangerIterator - options.outputFormater=formatFastq - options.outputFormat="fastq" - elif first[0:3]=='ID ': - reader=emblIterator - elif first[0:6]=='LOCUS ': - reader=genbankIterator - elif first[0]=="#" or ecopcr_pattern.search(first): - reader=EcoPCRFile - else: - raise AssertionError,'file is not in fasta, fasta, embl, genbank or ecoPCR format' - - input = reader(chain([first],lineiterator)) - - return input - - if options.seqinformat is None: - reader = autoSequenceIterator - else: - if options.seqinformat=='fasta': - if options.moltype=='nuc': - reader=fastaNucIterator - elif options.moltype=='pep': - reader=fastaAAIterator - else: - reader=fastaIterator - elif options.seqinformat=='rawfasta': - reader=annotatedIterator(rawFastaIterator) - elif options.seqinformat=='genbank': - reader=annotatedIterator(genbankIterator) - elif options.seqinformat=='embl': - reader=annotatedIterator(emblIterator) - elif options.seqinformat=='fna': - reader=fnaFastaIterator - elif options.seqinformat=='sanger': - options.outputFormater=formatFastq - options.outputFormat="fastq" - reader=fastqSangerIterator - elif options.seqinformat=='solexa': - options.outputFormater=formatFastq - options.outputFormat="fastq" - reader=fastqSolexaIterator - elif options.seqinformat=='illumina': - options.outputFormater=formatFastq - options.outputFormat="fastq" - reader=fastqIlluminaIterator - elif options.seqinformat=='ecopcr': - reader=EcoPCRFile - - if options.seqinformat=='fna' and options.withqualfile is not None: - qualfile=qualityIterator(options.withqualfile) - reader=withQualIterator(qualfile) - options.outputFormater=formatFastq - options.outputFormat="fastq" - -# if options.addrank: -# reader = withRankIterator(reader) - return reader - -def sequenceWriterGenerator(options,output=sys.stdout): - class SequenceWriter: - def __init__(self,options,file=sys.stdout): - self._format=None - self._file=file - self._upper=options.uppercase - def put(self,seq): - if self._format is None: - self._format=formatFasta - if options.output is not None: - self._format=options.output - if self._format is formatSAPFastaGenerator: - self._format=formatSAPFastaGenerator(options) - elif options.outputFormater is not None: - self._format=options.outputFormater - s = self._format(seq,upper=self._upper) - try: - self._file.write(s) - self._file.write("\n") - except IOError: - sys.exit(0) - - if options.ecopcroutput is not None: - taxo = loadTaxonomyDatabase(options) - writer=EcoPCRDBSequenceWriter(options.ecopcroutput,taxonomy=taxo) - else: - writer=SequenceWriter(options,output) - - def sequenceWriter(sequence): - writer.put(sequence) - - return sequenceWriter - - \ No newline at end of file diff --git a/obitools/format/sequence/__init__.py b/obitools/format/sequence/__init__.py deleted file mode 100644 index 3918761..0000000 --- a/obitools/format/sequence/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from obitools.fasta import fastaIterator -from 
-from obitools.fastq import fastqSangerIterator
-from obitools.seqdb.embl.parser import emblIterator
-from obitools.seqdb.genbank.parser import genbankIterator
-from itertools import chain
-from obitools.utils import universalOpen
-
-def autoSequenceIterator(file):
-    lineiterator = universalOpen(file)
-    first = lineiterator.next()
-    if first[0]==">":
-        reader=fastaIterator
-    elif first[0]=='@':
-        reader=fastqSangerIterator
-    elif first[0:3]=='ID ':
-        reader=emblIterator
-    elif first[0:6]=='LOCUS ':
-        reader=genbankIterator
-    else:
-        raise AssertionError,'file is not in fasta, fastq, embl, or genbank format'
-
-    input = reader(chain([first],lineiterator))
-
-    return input
diff --git a/obitools/format/sequence/embl.py b/obitools/format/sequence/embl.py
deleted file mode 100644
index f59f14a..0000000
--- a/obitools/format/sequence/embl.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from obitools.seqdb.embl.parser import emblIterator,emblParser
-
diff --git a/obitools/format/sequence/fasta.py b/obitools/format/sequence/fasta.py
deleted file mode 100644
index 1d7bd49..0000000
--- a/obitools/format/sequence/fasta.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from obitools.fasta import fastaIterator,fastaParser
-from obitools.fasta import fastaAAIterator,fastaAAParser
-from obitools.fasta import fastaNucIterator,fastaNucParser
-from obitools.fasta import formatFasta
diff --git a/obitools/format/sequence/fastq.py b/obitools/format/sequence/fastq.py
deleted file mode 100644
index 54fdf89..0000000
--- a/obitools/format/sequence/fastq.py
+++ /dev/null
@@ -1,13 +0,0 @@
-'''
-Created on 15 janv. 2010
-
-@author: coissac
-'''
-
-from obitools.fastq import fastqIterator,fastqParserGenetator
-from obitools.fastq import fastqSangerIterator,fastqSolexaIterator, \
-    fastqIlluminaIterator
-from obitools.fastq import fastqAAIterator
-from obitools.fastq import formatFastq
-
-
diff --git a/obitools/format/sequence/fnaqual.py b/obitools/format/sequence/fnaqual.py
deleted file mode 100644
index ab69916..0000000
--- a/obitools/format/sequence/fnaqual.py
+++ /dev/null
@@ -1,8 +0,0 @@
-'''
-Created on 12 oct. 2009
-
-@author: coissac
-'''
-
-from obitools.fnaqual.fasta import fnaFastaIterator
-from obitools.fnaqual.quality import qualityIterator
diff --git a/obitools/format/sequence/genbank.py b/obitools/format/sequence/genbank.py
deleted file mode 100644
index 8524b6f..0000000
--- a/obitools/format/sequence/genbank.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from obitools.seqdb.genbank.parser import genpepIterator,genpepParser
-from obitools.seqdb.genbank.parser import genbankIterator,genbankParser
-
-
diff --git a/obitools/format/sequence/tagmatcher.py b/obitools/format/sequence/tagmatcher.py
deleted file mode 100644
index 60ad8d8..0000000
--- a/obitools/format/sequence/tagmatcher.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from obitools.tagmatcher.parser import tagMatcherParser
-from obitools.tagmatcher.parser import TagMatcherIterator
-from obitools.tagmatcher.parser import formatTagMatcher
-
-tagMatcherIterator=TagMatcherIterator
diff --git a/obitools/goa/__init__.py b/obitools/goa/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/goa/parser.py b/obitools/goa/parser.py
deleted file mode 100644
index 8ffd1e3..0000000
--- a/obitools/goa/parser.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from itertools import imap
-from obitools import utils
-
-class GoAFileIterator(utils.ColumnFile):
-    def __init__(self,stream):
-        utils.ColumnFile.__init__(self,
-                                  stream, '\t', True,
-                                  (str,))
-
-    _colname = ['database',
-                'ac',
-                'symbol',
-                'qualifier',
-                'goid',
-                'origin',
-                'evidence',
-                'evidnce_origine',
-                'namespace',
-                'db_object_name',
-                'gene',
-                'object_type',
-                'taxid',
-                'date',
-                'assigned_by']
-
-    def next(self):
-        data = utils.ColumnFile.next(self)
-        data = dict(imap(None,GoAFileIterator._colname,data))
-
-        return data
-
-
-
diff --git a/obitools/graph/__init__.py b/obitools/graph/__init__.py
deleted file mode 100644
index fbc5253..0000000
--- a/obitools/graph/__init__.py
+++ /dev/null
@@ -1,962 +0,0 @@
-'''
-**obitools.graph** for representing graph structure in obitools
----------------------------------------------------------------
-
-.. codeauthor:: Eric Coissac
-
-
-This module offers classes to manipulate graphs, mainly through the
-:py:class:`obitools.graph.Graph` class.
-
-.. inheritance-diagram:: Graph DiGraph UndirectedGraph
-   :parts: 2
-
-'''
-
-import sys
-
-
-from obitools.utils import progressBar
-
-
-class Indexer(dict):
-    '''
-    Allows managing the conversion between an arbitrary hashable python
-    value and a unique integer key
-    '''
-
-    def __init__(self):
-
-        self.__max=0
-        self.__reverse=[]
-
-    def getLabel(self,index):
-        '''
-        Return the python value associated with an integer index.
-
-        :param index: an index value
-        :type index: int
-
-        :raises: IndexError if the index is not used in this
-                 Indexer instance
-        '''
-        return self.__reverse[index]
-
-    def getIndex(self,key,strict=False):
-        '''
-        Return the index associated with a **key** in the indexer. Two
-        modes are available :
-
-        - strict mode :
-
-          if the key is not known by the :py:class:`Indexer` instance
-          a :py:exc:`KeyError` exception is raised.
-
-        - non strict mode :
-
-          in this mode if the requested **key** is absent, it is added to
-          the :py:class:`Indexer` instance and the new index is returned
-
-        :param key: the requested key
-        :type key: a hashable python value
-
-        :param strict: select the lookup mode
-        :type strict: bool
-
-        :return: the index corresponding to the key
-        :rtype: int
-
-        :raises: - :py:exc:`KeyError` in strict mode if the key is absent
-                   from the :py:class:`Indexer` instance
-
-                 - :py:exc:`TypeError` if key is not a hashable value.
-        '''
-        if dict.__contains__(self,key):
-            return dict.__getitem__(self,key)
-        elif strict:
-            raise KeyError,key
-        else:
-            value = self.__max
-            self[key]= value
-            self.__reverse.append(key)
-            self.__max+=1
-            return value
-
-    def __getitem__(self,key):
-        '''
-        Implements the [] operator to emulate the standard dictionary
-        behaviour on :py:class:`Indexer` and returns the integer key
-        associated with a python value.
-
-        Actually this method calls the :py:meth:`getIndex` method in
-        non-strict mode, so it only raises a :py:exc:`TypeError`
-        if key is not a hashable value.
-
-        :param key: the value to index
-        :type key: a hashable python value
-
-        :return: a unique integer value associated with the key
-        :rtype: int
-
-        :raises: :py:exc:`TypeError` if **key** is not a hashable value.
-
-        '''
-        return self.getIndex(key)
-
-    def __eq__(self,index):
-        '''
-        Implements the equality operator **==** for comparing two :py:class:`Indexer` instances.
-        Two :py:class:`Indexer` instances are equal only if they are physically
-        the same instance
-
-        :param index: the second Indexer
-        :type index: an :py:class:`Indexer` instance
-
-        :return: True if the two :py:class:`Indexer` instances are the same
-        :rtype: bool
-        '''
-        return id(self)==id(index)
-
-
-class Graph(object):
-    '''
-    Class used to represent directed or undirected graphs.
-
-    .. warning::
-
-        Only one edge can connect two nodes in a given direction.
-
-    .. warning::
-
-        Specifying nodes through their index speeds up your code, but as no check
-        is done on index values, it may result in inconsistencies. So prefer the
-        use of node labels to specify a node.
-
-
-    '''
-    def __init__(self,label='G',directed=False,indexer=None,nodes=None,edges=None):
-        '''
-        :param label: Graph name, set to 'G' by default
-        :type label: str
-
-        :param directed: true for a directed graph, set to False by default
-        :type directed: boolean
-
-        :param indexer: node label indexer. This allows defining several graphs
-                        sharing the same indexer (see : :py:meth:`newEmpty`)
-        :type indexer: :py:class:`Indexer`
-
-        :param nodes: set of nodes to add to the graph
-        :type nodes: iterable value
-
-        :param edges: set of edges to add to the graph
-        :type edges: iterable value
-        '''
-
-        self._directed=directed
-        if indexer is None:
-            indexer = Indexer()
-        self._index = indexer
-        self._node = {}
-        self._node_attrs = {}
-        self._edge_attrs = {}
-        self._label=label
-
-    def newEmpty(self):
-        """
-        Build a new empty graph using the same :py:class:`Indexer` instance.
-        This allows two graphs to share their vertices through their indices.
-        """
-        n = Graph(self._label+"_compact",self._directed,self._index)
-
-        return n
-
-    def addNode(self,node=None,index=None,**data):
-        '''
-        Add a new node or update an existing one.
-
-        :param node: the new node label or the label of an existing node
-                     for updating it.
-        :type node: a hashable python value
-
-        :param index: the index of an existing node for updating it.
- :type index: int - - :return: the index of the node - :rtype: int - - :raises: :py:exc:`IndexError` is index is not **None** and - corresponds to a not used index in this graph. - ''' - if index is None: - index = self._index[node] - - if index not in self._node: - self._node[index]=set() - else: - if index not in self._node: - raise IndexError,"This index is not used in this graph" - - if data: - if index in self._node_attrs: - self._node_attrs[index].update(data) - else: - self._node_attrs[index]=dict(data) - - return index - - def __contains__(self,node): - try: - index = self._index.getIndex(node,strict=True) - r = index in self._node - except KeyError: - r=False - return r - - def getNode(self,node=None,index=None): - """ - :param node: a node label. - :type node: an hashable python value - - :param index: the index of an existing node. - :type index: int - - .. note:: Index value are prevalent over node label. - - :return: the looked for node - :rtype: :py:class:`Node` - - :raises: :py:exc:`IndexError` if specified node lablel - corresponds to a non-existing node. - - .. warning:: no check on index value - """ - if index is None: - index = self._index.getIndex(node, True) - return Node(index,self) - - def getBestNode(self,estimator): - ''' - Select the node maximizing the estimator function - - :param estimator: the function to maximize - :type estimator: a function returning a numerical value and accepting one - argument of type :py:class:`Node` - - :return: the best node - :rtype: py:class:`Node` - ''' - - bestScore=0 - best=None - for n in self: - score = estimator(n) - if best is None or score > bestScore: - bestScore = score - best=n - return best - - - def delNode(self,node=None,index=None): - """ - Delete a node from a graph and all associated edges. - - :param node: a node label. - :type node: an hashable python value - - :param index: the index of an existing node. - :type index: int - - .. note:: Index value are prevalent over node label. - - :raises: :py:exc:`IndexError` if specified node lablel - corresponds to a non-existing node. - - .. warning:: no check on index value - """ - if index is None: - index = self._index[node] - - for n in self._node: - if n!=index: - e = self._node[n] - if index in e: - if (n,index) in self._edge_attrs: - del self._edge_attrs[(n,index)] - e.remove(index) - - e = self._node[index] - - for n in e: - if (index,n) in self._edge_attrs: - del self._edge_attrs[(index,n)] - - del self._node[index] - if index in self._node_attrs: - del self._node_attrs[index] - - - def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data): - ''' - Create a new edge in the graph between both the specified nodes. - - .. note:: Nodes can be specified using their label or their index in the graph - if both values are indicated the index is used. - - :param node1: The first vertex label - :type node1: an hashable python value - :param node2: The second vertex label - :type node2: an hashable python value - :param index1: The first vertex index - :type index1: int - :param index2: The second vertex index - :type index2: int - - :raises: :py:exc:`IndexError` if one of both the specified node lablel - corresponds to a non-existing node. - - - .. 
warning:: no check on index value - ''' - - index1=self.addNode(node1, index1) - index2=self.addNode(node2, index2) - - self._node[index1].add(index2) - - if not self._directed: - self._node[index2].add(index1) - - if data: - if (index1,index2) not in self._edge_attrs: - data =dict(data) - self._edge_attrs[(index1,index2)]=data - if not self._directed: - self._edge_attrs[(index2,index1)]=data - else: - self._edge_attrs[(index2,index1)].update(data) - - return (index1,index2) - - def getEdge(self,node1=None,node2=None,index1=None,index2=None): - ''' - Extract the :py:class:`Edge` instance linking two nodes of the graph. - - .. note:: Nodes can be specified using their label or their index in the graph - if both values are indicated the index is used. - - :param node1: The first vertex label - :type node1: an hashable python value - :param node2: The second vertex label - :type node2: an hashable python value - :param index1: The first vertex index - :type index1: int - :param index2: The second vertex index - :type index2: int - - :raises: :py:exc:`IndexError` if one of both the specified node lablel - corresponds to a non-existing node. - - - .. warning:: no check on index value - ''' - node1=self.getNode(node1, index1) - node2=self.getNode(node2, index2) - return Edge(node1,node2) - - def delEdge(self,node1=None,node2=None,index1=None,index2=None): - """ - Delete the edge linking node 1 to node 2. - - .. note:: Nodes can be specified using their label or their index in the graph - if both values are indicated the index is used. - - - :param node1: The first vertex label - :type node1: an hashable python value - :param node2: The second vertex label - :type node2: an hashable python value - :param index1: The first vertex index - :type index1: int - :param index2: The second vertex index - :type index2: int - - :raises: :py:exc:`IndexError` if one of both the specified node lablel - corresponds to a non-existing node. - - - .. warning:: no check on index value - """ - if index1 is None: - index1 = self._index[node1] - if index2 is None: - index2 = self._index[node2] - if index1 in self._node and index2 in self._node[index1]: - self._node[index1].remove(index2) - if (index1,index2) in self._node_attrs: - del self._node_attrs[(index1,index2)] - if not self._directed: - self._node[index2].remove(index1) - if (index2,index1) in self._node_attrs: - del self._node_attrs[(index2,index1)] - - def edgeIterator(self,predicate=None): - """ - Iterate through a set of selected vertices. - - :param predicate: a function allowing node selection. Default value - is **None** and indicate that all nodes are selected. - :type predicate: a function returning a boolean value - and accepting one argument of class :py:class:`Edge` - - :return: an iterator over selected edge - :rtype: interator over :py:class:`Edge` instances - - .. seealso:: - function :py:func:`selectEdgeAttributeFactory` for simple predicate. - - """ - for n1 in self._node: - for n2 in self._node[n1]: - if self._directed or n1 <= n2: - e = self.getEdge(index1=n1, index2=n2) - if predicate is None or predicate(e): - yield e - - - def nodeIterator(self,predicate=None): - """ - Iterate through a set of selected vertices. - - :param predicate: a function allowing edge selection. Default value - is **None** and indicate that all edges are selected. - :type predicate: a function returning a boolean value - and accepting one argument of class :py:class:`Node` - - :return: an iterator over selected nodes. 
- :rtype: interator over :py:class:`Node` instances - - """ - for n in self._node: - node = self.getNode(index=n) - if predicate is None or predicate(node): - yield node - - def nodeIndexIterator(self,predicate=None): - """ - Iterate through the indexes of a set of selected vertices. - - :param predicate: a function allowing edge selection. Default value - is **None** and indicate that all edges are selected. - :type predicate: a function returning a boolean value - and accepting one argument of class :py:class:`Node` - - :return: an iterator over selected node indices. - :rtype: interator over `int` - - """ - for n in self._node: - node = self.getNode(index=n) - if predicate is None or predicate(node): - yield n - - def neighbourIndexSet(self,node=None,index=None): - if index is None: - index=self.getNode(node).index - return self._node[index] - - def edgeCount(self): - n = reduce(lambda x,y:x+y, (len(z) for z in self._node.itervalues()),0) - if not self._directed: - n=n/2 - return n - - def subgraph(self,nodes,name='G'): - sub = Graph(name,self._directed,self._index) - if not isinstance(nodes, set): - nodes = set(nodes) - for n in nodes: - sub._node[n]=nodes & self._node[n] - if n in self._node_attrs: - sub._node_attrs[n]=dict(self._node_attrs[n]) - for n2 in sub._node[n]: - if not self._directed: - if n <= n2: - if (n,n2) in self._edge_attrs: - data=dict(self._edge_attrs[(n,n2)]) - sub._edge_attrs[(n,n2)]=data - sub._edge_attrs[(n2,n)]=data - else: - if (n,n2) in self._edge_attrs: - data=dict(self._edge_attrs[(n,n2)]) - sub._edge_attrs[(n,n2)]=data - return sub - - def __len__(self): - return len(self._node) - - def __getitem__(self,key): - return self.getNode(node=key) - - def __delitem__(self,key): - self.delNode(node=key) - - def __iter__(self): - return self.nodeIterator() - - def __str__(self): - if self._directed: - kw ='digraph' - else: - kw='graph' - - nodes = "\n ".join([str(x) for x in self]) - edges = "\n ".join([str(x) for x in self.edgeIterator()]) - - return "%s %s {\n %s\n\n %s\n}" % (kw,self._label,nodes,edges) - -class Node(object): - """ - Class used for representing one node or vertex in a graph - - """ - def __init__(self,index,graph): - ''' - .. warning:: - - :py:class:`Node` constructor is usualy called through the :py:class:`Graph` methods - - :param index: Index of the node in the graph - :type index: int - :param graph: graph instance owning the node - :type graph: :py:class:`obitools.graph.Graph` - ''' - self.index = index - self.__graph = graph - - def getGraph(self): - ''' - return graph owning this node. - - :rtype: :py:class:`obitools.graph.Graph` - ''' - return self.__graph - - - def getLabel(self): - ''' - return label associated to this node. - ''' - return self.__graph._index.getLabel(self.index) - - - def has_key(self,key): - ''' - test is the node instance has a property named 'key'. - - :param key: the name of a property - :type key: str - - :return: True if the nade has a property named - :rtype: bool - ''' - if self.index in self.__graph._node_attrs: - return key in self.__graph._node_attrs[self.index] - else: - return False - - def neighbourIterator(self,nodePredicat=None,edgePredicat=None): - ''' - iterate through the nodes directly connected to - this node. - - :param nodePredicat: a function accepting one node as parameter - and returning **True** if this node must be - returned by the iterator. 
- :type nodePredicat: function - - :param edgePredicat: a function accepting one edge as parameter - and returning True if the edge linking self and - the current must be considered. - :type edgePredicat: function - - - :rtype: iterator on Node instances - ''' - for n in self.neighbourIndexIterator(nodePredicat, edgePredicat): - node = self.graph.getNode(index=n) - yield node - - def neighbourIndexSet(self): - ''' - Return a set of node indexes directely connected - to this node. - - .. warning:: - - do not change this set unless you know - exactly what you do. - - @rtype: set of int - ''' - return self.__graph._node[self.index] - - def neighbourIndexIterator(self,nodePredicat=None,edgePredicat=None): - ''' - iterate through the node indexes directly connected to - this node. - - :param nodePredicat: a function accepting one node as parameter - and returning True if this node must be - returned by the iterator. - :type nodePredicat: function - - :param edgePredicat: a function accepting one edge as parameter - and returning True if the edge linking self and - the current must be considered. - :type edgePredicat: function - - :rtype: iterator on int - ''' - for n in self.neighbourIndexSet(): - if nodePredicat is None or nodePredicat(self.__graph.getNode(index=n)): - if edgePredicat is None or edgePredicat(self.__graph.getEdge(index1=self.index,index2=n)): - yield n - - def degree(self,nodeIndexes=None): - ''' - return count of edges linking this node to the - set of nodes describes by their index in nodeIndexes - - :param nodeIndexes: set of node indexes. - if set to None, all nodes of the - graph are take into account. - Set to None by default. - :type nodeIndexes: set of int - - :rtype: int - ''' - if nodeIndexes is None: - return len(self.__graph._node[self.index]) - else: - return len(self.__graph._node[self.index] & nodeIndexes) - - def componentIndexSet(self,nodePredicat=None,edgePredicat=None): - ''' - Return the set of node index in the same connected component. - - :param nodePredicat: a function accepting one node as parameter - and returning True if this node must be - returned by the iterator. - :type nodePredicat: function - - :param edgePredicat: a function accepting one edge as parameter - and returning True if the edge linking self and - the current must be considered. - :type edgePredicat: function - - - :rtype: set of int - ''' - cc=set([self.index]) - added = set(x for x in self.neighbourIndexIterator(nodePredicat, edgePredicat)) - while added: - cc |= added - added = reduce(lambda x,y : x | y, - (set(z for z in self.graph.getNode(index=c).neighbourIndexIterator(nodePredicat, edgePredicat)) - for c in added), - set()) - added -= cc - return cc - - def componentIterator(self,nodePredicat=None,edgePredicat=None): - ''' - Iterate through the nodes in the same connected - component. - - :rtype: iterator on :py:class:`Node` instance - ''' - for c in self.componentIndexSet(nodePredicat, edgePredicat): - yield self.graph.getNode(c) - - def shortestPathIterator(self,nodes=None): - ''' - Iterate through the shortest path sourcing - from this node. 
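
The neighbourhood and component accessors above compose directly; here is a minimal usage sketch, assuming only the `Graph` and `Node` APIs defined in this module (the toy labels 'a'..'c' are illustrative):

    from obitools.graph import Graph

    g = Graph('demo')                # undirected by default
    g.addEdge('a','b')
    g.addEdge('b','c')

    n = g.getNode('a')
    print n.degree()                 # 1 : only 'b' is adjacent to 'a'
    print n.neighbourIndexSet()      # indices of the direct neighbours
    print n.componentIndexSet()      # indices of the whole connected component
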
if nodes is not None, iterates - only path linkink this node to one node listed in - nodes - - :param nodes: set of node index - :type nodes: iterable on int - - :return: an iterator on list of int describing path - :rtype: iterator on list of int - ''' - if nodes is not None: - nodes = set(nodes) - - - Q=[(self.index,-1)] - - gray = set([self.index]) - paths = {} - - while Q and (nodes is None or nodes): - u,p = Q.pop() - paths[u]=p - next = self.graph._node[u] - gray - gray|=next - Q.extend((x,u) for x in next) - if nodes is None or u in nodes: - if nodes: - nodes.remove(u) - path = [u] - while p >= 0: - path.append(p) - p = paths[p] - path.reverse() - yield path - - def shortestPathTo(self,node=None,index=None): - ''' - return one of the shortest path linking this - node to specified node. - - :param node: a node label or None - :param index: a node index or None. the parameter index - has a priority on the parameter node. - :type index: int - - :return: list of node index corresponding to the path or None - if no path exists. - :rtype: list of int or None - ''' - if index is None: - index=self.graph.getNode(node).index - for p in self.shortestPathIterator([index]): - return p - - - def __getitem__(self,key): - ''' - return the value of the property of this node - - :param key: the name of a property - :type key: str - ''' - return self.__graph._node_attrs.get(self.index,{})[key] - - def __setitem__(self,key,value): - ''' - set the value of a node property. In the property doesn't - already exist a new property is added to this node. - - :param key: the name of a property - :type key: str - :param value: the value of the property - - .. seealso:: - - :py:meth:`Node.__getitem__` - ''' - if self.index in self.__graph._node_attrs: - data = self.__graph._node_attrs[self.index] - data[key]=value - else: - self.graph._node_attrs[self.index]={key:value} - - def __len__(self): - ''' - Count neighbour of this node - - :rtype: int - - .. seealso:: - - :py:meth:`Node.degree` - ''' - return len(self.__graph._node[self.index]) - - def __iter__(self): - ''' - iterate through neighbour of this node - - :rtype: iterator in :py:class:`Node` instances - - .. seealso:: - - :py:meth:`Node.neighbourIterator` - ''' - return self.neighbourIterator() - - def __contains__(self,key): - return self.has_key(key) - - def __str__(self): - - if self.index in self.__graph._node_attrs: - keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"').replace('\n','\\n')) - for x in self.__graph._node_attrs[self.index].iteritems()] - ) - else: - keys='' - - return '%d [label="%s" %s]' % (self.index, - str(self.label).replace('"','\\"').replace('\n','\\n'), - keys) - - def keys(self): - if self.index in self.__graph._node_attrs: - k = self.__graph._node_attrs[self.index].keys() - else: - k=[] - return k - - label = property(getLabel, None, None, "Label of the node") - - graph = property(getGraph, None, None, "Graph owning this node") - - - -class Edge(object): - """ - Class used for representing one edge of a graph - - """ - - def __init__(self,node1,node2): - ''' - .. warning:: - - :py:class:`Edge` constructor is usualy called through the :py:class:`Graph` methods - - :param node1: First node likend by the edge - :type node1: :py:class:`Node` - :param node2: Seconde node likend by the edge - :type node2: :py:class:`Node` - ''' - self.node1 = node1 - self.node2 = node2 - - def getGraph(self): - """ - Return the :py:class:`Graph` instance owning this edge. 
- """ - return self.node1.graph - - def has_key(self,key): - ''' - test is the :py:class:`Edge` instance has a property named **key**. - - :param key: the name of a property - :type key: str - - :return: True if the edge has a property named - :rtype: bool - ''' - if (self.node1.index,self.node2.index) in self.graph._edge_attrs: - return key in self.graph._edge_attrs[(self.node1.index,self.node2.index)] - else: - return False - - - def getDirected(self): - return self.node1.graph._directed - - def __getitem__(self,key): - return self.graph._edge_attrs.get((self.node1.index,self.node2.index),{})[key] - - def __setitem__(self,key,value): - e = (self.node1.index,self.node2.index) - if e in self.graph._edge_attrs: - data = self.graph._edge_attrs[e] - data[key]=value - else: - self.graph._edge_attrs[e]={key:value} - - def __str__(self): - e = (self.node1.index,self.node2.index) - if e in self.graph._edge_attrs: - keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"')) - for x in self.graph._edge_attrs[e].iteritems()] - ) - else: - keys = "" - - if self.directed: - link='->' - else: - link='--' - - return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys) - - def __contains__(self,key): - return self.has_key(key) - - - graph = property(getGraph, None, None, "Graph owning this edge") - - directed = property(getDirected, None, None, "Directed's Docstring") - - -class DiGraph(Graph): - """ - :py:class:`DiGraph class`is a specialisation of the :py:class:`Graph` class - dedicated to directed graph representation - - .. seealso:: - - :py:class:`UndirectedGraph` - - """ - def __init__(self,label='G',indexer=None,nodes=None,edges=None): - ''' - :param label: Graph name, set to 'G' by default - :type label: str - :param indexer: node label indexer - :type indexer: Indexer instance - :param nodes: set of nodes to add to the graph - :type nodes: iterable value - :param edges: set of edges to add to the graph - :type edges: iterable value - ''' - - Graph.__init__(self, label, True, indexer, nodes, edges) - -class UndirectedGraph(Graph): - """ - :py:class:`UndirectGraph class`is a specialisation of the :py:class:`Graph` class - dedicated to undirected graph representation - - .. 
seealso:: - - :py:class:`DiGraph` - - """ - def __init__(self,label='G',indexer=None,nodes=None,edges=None): - ''' - :param label: Graph name, set to 'G' by default - :type label: str - :param indexer: node label indexer - :type indexer: Indexer instance - :param nodes: set of nodes to add to the graph - :type nodes: iterable value - :param edges: set of edges to add to the graph - :type edges: iterable value - ''' - - Graph.__init__(self, label, False, indexer, nodes, edges) - - - -def selectEdgeAttributeFactory(attribut,value): - """ - This function help in building predicat function usable for selecting edge - in the folowing :py:class:`Graph` methods : - - - :py:meth:`Graph.edgeIterator` - - """ - def selectEdge(e): - return attribut in e and e[attribut]==value - return selectEdge diff --git a/obitools/graph/__init__.pyc b/obitools/graph/__init__.pyc deleted file mode 100644 index 397e5c0..0000000 Binary files a/obitools/graph/__init__.pyc and /dev/null differ diff --git a/obitools/graph/algorithms/__init__.py b/obitools/graph/algorithms/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/graph/algorithms/__init__.pyc b/obitools/graph/algorithms/__init__.pyc deleted file mode 100644 index 1f2edcc..0000000 Binary files a/obitools/graph/algorithms/__init__.pyc and /dev/null differ diff --git a/obitools/graph/algorithms/clique.py b/obitools/graph/algorithms/clique.py deleted file mode 100644 index 2007c1a..0000000 --- a/obitools/graph/algorithms/clique.py +++ /dev/null @@ -1,134 +0,0 @@ -import time -import sys - - - -_maxsize=0 -_solution=0 -_notbound=0 -_sizebound=0 -_lastyield=0 -_maxclique=None - -def cliqueIterator(graph,minsize=1,node=None,timeout=None): - global _maxsize,_solution,_notbound,_sizebound,_lastyield - _maxsize=0 - _solution=0 - _notbound=0 - _sizebound=0 - starttime = time.time() - - if node: - node = graph.getNode(node) - index = node.index - clique= set([index]) - candidates= set(graph.neighbourIndexSet(index=index)) - else: - clique=set() - candidates = set(x.index for x in graph) - - -# candidates = set(x for x in candidates -# if len(graph.neighbourIndexSet(index=x) & candidates) >= (minsize - 1)) - - _lastyield=time.time() - for c in _cliqueIterator(graph,clique,candidates,set(),minsize,start=starttime,timeout=timeout): - yield c - - - - - -def _cliqueIterator(graph,clique,candidates,notlist,minsize=0,start=None,timeout=None): - global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield - - # Speed indicator - lclique = len(clique) - lcandidates = len(candidates) - notmin = lcandidates - notfix = None - - for n in notlist: - nnc = candidates - graph.neighbourIndexSet(index=n) - nc = len(nnc) - if nc < notmin: - notmin=nc - notfix=n - notfixneib = nnc - - if lclique > _maxsize or not _solution % 1000 : - if start is not None: - top = time.time() - delta = top - start - if delta==0: - delta=1e-6 - speed = _solution / delta - start = top - else: - speed = 0 - print >>sys.stderr,"\rCandidates : %-5d Maximum clique size : %-5d Solutions explored : %10d speed = %5.2f solutions/sec sizebound=%10d notbound=%10d " % (lcandidates,_maxsize,_solution,speed,_sizebound,_notbound), - sys.stderr.flush() - if lclique > _maxsize: - _maxsize=lclique - -# print >>sys.stderr,'koukou' - - timer = time.time() - _lastyield - - if not candidates and not notlist: - if lclique==_maxsize: - _maxclique=set(clique) - if lclique >= minsize: - yield set(clique) - if timeout is not None and timer > timeout and _maxclique is not None: - yield _maxclique - 
            _maxclique=None
-
-    else:
-        while notmin and candidates and ((lclique + len(candidates)) >= minsize or (timeout is not None and timer > timeout)):
-            # count explored solutions
-            _solution+=1
-
-            if notfix is None:
-                nextcandidate = candidates.pop()
-            else:
-                nextcandidate = notfixneib.pop()
-                candidates.remove(nextcandidate)
-
-            clique.add(nextcandidate)
-
-            neighbours = graph.neighbourIndexSet(index=nextcandidate)
-
-            nextcandidates = candidates & neighbours
-            nextnot = notlist & neighbours
-
-            nnc = candidates - neighbours
-            lnnc=len(nnc)
-
-            for c in _cliqueIterator(graph,
-                                     set(clique),
-                                     nextcandidates,
-                                     nextnot,
-                                     minsize,
-                                     start,
-                                     timeout=timeout):
-                yield c
-
-
-            clique.remove(nextcandidate)
-
-            notmin-=1
-
-            if lnnc < notmin:
-                notmin = lnnc
-                notfix = nextcandidate
-                notfixneib = nnc
-
-            if notmin==0:
-                _notbound+=1
-
-            notlist.add(nextcandidate)
-    else:
-        if (lclique + len(candidates)) < minsize:
-            _sizebound+=1
-
diff --git a/obitools/graph/algorithms/compact.py b/obitools/graph/algorithms/compact.py
deleted file mode 100644
index 8065a93..0000000
--- a/obitools/graph/algorithms/compact.py
+++ /dev/null
@@ -1,8 +0,0 @@
-
-def compactGraph(graph,nodeSetIterator):
-    compact = graph.newEmpty()
-    for ns in nodeSetIterator(graph):
-        nlabel = "\n".join([str(graph.getNode(index=x).label) for x in ns])
-        compact.addNode(nlabel)
-    print
-    print compact
diff --git a/obitools/graph/algorithms/component.py b/obitools/graph/algorithms/component.py
deleted file mode 100644
index a17c8dd..0000000
--- a/obitools/graph/algorithms/component.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""
-Iterate through the connected components of a graph
----------------------------------------------------
-
-The module :py:mod:`obitools.graph.algorithms.component` provides
-two functions to deal with the connected components of a graph
-represented as a :py:class:`obitools.graph.Graph` instance.
-
-The whole set of connected components of a graph is a partition of this graph,
-so a node cannot belong to two distinct connected components.
-
-Two nodes are in the same connected component if there exists a path through
-the graph edges linking them.
-
-TODO: There is certainly a bug with DirectedGraph
-
-"""
-
-def componentIterator(graph,nodePredicat=None,edgePredicat=None):
-    '''
-    Build an iterator over the connected components of a graph.
-    Each connected component returned by the iterator is represented
-    as a `set` of node indices.
-
-    :param graph: the graph to partition
-    :type graph: :py:class:`obitools.graph.Graph`
-
-    :param nodePredicat: a function allowing node selection. Default value
-                         is **None** and indicates that all nodes are selected.
-    :type nodePredicat: a function returning a boolean value
-                        and accepting one argument of class :py:class:`Node`
-
-    :param edgePredicat: a function allowing edge selection. Default value
-                         is **None** and indicates that all edges are selected.
-    :type edgePredicat: a function returning a boolean value
-                        and accepting one argument of class :py:class:`Edge`
-
-    :return: an iterator over the connected component sets
-    :rtype: an iterator over `set` of `int`
-
-    .. seealso::
-        the :py:meth:`obitools.graph.Node.componentIndexSet` method
-        on which this function is based.
-    '''
-    seen = set()
-    for n in graph.nodeIterator(nodePredicat):
-        if n.index not in seen:
-            cc=n.componentIndexSet(nodePredicat, edgePredicat)
-            yield cc
-            seen |= cc
-
-def componentCount(graph,nodePredicat=None,edgePredicat=None):
-    '''
-    Count the connected components in a graph.
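
A short usage sketch of the two functions of this module, assuming the `Graph` class of this package (the toy edges are illustrative):

    from obitools.graph import Graph
    from obitools.graph.algorithms.component import componentIterator, componentCount

    g = Graph('demo')
    g.addEdge('a','b')
    g.addEdge('c','d')               # two separate components

    print componentCount(g)          # 2
    for cc in componentIterator(g):
        print sorted(cc)             # each component as a set of node indices
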
- - :param graph: the graph to partitionne - :type graph: :py:class:`obitools.graph.Graph` - - :param predicate: a function allowing edge selection. Default value - is **None** and indicate that all edges are selected. - :type predicate: a function returning a boolean value - and accepting one argument of class :py:class:`Node` - - :param predicate: a function allowing node selection. Default value - is **None** and indicate that all nodes are selected. - :type predicate: a function returning a boolean value - and accepting one argument of class :py:class:`Edge` - - :return: an iterator over the connected component set - :rtype: an iterator over `set` of `int` - - .. seealso:: - the :py:func:`componentIterator` function - on which is based this function. - ''' - n=0 - for c in componentIterator(graph,nodePredicat, edgePredicat): - n+=1 - return n - - - \ No newline at end of file diff --git a/obitools/graph/algorithms/component.pyc b/obitools/graph/algorithms/component.pyc deleted file mode 100644 index a3b6298..0000000 Binary files a/obitools/graph/algorithms/component.pyc and /dev/null differ diff --git a/obitools/graph/dag.py b/obitools/graph/dag.py deleted file mode 100644 index f9a7a96..0000000 --- a/obitools/graph/dag.py +++ /dev/null @@ -1,80 +0,0 @@ -from obitools.graph import DiGraph,Node -from obitools.graph.algorithms.component import componentIterator - -class DAG(DiGraph): - def __init__(self,label='G',indexer=None,nodes=None,edges=None): - ''' - Directed Graph constructor. - - @param label: Graph name, set to 'G' by default - @type label: str - @param indexer: node label indexer - @type indexer: Indexer instance - @param nodes: set of nodes to add to the graph - @type nodes: iterable value - @param edges: set of edges to add to the graph - @type edges: iterable value - ''' - - self._parents={} - DiGraph.__init__(self, label, indexer, nodes, edges) - - def getNode(self,node=None,index=None): - if index is None: - index = self._index.getIndex(node, True) - return DAGNode(index,self) - - def addEdge(self,parent=None,node=None,indexp=None,index=None,**data): - indexp=self.addNode(parent, indexp) - index =self.addNode(node , index) - - pindex = set(n.index - for n in self.getNode(index=indexp).ancestorIterator()) - - assert index not in pindex,'Child node cannot be a parent node' - - DiGraph.addEdge(self,index1=indexp,index2=index,**data) - - if index in self._parents: - self._parents[index].add(indexp) - else: - self._parents[index]=set([indexp]) - - - return (indexp,index) - - def getRoots(self): - return [self.getNode(index=cc.pop()).getRoot() - for cc in componentIterator(self)] - - - - -class DAGNode(Node): - - def ancestorIterator(self): - if self.index in self.graph._parents: - for p in self.graph._parents[self.index]: - parent = DAGNode(p,self.graph) - yield parent - for pnode in parent.ancestorIterator(): - yield pnode - - def getRoot(self): - for x in self.ancestorIterator(): - pass - return x - - def leavesIterator(self): - if not self: - yield self - for n in self: - for nn in n.leavesIterator(): - yield nn - - def subgraphIterator(self): - yield self - for n in self: - for nn in n.subgraphIterator(): - yield nn - diff --git a/obitools/graph/layout/__init__.py b/obitools/graph/layout/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/graph/layout/radialtree.py b/obitools/graph/layout/radialtree.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/graph/rootedtree.py b/obitools/graph/rootedtree.py deleted file mode 
100644 index 803316d..0000000 --- a/obitools/graph/rootedtree.py +++ /dev/null @@ -1,117 +0,0 @@ -from obitools.graph.dag import DAG,DAGNode - -class RootedTree(DAG): - - def addEdge(self,parent=None,node=None,indexp=None,index=None,**data): - indexp=self.addNode(parent, indexp) - index =self.addNode(node , index) - - assert index not in self._parents or indexp in self._parents[index], \ - 'Child node cannot have more than one parent node' - - return DAG.addEdge(self,indexp=indexp,index=index,**data) - - def getNode(self,node=None,index=None): - if index is None: - index = self._index.getIndex(node, True) - return RootedTreeNode(index,self) - - - -class RootedTreeNode(DAGNode): - - def subTreeSize(self): - n=1 - for subnode in self: - n+=subnode.subTreeSize() - return n - - def subTreeLeaves(self): - if not self: - return 1 - n=0 - for subnode in self: - n+=subnode.subTreeLeaves() - return n - - -def nodeWriter(node,deep=0,label=None,distance="distance", bootstrap="bootstrap",cartoon=None,collapse=None): - - ks = node.keys() - - - if label is None: - name=node.label - elif callable(label): - name=label(node) - elif isinstance(label, str) and label in node: - name=node[label] - ks.remove(label) - else: - name='' - - if distance in node: - dist=':%6.5f' % node[distance] - ks.remove(distance) - else: - dist='' - - ks = ["%s=%s" % (k,node[k]) for k in ks] - - if cartoon is not None and cartoon(node): - ks.append("!cartoon={%d,0.0}" % node.subTreeLeaves()) - - if collapse is not None and collapse(node): - ks.append('!collapse={"collapsed",0.0}') - - if ks: - ks="[&"+",".join(ks)+"]" - else: - ks='' - - - nodeseparator = ',\n' + ' ' * (deep+1) - - subnodes = nodeseparator.join([nodeWriter(x, deep+1,label,distance,bootstrap,cartoon=cartoon,collapse=collapse) - for x in node]) - if subnodes: - subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')' - - return '%s"%s"%s%s' % (subnodes,name,ks,dist) - - -def nexusFormat(tree,startnode=None,label=None,blocks="",cartoon=None,collapse=None): - head="#NEXUS\n" - - tx = [] - - for n in tree: - if label is None: - name=n.label - elif callable(label): - name=label(n) - elif isinstance(label, str) and label in n: - name=n[label] - else: - name='' - - if name: - tx.append('"%s"' % name) - - taxa = "begin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels\n\t" % len(tx) - - taxa+="\n\t".join(tx) - - taxa+="\n;\nend;\n\n" - - - - if startnode is not None: - roots =[startnode] - else: - roots = tree.getRoots() - trees = nodeWriter(roots[0],0,label,cartoon=cartoon,collapse=collapse) - trees = "begin trees;\n\ttree tree_1 = [&R] "+ trees +";\nend;\n\n" - return head+taxa+trees+"\n\n"+blocks+"\n" - - \ No newline at end of file diff --git a/obitools/graph/tree.py b/obitools/graph/tree.py deleted file mode 100644 index 940ee44..0000000 --- a/obitools/graph/tree.py +++ /dev/null @@ -1,37 +0,0 @@ -from obitools.graph import UndirectedGraph,Node -from obitools.graph.algorithms.component import componentCount - - -class Forest(UndirectedGraph): - - - def getNode(self,node=None,index=None): - if index is None: - index = self._index.getIndex(node, True) - return TreeNode(index,self) - - def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data): - index1=self.addNode(node1, index1) - index2=self.addNode(node2, index2) - - cc = set(n.index for n in self.getNode(index=index2).componentIterator()) - - assert index1 in self._node[index2] or index1 not in cc, \ - "No more than one path is alloed between two nodes in a tree" - - UndirectedGraph.addEdge(self, 
                                index1=index1, index2=index2,**data)
-
-        return (index1,index2)
-
-    def isASingleTree(self):
-        return componentCount(self)==1
-
-class TreeNode(Node):
-
-    def componentIterator(self):
-        for c in self:
-            yield c
-            for cc in c:
-                yield cc
-
-    
\ No newline at end of file
diff --git a/obitools/gzip.py b/obitools/gzip.py
deleted file mode 100644
index 841641a..0000000
--- a/obitools/gzip.py
+++ /dev/null
@@ -1,504 +0,0 @@
-"""Functions that read and write gzipped files.
-
-The user of the file doesn't have to worry about the compression,
-but random access is not allowed.
-
-This consists of a patched version of the standard gzip python
-module, based on Andrew Kuchling's minigzip.py distributed with the zlib module
-
-"""
-
-# based on Andrew Kuchling's minigzip.py distributed with the zlib module
-
-import struct, sys, time
-import zlib
-import __builtin__
-
-__all__ = ["GzipFile","open"]
-
-FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
-
-READ, WRITE = 1, 2
-
-def U32(i):
-    """Return i as an unsigned integer, assuming it fits in 32 bits.
-
-    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
-    """
-    if i < 0:
-        i += 1L << 32
-    return i
-
-def LOWU32(i):
-    """Return the low-order 32 bits of an int, as a non-negative int."""
-    return i & 0xFFFFFFFFL
-
-def write32(output, value):
-    output.write(struct.pack("<l", value))
-
-def write32u(output, value):
-    # The L format writes the bit pattern correctly whether signed
-    # or unsigned.
-    output.write(struct.pack("<L", value))
-
-def unpack32(data):
-    # unpack a little-endian signed 32-bit integer from a byte string
-    return struct.unpack("<l", data)[0]
-
-def open(filename, mode="rb", compresslevel=9):
-    """Shorthand for GzipFile(filename, mode, compresslevel).
-
-    The filename argument is required; mode defaults to 'rb'
-    and compresslevel defaults to 9.
-    """
-    return GzipFile(filename, mode, compresslevel)
-
-class GzipFile:
-    """The GzipFile class simulates most of the methods of a file object with
-    the exception of the readinto() and truncate() methods.
-    """
-
-    myfileobj = None
-    max_read_chunk = 10 * 1024 * 1024   # 10Mb
-
-    def __init__(self, filename=None, mode=None,
-                 compresslevel=9, fileobj=None):
-        # guarantee the file is opened in binary mode on platforms
-        # that care about that sort of thing
-        if mode and 'b' not in mode:
-            mode += 'b'
-        if fileobj is None:
-            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
-        if filename is None:
-            if hasattr(fileobj, 'name'): filename = fileobj.name
-            else: filename = ''
-        if mode is None:
-            if hasattr(fileobj, 'mode'): mode = fileobj.mode
-            else: mode = 'rb'
-
-        if mode[0:1] == 'r':
-            self.mode = READ
-            # Set flag indicating start of a new member
-            self._new_member = True
-            self.extrabuf = ""
-            self.extrasize = 0
-            self.filename = filename
-            # buffers used by this patched version: raw input not yet
-            # returned, and the last eight bytes read (the CRC/size trailer)
-            self.inputbuf = ""
-            self.last8 = ""
-            # Starts small, scales exponentially
-            self.min_readsize = 100
-
-        elif mode[0:1] == 'w' or mode[0:1] == 'a':
-            self.mode = WRITE
-            self._init_write(filename)
-            self.compress = zlib.compressobj(compresslevel,
-                                             zlib.DEFLATED,
-                                             -zlib.MAX_WBITS,
-                                             zlib.DEF_MEM_LEVEL,
-                                             0)
-        else:
-            raise IOError, "Mode " + mode + " not supported"
-
-        self.fileobj = fileobj
-        self.offset = 0
-
-        if self.mode == WRITE:
-            self._write_gzip_header()
-
-    def __repr__(self):
-        s = repr(self.fileobj)
-        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
-
-    def _init_write(self, filename):
-        if filename[-3:] != '.gz':
-            filename = filename + '.gz'
-        self.filename = filename
-        self.crc = zlib.crc32("")
-        self.size = 0
-        self.writebuf = []
-        self.bufsize = 0
-
-    def _write_gzip_header(self):
-        self.fileobj.write('\037\213')             # magic header
-        self.fileobj.write('\010')                 # compression method
-        fname = self.filename[:-3]
-        flags = 0
-        if fname:
-            flags = FNAME
-        self.fileobj.write(chr(flags))
-        write32u(self.fileobj, long(time.time()))
-        self.fileobj.write('\002')
-        self.fileobj.write('\377')
-        if fname:
-            self.fileobj.write(fname + '\000')
-
-    def _init_read(self):
-        self.crc = zlib.crc32("")
-        self.size = 0
-
-    def _read_internal(self, size):
-        if len(self.inputbuf) < size:
-            self.inputbuf += self.fileobj.read(size-len(self.inputbuf))
-        chunk = self.inputbuf[:size]
-        # need to use len(chunk) below instead of size in case it's EOF.
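        # A concrete illustration of the rolling window maintained here (a
        # sketch; the byte string is illustrative, not from the module): after
        # consuming "ABCDEFGHIJ" through _read_internal, in chunks of any
        # size, last8 ends up as "CDEFGHIJ". At EOF it therefore always holds
        # the 8-byte gzip trailer, the CRC32 of the member followed by the
        # ISIZE field, which _read_eof() checks further below.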
- if len(chunk) < 8: - self.last8 = self.last8[len(chunk):] + chunk - else: - self.last8 = chunk[-8:] - self.inputbuf = self.inputbuf[size:] - return chunk - - def _read_gzip_header(self): - magic = self._read_internal(2) - if len(magic) != 2: - raise EOFError, "Reached EOF" - if magic != '\037\213': - raise IOError, 'Not a gzipped file' - method = ord( self._read_internal(1) ) - if method != 8: - raise IOError, 'Unknown compression method' - flag = ord( self._read_internal(1) ) - # modtime = self.fileobj.read(4) - # extraflag = self.fileobj.read(1) - # os = self.fileobj.read(1) - self._read_internal(6) - - if flag & FEXTRA: - # Read & discard the extra field, if present - xlen = ord(self._read_internal(1)) - xlen = xlen + 256*ord(self._read_internal(1)) - self._read_internal(xlen) - if flag & FNAME: - # Read and discard a null-terminated string containing the filename - while True: - s = self._read_internal(1) - if not s or s=='\000': - break - if flag & FCOMMENT: - # Read and discard a null-terminated string containing a comment - while True: - s = self._read_internal(1) - if not s or s=='\000': - break - if flag & FHCRC: - self._read_internal(2) # Read & discard the 16-bit header CRC - - - def write(self,data): - if self.mode != WRITE: - import errno - raise IOError(errno.EBADF, "write() on read-only GzipFile object") - - if self.fileobj is None: - raise ValueError, "write() on closed GzipFile object" - if len(data) > 0: - self.size = self.size + len(data) - self.crc = zlib.crc32(data, self.crc) - self.fileobj.write( self.compress.compress(data) ) - self.offset += len(data) - - def read(self, size=-1): - if self.mode != READ: - import errno - raise IOError(errno.EBADF, "read() on write-only GzipFile object") - - if self.extrasize <= 0 and self.fileobj is None: - return '' - - readsize = 1024 - if size < 0: # get the whole thing - try: - while True: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - size = self.extrasize - else: # just get some more of it - try: - while size > self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize - - chunk = self.extrabuf[:size] - self.extrabuf = self.extrabuf[size:] - self.extrasize = self.extrasize - size - - self.offset += size - return chunk - - def _unread(self, buf): - self.extrabuf = buf + self.extrabuf - self.extrasize = len(buf) + self.extrasize - self.offset -= len(buf) - - def _read(self, size=1024): - if self.fileobj is None: - raise EOFError, "Reached EOF" - - if self._new_member: - # If the _new_member flag is set, we have to - # jump to the next member, if there is one. - # - # _read_gzip_header will raise EOFError exception - # if there no more members to read. - self._init_read() - self._read_gzip_header() - self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) - self._new_member = False - - # Read a chunk of data from the file - buf = self._read_internal(size) - - # If the EOF has been reached, flush the decompression object - # and mark this object as finished. - - if buf == "": - uncompress = self.decompress.flush() - self._read_eof() - self._add_read_data( uncompress ) - raise EOFError, 'Reached EOF' - - uncompress = self.decompress.decompress(buf) - self._add_read_data( uncompress ) - - if self.decompress.unused_data != "": - # Ending case: we've come to the end of a member in the file, - # so put back unused_data and initialize last8 by reading them. 
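            # (Why this matters, as a sketch: a gzip file may be several
            # members concatenated back to back, e.g. the output of
            # "cat a.gz b.gz". The zlib decompressor stops at the end of the
            # current member and leaves that member's 8-byte trailer, plus
            # any following member, in unused_data; pushing it back into
            # inputbuf lets _read_internal consume the trailer and then
            # restart cleanly on the next member.)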
- self.inputbuf = self.decompress.unused_data + self.inputbuf - self._read_internal(8) - - # Check the CRC and file size, and set the flag so we read - # a new member on the next call - self._read_eof() - self._new_member = True - - def _add_read_data(self, data): - self.crc = zlib.crc32(data, self.crc) - self.extrabuf = self.extrabuf + data - self.extrasize = self.extrasize + len(data) - self.size = self.size + len(data) - - def _read_eof(self): - # We've read to the end of the file, so we have to rewind in order - # to reread the 8 bytes containing the CRC and the file size. - # We check the that the computed CRC and size of the - # uncompressed data matches the stored values. Note that the size - # stored is the true file size mod 2**32. - crc32 = unpack32(self.last8[:4]) - isize = U32(unpack32(self.last8[4:])) # may exceed 2GB - if U32(crc32) != U32(self.crc): - raise IOError, "CRC check failed" - elif isize != LOWU32(self.size): - raise IOError, "Incorrect length of data produced" - - def close(self): - if self.mode == WRITE: - self.fileobj.write(self.compress.flush()) - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. - write32u(self.fileobj, LOWU32(self.crc)) - # self.size may exceed 2GB, or even 4GB - write32u(self.fileobj, LOWU32(self.size)) - self.fileobj = None - elif self.mode == READ: - self.fileobj = None - if self.myfileobj: - self.myfileobj.close() - self.myfileobj = None - - def __del__(self): - try: - if (self.myfileobj is None and - self.fileobj is None): - return - except AttributeError: - return - self.close() - - def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): - if self.mode == WRITE: - # Ensure the compressor's buffer is flushed - self.fileobj.write(self.compress.flush(zlib_mode)) - self.fileobj.flush() - - def fileno(self): - """Invoke the underlying file object's fileno() method. - - This will raise AttributeError if the underlying file object - doesn't support fileno(). - """ - return self.fileobj.fileno() - - def isatty(self): - return False - - def tell(self): - return self.offset - - def rewind(self): - '''Return the uncompressed stream file position indicator to the - beginning of the file''' - if self.mode != READ: - raise IOError("Can't rewind in write mode") - self.fileobj.seek(0) - self._new_member = True - self.extrabuf = "" - self.extrasize = 0 - self.offset = 0 - - def seek(self, offset): - if self.mode == WRITE: - if offset < self.offset: - raise IOError('Negative seek in write mode') - count = offset - self.offset - for i in range(count // 1024): - self.write(1024 * '\0') - self.write((count % 1024) * '\0') - elif self.mode == READ: - if offset < self.offset: - # for negative seek, rewind and do positive seek - self.rewind() - count = offset - self.offset - for i in range(count // 1024): - self.read(1024) - self.read(count % 1024) - - def readline(self, size=-1): - if size < 0: - size = sys.maxint - readsize = self.min_readsize - else: - readsize = size - bufs = [] - while size != 0: - c = self.read(readsize) - i = c.find('\n') - - # We set i=size to break out of the loop under two - # conditions: 1) there's no newline, and the chunk is - # larger than size, or 2) there is a newline, but the - # resulting line would be longer than 'size'. 
- if (size <= i) or (i == -1 and len(c) > size): - i = size - 1 - - if i >= 0 or c == '': - bufs.append(c[:i + 1]) # Add portion of last chunk - self._unread(c[i + 1:]) # Push back rest of chunk - break - - # Append chunk to list, decrease 'size', - bufs.append(c) - size = size - len(c) - readsize = min(size, readsize * 2) - if readsize > self.min_readsize: - self.min_readsize = min(readsize, self.min_readsize * 2, 512) - return ''.join(bufs) # Return resulting line - - def readlines(self, sizehint=0): - # Negative numbers result in reading all the lines - if sizehint <= 0: - sizehint = sys.maxint - L = [] - while sizehint > 0: - line = self.readline() - if line == "": - break - L.append(line) - sizehint = sizehint - len(line) - - return L - - def writelines(self, L): - for line in L: - self.write(line) - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if line: - return line - else: - raise StopIteration - - -def _test(): - # Act like gzip; with -d, act like gunzip. - # The input file is not deleted, however, nor are any other gzip - # options or features supported. - args = sys.argv[1:] - decompress = args and args[0] == "-d" - if decompress: - args = args[1:] - if not args: - args = ["-"] - for arg in args: - if decompress: - if arg == "-": - f = GzipFile(filename="", mode="rb", fileobj=sys.stdin) - g = sys.stdout - else: - if arg[-3:] != ".gz": - print "filename doesn't end in .gz:", repr(arg) - continue - f = open(arg, "rb") - g = __builtin__.open(arg[:-3], "wb") - else: - if arg == "-": - f = sys.stdin - g = GzipFile(filename="", mode="wb", fileobj=sys.stdout) - else: - f = __builtin__.open(arg, "rb") - g = open(arg + ".gz", "wb") - while True: - chunk = f.read(1024) - if not chunk: - break - g.write(chunk) - if g is not sys.stdout: - g.close() - if f is not sys.stdin: - f.close() - -if __name__ == '__main__': - _test() diff --git a/obitools/gzip.pyc b/obitools/gzip.pyc deleted file mode 100644 index 9c44a43..0000000 Binary files a/obitools/gzip.pyc and /dev/null differ diff --git a/obitools/location/__init__.py b/obitools/location/__init__.py deleted file mode 100644 index b5463b0..0000000 --- a/obitools/location/__init__.py +++ /dev/null @@ -1,538 +0,0 @@ -import obitools -import re -import array - -class Location(object): - """ - Define a location on a sequence. - """ - - def extractSequence(self,sequence): - ''' - Extract subsequence corresponding to a Location. - - @param sequence: - @type sequence: C{BioSequence} or C{str} - ''' - assert isinstance(sequence, (obitools.BioSequence,str)), \ - "sequence must be an instance of str or BioSequence" - - if isinstance(sequence, str): - seq = self._extractSequence(sequence) - else: - if isinstance(sequence, obitools.AASequence): - assert not self.needNucleic(), \ - "This location can be used only with Nucleic sequences" - seq = self._extractSequence(str(sequence)) - - if isinstance(sequence, obitools.AASequence): - st = obitools.AASequence - else: - st = obitools.NucSequence - - seq = st(sequence.id, - seq, - sequence.definition, - **sequence.getTags()) - seq['location']=str(self) - - if 'length' in sequence.getTags(): - seq['length']=len(seq) - - if hasattr(sequence, 'quality'): - quality = self._extractQuality(sequence) - seq.quality=quality - - return seq - - def isDirect(self): - return None - - def isSimple(self): - ''' - Indicate if a location is composed of a single continuous - region or is composed by the junction of several locations - by the C{join} operator. 
-
-        @return: C{True} if the location is composed of a single
-                 continuous region.
-        @rtype: bool
-        '''
-
-        return None
-
-    def isFullLength(self):
-        return None
-
-    def needNucleic(self):
-        '''
-        If a location contains a complement operator, it can be used
-        only on nucleic sequences.
-
-        @return: C{True} if location contains a complement operator
-        @rtype: bool
-        '''
-        return None
-
-    def getGloc(self):
-        loc = self.simplify()
-        assert loc.isDirect() is not None,"Gloc cannot be created for multi-oriented location : %s" % str(loc)
-        positions = ','.join([str(x) for x in loc._getglocpos()])
-        return "(%s,%s)" % ({True:'T',False:'F'}[loc.isDirect()],
-                            positions)
-
-    def shift(self,s):
-        return None
-
-    def getBegin(self):
-        return None
-
-    def getEnd(self):
-        return None
-
-    def getFivePrime(self):
-        return self.getBegin()
-
-    def getThreePrime(self):
-        return self.getEnd()
-
-    begin = property(getBegin,None,None,"beginning position of the location")
-    end = property(getEnd,None,None,"ending position of the location")
-    fivePrime=property(getFivePrime,None,None,"5' position of the location")
-    threePrime=property(getThreePrime,None,None,"3' position of the location")
-
-    def __abs__(self):
-        assert self.isDirect() is not None,"Abs operator cannot be applied on a non-oriented location"
-        if self.isDirect():
-            return self
-        else:
-            return ComplementLocation(self).simplify()
-
-    def __cmp__(self,y):
-        if self.begin < y.begin:
-            return -1
-        if self.begin > y.begin:
-            return 1
-        if self.isDirect() == y.isDirect():
-            return 0
-        if self.isDirect() and not y.isDirect():
-            return -1
-        return 1
-
-class SimpleLocation(Location):
-    """
-    A simple location describes a continuous region of
-    a sequence defined by a C{begin} and a C{end} position.
-    """
-
-    def __init__(self,begin,end):
-        '''
-        Build a new C{SimpleLocation} instance. Valid
-        positions are defined on M{[1,N]}, with N the length
-        of the sequence.
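
A minimal sketch of this 1-based, inclusive convention, assuming the classes defined in this module (the sequence literal is illustrative, and the internal `_extractSequence` helper is used for brevity):

    from obitools.location import SimpleLocation

    loc = SimpleLocation(2, 4)
    print loc.begin, loc.end               # 2 4
    print loc._extractSequence('gattaca')  # 'att' : bases 2..4, ends included
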
- - @param begin: start position of the location - @type begin: int - @param end: end position of the location - @type end: int - ''' - assert begin > 0 and end > 0 - - self._begin = begin - self._end = end - self._before=False - self._after=False - - def _extractSequence(self,sequence): - - assert ( self._begin < len(sequence) - and self._end <= len(sequence)), \ - "Sequence length %d is too short" % len(sequence) - - return sequence[self._begin-1:self._end] - - def _extractQuality(self,sequence): - - assert ( self._begin < len(sequence) - and self._end <= len(sequence)), \ - "Sequence length %d is too short" % len(sequence) - - return sequence.quality[self._begin-1:self._end] - - - def isDirect(self): - return True - - def isSimple(self): - return True - - def isFullLength(self): - return not (self.before or self.after) - - def simplify(self): - if self._begin == self._end: - return PointLocation(self._begin) - else: - return self - - def needNucleic(self): - return False - - def __str__(self): - before = {True:'<',False:''}[self.before] - after = {True:'>',False:''}[self.after] - return "%s%d..%s%d" % (before,self._begin,after,self._end) - - def shift(self,s): - assert (self._begin + s) > 0,"shift to large (%d)" % s - if s == 0: - return self - return SimpleLocation(self._begin + s, self._end + s) - - def _getglocpos(self): - return (self.begin,self.end) - - def getGloc(self): - positions = ','.join([str(x) for x in self._getglocpos()]) - return "(%s,%s)" % ({True:'T',False:'F'}[self.isDirect()], - positions) - - def getBegin(self): - return self._begin - - def getEnd(self): - return self._end - - - begin = property(getBegin,None,None,"beginning position of the location") - end = property(getEnd,None,None,"ending position of the location") - - def getBefore(self): - return self._before - - def getAfter(self): - return self._after - - def setBefore(self,value): - assert isinstance(value, bool) - self._before=value - - def setAfter(self,value): - assert isinstance(value, bool) - self._after=value - - before=property(getBefore,setBefore,None) - after=property(getAfter,setAfter,None) - - - - -class PointLocation(Location): - """ - A point location describes a location on a sequence - limited to a single position - """ - - def __init__(self,position): - assert position > 0 - self._pos=position - - def _extractSequence(self,sequence): - - assert self._end <= len(sequence), \ - "Sequence length %d is too short" % len(sequence) - - return sequence[self._pos-1] - - def _extractQuality(self,sequence): - - assert self._end <= len(sequence), \ - "Sequence length %d is too short" % len(sequence) - - return sequence[self._pos-1:self._pos] - - def isDirect(self): - return True - - def isSimple(self): - return True - - def isFullLength(self): - return True - - def simplify(self): - return self - - def needNucleic(self): - return False - - def shift(self,s): - assert (self._pos + s) > 0,"shift to large (%d)" % s - if s == 0: - return self - return PointLocation(self._pos + s) - - def _getglocpos(self): - return (self._pos,self._pos) - - def getBegin(self): - return self._pos - - def getEnd(self): - return self._pos - - begin = property(getBegin,None,None,"beginning position of the location") - end = property(getEnd,None,None,"ending position of the location") - - def __str__(self): - return str(self._pos) - -class CompositeLocation(Location): - """ - """ - def __init__(self,locations): - self._locs = tuple(locations) - - - def _extractSequence(self,sequence): - seq = 
''.join([x._extractSequence(sequence) - for x in self._locs]) - return seq - - def _extractQuality(self,sequence): - rep=array.array('d',[]) - for x in self._locs: - rep.extend(x._extractQuality(sequence)) - return rep - - def isDirect(self): - hasDirect,hasReverse = reduce(lambda x,y: (x[0] or y,x[1] or not y), - (z.isDirect() for z in self._locs),(False,False)) - - if hasDirect and not hasReverse: - return True - if hasReverse and not hasDirect: - return False - - return None - - - def isSimple(self): - return False - - - def simplify(self): - if len(self._locs)==1: - return self._locs[0] - - rep = CompositeLocation(x.simplify() for x in self._locs) - - if reduce(lambda x,y : x and y, - (isinstance(z, ComplementLocation) - for z in self._locs)): - rep = ComplementLocation(CompositeLocation(x._loc.simplify() - for x in rep._locs[::-1])) - - return rep - - def isFullLength(self): - return reduce(lambda x,y : x and y, (z.isFullLength() for z in self._locs),1) - - def needNucleic(self): - return reduce(lambda x,y : x or y, - (z.needNucleic() for z in self._locs), - False) - - def _getglocpos(self): - return reduce(lambda x,y : x + y, - (z._getglocpos() for z in self._locs)) - - - def getBegin(self): - return min(x.getBegin() for x in self._locs) - - def getEnd(self): - return max(x.getEnd() for x in self._locs) - - def shift(self,s): - assert (self.getBegin() + s) > 0,"shift too large (%d)" % s - if s == 0: - return self - return CompositeLocation(x.shift(s) for x in self._locs) - - - begin = property(getBegin,None,None,"beginning position of the location") - end = property(getEnd,None,None,"ending position of the location") - - - def __str__(self): - return "join(%s)" % ','.join([str(x) - for x in self._locs]) - -class ComplementLocation(Location): - """ - A location on the reverse complement strand, as produced - by a C{complement} operator. - """ - - _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a', - 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k', - 's': 's', 'w': 'w', 'b': 'v', 'd': 'h', - 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a', - '-': '-'} - - def __init__(self,location): - self._loc = location - - def _extractSequence(self,sequence): - seq = self._loc._extractSequence(sequence) - seq = ''.join([ComplementLocation._comp.get(x.lower(),'n') for x in seq[::-1]]) - return seq - - def _extractQuality(self,sequence): - # quality values are reversed together with the sequence - return self._loc._extractQuality(sequence)[::-1] - - def isDirect(self): - return False - - def isSimple(self): - return self._loc.isSimple() - - def isFullLength(self): - return self._loc.isFullLength() - - def simplify(self): - if isinstance(self._loc, ComplementLocation): - return self._loc._loc.simplify() - else: - return self - - def needNucleic(self): - return True - - def __str__(self): - return "complement(%s)" % self._loc - - def shift(self,s): - assert (self.getBegin() + s) > 0,"shift too large (%d)" % s - if s == 0: - return self - return ComplementLocation(self._loc.shift(s)) - - def _getglocpos(self): - return self._loc._getglocpos() - - def getBegin(self): - return self._loc.getBegin() - - def getEnd(self): - return self._loc.getEnd() - - def getFivePrime(self): - return self.getEnd() - - def getThreePrime(self): - return self.getBegin() - - - begin = property(getBegin,None,None,"beginning position of the location") - end = property(getEnd,None,None,"ending position of the location") - fivePrime=property(getFivePrime,None,None,"5' position of the location") - threePrime=property(getThreePrime,None,None,"3' position of the location") - - - # - # Internal functions used for location parsing - # - -def __sublocationIterator(text): - sl = [] - plevel=0 - for c in text: - assert
plevel>=0,"Misformatted location : %s" % text - if c == '(': - plevel+=1 - sl.append(c) - elif c==')': - plevel-=1 - sl.append(c) - elif c==',' and plevel == 0: - assert sl,"Misformatted location : %s" % text - yield ''.join(sl) - sl=[] - else: - sl.append(c) - assert sl and plevel==0,"Misformatted location : %s" % text - yield ''.join(sl) - - - - # - # Internal functions used for location parsing - # - -__simplelocparser = re.compile('(?P<before><?)(?P<from>[0-9]+)(\.\.(?P<after>>?)(?P<to>[0-9]+))?') - - -def __locationParser(text): - text=text.strip() - if text[0:5]=='join(': - assert text[-1]==')',"Misformatted location : %s" % text - return CompositeLocation(__locationParser(sl) for sl in __sublocationIterator(text[5:-1])) - elif text[0:11]=='complement(': - assert text[-1]==')',"Misformatted location : %s" % text - subl = tuple(__locationParser(sl) for sl in __sublocationIterator(text[11:-1])) - if len(subl)>1: - subl = CompositeLocation(subl) - else: - subl = subl[0] - return ComplementLocation(subl) - else: - data = __simplelocparser.match(text) - assert data is not None,"Misformatted location : %s" % text - data = data.groupdict() - if not data['to'] : - sl = PointLocation(int(data['from'])) - else: - sl = SimpleLocation(int(data['from']),int(data['to'])) - sl.before=data['before']=='<' - sl.after=data['after']=='>' - return sl - -def locationGenerator(locstring): - ''' - Parse a location string as found in a GenBank or EMBL file. - - @param locstring: string description of the location in embl/gb format - @type locstring: str - - @return: a Location instance - @rtype: C{Location} subclass instance - ''' - return __locationParser(locstring) - - -_matchExternalRef = re.compile('[A-Za-z0-9_|]+(\.[0-9]+)?(?=:)') - -def extractExternalRefs(locstring): - ''' - When a location describes external references (e.g. D28156.1:1..>1292), - separate the external reference part from the location itself. - - @param locstring: text representation of the location. - @type locstring: str - - @return: a tuple with a set of strings describing the accession numbers - of the referred sequences and a C{Location} instance. - - @rtype: tuple(set,Location) - ''' - m = set(x.group() for x in _matchExternalRef.finditer(locstring)) - clean = re.compile(':|'.join([re.escape(x) for x in m])+':') - cloc = locationGenerator(clean.sub('',locstring)) - - return m,cloc - - - - - diff --git a/obitools/location/__init__.pyc b/obitools/location/__init__.pyc deleted file mode 100644 index 545f024..0000000 Binary files a/obitools/location/__init__.pyc and /dev/null differ diff --git a/obitools/location/feature.py b/obitools/location/feature.py deleted file mode 100644 index 89a183f..0000000 --- a/obitools/location/feature.py +++ /dev/null @@ -1,177 +0,0 @@ -from obitools.location import Location,locationGenerator -import logging -import re - - - - -_featureMatcher = re.compile('^(FT| ) [^ ].+\n((FT| ) .+\n)+',re.M) -_featureCleaner = re.compile('^FT',re.M) - - -def textFeatureIterator(fttable): - ''' - Iterate through a textual description of a feature table in GenBank - or EMBL format. Returns at each step a text representation of each individual - feature composing the table.
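The location parser deleted above is driven through locationGenerator() and extractExternalRefs(); a minimal usage sketch, assuming the package layout prior to this deletion (the location strings are illustrative):

    from obitools.location import locationGenerator, extractExternalRefs

    loc = locationGenerator('complement(join(12..78,134..202))')
    # a ComplementLocation wrapping a CompositeLocation
    assert loc.isDirect() is False and not loc.isSimple()
    assert (loc.begin, loc.end) == (12, 202)
    # on the reverse strand the 5' end is the highest coordinate
    assert (loc.fivePrime, loc.threePrime) == (202, 12)

    # external references are split away from the location itself
    refs, cloc = extractExternalRefs('D28156.1:1..>1292')
    assert refs == set(['D28156.1'])
    assert str(cloc) == '1..>1292'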
- - @param fttable: a string corresponding to the feature table of a genbank - or an embl entry - - @type fttable: C{str} - - @return: an iterator on str - @rtype: iterator - - @see: L{ftParser} - ''' - for m in _featureMatcher.finditer(fttable): - t = m.group() - t = _featureCleaner.sub(' ',t) - yield t - -_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M) -_qualifierCleanner= re.compile("^ +",re.M) - -def qualifierIterator(qualifiers): - ''' - Parse a textual description of a feature in embl or genbank format - as returned by the textFeatureIterator iterator and iterate through - the key, value qualified defining this location. - - @param qualifiers: substring containing qualifiers - @type qualifiers: str - - @return: an iterator on tuple (key,value), where keys are C{str} - @rtype: iterator - ''' - for m in _qualifierMatcher.finditer(qualifiers): - t = m.group() - t = _qualifierCleanner.sub('',t) - t = t.split('=',1) - if len(t)==1: - t = (t[0],None) - else: - if t[0]=='translation': - value = t[1].replace('\n','') - else: - value = t[1].replace('\n',' ') - try: - value = eval(value) - except: - pass - t = (t[0],value) - yield t - - -_ftmatcher = re.compile('(?<=^ {5})\S+') -_locmatcher= re.compile('(?<=^.{21})[^/]+',re.DOTALL) -_cleanloc = re.compile('[\s\n]+') -_qualifiersMatcher = re.compile('^ +/.+',re.M+re.DOTALL) - -def ftParser(feature): - fttype = _ftmatcher.search(feature).group() - location=_locmatcher.search(feature).group() - location=_cleanloc.sub('',location) - qualifiers=_qualifiersMatcher.search(feature) - if qualifiers is not None: - qualifiers=qualifiers.group() - else: - qualifiers="" - logging.debug("Qualifiers regex not matching on \n=====\n%s\n========" % feature) - - return fttype,location,qualifiers - - -class Feature(dict,Location): - def __init__(self,type,location): - self._fttype=type - self._loc=location - - def getFttype(self): - return self._fttype - - - def extractSequence(self,sequence,withQualifier=False): - seq = self._loc.extractSequence(sequence) - if withQualifier: - seq.getInfo().update(self) - return seq - - def isDirect(self): - return self._loc.isDirect() - - def isSimple(self): - return self._loc.isSimple() - - def isFullLength(self): - return self._loc.isFullLength() - - def simplify(self): - f = Feature(self._fttype,self._loc.simplify()) - f.update(self) - return f - - def locStr(self): - return str(self._loc) - - def needNucleic(self): - return self._loc.needNucleic() - - def __str__(self): - return repr(self) - - def __repr__(self): - return str((self.ftType,str(self._loc),dict.__repr__(self))) - - def __cmp__(self,y): - return self._loc.__cmp__(y) - - def _getglocpos(self): - return self._loc._getglocpos() - - ftType = property(getFttype, None, None, "Feature type name") - - def shift(self,s): - assert (self.getBegin() + s) > 0,"shift to large (%d)" % s - if s == 0: - return self - f = Feature(self._fttype,self._loc.shift(s)) - f.update(self) - return f - - - def getBegin(self): - return self._loc.getBegin() - - def getEnd(self): - return self._loc.getEnd() - - begin = property(getBegin,None,None,"beginning position of the location") - end = property(getEnd,None,None,"ending position of the location") - - -def featureFactory(featureDescription): - fttype,location,qualifiers = ftParser(featureDescription) - location = locationGenerator(location) - feature = Feature(fttype,location) - feature.raw = featureDescription - - for k,v in qualifierIterator(qualifiers): - feature.setdefault(k,[]).append(v) - - return feature - 
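ftParser(), qualifierIterator() and the Feature class combine in featureFactory(); a minimal sketch on a hand-built GenBank-style feature block (the 21-column indentation is what the regular expressions above expect; the feature content is illustrative):

    from obitools.location.feature import featureFactory

    ft = featureFactory(
        '     CDS             complement(join(12..78,134..202))\n'
        '                     /gene="xyz"\n'
        '                     /codon_start=1')
    # ft.ftType         -> 'CDS'
    # ft.locStr()       -> 'complement(join(12..78,134..202))'
    # ft['gene']        -> ['xyz']  (qualifier values are eval'ed when possible)
    # ft['codon_start'] -> [1]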
-def featureIterator(featureTable,skipError=False): - for tft in textFeatureIterator(featureTable): - try: - feature = featureFactory(tft) - except AssertionError,e: - logging.debug("Parsing error on feature :\n===============\n%s\n===============" % tft) - if not skipError: - raise e - logging.debug("\t===> Error skipped") - continue - - yield feature - \ No newline at end of file diff --git a/obitools/metabarcoding/__init__.py b/obitools/metabarcoding/__init__.py deleted file mode 100644 index 3b29b17..0000000 --- a/obitools/metabarcoding/__init__.py +++ /dev/null @@ -1,265 +0,0 @@ -from obitools.ecopcr.options import addTaxonomyFilterOptions,\ - loadTaxonomyDatabase -from obitools.graph import UndirectedGraph -from obitools.align import lenlcs,isLCSReachable -from obitools.graph.algorithms.component import componentIterator -from obitools.utils.bioseq import uniqSequence -from obitools.utils import progressBar -import math -import sys -from obitools.graph.rootedtree import RootedTree - -def average(x): - x=list(x) - s = sum(i*j for (i,j) in x) - n = sum(i[1] for i in x) - return (float(s)/float(n),n) - -def minimum(x): - x=list(x) - m = min(i[0] for i in x) - n = sum(i[1] for i in x) - return (float(m),n) - -def ecoPCRReader(entries,options): - - taxonomy = loadTaxonomyDatabase(options) - - norankid =options.taxonomy.findRankByName('no rank') - speciesid=options.taxonomy.findRankByName('species') - genusid =options.taxonomy.findRankByName('genus') - familyid =options.taxonomy.findRankByName('family') - - minrankseq = set([speciesid,genusid,familyid]) - - usedrankid = {} - - ingroup = [] - outgroup= [] - - for s in entries: - if 'taxid' in s : - taxid = s['taxid'] - if taxid in taxonomy: - allrank = set() - for p in options.taxonomy.parentalTreeIterator(taxid): - if p[1]!=norankid: - allrank.add(p[1]) - if len(minrankseq & allrank) == 3: - for r in allrank: - usedrankid[r]=usedrankid.get(r,0) + 1 - - if taxonomy.isAncestor(options.ingroup,taxid): - ingroup.append(s) - else: - outgroup.append(s) - - keptrank = set(r for r in usedrankid - if float(usedrankid[r])/float(len(ingroup)) > options.rankthresold) - - return { 'ingroup' : ingroup, - 'outgroup': outgroup, - 'ranks' : keptrank - } - -def buildSimilarityGraph(dbseq,ranks,taxonomy,dcmax=5): - - ldbseq = len(dbseq) - pos = 1 - digit = int(math.ceil(math.log10(ldbseq))) - header = "Alignment : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit) - aligncount = ldbseq*(ldbseq+1)/2 - edgecount = 0 - print >>sys.stderr - - progressBar(1,aligncount,True,"Alignment : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit)) - - - sim = UndirectedGraph() - - i=0 - for s in dbseq: - taxid = s['taxid'] - - rtaxon = dict((rid,taxonomy.getTaxonAtRank(taxid,rid)) - for rid in ranks) - - sim.addNode(i, seq=s,taxid=taxid,rtaxon=rtaxon) - - i+=1 - -# aligner = LCS() - - for is1 in xrange(ldbseq): - s1 = dbseq[is1] - ls1= len(s1) -# aligner.seqA=s1 - - for is2 in xrange(is1+1,ldbseq): - - s2=dbseq[is2] - ls2=len(s2) - - lm = max(ls1,ls2) - lcsmin = lm - dcmax - - if isLCSReachable(s1,s2,lcsmin): - llcs,lali=lenlcs(s1,s2) - ds1s2 = lali - llcs - - if ds1s2 <= dcmax: - sim.addEdge(node1=is1, node2=is2,ds1s2=ds1s2,label=ds1s2) - edgecount+=1 - - progressBar(pos,aligncount,head=header % (is1,is2,edgecount)) - pos+=(ldbseq-is1-1) - - return sim - -def buildTsr(component): - ''' - Build for each consider taxonomic rank the list of taxa - present in the connected component - - :param component: the analyzed connected component - :type component: 
:py:class:`UndirectedGraph` - - :return: a dictionary indexed by rankid containing a `dict` indexed by taxid and containing count of sequences for this taxid - :rtype: `dict` indexed by `int` containing `dict` indexed by `int` and containing of `int` - - ''' - taxalist = {} - for n in component: - for r in n['rtaxon']: - rtaxid = n['rtaxon'][r] - if rtaxid is not None: - ts = taxalist.get(r,{}) - ts[rtaxid]=ts.get(rtaxid,0)+1 - taxalist[r]=ts - - return taxalist - -def edgeDistSelector(dcmax): - def predicate(e): - return e['ds1s2'] <= dcmax - return predicate - -def distanceOfConfusion(simgraph,dcmax=5,aggregate=average): - - alltaxa = set() - - for n in simgraph: - alltaxa|=set(n['rtaxon'].values()) - - taxacount = len(alltaxa) - - result = {} - - pos = [1] - header = "Component : %-5d Identified : %-8d " - progressBar(1,taxacount,True,header % (0,0)) - - def _idc(cc,dcmax): - composante=[] - for x in cc: - composante.extend(simgraph.subgraph(c) - for c in componentIterator(x, - edgePredicat=edgeDistSelector(dcmax))) - - good = set() - bad = {} - - complexe = [] - - for c in composante: - tsr = buildTsr(c) - newbad=False - for r in tsr: - if len(tsr[r]) == 1: - taxid = tsr[r].keys()[0] - good.add((taxid,tsr[r][taxid])) - else: - newbad=True - for taxid in tsr[r]: - bad[taxid]=bad.get(taxid,0)+tsr[r][taxid] - if newbad: - complexe.append(c) - -# good = good - bad - - for taxid,weight in good: - if taxid not in result: - result[taxid]=[] - result[taxid].append((dcmax+1,weight)) - - - progressBar(pos[0],taxacount,False,header % (len(composante),pos[0])) - pos[0]=len(result) - - if dcmax > 0: - dcmax-=1 - _idc(complexe,dcmax) - - else: - for taxid in bad: - if taxid not in result: - result[taxid]=[] - result[taxid].append((0,bad[taxid])) - - progressBar(pos[0],taxacount,False,header % (len(composante),pos[0])) - pos[0]=len(result) - - _idc([simgraph],dcmax) - - for taxid in result: - result[taxid]=aggregate(result[taxid]) - return result - -def propagateDc(tree,node=None,aggregate=min): - if node is None: - node = tree.getRoots()[0] - dca=aggregate(n['dc'] for n in node.leavesIterator()) - node['dc']=dca - for n in node: - propagateDc(tree, n, aggregate) - -def confusionTree(distances,ranks,taxonomy,aggregate=min,bsrank='species',dcmax=1): - - def Bs(node,rank,dcmax): - n = len(node) - if n: - g = [int(x['dc']>=dcmax) for x in node.subgraphIterator() if x['rank']==bsrank] - n = len(g) - g = sum(g) - bs= float(g)/float(n) - node['bs']=bs - node['bs_label']="%3.2f (%d)" % (bs,n) - - for n in node: - Bs(n,rank,dcmax) - - tree = RootedTree() - ranks = set(ranks) - tset = set(distances) - - for taxon in distances: - tree.addNode(taxon, rank=taxonomy.getRank(taxon), - name=taxonomy.getScientificName(taxon), - dc=float(distances[taxon][0]), - n=distances[taxon][1], - dc_label="%4.2f (%d)" % (float(distances[taxon][0]),distances[taxon][1]) - ) - - for taxon in distances: - piter = taxonomy.parentalTreeIterator(taxon) - taxon = piter.next() - for parent in piter: - if taxon[0] in tset and parent[0] in distances: - tset.remove(taxon[0]) - tree.addEdge(parent[0], taxon[0]) - taxon=parent - - root = tree.getRoots()[0] - Bs(root,bsrank,dcmax) - - return tree diff --git a/obitools/metabarcoding/options.py b/obitools/metabarcoding/options.py deleted file mode 100644 index 08ff423..0000000 --- a/obitools/metabarcoding/options.py +++ /dev/null @@ -1,34 +0,0 @@ -''' -Created on 30 oct. 
2011 - -@author: coissac -''' - -from obitools.ecopcr.options import addTaxonomyDBOptions - - -def addMetabarcodingOption(optionManager): - - addTaxonomyDBOptions(optionManager) - - optionManager.add_option('--dcmax', - action="store", dest="dc", - metavar="###", - type="int", - default=0, - help="Maximum confusion distance considered") - - optionManager.add_option('--ingroup', - action="store", dest="ingroup", - metavar="###", - type="int", - default=1, - help="ncbi taxid delimitation the in group") - - optionManager.add_option('--rank-thresold', - action="store", dest="rankthresold", - metavar="#.##", - type="float", - default=0.5, - help="minimum fraction of the ingroup sequences " - "for concidering the rank") diff --git a/obitools/obischemas/__init__.py b/obitools/obischemas/__init__.py deleted file mode 100644 index 6bcafde..0000000 --- a/obitools/obischemas/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -from obitools.obischemas import kb -__connection__ = None - -def initConnection(options): - global __connection__ - param = {} - if hasattr(options, "dbname") and options.dbname is not None: - param["database"]=options.dbname - if hasattr(options, "dbhost") and options.dbhost is not None: - param["host"]=options.dbhost - if hasattr(options, "dbuser") and options.dbuser is not None: - param["username"]=options.dbuser - if hasattr(options, "dbpassword") and options.dbpassword is not None: - param["password"]=options.dbpassword - - __connection__=kb.getConnection(**param) - __connection__.autocommit=options.autocommit - -def getConnection(options=None): - global __connection__ - - if options is not None: - initConnection(options) - - assert __connection__ is not None,"database connection is not initialized" - - return __connection__ - \ No newline at end of file diff --git a/obitools/obischemas/kb/__init__.py b/obitools/obischemas/kb/__init__.py deleted file mode 100644 index 7d35dcb..0000000 --- a/obitools/obischemas/kb/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -""" - kb package is devoted to manage access to postgresql database from python - script -""" - - -class Connection(object): - - def __init__(self): - raise RuntimeError('pyROM.KB.Connection is an abstract class') - - def cursor(self): - raise RuntimeError('pyROM.KB.Connection.cursor is an abstract function') - - def commit(self): - raise RuntimeError('pyROM.KB.Connection.commit is an abstract function') - - def rollback(self): - raise RuntimeError('pyROM.KB.Connection.rollback is an abstract function') - - def __call__(self,query): - return self.cursor().execute(query) - - -class Cursor(object): - - def __init__(self,db): - raise RuntimeError('pyROM.KB.Cursor is an abstract class') - - def execute(self,query): - raise RuntimeError('pyROM.KB.Cursor.execute is an abstract function') - - __call__=execute - - -_current_connection = None # Static variable used to store connection to KB - -def getConnection(*args,**kargs): - """ - return a connection to the database. - When call from database backend no argument are needed. 
- All connections returned by this function - """ - global _current_connection - - if _current_connection==None or args or kargs : - try: - from obischemas.kb import backend - _current_connection = backend.Connection() - except ImportError: - from obischemas.kb import extern - _current_connection = extern.Connection(*args,**kargs) - return _current_connection - - diff --git a/obitools/obischemas/kb/extern.py b/obitools/obischemas/kb/extern.py deleted file mode 100644 index ce2ff84..0000000 --- a/obitools/obischemas/kb/extern.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Module : KB.extern -Author : Eric Coissac -Date : 03/05/2004 - -Module wrapping the psycopg interface module to allow connection -to a PostgreSQL database with the same interface from -backend and external scripts. - -This module defines a class usable from external scripts -""" - - -import psycopg2 -import sys -from obischemas import kb - -class Connection(kb.Connection): - - def __init__(self,*connectParam,**kconnectParam): - if connectParam: - self.connectParam={'dsn':connectParam} - else: - self.connectParam=kconnectParam - print self.connectParam - self.db = psycopg2.connect(**(self.connectParam)) - - def restart(self): - ok=1 - while (ok and ok < 1000): - try: - self.db = psycopg2.connect(**self.connectParam) - except: - ok+=1 - else: - ok=0 - - - def cursor(self): - curs = Cursor(self.db) - if hasattr(self,'autocommit') and self.autocommit: - curs.autocommit = self.autocommit - return curs - - def commit(self): - self.db.commit() - - def rollback(self): - if hasattr(self,'db'): - self.db.rollback() - - def __del__(self): - if hasattr(self,'db'): - self.rollback() - -class Cursor(kb.Cursor): - - def __init__(self,db): - self.db = db - self.curs = db.cursor() - - def execute(self,query): - try: - self.curs.execute(query) - if hasattr(self,'autocommit') and self.autocommit: - self.db.commit() - except psycopg2.ProgrammingError,e: - print >>sys.stderr,"===> %s" % query - raise e - except psycopg2.IntegrityError,e: - print >>sys.stderr,"---> %s" % query - raise e - try: - label = [x[0] for x in self.curs.description] - return [dict(map(None,label,y)) - for y in self.curs.fetchall()] - except TypeError: - return [] diff --git a/obitools/obischemas/options.py b/obitools/obischemas/options.py deleted file mode 100644 index 66f5138..0000000 --- a/obitools/obischemas/options.py +++ /dev/null @@ -1,31 +0,0 @@ -def addConnectionOptions(optionManager): - - optionManager.add_option('-d','--dbname', - action="store", dest="dbname", - metavar="", - type="string", - help="OBISchema database name containing " - "taxonomical data") - - optionManager.add_option('-H','--host', - action="store", dest="dbhost", - metavar="", - type="string", - help="host hosting OBISchema database") - - optionManager.add_option('-U','--user', - action="store", dest="dbuser", - metavar="", - type="string", - help="user for OBISchema database connection") - - optionManager.add_option('-W','--password', - action="store", dest="dbpassword", - metavar="", - type="string", - help="password for OBISchema database connection") - - optionManager.add_option('-A','--autocommit', - action="store_true",dest="autocommit", - default=False, - help="add commit action after each query") \ No newline at end of file diff --git a/obitools/obo/__init__.py b/obitools/obo/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/obo/go/__init__.py b/obitools/obo/go/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/obo/go/parser.py
b/obitools/obo/go/parser.py deleted file mode 100644 index 6902974..0000000 --- a/obitools/obo/go/parser.py +++ /dev/null @@ -1,53 +0,0 @@ -from obitools.obo.parser import OBOTerm -from obitools.obo.parser import OBOEntry -from obitools.obo.parser import stanzaIterator -from logging import debug - -class GOEntry(OBOEntry): - ''' - An entry of a GeneOntology .obo file. It can be a header (without a stanza name) or - a stanza (with a stanza name between brackets). It inherits from the class dict. - ''' - - -class GOTerm(OBOTerm): - - ''' - A stanza named 'Term'. It inherits from the class OBOTerm. - ''' - - def __init__(self,stanza): - - ## use of the OBOEntry constructor. - OBOTerm.__init__(self, stanza) - - assert 'namespace' in self and len(self['namespace'])==1, "An OBOTerm must belong to one of the cell_component, molecular_function or biological_process namespace" - - -def GOEntryFactory(stanza): - ''' - Dispatcher of stanza. - - @param stanza: a stanza composed of several lines. - @type stanza: text - - @return: an C{OBOTerm} | C{OBOEntry} instance - - @note: The dispatcher treats differently the stanza which are OBO "Term" - and the others. - ''' - - stanzaType = OBOEntry.parseStanzaName(stanza) - - if stanzaType=="Term": - return GOTerm(stanza) - else: - return OBOEntry(stanza) - - -def GOEntryIterator(file): - entries = stanzaIterator(file) - for e in entries: - debug(e) - yield GOEntryFactory(e) - diff --git a/obitools/obo/parser.py b/obitools/obo/parser.py deleted file mode 100644 index f6f05f3..0000000 --- a/obitools/obo/parser.py +++ /dev/null @@ -1,707 +0,0 @@ -from obitools.utils import skipWhiteLineIterator,multiLineWrapper -from obitools.utils import universalOpen -from obitools.format.genericparser import genericEntryIteratorGenerator -from logging import debug,warning - -import re - - -################################################################################# -## Stanza preparation area ## -################################################################################# - - -class FileFormatError(Exception): - ''' - An error derived from the class Exception. - ''' - pass - -_oboEntryIterator = genericEntryIteratorGenerator(endEntry='^ *$', - strip=True) - -def stanzaIterator(inputfile): - ''' - Iterator of stanza. The stanza are the basic units of OBO files. - - @param inputfile: a stream of strings from an opened OBO file. - @type inputfile: a stream of strings - - @return: a stream of stanza - @rtype: a stream of aggregated strings - - @note: The iterator constructs stanza by aggregate strings from the - OBO file. - ''' - inputfile = universalOpen(inputfile) - inputfile = multiLineWrapper(inputfile) - return _oboEntryIterator(inputfile) - - - -################################################################################# -## Trailing Modifiers treatment area ## -################################################################################# - - -class TrailingModifier(dict): - ''' - A class object which inherits from the class dict. Trailing modifiers can be found - at the end of TaggedValue objects when they exist. 
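A sketch of how the GO flavour of this parser is meant to be driven, assuming universalOpen() in obitools.utils accepts a plain file path (the filename is illustrative):

    from obitools.obo.go.parser import GOEntryIterator

    terms = 0
    for entry in GOEntryIterator('gene_ontology.obo'):
        if not entry.isHeader and entry.stanzaName == 'Term':
            terms += 1   # entry is a GOTerm here
    # each GOTerm is checked by its constructor to carry exactly one
    # namespace among cell_component, molecular_function, biological_process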
- ''' - - _match_brace = re.compile('(?<=\ {)[^\]]*(\}) *( !|$)') - - def __init__(self,string): - - ## search for trailing modifiers signals - trailing_modifiers = TrailingModifier._match_brace.search(string) - - ## the trailing modifiers exist - if trailing_modifiers: - trailing_modifiers=trailing_modifiers.group(0).strip() - print trailing_modifiers - ## creates and feeds the dictionary of trailing modifiers - dict.__init__(self,(x.strip().split('=',1) for x in trailing_modifiers.split(','))) - - -def trailingModifierFactory(string): - ''' - Dispatcher of trailing modifiers. - - @param string: a string from a TaggedValue object with a trailing modifiers signal. - @type string: string - - @return: a class object - - @note: The dispatcher is currently very simple. Only one case is treated by the function. - `the function returns a class object inherited from the class dict if the trailing modifiers - exist, None if they don't. - ''' - - trailing_modifiers = TrailingModifier(string) - if not trailing_modifiers: - trailing_modifiers=None - return trailing_modifiers - - -################################################################################# -## TaggedValue treatment area ## -################################################################################# - - -class TaggedValue(object): - ''' - A couple 'tag:value' of an OBOEntry. - ''' - - _match_value = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)') - _split_comment = re.compile('^!| !') - _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")') - _match_bracket = re.compile('\[[^\]]*\]') - - def __init__(self,line): - ''' - Constructor of the class TaggedValue. - - @param line: a line of an OBOEntry composed of a tag and a value. - @type line: string - - @note: The constructor separates tags from right terms. 'value' is extracted - from right terms using a regular expression (value is at the beginning of the - string, between quotes or not). Then, 'comment' is extracted from the rest of the - string using another regular expression ('comment' is at the end of the string - after a '!'. By default, 'comment' is set to None). Finally, 'trailing_modifiers' - are extracted from the last string using another regular expression. - The tag, the value, the comment and the trailing_modifiers are saved. - ''' - - debug("tagValueParser : %s" % line) - - ## by default : - trailing_modifiers = None - comment = None - - ## the tag is saved. 
'right' is composed of the value, the comment and the trailing modifiers - tag,rigth = line.split(':',1) - - ## the value is saved - value = TaggedValue._match_value.search(rigth).group(0) - debug("Extracted value : %s" % value) - - ## if there is a value AND a sign of a comment or trailing modifiers - if value and value[-1] in '!{': - lvalue = len(value) - ## whatever it is a comment or trailing modifiers, it is saved into 'extra' - extra = rigth[lvalue-1:].strip() - ## a comment is extracted - extra =TaggedValue._split_comment.split(extra,1) - ## and saved if it exists - if len(extra)==2: - comment=extra[1].strip() - ## trailing modifiers are extracted - extra=extra[0] - trailing_modifiers = trailingModifierFactory(extra) - ## the value is cleaned of any comment or trailing modifiers signals - value = value[0:-1] - - if tag=='use_term': - tag='consider' - raise DeprecationWarning,"user_term is a deprecated tag, you should instead use consider" - - ## recording zone - self.value =value.strip() - self.tag = tag - self.__doc__=comment - self.trailing_modifiers=trailing_modifiers - - def __str__(self): - return str(self.value) - - def __repr__(self): - return '''"""%s"""''' % str(self) - - -class NameValue(TaggedValue): - ''' - A couple 'name:value' inherited from the class TaggedValue. Used to manage name tags. - ''' - - def __init__(self,line): - - ## no use of the TaggedValue constructor. The NameValue is very simple. - tag,rigth = line.split(':',1) - - ## recording zone - self.value = rigth.strip() - self.tag = 'name' - self.__doc__=None - self.trailing_modifiers=None - - - -class DefValue(TaggedValue): - ''' - A couple 'def:value' inherited from the class TaggedValue. Used to manage def tags. - ''' - - def __init__(self,line): - ''' - Constructor of the class DefValue. - - @param line: a line of an OBOEntry composed of a tag named 'def' and a value. - @type line: string - - @note: The constructor calls the TaggedValue constructor. A regular expression - is used to extract the 'definition' from TaggedValue.value (definition is a not - quoted TaggedValue.value). A regular expression is used to extract 'dbxrefs' - from the aggedValue.value without the definition (dbxrefs are between brackets - and definition can be so). Definition is saved as the new value of the DefValue. - dbxrefs are saved. - ''' - - ## use of the TaggedValue constructor - TaggedValue.__init__(self, line) - - ## definition, which is quoted, is extracted from the standard value of a TaggedValue. - definition = TaggedValue._match_quotedString.search(self.value).group(0) - - ## the standard value is cleaned of the definition. - cleanvalue = self.value.replace(definition,'') - cleanvalue = cleanvalue.replace(' ',' ') - - ## dbxrefs are searched into the rest of the standard value. - dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0) - - ## recording zone - self.tag = 'def' - ## the value of a DefValue is not the standard value but the definition. - self.value=definition - self.dbxrefs=xrefFactory(dbxrefs) - - -class SynonymValue(TaggedValue): - ''' - A couple 'synonym:value' inherited from the class TaggedValue. Used to manage - synonym tags, exact_synonym tags, broad_synonym tags and narrow_synonym tags. - ''' - - _match_scope = re.compile('(?<="")[^\[]*(?=\[|$)') - - def __init__(self,line): - ''' - Constructor of the class SynonymValue. - - @param line: a line of an OBOEntry composed of a tag named 'synonym' or - 'exact_synonym' or 'broad_synonym' or 'narrow_synonym' and a value. 
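The tag/value machinery above is easiest to see on a def line; a minimal sketch (the line content is illustrative):

    from obitools.obo.parser import taggedValueFactory

    dv = taggedValueFactory('def: "Catalysis of a phosphorylation '
                            'reaction." [GOC:curators] ! kinases')
    # dv.tag     -> 'def'
    # dv.value   -> 'Catalysis of a phosphorylation reaction.' (unquoted)
    # dv.dbxrefs -> a one-element list of Xref, reference 'GOC:curators'
    # dv.__doc__ -> 'kinases' (the trailing comment)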
- @type line: string - - @note: SynonymValue is composed of a tag, a value, a scope, a list of types and - dbxrefs. - The constructor calls the TaggedValue constructor. A regular expression - is used to extract 'definition' from TaggedValue.value (definition is a not - quoted TaggedValue.value). Definition is saved as the new value of the class - SynonymValue. - A regular expression is used to extract 'attributes' from the rest of the - string. Attributes may contain an optional synonym scope and an optional list - of synonym types. The scope is extracted from attributes or set by default to - 'RELATED'. It is saved as the scope of the class. The types are the rest of the - attributes and are saved as the list of types of the class. - For deprecated tags 'exact_synonym', 'broad_synonym' and 'narrow_synonym', tag - is set to 'synonym' and scope is set respectively to 'EXACT', 'BROAD' and 'NARROW'. - A regular expression is used to extract 'dbxrefs' from the TaggedValue.value - without the definition (dbxrefs are between brackets and definition can be so). - dbxrefs are saved. - ''' - - ## use of the TaggedValue constructor - TaggedValue.__init__(self, line) - - ## definition, which is quoted, is extracted from the standard value of a TaggedValue. - definition = TaggedValue._match_quotedString.search(self.value).group(0) - - ## the standard value is cleaned of the definition. - cleanvalue = self.value.replace(definition,'') - cleanvalue = cleanvalue.replace(' ',' ') - - ## 1) attributes are searched into the rest of the standard value. - ## 2) then they are stripped. - ## 3) then they are split on every ' '. - ## 4) finally they are ordered into a set. - attributes = set(SynonymValue._match_scope.search(cleanvalue).group(0).strip().split()) - - ## the scopes are the junction between the attributes and a set of specific terms. - scopes = attributes & set(['RELATED','EXACT','BROAD','NARROW']) - - ## the types are the rest of the attributes. - types = attributes - scopes - - ## this is a constraint of the OBO format - assert len(scopes)< 2,"Only one synonym scope allowed" - - ## the scope of the SynonymValue is into scopes or set by default to RELATED - if scopes: - scope = scopes.pop() - else: - scope = 'RELATED' - - ## Specific rules are defined for the following tags : - if self.tag == 'exact_synonym': - raise DeprecationWarning,'exact_synonym is a deprecated tag use instead synonym tag' - self.tag = 'synonym' - scope = 'EXACT' - - if self.tag == 'broad_synonym': - raise DeprecationWarning,'broad_synonym is a deprecated tag use instead synonym tag' - self.tag = 'synonym' - scope = 'BROAD' - - if self.tag == 'narrow_synonym': - raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead synonym tag' - self.tag = 'synonym' - scope = 'NARROW' - - if self.tag == 'systematic_synonym': - #raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead sysnonym tag' - self.tag = 'synonym' - scope = 'SYSTEMATIC' - - ## this is our own constraint. deprecated tags are not saved by this parser. - assert self.tag =='synonym',"%s synonym type is not managed" % self.tag - - ## dbxrefs are searched into the rest of the standard value. - dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0) - - ## recording zone - ## the value of a SynonymValue is not the standard value but the definition. 
- self.value = definition - self.dbxrefs = xrefFactory(dbxrefs) - self.scope = scope - self.types = list(types) - - def __eq__(self,b): - return ((self.value==b.value) and (self.dbxrefs==b.dbxrefs) - and (self.scope==b.scope) and (self.types==b.types) - and (self.__doc__==b.__doc__) and (self.tag==b.tag) - and (self.trailing_modifiers==b.trailing_modifiers)) - - def __hash__(self): - return (reduce(lambda x,y:x+y,(hash(z) for z in [self.__doc__, - self.value, - frozenset(self.dbxrefs), - self.scope, - frozenset(self.types), - self.tag, - self.trailing_modifiers]),0)) % (2**31) - - -class XrefValue(TaggedValue): - ''' - A couple 'xref:value' inherited from the class TaggedValue. Used to manage - xref tags. - ''' - - def __init__(self,line): - - ## use of the TaggedValue constructor - TaggedValue.__init__(self, line) - - ## use the same function as the dbxrefs - self.value=xrefFactory(self.value) - - if self.tag in ('xref_analog','xref_unk'): - raise DeprecationWarning,'%s is a deprecated tag use instead sysnonym tag' % self.tag - self.tag='xref' - - ## this is our own constraint. deprecated tags are not saved by this parser. - assert self.tag=='xref' - - -class RelationshipValue(TaggedValue): - ''' - A couple 'xref:value' inherited from the class TaggedValue. Used to manage - xref tags. - ''' - - def __init__(self,line): - - ## use of the TaggedValue constructor - TaggedValue.__init__(self, line) - - ## the value is split on the first ' '. - value = self.value.split(None,1) - - ## succesful split ! - if len(value)==2: - relationship=value[0] - term=value[1] - ## unsuccesful split. The relationship is set by default to IS_A - else: - relationship='is_a' - term=value[0] - - ## recording zone - self.value=term - self.relationship=relationship - - -class NamespaceValue(TaggedValue): - def __init__(self,line): - TaggedValue.__init__(self, line) - -class RemarkValue(TaggedValue): - def __init__(self,line): - TaggedValue.__init__(self, line) - label,value = self.value.split(':',1) - label = label.strip() - value = value.strip() - self.value=value - self.label=label - - -def taggedValueFactory(line): - ''' - A function used to dispatch lines of an OBOEntry between the class TaggedValue - and its inherited classes. - - @param line: a line of an OBOEntry composed of a tag and a value. 
- @type line: string - - @return: a class object - ''' - - if (line[0:9]=='namespace' or - line[0:17]=='default-namespace'): - return NamespaceValue(line) - ## DefValue is an inherited class of TaggedValue - elif line[0:3]=='def': - return DefValue(line) - ## SynonymValue is an inherited class of TaggedValue - elif ((line[0:7]=="synonym" and line[0:14]!="synonymtypedef") or - line[0:13]=="exact_synonym" or - line[0:13]=="broad_synonym" or - line[0:14]=="narrow_synonym"): - return SynonymValue(line) - ## XrefValue is an inherited class of TaggedValue - elif line[0:4]=='xref': - return XrefValue(line) - ## NameValue is an inherited class of TaggedValue - elif line[0:4]=='name': - return NameValue(line) - ## RelationshipValue is an inherited class of TaggedValue - elif (line[0:15]=='intersection_of' or - line[0:8] =='union_of' or - line[0:12]=='relationship'): - return RelationshipValue(line) - elif (line[0:6]=='remark'): - return RemarkValue(line) - ## each line is a couple : tag / value (and some more features) - else: - return TaggedValue(line) - - -################################################################################# -## Xref treatment area ## -################################################################################# - - - -class Xref(object): - ''' - A xref object of an OBOentry. It may be the 'dbxrefs' of SynonymValue and - DefValue objects or the 'value' of XrefValue objects. - ''' - - __splitdata__ = re.compile(' +(?=["{])') - - def __init__(self,ref): - if ref == '' : # - ref = None # - data = '' # - else : # Modifs JJ sinon erreur : list index out of range - data = Xref.__splitdata__.split(ref,1) # - ref = data[0] # - description=None - trailing_modifiers = None - if len(data)> 1: - extra = data[1] - description = TaggedValue._match_quotedString.search(extra) - if description is not None: - description = description.group(0) - extra.replace(description,'') - trailing_modifiers=trailingModifierFactory(extra) - self.reference=ref - self.description=description - self.trailing_modifiers=trailing_modifiers - - def __eq__(self,b): - return ((self.reference==b.reference) and (self.description==b.description) - and (self.trailing_modifiers==b.trailing_modifiers)) - - def __hash__(self): - return (reduce(lambda x,y:x+y,(hash(z) for z in [self.reference, - self.description, - self.trailing_modifiers]),0)) % (2**31) - - -def xrefFactory(string): - ''' - Dispatcher of xrefs. - - @param string: a string (between brackets) from an inherited TaggedValue object with a dbxrefs - signal (actually, the signal can only be found into SynonymValue and DefValue - objects) or a string (without brackets) from a XrefValue object. - @type string: string - - @return: a class object - - @note: The dispatcher treats differently the strings between brackets (from SynonymValue and - DefValue objects) and without brackets (from XrefValue objects). - ''' - - string = string.strip() - if string[0]=='[': - return [Xref(x.strip()) for x in string[1:-1].split(',')] - else: - return Xref(string) - - -################################################################################# -## Stanza treatment area ## -################################################################################# - - -class OBOEntry(dict): - ''' - An entry of an OBOFile. It can be a header (without a stanza name) or - a stanza (with a stanza name between brackets). It inherits from the class dict. 
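xrefFactory() mirrors the two contexts in which cross-references occur; a short sketch:

    from obitools.obo.parser import xrefFactory

    # bracketed form, as embedded in def: and synonym: values
    refs = xrefFactory('[PMID:12345, EC:2.7.11.1]')
    # refs -> two Xref objects with references 'PMID:12345' and 'EC:2.7.11.1'

    # bare form, as carried by an xref: tag
    one = xrefFactory('PMID:12345')
    # one.reference -> 'PMID:12345', one.description -> None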
- ''' - _match_stanza_name = re.compile('(?<=^\[)[^\]]*(?=\])') - - def __init__(self,stanza): - ## tests if it is the header of the OBO file (returns TRUE) or not (returns FALSE) - self.isHeader = stanza[0]!='[' - lines = stanza.split('\n') - ## not the header : there is a [stanzaName] - if not self.isHeader: - self.stanzaName = lines[0].strip()[1:-1] - lines=lines[1:] - self["stanza"] = [stanza.strip()] - - ## whatever the stanza is. - for line in lines: - ## each line is a couple : tag / value - taggedvalue = taggedValueFactory(line) - if taggedvalue.tag in self: - self[taggedvalue.tag].append(taggedvalue) - else: - self[taggedvalue.tag]=[taggedvalue] - - - def parseStanzaName(stanza): - sm = OBOEntry._match_stanza_name.search(stanza) - if sm: - return sm.group(0) - else: - return None - - parseStanzaName=staticmethod(parseStanzaName) - - - -class OBOTerm(OBOEntry): - ''' - A stanza named 'Term'. It inherits from the class OBOEntry. - ''' - def __init__(self,stanza): - - ## use of the OBOEntry constructor. - OBOEntry.__init__(self, stanza) - - assert self.stanzaName=='Term' - assert 'stanza' in self - assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id" - assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name" - assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term" - - assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term" - assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term" - - assert 'union_of' not in self or len(self['union_of'])>=2,"Only one union relationship is allowed for an OBO term" - assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"Only one intersection relationship is allowed for an OBO term" - - if self._isObsolete(): - #assert 'is_a' not in self - assert 'relationship' not in self - assert 'inverse_of' not in self - assert 'disjoint_from' not in self - assert 'union_of' not in self - assert 'intersection_of' not in self - - assert 'replaced_by' not in self or self._isObsolete() - assert 'consider' not in self or self._isObsolete() - - def _getStanza(self): - return self['stanza'][0] - - ## make-up functions. 
- def _getDefinition(self): - if 'def' in self: - return self['def'][0] - return None - - def _getId(self): - return self['id'][0] - - def _getNamespace(self): - return self['namespace'][0] - - def _getName(self): - return self['name'][0] - - def _getComment(self): - if 'comment' in self: - return self['comment'][0] - return None - - def _getAltIds(self): - if 'alt_id' in self: - return list(set(self.get('alt_id',None))) - return None - - def _getIsA(self): - if 'is_a' in self: - return list(set(self.get('is_a',None))) - return None - - def _getSynonym(self): - if 'synonym' in self : - return list(set(self.get('synonym',None))) - return None - - def _getSubset(self): - if self.get('subset',None) != None: - return list(set(self.get('subset',None))) - else: - return None - - def _getXref(self): - if 'xref' in self: - return list(set(self.get('xref',None))) - return None - - def _getRelationShip(self): - if 'relationship' in self: - return list(set(self.get('relationship',None))) - return None - - def _getUnion(self): - return list(set(self.get('union_of',None))) - - def _getIntersection(self): - return list(set(self.get('intersection_of',None))) - - def _getDisjonction(self): - return list(set(self.get('disjoint_from',None))) - - def _isObsolete(self): - return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true' - - def _getReplacedBy(self): - if 'replaced_by' in self: - return list(set(self.get('replaced_by',None))) - return None - - def _getConsider(self): - if 'consider' in self: - return list(set(self.get('consider',None))) - return None - - ## automatically make-up ! - stanza = property(_getStanza,None,None) - definition = property(_getDefinition,None,None) - id = property(_getId,None,None) - namespace = property(_getNamespace,None,None) - name = property(_getName,None,None) - comment = property(_getComment,None,None) - alt_ids = property(_getAltIds,None,None) - is_a = property(_getIsA,None,None) - synonyms = property(_getSynonym,None,None) - subsets = property(_getSubset,None,None) - xrefs = property(_getXref,None,None) - relationship = property(_getRelationShip,None,None) - union_of = property(_getUnion,None,None) - intersection_of = property(_getIntersection,None,None) - disjoint_from = property(_getDisjonction,None,None) - is_obsolete = property(_isObsolete,None,None) - replaced_by = property(_getReplacedBy,None,None) - consider = property(_getConsider,None,None) - - -def OBOEntryFactory(stanza): - ''' - Dispatcher of stanza. - - @param stanza: a stanza composed of several lines. - @type stanza: text - - @return: an C{OBOTerm} | C{OBOEntry} instance - - @note: The dispatcher treats differently the stanza which are OBO "Term" - and the others. - ''' - - stanzaType = OBOEntry.parseStanzaName(stanza) - - if stanzaType=="Term": - return OBOTerm(stanza) - else: - return OBOEntry(stanza) - -def OBOEntryIterator(file): - entries = stanzaIterator(file) - for e in entries: - debug(e) - yield OBOEntryFactory(e) - - \ No newline at end of file diff --git a/obitools/options/__init__.py b/obitools/options/__init__.py deleted file mode 100644 index d6793d6..0000000 --- a/obitools/options/__init__.py +++ /dev/null @@ -1,137 +0,0 @@ -""" - Module providing high level functions to manage command line options. 
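With the OBOTerm properties in place, scanning a whole ontology reduces to a few lines; a minimal sketch assuming an .obo file path (the name is illustrative):

    from obitools.obo.parser import OBOEntryIterator

    obsolete = []
    for entry in OBOEntryIterator('my_ontology.obo'):
        if not entry.isHeader and entry.stanzaName == 'Term':
            # entry.id and entry.name are TaggedValue objects; str() gives the text
            if entry.is_obsolete:
                obsolete.append(str(entry.id))
    # obsolete now holds the identifier of every [Term] flagged is_obsolete: true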
-""" -import logging -import sys - -from logging import debug - -from optparse import OptionParser - -from obitools.utils import universalOpen -from obitools.utils import fileSize -from obitools.utils import universalTell -from obitools.utils import progressBar -from obitools.format.options import addInputFormatOption, addInOutputOption,\ - autoEntriesIterator -import time - - - -def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None): - ''' - Build an option manager fonction. that is able to parse - command line options of the script. - - @param optionDefinitions: list of function describing a set of - options. Each function must allows as - unique parametter an instance of OptionParser. - @type optionDefinitions: list of functions. - - @param entryIterator: an iterator generator function returning - entries from the data files. - - @type entryIterator: an iterator generator function with only one - parametter of type file - ''' - parser = OptionParser(progdoc) - parser.add_option('--DEBUG', - action="store_true", dest="debug", - default=False, - help="Set logging in debug mode") - - parser.add_option('--no-psyco', - action="store_true", dest="noPsyco", - default=False, - help="Don't use psyco even if it installed") - - parser.add_option('--without-progress-bar', - action="store_false", dest="progressbar", - default=True, - help="desactivate progress bar") - - checkFormat=False - for f in optionDefinitions: - if f == addInputFormatOption or f == addInOutputOption: - checkFormat=True - f(parser) - - def commandLineAnalyzer(): - options,files = parser.parse_args() - if options.debug: - logging.root.setLevel(logging.DEBUG) - - if checkFormat: - ei=autoEntriesIterator(options) - else: - ei=entryIterator - - i = allEntryIterator(files,ei,with_progress=options.progressbar) - return options,i - - return commandLineAnalyzer - -_currentInputFileName=None -_currentFile = None -_currentFileSize = None - -def currentInputFileName(): - return _currentInputFileName - -def currentInputFile(): - return _currentFile - -def currentFileSize(): - return _currentFileSize - -def currentFileTell(): - return universalTell(_currentFile) - -def fileWithProgressBar(file,step=100): - try: - size = currentFileSize() - except: - size = None - - def fileBar(): - pos=1 - progressBar(pos, size, True,currentInputFileName()) - for l in file: - progressBar(currentFileTell,size,head=currentInputFileName()) - yield l - print >>sys.stderr,'' - if size is None: - return file - else: - f = fileBar() - return f - - -def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102): - global _currentFile - global _currentInputFileName - global _currentFileSize - if files : - for f in files: - _currentInputFileName=f - f = universalOpen(f) - _currentFile=f - _currentFileSize=fileSize(_currentFile) - debug(f) - if with_progress: - f=fileWithProgressBar(f,step=histo_step) - if entryIterator is None: - for line in f: - yield line - else: - for entry in entryIterator(f): - yield entry - else: - if entryIterator is None: - for line in sys.stdin: - yield line - else: - for entry in entryIterator(sys.stdin): - yield entry - - \ No newline at end of file diff --git a/obitools/options/bioseqcutter.py b/obitools/options/bioseqcutter.py deleted file mode 100644 index 77189af..0000000 --- a/obitools/options/bioseqcutter.py +++ /dev/null @@ -1,85 +0,0 @@ -from logging import debug - -def _beginOptionCallback(options,opt,value,parser): - def beginCutPosition(seq): - debug("begin = %s" % value ) - if hasattr(options, 
'taxonomy') and options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq} - else: - environ = {'sequence':seq} - - return eval(value,environ,seq) - 1 - - parser.values.beginCutPosition=beginCutPosition - -def _endOptionCallback(options,opt,value,parser): - def endCutPosition(seq): - if hasattr(options, 'taxonomy') and options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq} - else: - environ = {'sequence':seq} - - return eval(value,environ,seq) - - parser.values.endCutPosition=endCutPosition - - - - -def addSequenceCuttingOptions(optionManager): - - optionManager.add_option('-b','--begin', - action="callback", callback=_beginOptionCallback, - metavar="", - type="string", - help="python expression to be evaluated in the " - "sequence context. The attribute name can be " - "used in the expression as variable name. " - "An extra variable named 'sequence' refers " - "to the sequence object itself. ") - - optionManager.add_option('-e','--end', - action="callback", callback=_endOptionCallback, - metavar="", - type="string", - help="python expression to be evaluated in the " - "sequence context. The attribute name can be " - "used in the expression as variable name ." - "An extra variable named 'sequence' refers" - "to the sequence object itself. ") - - -def cutterGenerator(options): - - def sequenceCutter(seq): - - lseq = len(seq) - - if hasattr(options, 'beginCutPosition'): - begin = int(options.beginCutPosition(seq)) - else: - begin = 0 - - if hasattr(options, 'endCutPosition'): - end = int(options.endCutPosition(seq)) - else: - end = lseq - - if begin > 0 or end < lseq: - seq = seq[begin:end] - seq['subsequence']="%d..%d" % (begin+1,end) - - return seq - - return sequenceCutter - -def cutterIteratorGenerator(options): - _cutter = cutterGenerator(options) - - def sequenceCutterIterator(seqIterator): - for seq in seqIterator: - yield _cutter(seq) - - return sequenceCutterIterator - - diff --git a/obitools/options/bioseqedittag.py b/obitools/options/bioseqedittag.py deleted file mode 100644 index 6eb1c36..0000000 --- a/obitools/options/bioseqedittag.py +++ /dev/null @@ -1,237 +0,0 @@ -import sys -from obitools.options.taxonomyfilter import loadTaxonomyDatabase -def addSequenceEditTagOptions(optionManager): - - optionManager.add_option('--rank', - action="store_true", dest='addrank', - default=False, - help="add a rank attribute to the sequence " - "indicating the sequence position in the input data") - - optionManager.add_option('-R','--rename-tag', - action="append", - dest='renameTags', - metavar="", - type="string", - default=[], - help="change tag name from OLD_NAME to NEW_NAME") - - optionManager.add_option('--delete-tag', - action="append", - dest='deleteTags', - metavar="", - type="string", - default=[], - help="delete tag TAG_NAME") - - optionManager.add_option('-S','--set-tag', - action="append", - dest='setTags', - metavar="", - type="string", - default=[], - help="Add a new tag named TAG_NAME with " - "a value computed from PYTHON_EXPRESSION") - - optionManager.add_option('--set-identifier', - action="store", - dest='setIdentifier', - metavar="", - type="string", - default=None, - help="Set sequence identifier with " - "a value computed from PYTHON_EXPRESSION") - - optionManager.add_option('--set-sequence', - action="store", - dest='setSequence', - metavar="", - type="string", - default=None, - help="Change the sequence itself with " - "a value computed from PYTHON_EXPRESSION") - - 
optionManager.add_option('-T','--set-definition', - action="store", - dest='setDefinition', - metavar="", - type="string", - default=None, - help="Set sequence definition with " - "a value computed from PYTHON_EXPRESSION") - - optionManager.add_option('-O','--only-valid-python', - action="store_true", - dest='onlyValid', - default=False, - help="only valid python expressions are allowed") - - optionManager.add_option('-C','--clear', - action="store_true", - dest='clear', - default=False, - help="clear all tags associated to the sequences") - - optionManager.add_option('-k','--keep', - action='append', - dest='keep', - default=[], - type="string", - help="only keep this tag") - - optionManager.add_option('--length', - action="store_true", - dest='length', - default=False, - help="add seqLength tag with sequence length") - - optionManager.add_option('--with-taxon-at-rank', - action='append', - dest='taxonrank', - default=[], - type="string", - help="add taxonomy annotation at a speciefied rank level") - - optionManager.add_option('-m','--mcl', - action="store", dest="mcl", - metavar="", - type="string", - default=None, - help="split following mcl graph clustering partition") - - -def readMCLFile(file): - partition=1 - parts = {} - for l in file: - for seq in l.strip().split(): - parts[seq]=partition - partition+=1 - return parts - - - - -def sequenceTaggerGenerator(options): - toDelete = options.deleteTags[:] - toRename = [x.split(':',1) for x in options.renameTags if len(x.split(':',1))==2] - toSet = [x.split(':',1) for x in options.setTags if len(x.split(':',1))==2] - newId = options.setIdentifier - newDef = options.setDefinition - newSeq = options.setSequence - clear = options.clear - keep = set(options.keep) - length = options.length - counter = [0] - loadTaxonomyDatabase(options) - if options.taxonomy is not None: - annoteRank=options.taxonrank - else: - annoteRank=[] - - if options.mcl is not None: - parts = readMCLFile(open(options.mcl)) - else: - parts = False - - def sequenceTagger(seq): - - if counter[0]>=0: - counter[0]+=1 - - if clear or keep: - ks = seq.keys() - for k in ks: - if k not in keep: - del seq[k] - else: - for i in toDelete: - if i in seq: - del seq[i] - for o,n in toRename: - if o in seq: - seq[n]=seq[o] - del seq[o] - - for rank in annoteRank: - if 'taxid' in seq: - taxid = seq['taxid'] - if taxid is not None: - rtaxid = options.taxonomy.getTaxonAtRank(taxid,rank) - if rtaxid is not None: - scn = options.taxonomy.getScientificName(rtaxid) - else: - scn=None - seq[rank]=rtaxid - seq["%s_name"%rank]=scn - - if parts and seq.id in parts: - seq['cluster']=parts[seq.id] - - if options.addrank: - seq['rank']=counter[0] - - for i,v in toSet: - try: - if options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]} - else: - environ = {'sequence':seq, 'counter':counter[0]} - - val = eval(v,environ,seq) - except Exception,e: - if options.onlyValid: - raise e - val = v - seq[i]=val - - if length: - seq['seqLength']=len(seq) - - if newId is not None: - try: - if options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]} - else: - environ = {'sequence':seq, 'counter':counter[0]} - - val = eval(newId,environ,seq) - except Exception,e: - if options.onlyValid: - raise e - val = newId - seq.id=val - if newDef is not None: - try: - if options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]} - else: - environ = {'sequence':seq, 
'counter':counter[0]} - - val = eval(newDef,environ,seq) - except Exception,e: - if options.onlyValid: - raise e - val = newDef - seq.definition=val - - if newSeq is not None: - try: - if options.taxonomy is not None: - environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]} - else: - environ = {'sequence':seq, 'counter':counter[0]} - - val = eval(newSeq,environ,seq) - except Exception,e: - if options.onlyValid: - raise e - val = newSeq - if hasattr(seq, '_seq'): - seq._seq=str(val).lower() - if 'seqLength' in seq: - seq['seqLength']=len(seq) - - return seq - - return sequenceTagger \ No newline at end of file diff --git a/obitools/options/bioseqfilter.py b/obitools/options/bioseqfilter.py deleted file mode 100644 index d52c9b5..0000000 --- a/obitools/options/bioseqfilter.py +++ /dev/null @@ -1,179 +0,0 @@ -import re - -from obitools.options.taxonomyfilter import addTaxonomyFilterOptions -from obitools.options.taxonomyfilter import taxonomyFilterGenerator - -def _sequenceOptionCallback(options,opt,value,parser): - parser.values.sequencePattern = re.compile(value,re.I) - -def _defintionOptionCallback(options,opt,value,parser): - parser.values.definitionPattern = re.compile(value) - -def _identifierOptionCallback(options,opt,value,parser): - parser.values.identifierPattern = re.compile(value) - -def _attributeOptionCallback(options,opt,value,parser): - if not hasattr(options, 'attributePatterns'): - parser.values.attributePatterns={} - attribute,pattern=value.split(':',1) - parser.values.attributePatterns[attribute]=re.compile(pattern) - -def _predicatOptionCallback(options,opt,value,parser): - if not hasattr(options, 'predicats'): - options.predicats=[] - parser.values.predicats.append(value) - - -def addSequenceFilteringOptions(optionManager): - - optionManager.add_option('-s','--sequence', - action="callback", callback=_sequenceOptionCallback, - metavar="", - type="string", - help="regular expression pattern used to select " - "the sequence. The pattern is case insensitive") - - optionManager.add_option('-D','--definition', - action="callback", callback=_defintionOptionCallback, - type="string", - metavar="", - help="regular expression pattern matched against " - "the definition of the sequence. " - "The pattern is case sensitive") - - optionManager.add_option('-I','--identifier', - action="callback", callback=_identifierOptionCallback, - type="string", - metavar="", - help="regular expression pattern matched against " - "the identifier of the sequence. " - "The pattern is case sensitive") - - optionManager.add_option('-a','--attribute', - action="callback", callback=_attributeOptionCallback, - type="string", - metavar=":", - help="regular expression pattern matched against " - "the attributes of the sequence. " - "the value of this atribute is of the form : " - "attribute_name:regular_pattern. " - "The pattern is case sensitive." - "Several -a option can be used on the same " - "commande line.") - - optionManager.add_option('-A','--has-attribute', - action="append", - type="string", - dest="has_attribute", - default=[], - metavar="", - help="select sequence with attribute " - "defined") - - optionManager.add_option('-p','--predicat', - action="append", dest="predicats", - metavar="", - help="python boolean expression to be evaluated in the " - "sequence context. The attribute name can be " - "used in the expression as variable name ." - "An extra variable named 'sequence' refers" - "to the sequence object itself. 
" - "Several -p option can be used on the same " - "commande line.") - - optionManager.add_option('-L','--lmax', - action='store', - metavar="<##>", - type="int",dest="lmax", - help="keep sequences shorter than lmax") - - optionManager.add_option('-l','--lmin', - action='store', - metavar="<##>", - type="int",dest="lmin", - help="keep sequences longer than lmin") - - optionManager.add_option('-v','--inverse-match', - action='store_true', - default=False, - dest="invertedFilter", - help="revert the sequence selection " - "[default : %default]") - - addTaxonomyFilterOptions(optionManager) - - - - - -def filterGenerator(options): - taxfilter = taxonomyFilterGenerator(options) - - def sequenceFilter(seq): - good = True - - if hasattr(options, 'sequencePattern'): - good = bool(options.sequencePattern.search(str(seq))) - - if good and hasattr(options, 'identifierPattern'): - good = bool(options.identifierPattern.search(seq.id)) - - if good and hasattr(options, 'definitionPattern'): - good = bool(options.definitionPattern.search(seq.definition)) - - if good : - good = reduce(lambda x,y:x and y, - (k in seq for k in options.has_attribute), - True) - - if good and hasattr(options, 'attributePatterns'): - good = (reduce(lambda x,y : x and y, - (bool(options.attributePatterns[p].search(str(seq[p]))) - for p in options.attributePatterns - if p in seq),True) - and - reduce(lambda x,y : x and y, - (bool(p in seq) - for p in options.attributePatterns),True) - ) - - if good and hasattr(options, 'predicats') and options.predicats is not None: - if options.taxonomy is not None: - e = {'taxonomy' : options.taxonomy,'sequence':seq} - else: - e = {'sequence':seq} - - good = (reduce(lambda x,y: x and y, - (bool(eval(p,e,seq)) - for p in options.predicats),True) - ) - - if good and hasattr(options, 'lmin') and options.lmin is not None: - good = len(seq) >= options.lmin - - if good and hasattr(options, 'lmax') and options.lmax is not None: - good = len(seq) <= options.lmax - - if good: - good = taxfilter(seq) - - if hasattr(options, 'invertedFilter') and options.invertedFilter: - good=not good - - - return good - - return sequenceFilter - -def sequenceFilterIteratorGenerator(options): - filter = filterGenerator(options) - - def sequenceFilterIterator(seqIterator): - for seq in seqIterator: - if filter(seq): - yield seq - - return sequenceFilterIterator - - - \ No newline at end of file diff --git a/obitools/options/taxonomyfilter.py b/obitools/options/taxonomyfilter.py deleted file mode 100644 index 5526c79..0000000 --- a/obitools/options/taxonomyfilter.py +++ /dev/null @@ -1,6 +0,0 @@ -from obitools.ecopcr.options import addTaxonomyDBOptions, \ - addTaxonomyFilterOptions, \ - loadTaxonomyDatabase, \ - taxonomyFilterGenerator, \ - taxonomyFilterIteratorGenerator - diff --git a/obitools/parallel/__init__.py b/obitools/parallel/__init__.py deleted file mode 100644 index 2aa1b07..0000000 --- a/obitools/parallel/__init__.py +++ /dev/null @@ -1,99 +0,0 @@ -import threading - -class TaskPool(object): - - def __init__(self,iterable,function,count=2): - self.pool = [] - self.queue= [] - self.plock= threading.Lock() - self.qlock= threading.Lock() - self.function=function - self.event=threading.Event() - self.iterable=iterable - for i in xrange(count): - Task(self) - - def register(self,task): - self.plock.acquire() - self.pool.append(task) - self.plock.release() - self.ready(task) - - def unregister(self,task): - task.thread.join() - self.plock.acquire() - self.pool.remove(task) - self.plock.release() - - - def 
ready(self,task): - self.qlock.acquire() - self.queue.append(task) - self.qlock.release() - self.event.set() - - def __iter__(self): - for data in self.iterable: - while not self.queue: - self.event.wait() - self.event.clear() - self.qlock.acquire() - task=self.queue.pop(0) - self.qlock.release() - if hasattr(task, 'rep'): - yield task.rep - #print "send ",data - if isinstance(data,dict): - task.submit(**data) - else: - task.submit(*data) - - while self.pool: - self.pool[0].finish() - while self.queue: - self.event.clear() - self.qlock.acquire() - task=self.queue.pop(0) - self.qlock.release() - if hasattr(task, 'rep'): - yield task.rep - - - - - -class Task(object): - def __init__(self,pool): - self.pool = pool - self.lock = threading.Lock() - self.dataOk = threading.Event() - self.repOk = threading.Event() - self.args = None - self.kwargs=None - self.stop=False - self.thread = threading.Thread(target=self) - self.thread.start() - self.pool.register(self) - - def __call__(self): - self.dataOk.wait() - while(not self.stop): - self.lock.acquire() - self.dataOk.clear() - self.rep=self.pool.function(*self.args,**self.kwargs) - self.pool.ready(self) - self.lock.release() - self.dataOk.wait() - - def submit(self,*args,**kwargs): - self.args=args - self.kwargs=kwargs - self.dataOk.set() - - def finish(self): - self.lock.acquire() - self.stop=True - self.dataOk.set() - self.pool.unregister(self) - - diff --git a/obitools/parallel/jobqueue.py b/obitools/parallel/jobqueue.py deleted file mode 100644 index 9df4804..0000000 --- a/obitools/parallel/jobqueue.py +++ /dev/null @@ -1,183 +0,0 @@ -import threading -from logging import warning,info -from time import sleep,time - -from obitools.parallel import TaskPool - - -class JobPool(dict): - ''' - JobPool is dedicated to manage a job queue. These jobs - will run in a limited number of thread. - ''' - - def __init__(self,count,precision=0.01): - ''' - - @param count: number of thread dedicated to this JobPool - @type count: int - @param precision: delay between two check for new job (in second) - @type precision: float - ''' - self._iterator = JobIterator(self) - self._taskPool = TaskPool(self._iterator, - self._runJob, - count) - self._precision=precision - self._toRun=set() - self._runnerThread = threading.Thread(target=self._runner) - self._runnerThread.start() - self._finalyzed=False - - def _runner(self): - for rep in self._taskPool: - info('Job %d finnished' % id(rep)) - info('All jobs in %d JobPool finished' % id(self)) - - def _jobIterator(self): - return self._iterator - - def _runJob(self,job): - job.started= time() - info('Job %d started' % id(job)) - job.result = job() - job.ended = time() - job.finished=True - return job - - def submit(self,job,priority=1.0,userid=None): - ''' - Submit a new job to the JobPool. - - @param job: the new submited job - @type job: Job instance - @param priority: priority level of this job (higher is better) - @type priority: float - @param userid: a user identifier (Default is None) - - @return: job identifier - @rtype: int - ''' - - assert not self._finalyzed,\ - "This jobPool does not accept new job" - if job.submitted is not None: - warning('Job %d was already submitted' % id(job)) - return id(job) - - job.submitted = time() - job.priority = priority - job.userid = userid - i=id(job) - job.id=id - self[i]=job - self._toRun.add(job) - - info('Job %d submitted' % i) - - return i - - def finalyze(self): - ''' - Indicate to the JobPool, that no new jobs will - be submitted. 
- ''' - self._iterator.finalyze() - self._finalyzed=True - - def __del__(self): - self.finalyze() - - -class JobIterator(object): - def __init__(self,pool): - self._pool = pool - self._finalyze=False - self._nextLock=threading.Lock() - - - def __iter__(self): - return self - - def finalyze(self): - ''' - Indicate to the JobIterator, that no new jobs will - be submitted. - ''' - self._finalyze=True - - - def next(self): - ''' - - @return: the next job to run - @rtype: Job instance - ''' - self._nextLock.acquire() - while self._pool._toRun or not self._finalyze: - rep = None - maxScore=0 - for k in self._pool._toRun: - s = k.runScore() - if s > maxScore: - maxScore=s - rep=k - if rep is not None: - self._pool._toRun.remove(rep) - self._nextLock.release() - return (rep,) - sleep(self._pool._precision) - self._nextLock.release() - info('No more jobs in %d JobPool' % id(self._pool)) - raise StopIteration - - - -class Job(object): - - def __init__(self,pool=None,function=None,*args,**kwargs): - ''' - Create a new job - - @param pool: the jobpool used to run job. Can be None to not - execute the job immediately. - @type pool: JobPool instance - - @param function: the function to run for the job - @type function: callable object - - @param args: parametters for function call - @param kwargs: named parametters for function call - - @precondition: function cannot be None - ''' - assert function is not None - self._args=args - self._kwargs = kwargs - self._function = function - self.running = False - self.finished= False - self.submitted = None - self.priority = None - self.userid = None - - if pool is not None: - pool.submit(self) - - def runScore(self): - ''' - @return: the score used to ordonnance job in the queue - @rtype: C{float} - ''' - - return (time() - self.submitted) * self.priority - - def __call__(self): - return self._function(*self._args,**self._kwargs) - - - - - - - \ No newline at end of file diff --git a/obitools/phylogeny/__init__.py b/obitools/phylogeny/__init__.py deleted file mode 100644 index 8eb1587..0000000 --- a/obitools/phylogeny/__init__.py +++ /dev/null @@ -1,119 +0,0 @@ - -from obitools.graph.tree import Forest,TreeNode -from obitools.graph import Edge - - - -class PhylogenicTree(Forest): - - def __init__(self,label='G',indexer=None,nodes=None,edges=None): - Forest.__init__(self, label, indexer, nodes, edges) - self.root=None - self.comment=None - - def addNode(self,node=None,index=None,**data): - if node is None and index is None: - node = '__%d' % (len(self._node)+1) - - return Forest.addNode(self, node, index, **data) - - def getNode(self,node=None,index=None): - if index is None: - index = self._index.getIndex(node, True) - return PhylogenicNode(index,self) - - def getEdge(self,node1=None,node2=None,index1=None,index2=None): - ''' - - @param node1: - @type node1: - @param node2: - @type node2: - @param index1: - @type index1: - @param index2: - @type index2: - ''' - node1=self.getNode(node1, index1) - node2=self.getNode(node2, index2) - return PhylogenicEdge(node1,node2) - - - -class PhylogenicNode(TreeNode): - - def getLabel(self): - label = TreeNode.getLabel(self) - if label[0:2]=='__': - return None - else: - return label - - def __str__(self): - - if self.index in self.graph._node_attrs: - keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"')) - for x in self.graph._node_attrs[self.index].iteritems()] - ) - else: - keys='' - - if self.label is None: - label='' - shape='point' - else: - label=self.label - shape='box' - - return '%d [label="%s" shape="%s" 
%s]' % (self.index,str(label).replace('"','\\"'),shape,keys)
-
-    def distanceTo(self,node=None,index=None):
-        '''
-        Compute the branch length between the two nodes.
-        If distances are not specified for this tree, None is returned.
-
-        @param node: a node label or None
-        @param index: a node index or None. The parameter index
-                      has priority over the parameter node.
-        @type index: int
-
-        @return: the evolutionary distance between the two nodes
-        @rtype: int, float or None
-        '''
-        path = self.shortestPathTo(node, index)
-
-        start = path.pop(0)
-        dist=0
-        for dest in path:
-            edge = self.graph.getEdge(index1=start,index2=dest)
-            if 'distance' in edge:
-                dist+=edge['distance']
-            else:
-                return None
-            start=dest
-
-        return dist
-
-    label = property(getLabel, None, None, "Label of the node")
-
-class PhylogenicEdge(Edge):
-
-    def __str__(self):
-        e = (self.node1.index,self.node2.index)
-        if e in self.graph._edge_attrs:
-            keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
-                                      for x in self.graph._edge_attrs[e].iteritems()
-                                      if x[0] not in ('distance','bootstrap')]
-                                    )
-        else:
-            keys = ""
-
-        if self.directed:
-            link='->'
-        else:
-            link='--'
-
-        return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys)
-
diff --git a/obitools/phylogeny/newick.py b/obitools/phylogeny/newick.py
deleted file mode 100644
index cf0330c..0000000
--- a/obitools/phylogeny/newick.py
+++ /dev/null
@@ -1,123 +0,0 @@
-import re
-import sys
-
-from obitools.utils import universalOpen
-from obitools.phylogeny import PhylogenicTree
-
-def subNodeIterator(data):
-    level=0
-    start = 1
-    if data[0]=='(':
-        for i in xrange(1,len(data)):
-            c=data[i]
-            if c=='(':
-                level+=1
-            elif c==')':
-                level-=1
-            if c==',' and not level:
-                yield data[start:i]
-                start = i+1
-        yield data[start:i]
-    else:
-        yield data
-
-
-_nodeParser=re.compile('\s*(?P<subnodes>\(.*\))?(?P<name>[^ :]+)? *(?P<bootstrap>[0-9.]+)?(:(?P<distance>-?[0-9.]+))?')
-
-def nodeParser(data):
-    parsedNode = _nodeParser.match(data).groupdict(0)
-    if not parsedNode['name']:
-        parsedNode['name']=None
-
-    if not parsedNode['bootstrap']:
-        parsedNode['bootstrap']=None
-    else:
-        parsedNode['bootstrap']=float(parsedNode['bootstrap'])
-
-    if not parsedNode['distance']:
-        parsedNode['distance']=None
-    else:
-        parsedNode['distance']=float(parsedNode['distance'])
-
-    if not parsedNode['subnodes']:
-        parsedNode['subnodes']=None
-
-    return parsedNode
-
-_cleanTreeData=re.compile('\s+')
-
-def treeParser(data,tree=None,parent=None):
-    if tree is None:
-        tree = PhylogenicTree()
-    data = _cleanTreeData.sub(' ',data).strip()
-
-    parsedNode = nodeParser(data)
-
-    if parent is not None:
-        son,parent = tree.addEdge(node1=parsedNode['name'],
-                                  index2=parent,
-                                  distance=parsedNode['distance'],
-                                  bootstrap=parsedNode['bootstrap'])
-    else:
-        son = tree.addNode(node1=parsedNode['name'])
-        tree.root=son
-
-    if parsedNode['subnodes']:
-        for subnode in subNodeIterator(parsedNode['subnodes']):
-            treeParser(subnode,tree,son)
-
-    return tree
-
-_treecomment=re.compile('\[.*\]')
-
-def treeIterator(file):
-    file = universalOpen(file)
-    data = file.read()
-
-    comment = _treecomment.findall(data)
-    data=_treecomment.sub('',data).strip()
-
-    if comment:
-        comment=comment[0]
-    else:
-        comment=None
-    for tree in data.split(';'):
-        t = treeParser(tree)
-        if comment:
-            t.comment=comment
-        yield t
-
-def nodeWriter(tree,node,deep=0):
-    name = node._name
-    if name is None:
-        name=''
-
-    distance=node._dist
-    if distance is None:
-        distance=''
-    else:
-        distance = ':%6.5f' % distance
-
-    bootstrap=node._bootstrap
-    if bootstrap is None:
-        bootstrap=''
-    else:
-        bootstrap=' %d' % int(bootstrap)
-
-    nodeseparator = ',\n' + ' ' * (deep+1)
-
-    subnodes = nodeseparator.join([nodeWriter(tree, x, deep+1)
-                                   for x in tree.childNodeIterator(node)])
-    if subnodes:
-        subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
-
-    return '%s%s%s%s' % (subnodes,name,bootstrap,distance)
-
-def treeWriter(tree,startnode=None):
-    if startnode is not None:
-        root=startnode
-    else:
-        root = tree.getRoot()
-    return nodeWriter(tree,root)+';'
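For orientation, the Newick module deleted above was driven through `treeParser` and `treeIterator`. A minimal usage sketch, assuming a Python 2 environment with this obitools revision importable; the Newick string is hypothetical example data:

    from obitools.phylogeny.newick import treeParser

    # Parse one Newick-format tree; an internal node may carry a
    # bootstrap value before the ':' and a branch length after it.
    tree = treeParser('((A:0.1,B:0.2)0.9:0.05,C:0.3)')
    print tree.root

`treeIterator(filename)` wraps the same parser to yield one tree per ';'-terminated record found in a file.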
diff --git a/obitools/profile/__init__.py b/obitools/profile/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/profile/_profile.so b/obitools/profile/_profile.so
deleted file mode 100755
index 7f52483..0000000
Binary files a/obitools/profile/_profile.so and /dev/null differ
diff --git a/obitools/sample.py b/obitools/sample.py
deleted file mode 100644
index 4894c94..0000000
--- a/obitools/sample.py
+++ /dev/null
@@ -1,76 +0,0 @@
-'''
-Created on 31 oct. 2009
-
-@author: coissac
-'''
-from random import shuffle, randrange
-
-def lookfor(x,cumsum):
-    lmax=len(cumsum)
-    lmin=0
-
-    assert x < cumsum[-1],"x must be smaller than the cumulative sum"
-
-    while((lmax - lmin) > 0):
-
-        i=(lmax+lmin)/2
-        #print i,lmin,lmax
-        if (x<cumsum[i] and (x>cumsum[i-1])):
-            #print "return 1 :",i,cumsum[i-1],"<",x,"<",cumsum[i]
-            return i
-        elif cumsum[i]==x:
-            while cumsum[i]==x:
-                i+=1
-            #print "return 2 :",i,cumsum[i],"<",x,"<",cumsum[i+1]
-            return i
-        elif x<cumsum[i]:
-            lmax=i
-        else:
-            lmin=i+1
-
-def weigthedSample(events,size):
-    entries = [k for k in events.iterkeys() if events[k]>0]
-    shuffle(entries)
-    cumul=[]
-    s=0
-    for e in entries:
-        s+=events[e]
-        cumul.append(s)
-
-    #print cumul
-    result={}
-
-    for t in xrange(size):
-        e=lookfor(randrange(s), cumul)
-        k=entries[e]
-        result[k]=result.get(k,0)+1
-
-    return result
-
-def weigthedSampleWithoutReplacement(events,size):
-    entries = [k for k in events.iterkeys() if events[k]>0]
-    shuffle(entries)
-    cumul=[]
-    s=0
-    for e in entries:
-        s+=events[e]
-        cumul.append(s)
-
-    #print cumul
-    result={}
-
-    for t in xrange(size):
-        # print s,cumul,
-        e=lookfor(randrange(s), cumul)
-        # print e
-        k=entries[e]
-        for x in xrange(e,len(cumul)):
-            cumul[x]-=1
-        s-=1
-        result[k]=result.get(k,0)+1
-
-    return result
\ No newline at end of file
diff --git a/obitools/seqdb/__init__.py b/obitools/seqdb/__init__.py
deleted file mode 100644
index 274cbad..0000000
--- a/obitools/seqdb/__init__.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from obitools import NucSequence,AASequence
-from obitools.format.genericparser import genericEntryIteratorGenerator
-from obitools.location.feature import featureIterator
-
-from itertools import chain
-
-class AnnotatedSequence(object):
-
-    def __init__(self,header,featureTable,secondaryAcs):
-        self._header = header
-        self._featureTableText = featureTable
-        self._featureTable=None
-        self._secondaryAcs=secondaryAcs
-        self._hasTaxid=None
-
-    def getHeader(self):
-        return self._header
-
-
-    def
- - - -nucEntryIterator=genericEntryIteratorGenerator(endEntry='^//') -aaEntryIterator=genericEntryIteratorGenerator(endEntry='^//') - - - diff --git a/obitools/seqdb/blastdb/__init__.py b/obitools/seqdb/blastdb/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/obitools/seqdb/dnaparser.py b/obitools/seqdb/dnaparser.py deleted file mode 100644 index 85b82a2..0000000 --- a/obitools/seqdb/dnaparser.py +++ /dev/null @@ -1,16 +0,0 @@ -from obitools.format.sequence import embl,fasta,genbank - -class UnknownFormatError(Exception): - pass - -def whichParser(seq): - if seq[0]=='>': - return fasta.fastaNucParser - if seq[0:2]=='ID': - return embl.emblParser - if seq[0:5]=='LOCUS': - return genbank.genbankParser - raise UnknownFormatError,"Unknown nucleic format" - -def nucleicParser(seq): - return whichParser(seq)(seq) diff --git a/obitools/seqdb/embl/__init__.py b/obitools/seqdb/embl/__init__.py deleted file mode 100644 index 94f9efc..0000000 --- a/obitools/seqdb/embl/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence -from obitools.location import locationGenerator,extractExternalRefs - - - -class EmblSequence(AnnotatedNucSequence): - ''' - Class used to represent a nucleic sequence issued from EMBL. - ''' - - - - diff --git a/obitools/seqdb/embl/parser.py b/obitools/seqdb/embl/parser.py deleted file mode 100644 index 2e3624f..0000000 --- a/obitools/seqdb/embl/parser.py +++ /dev/null @@ -1,50 +0,0 @@ -import re -import sys - -from obitools.seqdb import embl -from obitools.seqdb import nucEntryIterator - -_featureMatcher = re.compile('(^FT .*\n)+', re.M) -_cleanFT = re.compile('^FT',re.M) - -_headerMatcher = re.compile('^ID.+(?=\nFH )', re.DOTALL) -_seqMatcher = re.compile('(^ ).+(?=//\n)', re.DOTALL + re.M) -_cleanSeq = re.compile('[ \n0-9]+') -_acMatcher = re.compile('(?<=^AC ).+',re.M) -_deMatcher = re.compile('(^DE .+\n)+',re.M) -_cleanDe = re.compile('(^|\n)DE +') - -def __emblparser(text): - try: - header = _headerMatcher.search(text).group() - - ft = _featureMatcher.search(text).group() - ft = _cleanFT.sub(' ',ft) - - seq = _seqMatcher.search(text).group() - seq = _cleanSeq.sub('',seq).upper() - - acs = _acMatcher.search(text).group() - acs = acs.split() - ac = acs[0] - acs = acs[1:] - - de = _deMatcher.search(header).group() - de = _cleanDe.sub(' ',de).strip().strip('.') - except AttributeError,e: - print >>sys.stderr,'=======================================================' - print >>sys.stderr,text - print >>sys.stderr,'=======================================================' - raise e - - return (ac,seq,de,header,ft,acs) - -def emblParser(text): - return embl.EmblSequence(*__emblparser(text)) - - -def emblIterator(file): - for e in nucEntryIterator(file): - yield emblParser(e) - - \ No newline at end of file diff --git a/obitools/seqdb/genbank/__init__.py b/obitools/seqdb/genbank/__init__.py deleted file mode 100644 index fb5b622..0000000 --- a/obitools/seqdb/genbank/__init__.py +++ /dev/null @@ -1,84 +0,0 @@ -from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence -from obitools.location import locationGenerator,extractExternalRefs - - - -class GbSequence(AnnotatedNucSequence): - ''' - Class used to represent a nucleic sequence issued from Genbank. - ''' - - -class GpepSequence(AnnotatedAASequence): - ''' - Class used to represent a peptidic sequence issued from Genpep. 
- ''' - - def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info): - AnnotatedAASequence.__init__(self,id, seq, de, header, featureTable, secondaryAcs,**info) - self.__hasNucRef=None - - def __getGeneRef(self): - if self.__hasNucRef is None: - self.__hasNucRef=False - cds = [x for x in self.featureTable - if x.ftType=='CDS' - and 'coded_by' in x] - - if cds: - source = cds[0]['coded_by'][0] - if 'transl_table' in cds[0]: - tt = cds[0]['transl_table'][0] - else: - tt=None - ac,loc = extractExternalRefs(source) - - if len(ac)==1: - ac = ac.pop() - self.__hasNucRef=True - self.__nucRef = (ac,loc,tt) - - - - def geneAvailable(self): - ''' - Predicat indicating if reference to the nucleic sequence encoding - this protein is available in feature table. - - @return: True if gene description is available - @rtype: bool - ''' - self.__getGeneRef() - return self.__hasNucRef is not None and self.__hasNucRef - - - def getCDS(self,database): - ''' - Return the nucleic sequence coding for this protein if - data are available. - - @param database: a database object where looking for the sequence - @type database: a C{dict} like object - - @return: a NucBioseq instance carreponding to the CDS - @rtype: NucBioSeq - - @raise AssertionError: if no gene references are available - @see: L{geneAvailable} - - ''' - - assert self.geneAvailable(), \ - "No information available to retreive gene sequence" - - ac,loc,tt = self.__nucRef - seq = database[ac] - seq.extractTaxon() - gene = seq[loc] - if tt is not None: - gene['transl_table']=tt - return gene - - - - diff --git a/obitools/seqdb/genbank/ncbi.py b/obitools/seqdb/genbank/ncbi.py deleted file mode 100644 index 40ddf91..0000000 --- a/obitools/seqdb/genbank/ncbi.py +++ /dev/null @@ -1,79 +0,0 @@ -from urllib2 import urlopen -import sys -import re - -import cStringIO - -from obitools.eutils import EFetch -from parser import genbankParser,genpepParser -from parser import genbankIterator,genpepIterator - -from obitools.utils import CachedDB - - -class NCBIGenbank(EFetch): - def __init__(self): - EFetch.__init__(self,db='nucleotide', - rettype='gbwithparts') - - def __getitem__(self,ac): - if isinstance(ac,str): - text = self.get(id=ac) - seq = genbankParser(text) - return seq - else: - query = ','.join([x for x in ac]) - data = cStringIO.StringIO(self.get(id=query)) - return genbankIterator(data) - - - - -class NCBIGenpep(EFetch): - def __init__(self): - EFetch.__init__(self,db='protein', - rettype='gbwithparts') - - def __getitem__(self,ac): - if isinstance(ac,str): - text = self.get(id=ac) - seq = genpepParser(text) - return seq - else: - query = ','.join([x for x in ac]) - data = cStringIO.StringIO(self.get(id=query)) - return genpepIterator(data) - -class NCBIAccession(EFetch): - - _matchACS = re.compile(' +accession +"([^"]+)"') - - def __init__(self): - EFetch.__init__(self,db='nucleotide', - rettype='seqid') - - def __getitem__(self,ac): - if isinstance(ac,str): - text = self.get(id=ac) - rep = NCBIAccession._matchACS.search(text).group(1) - return rep - else: - query = ','.join([x for x in ac]) - text = self.get(id=query) - rep = (ac.group(1) for ac in NCBIAccession._matchACS.finditer(text)) - return rep - -def Genbank(cache=None): - gb = NCBIGenbank() - if cache is not None: - gb = CachedDB(cache, gb) - return gb - - -def Genpep(cache=None): - gp = NCBIGenpep() - if cache is not None: - gp = CachedDB(cache, gp) - return gp - - diff --git a/obitools/seqdb/genbank/parser.py b/obitools/seqdb/genbank/parser.py deleted file mode 100644 index 
b52fe59..0000000 --- a/obitools/seqdb/genbank/parser.py +++ /dev/null @@ -1,53 +0,0 @@ -import re -import sys - -import obitools.seqdb.genbank as gb -from obitools.seqdb import nucEntryIterator,aaEntryIterator - -_featureMatcher = re.compile('^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M) - -_headerMatcher = re.compile('^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M) -_seqMatcher = re.compile('(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M) -_cleanSeq = re.compile('[ \n0-9]+') -_acMatcher = re.compile('(?<=^ACCESSION ).+',re.M) -_deMatcher = re.compile('(?<=^DEFINITION ).+\n( .+\n)*',re.M) -_cleanDe = re.compile('\n *') - -def __gbparser(text): - try: - header = _headerMatcher.search(text).group() - ft = _featureMatcher.search(text).group() - seq = _seqMatcher.search(text).group() - seq = _cleanSeq.sub('',seq).upper() - acs = _acMatcher.search(text).group() - acs = acs.split() - ac = acs[0] - acs = acs[1:] - de = _deMatcher.search(header).group() - de = _cleanDe.sub(' ',de).strip().strip('.') - except AttributeError,e: - print >>sys.stderr,'=======================================================' - print >>sys.stderr,text - print >>sys.stderr,'=======================================================' - raise e - - return (ac,seq,de,header,ft,acs) - -def genbankParser(text): - return gb.GbSequence(*__gbparser(text)) - - -def genbankIterator(file): - for e in nucEntryIterator(file): - yield genbankParser(e) - - -def genpepParser(text): - return gb.GpepSequence(*__gbparser(text)) - - -def genpepIterator(file): - for e in aaEntryIterator(file): - yield genpepParser(e) - - \ No newline at end of file diff --git a/obitools/sequenceencoder/__init__.py b/obitools/sequenceencoder/__init__.py deleted file mode 100644 index 89a8a59..0000000 --- a/obitools/sequenceencoder/__init__.py +++ /dev/null @@ -1,73 +0,0 @@ -from obitools import location - -class SequenceEncoder(object): - pass - -class DNAComplementEncoder(SequenceEncoder): - _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a', - 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k', - 's': 's', 'w': 'w', 'b': 'v', 'd': 'h', - 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a', - '-': '-'} - - _info={'complemented':True} - - @staticmethod - def _encode(seq,position=slice(None, None, -1)): - cseq = [DNAComplementEncoder._comp.get(x.lower(),'n') for x in seq[position]] - return ''.join(cseq) - - @staticmethod - def _check(seq): - assert seq.isNucleotide() - - @staticmethod - def _convertpos(position): - if isinstance(position, int): - return -(position+1) - elif isinstance(position, slice): - return slice(-(position.stop+1), - -(position.start+1), - -position.step) - elif isinstance(position, location.Location): - return location.ComplementLocation(position).simplify() - - raise TypeError,"position must be an int, slice or Location instance" - - @staticmethod - def complement(seq): - return seq - -class SeqFragmentEncoder(SequenceEncoder): - def __init__(self,begin,end): - assert begin < end and begin >=0 - self._limits = slice(begin,end) - self._info = {'cut' : [begin,end,1]} - self._len = end - begin + 1 - - def _check(self,seq): - lseq = len(seq) - assert self._limits.stop <= lseq - - def _encode(self,seq,position=None): - return str(seq)[self._limits] - - def _convertpos(self,position): - if isinstance(position, int): - if position < -self._len or position >= self._len: - raise IndexError,position - if position >=0: - return self._limits.start + position - else: - return self._limits.stop + position + 1 - elif isinstance(position, slice): - return slice(-(position.stop+1), - 
-                     -(position.start+1),
-                     -position.step)
-        elif isinstance(position, location.Location):
-            return location.ComplementLocation(position).simplify()
-
-        raise TypeError,"position must be an int, slice or Location instance"
-
-
\ No newline at end of file
diff --git a/obitools/sequenceencoder/__init__.pyc b/obitools/sequenceencoder/__init__.pyc
deleted file mode 100644
index 463f84f..0000000
Binary files a/obitools/sequenceencoder/__init__.pyc and /dev/null differ
diff --git a/obitools/solexa/__init__.py b/obitools/solexa/__init__.py
deleted file mode 100644
index 60e35f8..0000000
--- a/obitools/solexa/__init__.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from obitools import utils
-from obitools import NucSequence
-from obitools.dnahash import hashCodeIterator
-
-
-class SolexaSequence(NucSequence):
-    def __init__(self,id,seq,definition=None,quality=None,**info):
-        NucSequence.__init__(self, id, seq, definition,**info)
-        self._quality=quality
-        self._hash=None
-
-    def getQuality(self):
-        if isinstance(self._quality, str):
-            self._quality=[int(x) for x in self._quality.split()]
-        return self._quality
-
-
-    def __hash__(self):
-        if self._hash is None:
-            self._hash = hashCodeIterator(str(self), len(str(self)), 16, 0).next()[1].pop()
-        return self._hash
-
-class SolexaFile(utils.ColumnFile):
-    def __init__(self,stream):
-        utils.ColumnFile.__init__(self,
-                                  stream, ':', True,
-                                  (str,
-                                   int,int,int,int,
-                                   str,
-                                   str), "#")
-
-
-    def next(self):
-        data = utils.ColumnFile.next(self)
-        seq = SolexaSequence('%d_%d_%d_%d'%(data[1],data[2],data[3],data[4]),
-                             data[5],
-                             quality=data[6])
-        seq['machine']=data[0]
-        seq['channel']=data[1]
-        seq['tile']=data[2]
-        seq['pos_x']=data[3]
-        seq['pos_y']=data[4]
-
-        #assert len(seq['quality'])==len(seq),"Error in file format"
-        return seq
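The `SolexaFile` reader deleted above is a thin `ColumnFile` subclass over ':'-separated export lines. A minimal sketch of how it was meant to be used, assuming Python 2, this obitools revision on the path, and a hypothetical export file `run.txt`:

    from obitools.solexa import SolexaFile

    # Each record of the export file becomes a SolexaSequence whose
    # identifier is built from the channel/tile/x/y coordinates.
    for seq in SolexaFile(open('run.txt')):
        print seq.id, seq.getQuality()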
diff --git a/obitools/statistics/__init__.py b/obitools/statistics/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/statistics/hypergeometric.py b/obitools/statistics/hypergeometric.py
deleted file mode 100644
index 9a9b812..0000000
--- a/obitools/statistics/hypergeometric.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-   Statistical computation module.
-
-   The `statistics` module contains functions computing the
-   probabilities associated with the hypergeometric and cumulative
-   hypergeometric distributions, together with a correction method
-   for multiple testing.
-
-"""
-
-from decimal import *
-
-getcontext().prec = 28
-
-
-def _hyper0(N,n,r):
-    """
-    Internal function computing term 0 of the hypergeometric distribution.
-
-    The computation follows the method described in
-
-    Trong Wu, An accurate computation of the hypergeometric distribution function,
-    ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
-    Parameters:
-
-    - `N` : the population size
-    - `n` : the number of marked elements
-    - `r` : the sample size
-
-    Returns a *float* giving the probability of drawing 0 marked
-    elements out of *n* from a population of size *N* when sampling
-    *r* elements.
-    """
-
-    #
-    # the numerator is:
-    #  [N - r + 1 - n; N - n + 1[
-    #
-    # the denominator is:
-    # [N - r + 1; N + 1]
-    #
-    # with X = N - r + 1
-    # and  Y = N + 1
-    #
-    # Numerator   -> [ X - n; Y - n [
-    # Denominator -> [ X ; Y [
-    #
-    # so we can simplify to
-    #
-    # Numerator   -> [X - n; X [
-    # Denominator -> [Y - n; Y [
-
-    numerateur  = xrange(N - r + 1 - n, N - r + 1)
-    denominateur= xrange(N + 1 - n, N + 1)
-#
-# original version
-#
-#    m = N - n
-#    numerateur = set(range(m-r+1,m+1))
-#    denominateur = set(range(N-r+1,N+1))
-#    simplification = numerateur & denominateur
-#    numerateur -= simplification
-#    denominateur -= simplification
-#    numerateur = list(numerateur)
-#    denominateur=list(denominateur)
-#    numerateur.sort()
-#    denominateur.sort()
-
-    p = reduce(lambda x,y:x*y,map(lambda i,j:Decimal(i)/Decimal(j),numerateur,denominateur))
-    return p
-
-
-def hypergeometric(x,N,n,r):
-    """
-    Computes term *x* of a hypergeometric distribution.
-
-    The computation follows the method described in
-
-    Trong Wu, An accurate computation of the hypergeometric distribution function,
-    ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
-    Parameters:
-
-    - `x` : the expected number of marked elements
-    - `N` : the population size
-    - `n` : the number of marked elements
-    - `r` : the sample size
-
-    Returns a *float* giving the probability of drawing *x* marked
-    elements out of *n* from a population of size *N* when sampling
-    *r* elements.
-    """
-    if n < r:
-        s = n
-        n = r
-        r = s
-    assert x>=0 and x <= r,"x out of limits"
-    if x > 0 :
-        return hypergeometric(x-1,N,n,r) * (n - x + 1)/x * (r - x + 1)/(N-n-r+x)
-    else:
-        return _hyper0(N,n,r)
-
-def chypergeometric(xmin,xmax,N,n,r):
-    """
-    Computes the cumulative hypergeometric probability between the
-    terms *xmin* and *xmax*.
-
-    The computation follows the method described in
-
-    Trong Wu, An accurate computation of the hypergeometric distribution function,
-    ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
-    Parameters:
-
-    - `xmin` : the minimum expected number of marked elements
-    - `xmax` : the maximum expected number of marked elements
-    - `N` : the population size
-    - `n` : the number of marked elements
-    - `r` : the sample size
-
-    Returns a *float* giving the probability of drawing between
-    *xmin* and *xmax* marked elements out of *n* from a population
-    of size *N* when sampling *r* elements.
-    """
-    if n < r:
-        s = n
-        n = r
-        r = s
-    assert xmin>=0 and xmin <= r and xmax>=0 and xmax <= r and xmin <=xmax,"x out of limits"
-    hg = hypergeometric(xmin,N,n,r)
-    rep = hg
-    for x in xrange(xmin+1,xmax+1):
-        hg = hg * (n - x + 1)/x * (r - x + 1)/(N-n-r+x)
-        rep+=hg
-    return rep
-
-def multipleTest(globalPvalue,testList):
-    """
-    Correction for multiple testing.
-
-    Selects, among a set of tests, the largest subset such that the
-    overall risk stays below a given p-value.
-
-    Parameters:
-
-    - `globalPvalue` : the overall risk accepted for the whole set of tests
-    - `testList` : an iterable over a set of tests.
-      Each test is a list or a tuple whose last element
-      is the p-value associated with the test
-
-    Returns a list containing the subset of tests selected from
-    `testList`
-    """
-    testList=list(testList)
-    testList.sort(lambda x,y:cmp(x[-1],y[-1]))
-    h0=1.0-globalPvalue
-    p=1.0
-    rep = []
-    for t in testList:
-        p*=1.0-t[-1]
-        if p > h0:
-            rep.append(t)
-    return rep
-    
\ No newline at end of file
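A minimal sketch of the API deleted above, assuming Python 2; the numbers are arbitrary example values:

    from obitools.statistics.hypergeometric import hypergeometric, chypergeometric

    # Probability of drawing exactly 3 marked elements in a sample of
    # 10 from a population of 100 that contains 20 marked elements,
    # and the cumulative probability of drawing at most 3.
    p  = hypergeometric(3, 100, 20, 10)
    pc = chypergeometric(0, 3, 100, 20, 10)
    print p, pc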
diff --git a/obitools/statistics/noncentralhypergeo.py b/obitools/statistics/noncentralhypergeo.py
deleted file mode 100644
index e6a96ce..0000000
--- a/obitools/statistics/noncentralhypergeo.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from decimal import *
-from math import log
-
-#from obitools.utils import moduleInDevelopment
-
-#moduleInDevelopment(__name__)
-
-# from : http://www.programmish.com/?p=25
-
-def dec_log(self, base=10):
-    cur_prec = getcontext().prec
-    getcontext().prec += 2
-    baseDec = Decimal(10)
-    retValue = self
-
-    if isinstance(base, Decimal):
-        baseDec = base
-    elif isinstance(base, float):
-        baseDec = Decimal("%f" % (base))
-    else:
-        baseDec = Decimal(base)
-
-    integer_part = Decimal(0)
-    while retValue < 1:
-        integer_part = integer_part - 1
-        retValue = retValue * baseDec
-    while retValue >= baseDec:
-        integer_part = integer_part + 1
-        retValue = retValue / baseDec
-
-    retValue = retValue ** 10
-    decimal_frac = Decimal(0)
-    partial_part = Decimal(1)
-    while cur_prec > 0:
-        partial_part = partial_part / Decimal(10)
-        digit = Decimal(0)
-        while retValue >= baseDec:
-            digit += 1
-            retValue = retValue / baseDec
-        decimal_frac = decimal_frac + digit * partial_part
-        retValue = retValue ** 10
-        cur_prec -= 1
-    getcontext().prec -= 2
-
-    return integer_part + decimal_frac
-
-class Interval(object):
-    def __init__(self,begin,end,facteur=1):
-        self._begin = begin
-        self._end = end
-        self._facteur=facteur
-
-    def __str__(self):
-        return '[%d,%d] ^ %d' % (self._begin,self._end,self._facteur)
-
-    def __repr__(self):
-        return 'Interval(%d,%d,%d)' % (self._begin,self._end,self._facteur)
-
-    def begin(self):
-        return (self._begin,self._facteur,True)
-
-    def end(self):
-        return (self._end,-self._facteur,False)
-
-
-def cmpb(i1,i2):
-    x= cmp(i1[0],i2[0])
-    if x==0:
-        x = cmp(i2[2],i1[2])
-    return x
-
-class Product(object):
-    def __init__(self,i=None):
-        if i is not None:
-            self.prod=[i]
-        else:
-            self.prod=[]
-        self._simplify()
-
-    def _simplify(self):
-        bornes=[]
-        prod =[]
-
-        if self.prod:
-
-            for i in self.prod:
-                bornes.append(i.begin())
-                bornes.append(i.end())
-            bornes.sort(cmpb)
-
-            j=0
-            r=len(bornes)
-            for i in xrange(1,len(bornes)):
-                if bornes[i][0]==bornes[j][0] and bornes[i][2]==bornes[j][2]:
-                    bornes[j]=(bornes[j][0],bornes[j][1]+bornes[i][1],bornes[i][2])
-                    r-=1
-                else:
-                    j+=1
-                    bornes[j]=bornes[i]
-
-            bornes=bornes[0:r]
-
-            facteur=0
-            close=1
-
-            for b,level,open in bornes:
-                if not open:
-                    close=0
-                else:
-                    close=1
-                if facteur:
-                    prod.append(Interval(debut,b-close,facteur))
-                debut=b+1-close
-                facteur+=level
-
-        self.prod=prod
-
-    def __mul__(self,p):
-        res = Product()
-        res.prod=list(self.prod)
-        res.prod.extend(p.prod)
-        res._simplify()
-        return res
-
-    def __div__(self,p):
-        np = Product()
-        np.prod = [Interval(x._begin,x._end,-x._facteur) for x in p.prod]
-        return self * np
-
-    def __str__(self):
-        return str(self.prod)
-
-    def log(self):
-        p=Decimal(0)
-        for k in self.prod:
-            p+= Decimal(k._facteur) * reduce(lambda x,y:x+dec_log(Decimal(y),Decimal(10)),xrange(k._begin,k._end+1),Decimal(0))
-        return p
-
-    def product(self):
-        p=Decimal(1)
-        for k in self.prod:
-            p*= reduce(lambda x,y:x*Decimal(y),xrange(k._begin,k._end+1),Decimal(1)) ** Decimal(k._facteur)
-        return p
-
-    def __call__(self,log=True):
-        if log:
-            return self.log()
-        else:
-            return self.product()
-
-
-def fact(n):
-    return Product(Interval(1,n))
-
-def cnp(n,p):
-    return fact(n)/fact(p)/fact(n-p)
-
-def hypergeometic(x,n,M,N):
-    '''
-    @param x: the random variable
-    @type x: int
-    @param n: the sample size
-    @type n: int
-    @param M: the number of marked (winning) balls
-    @type M: int
-    @param N: the total number of balls in the urn
-    @type N: int
-
-    p(x) = cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
-    '''
-    return cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
-
-def nchypergeometique(x,n,M,N,r):
-    '''
-    @param x: the random variable
-    @type x: int
-    @param n: the sample size
-    @type n: int
-    @param M: the number of marked (winning) balls
-    @type M: int
-    @param N: the total number of balls in the urn
-    @type N: int
-    @param r: the odds ratio
-    @type r: float
-
-    p(x) = cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
-    '''
-
-    xmin = max(0,n-N+M)
-    xmax = min(n,M)
-    lr  = dec_log(r)
-    xlr = x * lr
-    num = cnp(M,x) * cnp(N-M,n-x)
-    den = [cnp(M,y) * cnp(N-M,n-y) / num for y in xrange(xmin,xmax+1)]
-    fden= [lr * y - xlr for y in xrange(xmin,xmax+1)]
-
-    inverse=reduce(lambda x,y : x+y,
-                   map(lambda i,j: i(False) * 10**j ,den,fden))
-    return 1/inverse
-
-    
\ No newline at end of file
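The `Product`/`Interval` machinery deleted above represents ratios of factorials symbolically, so large binomial coefficients can be evaluated exactly with `Decimal`. A minimal sketch, assuming Python 2; the values are arbitrary:

    from obitools.statistics.noncentralhypergeo import cnp

    # C(10, 3) built as fact(10)/fact(3)/fact(7); calling the Product
    # evaluates it, either directly or as its log10 (the default).
    c = cnp(10, 3)
    print c(False)   # the coefficient itself, as a Decimal (120)
    print c(True)    # its log10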
diff --git a/obitools/svg.py b/obitools/svg.py
deleted file mode 100644
index c42e3ef..0000000
--- a/obitools/svg.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-"""\
-SVG.py - Construct/display SVG scenes.
-
-The following code is a lightweight wrapper around SVG files. The metaphor
-is to construct a scene, add objects to it, and then write it to a file
-to display it.
-
-This program uses ImageMagick to display the SVG files. ImageMagick also
-does a remarkable job of converting SVG files into other formats.
-"""
-
-import os
-display_prog = 'display' # Command to execute to display images.
-
-class Scene:
-    def __init__(self,name="svg",height=400,width=400):
-        self.name = name
-        self.items = []
-        self.height = height
-        self.width = width
-        return
-
-    def add(self,item): self.items.append(item)
-
-    def strarray(self):
-        var = ["<?xml version=\"1.0\"?>\n",
-               "<svg height=\"%d\" width=\"%d\" >\n" % (self.height,self.width),
-               " <g style=\"fill-opacity:1.0; stroke:black; stroke-width:1;\">\n"]
-        for item in self.items: var += item.strarray()
-        var += [" </g>\n</svg>\n"]
-        return var
-
-    def write_svg(self,filename=None):
-        if filename:
-            self.svgname = filename
-        else:
-            self.svgname = self.name + ".svg"
-        file = open(self.svgname,'w')
-        file.writelines(self.strarray())
-        file.close()
-        return
-
-    def display(self,prog=display_prog):
-        os.system("%s %s" % (prog,self.svgname))
-        return
-
-
-class Line:
-    def __init__(self,start,end):
-        self.start = start #xy tuple
-        self.end = end     #xy tuple
-        return
-
-    def strarray(self):
-        return ["  <line x1=\"%d\" y1=\"%d\" x2=\"%d\" y2=\"%d\" style=\"stroke:#000000;\"/>\n" %\
-                (self.start[0],self.start[1],self.end[0],self.end[1])]
-
-
-class Circle:
-    def __init__(self,center,radius,color):
-        self.center = center #xy tuple
-        self.radius = radius #xy tuple
-        self.color = color   #rgb tuple in range(0,256)
-        return
-
-    def strarray(self):
-        return ["  <circle cx=\"%d\" cy=\"%d\" r=\"%d\"\n" %\
-                (self.center[0],self.center[1],self.radius),
-                "    style=\"fill:%s;\"  />\n" % colorstr(self.color)]
-
-class Rectangle:
-    def __init__(self,origin,height,width,color):
-        self.origin = origin
-        self.height = height
-        self.width = width
-        self.color = color
-        return
-
-    def strarray(self):
-        return ["  <rect x=\"%d\" y=\"%d\" height=\"%d\"\n" %\
-                (self.origin[0],self.origin[1],self.height),
-                "    width=\"%d\" style=\"fill:%s;\" />\n" %\
-                (self.width,colorstr(self.color))]
-
-class Text:
-    def __init__(self,origin,text,size=24):
-        self.origin = origin
-        self.text = text
-        self.size = size
-        return
-
-    def strarray(self):
-        return ["  <text x=\"%d\" y=\"%d\" font-size=\"%d\">\n" %\
-                (self.origin[0],self.origin[1],self.size),
-                "   %s\n" % self.text,
-                "  </text>\n"]
-
-
-def colorstr(rgb): return "#%x%x%x" % (rgb[0]/16,rgb[1]/16,rgb[2]/16)
-
-def test():
-    scene = Scene('test')
-    scene.add(Rectangle((100,100),200,200,(0,255,255)))
-    scene.add(Line((200,200),(200,300)))
-    scene.add(Line((200,200),(300,200)))
-    scene.add(Line((200,200),(100,200)))
-    scene.add(Line((200,200),(200,100)))
-    scene.add(Circle((200,200),30,(0,0,255)))
-    scene.add(Circle((200,300),30,(0,255,0)))
-    scene.add(Circle((300,200),30,(255,0,0)))
-    scene.add(Circle((100,200),30,(255,255,0)))
-    scene.add(Circle((200,100),30,(255,0,255)))
-    scene.add(Text((50,50),"Testing SVG"))
-    scene.write_svg()
-    scene.display()
-    return
-
-if __name__ == '__main__': test()
diff --git a/obitools/table/__init__.py b/obitools/table/__init__.py
deleted file mode 100644
index 41e00bd..0000000
--- a/obitools/table/__init__.py
+++ /dev/null
@@ -1,633 +0,0 @@
-'''
-
-'''
-
-from itertools import imap,count,chain
-
-from itertools import imap,count,chain
-
-class Table(list):
-    """
-    Tables are list of rows of the same model
-    """
-    def __init__(self, headers=None,
-                       types=None,
-                       colcount=None,
-                       rowFactory=None,
-                       subrowFactory=None):
-        '''
-
-        @param headers: the list of column header.
-
-                        if this parametter is C{None}, C{colcount}
-                        parametter must be set.
-
-        @type headers: C{list}, C{tuple} or and iterable object
-
-        @param types: the list of data type associated to each column.
-
-                      If this parametter is specified its length must be
-                      equal to the C{headers} length or to C{colcount}.
-
-        @type types: C{list}, C{tuple} or and iterable object
-
-        @param colcount: number of column in the created table.
- - If C{headers} parametter is not C{None} this - parametter is ignored - - @type colcount: int - ''' - - assert headers is not None or colcount is not None,\ - 'headers or colcount parametter must be not None value' - - if headers is None: - headers = tuple('Col_%d' % x for x in xrange(colcount)) - - self.headers = headers - self.types = types - self.colcount= len(self.headers) - - if rowFactory is None: - self.rowFactory=TableRow - else: - self.rowFactory=rowFactory - - if subrowFactory is None: - self.subrowFactory=TableRow - else: - self.subrowFactory=rowFactory - - - self.likedTo=set() - - - - def isCompatible(self,data): - assert isinstance(data,(Table,TableRow)) - return (self.colcount == data.colcount and - (id(self.types)==id(data.types) or - self.types==data.types - ) - ) - - def __setitem__ (self,key,value): - ''' - - @param key: - @type key: C{int}, C{slice} or C{str} - @param value: - @type value: - ''' - - if isintance(key,int): - if not isinstance(value, TableRow): - value = self.rowFactory(self,value) - else: - assert self.isCompatible(value) - list.__setitem__(self,key,value.row) - - elif isinstance(key,slice): - indices = xrange(key.indices(len(self))) - for i,d in imap(None,indices,value): - self[i]=d - - else: - raise TypeError, "Key must be an int or slice value" - - def __getitem__(self,key): - ''' - this function has different comportements depending - of the data type of C{key} and the table used. - - @param key: description of the table part to return - @type key: C{int} or C{slice} - - @return: return a TableRow (if key is C{int}) - or a subpart of the table (if key is C{slice}). - ''' - if isinstance(key,int): - return self.rowFactory(self, - list.__getitem__(self,key)) - - if isinstance(key,slice): - newtable=Table(self.headers,self.types) - indices = xrange(key.indices(len(self))) - for i in indices: - list.append(newtable,list.__getitem__(self,i)) - self.likedTo.add(newtable) - return newtable - - raise TypeError - - - def __getslice__(self,x,y): - return self.__getitem__(slice(x,y)) - - def __iter__(self): - return TableIterator(self) - - def __hash__(self): - return id(self) - - def __add__(self,itable): - return concatTables(self,itable) - - def _setTypes(self,types): - if types is not None and not isinstance(type,tuple): - types = tuple(x for x in types) - - assert types is None or len(types)==len(self._headers) - - self._types = types - - if types is not None: - for row in self: - row.castRow() - - def _getTypes(self): - return self._types - - types = property(_getTypes,_setTypes) - - def _getHeaders(self): - return self._headers - - def _setHeaders(self,headers): - if not isinstance(headers, tuple): - headers = tuple(x for x in headers) - - self._hindex = dict((k,i) for i,k in imap(None,count(),headers)) - self._headers=headers - self.colcount=len(headers) - - headers=property(_getHeaders,_setHeaders) - - def append(self,value): - if not isinstance(value, TableRow): - value = self.rowFactory(self,value) - else: - assert self.isCompatible(value) - list.append(self,value.row) - - - -class _Row(list): - def __init__(self,data,size): - if data is None: - list.__init__(self,(None for x in xrange(size))) - else: - list.__init__(self,data) - assert len(self)==size, \ - "Size of data is not correct (%d instead of %d)" % (len(self),size) - - def append(self,value): - raise NotImplementedError, \ - "Rows cannot change of size" - - def pop(self,key=None): - raise NotImplementedError, \ - "Rows cannot change of size" - - def extend(self,values): - raise 
NotImplementedError, \ - "Rows cannot change of size" - - - - -class TableRow(object): - ''' - - ''' - def __init__(self, table, - data=None, - ): - - self.table = table - - if isinstance(data,_Row): - self.row=row - else: - data = self._castRow(data) - self.row=_Row(data,self._colcount) - - def getType(self): - return self.table.types - - def getHeaders(self): - return self.table.headers - - def getHIndex(self): - return self.table._hindex - - def getColCount(self): - return self.table.colcount - - types = property(getType,None,None, - "List of types associated to this row") - headers= property(getHeaders,None,None, - "List of headers associated to this row") - - _hindex= property(getHIndex,None,None) - _colcount = property(getColCount,None,None) - - def _castValue(t,x): - ''' - Cast a value to a specified type, with exception of - C{None} values that are returned without cast. - - @param t: the destination type - @type t: C{type} - @param x: the value to cast - - @return: the casted value or C{None} - - ''' - if x is None or t is None: - return x - else: - return t(x) - - _castValue=staticmethod(_castValue) - - def _castRow(self,data): - - if not isinstance(data, (list,dict)): - data=[x for x in data] - - if isinstance(data,list): - assert len(data)==self._colcount, \ - 'values has not good length' - if self.types is not None: - data=[TableRow._castValue(t, x) - for t,x in imap(None,self.types,data)] - - elif isinstance(data,dict): - lvalue = [None] * len(self.header) - - for k,v in data.items(): - try: - hindex = self._hindex[k] - if self.types is not None: - lvalue[hindex]=TableRow._castValue(self.types[hindex], v) - else: - lvalue[hindex]=v - except KeyError: - info('%s is not a table column' % k) - - data=lvalue - else: - raise TypeError - - return data - - def __getitem__(self,key): - ''' - - @param key: - @type key: - ''' - - if isinstance(key,(int,slice)): - return self.row[key] - - if isinstance(key,str): - i = self._hindex[key] - return self.row[i] - - raise TypeError, "Key must be an int, slice or str value" - - def __setitem__(self,key,value): - ''' - - @param key: - @type key: - @param value: - @type value: - ''' - - if isinstance(key,str): - key = self._hindex[key] - - elif isinstance(key,int): - if self.types is not None: - value = TableRow._castValue(self.types[key], value) - self.row[key]=value - - elif isinstance(key,slice): - indices = xrange(key.indices(len(self.row))) - for i,v in imap(None,indices,value): - self[i]=v - else: - raise TypeError, "Key must be an int, slice or str value" - - - - def __iter__(self): - ''' - - ''' - return iter(self.row) - - def append(self,value): - raise NotImplementedError, \ - "Rows cannot change of size" - - def pop(self,key=None): - raise NotImplementedError, \ - "Rows cannot change of size" - - def extend(self,values): - raise NotImplementedError, \ - "Rows cannot change of size" - - def __len__(self): - return self._colcount - - def __repr__(self): - return repr(self.row) - - def __str__(self): - return str(self.row) - - def castRow(self): - self.row = _Row(self._castRow(self.row),len(self.row)) - - -class iTableIterator(object): - - def _getHeaders(self): - raise NotImplemented - - def _getTypes(self): - raise NotImplemented - - def _getRowFactory(self): - raise NotImplemented - - def _getSubrowFactory(self): - raise NotImplemented - - def _getColcount(self): - return len(self._getTypes()) - - def __iter__(self): - return self - - headers = property(_getHeaders,None,None) - types = property(_getTypes,None,None) - rowFactory = 
property(_getRowFactory,None,None) - subrowFactory = property(_getSubrowFactory,None,None) - colcount = property(_getColcount,None,None) - - def columnIndex(self,name): - if isinstance(name,str): - return self._reference.headers.index(name) - elif isinstance(name,int): - lh = len(self._reference.headers) - if name < lh and name >=0: - return name - elif name < 0 and name >= -lh: - return lh - name - raise IndexError - raise TypeError - - def next(self): - raise NotImplemented - - -class TableIterator(iTableIterator): - - def __init__(self,table): - if not isinstance(table,Table): - raise TypeError - - self._reftable=table - self._i=0 - - def _getHeaders(self): - return self._reftable.headers - - def _getTypes(self): - return self._reftable.types - - def _getRowFactory(self): - return self._reftable.rowFactory - - def _getSubrowFactory(self): - return self._reftable.subrowFactory - - def columnIndex(self,name): - if isinstance(name,str): - return self._reftable._hindex[name] - elif isinstance(name,int): - lh = len(self._reftable._headers) - if name < lh and name >=0: - return name - elif name < 0 and name >= -lh: - return lh - name - raise IndexError - raise TypeError - - - def rewind(self): - i=0 - - def next(self): - if self._i < len(self._reftable): - rep=self._reftable[self._i] - self._i+=1 - return rep - else: - raise StopIteration - - headers = property(_getHeaders,None,None) - types = property(_getTypes,None,None) - rowFactory = property(_getRowFactory,None,None) - subrowFactory = property(_getSubrowFactory,None,None) - - -class ProjectionIterator(iTableIterator): - - def __init__(self,tableiterator,*cols): - self._reference = iter(tableiterator) - - assert isinstance(self._reference, iTableIterator) - - self._selected = tuple(self._reference.columnIndex(x) - for x in cols) - self._headers = tuple(self._reference.headers[x] - for x in self._selected) - - if self._reference.types is not None: - self._types= tuple(self._reference.types[x] - for x in self._selected) - else: - self._types=None - - def _getRowFactory(self): - return self._reference.subrowFactory - - def _getSubrowFactory(self): - return self._reference.subrowFactory - - def _getHeaders(self): - return self._headers - - def _getTypes(self): - return self._types - - headers = property(_getHeaders,None,None) - types = property(_getTypes,None,None) - rowFactory = property(_getRowFactory,None,None) - subrowFactory = property(_getSubrowFactory,None,None) - - def next(self): - value = self._reference.next() - value = (value[x] for x in self._selected) - return self.rowFactory(self,value) - -class SelectionIterator(iTableIterator): - def __init__(self,tableiterator,**conditions): - self._reference = iter(tableiterator) - - assert isinstance(self._reference, iTableIterator) - - self._conditions=dict((self._reference.columnIndex(i),c) - for i,c in conditions.iteritems()) - - def _checkCondition(self,row): - return reduce(lambda x,y : x and y, - (bool(self._conditions[i](row[i])) - for i in self._conditions), - True) - - def _getRowFactory(self): - return self._reference.rowFactory - - def _getSubrowFactory(self): - return self._reference.subrowFactory - - def _getHeaders(self): - return self._reference.headers - - def _getTypes(self): - return self._reference.types - - def next(self): - row = self._reference.next() - while not self._checkCondition(row): - row = self._reference.next() - return row - - - headers = property(_getHeaders,None,None) - types = property(_getTypes,None,None) - rowFactory = property(_getRowFactory,None,None) 
-
-
-class UnionIterator(iTableIterator):
-
-    def __init__(self,*itables):
-        self._itables=[iter(x) for x in itables]
-        self._types = self._itables[0].types
-        self._headers = self._itables[0].headers
-
-        assert reduce(lambda x,y: x and y,
-                      ( isinstance(z,iTableIterator)
-                        and len(z.headers)==len(self._headers)
-                        for z in self._itables),
-                      True)
-
-        self._iterator = chain(*self._itables)
-
-    def _getRowFactory(self):
-        return self._itables[0].rowFactory
-
-    def _getSubrowFactory(self):
-        return self._itables[0].subrowFactory
-
-    def _getHeaders(self):
-        return self._headers
-
-    def _getTypes(self):
-        return self._types
-
-    def next(self):
-        value = self._iterator.next()
-        return self.rowFactory(self,value.row)
-
-    headers = property(_getHeaders,None,None)
-    types = property(_getTypes,None,None)
-    rowFactory = property(_getRowFactory,None,None)
-    subrowFactory = property(_getSubrowFactory,None,None)
-
-
-
-def tableFactory(tableiterator):
-    tableiterator = iter(tableiterator)
-    assert isinstance(tableiterator, iTableIterator)
-
-    newtable = Table(tableiterator.headers,
-                     tableiterator.types,
-                     tableiterator.rowFactory,
-                     tableiterator.subrowFactory)
-
-    for r in tableiterator:
-        newtable.append(r)
-
-    return newtable
-
-def projectTable(tableiterator,*cols):
-    return tableFactory(ProjectionIterator(tableiterator,*cols))
-
-def subTable(tableiterator,**conditions):
-    return tableFactory(SelectionIterator(tableiterator,**conditions))
-
-def concatTables(*itables):
-    '''
-    Concatenate several tables.
-
-    Concatenation is done using the L{UnionIterator}.
-
-    @type itables: iTableIterator or Table
-
-    @return: a new Table
-    @rtype: C{Table}
-
-    @see: L{UnionIterator}
-    '''
-    return tableFactory(UnionIterator(*itables))
-
-class TableIteratorAsDict(object):
-
-    def __init__(self,tableiterator):
-        self._reference = iter(tableiterator)
-
-        assert isinstance(self._reference, iTableIterator)
-
-        self._headers = self._reference.headers
-        self._types = self._reference.types
-        if self._types is not None:
-            self._types = dict((n,t)
-                               for n,t in imap(None,self._headers,self._types))
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        value = self._reference.next()
-        return dict((n,t)
-                    for n,t in imap(None,self._headers,value))
-
-    def _getHeaders(self):
-        return self._headers
-
-    def _getTypes(self):
-        return self._types
-
-    headers = property(_getHeaders,None,None)
-    types = property(_getTypes,None,None)
-
\ No newline at end of file
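These three helpers are the intended entry points of the module: each one materialises the corresponding lazy iterator into a fresh Table through tableFactory. A hedged usage sketch, assuming t1 and t2 are populated Table instances sharing the same headers:

    # keep rows with count > 5, project two columns, then concatenate
    part1  = projectTable(subTable(t1, count=lambda c: c > 5), 'id', 'count')
    part2  = projectTable(subTable(t2, count=lambda c: c > 5), 'id', 'count')
    merged = concatTables(part1, part2)

Each call walks its input exactly once and returns an independent Table.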
diff --git a/obitools/table/csv.py b/obitools/table/csv.py
deleted file mode 100644
index 1d9a73d..0000000
--- a/obitools/table/csv.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-obitools.table.csv module provides an iterator adapter
-for parsing CSV (comma separated values) files.
-"""
-
-import re
-
-def csvIterator(lineIterator,sep=','):
-    '''
-    Allows easy parsing of a csv file. This function
-    converts an iterator over the lines of a csv text
-    file into an iterator over lists of values. Each
-    list corresponds to the values present on one line.
-
-    @param lineIterator: iterator on text lines
-    @type lineIterator: iterator
-    @param sep: a string of one letter used as separator;
-                a blank character or '"' is not allowed
-                as separator
-    @type sep: string
-    @return: an iterator on data list
-    @rtype: iterator
-    '''
-    assert len(sep)==1 and not sep.isspace() and sep!='"'
-    valueMatcher=re.compile('\s*((")(([^"]|"")*)"|([^%s]*?))\s*(%s|$)' % (sep,sep))
-    def iterator():
-        for l in lineIterator:
-            yield _csvParse(l,valueMatcher)
-    return iterator()
-
-
-def _csvParse(line,valueMatcher):
-    data=[]
-    i = iter(valueMatcher.findall(line))
-    m = i.next()
-    if m[0]:
-        while m[-1]!='':
-            if m[1]=='"':
-                data.append(m[2].replace('""','"'))
-            else:
-                data.append(m[0])
-            m=i.next()
-        if m[1]=='"':
-            data.append(m[2].replace('""','"'))
-        else:
-            data.append(m[0])
-    return data
-
-
-
-
-
\ No newline at end of file
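A short usage sketch of the iterator above (hedged: the expected output follows from the regular expression, which strips the enclosing quotes and unescapes doubled quotes inside quoted fields):

    lines = iter(['a,"b, with comma",c',
                  '1,2,3'])
    for record in csvIterator(lines, sep=','):
        print record
    # expected:
    #   ['a', 'b, with comma', 'c']
    #   ['1', '2', '3']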
diff --git a/obitools/tagmatcher/__init__.py b/obitools/tagmatcher/__init__.py
deleted file mode 100644
index 880ead0..0000000
--- a/obitools/tagmatcher/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from obitools import NucSequence
-from obitools.location import locationGenerator,extractExternalRefs
-
-
-
-class TagMatcherSequence(NucSequence):
-    '''
-    Class used to represent a nucleic sequence mapped
-    onto a genome by the tagMatcher software.
-    '''
-
-    def __init__(self,seq,cd,locs,dm,rm):
-        NucSequence.__init__(self, seq, seq)
-        self['locations']=locs
-        self['conditions']=cd
-        self['dm']=dm
-        self['rm']=rm
-        self['tm']=dm+rm
-
-    def eminEmaxFilter(self,emin=None,emax=None):
-        result = [x for x in self['locations']
-                  if (emin is None or x['error'] >= emin)
-                  and (emax is None or x['error'] <= emax)]
-        self['locations']=result
-        dm=0
-        rm=0
-        for x in result:
-            if x.isDirect():
-                dm+=1
-            else:
-                rm+=1
-        self['dm']=dm
-        self['rm']=rm
-        self['tm']=dm+rm
-        return self
diff --git a/obitools/tagmatcher/options.py b/obitools/tagmatcher/options.py
deleted file mode 100644
index 45673ce..0000000
--- a/obitools/tagmatcher/options.py
+++ /dev/null
@@ -1,14 +0,0 @@
-def addTagMatcherErrorOptions(optionManager):
-    optionManager.add_option('-E','--emax',
-                             action='store',
-                             metavar="<##>",
-                             type="int",dest="emax",
-                             default=None,
-                             help="keep matches with no more than emax errors")
-
-    optionManager.add_option('-e','--emin',
-                             action='store',
-                             metavar="<##>",
-                             type="int",dest="emin",
-                             default=0,
-                             help="keep matches with at least emin errors")
diff --git a/obitools/tagmatcher/parser.py b/obitools/tagmatcher/parser.py
deleted file mode 100644
index a843e66..0000000
--- a/obitools/tagmatcher/parser.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import re
-import sys
-
-from obitools import tagmatcher
-from obitools.seqdb import nucEntryIterator
-from obitools.location.feature import Feature
-from obitools.location import locationGenerator
-
-_seqMatcher = re.compile('(?<=TG )[acgtrymkwsbdhvnACGTRYMKWSBDHVN]+')
-_cdMatcher = re.compile('(?<=CD ) *([^:]+?) +: +([0-9]+)')
-_loMatcher = re.compile('(?<=LO ) *([ACGTRYMKWSBDHVN]+) +([^ ]+) +([^ ]+) +\(([0-9]+)\)')
-_dmMatcher = re.compile('(?<=DM )[0-9]+')
-_rmMatcher = re.compile('(?<=RM )[0-9]+')
-
-
-def __tagmatcherparser(text):
-    try:
-        seq = _seqMatcher.search(text).group()
-        cd  = dict((x[0],int(x[1])) for x in _cdMatcher.findall(text))
-        locs = []
-
-        for (match,ac,loc,err) in _loMatcher.findall(text):
-            feat = Feature('location', locationGenerator(loc))
-            feat['error']=int(err)
-            feat['match']=match
-            feat['contig']=ac
-            locs.append(feat)
-
-        dm = int(_dmMatcher.search(text).group())
-        rm = int(_rmMatcher.search(text).group())
-
-    except AttributeError,e:
-        print >>sys.stderr,'======================================================='
-        print >>sys.stderr,text
-        print >>sys.stderr,'======================================================='
-        raise e
-
-    return (seq,cd,locs,dm,rm)
-
-def tagMatcherParser(text):
-    return tagmatcher.TagMatcherSequence(*__tagmatcherparser(text))
-
-
-class TagMatcherIterator(object):
-    _cdheadparser = re.compile('condition [0-9]+ : (.+)')
-
-    def __init__(self,file):
-        self._ni = nucEntryIterator(file)
-        self.header=self._ni.next()
-        self.conditions=TagMatcherIterator._cdheadparser.findall(self.header)
-
-    def next(self):
-        return tagMatcherParser(self._ni.next())
-
-    def __iter__(self):
-        return self
-
-def formatTagMatcher(tmseq,reader=None):
-    if isinstance(tmseq, TagMatcherIterator):
-        return tmseq.header
-
-    assert isinstance(tmseq,tagmatcher.TagMatcherSequence),'Only TagMatcherSequence can be used'
-    lo = '\n'.join(['LO %s %s %s (%d)' % (l['match'],l['contig'],l.locStr(),l['error'])
-                    for l in tmseq['locations']])
-    if reader is not None:
-        cd = '\n'.join(['CD %s : %d' % (x,tmseq['conditions'][x])
-                        for x in reader.conditions])
-    else:
-        cd = '\n'.join(['CD %s : %d' % (x,tmseq['conditions'][x])
-                        for x in tmseq['conditions']])
-
-    tg = 'TG %s' % str(tmseq)
-
-    e=[tg]
-    if cd:
-        e.append(cd)
-    if lo:
-        e.append(lo)
-
-    tm = 'TM %d' % tmseq['tm']
-    dm = 'DM %d' % tmseq['dm']
-    rm = 'RM %d' % tmseq['rm']
-
-    e.extend((tm,dm,rm,'//'))
-
-    return '\n'.join(e)
-
-
-
diff --git a/obitools/thermo/__init__.py b/obitools/thermo/__init__.py
deleted file mode 100644
index 492dbb9..0000000
--- a/obitools/thermo/__init__.py
+++ /dev/null
@@ -1,597 +0,0 @@
-from math import log
-from array import array
-from copy import deepcopy
-
-bpencoder={'A':1,'C':2,'G':3,'T':4,
-           'a':1,'c':2,'g':3,'t':4,
-           '-':0
-           }
-
-rvencoder={'A':4,'C':3,'G':2,'T':1,
-           'a':4,'c':3,'g':2,'t':1,
-           '-':0
-           }
-
-R = 1.987
-SALT_METHOD_SANTALUCIA = 1
-SALT_METHOD_OWCZARZY = 2
-DEF_CONC_PRIMERS = 8.e-7
-DEF_CONC_SEQUENCES = 0.
-DEF_SALT = 0.05
-forbidden_entropy = 0.
-forbidden_enthalpy = 1.e18
-
-# Nearest-neighbour parameter tables indexed as [x0][x1][y0][y1], where
-# 0 codes a gap, 1-4 code A,C,G,T and 5 codes a terminal position ('$').
-# They are zero-initialised here and filled in by initParams().
-__dH = [[[[0. for b in xrange(6)] for a in xrange(6)]
-         for y in xrange(6)] for x in xrange(6)]
-__dS = [[[[0. for b in xrange(6)] for a in xrange(6)]
-         for y in xrange(6)] for x in xrange(6)]
-
-def initParams(c1, c2, kp, sm, nparm=None):
-    global forbidden_entropy
-    global dH,dS
-
-    # a mutable default argument would be shared between calls,
-    # so the parameter dictionary is created afresh when needed
-    if nparm is None:
-        nparm = {}
-
-    dH=deepcopy(__dH)
-    dS=deepcopy(__dS)
-
-    nparm['Ct1'] = c1;
-    nparm['Ct2'] = c2;
-    nparm['kplus'] = kp;
-    maxCT = 1;
-
-    if(nparm['Ct2'] > nparm['Ct1']):
-        maxCT = 2
-
-    if(nparm['Ct1'] == nparm['Ct2']):
-        ctFactor = nparm['Ct1']/2
-    elif (maxCT == 1):
-        ctFactor = nparm['Ct1']-nparm['Ct2']/2
-    else:
-        ctFactor = nparm['Ct2']-nparm['Ct1']/2
-
-    nparm['rlogc'] = R * log(ctFactor)
-    forbidden_entropy = nparm['rlogc']
-    nparm['kfac'] = 0.368 * log(nparm['kplus'])
-    nparm['saltMethod'] = sm
-
-    # Set all X-/Y-, -X/Y- and X-/-Y so that TM will be VERY small!
-    for x in xrange(1,5):
-        for y in xrange(1,5):
-            dH[0][x][y][0]=forbidden_enthalpy;
-            dS[0][x][y][0]=forbidden_entropy;
-            dH[x][0][0][y]=forbidden_enthalpy;
-            dS[x][0][0][y]=forbidden_entropy;
-            dH[x][0][y][0]=forbidden_enthalpy;
-            dS[x][0][y][0]=forbidden_entropy;
-            # forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
-            dH[x][5][y][0]=forbidden_enthalpy;
-            dS[x][5][y][0]=forbidden_entropy;
-            dH[x][0][y][5]=forbidden_enthalpy;
-            dS[x][0][y][5]=forbidden_entropy;
-            dH[5][x][0][y]=forbidden_enthalpy;
-            dS[5][x][0][y]=forbidden_entropy;
-            dH[0][x][5][y]=forbidden_enthalpy;
-            dS[0][x][5][y]=forbidden_entropy;
-
-            #forbid X$/-Y etc.
-            dH[x][5][0][y]=forbidden_enthalpy;
-            dS[x][5][0][y]=forbidden_entropy;
-            dH[x][0][5][y]=forbidden_enthalpy;
-            dS[x][0][5][y]=forbidden_entropy;
-            dH[5][x][y][0]=forbidden_enthalpy;
-            dS[5][x][y][0]=forbidden_entropy;
-            dH[0][x][y][5]=forbidden_enthalpy;
-            dS[0][x][y][5]=forbidden_entropy;
-
-
-
-            #also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
-            dH[x][0][0][0]=forbidden_enthalpy;
-            dS[x][0][0][0]=forbidden_entropy;
-            dH[0][0][x][0]=forbidden_enthalpy;
-            dS[0][0][x][0]=forbidden_entropy;
-            # x-/-$
-            dH[x][0][0][5]=forbidden_enthalpy;
-            dS[x][0][0][5]=forbidden_entropy;
-            dH[5][0][0][x]=forbidden_enthalpy;
-            dS[5][0][0][x]=forbidden_entropy;
-            dH[0][5][x][0]=forbidden_enthalpy;
-            dS[0][5][x][0]=forbidden_entropy;
-            dH[0][x][5][0]=forbidden_enthalpy;
-            dS[0][x][5][0]=forbidden_entropy;
-
-    # forbid --/--
-    dH[0][0][0][0]=forbidden_enthalpy;
-    dS[0][0][0][0]=forbidden_entropy;
-
-    dH[5][0][0][0]=forbidden_enthalpy;
-    dS[5][0][0][0]=forbidden_entropy;
-    dH[0][0][5][0]=forbidden_enthalpy;
-    dS[0][0][5][0]=forbidden_entropy;
-    dH[0][5][5][0]=forbidden_enthalpy;
-    dS[0][5][5][0]=forbidden_entropy;
-
-    # Interior loops (double Mismatches)
-    iloop_entropy=-0.97
-    iloop_enthalpy=0.0
-
-    for x in xrange(1,5):
-        for y in xrange(1,5):
-            for a in xrange(1,5):
-                for b in xrange(1,5):
-                    # AT and CG pair, and as A=1, C=2, G=3, T=4 this means
-                    # we have Watson-Crick pairs if (x+a)==5 and (y+b)==5.
-                    if ( not ((x+a==5) or (y+b==5))):
-                        # No watson-crick-pair, i.e. double mismatch!
-                        # set enthalpy/entropy to loop expansion!
-                        dH[x][y][a][b] = iloop_enthalpy;
-                        dS[x][y][a][b] = iloop_entropy;
-
-    # xy/-- and --/xy (Bulge Loops of size > 1)
-    bloop_entropy=-1.3
-    bloop_enthalpy=0.0
-
-    for x in xrange(1,5):
-        for y in xrange(1,5):
-            dH[x][y][0][0] = bloop_enthalpy;
-            dS[x][y][0][0] = bloop_entropy;
-            dH[0][0][x][y] = bloop_enthalpy;
-            dS[0][0][x][y] = bloop_entropy;
-
-    # x-/ya and xa/y- as well as -x/ay and ax/-y
-    # bulge opening and closing parameters with
-    # adjacent matches / mismatches
-    # obulge_mism and cbulge_mism chosen so high to avoid
-    #     AAAAAAAAA
-    #     T--G----T
-    # being better than
-    #     AAAAAAAAA
-    #     TG------T
-    obulge_match_H =-2.66e3
-    obulge_match_S =-14.22
-    cbulge_match_H =-2.66e3
-    cbulge_match_S =-14.22
-    obulge_mism_H = 0.0
-    obulge_mism_S = -6.45
-    cbulge_mism_H = 0.0
-    cbulge_mism_S =-6.45
-
-    for x in xrange(1,5):
-        for y in xrange(1,5):
-            for a in xrange(1,5):
-                if (x+y==5):    # other base pair matches!
-                    dH[x][0][y][a]=obulge_match_H;    # bulge opening
-                    dS[x][0][y][a]=obulge_match_S;
-                    dH[x][a][y][0]=obulge_match_H;
-                    dS[x][a][y][0]=obulge_match_S;
-                    dH[0][x][a][y]=cbulge_match_H;    # bulge closing
-                    dS[0][x][a][y]=cbulge_match_S;
-                    dH[a][x][0][y]=cbulge_match_H;
-                    dS[a][x][0][y]=cbulge_match_S;
-                else:
-                    # mismatch in other base pair!
-                    dH[x][0][y][a]=obulge_mism_H;    # bulge opening
-                    dS[x][0][y][a]=obulge_mism_S;
-                    dH[x][a][y][0]=obulge_mism_H;
-                    dS[x][a][y][0]=obulge_mism_S;
-                    dH[0][x][a][y]=cbulge_mism_H;    # bulge closing
-                    dS[0][x][a][y]=cbulge_mism_S;
-                    dH[a][x][0][y]=cbulge_mism_H;
-                    dS[a][x][0][y]=cbulge_mism_S;
-
-    # Watson-Crick pairs (note that only ten are unique, as obviously
-    # 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.)
- dH[1][1][4][4]=-7.6e3; dS[1][1][4][4]=-21.3 # AA/TT 04 - dH[1][2][4][3]=-8.4e3; dS[1][2][4][3]=-22.4 # AC/TG adapted GT/CA - dH[1][3][4][2]=-7.8e3; dS[1][3][4][2]=-21.0 # AG/TC adapted CT/GA - dH[1][4][4][1]=-7.2e3; dS[1][4][4][1]=-20.4 # AT/TA 04 - dH[2][1][3][4]=-8.5e3; dS[2][1][3][4]=-22.7 # CA/GT 04 - dH[2][2][3][3]=-8.0e3; dS[2][2][3][3]=-19.9 # CC/GG adapted GG/CC - dH[2][3][3][2]=-10.6e3; dS[2][3][3][2]=-27.2 # CG/GC 04 - dH[2][4][3][1]=-7.8e3; dS[2][4][3][1]=-21.0 # CT/GA 04 - dH[3][1][2][4]=-8.2e3; dS[3][1][2][4]=-22.2 # GA/CT 04 - dH[3][2][2][3]=-9.8e3; dS[3][2][2][3]=-24.4 # GC/CG 04 - dH[3][3][2][2]=-8.0e3; dS[3][3][2][2]=-19.9 # GG/CC 04 - dH[3][4][2][1]=-8.4e3; dS[3][4][2][1]=-22.4 # GT/CA 04 - dH[4][1][1][4]=-7.2e3; dS[4][1][1][4]=-21.3 # TA/AT 04 - dH[4][2][1][3]=-8.2e3; dS[4][2][1][3]=-22.2 # TC/AG adapted GA/CT - dH[4][3][1][2]=-8.5e3; dS[4][3][1][2]=-22.7 # TG/AC adapted CA/GT - dH[4][4][1][1]=-7.6e3; dS[4][4][1][1]=-21.3 # TT/AA adapted AA/TT - - # A-C Mismatches (Values for pH 7.0) - dH[1][1][2][4]=7.6e3; dS[1][1][2][4]=20.2 # AA/CT - dH[1][1][4][2]=2.3e3; dS[1][1][4][2]=4.6 # AA/TC - dH[1][2][2][3]=-0.7e3; dS[1][2][2][3]=-3.8 # AC/CG - dH[1][2][4][1]=5.3e3; dS[1][2][4][1]=14.6 # AC/TA - dH[1][3][2][2]=0.6e3; dS[1][3][2][2]=-0.6 # AG/CC - dH[1][4][2][1]=5.3e3; dS[1][4][2][1]=14.6 # AT/CA - dH[2][1][1][4]=3.4e3; dS[2][1][1][4]=8.0 # CA/AT - dH[2][1][3][2]=1.9e3; dS[2][1][3][2]=3.7 # CA/GC - dH[2][2][1][3]=5.2e3; dS[2][2][1][3]=14.2 # CC/AG - dH[2][2][3][1]=0.6e3; dS[2][2][3][1]=-0.6 # CC/GA - dH[2][3][1][2]=1.9e3; dS[2][3][1][2]=3.7 # CG/AC - dH[2][4][1][1]=2.3e3; dS[2][4][1][1]=4.6 # CT/AA - dH[3][1][2][2]=5.2e3; dS[3][1][2][2]=14.2 # GA/CC - dH[3][2][2][1]=-0.7e3; dS[3][2][2][1]=-3.8 # GC/CA - dH[4][1][1][2]=3.4e3; dS[4][1][1][2]=8.0 # TA/AC - dH[4][2][1][1]=7.6e3; dS[4][2][1][1]=20.2 # TC/AA - - # C-T Mismatches - dH[1][2][4][4]=0.7e3; dS[1][2][4][4]=0.2 # AC/TT - dH[1][4][4][2]=-1.2e3; dS[1][4][4][2]=-6.2 # AT/TC - dH[2][1][4][4]=1.0e3; dS[2][1][4][4]=0.7 # CA/TT - dH[2][2][3][4]=-0.8e3; dS[2][2][3][4]=-4.5 # CC/GT - dH[2][2][4][3]=5.2e3; dS[2][2][4][3]=13.5 # CC/TG - dH[2][3][4][2]=-1.5e3; dS[2][3][4][2]=-6.1 # CG/TC - dH[2][4][3][2]=-1.5e3; dS[2][4][3][2]=-6.1 # CT/GC - dH[2][4][4][1]=-1.2e3; dS[2][4][4][1]=-6.2 # CT/TA - dH[3][2][2][4]=2.3e3; dS[3][2][2][4]=5.4 # GC/CT - dH[3][4][2][2]=5.2e3; dS[3][4][2][2]=13.5 # GT/CC - dH[4][1][2][4]=1.2e3; dS[4][1][2][4]=0.7 # TA/CT - dH[4][2][2][3]=2.3e3; dS[4][2][2][3]=5.4 # TC/CG - dH[4][2][1][4]=1.2e3; dS[4][2][1][4]=0.7 # TC/AT - dH[4][3][2][2]=-0.8e3; dS[4][3][2][2]=-4.5 # TG/CC - dH[4][4][2][1]=0.7e3; dS[4][4][2][1]=0.2 # TT/CA - dH[4][4][1][2]=1.0e3; dS[4][4][1][2]=0.7 # TT/AC - - # G-A Mismatches - dH[1][1][3][4]=3.0e3; dS[1][1][3][4]=7.4 # AA/GT - dH[1][1][4][3]=-0.6e3; dS[1][1][4][3]=-2.3 # AA/TG - dH[1][2][3][3]=0.5e3; dS[1][2][3][3]=3.2 # AC/GG - dH[1][3][3][2]=-4.0e3; dS[1][3][3][2]=-13.2 # AG/GC - dH[1][3][4][1]=-0.7e3; dS[1][3][4][1]=-2.3 # AG/TA - dH[1][4][3][1]=-0.7e3; dS[1][4][3][1]=-2.3 # AT/GA - dH[2][1][3][3]=-0.7e3; dS[2][1][3][3]=-2.3 # CA/GG - dH[2][3][3][1]=-4.0e3; dS[2][3][3][1]=-13.2 # CG/GA - dH[3][1][1][4]=0.7e3; dS[3][1][1][4]=0.7 # GA/AT - dH[3][1][2][3]=-0.6e3; dS[3][1][2][3]=-1.0 # GA/CG - dH[3][2][1][3]=-0.6e3; dS[3][2][1][3]=-1.0 # GC/AG - dH[3][3][1][2]=-0.7e3; dS[3][3][1][2]=-2.3 # GG/AC - dH[3][3][2][1]=0.5e3; dS[3][3][2][1]=3.2 # GG/CA - dH[3][4][1][1]=-0.6e3; dS[3][4][1][1]=-2.3 # GT/AA - dH[4][1][1][3]=0.7e3; dS[4][1][1][3]=0.7 # TA/AG - dH[4][3][1][1]=3.0e3; dS[4][3][1][1]=7.4 # 
TG/AA - - # G-T Mismatches - dH[1][3][4][4]=1.0e3; dS[1][3][4][4]=0.9 # AG/TT - dH[1][4][4][3]=-2.5e3; dS[1][4][4][3]=-8.3 # AT/TG - dH[2][3][3][4]=-4.1e3; dS[2][3][3][4]=-11.7 # CG/GT - dH[2][4][3][3]=-2.8e3; dS[2][4][3][3]=-8.0 # CT/GG - dH[3][1][4][4]=-1.3e3; dS[3][1][4][4]=-5.3 # GA/TT - dH[3][2][4][3]=-4.4e3; dS[3][2][4][3]=-12.3 # GC/TG - dH[3][3][2][4]=3.3e3; dS[3][3][2][4]=10.4 # GG/CT - dH[3][3][4][2]=-2.8e3; dS[3][3][4][2]=-8.0 # GG/TC -# dH[3][3][4][4]=5.8e3; dS[3][3][4][4]=16.3 # GG/TT - dH[3][4][2][3]=-4.4e3; dS[3][4][2][3]=-12.3 # GT/CG - dH[3][4][4][1]=-2.5e3; dS[3][4][4][1]=-8.3 # GT/TA -# dH[3][4][4][3]=4.1e3; dS[3][4][4][3]=9.5 # GT/TG - dH[4][1][3][4]=-0.1e3; dS[4][1][3][4]=-1.7 # TA/GT - dH[4][2][3][3]=3.3e3; dS[4][2][3][3]=10.4 # TC/GG - dH[4][3][1][4]=-0.1e3; dS[4][3][1][4]=-1.7 # TG/AT - dH[4][3][3][2]=-4.1e3; dS[4][3][3][2]=-11.7 # TG/GC -# dH[4][3][3][4]=-1.4e3; dS[4][3][3][4]=-6.2 # TG/GT - dH[4][4][1][3]=-1.3e3; dS[4][4][1][3]=-5.3 # TT/AG - dH[4][4][3][1]=1.0e3; dS[4][4][3][1]=0.9 # TT/GA -# dH[4][4][3][3]=5.8e3; dS[4][4][3][3]=16.3 # TT/GG - - # A-A Mismatches - dH[1][1][1][4]=4.7e3; dS[1][1][1][4]=12.9 # AA/AT - dH[1][1][4][1]=1.2e3; dS[1][1][4][1]=1.7 # AA/TA - dH[1][2][1][3]=-2.9e3; dS[1][2][1][3]=-9.8 # AC/AG - dH[1][3][1][2]=-0.9e3; dS[1][3][1][2]=-4.2 # AG/AC - dH[1][4][1][1]=1.2e3; dS[1][4][1][1]=1.7 # AT/AA - dH[2][1][3][1]=-0.9e3; dS[2][1][3][1]=-4.2 # CA/GA - dH[3][1][2][1]=-2.9e3; dS[3][1][2][1]=-9.8 # GA/CA - dH[4][1][1][1]=4.7e3; dS[4][1][1][1]=12.9 # TA/AA - - # C-C Mismatches - dH[1][2][4][2]=0.0e3; dS[1][2][4][2]=-4.4 # AC/TC - dH[2][1][2][4]=6.1e3; dS[2][1][2][4]=16.4 # CA/CT - dH[2][2][2][3]=3.6e3; dS[2][2][2][3]=8.9 # CC/CG - dH[2][2][3][2]=-1.5e3; dS[2][2][3][2]=-7.2 # CC/GC - dH[2][3][2][2]=-1.5e3; dS[2][3][2][2]=-7.2 # CG/CC - dH[2][4][2][1]=0.0e3; dS[2][4][2][1]=-4.4 # CT/CA - dH[3][2][2][2]=3.6e3; dS[3][2][2][2]=8.9 # GC/CC - dH[4][2][1][2]=6.1e3; dS[4][2][1][2]=16.4 # TC/AC - - # G-G Mismatches - dH[1][3][4][3]=-3.1e3; dS[1][3][4][3]=-9.5 # AG/TG - dH[2][3][3][3]=-4.9e3; dS[2][3][3][3]=-15.3 # CG/GG - dH[3][1][3][4]=1.6e3; dS[3][1][3][4]=3.6 # GA/GT - dH[3][2][3][3]=-6.0e3; dS[3][2][3][3]=-15.8 # GC/GG - dH[3][3][2][3]=-6.0e3; dS[3][3][2][3]=-15.8 # GG/CG - dH[3][3][3][2]=-4.9e3; dS[3][3][3][2]=-15.3 # GG/GC - dH[3][4][3][1]=-3.1e3; dS[3][4][3][1]=-9.5 # GT/GA - dH[4][3][1][3]=1.6e3; dS[4][3][1][3]=3.6 # TG/AG - - # T-T Mismatches - dH[1][4][4][4]=-2.7e3; dS[1][4][4][4]=-10.8 # AT/TT - dH[2][4][3][4]=-5.0e3; dS[2][4][3][4]=-15.8 # CT/GT - dH[3][4][2][4]=-2.2e3; dS[3][4][2][4]=-8.4 # GT/CT - dH[4][1][4][4]=0.2e3; dS[4][1][4][4]=-1.5 # TA/TT - dH[4][2][4][3]=-2.2e3; dS[4][2][4][3]=-8.4 # TC/TG - dH[4][3][4][2]=-5.0e3; dS[4][3][4][2]=-15.8 # TG/TC - dH[4][4][1][4]=0.2e3; dS[4][4][1][4]=-1.5 # TT/AT - dH[4][4][4][1]=-2.7e3; dS[4][4][4][1]=-10.8 # TT/TA - - # Dangling Eds - dH[5][1][1][4]=-0.7e3; dS[5][1][1][4]=-0.8 # $A/AT - dH[5][1][2][4]=4.4e3; dS[5][1][2][4]=14.9 # $A/CT - dH[5][1][3][4]=-1.6e3; dS[5][1][3][4]=-3.6 # $A/GT - dH[5][1][4][4]=2.9e3; dS[5][1][4][4]=10.4 # $A/TT - dH[5][2][1][3]=-2.1e3; dS[5][2][1][3]=-3.9 # $C/AG - dH[5][2][2][3]=-0.2e3; dS[5][2][2][3]=-0.1 # $C/CG - dH[5][2][3][3]=-3.9e3; dS[5][2][3][3]=-11.2 # $C/GG - dH[5][2][4][3]=-4.4e3; dS[5][2][4][3]=-13.1 # $C/TG - dH[5][3][1][2]=-5.9e3; dS[5][3][1][2]=-16.5 # $G/AC - dH[5][3][2][2]=-2.6e3; dS[5][3][2][2]=-7.4 # $G/CC - dH[5][3][3][2]=-3.2e3; dS[5][3][3][2]=-10.4 # $G/GC - dH[5][3][4][2]=-5.2e3; dS[5][3][4][2]=-15.0 # $G/TC - dH[5][4][1][1]=-0.5e3; 
dS[5][4][1][1]=-1.1 # $T/AA - dH[5][4][2][1]=4.7e3; dS[5][4][2][1]=14.2 # $T/CA - dH[5][4][3][1]=-4.1e3; dS[5][4][3][1]=-13.1 # $T/GA - dH[5][4][4][1]=-3.8e3; dS[5][4][4][1]=-12.6 # $T/TA - dH[1][5][4][1]=-2.9e3; dS[1][5][4][1]=-7.6 # A$/TA - dH[1][5][4][2]=-4.1e3; dS[1][5][4][2]=-13.0 # A$/TC - dH[1][5][4][3]=-4.2e3; dS[1][5][4][3]=-15.0 # A$/TG - dH[1][5][4][4]=-0.2e3; dS[1][5][4][4]=-0.5 # A$/TT - dH[1][1][5][4]=0.2e3; dS[1][1][5][4]=2.3 # AA/$T - dH[1][1][4][5]=-0.5e3; dS[1][1][4][5]=-1.1 # AA/T$ - dH[1][2][5][3]=-6.3e3; dS[1][2][5][3]=-17.1 # AC/$G - dH[1][2][4][5]=4.7e3; dS[1][2][4][5]=14.2 # AC/T$ - dH[1][3][5][2]=-3.7e3; dS[1][3][5][2]=-10.0 # AG/$C - dH[1][3][4][5]=-4.1e3; dS[1][3][4][5]=-13.1 # AG/T$ - dH[1][4][5][1]=-2.9e3; dS[1][4][5][1]=-7.6 # AT/$A - dH[1][4][4][5]=-3.8e3; dS[1][4][4][5]=-12.6 # AT/T$ - dH[2][5][3][1]=-3.7e3; dS[2][5][3][1]=-10.0 # C$/GA - dH[2][5][3][2]=-4.0e3; dS[2][5][3][2]=-11.9 # C$/GC - dH[2][5][3][3]=-3.9e3; dS[2][5][3][3]=-10.9 # C$/GG - dH[2][5][3][4]=-4.9e3; dS[2][5][3][4]=-13.8 # C$/GT - dH[2][1][5][4]=0.6e3; dS[2][1][5][4]=3.3 # CA/$T - dH[2][1][3][5]=-5.9e3; dS[2][1][3][5]=-16.5 # CA/G$ - dH[2][2][5][3]=-4.4e3; dS[2][2][5][3]=-12.6 # CC/$G - dH[2][2][3][5]=-2.6e3; dS[2][2][3][5]=-7.4 # CC/G$ - dH[2][3][5][2]=-4.0e3; dS[2][3][5][2]=-11.9 # CG/$C - dH[2][3][3][5]=-3.2e3; dS[2][3][3][5]=-10.4 # CG/G$ - dH[2][4][5][1]=-4.1e3; dS[2][4][5][1]=-13.0 # CT/$A - dH[2][4][3][5]=-5.2e3; dS[2][4][3][5]=-15.0 # CT/G$ - dH[3][5][2][1]=-6.3e3; dS[3][5][2][1]=-17.1 # G$/CA - dH[3][5][2][2]=-4.4e3; dS[3][5][2][2]=-12.6 # G$/CC - dH[3][5][2][3]=-5.1e3; dS[3][5][2][3]=-14.0 # G$/CG - dH[3][5][2][4]=-4.0e3; dS[3][5][2][4]=-10.9 # G$/CT - dH[3][1][5][4]=-1.1e3; dS[3][1][5][4]=-1.6 # GA/$T - dH[3][1][2][5]=-2.1e3; dS[3][1][2][5]=-3.9 # GA/C$ - dH[3][2][5][3]=-5.1e3; dS[3][2][5][3]=-14.0 # GC/$G - dH[3][2][2][5]=-0.2e3; dS[3][2][2][5]=-0.1 # GC/C$ - dH[3][3][5][2]=-3.9e3; dS[3][3][5][2]=-10.9 # GG/$C - dH[3][3][2][5]=-3.9e3; dS[3][3][2][5]=-11.2 # GG/C$ - dH[3][4][5][1]=-4.2e3; dS[3][4][5][1]=-15.0 # GT/$A - dH[3][4][2][5]=-4.4e3; dS[3][4][2][5]=-13.1 # GT/C$ - dH[4][5][1][1]=0.2e3; dS[4][5][1][1]=2.3 # T$/AA - dH[4][5][1][2]=0.6e3; dS[4][5][1][2]=3.3 # T$/AC - dH[4][5][1][3]=-1.1e3; dS[4][5][1][3]=-1.6 # T$/AG - dH[4][5][1][4]=-6.9e3; dS[4][5][1][4]=-20.0 # T$/AT - dH[4][1][5][4]=-6.9e3; dS[4][1][5][4]=-20.0 # TA/$T - dH[4][1][1][5]=-0.7e3; dS[4][1][1][5]=-0.7 # TA/A$ - dH[4][2][5][3]=-4.0e3; dS[4][2][5][3]=-10.9 # TC/$G - dH[4][2][1][5]=4.4e3; dS[4][2][1][5]=14.9 # TC/A$ - dH[4][3][5][2]=-4.9e3; dS[4][3][5][2]=-13.8 # TG/$C - dH[4][3][1][5]=-1.6e3; dS[4][3][1][5]=-3.6 # TG/A$ - dH[4][4][5][1]=-0.2e3; dS[4][4][5][1]=-0.5 # TT/$A - dH[4][4][1][5]=2.9e3; dS[4][4][1][5]=10.4 # TT/A$ - - - nparm['dH']=dH - nparm['dS']=dS - - return nparm - - -defaultParm=initParams(DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT, SALT_METHOD_SANTALUCIA) - -def seqencoder(seq): - return [bpencoder[x] for x in seq] - -def getInitialEntropy(nparm=defaultParm): - return -5.9+nparm['rlogc'] - -def getEnthalpy(x0, x1, y0, y1,nparm=defaultParm): - return nparm['dH'][x0][x1][y0][y1] - -def GetEntropy(x0, x1, y0, y1,nparm=defaultParm): - - nx0=x0 - nx1=x1 - ny0=y0 - ny1=y1 - dH=nparm['dH'] - dS=nparm['dS'] - answer = dS[nx0][nx1][ny0][ny1] - - if (nparm['saltMethod'] == SALT_METHOD_SANTALUCIA): - if(nx0!=5 and 1<= nx1 and nx1<=4): - answer += 0.5*nparm['kfac'] - - if(ny1!=5 and 1<= ny0 and ny0<=4): - answer += 0.5*nparm['kfac'] - - if (nparm['saltMethod'] == SALT_METHOD_OWCZARZY): - logk = 
log(nparm['kplus']);
-        answer += dH[nx0][nx1][ny0][ny1]*((4.29 * nparm['gcContent']-3.95)* 1e-5 * logk + 0.0000094*logk**2);
-
-    return answer;
-
-def CalcTM(entropy,enthalpy):
-    tm = 0
-    if (enthalpy>=forbidden_enthalpy) :
-        return 0;
-
-    if (entropy<0) :
-        tm = enthalpy/entropy
-        if (tm<0):
-            return 0;
-
-    return tm;
-
-
-
-
-def countGCContent(seq):
-    count = 0;
-    for k in seq :
-        if k in 'cgGC':
-            count+=1;
-    return count;
-
-
-#def cleanSeq (inseq,outseq,length):
-#
-#    seqlen = len(inseq)
-#    if (len != 0)
-#        seqlen = length;
-#
-#    j=0
-#    for i in xrange(seqlen):
-#    {
-#        switch (inseq[i])
-#        {
-#            case 'a':
-#            case '\0':
-#            case 'A':
-#                outseq[j++] = 'A'; break;
-#            case 'c':
-#            case '\1':
-#            case 'C':
-#                outseq[j++] = 'C'; break;
-#            case 'g':
-#            case '\2':
-#            case 'G':
-#                outseq[j++] = 'G'; break;
-#            case 't':
-#            case '\3':
-#            case 'T':
-#                outseq[j++] = 'T'; break;
-#        }
-#    }
-#    outseq[j] = '\0';
-#}
-
-def calcSelfTM(seq,nparm=defaultParm):
-    dH=nparm['dH']
-    dS=nparm['dS']
-    length=len(seq)
-
-    thedH = 0;
-    thedS = -5.9+nparm['rlogc']
-    for i in xrange(1,length):
-        c1 = rvencoder[seq[i-1]];
-        c2 = rvencoder[seq[i]];
-        c3 = bpencoder[seq[i-1]];
-        c4 = bpencoder[seq[i]];
-
-        thedH += dH[c3][c4][c1][c2];
-        thedS += GetEntropy(c3, c4, c1, c2, nparm)
-
-    mtemp = CalcTM(thedS,thedH);
-#    print thedH,thedS,nparm['rlogc']
-    return mtemp-273.15;
-
-
-def calcTMTwoSeq(seq1,seq2,nparm=defaultParm):
-
-    thedH = 0;
-    thedS = -5.9+nparm['rlogc']
-    dH=nparm['dH']
-    dS=nparm['dS']
-    length=len(seq1)
-
-    for i in xrange(1,length):
-        c1 = rvencoder[seq2[i-1]]
-        c2 = rvencoder[seq2[i]]
-        c3 = bpencoder[seq1[i-1]]
-        c4 = bpencoder[seq1[i]]
-
-        thedH += dH[c3][c4][c1][c2]
-        thedS += GetEntropy(c3, c4, c1, c2, nparm)
-
-    mtemp = CalcTM(thedS,thedH);
-#    print thedH,thedS,nparm['rlogc']
-
-    return mtemp-273.15;
-
-
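A hedged usage sketch of the melting-temperature helpers defined above. calcSelfTM() scores a primer against its perfect complement; calcTMTwoSeq() pairs two strands position by position, so passing the same string twice should agree with calcSelfTM(). Both return degrees Celsius; the sequences below are illustrative, gap-free A/C/G/T strings:

    parm = initParams(DEF_CONC_PRIMERS, DEF_CONC_SEQUENCES,
                      DEF_SALT, SALT_METHOD_SANTALUCIA)

    tm1 = calcSelfTM('ACGTGCTAGCTAGGCTAGCT', parm)
    tm2 = calcTMTwoSeq('ACGTGCTAGCTAGGCTAGCT',
                       'ACGTGCTAGCTAGGCTAGCT', parm)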
diff --git a/obitools/tools/__init__.py b/obitools/tools/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/tools/_solexapairend.so b/obitools/tools/_solexapairend.so
deleted file mode 100755
index 2d9e075..0000000
Binary files a/obitools/tools/_solexapairend.so and /dev/null differ
diff --git a/obitools/tools/solexapairend.py b/obitools/tools/solexapairend.py
deleted file mode 100644
index 609f533..0000000
--- a/obitools/tools/solexapairend.py
+++ /dev/null
@@ -1,51 +0,0 @@
-'''
-Created on 17 mai 2010
-
-@author: coissac
-'''
-
-from obitools.alignment import columnIterator
-
-
-def iterOnAligment(ali):
-    pos0=0
-    pos1=len(ali[1].wrapped)-1
-    begin0=False
-    end0=False
-    begin1=False
-    end1=False
-    for nuc0,nuc1 in columnIterator(ali):
-        if nuc0=='-':
-            if begin0:
-                if not end0:
-                    score0 = ( ali[0].wrapped.quality[pos0-1]
-                              +ali[0].wrapped.quality[pos0]
-                             )/2
-                else:
-                    score0 = 1.
-            else:
-                score0 = 0.
-        else:
-            begin0=True
-            score0 = ali[0].wrapped.quality[pos0]
-            pos0+=1
-            end0= pos0==len(ali[0].wrapped)
-
-        if nuc1=='-':
-            if begin1:
-                if not end1:
-                    score1 = ( ali[1].wrapped.wrapped.quality[pos1]
-                              +ali[1].wrapped.wrapped.quality[pos1+1]
-                             )/2
-                else:
-                    score1 = 0.
-            else:
-                score1 = 1.
-        else:
-            begin1=True
-            score1 = ali[1].wrapped.wrapped.quality[pos1]
-            pos1-=1
-            end1=pos1<0
-
-        result = (nuc0,score0,nuc1,score1)
-        yield result
diff --git a/obitools/tree/__init__.py b/obitools/tree/__init__.py
deleted file mode 100644
index facb5ff..0000000
--- a/obitools/tree/__init__.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import re
-
-
-class Tree(set):
-    def registerNode(self,node):
-        assert isinstance(node, TreeNode)
-        self.add(node)
-
-    def childNodeIterator(self,node):
-        assert isinstance(node, TreeNode)
-        return (x for x in self if x._parent==node)
-
-    def subTreeSize(self,node):
-        n=1
-        for subnode in self.childNodeIterator(node):
-            n+=self.subTreeSize(subnode)
-        return n
-
-    def getRoot(self):
-        roots = [x for x in self if x._parent is None]
-        assert len(roots)==1,'Tree cannot have several root nodes'
-        return roots[0]
-
-    def ancestorNodeIterator(self,node):
-        assert isinstance(node, TreeNode)
-        while node._parent is not None:
-            yield node
-            node=node._parent
-        yield node
-
-    def terminalNodeIterator(self):
-        return (x for x in self if x._isterminal)
-
-    def commonAncestor(self,node1,node2):
-        anc1 = set(x for x in self.ancestorNodeIterator(node1))
-        rep = [x for x in self.ancestorNodeIterator(node2)
-               if x in anc1]
-        assert len(rep)>=1
-        return rep[0]
-
-    def getDist(self,node1,node2):
-        ca = self.commonAncestor(node1, node2)
-        dist = 0
-        while node1 != ca:
-            dist+=node1._dist
-            node1=node1._parent
-        while node2 != ca:
-            dist+=node2._dist
-            node2=node2._parent
-        return dist
-
-    def farestNodes(self):
-        dmax=0
-        n1=None
-        n2=None
-        for node1 in self.terminalNodeIterator():
-            for node2 in self.terminalNodeIterator():
-                d = self.getDist(node1, node2)
-                if d > dmax:
-                    dmax = d
-                    n1=node1
-                    n2=node2
-        return n1,n2,dmax
-
-    def setRoot(self,node,dist):
-        assert node in self
-        assert node._parent and node._dist > dist
-
-        newroot = TreeNode(self)
-        parent = node._parent
-        node._parent = newroot
-        compdist = node._dist - dist
-        node._dist=dist
-        node = parent
-
-        while node:
-            parent = node._parent
-            if parent:
-                dist = node._dist
-
-            node._parent = newroot
-            node._dist = compdist
-
-            newroot = node
-            node = parent
-
-            if node:
-                compdist=dist
-
-        for child in self.childNodeIterator(newroot):
-            child._parent = newroot._parent
-            child._dist += newroot._dist
-
-        self.remove(newroot)
-
-
-class TreeNode(object):
-    def __init__(self,tree,name=None,dist=None,bootstrap=None,**info):
-        self._parent=None
-        self._name=name
-        self._dist=dist
-        self._bootstrap=bootstrap
-        self._info=info
-        tree.registerNode(self)
-        self._isterminal=True
-
-
-    def linkToParent(self,parent):
-        assert isinstance(parent, TreeNode) or parent is None
-        self._parent=parent
-        if parent is not None:
-            parent._isterminal=False
-
-
-
-
diff --git a/obitools/tree/dot.py b/obitools/tree/dot.py
deleted file mode 100644
index a21c4a1..0000000
--- a/obitools/tree/dot.py
+++ /dev/null
@@ -1,18 +0,0 @@
-
-from obitools.utils import universalOpen
-from obitools.tree import Tree,TreeNode
-
-def nodeWriter(tree,node,nodes):
-    data=[]
-    if node._parent:
-        data.append('%d -> %d ' % (nodes[node],nodes[node._parent]))
-    return "\n".join(data)
-
-
-def treeWriter(tree):
-    nodes=dict(map(None,tree,xrange(len(tree))))
-    code=[]
-    for node in tree:
-        code.append(nodeWriter(tree,node,nodes))
-    code = "\n".join(code)
-    return 'digraph tree { node [shape=point]\n%s\n};' % code
\ No newline at end of file
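A small, hedged sketch of how Tree, TreeNode and the dot writer above fit together (topology and branch lengths are invented):

    t = Tree()
    root = TreeNode(t)
    a = TreeNode(t, name='A', dist=0.1)
    b = TreeNode(t, name='B', dist=0.3)
    a.linkToParent(root)
    b.linkToParent(root)

    print t.subTreeSize(root)     # 3: root plus its two leaves
    print t.getDist(a, b)         # 0.4, summed through commonAncestor()
    print treeWriter(t)           # 'digraph tree { node [shape=point] ... };'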
diff --git a/obitools/tree/layout.py b/obitools/tree/layout.py
deleted file mode 100644
index a39ba77..0000000
--- a/obitools/tree/layout.py
+++ /dev/null
@@ -1,103 +0,0 @@
-
-class NodeLayout(dict):
-    '''
-    Layout data associated to a tree node.
-    '''
-    pass
-
-class TreeLayout(dict):
-    '''
-    Description of a phylogenetic tree layout
-
-    @see:
-    '''
-    def addNode(self,node):
-        self[node]=NodeLayout()
-
-    def setAttribute(self,node,key,value):
-        self[node][key]=value
-
-    def hasAttribute(self,node,key):
-        return key in self[node]
-
-    def getAttribute(self,node,key,default=None):
-        return self[node].get(key,default)
-
-    def setNodesColor(self,color,predicate=True):
-        '''
-        Set the 'color' attribute of every node selected by the predicate.
-
-        @param color: a color value, or a callable returning one for a node
-        @type color: str, tuple or callable
-        @param predicate: a boolean, or a callable evaluated on each node
-        @type predicate: bool or callable
-        '''
-        for node in self:
-            if callable(predicate):
-                change = predicate(node)
-            else:
-                change = predicate
-
-            if change:
-                if callable(color):
-                    c = color(node)
-                else:
-                    c = color
-                self.setAttribute(node, 'color', c)
-
-    def setCircular(self,iscircularpredicat):
-        for node in self:
-            if callable(iscircularpredicat):
-                change = iscircularpredicat(node)
-            else:
-                change = iscircularpredicat
-
-            if change:
-                self.setAttribute(node, 'shape', 'circle')
-            else:
-                self.setAttribute(node, 'shape', 'square')
-
-    def setRadius(self,radius,predicate=True):
-        for node in self:
-            if callable(predicate):
-                change = predicate(node)
-            else:
-                change = predicate
-
-            if change:
-                if callable(radius):
-                    r = radius(node)
-                else:
-                    r = radius
-                self.setAttribute(node, 'radius', r)
-
-def predicatGeneratorIsInfoEqual(info,value):
-    def isInfoEqual(node):
-        data = node._info
-        return data is not None and info in data and data[info]==value
-
-    return isInfoEqual
-
-def isTerminalNode(node):
-    return node._isterminal
-
-def constantColorGenerator(color):
-    def colorMaker(node):
-        return color
-
-    return colorMaker
-
-def notPredicatGenerator(predicate):
-    def notpred(x):
-        return not predicate(x)
-    return notpred
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/tree/newick.py b/obitools/tree/newick.py
deleted file mode 100644
index c69d0d3..0000000
--- a/obitools/tree/newick.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import re
-import sys
-
-from obitools.utils import universalOpen
-from obitools.tree import Tree,TreeNode
-
-def subNodeIterator(data):
-    level=0
-    start = 1
-    if data[0]=='(':
-        for i in xrange(1,len(data)):
-            c=data[i]
-            if c=='(':
-                level+=1
-            elif c==')':
-                level-=1
-            if c==',' and not level:
-                yield data[start:i]
-                start = i+1
-        yield data[start:i]
-    else:
-        yield data
-
-
-# named groups: subnodes, name, bootstrap, distance (consumed by nodeParser below)
-_nodeParser=re.compile('\s*(?P<subnodes>\(.*\))?(?P<name>[^ :]+)? *(?P<bootstrap>[0-9.]+)?(:(?P<distance>-?[0-9.]+))?')
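The pattern captures four named groups (subnodes, name, bootstrap and distance), which nodeParser() just below converts to typed values. A hedged illustration of the grammar it accepts; note that a bootstrap value must be separated from the subtree by a space, matching what nodeWriter() emits:

    nodeParser('A:0.25')
    # -> {'subnodes': None, 'name': 'A', 'bootstrap': None, 'distance': 0.25}

    nodeParser('(A:0.1,B:0.2) 90:0.05')
    # -> {'subnodes': '(A:0.1,B:0.2)', 'name': None,
    #     'bootstrap': 90.0, 'distance': 0.05}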
-
-def nodeParser(data):
-    parsedNode = _nodeParser.match(data).groupdict(0)
-    if not parsedNode['name']:
-        parsedNode['name']=None
-
-    if not parsedNode['bootstrap']:
-        parsedNode['bootstrap']=None
-    else:
-        parsedNode['bootstrap']=float(parsedNode['bootstrap'])
-
-    if not parsedNode['distance']:
-        parsedNode['distance']=None
-    else:
-        parsedNode['distance']=float(parsedNode['distance'])
-
-    if not parsedNode['subnodes']:
-        parsedNode['subnodes']=None
-
-    return parsedNode
-
-_cleanTreeData=re.compile('\s+')
-
-def treeParser(data,tree=None,parent=None):
-    if tree is None:
-        tree = Tree()
-        data = _cleanTreeData.sub(' ',data).strip()
-
-    parsedNode = nodeParser(data)
-    node = TreeNode(tree,
-                    parsedNode['name'],
-                    parsedNode['distance'],
-                    parsedNode['bootstrap'])
-
-    node.linkToParent(parent)
-
-    if parsedNode['subnodes']:
-        for subnode in subNodeIterator(parsedNode['subnodes']):
-            treeParser(subnode,tree,node)
-    return tree
-
-_treecomment=re.compile('\[.*\]')
-
-def treeIterator(file):
-    file = universalOpen(file)
-    data = file.read()
-
-    comment = _treecomment.findall(data)
-    data=_treecomment.sub('',data).strip()
-
-    if comment:
-        comment=comment[0]
-    else:
-        comment=None
-    for tree in data.split(';'):
-        t = treeParser(tree)
-        if comment:
-            t.comment=comment
-        yield t
-
-def nodeWriter(tree,node,deep=0):
-    name = node._name
-    if name is None:
-        name=''
-
-    distance=node._dist
-    if distance is None:
-        distance=''
-    else:
-        distance = ':%6.5f' % distance
-
-    bootstrap=node._bootstrap
-    if bootstrap is None:
-        bootstrap=''
-    else:
-        bootstrap=' %d' % int(bootstrap)
-
-    nodeseparator = ',\n' + ' ' * (deep+1)
-
-    subnodes = nodeseparator.join([nodeWriter(tree, x, deep+1)
-                                   for x in tree.childNodeIterator(node)])
-    if subnodes:
-        subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
-
-    return '%s%s%s%s' % (subnodes,name,bootstrap,distance)
-
-def treeWriter(tree,startnode=None):
-    if startnode is not None:
-        root=startnode
-    else:
-        root = tree.getRoot()
-    return nodeWriter(tree,root)+';'
diff --git a/obitools/tree/svg.py b/obitools/tree/svg.py
deleted file mode 100644
index ff51a8c..0000000
--- a/obitools/tree/svg.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import math
-
-from obitools.svg import Scene,Circle,Line,Rectangle,Text
-from obitools.tree import Tree
-
-def displayTreeLayout(layout,width=400,height=400,radius=3,scale=1.0):
-    '''
-    Convert a tree layout object into SVG source.
- - @param layout: the tree layout object - @type layout: obitools.tree.layout.TreeLayout - @param width: svg document width - @type width: int - @param height: svg document height - @type height: int - @param radius: default radius of node in svg unit (default 3) - @type radius: int - @param scale: scale factor applied to the svg coordinates (default 1.0) - @type scale: float - - @return: str containing svg code - ''' - xmin = min(layout.getAttribute(n,'x') for n in layout) - xmax = max(layout.getAttribute(n,'x') for n in layout) - ymin = min(layout.getAttribute(n,'y') for n in layout) - ymax = max(layout.getAttribute(n,'y') for n in layout) - - dx = xmax - xmin - dy = ymax - ymin - - xscale = width * 0.95 / dx * scale - yscale = height * 0.95 / dy * scale - - def X(x): - return (x - xmin ) * xscale + width * 0.025 - - def Y(y): - return (y - ymin ) * yscale + height * 0.025 - - scene = Scene('unrooted', height, width) - - for n in layout: - if n._parent is not None: - parent = n._parent - xf = layout.getAttribute(n,'x') - yf = layout.getAttribute(n,'y') - xp = layout.getAttribute(parent,'x') - yp = layout.getAttribute(parent,'y') - scene.add(Line((X(xf),Y(yf)),(X(xp),Y(yp)))) - - for n in layout: - xf = layout.getAttribute(n,'x') - yf = layout.getAttribute(n,'y') - cf = layout.getAttribute(n,'color') - sf = layout.getAttribute(n,'shape') - if layout.hasAttribute(n,'radius'): - rf=layout.getAttribute(n,'radius') - else: - rf=radius - - if sf=='circle': - scene.add(Circle((X(xf),Y(yf)),rf,cf)) - else: - scene.add(Rectangle((X(xf)-rf,Y(yf)-rf),2*rf,2*rf,cf)) - - - return ''.join(scene.strarray()) - - - \ No newline at end of file diff --git a/obitools/tree/unrooted.py b/obitools/tree/unrooted.py deleted file mode 100644 index 9a9f3e6..0000000 --- a/obitools/tree/unrooted.py +++ /dev/null @@ -1,33 +0,0 @@ -from obitools.tree.layout import TreeLayout -import math - -def subtreeLayout(tree,node,layout,start,end,x,y,default): - nbotu = tree.subTreeSize(node) - delta = (end-start)/(nbotu+1) - - layout.addNode(node) - layout.setAttribute(node,'x',x) - layout.setAttribute(node,'y',y) - layout.setAttribute(node,'color',(255,0,0)) - layout.setAttribute(node,'shape','circle') - - for subnode in tree.childNodeIterator(node): - snbotu = tree.subTreeSize(subnode) - end = start + snbotu * delta - med = start + snbotu * delta /2 - r = subnode._dist - if r is None or r <=0: - r=default - subx=math.cos(med) * r + x - suby=math.sin(med) * r + y - subtreeLayout(tree, subnode, layout, start, end, subx, suby, default) - start=end - - return layout - -def treeLayout(tree): - layout = TreeLayout() - root = tree.getRoot() - dmin = min(n._dist for n in tree if n._dist is not None and n._dist > 0) - return subtreeLayout(tree,root,layout,0,2*math.pi,0,0,dmin / 100) - \ No newline at end of file diff --git a/obitools/unit/__init__.py b/obitools/unit/__init__.py deleted file mode 100644 index d02c812..0000000 --- a/obitools/unit/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -import unittest - -from obitools import tests_group as obitools_tests_group - -tests_group=obitools_tests_group - - - diff --git a/obitools/unit/obitools/__init__.py b/obitools/unit/obitools/__init__.py deleted file mode 100644 index ab1bcec..0000000 --- a/obitools/unit/obitools/__init__.py +++ /dev/null @@ -1,89 +0,0 @@ -import unittest - -import obitools - -class BioseqTest(unittest.TestCase): - - sequenceId = 'id1' - sequenceDefinition = 'sequence definition' - sequenceQualifier = {'extra':3} - - def setUp(self): - self.bioseq = 
self.bioseqClass(self.sequenceId, - self.sequenceString, - self.sequenceDefinition, - **self.sequenceQualifier) - - title = self.__doc__.strip() - underline = "=" * len(title) - - #print "%s\n%s" % (title,underline) - - def tearDown(self): - pass - #print "\n" - - def testIdAttribute(self): - ''' - test if id attribute exists - ''' - self.failUnless(hasattr(self.bioseq, 'id'), 'id missing attribute') - - def testIdValue(self): - ''' - test if id attribute value is 'id1' - ''' - self.failUnlessEqual(self.bioseq.id, 'id1', - 'identifier is created with good value') - - def testDefinitionAttribute(self): - ''' - test if definition attribute exists - ''' - self.failUnless(hasattr(self.bioseq, 'definition'), 'definition missing attribute') - - def testSequenceIsLowerCase(self): - ''' - test if sequence is stored as lower case letter - ''' - self.failUnlessEqual(str(self.bioseq), - str(self.bioseq).lower(), - "Sequence is not stored as lower case string") - - def testSequenceQualifier(self): - ''' - test if the extra qualifier is present and its value is three. - ''' - self.failUnlessEqual(self.bioseq['extra'], - 3, - "Sequence qualifier cannot be successfully retrieve") - - def testCreateSequenceQualifier(self): - self.bioseq['testqualifier']='ok' - self.failUnlessEqual(self.bioseq['testqualifier'], - 'ok', - "Sequence qualifier cannot be successfully created") - - - -class NucBioseqTest(BioseqTest): - ''' - Test obitools.NucSequence class - ''' - - bioseqClass = obitools.NucSequence - sequenceString = 'AACGT' * 5 - - -class AABioseqTest(BioseqTest): - ''' - Test obitools.AASequence class - ''' - - bioseqClass = obitools.AASequence - sequenceString = 'MLKCVT' * 5 - - - - -tests_group = [NucBioseqTest,AABioseqTest] \ No newline at end of file diff --git a/obitools/utils/__init__.py b/obitools/utils/__init__.py deleted file mode 100644 index fd7076f..0000000 --- a/obitools/utils/__init__.py +++ /dev/null @@ -1,324 +0,0 @@ -import sys - -import time -import re -import shelve - -from threading import Lock -from logging import warning -import urllib2 - -from obitools.gzip import GzipFile -from obitools.zipfile import ZipFile -import os.path - - -class FileFormatError(Exception): - pass - - - -def universalOpen(file,*options): - ''' - Open a file gziped or not. - - If file is a C{str} instance, file is - concidered as a file name. In this case - the C{.gz} suffixe is tested to eventually - open it a a gziped file. - - If file is an other kind of object, it is assumed - that this object follow the C{file} interface - and it is return as is. - - @param file: the file to open - @type file: C{str} or a file like object - - @return: an iterator on text lines. - ''' - if isinstance(file,str): - if urllib2.urlparse.urlparse(file)[0]=='': - rep = open(file,*options) - else: - rep = urllib2.urlopen(file,timeout=15) - - if file[-3:] == '.gz': - rep = GzipFile(fileobj=rep) - if file[-4:] == '.zip': - zip = ZipFile(file=rep) - data = zip.infolist() - assert len(data)==1,'Only zipped file containning a single file can be open' - name = data[0].filename - rep = zip.open(name) - else: - rep = file - return rep - -def universalTell(file): - ''' - Return the position in the file even if - it is a gziped one. - - @param file: the file to check - @type file: a C{file} like instance - - @return: position in the file - @rtype: C{int} - ''' - if isinstance(file, GzipFile): - file=file.myfileobj - return file.tell() - -def fileSize(file): - ''' - Return the file size even if it is a - gziped one. 
- - @param file: the file to check - @type file: a C{file} like instance - - @return: the size of the file - @rtype: C{int} - ''' - if isinstance(file, GzipFile): - file=file.myfileobj - pos = file.tell() - file.seek(0,2) - length = file.tell() - file.seek(pos,0) - return length - -def progressBar(pos,maxi,reset=False,head='',delta=[],step=[1,0,0]): - if reset: - del delta[:] - if not delta: - delta.append(time.time()) - delta.append(time.time()) - assert maxi>0 - - step[1]+=1 - if step[1] % step[0] == 0: - step[1]=1 - newtime = time.time() - d = newtime-delta[1] - if d < 0.2: - step[0]*=2 - elif d > 0.4 and step[0]>1: - step[0]/=2 - - delta[1]=newtime - elapsed = delta[1]-delta[0] - - if callable(pos): - pos=pos() - percent = float(pos)/maxi * 100 - remain = time.gmtime(elapsed / percent * (100-percent)) - days = remain.tm_yday - 1 - hour = remain.tm_hour - minu = remain.tm_min - sec = remain.tm_sec - if days: - remain = "%d days %02d:%02d:%02d" % (days,hour,minu,sec) - else: - remain = "%02d:%02d:%02d" % (hour,minu,sec) - bar = '#' * int(percent/2) - step[2]=(step[2]+1) % 4 - bar+= '|/-\\'[step[2]] - bar+= ' ' * (50 - int(percent/2)) - sys.stderr.write('\r%s %5.1f %% |%s] remain : %s' %(head,percent,bar,remain)) - else: - step[1]+=1 - -def endLessIterator(endedlist): - for x in endedlist: - yield x - while(1): - yield endedlist[-1] - - -def multiLineWrapper(lineiterator): - ''' - Aggregator of strings. - - @param lineiterator: a stream of strings from an opened OBO file. - @type lineiterator: a stream of strings. - - @return: an aggregated stanza. - @rtype: an iterotor on str - - @note: The aggregator aggregates strings from an opened OBO file. - When the length of a string is < 2, the current stanza is over. - ''' - - for line in lineiterator: - rep = [line] - while len(line)>=2 and line[-2]=='\\': - rep[-1]=rep[-1][0:-2] - try: - line = lineiterator.next() - except StopIteration: - raise FileFormatError - rep.append(line) - yield ''.join(rep) - - -def skipWhiteLineIterator(lineiterator): - ''' - Curator of stanza. - - @param lineiterator: a stream of strings from an opened OBO file. - @type lineiterator: a stream of strings. - - @return: a stream of strings without blank strings. - @rtype: a stream strings - - @note: The curator skip white lines of the current stanza. 
- ''' - - for line in lineiterator: - cleanline = line.strip() - if cleanline: - yield line - else: - print 'skipped' - - -class ColumnFile(object): - - def __init__(self,stream,sep=None,strip=True, - types=None,skip=None,head=None, - extra=None, - extraformat='([a-zA-Z]\w*) *= *([^;]+);'): - self._stream = universalOpen(stream) - self._delimiter=sep - self._strip=strip - self._extra=extra - self._extraformat = re.compile(extraformat) - - if types: - self._types=[x for x in types] - for i in xrange(len(self._types)): - if self._types[i] is bool: - self._types[i]=ColumnFile.str2bool - else: - self._types=None - - self._skip = skip - if skip is not None: - self._lskip= len(skip) - else: - self._lskip= 0 - self._head=head - - def str2bool(x): - return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False})) - - str2bool = staticmethod(str2bool) - - - def __iter__(self): - return self - - def next(self): - - def cast(txt,type): - try: - v = type(txt) - except: - v=None - return v - ligne = self._stream.next() - if self._skip is not None: - while ligne[0:self._lskip]==self._skip: - ligne = self._stream.next() - if self._extra is not None: - try: - (ligne,extra) = ligne.rsplit(self._extra,1) - extra = dict(self._extraformat.findall(extra)) - except ValueError: - extra=None - else: - extra = None - data = ligne.split(self._delimiter) - if self._strip or self._types: - data = [x.strip() for x in data] - if self._types: - it = endLessIterator(self._types) - data = [cast(*x) for x in ((y,it.next()) for y in data)] - if self._head is not None: - data=dict(map(None, self._head,data)) - if extra is not None: - data['__extra__']=extra - else: - if extra is not None: - data.append(extra) - return data - - def tell(self): - return universalTell(self._stream) - - -class CachedDB(object): - - def __init__(self,cachefile,masterdb): - self._cache = shelve.open(cachefile,'c') - self._db = masterdb - self._lock=Lock() - - def _cacheSeq(self,seq): - self._lock.acquire() - self._cache[seq.id]=seq - self._lock.release() - return seq - - def __getitem__(self,ac): - if isinstance(ac,str): - self._lock.acquire() - if ac in self._cache: -# print >>sys.stderr,"Use cache for %s" % ac - data = self._cache[ac] - self._lock.release() - - else: - self._lock.release() - data = self._db[ac] - self._cacheSeq(data) - return data - else: - self._lock.acquire() - acs = [[x,self._cache.get(x,None)] for x in ac] - self._lock.release() - newacs = [ac for ac,cached in acs if cached is None] - if newacs: - newseqs = self._db[newacs] - else: - newseqs = iter([]) - for r in acs: - if r[1] is None: - r[1]=self._cacheSeq(newseqs.next()) -# else: -# print >>sys.stderr,"Use cache for %s" % r[0] - return (x[1] for x in acs) - - -def moduleInDevelopment(name): - Warning('This module %s is under development : use it with caution' % name) - - -def deprecatedScript(newscript): - current = sys.argv[0] - print >>sys.stderr," " - print >>sys.stderr," " - print >>sys.stderr," " - print >>sys.stderr,"#########################################################" - print >>sys.stderr,"# #" - print >>sys.stderr," W A R N I N G :" - print >>sys.stderr," %s is a deprecated script " % os.path.split(current)[1] - print >>sys.stderr," it will disappear in the next obitools version" - print >>sys.stderr," " - print >>sys.stderr," The new corresponding command is %s " % newscript - print >>sys.stderr,"# #" - print >>sys.stderr,"#########################################################" - print >>sys.stderr," " - print >>sys.stderr," " - print >>sys.stderr," 
" diff --git a/obitools/utils/__init__.pyc b/obitools/utils/__init__.pyc deleted file mode 100644 index 99512dc..0000000 Binary files a/obitools/utils/__init__.pyc and /dev/null differ diff --git a/obitools/utils/bioseq.py b/obitools/utils/bioseq.py deleted file mode 100644 index 71337c7..0000000 --- a/obitools/utils/bioseq.py +++ /dev/null @@ -1,232 +0,0 @@ -def mergeTaxonomyClassification(uniqSeq,taxonomy): - for seq in uniqSeq: - if seq['merged_taxid']: - seq['taxid']=taxonomy.lastCommonTaxon(*seq['merged_taxid'].keys()) - tsp = taxonomy.getSpecies(seq['taxid']) - tgn = taxonomy.getGenus(seq['taxid']) - tfa = taxonomy.getFamily(seq['taxid']) - - if tsp is not None: - sp_sn = taxonomy.getScientificName(tsp) - else: - sp_sn="###" - tsp=-1 - - if tgn is not None: - gn_sn = taxonomy.getScientificName(tgn) - else: - gn_sn="###" - tgn=-1 - - if tfa is not None: - fa_sn = taxonomy.getScientificName(tfa) - else: - fa_sn="###" - tfa=-1 - - seq['species']=tsp - seq['genus']=tgn - seq['family']=tfa - - seq['species_sn']=sp_sn - seq['genus_sn']=gn_sn - seq['family_sn']=fa_sn - - seq['rank']=taxonomy.getRank(seq['taxid']) - seq['scientific_name']=fa_sn = taxonomy.getScientificName(seq['taxid']) - -def uniqSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None): - uniques={} - uniqSeq=[] - - if categories is None: - categories=[] - - if mergedKey is not None: - mergedKey=set(mergedKey) - else: - mergedKey=set() - - if taxonomy is not None: - mergedKey.add('taxid') - - for seq in seqIterator: - s = tuple(seq[x] for x in categories) + (str(seq),) - if s in uniques: - s = uniques[s] - if 'count' in seq: - s['count']+=seq['count'] - else: - s['count']+=1 -# if taxonomy is not None and 'taxid' in seq: -# s['merged_taxid'][seq['taxid']]= - for key in mergedKey: - if key=='taxid' and mergeIds: - if 'taxid_dist' in seq: - s["taxid_dist"].update(seq["taxid_dist"]) - if 'taxid' in seq: - s["taxid_dist"][seq.id]=seq['taxid'] - - mkey = "merged_%s" % key - if key in seq: - s[mkey][seq[key]]=s[mkey].get(seq[key],0)+1 - if mkey in seq: - for skey in seq[mkey]: - if skey in s: - s[mkey][skey]=s[mkey].get(seq[skey],0)+seq[mkey][skey] - else: - s[mkey][skey]=seq[mkey][skey] - - for key in seq.iterkeys(): - # Merger proprement l'attribut merged s'il exist - if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged': - del(s[key]) - - - if mergeIds: - s['merged'].append(seq.id) - else: - uniques[s]=seq - for key in mergedKey: - if key=='taxid' and mergeIds: - if 'taxid_dist' not in seq: - seq["taxid_dist"]={} - if 'taxid' in seq: - seq["taxid_dist"][seq.id]=seq['taxid'] - mkey = "merged_%s" % key - if mkey not in seq: - seq[mkey]={} - if key in seq: - seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+1 - del(seq[key]) - - if 'count' not in seq: - seq['count']=1 - if mergeIds: - seq['merged']=[seq.id] - uniqSeq.append(seq) - - if taxonomy is not None: - mergeTaxonomyClassification(uniqSeq, taxonomy) - - - - return uniqSeq - -def uniqPrefixSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None): - - if categories is None: - categories=[] - - def cmpseq(s1,s2): - return cmp(str(s1),str(s2)) - - if mergedKey is not None: - mergedKey=set(mergedKey) - else: - mergedKey=set() - - if taxonomy is not None: - mergedKey.add('taxid') - - sequences=list(seqIterator) - - if not sequences: - return [] - - sequences.sort(cmpseq) - - - old=sequences.pop() - uniqSeq=[old] - if 'count' not in old: - old['count']=1 - for key in mergedKey: - mkey = "merged_%s" 
% key - if mkey not in old: - old[mkey]={} - if key in old: - old[mkey][old[key]]=old[mkey].get(old[key],0)+1 - if mergeIds: - old['merged']=[old.id] - - - while(sequences): - seq=sequences.pop() - lseq=len(seq) - pold = str(old)[0:lseq] - if pold==str(seq): - - if 'count' in seq: - old['count']+=seq['count'] - else: - old['count']+=1 - - for key in mergedKey: - mkey = "merged_%s" % key - if key in seq: - old[mkey][seq[key]]=old[mkey].get(seq[key],0)+1 - if mkey in seq: - for skey in seq[mkey]: - if skey in old: - old[mkey][skey]=old[mkey].get(seq[skey],0)+seq[mkey][skey] - else: - old[mkey][skey]=seq[mkey][skey] - - for key in seq.iterkeys(): - if key in old and old[key]!=seq[key]: - del(old[key]) - - - if mergeIds: - old['merged'].append(seq.id) - else: - old=seq - - for key in mergedKey: - mkey = "merged_%s" % key - if mkey not in seq: - seq[mkey]={} - if key in seq: - seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+1 - del(seq[key]) - - if 'count' not in seq: - seq['count']=1 - if mergeIds: - seq['merged']=[seq.id] - uniqSeq.append(seq) - - if taxonomy is not None: - mergeTaxonomyClassification(uniqSeq, taxonomy) - - return uniqSeq - - - - -def _cmpOnKeyGenerator(key,reverse=False): - def compare(x,y): - try: - c1 = x[key] - except KeyError: - c1=None - - try: - c2 = y[key] - except KeyError: - c2=None - - if reverse: - s=c1 - c1=c2 - c2=s - return cmp(c1,c2) - - return compare - -def sortSequence(seqIterator,key,reverse=False): - seqs = list(seqIterator) - seqs.sort(_cmpOnKeyGenerator(key, reverse)) - return seqs - \ No newline at end of file diff --git a/obitools/utils/crc64.py b/obitools/utils/crc64.py deleted file mode 100644 index 537391e..0000000 --- a/obitools/utils/crc64.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Code obtained from : -# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259177/index_txt -# - -# Initialisation -# 32 first bits of generator polynomial for CRC64 -# the 32 lower bits are assumed to be zero - -POLY64REVh = 0xd8000000L -CRCTableh = [0] * 256 -CRCTablel = [0] * 256 -isInitialized = False - -def CRC64(aString): - global isInitialized - crcl = 0 - crch = 0 - if (isInitialized is not True): - isInitialized = True - for i in xrange(256): - partl = i - parth = 0L - for j in xrange(8): - rflag = partl & 1L - partl >>= 1L - if (parth & 1): - partl |= (1L << 31L) - parth >>= 1L - if rflag: - parth ^= POLY64REVh - CRCTableh[i] = parth; - CRCTablel[i] = partl; - - for item in aString: - shr = 0L - shr = (crch & 0xFF) << 24 - temp1h = crch >> 8L - temp1l = (crcl >> 8L) | shr - tableindex = (crcl ^ ord(item)) & 0xFF - - crch = temp1h ^ CRCTableh[tableindex] - crcl = temp1l ^ CRCTablel[tableindex] - return (crch, crcl) - -def CRC64digest(aString): - return "%08X%08X" % (CRC64(aString)) - -if __name__ == '__main__': - assert CRC64("IHATEMATH") == (3822890454, 2600578513) - assert CRC64digest("IHATEMATH") == "E3DCADD69B01ADD1" - print 'CRC64: dumb test successful' - diff --git a/obitools/utils/iterator.py b/obitools/utils/iterator.py deleted file mode 100644 index f53537f..0000000 --- a/obitools/utils/iterator.py +++ /dev/null @@ -1,8 +0,0 @@ -from itertools import chain - -def uniqueChain(*args): - see = set() - for x in chain(*args): - if x not in see: - see.add(x) - yield x \ No newline at end of file diff --git a/obitools/utils/iterator.pyc b/obitools/utils/iterator.pyc deleted file mode 100644 index 88d415e..0000000 Binary files a/obitools/utils/iterator.pyc and /dev/null differ diff --git a/obitools/word/__init__.py b/obitools/word/__init__.py deleted file 
mode 100644 index c1a4b6b..0000000 --- a/obitools/word/__init__.py +++ /dev/null @@ -1,72 +0,0 @@ -from itertools import imap -from _binary import * - -def wordCount(liste): - count = {} - - for e in liste: - count[e]=count.get(e,0) + 1 - - return count - - -def wordIterator(sequence,lword,step=1,endIncluded=False,circular=False): - - assert not (endIncluded and circular), \ - "endIncluded and circular cannot not be set to True at the same time" - - L = len(sequence) - sequence = str(sequence) - if circular: - sequence += sequence[0:lword] - pmax=L - elif endIncluded: - pmax=L - else: - pmax = L - lword + 1 - - pos = xrange(0,pmax,step) - - for x in pos: - yield encodeWord(sequence[x:x+lword]) - - - -def wordSelector(words,accept=None,reject=None): - ''' - Filter over a DNA word iterator. - - @param words: an iterable object other a list of DNA words - @type words: an iterator - @param accept: a list of predicate. Each predicate is a function - accepting one str parametter and returning a boolean - value. - @type accept: list - @param reject: a list of predicat. Each predicat is a function - accepting one str parametter and returning a boolean - value. - @type reject: list - - @return: an iterator on DNA word (str) - @rtype: iterator - ''' - if accept is None: - accept=[] - if reject is None: - reject=[] - for w in words: -# print [bool(p(w)) for p in accept] - accepted = reduce(lambda x,y: bool(x) and bool(y), - (p(w) for p in accept), - True) -# print [(p.__name__,bool(p(w))) for p in reject] - rejected = reduce(lambda x,y:bool(x) or bool(y), - (p(w) for p in reject), - False) -# print decodeWord(w,5),accepted,rejected, - if accepted and not rejected: -# print " conserved" - yield w -# else: -# print - diff --git a/obitools/word/_binary.so b/obitools/word/_binary.so deleted file mode 100755 index 1780762..0000000 Binary files a/obitools/word/_binary.so and /dev/null differ diff --git a/obitools/word/options.py b/obitools/word/options.py deleted file mode 100644 index ff44e57..0000000 --- a/obitools/word/options.py +++ /dev/null @@ -1,116 +0,0 @@ -from obitools.word import wordSelector -from obitools.word import allDNAWordIterator,encodeWord -from obitools.word import predicate - - - - -def _acceptedOptionCallback(options,opt,value,parser): - if not hasattr(parser.values, 'acceptedOligo'): - parser.values.acceptedOligo=[] - parser.values.acceptedOligo.append(predicate.predicateMatchPattern(value,)) - -def _rejectedOptionCallback(options,opt,value,parser): - if not hasattr(parser.values, 'rejectedOligo'): - parser.values.rejectedOligo=[] - parser.values.rejectedOligo.append(predicate.predicateMatchPattern(value)) - - - -def addOligoOptions(optionManager): - - optionManager.add_option('-L','--oligo-list', - action="store", dest="oligoList", - metavar="", - type="str", - help="filename containing a list of oligonucleotide") - - - optionManager.add_option('-s','--oligo-size', - action="store", dest="oligoSize", - metavar="<###>", - type="int", - help="Size of oligonucleotide to generate") - - optionManager.add_option('-f','--family-size', - action="store", dest="familySize", - metavar="<###>", - type="int", - help="Size of oligonucleotide family to generate") - - optionManager.add_option('-d','--distance', - action="store", dest="oligoDist", - metavar="<###>", - type="int", - default=1, - help="minimal distance between two oligonucleotides") - - optionManager.add_option('-g','--gc-max', - action="store", dest="gcMax", - metavar="<###>", - type="int", - default=0, - help="maximum count of G or 
C nucleotide acceptable in a word") - - optionManager.add_option('-a','--accepted', - action="append",dest="acceptedPattern", - metavar="", - default=[], - type="str", - help="pattern of accepted oligonucleotide") - - optionManager.add_option('-r','--rejected', - action="append",dest="rejectedPattern", - metavar="", - default=[], - type="str", - help="pattern of rejected oligonucleotide") - - optionManager.add_option('-p','--homopolymer', - action="store", dest="homopolymere", - metavar="<###>", - type="int", - default=0, - help="reject oligo with homopolymer longer than.") - - optionManager.add_option('-P','--homopolymer-min', - action="store", dest="homopolymere_min", - metavar="<###>", - type="int", - default=0, - help="accept only oligo with homopolymer longer or equal to.") - -def dnaWordIterator(options): - - assert options.oligoSize is not None or options.oligoList is not None,"option -s or --oligo-size must be specified" - assert options.familySize is not None,"option -f or --family-size must be specified" - assert options.oligoDist is not None,"option -d or --distance must be specified" - - if options.oligoList is not None: - words = (encodeWord(x.strip().lower()) for x in open(options.oligoList)) - else: - words = allDNAWordIterator(options.oligoSize) - #seed = 'a' * options.oligoSize - options.acceptedOligo=[] - for p in options.acceptedPattern: - assert len(p)==options.oligoSize,"Accept pattern with bad lenth : %s" % p - options.acceptedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize)) - - options.rejectedOligo=[] - for p in options.rejectedPattern: - assert len(p)==options.oligoSize,"Reject pattern with bad lenth : %s" % p - options.rejectedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize)) - - - #options.acceptedOligo.append(predicat.distMinGenerator(seed, options.oligoDist)) - - if options.homopolymere: - options.rejectedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere, options.oligoSize)) - - if options.homopolymere_min: - options.acceptedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere_min-1, options.oligoSize)) - - if options.gcMax: - options.rejectedOligo.append(predicate.predicateGCUpperBond(options.gcMax, options.oligoSize)) - - return wordSelector(words, options.acceptedOligo, options.rejectedOligo) diff --git a/obitools/word/predicate.py b/obitools/word/predicate.py deleted file mode 100644 index 082b80f..0000000 --- a/obitools/word/predicate.py +++ /dev/null @@ -1,41 +0,0 @@ -#@PydevCodeAnalysisIgnore -''' -Created on 14 oct. 2009 - -@author: coissac -''' - -from _binary import wordDist, \ - homoMax, \ - countCG, \ - matchPattern, \ - encodePattern - -def predicateWordDistMin(word,dmin,size): - def predicate(w): - return wordDist(word, w) >= dmin - return predicate - -def predicateHomoPolymerLarger(count,size): - def predicate(w): - return homoMax(w, size) > count - return predicate - -def predicateHomoPolymerSmaller(count,size): - def predicate(w): - return homoMax(w, size) < count - return predicate - -def predicateGCUpperBond(count,size): - def predicate(w): - return countCG(w, size) > count - return predicate - -def predicateMatchPattern(pattern,size): - pattern=encodePattern(pattern) - def predicate(w): - return matchPattern(w, pattern) - return predicate - - - diff --git a/obitools/zipfile.py b/obitools/zipfile.py deleted file mode 100644 index 41e4bcb..0000000 --- a/obitools/zipfile.py +++ /dev/null @@ -1,1282 +0,0 @@ -""" -Read and write ZIP files. 
-""" -import struct, os, time, sys, shutil -import binascii, cStringIO - -try: - import zlib # We may need its compression method - crc32 = zlib.crc32 -except ImportError: - zlib = None - crc32 = binascii.crc32 - -__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", - "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] - -class BadZipfile(Exception): - pass - - -class LargeZipFile(Exception): - """ - Raised when writing a zipfile, the zipfile requires ZIP64 extensions - and those extensions are disabled. - """ - -error = BadZipfile # The exception raised by this module - -ZIP64_LIMIT= (1 << 31) - 1 - -# constants for Zip file compression methods -ZIP_STORED = 0 -ZIP_DEFLATED = 8 -# Other ZIP compression methods not supported - -# Here are some struct module formats for reading headers -structEndArchive = "<4s4H2LH" # 9 items, end of archive, 22 bytes -stringEndArchive = "PK\005\006" # magic number for end of archive record -structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes -stringCentralDir = "PK\001\002" # magic number for central directory -structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes -stringFileHeader = "PK\003\004" # magic number for file header -structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes -stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header -structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes -stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header - - -# indexes of entries in the central directory structure -_CD_SIGNATURE = 0 -_CD_CREATE_VERSION = 1 -_CD_CREATE_SYSTEM = 2 -_CD_EXTRACT_VERSION = 3 -_CD_EXTRACT_SYSTEM = 4 # is this meaningful? -_CD_FLAG_BITS = 5 -_CD_COMPRESS_TYPE = 6 -_CD_TIME = 7 -_CD_DATE = 8 -_CD_CRC = 9 -_CD_COMPRESSED_SIZE = 10 -_CD_UNCOMPRESSED_SIZE = 11 -_CD_FILENAME_LENGTH = 12 -_CD_EXTRA_FIELD_LENGTH = 13 -_CD_COMMENT_LENGTH = 14 -_CD_DISK_NUMBER_START = 15 -_CD_INTERNAL_FILE_ATTRIBUTES = 16 -_CD_EXTERNAL_FILE_ATTRIBUTES = 17 -_CD_LOCAL_HEADER_OFFSET = 18 - -# indexes of entries in the local file header structure -_FH_SIGNATURE = 0 -_FH_EXTRACT_VERSION = 1 -_FH_EXTRACT_SYSTEM = 2 # is this meaningful? 
-_FH_GENERAL_PURPOSE_FLAG_BITS = 3 -_FH_COMPRESSION_METHOD = 4 -_FH_LAST_MOD_TIME = 5 -_FH_LAST_MOD_DATE = 6 -_FH_CRC = 7 -_FH_COMPRESSED_SIZE = 8 -_FH_UNCOMPRESSED_SIZE = 9 -_FH_FILENAME_LENGTH = 10 -_FH_EXTRA_FIELD_LENGTH = 11 - -def is_zipfile(filename): - """Quickly see if file is a ZIP file by checking the magic number.""" - try: - fpin = open(filename, "rb") - endrec = _EndRecData(fpin) - fpin.close() - if endrec: - return True # file has correct magic number - except IOError: - pass - return False - -def _EndRecData64(fpin, offset, endrec): - """ - Read the ZIP64 end-of-archive records and use that to update endrec - """ - locatorSize = struct.calcsize(structEndArchive64Locator) - fpin.seek(offset - locatorSize, 2) - data = fpin.read(locatorSize) - sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) - if sig != stringEndArchive64Locator: - return endrec - - if diskno != 0 or disks != 1: - raise BadZipfile("zipfiles that span multiple disks are not supported") - - # Assume no 'zip64 extensible data' - endArchiveSize = struct.calcsize(structEndArchive64) - fpin.seek(offset - locatorSize - endArchiveSize, 2) - data = fpin.read(endArchiveSize) - sig, sz, create_version, read_version, disk_num, disk_dir, \ - dircount, dircount2, dirsize, diroffset = \ - struct.unpack(structEndArchive64, data) - if sig != stringEndArchive64: - return endrec - - # Update the original endrec using data from the ZIP64 record - endrec[1] = disk_num - endrec[2] = disk_dir - endrec[3] = dircount - endrec[4] = dircount2 - endrec[5] = dirsize - endrec[6] = diroffset - return endrec - - -def _EndRecData(fpin): - """Return data from the "End of Central Directory" record, or None. - - The data is a list of the nine items in the ZIP "End of central dir" - record followed by a tenth item, the file seek offset of this record.""" - fpin.seek(-22, 2) # Assume no archive comment. - filesize = fpin.tell() + 22 # Get file size - data = fpin.read() - if data[0:4] == stringEndArchive and data[-2:] == "\000\000": - endrec = struct.unpack(structEndArchive, data) - endrec = list(endrec) - endrec.append("") # Append the archive comment - endrec.append(filesize - 22) # Append the record start offset - if endrec[-4] == 0xffffffff: - return _EndRecData64(fpin, -22, endrec) - return endrec - # Search the last END_BLOCK bytes of the file for the record signature. - # The comment is appended to the ZIP file and has a 16 bit length. - # So the comment may be up to 64K long. We limit the search for the - # signature to a few Kbytes at the end of the file for efficiency. - # also, the signature must not appear in the comment. 
- END_BLOCK = min(filesize, 1024 * 4) - fpin.seek(filesize - END_BLOCK, 0) - data = fpin.read() - start = data.rfind(stringEndArchive) - if start >= 0: # Correct signature string was found - endrec = struct.unpack(structEndArchive, data[start:start+22]) - endrec = list(endrec) - comment = data[start+22:] - if endrec[7] == len(comment): # Comment length checks out - # Append the archive comment and start offset - endrec.append(comment) - endrec.append(filesize - END_BLOCK + start) - if endrec[-4] == 0xffffffff: - return _EndRecData64(fpin, - END_BLOCK + start, endrec) - return endrec - return # Error, return None - - -class ZipInfo (object): - """Class with attributes describing each file in the ZIP archive.""" - - __slots__ = ( - 'orig_filename', - 'filename', - 'date_time', - 'compress_type', - 'comment', - 'extra', - 'create_system', - 'create_version', - 'extract_version', - 'reserved', - 'flag_bits', - 'volume', - 'internal_attr', - 'external_attr', - 'header_offset', - 'CRC', - 'compress_size', - 'file_size', - '_raw_time', - ) - - def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): - self.orig_filename = filename # Original file name in archive - - # Terminate the file name at the first null byte. Null bytes in file - # names are used as tricks by viruses in archives. - null_byte = filename.find(chr(0)) - if null_byte >= 0: - filename = filename[0:null_byte] - # This is used to ensure paths in generated ZIP files always use - # forward slashes as the directory separator, as required by the - # ZIP format specification. - if os.sep != "/" and os.sep in filename: - filename = filename.replace(os.sep, "/") - - self.filename = filename # Normalized file name - self.date_time = date_time # year, month, day, hour, min, sec - # Standard values: - self.compress_type = ZIP_STORED # Type of compression for the file - self.comment = "" # Comment for each file - self.extra = "" # ZIP extra data - if sys.platform == 'win32': - self.create_system = 0 # System which created ZIP archive - else: - # Assume everything else is unix-y - self.create_system = 3 # System which created ZIP archive - self.create_version = 20 # Version which created ZIP archive - self.extract_version = 20 # Version needed to extract archive - self.reserved = 0 # Must be zero - self.flag_bits = 0 # ZIP flag bits - self.volume = 0 # Volume number of file header - self.internal_attr = 0 # Internal attributes - self.external_attr = 0 # External file attributes - # Other attributes are set by class ZipFile: - # header_offset Byte offset to the file header - # CRC CRC-32 of the uncompressed file - # compress_size Size of the compressed file - # file_size Size of the uncompressed file - - def FileHeader(self): - """Return the per-file header as a string.""" - dt = self.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - if self.flag_bits & 0x08: - # Set these to zero because we write them after the file data - CRC = compress_size = file_size = 0 - else: - CRC = self.CRC - compress_size = self.compress_size - file_size = self.file_size - - extra = self.extra - - if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: - # File is larger than what fits into a 4 byte integer, - # fall back to the ZIP64 extension - fmt = '<HHQQ' - extra = extra + struct.pack(fmt, - 1, struct.calcsize(fmt)-4, file_size, compress_size) - file_size = 0xffffffff #-1 - compress_size = 0xffffffff #-1 - self.extract_version = max(45, self.extract_version) - self.create_version = max(45, self.extract_version) - - header = struct.pack(structFileHeader, stringFileHeader, - self.extract_version, self.reserved, self.flag_bits, - self.compress_type, dostime, dosdate, CRC, - compress_size, file_size, - len(self.filename), len(extra)) - return header + self.filename + extra - - def _decodeExtra(self): - # Try to decode the extra field. - extra = self.extra - unpack = struct.unpack - while extra: - tp, ln = unpack('<HH', extra[:4]) - if tp == 1: - if ln >= 24: - counts = unpack('<QQQ', extra[4:28]) - elif ln == 16: - counts = unpack('<QQ', extra[4:20]) - elif ln == 8: - counts = unpack('<Q', extra[4:12]) - elif ln == 0: - counts = () - else: - raise RuntimeError, "Corrupt extra field %s"%(ln,) - - idx = 0 - - # ZIP64 extension (large files and/or large archive) - if self.file_size in (0xffffffffffffffffL, 0xffffffffL): - self.file_size = counts[idx] - idx += 1 - - if self.compress_size == 0xFFFFFFFFL: - self.compress_size = counts[idx] - idx += 1 - - if self.header_offset == 0xffffffffL: - old = self.header_offset - self.header_offset = counts[idx] - idx += 1 - - extra = extra[ln+4:] - - -class _ZipDecrypter: - """Class to handle decryption of files stored within a ZIP archive. - - ZIP supports a password-based form of encryption. Even though known - plaintext attacks have been found against it, it is still useful - to be able to get data out of such a file. - - Usage: - zd = _ZipDecrypter(mypwd) - plain_char = zd(cypher_char) - plain_text = map(zd, cypher_text) - """ - def _GenerateCRCTable(): - """Generate a CRC-32 table. - - ZIP encryption uses the CRC32 one-byte primitive for scrambling some - internal keys. We noticed that a direct implementation is faster than - relying on binascii.crc32(). - """ - poly = 0xedb88320 - table = [0] * 256 - for i in range(256): - crc = i - for j in range(8): - if crc & 1: - crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly - else: - crc = ((crc >> 1) & 0x7FFFFFFF) - table[i] = crc - return table - crctable = _GenerateCRCTable() - - def _crc32(self, ch, crc): - """Compute the CRC32 primitive on one byte."""
- return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] - - def __init__(self, pwd): - self.key0 = 305419896 - self.key1 = 591751049 - self.key2 = 878082192 - for p in pwd: - self._UpdateKeys(p) - - def _UpdateKeys(self, c): - self.key0 = self._crc32(c, self.key0) - self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 - self.key1 = (self.key1 * 134775813 + 1) & 4294967295 - self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) - - def __call__(self, c): - """Decrypt a single character.""" - c = ord(c) - k = self.key2 | 2 - c = c ^ (((k * (k^1)) >> 8) & 255) - c = chr(c) - self._UpdateKeys(c) - return c - -class ZipExtFile: - """File-like object for reading an archive member. - Is returned by ZipFile.open(). - """ - - def __init__(self, fileobj, zipinfo, decrypt=None): - self.fileobj = fileobj - self.decrypter = decrypt - self.bytes_read = 0L - self.rawbuffer = '' - self.readbuffer = '' - self.linebuffer = '' - self.eof = False - self.univ_newlines = False - self.nlSeps = ("\n", ) - self.lastdiscard = '' - - self.compress_type = zipinfo.compress_type - self.compress_size = zipinfo.compress_size - - self.closed = False - self.mode = "r" - self.name = zipinfo.filename - - # read from compressed files in 64k blocks - self.compreadsize = 64*1024 - if self.compress_type == ZIP_DEFLATED: - self.dc = zlib.decompressobj(-15) - - def set_univ_newlines(self, univ_newlines): - self.univ_newlines = univ_newlines - - # pick line separator char(s) based on universal newlines flag - self.nlSeps = ("\n", ) - if self.univ_newlines: - self.nlSeps = ("\r\n", "\r", "\n") - - def __iter__(self): - return self - - def next(self): - nextline = self.readline() - if not nextline: - raise StopIteration() - - return nextline - - def close(self): - self.closed = True - - def _checkfornewline(self): - nl, nllen = -1, -1 - if self.linebuffer: - # ugly check for cases where half of an \r\n pair was - # read on the last pass, and the \r was discarded. In this - # case we just throw away the \n at the start of the buffer. - if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'): - self.linebuffer = self.linebuffer[1:] - - for sep in self.nlSeps: - nl = self.linebuffer.find(sep) - if nl >= 0: - nllen = len(sep) - return nl, nllen - - return nl, nllen - - def readline(self, size = -1): - """Read a line with approx. size. If size is negative, - read a whole line. - """ - if size < 0: - size = sys.maxint - elif size == 0: - return '' - - # check for a newline already in buffer - nl, nllen = self._checkfornewline() - - if nl >= 0: - # the next line was already in the buffer - nl = min(nl, size) - else: - # no line break in buffer - try to read more - size -= len(self.linebuffer) - while nl < 0 and size > 0: - buf = self.read(min(size, 100)) - if not buf: - break - self.linebuffer += buf - size -= len(buf) - - # check for a newline in buffer - nl, nllen = self._checkfornewline() - - # we either ran out of bytes in the file, or - # met the specified size limit without finding a newline, - # so return current buffer - if nl < 0: - s = self.linebuffer - self.linebuffer = '' - return s - - buf = self.linebuffer[:nl] - self.lastdiscard = self.linebuffer[nl:nl + nllen] - self.linebuffer = self.linebuffer[nl + nllen:] - - # line is always returned with \n as newline char (except possibly - # for a final incomplete line in the file, which is handled above). - return buf + "\n" - - def readlines(self, sizehint = -1): - """Return a list with all (following) lines. 
The sizehint parameter - is ignored in this implementation. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def read(self, size = None): - # act like file() obj and return empty string if size is 0 - if size == 0: - return '' - - # determine read size - bytesToRead = self.compress_size - self.bytes_read - - # adjust read size for encrypted files since the first 12 bytes - # are for the encryption/password information - if self.decrypter is not None: - bytesToRead -= 12 - - if size is not None and size >= 0: - if self.compress_type == ZIP_STORED: - lr = len(self.readbuffer) - bytesToRead = min(bytesToRead, size - lr) - elif self.compress_type == ZIP_DEFLATED: - if len(self.readbuffer) > size: - # the user has requested fewer bytes than we've already - # pulled through the decompressor; don't read any more - bytesToRead = 0 - else: - # user will use up the buffer, so read some more - lr = len(self.rawbuffer) - bytesToRead = min(bytesToRead, self.compreadsize - lr) - - # avoid reading past end of file contents - if bytesToRead + self.bytes_read > self.compress_size: - bytesToRead = self.compress_size - self.bytes_read - - # try to read from file (if necessary) - if bytesToRead > 0: - bytes = self.fileobj.read(bytesToRead) - self.bytes_read += len(bytes) - self.rawbuffer += bytes - - # handle contents of raw buffer - if self.rawbuffer: - newdata = self.rawbuffer - self.rawbuffer = '' - - # decrypt new data if we were given an object to handle that - if newdata and self.decrypter is not None: - newdata = ''.join(map(self.decrypter, newdata)) - - # decompress newly read data if necessary - if newdata and self.compress_type == ZIP_DEFLATED: - newdata = self.dc.decompress(newdata) - self.rawbuffer = self.dc.unconsumed_tail - if self.eof and len(self.rawbuffer) == 0: - # we're out of raw bytes (both from the file and - # the local buffer); flush just to make sure the - # decompressor is done - newdata += self.dc.flush() - # prevent decompressor from being used again - self.dc = None - - self.readbuffer += newdata - - - # return what the user asked for - if size is None or len(self.readbuffer) <= size: - bytes = self.readbuffer - self.readbuffer = '' - else: - bytes = self.readbuffer[:size] - self.readbuffer = self.readbuffer[size:] - - return bytes - - -class ZipFile: - """ Class with methods to open, read, write, close, list zip files. - - z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True) - - @var file: Either the path to the file, or a file-like object. - If it is a path, the file will be opened and closed by ZipFile. - @var mode: The mode can be either read "r", write "w" or append "a". - @var compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). - @var allowZip64: if True ZipFile will create files with ZIP64 extensions when - needed, otherwise it will raise an exception when this would - be necessary. 
- - """ - - fp = None # Set here since __del__ checks it - - def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): - """Open the ZIP file with mode read "r", write "w" or append "a".""" - if mode not in ("r", "w", "a"): - raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') - - if compression == ZIP_STORED: - pass - elif compression == ZIP_DEFLATED: - if not zlib: - raise RuntimeError,\ - "Compression requires the (missing) zlib module" - else: - raise RuntimeError, "That compression method is not supported" - - self._allowZip64 = allowZip64 - self._didModify = False - self.debug = 0 # Level of printing: 0 through 3 - self.NameToInfo = {} # Find file info given name - self.filelist = [] # List of ZipInfo instances for archive - self.compression = compression # Method of compression - self.mode = key = mode.replace('b', '')[0] - self.pwd = None - - # Check if we were passed a file-like object - if isinstance(file, basestring): - self._filePassed = 0 - self.filename = file - modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} - try: - self.fp = open(file, modeDict[mode]) - except IOError: - if mode == 'a': - mode = key = 'w' - self.fp = open(file, modeDict[mode]) - else: - raise - else: - self._filePassed = 1 - self.fp = file - self.filename = getattr(file, 'name', None) - - if key == 'r': - self._GetContents() - elif key == 'w': - pass - elif key == 'a': - try: # See if file is a zip file - self._RealGetContents() - # seek to start of directory and overwrite - self.fp.seek(self.start_dir, 0) - except BadZipfile: # file is not a zip file, just append - self.fp.seek(0, 2) - else: - if not self._filePassed: - self.fp.close() - self.fp = None - raise RuntimeError, 'Mode must be "r", "w" or "a"' - - def _GetContents(self): - """Read the directory, making sure we close the file if the format - is bad.""" - try: - self._RealGetContents() - except BadZipfile: - if not self._filePassed: - self.fp.close() - self.fp = None - raise - - def _RealGetContents(self): - """Read in the table of contents for the ZIP file.""" - fp = self.fp - endrec = _EndRecData(fp) - if not endrec: - raise BadZipfile, "File is not a zip file" - if self.debug > 1: - print endrec - size_cd = endrec[5] # bytes in central directory - offset_cd = endrec[6] # offset of central directory - self.comment = endrec[8] # archive comment - # endrec[9] is the offset of the "End of Central Dir" record - if endrec[9] > ZIP64_LIMIT: - x = endrec[9] - size_cd - 56 - 20 - else: - x = endrec[9] - size_cd - # "concat" is zero, unless zip was concatenated to another file - concat = x - offset_cd - if self.debug > 2: - print "given, inferred, offset", offset_cd, x, concat - # self.start_dir: Position of start of central directory - self.start_dir = offset_cd + concat - fp.seek(self.start_dir, 0) - data = fp.read(size_cd) - fp = cStringIO.StringIO(data) - total = 0 - while total < size_cd: - centdir = fp.read(46) - total = total + 46 - if centdir[0:4] != stringCentralDir: - raise BadZipfile, "Bad magic number for central directory" - centdir = struct.unpack(structCentralDir, centdir) - if self.debug > 2: - print centdir - filename = fp.read(centdir[_CD_FILENAME_LENGTH]) - # Create ZipInfo instance to store file information - x = ZipInfo(filename) - x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) - x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) - total = (total + centdir[_CD_FILENAME_LENGTH] - + centdir[_CD_EXTRA_FIELD_LENGTH] - + centdir[_CD_COMMENT_LENGTH]) - x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] - 
(x.create_version, x.create_system, x.extract_version, x.reserved, - x.flag_bits, x.compress_type, t, d, - x.CRC, x.compress_size, x.file_size) = centdir[1:12] - x.volume, x.internal_attr, x.external_attr = centdir[15:18] - # Convert date/time code to (year, month, day, hour, min, sec) - x._raw_time = t - x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, - t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) - - x._decodeExtra() - x.header_offset = x.header_offset + concat - self.filelist.append(x) - self.NameToInfo[x.filename] = x - if self.debug > 2: - print "total", total - - - def namelist(self): - """Return a list of file names in the archive.""" - l = [] - for data in self.filelist: - l.append(data.filename) - return l - - def infolist(self): - """Return a list of class ZipInfo instances for files in the - archive.""" - return self.filelist - - def printdir(self): - """Print a table of contents for the zip file.""" - print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") - for zinfo in self.filelist: - date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] - print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) - - def testzip(self): - """Read all the files and check the CRC.""" - for zinfo in self.filelist: - try: - self.read(zinfo.filename) # Check CRC-32 - except BadZipfile: - return zinfo.filename - - - def getinfo(self, name): - """Return the instance of ZipInfo given 'name'.""" - info = self.NameToInfo.get(name) - if info is None: - raise KeyError( - 'There is no item named %r in the archive' % name) - - return info - - def setpassword(self, pwd): - """Set default password for encrypted files.""" - self.pwd = pwd - - def read(self, name, pwd=None): - """Return file bytes (as a string) for name.""" - return self.open(name, "r", pwd).read() - - def open(self, name, mode="r", pwd=None): - """Return file-like object for 'name'.""" - if mode not in ("r", "U", "rU"): - raise RuntimeError, 'open() requires mode "r", "U", or "rU"' - if not self.fp: - raise RuntimeError, \ - "Attempt to read ZIP archive that was already closed" - - # Only open a new file for instances where we were not - # given a file object in the constructor - if self._filePassed: - zef_file = self.fp - else: - zef_file = open(self.filename, 'rb') - - # Get info object for name - zinfo = self.getinfo(name) - - filepos = zef_file.tell() - - zef_file.seek(zinfo.header_offset, 0) - - # Skip the file header: - fheader = zef_file.read(30) - if fheader[0:4] != stringFileHeader: - raise BadZipfile, "Bad magic number for file header" - - fheader = struct.unpack(structFileHeader, fheader) - fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) - if fheader[_FH_EXTRA_FIELD_LENGTH]: - zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) - - if fname != zinfo.orig_filename: - raise BadZipfile, \ - 'File name in directory "%s" and header "%s" differ.' % ( - zinfo.orig_filename, fname) - - # check for encrypted flag & handle password - is_encrypted = zinfo.flag_bits & 0x1 - zd = None - if is_encrypted: - if not pwd: - pwd = self.pwd - if not pwd: - raise RuntimeError, "File %s is encrypted, " \ - "password required for extraction" % name - - zd = _ZipDecrypter(pwd) - # The first 12 bytes in the cypher stream is an encryption header - # used to strengthen the algorithm. The first 11 bytes are - # completely random, while the 12th contains the MSB of the CRC, - # or the MSB of the file time depending on the header type - # and is used to check the correctness of the password. 
- bytes = zef_file.read(12) - h = map(zd, bytes[0:12]) - if zinfo.flag_bits & 0x8: - # compare against the file type from extended local headers - check_byte = (zinfo._raw_time >> 8) & 0xff - else: - # compare against the CRC otherwise - check_byte = (zinfo.CRC >> 24) & 0xff - if ord(h[11]) != check_byte: - raise RuntimeError("Bad password for file", name) - - # build and return a ZipExtFile - if zd is None: - zef = ZipExtFile(zef_file, zinfo) - else: - zef = ZipExtFile(zef_file, zinfo, zd) - - # set universal newlines on ZipExtFile if necessary - if "U" in mode: - zef.set_univ_newlines(True) - return zef - - def extract(self, member, path=None, pwd=None): - """Extract a member from the archive to the current working directory, - using its full name. Its file information is extracted as accurately - as possible. `member' may be a filename or a ZipInfo object. You can - specify a different directory using `path'. - """ - if not isinstance(member, ZipInfo): - member = self.getinfo(member) - - if path is None: - path = os.getcwd() - - return self._extract_member(member, path, pwd) - - def extractall(self, path=None, members=None, pwd=None): - """Extract all members from the archive to the current working - directory. `path' specifies a different directory to extract to. - `members' is optional and must be a subset of the list returned - by namelist(). - """ - if members is None: - members = self.namelist() - - for zipinfo in members: - self.extract(zipinfo, path, pwd) - - def _extract_member(self, member, targetpath, pwd): - """Extract the ZipInfo object 'member' to a physical - file on the path targetpath. - """ - # build the destination pathname, replacing - # forward slashes to platform specific separators. - if targetpath[-1:] == "/": - targetpath = targetpath[:-1] - - # don't include leading "/" from file name if present - if os.path.isabs(member.filename): - targetpath = os.path.join(targetpath, member.filename[1:]) - else: - targetpath = os.path.join(targetpath, member.filename) - - targetpath = os.path.normpath(targetpath) - - # Create all upper directories if necessary. 
- upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - os.makedirs(upperdirs) - - source = self.open(member.filename, pwd=pwd) - target = file(targetpath, "wb") - shutil.copyfileobj(source, target) - source.close() - target.close() - - return targetpath - - def _writecheck(self, zinfo): - """Check for errors before writing a file to the archive.""" - if zinfo.filename in self.NameToInfo: - if self.debug: # Warning for duplicate names - print "Duplicate name:", zinfo.filename - if self.mode not in ("w", "a"): - raise RuntimeError, 'write() requires mode "w" or "a"' - if not self.fp: - raise RuntimeError, \ - "Attempt to write ZIP archive that was already closed" - if zinfo.compress_type == ZIP_DEFLATED and not zlib: - raise RuntimeError, \ - "Compression requires the (missing) zlib module" - if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): - raise RuntimeError, \ - "That compression method is not supported" - if zinfo.file_size > ZIP64_LIMIT: - if not self._allowZip64: - raise LargeZipFile("Filesize would require ZIP64 extensions") - if zinfo.header_offset > ZIP64_LIMIT: - if not self._allowZip64: - raise LargeZipFile("Zipfile size would require ZIP64 extensions") - - def write(self, filename, arcname=None, compress_type=None): - """Put the bytes from filename into the archive under the name - arcname.""" - if not self.fp: - raise RuntimeError( - "Attempt to write to ZIP archive that was already closed") - - st = os.stat(filename) - mtime = time.localtime(st.st_mtime) - date_time = mtime[0:6] - # Create ZipInfo instance to store file information - if arcname is None: - arcname = filename - arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) - while arcname[0] in (os.sep, os.altsep): - arcname = arcname[1:] - zinfo = ZipInfo(arcname, date_time) - zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes - if compress_type is None: - zinfo.compress_type = self.compression - else: - zinfo.compress_type = compress_type - - zinfo.file_size = st.st_size - zinfo.flag_bits = 0x00 - zinfo.header_offset = self.fp.tell() # Start of header bytes - - self._writecheck(zinfo) - self._didModify = True - fp = open(filename, "rb") - # Must overwrite CRC and sizes with correct data later - zinfo.CRC = CRC = 0 - zinfo.compress_size = compress_size = 0 - zinfo.file_size = file_size = 0 - self.fp.write(zinfo.FileHeader()) - if zinfo.compress_type == ZIP_DEFLATED: - cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - else: - cmpr = None - while 1: - buf = fp.read(1024 * 8) - if not buf: - break - file_size = file_size + len(buf) - CRC = crc32(buf, CRC) & 0xffffffff - if cmpr: - buf = cmpr.compress(buf) - compress_size = compress_size + len(buf) - self.fp.write(buf) - fp.close() - if cmpr: - buf = cmpr.flush() - compress_size = compress_size + len(buf) - self.fp.write(buf) - zinfo.compress_size = compress_size - else: - zinfo.compress_size = file_size - zinfo.CRC = CRC - zinfo.file_size = file_size - # Seek backwards and write CRC and file sizes - position = self.fp.tell() # Preserve current position in file - self.fp.seek(zinfo.header_offset + 14, 0) - self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.fp.seek(position, 0) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo - - def writestr(self, zinfo_or_arcname, bytes): - """Write a file into the archive. The contents is the string - 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or - the name of the file in the archive.""" - if not isinstance(zinfo_or_arcname, ZipInfo): - zinfo = ZipInfo(filename=zinfo_or_arcname, - date_time=time.localtime(time.time())[:6]) - zinfo.compress_type = self.compression - zinfo.external_attr = 0600 << 16 - else: - zinfo = zinfo_or_arcname - - if not self.fp: - raise RuntimeError( - "Attempt to write to ZIP archive that was already closed") - - zinfo.file_size = len(bytes) # Uncompressed size - zinfo.header_offset = self.fp.tell() # Start of header bytes - self._writecheck(zinfo) - self._didModify = True - zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum - if zinfo.compress_type == ZIP_DEFLATED: - co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - bytes = co.compress(bytes) + co.flush() - zinfo.compress_size = len(bytes) # Compressed size - else: - zinfo.compress_size = zinfo.file_size - zinfo.header_offset = self.fp.tell() # Start of header bytes - self.fp.write(zinfo.FileHeader()) - self.fp.write(bytes) - self.fp.flush() - if zinfo.flag_bits & 0x08: - # Write CRC and file sizes after the file data - self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo - - def __del__(self): - """Call the "close()" method in case the user forgot.""" - self.close() - - def close(self): - """Close the file, and for mode "w" and "a" write the ending - records.""" - if self.fp is None: - return - - if self.mode in ("w", "a") and self._didModify: # write ending records - count = 0 - pos1 = self.fp.tell() - for zinfo in self.filelist: # write central directory - count = count + 1 - dt = zinfo.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - extra = [] - if zinfo.file_size > ZIP64_LIMIT \ - or zinfo.compress_size > ZIP64_LIMIT: - extra.append(zinfo.file_size) - extra.append(zinfo.compress_size) - file_size = 0xffffffff #-1 - compress_size = 0xffffffff #-1 - else: - file_size = zinfo.file_size - compress_size = zinfo.compress_size - - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) - header_offset = 0xffffffffL # -1 32 bit - else: - header_offset = zinfo.header_offset - - extra_data = zinfo.extra - if extra: - # Append a ZIP64 field to the extra's - extra_data = struct.pack( - '<HH' + 'Q'*len(extra), - 1, 8*len(extra), *extra) + extra_data - - extract_version = max(45, zinfo.extract_version) - create_version = max(45, zinfo.create_version) - else: - extract_version = zinfo.extract_version - create_version = zinfo.create_version - - try: - centdir = struct.pack(structCentralDir, - stringCentralDir, create_version, - zinfo.create_system, extract_version, zinfo.reserved, - zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, - zinfo.CRC, compress_size, file_size, - len(zinfo.filename), len(extra_data), len(zinfo.comment), - 0, zinfo.internal_attr, zinfo.external_attr, - header_offset) - except DeprecationWarning: - print >>sys.stderr, (structCentralDir, - stringCentralDir, create_version, - zinfo.create_system, extract_version, zinfo.reserved, - zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, - zinfo.CRC, compress_size, file_size, - len(zinfo.filename), len(extra_data), len(zinfo.comment), - 0, zinfo.internal_attr, zinfo.external_attr, - header_offset) - raise - self.fp.write(centdir) - self.fp.write(zinfo.filename) - self.fp.write(extra_data) - self.fp.write(zinfo.comment) - - pos2 = self.fp.tell() - # Write end-of-zip-archive record - if pos1 > ZIP64_LIMIT: - # Need to write the ZIP64 end-of-archive records - zip64endrec = struct.pack( - structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) - self.fp.write(zip64endrec) - - zip64locrec = struct.pack( - structEndArchive64Locator, - stringEndArchive64Locator, 0, pos2, 1) - self.fp.write(zip64locrec) - - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, count, count, pos2 - pos1, 0xffffffffL, 0) - self.fp.write(endrec) - - else: - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, count, count, pos2 - pos1, pos1, 0) - self.fp.write(endrec) - self.fp.flush() - if not self._filePassed: - self.fp.close() - self.fp = None - - -class PyZipFile(ZipFile): - """Class to create ZIP archives with Python library files and packages.""" - - def writepy(self, pathname, basename = ""): - """Add all files from "pathname" to the ZIP archive. - - If pathname is a package directory, search the directory and - all package subdirectories recursively for all *.py and enter - the modules into the archive. If pathname is a plain - directory, listdir *.py and enter all modules. Else, pathname - must be a Python *.py file and the module will be put into the - archive. Added modules are always module.pyo or module.pyc. - This method will compile the module.py into module.pyc if - necessary. 
- """ - dir, name = os.path.split(pathname) - if os.path.isdir(pathname): - initname = os.path.join(pathname, "__init__.py") - if os.path.isfile(initname): - # This is a package directory, add it - if basename: - basename = "%s/%s" % (basename, name) - else: - basename = name - if self.debug: - print "Adding package in", pathname, "as", basename - fname, arcname = self._get_codename(initname[0:-3], basename) - if self.debug: - print "Adding", arcname - self.write(fname, arcname) - dirlist = os.listdir(pathname) - dirlist.remove("__init__.py") - # Add all *.py files and package subdirectories - for filename in dirlist: - path = os.path.join(pathname, filename) - root, ext = os.path.splitext(filename) - if os.path.isdir(path): - if os.path.isfile(os.path.join(path, "__init__.py")): - # This is a package directory, add it - self.writepy(path, basename) # Recursive call - elif ext == ".py": - fname, arcname = self._get_codename(path[0:-3], - basename) - if self.debug: - print "Adding", arcname - self.write(fname, arcname) - else: - # This is NOT a package directory, add its files at top level - if self.debug: - print "Adding files from directory", pathname - for filename in os.listdir(pathname): - path = os.path.join(pathname, filename) - root, ext = os.path.splitext(filename) - if ext == ".py": - fname, arcname = self._get_codename(path[0:-3], - basename) - if self.debug: - print "Adding", arcname - self.write(fname, arcname) - else: - if pathname[-3:] != ".py": - raise RuntimeError, \ - 'Files added with writepy() must end with ".py"' - fname, arcname = self._get_codename(pathname[0:-3], basename) - if self.debug: - print "Adding file", arcname - self.write(fname, arcname) - - def _get_codename(self, pathname, basename): - """Return (filename, archivename) for the path. - - Given a module name path, return the correct file path and - archive name, compiling if necessary. For example, given - /python/lib/string, return (/python/lib/string.pyc, string). - """ - file_py = pathname + ".py" - file_pyc = pathname + ".pyc" - file_pyo = pathname + ".pyo" - if os.path.isfile(file_pyo) and \ - os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: - fname = file_pyo # Use .pyo file - elif not os.path.isfile(file_pyc) or \ - os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: - import py_compile - if self.debug: - print "Compiling", file_py - try: - py_compile.compile(file_py, file_pyc, None, True) - except py_compile.PyCompileError,err: - print err.msg - fname = file_pyc - else: - fname = file_pyc - archivename = os.path.split(fname)[1] - if basename: - archivename = "%s/%s" % (basename, archivename) - return (fname, archivename) - - -def main(args = None): - import textwrap - USAGE=textwrap.dedent("""\ - Usage: - zipfile.py -l zipfile.zip # Show listing of a zipfile - zipfile.py -t zipfile.zip # Test if a zipfile is valid - zipfile.py -e zipfile.zip target # Extract zipfile into target dir - zipfile.py -c zipfile.zip src ... 
# Create zipfile from sources - """) - if args is None: - args = sys.argv[1:] - - if not args or args[0] not in ('-l', '-c', '-e', '-t'): - print USAGE - sys.exit(1) - - if args[0] == '-l': - if len(args) != 2: - print USAGE - sys.exit(1) - zf = ZipFile(args[1], 'r') - zf.printdir() - zf.close() - - elif args[0] == '-t': - if len(args) != 2: - print USAGE - sys.exit(1) - zf = ZipFile(args[1], 'r') - zf.testzip() - print "Done testing" - - elif args[0] == '-e': - if len(args) != 3: - print USAGE - sys.exit(1) - - zf = ZipFile(args[1], 'r') - out = args[2] - for path in zf.namelist(): - if path.startswith('./'): - tgt = os.path.join(out, path[2:]) - else: - tgt = os.path.join(out, path) - - tgtdir = os.path.dirname(tgt) - if not os.path.exists(tgtdir): - os.makedirs(tgtdir) - fp = open(tgt, 'wb') - fp.write(zf.read(path)) - fp.close() - zf.close() - - elif args[0] == '-c': - if len(args) < 3: - print USAGE - sys.exit(1) - - def addToZip(zf, path, zippath): - if os.path.isfile(path): - zf.write(path, zippath, ZIP_DEFLATED) - elif os.path.isdir(path): - for nm in os.listdir(path): - addToZip(zf, - os.path.join(path, nm), os.path.join(zippath, nm)) - # else: ignore - - zf = ZipFile(args[1], 'w', allowZip64=True) - for src in args[2:]: - addToZip(zf, src, os.path.basename(src)) - - zf.close() - -if __name__ == "__main__": - main() diff --git a/obitools/zipfile.pyc b/obitools/zipfile.pyc deleted file mode 100644 index 35dace0..0000000 Binary files a/obitools/zipfile.pyc and /dev/null differ
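For reference, the ColumnFile class deleted from obitools/utils/__init__.py above was the package's generic reader for delimited text: it skips marked lines, strips fields, and casts each column through a cyclic list of type converters. A minimal usage sketch; the file name 'table.txt' and its three-column layout are hypothetical:

    from obitools.utils import ColumnFile

    # 'table.txt': tab-separated columns id, count, taxid;
    # lines beginning with '#' are skipped, columns cast to the given types
    for ident, count, taxid in ColumnFile('table.txt', sep='\t',
                                          types=(str, int, int), skip='#'):
        print ident, count, taxid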
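CachedDB, from the same deleted module, wraps an accession-indexed master database behind a shelve-backed, lock-protected cache; single lookups and batch lookups both populate the cache. A sketch under stated assumptions: 'embl.cache' is a made-up cache file name, 'master' stands for any object supporting db[accession] and db[list_of_accessions] (not defined here), and the accessions are placeholders:

    from obitools.utils import CachedDB

    db = CachedDB('embl.cache', master)
    seq = db['AB000001']            # first access queries the master database
    seq = db['AB000001']            # second access is served from the shelve
    seqs = list(db[['AB000001', 'AB000002']])   # batch lookups are cached too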
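uniqSequence, from the deleted obitools/utils/bioseq.py, collapses identical reads into one record while accumulating a 'count' and per-key 'merged_*' tallies, and sortSequence orders the result on any attribute. A sketch assuming 'reads' is an iterator of obitools sequence records and 'taxo' a loaded taxonomy object; neither is defined here:

    from obitools.utils.bioseq import uniqSequence, sortSequence

    uniq = uniqSequence(reads, taxonomy=taxo, mergeIds=True)
    for s in sortSequence(uniq, 'count', reverse=True):
        print s.id, s['count'], s['merged_taxid']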
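The deleted crc64 module carries its own self-test, which doubles as documentation; CRC64 returns the checksum as two 32-bit halves and CRC64digest formats the same value as hexadecimal:

    from obitools.utils.crc64 import CRC64, CRC64digest

    high, low = CRC64('ACGTACGT')     # the two 32-bit halves of the checksum
    print CRC64digest('ACGTACGT')     # same value as 16 hex digits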
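The word-level helpers removed from obitools/word operate on integer-encoded words: wordIterator yields encodeWord() values, wordSelector filters them through accept/reject predicate lists, and wordCount tallies them. A sketch; it assumes the compiled _binary extension is importable and that decodeWord is re-exported from obitools.word, as the commented-out debug lines in wordSelector suggest:

    from obitools.word import wordIterator, wordCount, wordSelector, decodeWord
    from obitools.word.predicate import predicateHomoPolymerLarger

    size = 5
    words = wordIterator('acgtacgtacgtaaaa', size, circular=True)
    # reject any word containing a homopolymer longer than 2
    kept = wordSelector(words, reject=[predicateHomoPolymerLarger(2, size)])
    for w, n in wordCount(kept).items():
        print decodeWord(w, size), n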
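obitools/word/options.py wires those predicates to optparse: addOligoOptions registers the flags and dnaWordIterator turns the parsed options into a filtered word stream. A sketch of the intended flow, assuming a plain optparse OptionParser as the option manager and the _binary extension available; the argument values are arbitrary:

    from optparse import OptionParser
    from obitools.word.options import addOligoOptions, dnaWordIterator

    parser = OptionParser()
    addOligoOptions(parser)
    # equivalent to invoking a script as: script -s 8 -f 10 -p 3
    options, args = parser.parse_args(['-s', '8', '-f', '10', '-p', '3'])
    for w in dnaWordIterator(options):
        pass   # integer-encoded candidate oligonucleotides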
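Finally, obitools/zipfile.py is essentially a vendored copy of the Python 2 standard-library zipfile module, so it was driven exactly like the stdlib one; a reading sketch with a hypothetical archive name:

    from obitools.zipfile import ZipFile, is_zipfile

    if is_zipfile('data.zip'):                   # 'data.zip' is a placeholder
        zf = ZipFile('data.zip', 'r')
        for info in zf.infolist():
            print "%-46s %12d" % (info.filename, info.file_size)
        first = zf.read(zf.namelist()[0])        # whole first member as a string
        zf.close()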