diff --git a/obitools/SVGdraw.py b/obitools/SVGdraw.py
deleted file mode 100644
index 521f750..0000000
--- a/obitools/SVGdraw.py
+++ /dev/null
@@ -1,1054 +0,0 @@
-#!/usr/bin/env python
-##Copyright (c) 2002, Fedor Baart & Hans de Wit (Stichting Farmaceutische Kengetallen)
-##All rights reserved.
-##
-##Redistribution and use in source and binary forms, with or without modification,
-##are permitted provided that the following conditions are met:
-##
-##Redistributions of source code must retain the above copyright notice, this
-##list of conditions and the following disclaimer.
-##
-##Redistributions in binary form must reproduce the above copyright notice,
-##this list of conditions and the following disclaimer in the documentation and/or
-##other materials provided with the distribution.
-##
-##Neither the name of the Stichting Farmaceutische Kengetallen nor the names of
-##its contributors may be used to endorse or promote products derived from this
-##software without specific prior written permission.
-##
-##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-##AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-##IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-##DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-##FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-##DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-##SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-##CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-##OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-##OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-##Thanks to Gerald Rosennfellner for his help and useful comments.
-
-__doc__="""Use SVGdraw to generate your SVGdrawings.
-
-SVGdraw uses an object model drawing and a method toXML to create SVG graphics
-by using easy to use classes and methods usualy you start by creating a drawing eg
-
- d=drawing()
- #then you create a SVG root element
- s=svg()
- #then you add some elements eg a circle and add it to the svg root element
- c=circle()
- #you can supply attributes by using named arguments.
- c=circle(fill='red',stroke='blue')
- #or by updating the attributes attribute:
- c.attributes['stroke-width']=1
- s.addElement(c)
- #then you add the svg root element to the drawing
- d.setSVG(s)
- #and finaly you xmlify the drawing
- d.toXml()
-
-
-this results in the svg source of the drawing, which consists of a circle
-on a white background. Its as easy as that;)
-This module was created using the SVG specification of www.w3c.org and the
-O'Reilly (www.oreilly.com) python books as information sources. A svg viewer
-is available from www.adobe.com"""
-
-__version__="1.0"
-
-# there are two possibilities to generate svg:
-# via a dom implementation and directly using text strings
-# the latter is way faster (and shorter in coding)
-# the former is only used in debugging svg programs
-# maybe it will be removed alltogether after a while
-# with the following variable you indicate whether to use the dom implementation
-# Note that PyXML is required for using the dom implementation.
-# It is also possible to use the standard minidom. But I didn't try that one.
-# Anyway the text based approach is about 60 times faster than using the full dom implementation.
-use_dom_implementation=0
-
-
-import exceptions
-if use_dom_implementation<>0:
- try:
- from xml.dom import implementation
- from xml.dom.ext import PrettyPrint
- except:
- raise exceptions.ImportError, "PyXML is required for using the dom implementation"
-#The implementation is used for the creating the XML document.
-#The prettyprint module is used for converting the xml document object to a xml file
-
-import sys
-assert sys.version_info[0]>=2
-if sys.version_info[1]<2:
- True=1
- False=0
- file=open
-
-sys.setrecursionlimit=50
-#The recursion limit is set conservative so mistakes like s=svg() s.addElement(s)
-#won't eat up too much processor time.
-
-#the following code is pasted form xml.sax.saxutils
-#it makes it possible to run the code without the xml sax package installed
-#To make it possible to have in your text elements, it is necessary to escape the texts
-def _escape(data, entities={}):
- """Escape &, <, and > in a string of data.
-
- You can escape other strings of data by passing a dictionary as
- the optional entities parameter. The keys and values must all be
- strings; each key will be replaced with its corresponding value.
- """
- data = data.replace("&", "&")
- data = data.replace("<", "<")
- data = data.replace(">", ">")
- for chars, entity in entities.items():
- data = data.replace(chars, entity)
- return data
-
-def _quoteattr(data, entities={}):
- """Escape and quote an attribute value.
-
- Escape &, <, and > in a string of data, then quote it for use as
- an attribute value. The \" character will be escaped as well, if
- necessary.
-
- You can escape other strings of data by passing a dictionary as
- the optional entities parameter. The keys and values must all be
- strings; each key will be replaced with its corresponding value.
- """
- data = _escape(data, entities)
- if '"' in data:
- if "'" in data:
- data = '"%s"' % data.replace('"', """)
- else:
- data = "'%s'" % data
- else:
- data = '"%s"' % data
- return data
-
-
-
-def _xypointlist(a):
- """formats a list of xy pairs"""
- s=''
- for e in a: #this could be done more elegant
- s+=str(e)[1:-1] +' '
- return s
-
-def _viewboxlist(a):
- """formats a tuple"""
- s=''
- for e in a:
- s+=str(e)+' '
- return s
-
-def _pointlist(a):
- """formats a list of numbers"""
- return str(a)[1:-1]
-
-class pathdata:
- """class used to create a pathdata object which can be used for a path.
- although most methods are pretty straightforward it might be useful to look at the SVG specification."""
- #I didn't test the methods below.
- def __init__(self,x=None,y=None):
- self.path=[]
- if x is not None and y is not None:
- self.path.append('M '+str(x)+' '+str(y))
- def closepath(self):
- """ends the path"""
- self.path.append('z')
- def move(self,x,y):
- """move to absolute"""
- self.path.append('M '+str(x)+' '+str(y))
- def relmove(self,x,y):
- """move to relative"""
- self.path.append('m '+str(x)+' '+str(y))
- def line(self,x,y):
- """line to absolute"""
- self.path.append('L '+str(x)+' '+str(y))
- def relline(self,x,y):
- """line to relative"""
- self.path.append('l '+str(x)+' '+str(y))
- def hline(self,x):
- """horizontal line to absolute"""
- self.path.append('H'+str(x))
- def relhline(self,x):
- """horizontal line to relative"""
- self.path.append('h'+str(x))
- def vline(self,y):
- """verical line to absolute"""
- self.path.append('V'+str(y))
- def relvline(self,y):
- """vertical line to relative"""
- self.path.append('v'+str(y))
- def bezier(self,x1,y1,x2,y2,x,y):
- """bezier with xy1 and xy2 to xy absolut"""
- self.path.append('C'+str(x1)+','+str(y1)+' '+str(x2)+','+str(y2)+' '+str(x)+','+str(y))
- def relbezier(self,x1,y1,x2,y2,x,y):
- """bezier with xy1 and xy2 to xy relative"""
- self.path.append('c'+str(x1)+','+str(y1)+' '+str(x2)+','+str(y2)+' '+str(x)+','+str(y))
- def smbezier(self,x2,y2,x,y):
- """smooth bezier with xy2 to xy absolut"""
- self.path.append('S'+str(x2)+','+str(y2)+' '+str(x)+','+str(y))
- def relsmbezier(self,x2,y2,x,y):
- """smooth bezier with xy2 to xy relative"""
- self.path.append('s'+str(x2)+','+str(y2)+' '+str(x)+','+str(y))
- def qbezier(self,x1,y1,x,y):
- """quadratic bezier with xy1 to xy absolut"""
- self.path.append('Q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y))
- def relqbezier(self,x1,y1,x,y):
- """quadratic bezier with xy1 to xy relative"""
- self.path.append('q'+str(x1)+','+str(y1)+' '+str(x)+','+str(y))
- def smqbezier(self,x,y):
- """smooth quadratic bezier to xy absolut"""
- self.path.append('T'+str(x)+','+str(y))
- def relsmqbezier(self,x,y):
- """smooth quadratic bezier to xy relative"""
- self.path.append('t'+str(x)+','+str(y))
- def ellarc(self,rx,ry,xrot,laf,sf,x,y):
- """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy absolut"""
- self.path.append('A'+str(rx)+','+str(ry)+' '+str(xrot)+' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y))
- def relellarc(self,rx,ry,xrot,laf,sf,x,y):
- """elliptival arc with rx and ry rotating with xrot using large-arc-flag and sweep-flag to xy relative"""
- self.path.append('a'+str(rx)+','+str(ry)+' '+str(xrot)+' '+str(laf)+' '+str(sf)+' '+str(x)+' '+str(y))
- def __repr__(self):
- return ' '.join(self.path)
-
-
-
-
-class SVGelement:
- """SVGelement(type,attributes,elements,text,namespace,**args)
- Creates a arbitrary svg element and is intended to be subclassed not used on its own.
- This element is the base of every svg element it defines a class which resembles
- a xml-element. The main advantage of this kind of implementation is that you don't
- have to create a toXML method for every different graph object. Every element
- consists of a type, attribute, optional subelements, optional text and an optional
- namespace. Note the elements==None, if elements = None:self.elements=[] construction.
- This is done because if you default to elements=[] every object has a reference
- to the same empty list."""
- def __init__(self,type='',attributes=None,elements=None,text='',namespace='',cdata=None,**args):
- self.type=type
- if attributes==None:
- self.attributes={}
- else:
- self.attributes=attributes
- if elements==None:
- self.elements=[]
- else:
- self.elements=elements
- self.text=text
- self.namespace=namespace
- self.cdata=cdata
- for arg in args.keys():
- self.attributes[arg]=args[arg]
- def addElement(self,SVGelement):
- """adds an element to a SVGelement
-
- SVGelement.addElement(SVGelement)
- """
- self.elements.append(SVGelement)
-
- #def toXml(self,level,f, preserveWhitespace=False):
- def toXml(self,level,f, **kwargs):
- preserve = kwargs.get("preserveWhitespace", False)
- if preserve:
- #print "PRESERVING"
- NEWLINE = ""
- TAB = ""
- else:
- #print "NOT PRESE"
- NEWLINE = "\n"
- TAB = "\t"
- f.write(TAB*level)
- f.write('<'+self.type)
- for attkey in self.attributes.keys():
- f.write(' '+_escape(str(attkey))+'='+_quoteattr(str(self.attributes[attkey])))
- if self.namespace:
- f.write(' xmlns="'+ _escape(str(self.namespace))+'" ')
- if self.elements or self.text or self.cdata:
- f.write('>')
- if self.elements:
- f.write(NEWLINE)
- for element in self.elements:
- element.toXml(level+1,f, preserveWhitespace=preserve)
- if self.cdata:
- f.write(NEWLINE+TAB*(level+1)+''+NEWLINE)
- if self.text:
- if type(self.text)==type(''): #If the text is only text
- f.write(_escape(str(self.text)))
- else: #If the text is a spannedtext class
- f.write(str(self.text))
- if self.elements:
- f.write(TAB*level+''+self.type+'>'+NEWLINE)
- elif self.text:
- f.write(''+self.type+'>'+NEWLINE)
- elif self.cdata:
- f.write(TAB*level+''+self.type+'>'+NEWLINE)
- else:
- f.write('/>'+NEWLINE)
-
-class tspan(SVGelement):
- """ts=tspan(text='',**args)
-
- a tspan element can be used for applying formatting to a textsection
- usage:
- ts=tspan('this text is bold')
- ts.attributes['font-weight']='bold'
- st=spannedtext()
- st.addtspan(ts)
- t=text(3,5,st)
- """
- def __init__(self,text=None,**args):
- SVGelement.__init__(self,'tspan',**args)
- if self.text<>None:
- self.text=text
- def __repr__(self):
- s="'
- s+=self.text
- s+=''
- return s
-
-class tref(SVGelement):
- """tr=tref(link='',**args)
-
- a tref element can be used for referencing text by a link to its id.
- usage:
- tr=tref('#linktotext')
- st=spannedtext()
- st.addtref(tr)
- t=text(3,5,st)
- """
- def __init__(self,link,**args):
- SVGelement.__init__(self,'tref',{'xlink:href':link},**args)
- def __repr__(self):
- s="'
- return s
-
-class spannedtext:
- """st=spannedtext(textlist=[])
-
- a spannedtext can be used for text which consists of text, tspan's and tref's
- You can use it to add to a text element or path element. Don't add it directly
- to a svg or a group element.
- usage:
-
- ts=tspan('this text is bold')
- ts.attributes['font-weight']='bold'
- tr=tref('#linktotext')
- tr.attributes['fill']='red'
- st=spannedtext()
- st.addtspan(ts)
- st.addtref(tr)
- st.addtext('This text is not bold')
- t=text(3,5,st)
- """
- def __init__(self,textlist=None):
- if textlist==None:
- self.textlist=[]
- else:
- self.textlist=textlist
- def addtext(self,text=''):
- self.textlist.append(text)
- def addtspan(self,tspan):
- self.textlist.append(tspan)
- def addtref(self,tref):
- self.textlist.append(tref)
- def __repr__(self):
- s=""
- for element in self.textlist:
- s+=str(element)
- return s
-
-class rect(SVGelement):
- """r=rect(width,height,x,y,fill,stroke,stroke_width,**args)
-
- a rectangle is defined by a width and height and a xy pair
- """
- def __init__(self,x=None,y=None,width=None,height=None,fill=None,stroke=None,stroke_width=None,**args):
- if width==None or height==None:
- if width<>None:
- raise ValueError, 'height is required'
- if height<>None:
- raise ValueError, 'width is required'
- else:
- raise ValueError, 'both height and width are required'
- SVGelement.__init__(self,'rect',{'width':width,'height':height},**args)
- if x<>None:
- self.attributes['x']=x
- if y<>None:
- self.attributes['y']=y
- if fill<>None:
- self.attributes['fill']=fill
- if stroke<>None:
- self.attributes['stroke']=stroke
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
-
-class ellipse(SVGelement):
- """e=ellipse(rx,ry,x,y,fill,stroke,stroke_width,**args)
-
- an ellipse is defined as a center and a x and y radius.
- """
- def __init__(self,cx=None,cy=None,rx=None,ry=None,fill=None,stroke=None,stroke_width=None,**args):
- if rx==None or ry== None:
- if rx<>None:
- raise ValueError, 'rx is required'
- if ry<>None:
- raise ValueError, 'ry is required'
- else:
- raise ValueError, 'both rx and ry are required'
- SVGelement.__init__(self,'ellipse',{'rx':rx,'ry':ry},**args)
- if cx<>None:
- self.attributes['cx']=cx
- if cy<>None:
- self.attributes['cy']=cy
- if fill<>None:
- self.attributes['fill']=fill
- if stroke<>None:
- self.attributes['stroke']=stroke
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
-
-
-class circle(SVGelement):
- """c=circle(x,y,radius,fill,stroke,stroke_width,**args)
-
- The circle creates an element using a x, y and radius values eg
- """
- def __init__(self,cx=None,cy=None,r=None,fill=None,stroke=None,stroke_width=None,**args):
- if r==None:
- raise ValueError, 'r is required'
- SVGelement.__init__(self,'circle',{'r':r},**args)
- if cx<>None:
- self.attributes['cx']=cx
- if cy<>None:
- self.attributes['cy']=cy
- if fill<>None:
- self.attributes['fill']=fill
- if stroke<>None:
- self.attributes['stroke']=stroke
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
-
-class point(circle):
- """p=point(x,y,color)
-
- A point is defined as a circle with a size 1 radius. It may be more efficient to use a
- very small rectangle if you use many points because a circle is difficult to render.
- """
- def __init__(self,x,y,fill='black',**args):
- circle.__init__(self,x,y,1,fill,**args)
-
-class line(SVGelement):
- """l=line(x1,y1,x2,y2,stroke,stroke_width,**args)
-
- A line is defined by a begin x,y pair and an end x,y pair
- """
- def __init__(self,x1=None,y1=None,x2=None,y2=None,stroke=None,stroke_width=None,**args):
- SVGelement.__init__(self,'line',**args)
- if x1<>None:
- self.attributes['x1']=x1
- if y1<>None:
- self.attributes['y1']=y1
- if x2<>None:
- self.attributes['x2']=x2
- if y2<>None:
- self.attributes['y2']=y2
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
- if stroke<>None:
- self.attributes['stroke']=stroke
-
-class polyline(SVGelement):
- """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args)
-
- a polyline is defined by a list of xy pairs
- """
- def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args):
- SVGelement.__init__(self,'polyline',{'points':_xypointlist(points)},**args)
- if fill<>None:
- self.attributes['fill']=fill
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
- if stroke<>None:
- self.attributes['stroke']=stroke
-
-class polygon(SVGelement):
- """pl=polyline([[x1,y1],[x2,y2],...],fill,stroke,stroke_width,**args)
-
- a polygon is defined by a list of xy pairs
- """
- def __init__(self,points,fill=None,stroke=None,stroke_width=None,**args):
- SVGelement.__init__(self,'polygon',{'points':_xypointlist(points)},**args)
- if fill<>None:
- self.attributes['fill']=fill
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
- if stroke<>None:
- self.attributes['stroke']=stroke
-
-class path(SVGelement):
- """p=path(path,fill,stroke,stroke_width,**args)
-
- a path is defined by a path object and optional width, stroke and fillcolor
- """
- def __init__(self,pathdata,fill=None,stroke=None,stroke_width=None,id=None,**args):
- SVGelement.__init__(self,'path',{'d':str(pathdata)},**args)
- if stroke<>None:
- self.attributes['stroke']=stroke
- if fill<>None:
- self.attributes['fill']=fill
- if stroke_width<>None:
- self.attributes['stroke-width']=stroke_width
- if id<>None:
- self.attributes['id']=id
-
-
-class text(SVGelement):
- """t=text(x,y,text,font_size,font_family,**args)
-
- a text element can bge used for displaying text on the screen
- """
- def __init__(self,x=None,y=None,text=None,font_size=None,font_family=None,text_anchor=None,**args):
- SVGelement.__init__(self,'text',**args)
- if x<>None:
- self.attributes['x']=x
- if y<>None:
- self.attributes['y']=y
- if font_size<>None:
- self.attributes['font-size']=font_size
- if font_family<>None:
- self.attributes['font-family']=font_family
- if text<>None:
- self.text=text
- if text_anchor<>None:
- self.attributes['text-anchor']=text_anchor
-
- def toXml(self,level,f, **kwargs):
- preserve = self.attributes.get("xml:space", None)
- if preserve == "preserve":
- #print "FOO PRE"
- SVGelement.toXml(self,level, f, preserveWhitespace=True)
- else:
- #print "FOO NOT"
- SVGelement.toXml(self, level, f, preserveWhitespace=False)
-
-class textpath(SVGelement):
- """tp=textpath(text,link,**args)
-
- a textpath places a text on a path which is referenced by a link.
- """
- def __init__(self,link,text=None,**args):
- SVGelement.__init__(self,'textPath',{'xlink:href':link},**args)
- if text<>None:
- self.text=text
-
-class pattern(SVGelement):
- """p=pattern(x,y,width,height,patternUnits,**args)
-
- A pattern is used to fill or stroke an object using a pre-defined
- graphic object which can be replicated ("tiled") at fixed intervals
- in x and y to cover the areas to be painted.
- """
- def __init__(self,x=None,y=None,width=None,height=None,patternUnits=None,**args):
- SVGelement.__init__(self,'pattern',**args)
- if x<>None:
- self.attributes['x']=x
- if y<>None:
- self.attributes['y']=y
- if width<>None:
- self.attributes['width']=width
- if height<>None:
- self.attributes['height']=height
- if patternUnits<>None:
- self.attributes['patternUnits']=patternUnits
-
-class title(SVGelement):
- """t=title(text,**args)
-
- a title is a text element. The text is displayed in the title bar
- add at least one to the root svg element
- """
- def __init__(self,text=None,**args):
- SVGelement.__init__(self,'title',**args)
- if text<>None:
- self.text=text
-
-class description(SVGelement):
- """d=description(text,**args)
-
- a description can be added to any element and is used for a tooltip
- Add this element before adding other elements.
- """
- def __init__(self,text=None,**args):
- SVGelement.__init__(self,'desc',**args)
- if text<>None:
- self.text=text
-
-class lineargradient(SVGelement):
- """lg=lineargradient(x1,y1,x2,y2,id,**args)
-
- defines a lineargradient using two xy pairs.
- stop elements van be added to define the gradient colors.
- """
- def __init__(self,x1=None,y1=None,x2=None,y2=None,id=None,**args):
- SVGelement.__init__(self,'linearGradient',**args)
- if x1<>None:
- self.attributes['x1']=x1
- if y1<>None:
- self.attributes['y1']=y1
- if x2<>None:
- self.attributes['x2']=x2
- if y2<>None:
- self.attributes['y2']=y2
- if id<>None:
- self.attributes['id']=id
-
-class radialgradient(SVGelement):
- """rg=radialgradient(cx,cy,r,fx,fy,id,**args)
-
- defines a radial gradient using a outer circle which are defined by a cx,cy and r and by using a focalpoint.
- stop elements van be added to define the gradient colors.
- """
- def __init__(self,cx=None,cy=None,r=None,fx=None,fy=None,id=None,**args):
- SVGelement.__init__(self,'radialGradient',**args)
- if cx<>None:
- self.attributes['cx']=cx
- if cy<>None:
- self.attributes['cy']=cy
- if r<>None:
- self.attributes['r']=r
- if fx<>None:
- self.attributes['fx']=fx
- if fy<>None:
- self.attributes['fy']=fy
- if id<>None:
- self.attributes['id']=id
-
-class stop(SVGelement):
- """st=stop(offset,stop_color,**args)
-
- Puts a stop color at the specified radius
- """
- def __init__(self,offset,stop_color=None,**args):
- SVGelement.__init__(self,'stop',{'offset':offset},**args)
- if stop_color<>None:
- self.attributes['stop-color']=stop_color
-
-class style(SVGelement):
- """st=style(type,cdata=None,**args)
-
- Add a CDATA element to this element for defing in line stylesheets etc..
- """
- def __init__(self,type,cdata=None,**args):
- SVGelement.__init__(self,'style',{'type':type},cdata=cdata, **args)
-
-
-class image(SVGelement):
- """im=image(url,width,height,x,y,**args)
-
- adds an image to the drawing. Supported formats are .png, .jpg and .svg.
- """
- def __init__(self,url,x=None,y=None,width=None,height=None,**args):
- if width==None or height==None:
- if width<>None:
- raise ValueError, 'height is required'
- if height<>None:
- raise ValueError, 'width is required'
- else:
- raise ValueError, 'both height and width are required'
- SVGelement.__init__(self,'image',{'xlink:href':url,'width':width,'height':height},**args)
- if x<>None:
- self.attributes['x']=x
- if y<>None:
- self.attributes['y']=y
-
-class cursor(SVGelement):
- """c=cursor(url,**args)
-
- defines a custom cursor for a element or a drawing
- """
- def __init__(self,url,**args):
- SVGelement.__init__(self,'cursor',{'xlink:href':url},**args)
-
-
-class marker(SVGelement):
- """m=marker(id,viewbox,refX,refY,markerWidth,markerHeight,**args)
-
- defines a marker which can be used as an endpoint for a line or other pathtypes
- add an element to it which should be used as a marker.
- """
- def __init__(self,id=None,viewBox=None,refx=None,refy=None,markerWidth=None,markerHeight=None,**args):
- SVGelement.__init__(self,'marker',**args)
- if id<>None:
- self.attributes['id']=id
- if viewBox<>None:
- self.attributes['viewBox']=_viewboxlist(viewBox)
- if refx<>None:
- self.attributes['refX']=refx
- if refy<>None:
- self.attributes['refY']=refy
- if markerWidth<>None:
- self.attributes['markerWidth']=markerWidth
- if markerHeight<>None:
- self.attributes['markerHeight']=markerHeight
-
-class group(SVGelement):
- """g=group(id,**args)
-
- a group is defined by an id and is used to contain elements
- g.addElement(SVGelement)
- """
- def __init__(self,id=None,**args):
- SVGelement.__init__(self,'g',**args)
- if id<>None:
- self.attributes['id']=id
-
-class symbol(SVGelement):
- """sy=symbol(id,viewbox,**args)
-
- defines a symbol which can be used on different places in your graph using
- the use element. A symbol is not rendered but you can use 'use' elements to
- display it by referencing its id.
- sy.addElement(SVGelement)
- """
-
- def __init__(self,id=None,viewBox=None,**args):
- SVGelement.__init__(self,'symbol',**args)
- if id<>None:
- self.attributes['id']=id
- if viewBox<>None:
- self.attributes['viewBox']=_viewboxlist(viewBox)
-
-class defs(SVGelement):
- """d=defs(**args)
-
- container for defining elements
- """
- def __init__(self,**args):
- SVGelement.__init__(self,'defs',**args)
-
-class switch(SVGelement):
- """sw=switch(**args)
-
- Elements added to a switch element which are "switched" by the attributes
- requiredFeatures, requiredExtensions and systemLanguage.
- Refer to the SVG specification for details.
- """
- def __init__(self,**args):
- SVGelement.__init__(self,'switch',**args)
-
-
-class use(SVGelement):
- """u=use(link,x,y,width,height,**args)
-
- references a symbol by linking to its id and its position, height and width
- """
- def __init__(self,link,x=None,y=None,width=None,height=None,**args):
- SVGelement.__init__(self,'use',{'xlink:href':link},**args)
- if x<>None:
- self.attributes['x']=x
- if y<>None:
- self.attributes['y']=y
-
- if width<>None:
- self.attributes['width']=width
- if height<>None:
- self.attributes['height']=height
-
-
-class link(SVGelement):
- """a=link(url,**args)
-
- a link is defined by a hyperlink. add elements which have to be linked
- a.addElement(SVGelement)
- """
- def __init__(self,link='',**args):
- SVGelement.__init__(self,'a',{'xlink:href':link},**args)
-
-class view(SVGelement):
- """v=view(id,**args)
-
- a view can be used to create a view with different attributes"""
- def __init__(self,id=None,**args):
- SVGelement.__init__(self,'view',**args)
- if id<>None:
- self.attributes['id']=id
-
-class script(SVGelement):
- """sc=script(type,type,cdata,**args)
-
- adds a script element which contains CDATA to the SVG drawing
-
- """
- def __init__(self,type,cdata=None,**args):
- SVGelement.__init__(self,'script',{'type':type},cdata=cdata,**args)
-
-class animate(SVGelement):
- """an=animate(attribute,from,to,during,**args)
-
- animates an attribute.
- """
- def __init__(self,attribute,fr=None,to=None,dur=None,**args):
- SVGelement.__init__(self,'animate',{'attributeName':attribute},**args)
- if fr<>None:
- self.attributes['from']=fr
- if to<>None:
- self.attributes['to']=to
- if dur<>None:
- self.attributes['dur']=dur
-
-class animateMotion(SVGelement):
- """an=animateMotion(pathdata,dur,**args)
-
- animates a SVGelement over the given path in dur seconds
- """
- def __init__(self,pathdata,dur,**args):
- SVGelement.__init__(self,'animateMotion',**args)
- if pathdata<>None:
- self.attributes['path']=str(pathdata)
- if dur<>None:
- self.attributes['dur']=dur
-
-class animateTransform(SVGelement):
- """antr=animateTransform(type,from,to,dur,**args)
-
- transform an element from and to a value.
- """
- def __init__(self,type=None,fr=None,to=None,dur=None,**args):
- SVGelement.__init__(self,'animateTransform',{'attributeName':'transform'},**args)
- #As far as I know the attributeName is always transform
- if type<>None:
- self.attributes['type']=type
- if fr<>None:
- self.attributes['from']=fr
- if to<>None:
- self.attributes['to']=to
- if dur<>None:
- self.attributes['dur']=dur
-class animateColor(SVGelement):
- """ac=animateColor(attribute,type,from,to,dur,**args)
-
- Animates the color of a element
- """
- def __init__(self,attribute,type=None,fr=None,to=None,dur=None,**args):
- SVGelement.__init__(self,'animateColor',{'attributeName':attribute},**args)
- if type<>None:
- self.attributes['type']=type
- if fr<>None:
- self.attributes['from']=fr
- if to<>None:
- self.attributes['to']=to
- if dur<>None:
- self.attributes['dur']=dur
-class set(SVGelement):
- """st=set(attribute,to,during,**args)
-
- sets an attribute to a value for a
- """
- def __init__(self,attribute,to=None,dur=None,**args):
- SVGelement.__init__(self,'set',{'attributeName':attribute},**args)
- if to<>None:
- self.attributes['to']=to
- if dur<>None:
- self.attributes['dur']=dur
-
-
-
-class svg(SVGelement):
- """s=svg(viewbox,width,height,**args)
-
- a svg or element is the root of a drawing add all elements to a svg element.
- You can have different svg elements in one svg file
- s.addElement(SVGelement)
-
- eg
- d=drawing()
- s=svg((0,0,100,100),'100%','100%')
- c=circle(50,50,20)
- s.addElement(c)
- d.setSVG(s)
- d.toXml()
- """
- def __init__(self,viewBox=None, width=None, height=None,**args):
- SVGelement.__init__(self,'svg',**args)
- if viewBox<>None:
- self.attributes['viewBox']=_viewboxlist(viewBox)
- if width<>None:
- self.attributes['width']=width
- if height<>None:
- self.attributes['height']=height
- self.namespace="http://www.w3.org/2000/svg"
-
-class drawing:
- """d=drawing()
-
- this is the actual SVG document. It needs a svg element as a root.
- Use the addSVG method to set the svg to the root. Use the toXml method to write the SVG
- source to the screen or to a file
- d=drawing()
- d.addSVG(svg)
- d.toXml(optionalfilename)
- """
-
- def __init__(self):
- self.svg=None
- def setSVG(self,svg):
- self.svg=svg
- #Voeg een element toe aan de grafiek toe.
- if use_dom_implementation==0:
- def toXml(self, filename='',compress=False):
- import cStringIO
- xml=cStringIO.StringIO()
- xml.write('\n')
- xml.write("""]>\n""")
- self.svg.toXml(0,xml)
- if not filename:
- if compress:
- import gzip
- f=cStringIO.StringIO()
- zf=gzip.GzipFile(fileobj=f,mode='wb')
- zf.write(xml.getvalue())
- zf.close()
- f.seek(0)
- return f.read()
- else:
- return xml.getvalue()
- else:
- if filename[-4:]=='svgz':
- import gzip
- f=gzip.GzipFile(filename=filename,mode="wb", compresslevel=9)
- f.write(xml.getvalue())
- f.close()
- else:
- f=file(filename,'w')
- f.write(xml.getvalue())
- f.close()
-
- else:
- def toXml(self,filename='',compress=False):
- """drawing.toXml() ---->to the screen
- drawing.toXml(filename)---->to the file
- writes a svg drawing to the screen or to a file
- compresses if filename ends with svgz or if compress is true
- """
- doctype = implementation.createDocumentType('svg',"-//W3C//DTD SVG 1.0//EN""",'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd ')
-
- global root
- #root is defined global so it can be used by the appender. Its also possible to use it as an arugument but
- #that is a bit messy.
- root=implementation.createDocument(None,None,doctype)
- #Create the xml document.
- global appender
- def appender(element,elementroot):
- """This recursive function appends elements to an element and sets the attributes
- and type. It stops when alle elements have been appended"""
- if element.namespace:
- e=root.createElementNS(element.namespace,element.type)
- else:
- e=root.createElement(element.type)
- if element.text:
- textnode=root.createTextNode(element.text)
- e.appendChild(textnode)
- for attribute in element.attributes.keys(): #in element.attributes is supported from python 2.2
- e.setAttribute(attribute,str(element.attributes[attribute]))
- if element.elements:
- for el in element.elements:
- e=appender(el,e)
- elementroot.appendChild(e)
- return elementroot
- root=appender(self.svg,root)
- if not filename:
- import cStringIO
- xml=cStringIO.StringIO()
- PrettyPrint(root,xml)
- if compress:
- import gzip
- f=cStringIO.StringIO()
- zf=gzip.GzipFile(fileobj=f,mode='wb')
- zf.write(xml.getvalue())
- zf.close()
- f.seek(0)
- return f.read()
- else:
- return xml.getvalue()
- else:
- try:
- if filename[-4:]=='svgz':
- import gzip
- import cStringIO
- xml=cStringIO.StringIO()
- PrettyPrint(root,xml)
- f=gzip.GzipFile(filename=filename,mode='wb',compresslevel=9)
- f.write(xml.getvalue())
- f.close()
- else:
- f=open(filename,'w')
- PrettyPrint(root,f)
- f.close()
- except:
- print "Cannot write SVG file: " + filename
- def validate(self):
- try:
- import xml.parsers.xmlproc.xmlval
- except:
- raise exceptions.ImportError,'PyXml is required for validating SVG'
- svg=self.toXml()
- xv=xml.parsers.xmlproc.xmlval.XMLValidator()
- try:
- xv.feed(svg)
- except:
- raise "SVG is not well formed, see messages above"
- else:
- print "SVG well formed"
-if __name__=='__main__':
-
-
- d=drawing()
- s=svg((0,0,100,100))
- r=rect(-100,-100,300,300,'cyan')
- s.addElement(r)
-
- t=title('SVGdraw Demo')
- s.addElement(t)
- g=group('animations')
- e=ellipse(0,0,5,2)
- g.addElement(e)
- c=circle(0,0,1,'red')
- g.addElement(c)
- pd=pathdata(0,-10)
- for i in range(6):
- pd.relsmbezier(10,5,0,10)
- pd.relsmbezier(-10,5,0,10)
- an=animateMotion(pd,10)
- an.attributes['rotate']='auto-reverse'
- an.attributes['repeatCount']="indefinite"
- g.addElement(an)
- s.addElement(g)
- for i in range(20,120,20):
- u=use('#animations',i,0)
- s.addElement(u)
- for i in range(0,120,20):
- for j in range(5,105,10):
- c=circle(i,j,1,'red','black',.5)
- s.addElement(c)
- d.setSVG(s)
-
- print d.toXml()
-
diff --git a/obitools/__init__.py b/obitools/__init__.py
deleted file mode 100644
index 3063d78..0000000
--- a/obitools/__init__.py
+++ /dev/null
@@ -1,711 +0,0 @@
-'''
-**obitools** main module
-------------------------
-
-.. codeauthor:: Eric Coissac
-
-
-
-obitools module provides base class for sequence manipulation.
-
-All biological sequences must be subclass of :py:class:`obitools.BioSequence`.
-Some biological sequences are defined as transformation of other
-biological sequences. For example Reversed complemented sequences
-are a transformation of a :py:class:`obitools.NucSequence`. This particular
-type of sequences are subclasses of the :py:class:`obitools.WrappedBioSequence`.
-
-.. inheritance-diagram:: BioSequence NucSequence AASequence WrappedBioSequence SubSequence DNAComplementSequence
- :parts: 1
-
-
-'''
-
-from weakref import ref
-
-from obitools.utils.iterator import uniqueChain
-from itertools import chain
-import re
-
-_default_raw_parser = " %s *= *([^;]*);"
-
-try:
- from functools import partial
-except:
- #
- # Add for compatibility purpose with Python < 2.5
- #
- def partial(func, *args, **keywords):
- def newfunc(*fargs, **fkeywords):
- newkeywords = keywords.copy()
- newkeywords.update(fkeywords)
- return func(*(args + fargs), **newkeywords)
- newfunc.func = func
- newfunc.args = args
- newfunc.keywords = keywords
- return newfunc
-
-
-from obitools.sequenceencoder import DNAComplementEncoder
-from obitools.location import Location
-
-class WrapperSetIterator(object):
- def __init__(self,s):
- self._i = set.__iter__(s)
- def next(self):
- return self._i.next()()
- def __iter__(self):
- return self
-
-class WrapperSet(set):
- def __iter__(self):
- return WrapperSetIterator(self)
-
-
-class BioSequence(object):
- '''
- BioSequence class is the base class for biological
- sequence representation.
-
- It provides storage of :
-
- - the sequence itself,
- - an identifier,
- - a definition an manage
- - a set of complementary information on a key / value principle.
-
- .. warning::
-
- :py:class:`obitools.BioSequence` is an abstract class, this constructor
- can only be called by a subclass constructor.
- '''
-
- def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
- '''
-
- :param id: sequence identifier
- :type id: `str`
-
- :param seq: the sequence
- :type seq: `str`
-
- :param definition: sequence definition (optional)
- :type definition: `str`
-
- :param rawinfo: a text containing a set of key=value; patterns
- :type definition: `str`
-
- :param rawparser: a text describing a regular patterns template
- used to parse rawinfo
- :type definition: `str`
-
- :param info: extra named parameters can be added to associate complementary
- data to the sequence
-
- '''
-
- assert type(self)!=BioSequence,"obitools.BioSequence is an abstract class"
-
- self._seq=str(seq).lower()
- self._info = dict(info)
- if rawinfo is not None:
- self._rawinfo=' ' + rawinfo
- else:
- self._rawinfo=None
- self._rawparser=rawparser
- self.definition=definition
- self.id=id
- self._hasTaxid=None
-
- def get_seq(self):
- return self.__seq
-
-
- def set_seq(self, value):
- if not isinstance(value, str):
- value=str(value)
- self.__seq = value
- self.__len = len(value)
-
-
- def clone(self):
- seq = type(self)(self.id,
- str(self),
- definition=self.definition
- )
- seq._info=dict(self.getTags())
- seq._rawinfo=self._rawinfo
- seq._rawparser=self._rawparser
- seq._hasTaxid=self._hasTaxid
- return seq
-
- def getDefinition(self):
- '''
- Sequence definition getter.
-
- :return: the sequence definition
- :rtype: str
-
- '''
- return self._definition
-
- def setDefinition(self, value):
- '''
- Sequence definition setter.
-
- :param value: the new sequence definition
- :type value: C{str}
- :return: C{None}
- '''
- self._definition = value
-
- def getId(self):
- '''
- Sequence identifier getter
-
- :return: the sequence identifier
- :rtype: C{str}
- '''
- return self._id
-
- def setId(self, value):
- '''
- Sequence identifier setter.
-
- :param value: the new sequence identifier
- :type value: C{str}
- :return: C{None}
- '''
- self._id = value
-
- def getStr(self):
- '''
- Return the sequence as a string
-
- :return: the string representation of the sequence
- :rtype: str
- '''
- return self._seq
-
- def getSymbolAt(self,position):
- '''
- Return the symbole at C{position} in the sequence
-
- :param position: the desired position. Position start from 0
- if position is < 0 then they are considered
- to reference the end of the sequence.
- :type position: `int`
-
- :return: a one letter string
- :rtype: `str`
- '''
- return str(self)[position]
-
- def getSubSeq(self,location):
- '''
- return a subsequence as described by C{location}.
-
- The C{location} parametter can be a L{obitools.location.Location} instance,
- an interger or a python C{slice} instance. If C{location}
- is an iterger this method is equivalent to L{getSymbolAt}.
-
- :param location: the positions of the subsequence to return
- :type location: C{Location} or C{int} or C{slice}
- :return: the subsequence
- :rtype: a single character as a C{str} is C{location} is an integer,
- a L{obitools.SubSequence} instance otherwise.
-
- '''
- if isinstance(location,Location):
- return location.extractSequence(self)
- elif isinstance(location, int):
- return self.getSymbolAt(location)
- elif isinstance(location, slice):
- return SubSequence(self,location)
-
- raise TypeError,'key must be a Location, an integer or a slice'
-
- def getKey(self,key):
- if key not in self._info:
- if self._rawinfo is None:
- if key=='count':
- return 1
- else:
- raise KeyError,key
- p = re.compile(self._rawparser % key)
- m = p.search(self._rawinfo)
- if m is not None:
- v=m.group(1)
- self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
- try:
- v = eval(v)
- except:
- pass
- self._info[key]=v
- else:
- if key=='count':
- v=1
- else:
- raise KeyError,key
- else:
- v=self._info[key]
- return v
-
- def extractTaxon(self):
- '''
- Extract Taxonomy information from the sequence header.
- This method by default return None. It should be subclassed
- if necessary as in L{obitools.seqdb.AnnotatedSequence}.
-
- :return: None
- '''
- self._hasTaxid=self.hasKey('taxid')
- return None
-
- def __str__(self):
- return self.getStr()
-
- def __getitem__(self,key):
- if isinstance(key, str):
- if key=='taxid' and self._hasTaxid is None:
- self.extractTaxon()
- return self.getKey(key)
- else:
- return self.getSubSeq(key)
-
- def __setitem__(self,key,value):
- self._info[key]=value
- if key=='taxid':
- self._hasTaxid=value is not None
-
- def __delitem__(self,key):
- if isinstance(key, str):
- if key in self:
- del self._info[key]
- else:
- raise KeyError,key
-
- if key=='taxid':
- self._hasTaxid=False
- else:
- raise TypeError,key
-
- def __iter__(self):
- '''
- Iterate through the sequence symbols
- '''
- return iter(str(self))
-
- def __len__(self):
- return self.__len
-
- def hasKey(self,key):
- rep = key in self._info
-
- if not rep and self._rawinfo is not None:
- p = re.compile(self._rawparser % key)
- m = p.search(self._rawinfo)
- if m is not None:
- v=m.group(1)
- self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
- try:
- v = eval(v)
- except:
- pass
- self._info[key]=v
- rep=True
-
- return rep
-
- def __contains__(self,key):
- '''
- methods allowing to use the C{in} operator on a C{BioSequence}.
-
- The C{in} operator test if the C{key} value is defined for this
- sequence.
-
- :param key: the name of the checked value
- :type key: str
- :return: C{True} if the value is defined, {False} otherwise.
- :rtype: C{bool}
- '''
- if key=='taxid' and self._hasTaxid is None:
- self.extractTaxon()
- return self.hasKey(key)
-
- def rawiteritems(self):
- return self._info.iteritems()
-
- def iteritems(self):
- '''
- iterate other items dictionary storing the values
- associated to the sequence. It works similarly to
- the iteritems function of C{dict}.
-
- :return: an iterator over the items (key,value)
- link to a sequence
- :rtype: iterator over tuple
- :see: L{items}
- '''
- if self._rawinfo is not None:
- p = re.compile(self._rawparser % "([a-zA-Z]\w*)")
- for k,v in p.findall(self._rawinfo):
- try:
- self._info[k]=eval(v)
- except:
- self._info[k]=v
- self._rawinfo=None
- return self._info.iteritems()
-
- def items(self):
- return [x for x in self.iteritems()]
-
- def iterkeys(self):
- return (k for k,v in self.iteritems())
-
- def keys(self):
- return [x for x in self.iterkeys()]
-
- def getTags(self):
- self.iteritems()
- return self._info
-
- def getRoot(self):
- return self
-
- def getWrappers(self):
- if not hasattr(self, '_wrappers'):
- self._wrappers=WrapperSet()
- return self._wrappers
-
- def register(self,wrapper):
- self.wrappers.add(ref(wrapper,self._unregister))
-
- def _unregister(self,ref):
- self.wrappers.remove(ref)
-
- wrappers = property(getWrappers,None,None,'')
-
- definition = property(getDefinition, setDefinition, None, "Sequence Definition")
-
- id = property(getId, setId, None, 'Sequence identifier')
-
- def _getTaxid(self):
- return self['taxid']
-
- def _setTaxid(self,taxid):
- self['taxid']=taxid
-
- taxid = property(_getTaxid,_setTaxid,None,'NCBI Taxonomy identifier')
- _seq = property(get_seq, set_seq, None, None)
-
-class NucSequence(BioSequence):
- """
- :py:class:`NucSequence` specialize the :py:class:`BioSequence` class for storing DNA
- sequences.
-
- The constructor is identical to the :py:class:`BioSequence` constructor.
- """
-
- def complement(self):
- """
- :return: The reverse complemented sequence as an instance of :py:class:`DNAComplementSequence`
- :rtype: :py:class:`DNAComplementSequence`
- """
- return DNAComplementSequence(self)
-
- def isNucleotide(self):
- return True
-
-
-class AASequence(BioSequence):
- """
- :py:class:`AASequence` specialize the :py:class:`BioSequence` class for storing protein
- sequences.
-
- The constructor is identical to the :py:class:`BioSequence` constructor.
- """
-
-
- def isNucleotide(self):
- return False
-
-
-class WrappedBioSequence(BioSequence):
- """
- .. warning::
-
- :py:class:`obitools.WrappedBioSequence` is an abstract class, this constructor
- can only be called by a subclass constructor.
- """
-
-
- def __init__(self,reference,id=None,definition=None,**info):
-
- assert type(self)!=WrappedBioSequence,"obitools.WrappedBioSequence is an abstract class"
-
- self._wrapped = reference
- reference.register(self)
- self._id=id
- self.definition=definition
- self._info=info
-
- def clone(self):
- seq = type(self)(self.wrapped,
- id=self._id,
- definition=self._definition
- )
- seq._info=dict(self._info)
-
- return seq
-
- def getWrapped(self):
- return self._wrapped
-
- def getDefinition(self):
- d = self._definition or self.wrapped.definition
- return d
-
- def getId(self):
- d = self._id or self.wrapped.id
- return d
-
- def isNucleotide(self):
- return self.wrapped.isNucleotide()
-
-
- def iterkeys(self):
- return uniqueChain(self._info.iterkeys(),
- self.wrapped.iterkeys())
-
- def rawiteritems(self):
- return chain(self._info.iteritems(),
- (x for x in self.wrapped.rawiteritems()
- if x[0] not in self._info))
-
- def iteritems(self):
- for x in self.iterkeys():
- yield (x,self[x])
-
- def getKey(self,key):
- if key in self._info:
- return self._info[key]
- else:
- return self.wrapped.getKey(key)
-
- def hasKey(self,key):
- return key in self._info or self.wrapped.hasKey(key)
-
- def getSymbolAt(self,position):
- return self.wrapped.getSymbolAt(self.posInWrapped(position))
-
- def posInWrapped(self,position,reference=None):
- if reference is None or reference is self.wrapped:
- return self._posInWrapped(position)
- else:
- return self.wrapped.posInWrapped(self._posInWrapped(position),reference)
-
-
- def getStr(self):
- return str(self.wrapped)
-
- def getRoot(self):
- return self.wrapped.getRoot()
-
- def complement(self):
- """
- The :py:meth:`complement` method of the :py:class:`WrappedBioSequence` class
- raises an exception :py:exc:`AttributeError` if the method is called and the cut
- sequence does not corresponds to a nucleic acid sequence.
- """
-
- if self.wrapped.isNucleotide():
- return DNAComplementSequence(self)
- raise AttributeError
-
-
- def _posInWrapped(self,position):
- return position
-
-
- definition = property(getDefinition,BioSequence.setDefinition, None)
- id = property(getId,BioSequence.setId, None)
-
- wrapped = property(getWrapped, None, None, "A pointer to the wrapped sequence")
-
- def _getWrappedRawInfo(self):
- return self.wrapped._rawinfo
-
- _rawinfo = property(_getWrappedRawInfo)
-
-
-class SubSequence(WrappedBioSequence):
- """
- """
-
-
- @staticmethod
- def _sign(x):
- if x == 0:
- return 0
- elif x < 0:
- return -1
- return 1
-
- def __init__(self,reference,
- location=None,
- start=None,stop=None,
- id=None,definition=None,
- **info):
- WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info)
-
- if isinstance(location, slice):
- self._location = location
- else:
- step = 1
- if not isinstance(start, int):
- start = 0;
- if not isinstance(stop,int):
- stop = len(reference)
- self._location=slice(start,stop,step)
-
- self._indices=self._location.indices(len(self.wrapped))
- self._xrange=xrange(*self._indices)
-
- self._info['cut']='[%d,%d,%s]' % self._indices
-
- if hasattr(reference,'quality'):
- self.quality = reference.quality[self._location]
-
- def getId(self):
- d = self._id or ("%s_SUB" % self.wrapped.id)
- return d
-
-
- def clone(self):
- seq = WrappedBioSequence.clone(self)
- seq._location=self._location
- seq._indices=seq._location.indices(len(seq.wrapped))
- seq._xrange=xrange(*seq._indices)
- return seq
-
-
- def __len__(self):
- return len(self._xrange)
-
- def getStr(self):
- return ''.join([x for x in self])
-
- def __iter__(self):
- return (self.wrapped.getSymbolAt(x) for x in self._xrange)
-
- def _posInWrapped(self,position):
- return self._xrange[position]
-
-
- id = property(getId,BioSequence.setId, None)
-
-
-
-class DNAComplementSequence(WrappedBioSequence):
- """
- Class used to represent a reverse complemented DNA sequence. Usually instances
- of this class are produced by using the :py:meth:`NucSequence.complement` method.
- """
-
-
- _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
- 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
- 's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
- 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
- '-': '-'}
-
-
- def __init__(self,reference,
- id=None,definition=None,**info):
- WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info)
- assert reference.isNucleotide()
- self._info['complemented']=True
- if hasattr(reference,'quality'):
- self.quality = reference.quality[::-1]
-
-
- def getId(self):
- d = self._id or ("%s_CMP" % self.wrapped.id)
- return d
-
- def __len__(self):
- return len(self._wrapped)
-
- def getStr(self):
- return ''.join([x for x in self])
-
- def __iter__(self):
- return (self.getSymbolAt(x) for x in xrange(len(self)))
-
- def _posInWrapped(self,position):
- return -(position+1)
-
- def getSymbolAt(self,position):
- return DNAComplementSequence._comp[self.wrapped.getSymbolAt(self.posInWrapped(position))]
-
- def complement(self):
- """
- The :py:meth:`complement` method of the :py:class:`DNAComplementSequence` class actually
- returns the wrapped sequenced. Effectively the reversed complemented sequence of a reversed
- complemented sequence is the initial sequence.
- """
- return self.wrapped
-
- id = property(getId,BioSequence.setId, None)
-
-
-def _isNucSeq(text):
- acgt = 0
- notnuc = 0
- ltot = len(text) * 0.8
- for c in text.lower():
- if c in 'acgt-':
- acgt+=1
- if c not in DNAComplementEncoder._comp:
- notnuc+=1
- return notnuc==0 and float(acgt) > ltot
-
-
-def bioSeqGenerator(id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
- """
- Generate automagically the good class instance between :
-
- - :py:class:`NucSequence`
- - :py:class:`AASequence`
-
- Build a new sequence instance. Sequences are instancied as :py:class:`NucSequence` if the
- `seq` attribute contains more than 80% of *A*, *C*, *G*, *T* or *-* symbols
- in upper or lower cases. Conversely, the new sequence instance is instancied as
- :py:class:`AASequence`.
-
-
-
- :param id: sequence identifier
- :type id: `str`
-
- :param seq: the sequence
- :type seq: `str`
-
- :param definition: sequence definition (optional)
- :type definition: `str`
-
- :param rawinfo: a text containing a set of key=value; patterns
- :type definition: `str`
-
- :param rawparser: a text describing a regular patterns template
- used to parse rawinfo
- :type definition: `str`
-
- :param info: extra named parameters can be added to associate complementary
- data to the sequence
- """
- if _isNucSeq(seq):
- return NucSequence(id,seq,definition,rawinfo,rawparser,**info)
- else:
- return AASequence(id,seq,definition,rawinfo,rawparser,**info)
-
diff --git a/obitools/__init__.pyc b/obitools/__init__.pyc
deleted file mode 100644
index 3cc2111..0000000
Binary files a/obitools/__init__.pyc and /dev/null differ
diff --git a/obitools/align/__init__.py b/obitools/align/__init__.py
deleted file mode 100644
index 54cca7d..0000000
--- a/obitools/align/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-
-
-from _nws import NWS
-from _upperbond import indexSequences
-from _lcs import LCS,lenlcs
-from _assemble import DirectAssemble, ReverseAssemble
-from _qsassemble import QSolexaDirectAssemble,QSolexaReverseAssemble
-from _rassemble import RightDirectAssemble as RightReverseAssemble
-from _qsrassemble import QSolexaRightDirectAssemble,QSolexaRightReverseAssemble
-from _freeendgap import FreeEndGap
-from _freeendgapfm import FreeEndGapFullMatch
-from _upperbond import isLCSReachable
-
diff --git a/obitools/align/_assemble.so b/obitools/align/_assemble.so
deleted file mode 100755
index dbc2139..0000000
Binary files a/obitools/align/_assemble.so and /dev/null differ
diff --git a/obitools/align/_dynamic.so b/obitools/align/_dynamic.so
deleted file mode 100755
index 2f93d3a..0000000
Binary files a/obitools/align/_dynamic.so and /dev/null differ
diff --git a/obitools/align/_freeendgap.so b/obitools/align/_freeendgap.so
deleted file mode 100755
index 53cd9c0..0000000
Binary files a/obitools/align/_freeendgap.so and /dev/null differ
diff --git a/obitools/align/_freeendgapfm.so b/obitools/align/_freeendgapfm.so
deleted file mode 100755
index f88c07b..0000000
Binary files a/obitools/align/_freeendgapfm.so and /dev/null differ
diff --git a/obitools/align/_lcs.so b/obitools/align/_lcs.so
deleted file mode 100755
index 555a2a2..0000000
Binary files a/obitools/align/_lcs.so and /dev/null differ
diff --git a/obitools/align/_nws.so b/obitools/align/_nws.so
deleted file mode 100755
index af7e849..0000000
Binary files a/obitools/align/_nws.so and /dev/null differ
diff --git a/obitools/align/_profilenws.so b/obitools/align/_profilenws.so
deleted file mode 100755
index baa8eda..0000000
Binary files a/obitools/align/_profilenws.so and /dev/null differ
diff --git a/obitools/align/_qsassemble.so b/obitools/align/_qsassemble.so
deleted file mode 100755
index 3bc83e9..0000000
Binary files a/obitools/align/_qsassemble.so and /dev/null differ
diff --git a/obitools/align/_qsrassemble.so b/obitools/align/_qsrassemble.so
deleted file mode 100755
index 75b98aa..0000000
Binary files a/obitools/align/_qsrassemble.so and /dev/null differ
diff --git a/obitools/align/_rassemble.so b/obitools/align/_rassemble.so
deleted file mode 100755
index e2a063c..0000000
Binary files a/obitools/align/_rassemble.so and /dev/null differ
diff --git a/obitools/align/_upperbond.so b/obitools/align/_upperbond.so
deleted file mode 100755
index 5f2b1fe..0000000
Binary files a/obitools/align/_upperbond.so and /dev/null differ
diff --git a/obitools/align/homopolymere.py b/obitools/align/homopolymere.py
deleted file mode 100644
index 5efcbff..0000000
--- a/obitools/align/homopolymere.py
+++ /dev/null
@@ -1,56 +0,0 @@
-'''
-Created on 14 mai 2009
-
-@author: coissac
-'''
-
-from obitools import WrappedBioSequence
-
-class HomoNucBioSeq(WrappedBioSequence):
- '''
- classdocs
- '''
-
-
- def __init__(self,reference,id=None,definition=None,**info):
- '''
- Constructor
- '''
- assert reference.isNucleotide(),"reference must be a nucleic sequence"
- WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info)
- self.__cleanHomopolymer()
-
- def __cleanHomopolymer(self):
- s = []
- c = []
- old=None
- nc=0
- for n in self._wrapped:
- if old is not None and n!=old:
- s.append(old)
- c.append(nc)
- nc=0
- old=n
- nc+=1
- self._cached=''.join(s)
- self['homopolymer']=c
- self._cumulative=[]
- sum=0
- for c in self._count:
- sum+=c
- self._cumulative.append(sum)
-
- def __len__(self):
- return len(self._cached)
-
- def getStr(self):
- return self._cached
-
- def __iter__(self):
- return iter(self._cached)
-
- def _posInWrapped(self,position):
- return self._cumulative[position]
-
-
-
\ No newline at end of file
diff --git a/obitools/align/ssearch.py b/obitools/align/ssearch.py
deleted file mode 100644
index 55a74ce..0000000
--- a/obitools/align/ssearch.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import os
-import re
-
-from obitools.fasta import formatFasta
-
-class SsearchParser(object):
-
- _matchQuery = re.compile("^Query:.+\n.+?>+([^ ]+)", re.MULTILINE)
- _matchLQuery = re.compile("^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE)
- _matchProp = re.compile("^The best scores are:.*\n(.+?)>>>", re.DOTALL+re.MULTILINE)
- def __init__(self,file):
- if isinstance(file,str):
- file = open(file,'rU')
- self.data = file.read()
- self.query= SsearchParser._matchQuery.search(self.data).group(1)
- self.queryLength= int(SsearchParser._matchLQuery.search(self.data).group(1))
- props = SsearchParser._matchProp.search(self.data)
- if props:
- props=props.group(0).split('\n')[1:-2]
- self.props=[]
- for line in props:
- subject,tab = line.split('\t')
- tab=tab.split()
- ssp = subject.split()
- ac = ssp[0]
- dbl= int(ssp[-5][:-1])
- ident = float(tab[0])
- matchlen = abs(int(tab[5]) - int(tab[4])) +1
- self.props.append({"ac" :ac,
- "identity" :ident,
- "subjectlength":dbl,
- 'matchlength' : matchlen})
-
-def run(seq,database,program='fasta35',opts=''):
- ssearchin,ssearchout,ssearcherr = os.popen3("%s %s %s" % (program,opts,database))
- print >>ssearchin,formatFasta(seq)
- ssearchin.close()
- result = SsearchParser(ssearchout)
-
- return seq,result
-
-def ssearchIterator(sequenceIterator,database,program='ssearch35',opts=''):
- for seq in sequenceIterator:
- yield run(seq,database,program,opts)
-
-
diff --git a/obitools/alignment/__init__.py b/obitools/alignment/__init__.py
deleted file mode 100644
index a89793a..0000000
--- a/obitools/alignment/__init__.py
+++ /dev/null
@@ -1,175 +0,0 @@
-from obitools import BioSequence
-from obitools import WrappedBioSequence
-from copy import deepcopy
-
-class GappedPositionException(Exception):
- pass
-
-class AlignedSequence(WrappedBioSequence):
-
- def __init__(self,reference,
- id=None,definition=None,**info):
- WrappedBioSequence.__init__(self,reference,id=None,definition=None,**info)
- self._length=len(reference)
- self._gaps=[[self._length,0]]
-
- def clone(self):
- seq = WrappedBioSequence.clone(self)
- seq._gaps=deepcopy(self._gaps)
- seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
- return seq
-
- def setGaps(self, value):
- '''
- Set gap vector to an AlignedSequence.
-
- Gap vector describes the gap positions on a sequence.
- It is a gap of couple. The first couple member is the count
- of sequence letter, the second one is the gap length.
- @param value: a list of length 2 list describing gap positions
- @type value: list of couple
- '''
- assert isinstance(value, list),'Gap vector must be a list'
- assert reduce(lambda x,y: x and y,
- (isinstance(z, list) and len(z)==2 for z in value),
- True),"Value must be a list of length 2 list"
-
- lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0)
- assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
- self._gaps = value
- self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0)
-
- def getGaps(self):
- return tuple(self._gaps)
- gaps = property(getGaps, setGaps, None, "Gaps's Docstring")
-
- def _getIndice(self,pos):
- i=0
- cpos=0
- for s,g in self._gaps:
- cpos+=s
- if cpos>pos:
- return i,pos-cpos+s
- cpos+=g
- if cpos>pos:
- return i,-pos+cpos-g-1
- i+=1
- raise IndexError
-
- def getId(self):
- d = self._id or ("%s_ALN" % self.wrapped.id)
- return d
-
- def __len__(self):
- return self._length
-
- def getStr(self):
- return ''.join([x for x in self])
-
- def __iter__(self):
- def isymb():
- cpos=0
- for s,g in self._gaps:
- for x in xrange(s):
- yield self.wrapped[cpos+x]
- for x in xrange(g):
- yield '-'
- cpos+=s
- return isymb()
-
- def _posInWrapped(self,position):
- i,s=self._getIndice(position)
- if s<0:
- raise GappedPositionException
- value=self._gaps
- p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s
- return p
-
- def getSymbolAt(self,position):
- try:
- return self.wrapped.getSymbolAt(self.posInWrapped(position))
- except GappedPositionException:
- return '-'
-
- def insertGap(self,position,count=1):
- if position==self._length:
- idx=len(self._gaps)-1
- p=-1
- else:
- idx,p = self._getIndice(position)
-
- if p >= 0:
- self._gaps.insert(idx, [p,count])
- self._gaps[idx+1][0]-=p
- else:
- self._gaps[idx][1]+=count
- self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
-
-
- id = property(getId,BioSequence.setId, None, "Sequence Identifier")
-
-
-class Alignment(list):
-
- def _assertData(self,data):
- assert isinstance(data, BioSequence),'You must only add bioseq to an alignement'
- if hasattr(self, '_alignlen'):
- assert self._alignlen==len(data),'All aligned sequences must have the same length'
- else:
- self._alignlen=len(data)
- return data
-
- def clone(self):
- ali = Alignment(x.clone() for x in self)
- return ali
-
- def append(self,data):
- data = self._assertData(data)
- list.append(self,data)
-
- def __setitem__(self,index,data):
-
- data = self._assertData(data)
- list.__setitem__(self,index,data)
-
- def getSite(self,key):
- if isinstance(key,int):
- return [x[key] for x in self]
-
- def insertGap(self,position,count=1):
- for s in self:
- s.insertGap(position,count)
-
- def isFullGapSite(self,key):
- return reduce(lambda x,y: x and y,(z=='-' for z in self.getSite(key)),True)
-
- def isGappedSite(self,key):
- return '-' in self.getSite(key)
-
- def __str__(self):
- l = len(self[0])
- rep=""
- idmax = max(len(x.id) for x in self)+2
- template= "%%-%ds %%-60s" % idmax
- for p in xrange(0,l,60):
- for s in self:
- rep+= (template % (s.id,s[p:p+60])).strip() + '\n'
- rep+="\n"
- return rep
-
-def alignmentReader(file,sequenceIterator):
- seqs = sequenceIterator(file)
- alignement = Alignment()
- for seq in seqs:
- alignement.append(seq)
- return alignement
-
-
-
-
-
-def columnIterator(alignment):
- lali = len(alignment[0])
- for p in xrange(lali):
- c = [x[p] for x in alignment]
- yield c
\ No newline at end of file
diff --git a/obitools/alignment/ace.py b/obitools/alignment/ace.py
deleted file mode 100644
index 59cc8f6..0000000
--- a/obitools/alignment/ace.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from obitools.format.genericparser import GenericParser
-from obitools.utils import universalOpen
-from obitools.fasta import parseFastaDescription
-from obitools import NucSequence
-
-
-import sys
-
-_contigIterator=GenericParser('^CO ')
-
-_contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)')
-_contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n')
-_contigIterator.addParseAction('DS', '\nDS +(.+)')
-_contigIterator.addParseAction('CO', '^CO (\S+)')
-
-def contigIterator(file):
- file = universalOpen(file)
- for entry in _contigIterator(file):
- contig=[]
- for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']):
- id = rd[0]
- shift = int(af[2])
- if shift < 0:
- print >> sys.stderr,"Sequence %s in contig %s has a negative paddng value %d : skipped" % (id,entry['CO'][0],shift)
- #continue
-
- definition,info = parseFastaDescription(ds)
- info['shift']=shift
- seq = rd[4].replace('\n','').replace('*','-').strip()
- contig.append(NucSequence(id,seq,definition,**info))
-
- maxlen = max(len(x)+x['shift'] for x in contig)
- minshift=min(x['shift'] for x in contig)
- rep = []
-
- for s in contig:
- info = s.getTags()
- info['shift']-=minshift-1
- head = '-' * (info['shift']-1)
-
- tail = (maxlen + minshift - len(s) - info['shift'] - 1)
- info['tail']=tail
- newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info)
- rep.append(newseq)
-
- yield entry['CO'][0],rep
-
\ No newline at end of file
diff --git a/obitools/barcodecoverage/__init__.py b/obitools/barcodecoverage/__init__.py
deleted file mode 100644
index 09e542e..0000000
--- a/obitools/barcodecoverage/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-'''
-
-@author: merciece
-Creates the tree representing the coverage of 2 primers from an ecoPCR output file and an ecoPCR database.
-
-
-'''
\ No newline at end of file
diff --git a/obitools/barcodecoverage/calcBc.py b/obitools/barcodecoverage/calcBc.py
deleted file mode 100644
index 13b0401..0000000
--- a/obitools/barcodecoverage/calcBc.py
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 24 nov. 2011
-
-@author: merciece
-'''
-
-
-def main(amplifiedSeqs, seqsFromDB, keptRanks, errors, tax) :
- '''
- error threshold is set to 3
- '''
-
- listtaxabygroupinDB = {}
-
- for seq in seqsFromDB :
- taxid = seq['taxid']
- p = [a for a in tax.parentalTreeIterator(taxid)]
- for a in p :
- if a != p[0] :
- if a[1] in keptRanks :
- group = a[0]
- if group in listtaxabygroupinDB and taxid not in listtaxabygroupinDB[group] :
- listtaxabygroupinDB[group].add(taxid)
- elif group not in listtaxabygroupinDB :
- listtaxabygroupinDB[group]=set([taxid])
-
- taxabygroup = dict((x,len(listtaxabygroupinDB[x])) for x in listtaxabygroupinDB)
-
- listamplifiedtaxabygroup = {}
-
- for seq in amplifiedSeqs :
- if errors[seq.id][2] <= 3 :
- taxid = seq['taxid']
- p = [a for a in tax.parentalTreeIterator(taxid)]
- for a in p :
- if a != p[0] :
- if a[1] in keptRanks :
- group = a[0]
- if group in listamplifiedtaxabygroup and taxid not in listamplifiedtaxabygroup[group] :
- listamplifiedtaxabygroup[group].add(taxid)
- elif group not in listamplifiedtaxabygroup :
- listamplifiedtaxabygroup[group]=set([taxid])
-
- amplifiedtaxabygroup = dict((x,len(listamplifiedtaxabygroup[x])) for x in listamplifiedtaxabygroup)
-
- BcValues = {}
-
- groups = [g for g in taxabygroup.keys()]
-
- for g in groups :
- if g in amplifiedtaxabygroup :
- BcValues[g] = float(amplifiedtaxabygroup[g])/taxabygroup[g]*100
- BcValues[g] = round(BcValues[g], 2)
- else :
- BcValues[g] = 0.0
-
- return BcValues
-
-
-
-
diff --git a/obitools/barcodecoverage/calculateBc.py b/obitools/barcodecoverage/calculateBc.py
deleted file mode 100644
index c5edb8a..0000000
--- a/obitools/barcodecoverage/calculateBc.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 24 nov. 2011
-
-@author: merciece
-'''
-
-import sys
-
-
-def main(amplifiedSeqs, seqsFromDB, keptRanks, tax) :
-
- BcValues = {}
-
- #speciesid = tax.findRankByName('species')
- #subspeciesid = tax.findRankByName('subspecies')
-
- listtaxonbygroup = {}
-
- for seq in seqsFromDB :
- taxid = seq['taxid']
- p = [a for a in tax.parentalTreeIterator(taxid)]
- for a in p :
- if a != p[0] :
- if a[1] in keptRanks :
- group = a
- if group in listtaxonbygroup:
- listtaxonbygroup[group].add(taxid)
- else:
- listtaxonbygroup[group]=set([taxid])
-
- #stats = dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup)
-
- print>>sys.stderr, listtaxonbygroup
-
- listtaxonbygroup = {}
-
- for seq in amplifiedSeqs :
- taxid = seq['taxid']
- p = [a for a in tax.parentalTreeIterator(taxid)]
- for a in p :
- if a != p[0] :
- if a[1] in keptRanks :
- group = a
- if group in listtaxonbygroup:
- listtaxonbygroup[group].add(taxid)
- else:
- listtaxonbygroup[group]=set([taxid])
-
- print>>sys.stderr, listtaxonbygroup
-
- return BcValues
-
-# dbstats= dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup)
-#
-# ranks = [r for r in keptRanks]
-# ranks.sort()
-#
-# print '%-20s\t%10s\t%10s\t%7s' % ('rank','ecopcr','db','percent')
-#
-# print>>sys.stderr, stats
-# print>>sys.stderr, dbstats
-# print>>sys.stderr, ranks
-#
-# for r in ranks:
-# if r in dbstats and dbstats[r]:
-# print '%-20s\t%10d\t%10d\t%8.2f' % (r,dbstats[r],stats[r],float(dbstats[r])/stats[r]*100)
-
-
-
-
-
diff --git a/obitools/barcodecoverage/drawBcTree.py b/obitools/barcodecoverage/drawBcTree.py
deleted file mode 100644
index 9b1e215..0000000
--- a/obitools/barcodecoverage/drawBcTree.py
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 25 nov. 2011
-
-@author: merciece
-'''
-
-from obitools.graph.rootedtree import nexusFormat
-
-
-figtree="""\
-begin figtree;
- set appearance.backgroundColorAttribute="User Selection";
- set appearance.backgroundColour=#-1;
- set appearance.branchColorAttribute="bc";
- set appearance.branchLineWidth=2.0;
- set appearance.foregroundColour=#-16777216;
- set appearance.selectionColour=#-2144520576;
- set branchLabels.colorAttribute="User Selection";
- set branchLabels.displayAttribute="errors";
- set branchLabels.fontName="sansserif";
- set branchLabels.fontSize=10;
- set branchLabels.fontStyle=0;
- set branchLabels.isShown=true;
- set branchLabels.significantDigits=4;
- set layout.expansion=2000;
- set layout.layoutType="RECTILINEAR";
- set layout.zoom=0;
- set nodeBars.barWidth=4.0;
- set nodeLabels.colorAttribute="User Selection";
- set nodeLabels.displayAttribute="label";
- set nodeLabels.fontName="sansserif";
- set nodeLabels.fontSize=10;
- set nodeLabels.fontStyle=0;
- set nodeLabels.isShown=true;
- set nodeLabels.significantDigits=4;
- set polarLayout.alignTipLabels=false;
- set polarLayout.angularRange=0;
- set polarLayout.rootAngle=0;
- set polarLayout.rootLength=100;
- set polarLayout.showRoot=true;
- set radialLayout.spread=0.0;
- set rectilinearLayout.alignTipLabels=false;
- set rectilinearLayout.curvature=0;
- set rectilinearLayout.rootLength=100;
- set scale.offsetAge=0.0;
- set scale.rootAge=1.0;
- set scale.scaleFactor=1.0;
- set scale.scaleRoot=false;
- set scaleAxis.automaticScale=true;
- set scaleAxis.fontSize=8.0;
- set scaleAxis.isShown=false;
- set scaleAxis.lineWidth=2.0;
- set scaleAxis.majorTicks=1.0;
- set scaleAxis.origin=0.0;
- set scaleAxis.reverseAxis=false;
- set scaleAxis.showGrid=true;
- set scaleAxis.significantDigits=4;
- set scaleBar.automaticScale=true;
- set scaleBar.fontSize=10.0;
- set scaleBar.isShown=true;
- set scaleBar.lineWidth=1.0;
- set scaleBar.scaleRange=0.0;
- set scaleBar.significantDigits=4;
- set tipLabels.colorAttribute="User Selection";
- set tipLabels.displayAttribute="Names";
- set tipLabels.fontName="sansserif";
- set tipLabels.fontSize=10;
- set tipLabels.fontStyle=0;
- set tipLabels.isShown=true;
- set tipLabels.significantDigits=4;
- set trees.order=false;
- set trees.orderType="increasing";
- set trees.rooting=false;
- set trees.rootingType="User Selection";
- set trees.transform=false;
- set trees.transformType="cladogram";
-end;
-"""
-
-
-def cartoonRankGenerator(rank):
- def cartoon(node):
- return 'rank' in node and node['rank']==rank
-
- return cartoon
-
-
-def collapseBcGenerator(Bclimit):
- def collapse(node):
- return 'bc' in node and node['bc']<=Bclimit
- return collapse
-
-
-def label(node):
- if 'bc' in node:
- return "(%+3.1f) %s" % (node['bc'],node['name'])
- else:
- return " %s" % node['name']
-
-
-def main(coverageTree) :
- print nexusFormat(coverageTree,
- label=label,
- blocks=figtree,
- cartoon=cartoonRankGenerator('family'))
- #collapse=collapseBcGenerator(70))
-
diff --git a/obitools/barcodecoverage/findErrors.py b/obitools/barcodecoverage/findErrors.py
deleted file mode 100644
index dae20a0..0000000
--- a/obitools/barcodecoverage/findErrors.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 24 nov. 2011
-
-@author: merciece
-'''
-
-
-def main(seqs, keptRanks, tax):
- errorsBySeq = getErrorsOnLeaves(seqs)
- errorsByTaxon = propagateErrors(errorsBySeq, keptRanks, tax)
- return errorsBySeq, errorsByTaxon
-
-
-def getErrorsOnLeaves(seqs) :
- errors = {}
- for s in seqs :
- taxid = s['taxid']
- forErrs = s['forward_error']
- revErrs = s['reverse_error']
- total = forErrs + revErrs
- seqNb = 1
- errors[s.id] = [forErrs,revErrs,total,seqNb,taxid]
- return errors
-
-
-def propagateErrors(errorsOnLeaves, keptRanks, tax) :
- allErrors = {}
- for seq in errorsOnLeaves :
- taxid = errorsOnLeaves[seq][4]
- p = [a for a in tax.parentalTreeIterator(taxid)]
- for a in p :
- if a[1] in keptRanks :
- group = a[0]
- if group in allErrors :
- allErrors[group][0] += errorsOnLeaves[seq][0]
- allErrors[group][1] += errorsOnLeaves[seq][1]
- allErrors[group][2] += errorsOnLeaves[seq][2]
- allErrors[group][3] += 1
- else :
- allErrors[group] = errorsOnLeaves[seq]
-
- for group in allErrors :
- allErrors[group][0] /= float(allErrors[group][3])
- allErrors[group][1] /= float(allErrors[group][3])
- allErrors[group][2] /= float(allErrors[group][3])
-
- allErrors[group][0] = round(allErrors[group][0], 2)
- allErrors[group][1] = round(allErrors[group][1], 2)
- allErrors[group][2] = round(allErrors[group][2], 2)
-
- return allErrors
-
-
-
-
diff --git a/obitools/barcodecoverage/readFiles.py b/obitools/barcodecoverage/readFiles.py
deleted file mode 100644
index b03e72a..0000000
--- a/obitools/barcodecoverage/readFiles.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 23 nov. 2011
-
-@author: merciece
-'''
-
-from obitools.ecopcr import sequence
-from obitools.ecopcr import taxonomy
-
-
-def main(entries,options):
- filteredDataFromDB = ecoPCRDatabaseReader(options)
- filteredData = ecoPCRFileReader(entries,filteredDataFromDB)
- return filteredDataFromDB,filteredData
-
-
-def ecoPCRDatabaseReader(options):
-
- tax = taxonomy.EcoTaxonomyDB(options.taxonomy)
- seqs = sequence.EcoPCRDBSequenceIterator(options.taxonomy,taxonomy=tax)
-
- norankid = tax.findRankByName('no rank')
- speciesid = tax.findRankByName('species')
- genusid = tax.findRankByName('genus')
- familyid = tax.findRankByName('family')
-
- minrankseq = set([speciesid,genusid,familyid])
-
- usedrankid = {}
-
- ingroup = {}
- outgroup= {}
-
- for s in seqs :
- if 'taxid' in s :
- taxid = s['taxid']
- allrank = set()
- for p in tax.parentalTreeIterator(taxid):
- if p[1]!=norankid:
- allrank.add(p[1])
- if len(minrankseq & allrank) == 3:
- for r in allrank:
- usedrankid[r]=usedrankid.get(r,0) + 1
-
- if tax.isAncestor(options.ingroup,taxid):
- ingroup[s.id] = s
- else:
- outgroup[s.id] = s
-
- keptranks = set(r for r in usedrankid
- if float(usedrankid[r])/float(len(ingroup)) > options.rankthresold)
-
- return { 'ingroup' : ingroup,
- 'outgroup': outgroup,
- 'ranks' : keptranks,
- 'taxonomy': tax
- }
-
-
-def ecoPCRFileReader(entries,filteredDataFromDB) :
- filteredData = []
- for s in entries :
- if 'taxid' in s :
- seqId = s.id
- if seqId in filteredDataFromDB['ingroup'] :
- filteredData.append(s)
- return filteredData
-
diff --git a/obitools/barcodecoverage/writeBcTree.py b/obitools/barcodecoverage/writeBcTree.py
deleted file mode 100644
index 7c8243e..0000000
--- a/obitools/barcodecoverage/writeBcTree.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/local/bin/python
-'''
-Created on 25 nov. 2011
-
-@author: merciece
-'''
-
-from obitools.graph.rootedtree import RootedTree
-
-
-def main(BcValues,errors,tax) :
-
- tree = RootedTree()
- tset = set(BcValues)
-
- for taxon in BcValues:
- if taxon in errors :
- forErr = errors[taxon][0]
- revErr = errors[taxon][1]
- totErr = errors[taxon][2]
- else :
- forErr = -1.0
- revErr = -1.0
- totErr = -1.0
-
- tree.addNode(taxon, rank=tax.getRank(taxon),
- name=tax.getScientificName(taxon),
- bc = BcValues[taxon],
- errors = str(forErr)+' '+str(revErr),
- totError = totErr
- )
-
- for taxon in BcValues:
- piter = tax.parentalTreeIterator(taxon)
- taxon = piter.next()
- for parent in piter:
- if taxon[0] in tset and parent[0] in BcValues:
- tset.remove(taxon[0])
- tree.addEdge(parent[0], taxon[0])
- taxon=parent
-
- return tree
diff --git a/obitools/blast/__init__.py b/obitools/blast/__init__.py
deleted file mode 100644
index 11b5274..0000000
--- a/obitools/blast/__init__.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from os import popen2
-from itertools import imap,count
-
-from obitools.table import iTableIterator,TableRow,Table,SelectionIterator
-from obitools.utils import ColumnFile
-from obitools.location import SimpleLocation
-from obitools.fasta import formatFasta
-import sys
-
-class Blast(object):
- '''
- Run blast
- '''
-
- def __init__(self,mode,db,program='blastall',**options):
- self._mode = mode
- self._db = db
- self._program = program
- self._options = options
-
- def getMode(self):
- return self._mode
-
-
- def getDb(self):
- return self._db
-
-
- def getProgram(self):
- return self._program
-
- def _blastcmd(self):
- tmp = """%(program)s \\
- -p %(mode)s \\
- -d %(db)s \\
- -m 8 \\
- %(options)s \\
- """
- options = ' '.join(['-%s %s' % (x[0],str(x[1]))
- for x in self._options.iteritems()])
- data = {
- 'program' : self.program,
- 'db' : self.db,
- 'mode' : self.mode,
- 'options' : options
- }
-
- return tmp % data
-
- def __call__(self,sequence):
- '''
- Run blast with one sequence object
- @param sequence:
- @type sequence:
- '''
- cmd = self._blastcmd()
-
- (blast_in,blast_out) = popen2(cmd)
-
- print >>blast_in,formatFasta(sequence)
- blast_in.close()
-
- blast = BlastResultIterator(blast_out)
-
- return blast
-
- mode = property(getMode, None, None, "Mode's Docstring")
-
- db = property(getDb, None, None, "Db's Docstring")
-
- program = property(getProgram, None, None, "Program's Docstring")
-
-
-class NetBlast(Blast):
- '''
- Run blast on ncbi servers
- '''
-
- def __init__(self,mode,db,**options):
- '''
-
- @param mode:
- @param db:
- '''
- Blast.__init__(self, mode, db, 'blastcl3',**options)
-
-
-class BlastResultIterator(iTableIterator):
-
- def __init__(self,blastoutput,query=None):
- '''
-
- @param blastoutput:
- @type blastoutput:
- '''
- self._blast = ColumnFile(blastoutput,
- strip=True,
- skip="#",
- sep="\t",
- types=self.types
- )
- self._query = query
- self._hindex = dict((k,i) for i,k in imap(None,count(),self._getHeaders()))
-
- def _getHeaders(self):
- return ('Query id','Subject id',
- '% identity','alignment length',
- 'mismatches', 'gap openings',
- 'q. start', 'q. end',
- 's. start', 's. end',
- 'e-value', 'bit score')
-
- def _getTypes(self):
- return (str,str,
- float,int,
- int,int,
- int,int,
- int,int,
- float,float)
-
- def _getRowFactory(self):
- return BlastMatch
-
- def _getSubrowFactory(self):
- return TableRow
-
- def _getQuery(self):
- return self._query
-
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
- query = property(_getQuery,None,None)
-
- def next(self):
- '''
-
- '''
- value = self._blast.next()
- return self.rowFactory(self,value)
-
-
-
-class BlastResult(Table):
- '''
- Results of a blast run
- '''
-
-class BlastMatch(TableRow):
- '''
- Blast high scoring pair between two sequences
- '''
-
- def getQueryLocation(self):
- l = SimpleLocation(self[6], self[7])
- return l
-
- def getSubjectLocation(self):
- l = SimpleLocation(self[8], self[9])
- return l
-
- def getSubjectSequence(self,database):
- return database[self[1]]
-
- def queryCov(self,query=None):
- '''
- Compute coverage of match on query sequence.
-
- @param query: the query sequence. Default is None.
- In this case the query sequence associated
- to this blast result is used.
- @type query: L{obitools.BioSequence}
-
- @return: coverage fraction
- @rtype: float
- '''
- if query is None:
- query = self.table.query
- assert query is not None
- return float(self[7]-self[6]+1)/float(len(query))
-
- def __getitem__(self,key):
- if key=='query coverage' and self.table.query is not None:
- return self.queryCov()
- else:
- return TableRow.__getitem__(self,key)
-
-class BlastCovMinFilter(SelectionIterator):
-
- def __init__(self,blastiterator,covmin,query=None,**conditions):
- if query is None:
- query = blastiterator.table.query
- assert query is not None
- SelectionIterator.__init__(self,blastiterator,**conditions)
- self._query = query
- self._covmin=covmin
-
- def _covMinPredicat(self,row):
- return row.queryCov(self._query)>=self._covmin
-
- def _checkCondition(self,row):
- return self._covMinPredicat(row) and SelectionIterator._checkCondition(self, row)
-
-
-
\ No newline at end of file
diff --git a/obitools/carto/__init__.py b/obitools/carto/__init__.py
deleted file mode 100644
index b7ac176..0000000
--- a/obitools/carto/__init__.py
+++ /dev/null
@@ -1,376 +0,0 @@
-# -*- coding: latin1 -*-
-
-
-
-from obitools import SVGdraw
-import math
-
-class Map(object):
- """
- Map represente une instance d'une carte genetique physique.
- Une telle carte est definie par la longueur de la sequence
- qui lui est associe.
-
- A une carte est associe un certain nombre de niveaux (Level)
- eux meme decoupe en sous-niveau (SubLevel)
- Les sous niveaux contiennent eux des features
- """
- def __init__(self,name,seqlength,scale=1):
- """
- Constructeur d'une nouvelle carte
-
- *Param*:
-
- name
- nom de la carte
-
- seqlength
- longueur de la sequence associee a la carte
-
- scale
- echelle de la carte indicant combien de pixel
- correspondent a une unite de la carte
- """
- self.name = name
- self.seqlength = seqlength
- self.scale = scale
- self.levels = {}
- self.basicHSize = 10
-
- def __str__(self):
- return '<%s>' % self.name
-
- def __getitem__(self,level):
- """
- retourne le niveau *level* de la carte et
- le cree s'il n'existe pas
- """
- if not isinstance(level,int):
- raise TypeError('level must be an non Zero integer value')
- elif level==0:
- raise AssertionError('Level cannot be set to 0')
- try:
- return self.levels[level]
- except KeyError:
- self.levels[level] = Level(level,self)
- return self.levels[level]
-
- def getBasicHSize(self):
- """
- retourne la hauteur de base d'un element de cartographie
- exprimee en pixel
- """
- return self.basicHSize
-
- def getScale(self):
- """
- Retourne l'echelle de la carte en nombre de pixels par
- unite physique de la carte
- """
- return self.scale
-
-
-
- def getNegativeBase(self):
- return reduce(lambda x,y:x-y,[self.levels[z].getHeight()
- for z in self.levels
- if z < 0],self.getHeight())
-
- def getPositiveBase(self):
- return self.getNegativeBase() - 3 * self.getBasicHSize()
-
- def getHeight(self):
- return reduce(lambda x,y:x+y,[z.getHeight() for z in self.levels.values()],0) \
- + 4 * self.getBasicHSize()
-
- def toXML(self,file=None,begin=0,end=None):
- dessin = SVGdraw.drawing()
- if end==None:
- end = self.seqlength
- hauteur= self.getHeight()
- largeur=(end-begin+1)*self.scale
- svg = SVGdraw.svg((begin*self.scale,0,largeur,hauteur),
- '%fpx' % (self.seqlength * self.scale),
- '%dpx' % hauteur)
-
- centre = self.getPositiveBase() + (1 + 1/4) * self.getBasicHSize()
- svg.addElement(SVGdraw.rect(0,centre,self.seqlength * self.scale,self.getBasicHSize()/2))
- for e in self.levels.values():
- svg.addElement(e.getElement())
- dessin.setSVG(svg)
- return dessin.toXml(file)
-
-class Feature(object):
- pass
-
-class Level(object):
-
- def __init__(self,level,map):
- if not isinstance(map,Map):
- raise AssertionError('map is not an instance of class Map')
- if level in map.levels:
- raise AssertionError('Level %d already define for map %s' % (level,map))
- else:
- map.levels[level] = self
- self.map = map
- self.level = level
- self.sublevels = {}
-
- def __getitem__(self,sublevel):
- """
- retourne le niveau *sublevel* du niveau en
- le creant s'il n'existe pas
- """
- if not isinstance(sublevel,int):
- raise TypeError('sublevel must be a positive integer value')
- elif sublevel<0:
- raise AssertionError('Level cannot be negative')
- try:
- return self.sublevels[sublevel]
- except KeyError:
- self.sublevels[sublevel] = SubLevel(sublevel,self)
- return self.sublevels[sublevel]
-
- def getBase(self):
- if self.level < 0:
- base = self.map.getNegativeBase()
- base += reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
- for z in self.map.levels
- if z <0 and z >= self.level],0)
- return base
- else:
- base = self.map.getPositiveBase()
- base -= reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
- for z in self.map.levels
- if z >0 and z < self.level],0)
- return base
-
- def getElement(self):
- objet = SVGdraw.group('level%d' % self.level)
- for e in self.sublevels.values():
- objet.addElement(e.getElement())
- return objet
-
-
-
- def getHeight(self):
- return reduce(lambda x,y:x+y,[z.getHeight() for z in self.sublevels.values()],0) \
- + 2 * self.map.getBasicHSize()
-
-class SubLevel(object):
-
- def __init__(self,sublevel,level):
- if not isinstance(level,Level):
- raise AssertionError('level is not an instance of class Level')
- if level in level.sublevels:
- raise AssertionError('Sublevel %d already define for level %s' % (sublevel,level))
- else:
- level.sublevels[sublevel] = self
- self.level = level
- self.sublevel = sublevel
- self.features = {}
-
- def getHeight(self):
- return max([x.getHeight() for x in self.features.values()]+[0]) + 4 * self.level.map.getBasicHSize()
-
- def getBase(self):
- base = self.level.getBase()
- if self.level.level < 0:
- base -= self.level.getHeight() - 2 * self.level.map.getBasicHSize()
- base += reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
- for z in self.level.sublevels
- if z <= self.sublevel],0)
- base -= 2* self.level.map.getBasicHSize()
- else:
- base -= reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
- for z in self.level.sublevels
- if z < self.sublevel],0)
- base -= self.level.map.getBasicHSize()
- return base
-
- def getElement(self):
- base = self.getBase()
- objet = SVGdraw.group('sublevel%d' % self.sublevel)
- for e in self.features.values():
- objet.addElement(e.getElement(base))
- return objet
-
- def add(self,feature):
- if not isinstance(feature,Feature):
- raise TypeError('feature must be an instance oof Feature')
- if feature.name in self.features:
- raise AssertionError('A feature with the same name (%s) have already be insert in this sublevel'
- % feature.name)
- self.features[feature.name]=feature
- feature.sublevel=self
-
-class SimpleFeature(Feature):
-
- def __init__(self,name,begin,end,visiblename=False,color=0):
- self.begin = begin
- self.end = end
- self.name = name
- self.color = color
- self.sublevel = None
- self.visiblename=visiblename
-
- def getHeight(self):
- if not self.sublevel:
- raise AssertionError('Not affected Simple feature')
- if self.visiblename:
- return self.sublevel.level.map.getBasicHSize() * 2
- else:
- return self.sublevel.level.map.getBasicHSize()
-
- def getElement(self,base):
- scale = self.sublevel.level.map.getScale()
- y = base - self.sublevel.level.map.getBasicHSize()
- x = self.begin * scale
- width = (self.end - self.begin + 1) * scale
- heigh = self.sublevel.level.map.getBasicHSize()
-
- objet = SVGdraw.rect(x,y,width,heigh,stroke=self.color)
- objet.addElement(SVGdraw.description(self.name))
-
- return objet
-
-class BoxFeature(SimpleFeature):
-
- def getHeight(self):
- if not self.sublevel:
- raise AssertionError('Not affected Box feature')
- if self.visiblename:
- return self.sublevel.level.map.getBasicHSize() * 4
- else:
- return self.sublevel.level.map.getBasicHSize() * 3
-
- def getElement(self,base):
- scale = self.sublevel.level.map.getScale()
- y = base - self.sublevel.level.map.getBasicHSize() * 2
- x = self.begin * scale
- width = (self.end - self.begin + 1) * scale
- height = self.sublevel.level.map.getBasicHSize() * 3
-
- objet = SVGdraw.rect(x,y,width,height,stroke=self.color,fill="none")
- objet.addElement(SVGdraw.description(self.name))
-
- return objet
-
-class MultiPartFeature(Feature):
-
- def __init__(self,name,*args,**kargs):
- self.limits = args
- self.name = name
- try:
- self.color = kargs['color']
- except KeyError:
- self.color = "black"
-
- try:
- self.visiblename=kargs['visiblename']
- except KeyError:
- self.visiblename=None
-
- try:
- self.flatlink=kargs['flatlink']
- except KeyError:
- self.flatlink=False
-
- try:
- self.roundlink=kargs['roundlink']
- except KeyError:
- self.roundlink=False
-
- self.sublevel = None
-
-
- def getHeight(self):
- if not self.sublevel:
- raise AssertionError('Not affected Simple feature')
- if self.visiblename:
- return self.sublevel.level.map.getBasicHSize() * 3
- else:
- return self.sublevel.level.map.getBasicHSize() * 2
-
- def getElement(self,base):
- scale = self.sublevel.level.map.getScale()
-
- y = base - self.sublevel.level.map.getBasicHSize()
- height = self.sublevel.level.map.getBasicHSize()
- objet = SVGdraw.group(self.name)
- for (debut,fin) in self.limits:
- x = debut * scale
- width = (fin - debut + 1) * scale
- part = SVGdraw.rect(x,y,width,height,fill=self.color)
- objet.addElement(part)
-
- debut = self.limits[0][1]
- for (fin,next) in self.limits[1:]:
- debut*=scale
- fin*=scale
- path = SVGdraw.pathdata(debut,y + height / 2)
- delta = height / 2
- if self.roundlink:
- path.qbezier((debut+fin)/2, y - delta,fin,y + height / 2)
- else:
- if self.flatlink:
- delta = - height / 2
- path.line((debut+fin)/2, y - delta)
- path.line(fin,y + height / 2)
- path = SVGdraw.path(path,fill="none",stroke=self.color)
- objet.addElement(path)
- debut = next
-
- objet.addElement(SVGdraw.description(self.name))
-
- return objet
-
-class TagFeature(Feature):
-
- def __init__(self,name,begin,length,ratio,visiblename=False,color=0):
- self.begin = begin
- self.length = length
- self.ratio = ratio
- self.name = name
- self.color = color
- self.sublevel = None
- self.visiblename=visiblename
-
- def getHeight(self):
- if not self.sublevel:
- raise AssertionError('Not affected Tag feature')
-
- return self.sublevel.level.map.getBasicHSize()*11
-
- def getElement(self,base):
- scale = self.sublevel.level.map.getScale()
- height = math.floor(max(1,self.sublevel.level.map.getBasicHSize()* 10 * self.ratio))
- y = base + self.sublevel.level.map.getBasicHSize() - height
- x = self.begin * scale
- width = self.length * scale
- objet = SVGdraw.rect(x,y,width,height,stroke=self.color)
- objet.addElement(SVGdraw.description(self.name))
-
- return objet
-
-if __name__ == '__main__':
- carte = Map('essai',20000,scale=0.5)
- carte[-1][0].add(SimpleFeature('toto',100,300))
- carte[1][0].add(SimpleFeature('toto',100,300))
- carte[1][1].add(SimpleFeature('toto',200,1000))
-
- carte[1][0].add(MultiPartFeature('bout',(1400,1450),(1470,1550),(1650,1800),color='red',flatlink=True))
- carte[1][0].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='red',flatlink=True))
- carte[-1][1].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='green'))
- carte[-1][2].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='purple',roundlink=True))
-
- carte[-1][1].add(BoxFeature('tutu',390,810,color='purple'))
- carte[1][0].add(BoxFeature('tutu',390,810,color='red'))
- carte[2][0].add(TagFeature('t1',1400,20,0.8))
- carte[2][0].add(TagFeature('t2',1600,20,0.2))
- carte.basicHSize=6
- print carte.toXML('truc.svg',begin=0,end=1000)
- print carte.toXML('truc2.svg',begin=460,end=2000)
-
-
-
diff --git a/obitools/decorator.py b/obitools/decorator.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/distances/__init__.py b/obitools/distances/__init__.py
deleted file mode 100644
index 1542fa9..0000000
--- a/obitools/distances/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-class DistanceMatrix(object):
-
- def __init__(self,alignment):
- '''
- DistanceMatrix constructor.
-
- @param alignment: aligment used to compute distance matrix
- @type alignment: obitools.align.Alignment
- '''
- self.aligment = alignment
- self.matrix = [[None] * (x+1) for x in xrange(len(alignment))]
-
- def evaluateDist(self,x,y):
- raise NotImplementedError
-
- def __getitem__(self,key):
- assert isinstance(key,(tuple,list)) and len(key)==2, \
- 'key must be a tuple or a list of two integers'
- x,y = key
- if y < x:
- z=x
- x=y
- y=z
- rep = self.matrix[y][x]
- if rep is None:
- rep = self.evaluateDist(x,y)
- self.matrix[y][x] = rep
-
- return rep
\ No newline at end of file
diff --git a/obitools/distances/observed.py b/obitools/distances/observed.py
deleted file mode 100644
index 8828d92..0000000
--- a/obitools/distances/observed.py
+++ /dev/null
@@ -1,77 +0,0 @@
-'''
-Module dedicated to compute observed divergeances from
-an alignment. No distance correction is applied at all
-'''
-
-from itertools import imap
-
-from obitools.distances import DistanceMatrix
-
-class PairewiseGapRemoval(DistanceMatrix):
- '''
- Observed divergeance matrix from an alignment.
- Gap are removed from the alignemt on a pairewise
- sequence base
- '''
-
- def evaluateDist(self,x,y):
- '''
- Compute the observed divergeance from two sequences
- of an aligment.
-
- @attention: For performance purpose this method should
- be directly used. use instead the __getitem__
- method from DistanceMatrix.
-
- @see: L{__getitem__}
-
- @param x: number of the fisrt sequence in the aligment
- @type x: int
- @param y: umber of the second sequence in the aligment
- @type y: int
-
-
- '''
-
- seq1 = self.aligment[x]
- seq2 = self.aligment[y]
-
- diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
- (z[0]!=z[1] for z in imap(None,seq1,seq2)
- if '-' not in z),(0,0))
- return float(diff)/tot
-
-
-class Pairewise(DistanceMatrix):
- '''
- Observed divergeance matrix from an alignment.
- Gap are kept from the alignemt
- '''
-
- def evaluateDist(self,x,y):
- '''
- Compute the observed divergeance from two sequences
- of an aligment.
-
- @attention: For performance purpose this method should
- be directly used. use instead the __getitem__
- method from DistanceMatrix.
-
- @see: L{__getitem__}
-
- @param x: number of the fisrt sequence in the aligment
- @type x: int
- @param y: umber of the second sequence in the aligment
- @type y: int
-
-
- '''
-
- seq1 = self.aligment[x]
- seq2 = self.aligment[y]
-
- diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
- (z[0]!=z[1] for z in imap(None,seq1,seq2)),
- (0,0))
- return float(diff)/tot
-
\ No newline at end of file
diff --git a/obitools/distances/phylip.py b/obitools/distances/phylip.py
deleted file mode 100644
index e2043fa..0000000
--- a/obitools/distances/phylip.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import sys
-
-from itertools import imap,count
-
-def writePhylipMatrix(matrix):
- names = [x.id for x in matrix.aligment]
- pnames= [x[:10] for x in names]
- unicity={}
- redundent=[]
- for n in pnames:
- unicity[n]=unicity.get(n,0)+1
- redundent.append(unicity[n])
-
- for i,n,r in imap(None,count(),pnames,redundent):
- alternate = n
- if r > 1:
- while alternate in pnames:
- lcut = 9 - len(str(r))
- alternate = n[:lcut]+ '_%d' % r
- r+=1
- pnames[i]='%-10s' % alternate
-
- firstline = '%5d' % len(matrix.aligment)
- rep = [firstline]
- for i,n in imap(None,count(),pnames):
- line = [n]
- for j in xrange(i):
- line.append('%5.4f' % matrix[(j,i)])
- rep.append(' '.join(line))
- return '\n'.join(rep)
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/distances/r.py b/obitools/distances/r.py
deleted file mode 100644
index f674a4c..0000000
--- a/obitools/distances/r.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-
-from itertools import imap,count
-
-def writeRMatrix(matrix):
- names = [x.id for x in matrix.aligment]
- lmax = max(max(len(x) for x in names),5)
- lali = len(matrix.aligment)
-
- nformat = '%%-%ds' % lmax
- dformat = '%%%d.4f' % lmax
-
- pnames=[nformat % x for x in names]
-
- rep = [' '.join(pnames)]
-
- for i in xrange(lali):
- line=[]
- for j in xrange(lali):
- line.append('%5.4f' % matrix[(j,i)])
- rep.append(' '.join(line))
- return '\n'.join(rep)
-
-
-
\ No newline at end of file
diff --git a/obitools/dnahash/__init__.py b/obitools/dnahash/__init__.py
deleted file mode 100644
index ca02e35..0000000
--- a/obitools/dnahash/__init__.py
+++ /dev/null
@@ -1,100 +0,0 @@
-_A=[0]
-_C=[1]
-_G=[2]
-_T=[3]
-_R= _A + _G
-_Y= _C + _T
-_M= _C + _A
-_K= _T + _G
-_W= _T + _A
-_S= _C + _G
-_B= _C + _G + _T
-_D= _A + _G + _T
-_H= _A + _C + _T
-_V= _A + _C + _G
-_N= _A + _C + _G + _T
-
-_dnahash={'a':_A,
- 'c':_C,
- 'g':_G,
- 't':_T,
- 'r':_R,
- 'y':_Y,
- 'm':_M,
- 'k':_K,
- 'w':_W,
- 's':_S,
- 'b':_B,
- 'd':_D,
- 'h':_H,
- 'v':_V,
- 'n':_N,
- }
-
-def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0):
- errors = 0
- emask = [0] * wsize
- epointer = 0
- size = 0
- position = offset
- hashs = set([0])
- hashmask = 0
- for i in xrange(wsize):
- hashmask <<= 2
- hashmask +=3
-
- for l in sequence:
- l = l.lower()
- hl = _dnahash[l]
-
- if emask[epointer]:
- errors-=1
- emask[epointer]=0
-
- if len(hl) > 1:
- errors +=1
- emask[epointer]=1
-
- epointer+=1
- epointer%=wsize
-
- if errors > degeneratemax:
- hl=set([hl[0]])
-
- hashs=set((((hc<<2) | cl) & hashmask)
- for hc in hashs
- for cl in hl)
-
- if size < wsize:
- size+=1
-
- if size==wsize:
- if errors <= degeneratemax:
- yield (position,hashs,errors)
- position+=1
-
-def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None):
- if hashs is None:
- hashs=[[] for x in xrange(4**wsize)]
-
- for pos,keys,errors in hashCodeIterator(sequence, wsize, degeneratemax, offset):
- for k in keys:
- hashs[k].append(pos)
-
- return hashs
-
-def hashSequences(sequences,wsize,maxpos,degeneratemax=0):
- hashs=None
- offsets=[]
- offset=0
- for s in sequences:
- offsets.append(offset)
- hashSequence(s,wsize,degeneratemax=degeneratemax,offset=offset,hashs=hashs)
- offset+=len(s)
-
- return hashs,offsets
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/ecobarcode/__init__.py b/obitools/ecobarcode/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/ecobarcode/databases.py b/obitools/ecobarcode/databases.py
deleted file mode 100644
index 70d2319..0000000
--- a/obitools/ecobarcode/databases.py
+++ /dev/null
@@ -1,32 +0,0 @@
-'''
-Created on 25 sept. 2010
-
-@author: coissac
-'''
-from obitools import NucSequence
-
-def referenceDBIterator(options):
-
- cursor = options.ecobarcodedb.cursor()
-
- cursor.execute("select id from databases.database where name='%s'" % options.database)
- options.dbid = cursor.fetchone()[0]
-
- cursor.execute('''
- select s.accession,r.id,r.taxid,r.sequence
- from databases.database d,
- databases.reference r,
- databases.relatedsequences s
- where r.database = d.id
- and s.reference= r.id
- and s.mainac
- and d.name = '%s'
- ''' % options.database
- )
-
- for ac,id,taxid,sequence in cursor:
- s = NucSequence(ac,sequence)
- s['taxid']=taxid
- s['refdbid']=id
- yield s
-
\ No newline at end of file
diff --git a/obitools/ecobarcode/ecotag.py b/obitools/ecobarcode/ecotag.py
deleted file mode 100644
index 2ebd3fb..0000000
--- a/obitools/ecobarcode/ecotag.py
+++ /dev/null
@@ -1,50 +0,0 @@
-'''
-Created on 25 sept. 2010
-
-@author: coissac
-'''
-
-def alreadyIdentified(seqid,options):
- cursor = options.ecobarcodedb.cursor()
- cursor.execute('''
- select count(*)
- from ecotag.identification
- where sequence=%s
- and database=%s
- ''',(int(seqid),int(options.dbid)))
-
- return int(cursor.fetchone()[0]) > 0;
-
-def storeIdentification(seqid,
- idstatus,taxid,
- matches,
- options
- ):
-
- cursor = options.ecobarcodedb.cursor()
-
- if not options.updatedb:
- cursor.execute('''
- delete from ecotag.identification where sequence=%s and database=%s
- ''',(int(seqid),int(options.dbid)))
-
- cursor.execute('''
- insert into ecotag.identification (sequence,database,idstatus,taxid)
- values (%s,%s,%s,%s)
- returning id
- ''' , (int(seqid),int(options.dbid),idstatus,int(taxid)))
-
- idid = cursor.fetchone()[0]
-
- for seq,identity in matches.iteritems():
- cursor.execute('''
- insert into ecotag.evidence (identification,reference,identity)
- values (%s,
- %s,
- %s)
- ''',(idid,seq,identity))
-
-
- cursor.close()
-
- options.ecobarcodedb.commit()
diff --git a/obitools/ecobarcode/options.py b/obitools/ecobarcode/options.py
deleted file mode 100644
index 6086423..0000000
--- a/obitools/ecobarcode/options.py
+++ /dev/null
@@ -1,64 +0,0 @@
-'''
-Created on 23 sept. 2010
-
-@author: coissac
-'''
-import psycopg2
-
-from obitools.ecobarcode.taxonomy import EcoTaxonomyDB
-
-def addEcoBarcodeDBOption(optionManager):
- optionManager.add_option('--dbname',
- action="store", dest="ecobarcodedb",
- type='str',
- default=None,
- help="Specify the name of the ecobarcode database")
-
- optionManager.add_option('--server',
- action="store", dest="dbserver",
- type='str',
- default="localhost",
- help="Specify the adress of the ecobarcode database server")
-
- optionManager.add_option('--user',
- action="store", dest="dbuser",
- type='str',
- default='postgres',
- help="Specify the user of the ecobarcode database")
-
- optionManager.add_option('--port',
- action="store", dest="dbport",
- type='str',
- default=5432,
- help="Specify the port of the ecobarcode database")
-
- optionManager.add_option('--passwd',
- action="store", dest="dbpasswd",
- type='str',
- default='',
- help="Specify the passwd of the ecobarcode database")
-
- optionManager.add_option('--primer',
- action="store", dest="primer",
- type='str',
- default=None,
- help="Specify the primer used for amplification")
-
-
-def ecobarcodeDatabaseConnection(options):
- if options.ecobarcodedb is not None:
- connection = psycopg2.connect(database=options.ecobarcodedb,
- user=options.dbuser,
- password=options.dbpasswd,
- host=options.dbserver,
- port=options.dbport)
- options.dbname=options.ecobarcodedb
- else:
- connection=None
- if connection is not None:
- options.ecobarcodedb=connection
- taxonomy = EcoTaxonomyDB(connection)
- else:
- taxonomy=None
- return taxonomy
-
diff --git a/obitools/ecobarcode/rawdata.py b/obitools/ecobarcode/rawdata.py
deleted file mode 100644
index a5f58cf..0000000
--- a/obitools/ecobarcode/rawdata.py
+++ /dev/null
@@ -1,38 +0,0 @@
-'''
-Created on 25 sept. 2010
-
-@author: coissac
-'''
-
-from obitools import NucSequence
-from obitools.utils import progressBar
-from obitools.ecobarcode.ecotag import alreadyIdentified
-
-import sys
-
-def sequenceIterator(options):
- cursor = options.ecobarcodedb.cursor()
-
- cursor.execute('''
- select s.id,sum(o.count),s.sequence
- from rawdata.sequence s,
- rawdata.occurrences o
- where o.sequence= s.id
- and s.primers = '%s'
- group by s.id,s.sequence
- ''' % options.primer
- )
-
- nbseq = cursor.rowcount
- progressBar(1, nbseq, True, head=options.dbname)
- for id,count,sequence in cursor:
- progressBar(cursor.rownumber+1, nbseq, head=options.dbname)
- if not options.updatedb or not alreadyIdentified(id,options):
- s = NucSequence(id,sequence)
- s['count']=count
- print >>sys.stderr,' +', cursor.rownumber+1,
- yield s
- else:
- print >>sys.stderr,' @', cursor.rownumber+1,
-
- print >>sys.stderr
diff --git a/obitools/ecobarcode/taxonomy.py b/obitools/ecobarcode/taxonomy.py
deleted file mode 100644
index c7d0185..0000000
--- a/obitools/ecobarcode/taxonomy.py
+++ /dev/null
@@ -1,120 +0,0 @@
-'''
-Created on 24 sept. 2010
-
-@author: coissac
-'''
-
-from obitools.ecopcr.taxonomy import TaxonomyDump
-from obitools.ecopcr.taxonomy import Taxonomy
-import sys
-
-class EcoTaxonomyDB(TaxonomyDump) :
-
- def __init__(self,dbconnect):
- self._dbconnect=dbconnect
-
- print >> sys.stderr,"Reading ecobarcode taxonomy database..."
-
- self._readNodeTable()
- print >> sys.stderr," ok"
-
- print >>sys.stderr,"Adding scientific name..."
-
- self._name=[]
- for taxid,name,classname in self._nameIterator():
- self._name.append((name,classname,self._index[taxid]))
- if classname == 'scientific name':
- self._taxonomy[self._index[taxid]].append(name)
-
- print >>sys.stderr,"Adding taxid alias..."
- for taxid,current in self._mergedNodeIterator():
- self._index[taxid]=self._index[current]
-
- print >>sys.stderr,"Adding deleted taxid..."
- for taxid in self._deletedNodeIterator():
- self._index[taxid]=None
-
-
- Taxonomy.__init__(self)
-
- #####
- #
- # Iterator functions
- #
- #####
-
- def _readNodeTable(self):
-
- cursor = self._dbconnect.cursor()
-
- cursor.execute("""
- select taxid,rank,parent
- from ncbitaxonomy.nodes
- """)
-
- print >>sys.stderr,"Reading taxonomy nodes..."
- taxonomy=[list(n) for n in cursor]
-
- print >>sys.stderr,"List all taxonomy rank..."
- ranks =list(set(x[1] for x in taxonomy))
- ranks.sort()
- rankidx = dict(map(None,ranks,xrange(len(ranks))))
-
- print >>sys.stderr,"Sorting taxons..."
- taxonomy.sort(TaxonomyDump._taxonCmp)
-
- self._taxonomy=taxonomy
-
- print >>sys.stderr,"Indexing taxonomy..."
- index = {}
- for t in self._taxonomy:
- index[t[0]]=self._bsearchTaxon(t[0])
-
- print >>sys.stderr,"Indexing parent and rank..."
- for t in self._taxonomy:
- t[1]=rankidx[t[1]]
- t[2]=index[t[2]]
-
- self._ranks=ranks
- self._index=index
-
- cursor.close()
-
- def _nameIterator(self):
- cursor = self._dbconnect.cursor()
-
- cursor.execute("""
- select taxid,name,nameclass
- from ncbitaxonomy.names
- """)
-
- for taxid,name,nameclass in cursor:
- yield taxid,name,nameclass
-
- cursor.close()
-
- def _mergedNodeIterator(self):
- cursor = self._dbconnect.cursor()
-
- cursor.execute("""
- select oldtaxid,newtaxid
- from ncbitaxonomy.merged
- """)
-
- for oldtaxid,newtaxid in cursor:
- yield oldtaxid,newtaxid
-
- cursor.close()
-
- def _deletedNodeIterator(self):
- cursor = self._dbconnect.cursor()
-
- cursor.execute("""
- select taxid
- from ncbitaxonomy.delnodes
- """)
-
- for taxid in cursor:
- yield taxid[0]
-
- cursor.close()
diff --git a/obitools/ecopcr/__init__.py b/obitools/ecopcr/__init__.py
deleted file mode 100644
index 10a90e5..0000000
--- a/obitools/ecopcr/__init__.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from obitools import utils
-from obitools import NucSequence
-from obitools.utils import universalOpen, universalTell, fileSize, progressBar
-import struct
-import sys
-
-
-class EcoPCRFile(utils.ColumnFile):
- def __init__(self,stream):
- utils.ColumnFile.__init__(self,
- stream, '|', True,
- (str,int,int,
- str,int,str,
- int,str,int,
- str,int,str,
- str,str,int,float,
- str,int,float,
- int,
- str,str), "#")
-
-
- def next(self):
- data = utils.ColumnFile.next(self)
- seq = NucSequence(data[0],data[20],data[21])
- seq['seq_length_ori']=data[1]
- seq['taxid']=data[2]
- seq['rank']=data[3]
- seq['species']=data[4]
- seq['species_sn']=data[5]
- seq['genus']=data[6]
- seq['genus_sn']=data[7]
- seq['family']=data[8]
- seq['family_sn']=data[9]
- seq['strand']=data[12]
- seq['forward_primer']=data[13]
- seq['forward_error']=data[14]
- seq['forward_tm']=data[15]
- seq['reverse_primer']=data[16]
- seq['reverse_error']=data[17]
- seq['reverse_tm']=data[18]
-
- return seq
-
-
-
-class EcoPCRDBFile(object):
-
- def _ecoRecordIterator(self,file):
- file = universalOpen(file)
- (recordCount,) = struct.unpack('> I',file.read(4))
- self._recover=False
-
- if recordCount:
- for i in xrange(recordCount):
- (recordSize,)=struct.unpack('>I',file.read(4))
- record = file.read(recordSize)
- yield record
- else:
- print >> sys.stderr,"\n\n WARNING : EcoPCRDB readding set into recover data mode\n"
- self._recover=True
- ok=True
- while(ok):
- try:
- (recordSize,)=struct.unpack('>I',file.read(4))
- record = file.read(recordSize)
- yield record
- except:
- ok=False
-
\ No newline at end of file
diff --git a/obitools/ecopcr/annotation.py b/obitools/ecopcr/annotation.py
deleted file mode 100644
index 7c76fb2..0000000
--- a/obitools/ecopcr/annotation.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import struct
-
-class EcoPCRDBAnnotationWriter(object):
- '''
- Class used to write Annotation description in EcoPCRDB format.
-
- EcoPCRDBAnnotationWriter is oftenly called through the EcoPCRDBSequenceWriter class
-
- @see: L{ecopcr.sequence.EcoPCRDBSequenceWriter}
- '''
-
- def __init__(self,dbname,id,fileidx=1,type=('CDS'),definition=None):
- '''
- class constructor
-
- @param dbname: name of ecoPCR database
- @type dbname: C{str}
- @param id: name of the qualifier used as feature id
- @type id: C{str}
- @param fileidx:
- @type fileidx: C{int}
- @param type:
- @type type: C{list} or C{tuple}
- @param definition:
- @type definition: C{str}
- '''
- self._type = type
- self._definition = definition
- self._id = id
- self._filename="%s_%03d.adx" % (dbname,fileidx)
- self._file = open(self._filename,'wb')
- self._sequenceIdx=0
-
-
- ftname ="%s.fdx" % (dbname)
- ft = open(ftname,'wb')
-
- self._fttypeidx=dict(map(None,type,xrange(len(type))))
-
- ft.write(struct.pack('> I',len(type)))
-
- for t in type:
- ft.write(self._ecoFtTypePacker(t))
-
- ft.close()
-
- self._annotationCount=0
- self._file.write(struct.pack('> I',self._annotationCount))
-
-
- def _ecoFtTypePacker(self,type):
- totalSize = len(type)
- packed = struct.pack('> I %ds' % totalSize,totalSize,type)
-
- assert len(packed) == totalSize+4, "error in feature type packing"
-
- return packed
-
- def _ecoAnnotationPacker(self,feature,seqidx):
- begin = feature.begin-1
- end = feature.end
- type = self._fttypeidx[feature.ftType]
- strand = feature.isDirect()
- id = feature[self._id][0]
- if self._definition in feature:
- definition = feature[self._definition][0]
- else:
- definition = ''
-
- assert strand is not None,"Only strand defined features can be stored"
-
- deflength = len(definition)
-
- totalSize = 4 + 4 + 4 + 4 + 4 + 20 + 4 + deflength
-
- packed = struct.pack('> I I I I I 20s I %ds' % (deflength),
- totalSize,
- seqidx,
- begin,
- end,
- type,
- int(strand),
- id,
- deflength,
- definition)
-
- assert len(packed) == totalSize+4, "error in annotation packing"
-
- return packed
-
-
- def put(self,sequence,seqidx=None):
- if seqidx is None:
- seqidx = self._sequenceIdx
- self._sequenceIdx+=1
- for feature in sequence.getFeatureTable():
- if feature.ftType in self._type:
- self._annotationCount+=1
- self._file.write(self._ecoAnnotationPacker(feature,seqidx))
-
- def __del__(self):
- self._file.seek(0,0)
- self._file.write(struct.pack('> I',self._annotationCount))
- self._file.close()
diff --git a/obitools/ecopcr/options.py b/obitools/ecopcr/options.py
deleted file mode 100644
index 03663cd..0000000
--- a/obitools/ecopcr/options.py
+++ /dev/null
@@ -1,129 +0,0 @@
-'''
-Created on 13 fevr. 2011
-
-@author: coissac
-'''
-
-from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter
-
-try:
- from obitools.ecobarcode.options import addEcoBarcodeDBOption,ecobarcodeDatabaseConnection
-except ImportError:
- def addEcoBarcodeDBOption(optionmanager):
- pass
- def ecobarcodeDatabaseConnection(options):
- return None
-
-def addTaxonomyDBOptions(optionManager):
- addEcoBarcodeDBOption(optionManager)
- optionManager.add_option('-d','--database',
- action="store", dest="taxonomy",
- metavar="",
- type="string",
- help="ecoPCR taxonomy Database "
- "name")
- optionManager.add_option('-t','--taxonomy-dump',
- action="store", dest="taxdump",
- metavar="",
- type="string",
- help="NCBI Taxonomy dump repository "
- "name")
-
-
-def addTaxonomyFilterOptions(optionManager):
- addTaxonomyDBOptions(optionManager)
- optionManager.add_option('--require-rank',
- action="append",
- dest='requiredRank',
- metavar="",
- type="string",
- default=[],
- help="select sequence with taxid tag containing "
- "a parent of rank ")
-
- optionManager.add_option('-r','--required',
- action="append",
- dest='required',
- metavar="",
- type="int",
- default=[],
- help="required taxid")
-
- optionManager.add_option('-i','--ignore',
- action="append",
- dest='ignored',
- metavar="",
- type="int",
- default=[],
- help="ignored taxid")
-
-def loadTaxonomyDatabase(options):
- if isinstance(options.taxonomy, Taxonomy):
- return options.taxonomy
- taxonomy = ecobarcodeDatabaseConnection(options)
- if (taxonomy is not None or
- options.taxonomy is not None or
- options.taxdump is not None):
- if options.taxdump is not None:
- taxonomy = TaxonomyDump(options.taxdump)
- if taxonomy is not None and isinstance(options.taxonomy, str):
- ecoTaxonomyWriter(options.taxonomy,taxonomy)
- options.ecodb=options.taxonomy
- if isinstance(options.taxonomy, Taxonomy):
- taxonomy = options.taxonomy
- if taxonomy is None and isinstance(options.taxonomy, str):
- taxonomy = EcoTaxonomyDB(options.taxonomy)
- options.ecodb=options.taxonomy
- options.taxonomy=taxonomy
- return options.taxonomy
-
-def taxonomyFilterGenerator(options):
- loadTaxonomyDatabase(options)
- if options.taxonomy is not None:
- taxonomy=options.taxonomy
- def taxonomyFilter(seq):
- def annotateAtRank(seq,rank):
- if 'taxid' in seq and seq['taxid'] is not None:
- rtaxid= taxonomy.getTaxonAtRank(seq['taxid'],rank)
- return rtaxid
- return None
- good = True
- if 'taxid' in seq:
- taxid = seq['taxid']
-# print taxid,
- if options.requiredRank:
- taxonatrank = reduce(lambda x,y: x and y,
- (annotateAtRank(seq,rank) is not None
- for rank in options.requiredRank),True)
- good = good and taxonatrank
-# print >>sys.stderr, " Has rank : ",good,
- if options.required:
- good = good and reduce(lambda x,y: x or y,
- (taxonomy.isAncestor(r,taxid) for r in options.required),
- False)
-# print " Required : ",good,
- if options.ignored:
- good = good and not reduce(lambda x,y: x or y,
- (taxonomy.isAncestor(r,taxid) for r in options.ignored),
- False)
-# print " Ignored : ",good,
-# print " Global : ",good
-
- return good
-
-
- else:
- def taxonomyFilter(seq):
- return True
-
- return taxonomyFilter
-
-def taxonomyFilterIteratorGenerator(options):
- taxonomyFilter = taxonomyFilterGenerator(options)
-
- def filterIterator(seqiterator):
- for seq in seqiterator:
- if taxonomyFilter(seq):
- yield seq
-
- return filterIterator
\ No newline at end of file
diff --git a/obitools/ecopcr/sequence.py b/obitools/ecopcr/sequence.py
deleted file mode 100644
index 1465e69..0000000
--- a/obitools/ecopcr/sequence.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from obitools import NucSequence
-from obitools.ecopcr import EcoPCRDBFile
-from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter
-from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter
-from obitools.utils import universalOpen
-from glob import glob
-import struct
-import gzip
-import sys
-
-
-class EcoPCRDBSequenceIterator(EcoPCRDBFile):
- '''
- Build an iterator over the sequences include in a sequence database
- formated for ecoPCR
- '''
-
- def __init__(self,path,taxonomy=None):
- '''
- ecoPCR data iterator constructor
-
- @param path: path to the ecoPCR database including the database prefix name
- @type path: C{str}
- @param taxonomy: a taxonomy can be given to the reader to decode the taxonomic data
- associated to the sequences. If no Taxonomy is furnish, it will be read
- before the sequence database files using the same path.
- @type taxonomy: L{obitools.ecopcr.taxonomy.Taxonomy}
- '''
- self._path = path
-
- if taxonomy is not None:
- self._taxonomy=taxonomy
- else:
- self._taxonomy=EcoTaxonomyDB(path)
-
- self._seqfilesFiles = glob('%s_???.sdx' % self._path)
- self._seqfilesFiles.sort()
-
- def __ecoSequenceIterator(self,file):
- for record in self._ecoRecordIterator(file):
- lrecord = len(record)
- lnames = lrecord - (4*4+20)
- (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
- seqid=seqid.strip('\x00')
- de = string[:deflength]
- seq = gzip.zlib.decompress(string[deflength:])
- bioseq = NucSequence(seqid,seq,de,taxidx=taxid,taxid=self._taxonomy._taxonomy[taxid][0])
- yield bioseq
-
- def __iter__(self):
- for seqfile in self._seqfilesFiles:
- for seq in self.__ecoSequenceIterator(seqfile):
- yield seq
-
-class EcoPCRDBSequenceWriter(object):
-
- def __init__(self,dbname,fileidx=1,taxonomy=None,ftid=None,type=None,definition=None,append=False):
- self._taxonomy=taxonomy
- self._filename="%s_%03d.sdx" % (dbname,fileidx)
- if append:
- mode ='r+b'
- f = universalOpen(self._filename)
- (recordCount,) = struct.unpack('> I',f.read(4))
- self._sequenceCount=recordCount
- del f
- self._file = open(self._filename,mode)
- self._file.seek(0,0)
- self._file.write(struct.pack('> I',0))
- self._file.seek(0,2)
- else:
- self._sequenceCount=0
- mode = 'wb'
- self._file = open(self._filename,mode)
- self._file.write(struct.pack('> I',self._sequenceCount))
-
- if self._taxonomy is not None:
- print >> sys.stderr,"Writing the taxonomy file...",
- ecoTaxonomyWriter(dbname,self._taxonomy)
- print >> sys.stderr,"Ok"
-
- if type is not None:
- assert ftid is not None,"You must specify an id attribute for features"
- self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition)
- else:
- self._annotation = None
-
- def _ecoSeqPacker(self,seq):
-
- compactseq = gzip.zlib.compress(str(seq).upper(),9)
- cptseqlength = len(compactseq)
- delength = len(seq.definition)
-
- totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
-
- if self._taxonomy is None or 'taxid' not in seq:
- taxon=-1
- else:
- taxon=self._taxonomy.findIndex(seq['taxid'])
-
- try:
- packed = struct.pack('> I i 20s I I I %ds %ds' % (delength,cptseqlength),
- totalSize,
- taxon,
- seq.id,
- delength,
- len(seq),
- cptseqlength,
- seq.definition,
- compactseq)
- except struct.error as e:
- print >>sys.stderr,"\n\n============\n\nError on sequence : %s\n\n" % seq.id
- raise e
-
- assert len(packed) == totalSize+4, "error in sequence packing"
-
- return packed
-
-
- def put(self,sequence):
- if self._taxonomy is not None:
- if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
- sequence.extractTaxon()
- self._file.write(self._ecoSeqPacker(sequence))
- if self._annotation is not None:
- self._annotation.put(sequence, self._sequenceCount)
- self._sequenceCount+=1
-
- def __del__(self):
- self._file.seek(0,0)
- self._file.write(struct.pack('> I',self._sequenceCount))
- self._file.close()
-
-
diff --git a/obitools/ecopcr/taxonomy.py b/obitools/ecopcr/taxonomy.py
deleted file mode 100644
index bb2ec4e..0000000
--- a/obitools/ecopcr/taxonomy.py
+++ /dev/null
@@ -1,630 +0,0 @@
-import struct
-import sys
-
-from itertools import count,imap
-
-from obitools.ecopcr import EcoPCRDBFile
-from obitools.utils import universalOpen
-from obitools.utils import ColumnFile
-
-class Taxonomy(object):
- def __init__(self):
- '''
- The taxonomy database constructor
-
- @param path: path to the ecoPCR database including the database prefix name
- @type path: C{str}
- '''
-
- self._ranks.append('obi')
-
- self._speciesidx = self._ranks.index('species')
- self._genusidx = self._ranks.index('genus')
- self._familyidx = self._ranks.index('family')
- self._orderidx = self._ranks.index('order')
- self._nameidx=dict((x[0],x[2]) for x in self._name)
- self._nameidx.update(dict((x[0],x[2]) for x in self._preferedName))
- self._preferedidx=dict((x[2],x[1]) for x in self._preferedName)
-
- self._bigestTaxid = max(x[0] for x in self._taxonomy)
-
-
- def findTaxonByIdx(self,idx):
- if idx is None:
- return None
- return self._taxonomy[idx]
-
- def findIndex(self,taxid):
- if taxid is None:
- return None
- return self._index[taxid]
-
- def findTaxonByTaxid(self,taxid):
- return self.findTaxonByIdx(self.findIndex(taxid))
-
- def findTaxonByName(self,name):
- return self._taxonomy[self._nameidx[name]]
-
- def findRankByName(self,rank):
- try:
- return self._ranks.index(rank)
- except ValueError:
- return None
-
- def __contains__(self,taxid):
- return self.findTaxonByTaxid(taxid) is not None
-
-
-
-
- #####
- #
- # PUBLIC METHODS
- #
- #####
-
-
- def subTreeIterator(self, taxid):
- "return subtree for given taxonomic id "
- idx = self.findTaxonByTaxid(taxid)
- yield self._taxonomy[idx]
- for t in self._taxonomy:
- if t[2] == idx:
- for subt in self.subTreeIterator(t[0]):
- yield subt
-
- def parentalTreeIterator(self, taxid):
- """
- return parental tree for given taxonomic id starting from
- first ancester to the root.
- """
- taxon=self.findTaxonByTaxid(taxid)
- if taxon is not None:
- while taxon[2]!= 0:
- yield taxon
- taxon = self._taxonomy[taxon[2]]
- yield self._taxonomy[0]
- else:
- raise StopIteration
-
- def isAncestor(self,parent,taxid):
- return parent in [x[0] for x in self.parentalTreeIterator(taxid)]
-
- def lastCommonTaxon(self,*taxids):
- if not taxids:
- return None
- if len(taxids)==1:
- return taxids[0]
-
- if len(taxids)==2:
- t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])]
- t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])]
- t1.reverse()
- t2.reverse()
-
- count = min(len(t1),len(t2))
- i=0
- while(i < count and t1[i]==t2[i]):
- i+=1
- i-=1
-
- return t1[i]
-
- ancetre = taxids[0]
- for taxon in taxids[1:]:
- ancetre = self.lastCommonTaxon(ancetre,taxon)
-
- return ancetre
-
- def betterCommonTaxon(self,error=1,*taxids):
- lca = self.lastCommonTaxon(*taxids)
- idx = self._index[lca]
- sublca = [t[0] for t in self._taxonomy if t[2]==idx]
- return sublca
-
-
- def getPreferedName(self,taxid):
- idx = self.findIndex(taxid)
- return self._preferedidx.get(idx,self._taxonomy[idx][3])
-
-
- def getScientificName(self,taxid):
- return self.findTaxonByTaxid(taxid)[3]
-
- def getRankId(self,taxid):
- return self.findTaxonByTaxid(taxid)[1]
-
- def getRank(self,taxid):
- return self._ranks[self.getRankId(taxid)]
-
- def getTaxonAtRank(self,taxid,rankid):
- if isinstance(rankid, str):
- rankid=self._ranks.index(rankid)
- try:
- return [x[0] for x in self.parentalTreeIterator(taxid)
- if x[1]==rankid][0]
- except IndexError:
- return None
-
- def getSpecies(self,taxid):
- return self.getTaxonAtRank(taxid, self._speciesidx)
-
- def getGenus(self,taxid):
- return self.getTaxonAtRank(taxid, self._genusidx)
-
- def getFamily(self,taxid):
- return self.getTaxonAtRank(taxid, self._familyidx)
-
- def getOrder(self,taxid):
- return self.getTaxonAtRank(taxid, self._orderidx)
-
- def rankIterator(self):
- for x in imap(None,self._ranks,xrange(len(self._ranks))):
- yield x
-
- def groupTaxa(self,taxa,groupname):
- t=[self.findTaxonByTaxid(x) for x in taxa]
- a=set(x[2] for x in t)
- assert len(a)==1,"All taxa must have the same parent"
- newtaxid=max([2999999]+[x[0] for x in self._taxonomy if x[0]>=3000000 and x[0]<4000000])+1
- newidx=len(self._taxonomy)
- if 'GROUP' not in self._ranks:
- self._ranks.append('GROUP')
- rankid=self._ranks.index('GROUP')
- self._taxonomy.append((newtaxid,rankid,a.pop(),groupname))
- for x in t:
- x[2]=newidx
-
- def addLocalTaxon(self,name,rank,parent,minimaltaxid=10000000):
- newtaxid = minimaltaxid if (self._bigestTaxid < minimaltaxid) else self._bigestTaxid+1
-
- rankid=self.findRankByName(rank)
- parentidx = self.findIndex(int(parent))
- tx = (newtaxid,rankid,parentidx,name,'local')
- self._taxonomy.append(tx)
- newidx=len(self._taxonomy)-1
- self._name.append((name,'scientific name',newidx))
- self._nameidx[name]=newidx
- self._index[newtaxid]=newidx
-
- self._bigestTaxid=newtaxid
-
- return newtaxid
-
- def removeLocalTaxon(self,taxid):
- raise NotImplemented
- txidx = self.findIndex(taxid)
- taxon = self.findTaxonByIdx(txidx)
-
- assert txidx >= self._localtaxon,"Only local taxon can be deleted"
-
- for t in self._taxonomy:
- if t[2] == txidx:
- self.removeLocalTaxon(t[0])
-
-
-
-
- return taxon
-
- def addPreferedName(self,taxid,name):
- idx = self.findIndex(taxid)
- self._preferedName.append(name,'obi',idx)
- self._preferedidx[idx]=name
- return taxid
-
-class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
- '''
- A taxonomy database class
- '''
-
-
- def __init__(self,path):
- '''
- The taxonomy database constructor
-
- @param path: path to the ecoPCR database including the database prefix name
- @type path: C{str}
- '''
- self._path = path
- self._taxonFile = "%s.tdx" % self._path
- self._localTaxonFile = "%s.ldx" % self._path
- self._ranksFile = "%s.rdx" % self._path
- self._namesFile = "%s.ndx" % self._path
- self._preferedNamesFile = "%s.pdx" % self._path
- self._aliasFile = "%s.adx" % self._path
-
- print >> sys.stderr,"Reading binary taxonomy database...",
-
- self.__readNodeTable()
-
- print >> sys.stderr," ok"
-
- Taxonomy.__init__(self)
-
-
- #####
- #
- # Iterator functions
- #
- #####
-
- def __ecoNameIterator(self,file):
- for record in self._ecoRecordIterator(file):
- lrecord = len(record)
- lnames = lrecord - 16
- (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
- name=names[:namelength]
- classname=names[namelength:]
- yield (name,classname,indextaxid)
-
-
- def __ecoTaxonomicIterator(self):
- for record in self._ecoRecordIterator(self._taxonFile):
- lrecord = len(record)
- lnames = lrecord - 16
- (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
- yield (taxid,rankid,parentidx,name,'ncbi')
-
- try :
- lt=0
- for record in self._ecoRecordIterator(self._localTaxonFile):
- lrecord = len(record)
- lnames = lrecord - 16
- (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
- lt+=1
- yield (taxid,rankid,parentidx,name,'local')
- print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt
- except:
- print >> sys.stderr, " [INFO : Local taxon file not found] "
-
- def __ecoRankIterator(self):
- for record in self._ecoRecordIterator(self._ranksFile):
- yield record
-
- def __ecoAliasIterator(self):
- for record in self._ecoRecordIterator(self._aliasFile):
- (taxid,index) = struct.unpack('> I i',record)
- yield taxid,index
-
- #####
- #
- # Indexes
- #
- #####
-
- def __ecoNameIndex(self):
- indexName = [x for x in self.__ecoNameIterator(self._namesFile)]
- return indexName
-
- def __ecoRankIndex(self):
- rank = [r for r in self.__ecoRankIterator()]
- return rank
-
- def __ecoTaxonomyIndex(self):
- taxonomy = []
-
- try :
- index = dict(self.__ecoAliasIterator())
- print >> sys.stderr, " [INFO : Taxon alias file found] "
- buildIndex=False
- except:
- print >> sys.stderr, " [INFO : Taxon alias file not found] "
- index={}
- i = 0;
- buildIndex=True
-
- localtaxon=0
- i=0
- for x in self.__ecoTaxonomicIterator():
- taxonomy.append(x)
- if x[4]=='ncbi':
- localtaxon+=1
-
- if buildIndex or x[4]!='ncbi':
- index[x[0]] = i
- i+=1
-
-
- print >> sys.stderr,"Taxonomical tree read",
- return taxonomy, index,localtaxon
-
- def __readNodeTable(self):
- self._taxonomy, self._index, self._localtaxon= self.__ecoTaxonomyIndex()
- self._ranks = self.__ecoRankIndex()
- self._name = self.__ecoNameIndex()
-
- # Add local taxon tame to the name index
- i=self._localtaxon
- for t in self._taxonomy[self._localtaxon:]:
- self._name.append((t[3],'scientific name',i))
- i+=1
-
- try :
- self._preferedName = [(x[0],'obi',x[2])
- for x in self.__ecoNameIterator(self._preferedNamesFile)]
- print >> sys.stderr, " [INFO : Prefered taxon name file found] : %d added taxa" % len(self._preferedName)
- except:
- print >> sys.stderr, " [INFO : Prefered taxon name file not found]"
- self._preferedName = []
-
-
-
-
-class TaxonomyDump(Taxonomy):
-
- def __init__(self,taxdir):
-
- self._path=taxdir
- self._readNodeTable('%s/nodes.dmp' % taxdir)
-
- print >>sys.stderr,"Adding scientific name..."
-
- self._name=[]
- for taxid,name,classname in self._nameIterator('%s/names.dmp' % taxdir):
- self._name.append((name,classname,self._index[taxid]))
- if classname == 'scientific name':
- self._taxonomy[self._index[taxid]].extend([name,'ncbi'])
-
- print >>sys.stderr,"Adding taxid alias..."
- for taxid,current in self._mergedNodeIterator('%s/merged.dmp' % taxdir):
- self._index[taxid]=self._index[current]
-
- print >>sys.stderr,"Adding deleted taxid..."
- for taxid in self._deletedNodeIterator('%s/delnodes.dmp' % taxdir):
- self._index[taxid]=None
-
- self._nameidx=dict((x[0],x[2]) for x in self._name)
-
-
- def _taxonCmp(t1,t2):
- if t1[0] < t2[0]:
- return -1
- elif t1[0] > t2[0]:
- return +1
- return 0
-
- _taxonCmp=staticmethod(_taxonCmp)
-
- def _bsearchTaxon(self,taxid):
- taxCount = len(self._taxonomy)
- begin = 0
- end = taxCount
- oldcheck=taxCount
- check = begin + end / 2
- while check != oldcheck and self._taxonomy[check][0]!=taxid :
- if self._taxonomy[check][0] < taxid:
- begin=check
- else:
- end=check
- oldcheck=check
- check = (begin + end) / 2
-
-
- if self._taxonomy[check][0]==taxid:
- return check
- else:
- return None
-
-
-
- def _readNodeTable(self,file):
-
- file = universalOpen(file)
-
- nodes = ColumnFile(file,
- sep='|',
- types=(int,int,str,
- str,str,bool,
- int,bool,int,
- bool,bool,bool,str))
- print >>sys.stderr,"Reading taxonomy dump file..."
- # (taxid,rank,parent)
- taxonomy=[[n[0],n[2],n[1]] for n in nodes]
- print >>sys.stderr,"List all taxonomy rank..."
- ranks =list(set(x[1] for x in taxonomy))
- ranks.sort()
- rankidx = dict(map(None,ranks,xrange(len(ranks))))
-
- print >>sys.stderr,"Sorting taxons..."
- taxonomy.sort(TaxonomyDump._taxonCmp)
-
- self._taxonomy=taxonomy
- self._localtaxon=len(taxonomy)
-
- print >>sys.stderr,"Indexing taxonomy..."
- index = {}
- for t in self._taxonomy:
- index[t[0]]=self._bsearchTaxon(t[0])
-
- print >>sys.stderr,"Indexing parent and rank..."
- for t in self._taxonomy:
- t[1]=rankidx[t[1]]
- t[2]=index[t[2]]
-
- self._ranks=ranks
- self._index=index
- self._preferedName = []
-
- def _nameIterator(self,file):
- file = universalOpen(file)
- names = ColumnFile(file,
- sep='|',
- types=(int,str,
- str,str))
- for taxid,name,unique,classname,white in names:
- yield taxid,name,classname
-
- def _mergedNodeIterator(self,file):
- file = universalOpen(file)
- merged = ColumnFile(file,
- sep='|',
- types=(int,int,str))
- for taxid,current,white in merged:
- yield taxid,current
-
- def _deletedNodeIterator(self,file):
- file = universalOpen(file)
- deleted = ColumnFile(file,
- sep='|',
- types=(int,str))
- for taxid,white in deleted:
- yield taxid
-
-#####
-#
-#
-# Binary writer
-#
-#
-#####
-
-def ecoTaxonomyWriter(prefix, taxonomy,onlyLocal=False):
-
- def ecoTaxPacker(tx):
-
- namelength = len(tx[3])
-
- totalSize = 4 + 4 + 4 + 4 + namelength
-
- packed = struct.pack('> I I I I I %ds' % namelength,
- totalSize,
- tx[0],
- tx[1],
- tx[2],
- namelength,
- tx[3])
-
- return packed
-
- def ecoRankPacker(rank):
-
- namelength = len(rank)
-
- packed = struct.pack('> I %ds' % namelength,
- namelength,
- rank)
-
- return packed
-
- def ecoAliasPacker(taxid,index):
-
- totalSize = 4 + 4
- try:
- packed = struct.pack('> I I i',
- totalSize,
- taxid,
- index)
- except struct.error,e:
- print >>sys.stderr,(totalSize,taxid,index)
- print >>sys.stderr,"Total size : %d taxid : %d index : %d" %(totalSize,taxid,index)
- raise e
-
- return packed
-
- def ecoNamePacker(name):
-
- namelength = len(name[0])
- classlength= len(name[1])
- totalSize = namelength + classlength + 4 + 4 + 4 + 4
-
- packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
- totalSize,
- int(name[1]=='scientific name'),
- namelength,
- classlength,
- name[2],
- name[0],
- name[1])
-
- return packed
-
-
- def ecoTaxWriter(file,taxonomy):
- output = open(file,'wb')
- nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]=='ncbi'),0)
-
- output.write(struct.pack('> I',nbtaxon))
-
- for tx in taxonomy:
- if tx[4]=='ncbi':
- output.write(ecoTaxPacker(tx))
-
- output.close()
- return nbtaxon < len(taxonomy)
-
- def ecoLocalTaxWriter(file,taxonomy):
- nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]!='ncbi'),0)
-
- if nbtaxon:
- output = open(file,'wb')
-
- output.write(struct.pack('> I',nbtaxon))
-
- for tx in taxonomy:
- if tx[4]!='ncbi':
- output.write(ecoTaxPacker(tx))
-
- output.close()
-
-
- def ecoRankWriter(file,ranks):
- output = open(file,'wb')
- output.write(struct.pack('> I',len(ranks)))
-
- for rank in ranks:
- output.write(ecoRankPacker(rank))
-
- output.close()
-
- def ecoAliasWriter(file,index):
- output = open(file,'wb')
- output.write(struct.pack('> I',len(index)))
-
- for taxid in index:
- i=index[taxid]
- if i is None:
- i=-1
- output.write(ecoAliasPacker(taxid, i))
-
- output.close()
-
- def nameCmp(n1,n2):
- name1=n1[0].upper()
- name2=n2[0].upper()
- if name1 < name2:
- return -1
- elif name1 > name2:
- return 1
- return 0
-
-
- def ecoNameWriter(file,names):
- output = open(file,'wb')
- output.write(struct.pack('> I',len(names)))
-
- names.sort(nameCmp)
-
- for name in names:
- output.write(ecoNamePacker(name))
-
- output.close()
-
- def ecoPreferedNameWriter(file,names):
- output = open(file,'wb')
- output.write(struct.pack('> I',len(names)))
- for name in names:
- output.write(ecoNamePacker(name))
-
- output.close()
-
- localtaxon=True
- if not onlyLocal:
- ecoRankWriter('%s.rdx' % prefix, taxonomy._ranks)
- localtaxon = ecoTaxWriter('%s.tdx' % prefix, taxonomy._taxonomy)
- ecoNameWriter('%s.ndx' % prefix, [x for x in taxonomy._name if x[2] < taxonomy._localtaxon])
- ecoAliasWriter('%s.adx' % prefix, taxonomy._index)
- if localtaxon:
- ecoLocalTaxWriter('%s.ldx' % prefix, taxonomy._taxonomy)
- if taxonomy._preferedName:
- ecoNameWriter('%s.pdx' % prefix, taxonomy._preferedName)
diff --git a/obitools/ecotag/__init__.py b/obitools/ecotag/__init__.py
deleted file mode 100644
index 26c94d3..0000000
--- a/obitools/ecotag/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-class EcoTagResult(dict):
- pass
\ No newline at end of file
diff --git a/obitools/ecotag/parser.py b/obitools/ecotag/parser.py
deleted file mode 100644
index f431e34..0000000
--- a/obitools/ecotag/parser.py
+++ /dev/null
@@ -1,150 +0,0 @@
-from itertools import imap
-from obitools import utils
-
-from obitools.ecotag import EcoTagResult
-
-class EcoTagFileIterator(utils.ColumnFile):
-
- @staticmethod
- def taxid(x):
- x = int(x)
- if x < 0:
- return None
- else:
- return x
-
- @staticmethod
- def scientificName(x):
- if x=='--':
- return None
- else:
- return x
-
- @staticmethod
- def value(x):
- if x=='--':
- return None
- else:
- return float(x)
-
- @staticmethod
- def count(x):
- if x=='--':
- return None
- else:
- return int(x)
-
-
- def __init__(self,stream):
- utils.ColumnFile.__init__(self,
- stream, '\t', True,
- (str,str,str,
- EcoTagFileIterator.value,
- EcoTagFileIterator.value,
- EcoTagFileIterator.value,
- EcoTagFileIterator.count,
- EcoTagFileIterator.count,
- EcoTagFileIterator.taxid,
- EcoTagFileIterator.scientificName,
- str,
- EcoTagFileIterator.taxid,
- EcoTagFileIterator.scientificName,
- EcoTagFileIterator.taxid,
- EcoTagFileIterator.scientificName,
- EcoTagFileIterator.taxid,
- EcoTagFileIterator.scientificName,
- str
- ))
- self._memory=None
-
- _colname = ['identification',
- 'seqid',
- 'best_match_ac',
- 'max_identity',
- 'min_identity',
- 'theorical_min_identity',
- 'count',
- 'match_count',
- 'taxid',
- 'scientific_name',
- 'rank',
- 'order_taxid',
- 'order_sn',
- 'family_taxid',
- 'family_sn',
- 'genus_taxid',
- 'genus_sn',
- 'species_taxid',
- 'species_sn',
- 'sequence']
-
- def next(self):
- if self._memory is not None:
- data=self._memory
- self._memory=None
- else:
- data = utils.ColumnFile.next(self)
- data = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(data)],data))
-
- if data['identification']=='ID':
- data.cd=[]
- try:
- nextone = utils.ColumnFile.next(self)
- nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
- except StopIteration:
- nextone = None
- while nextone is not None and nextone['identification']=='CD':
- data.cd.append(nextone)
- try:
- nextone = utils.ColumnFile.next(self)
- nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
- except StopIteration:
- nextone = None
- self._memory=nextone
-
- return data
-
-def ecoTagIdentifiedFilter(ecoTagIterator):
- for x in ecoTagIterator:
- if x['identification']=='ID':
- yield x
-
-
-class EcoTagAbstractIterator(utils.ColumnFile):
-
- _colname = ['scientific_name',
- 'taxid',
- 'rank',
- 'count',
- 'max_identity',
- 'min_identity']
-
-
- @staticmethod
- def taxid(x):
- x = int(x)
- if x < 0:
- return None
- else:
- return x
-
- def __init__(self,stream):
- utils.ColumnFile.__init__(self,
- stream, '\t', True,
- (str,
- EcoTagFileIterator.taxid,
- str,
- int,
- float,float,float))
-
- def next(self):
- data = utils.ColumnFile.next(self)
- data = dict(imap(None,EcoTagAbstractIterator._colname,data))
-
- return data
-
-def ecoTagAbstractFilter(ecoTagAbsIterator):
- for x in ecoTagAbsIterator:
- if x['taxid'] is not None:
- yield x
-
\ No newline at end of file
diff --git a/obitools/eutils/__init__.py b/obitools/eutils/__init__.py
deleted file mode 100644
index 1e7d3b2..0000000
--- a/obitools/eutils/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import time
-from urllib2 import urlopen
-import shelve
-from threading import Lock
-import sys
-
-class EUtils(object):
- '''
-
- '''
-
- _last_request=0
- _interval=3
-
- def __init__(self):
- self._lock = Lock()
-
- def wait(self):
- now=time.time()
- delta = now - EUtils._last_request
- while delta < EUtils._interval:
- time.sleep(delta)
- now=time.time()
- delta = now - EUtils._last_request
-
- def _sendRequest(self,url):
- self.wait()
- EUtils._last_request=time.time()
- t = EUtils._last_request
- print >>sys.stderr,"Sending request to NCBI @ %f" % t
- data = urlopen(url).read()
- print >>sys.stderr,"Data red from NCBI @ %f (%f)" % (t,time.time()-t)
- return data
-
- def setInterval(self,seconde):
- EUtils._interval=seconde
-
-
-class EFetch(EUtils):
- '''
-
- '''
- def __init__(self,db,tool='OBITools',
- retmode='text',rettype="native",
- server='eutils.ncbi.nlm.nih.gov'):
- EUtils.__init__(self)
- self._url = "http://%s/entrez/eutils/efetch.fcgi?db=%s&tool=%s&retmode=%s&rettype=%s"
- self._url = self._url % (server,db,tool,retmode,rettype)
-
-
- def get(self,**args):
- key = "&".join(['%s=%s' % x for x in args.items()])
- return self._sendRequest(self._url +"&" + key)
-
diff --git a/obitools/fast.py b/obitools/fast.py
deleted file mode 100644
index 760f493..0000000
--- a/obitools/fast.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""
- implement fastn/fastp sililarity search algorithm for BioSequence.
-"""
-
-class Fast(object):
-
- def __init__(self,seq,kup=2):
- '''
- @param seq: sequence to hash
- @type seq: BioSequence
- @param kup: word size used for hashing process
- @type kup: int
- '''
- hash={}
- seq = str(seq)
- for word,pos in ((seq[i:i+kup].upper(),i) for i in xrange(len(seq)-kup)):
- if word in hash:
- hash[word].append(pos)
- else:
- hash[word]=[pos]
-
- self._kup = kup
- self._hash= hash
- self._seq = seq
-
- def __call__(self,seq):
- '''
- Align one sequence with the fast hash table.
-
- @param seq: the sequence to align
- @type seq: BioSequence
-
- @return: where smax is the
- score of the largest diagonal and pmax the
- associated shift
- @rtype: a int tuple (smax,pmax)
- '''
- histo={}
- seq = str(seq).upper()
- hash= self._hash
- kup = self._kup
-
- for word,pos in ((seq[i:i+kup],i) for i in xrange(len(seq)-kup)):
- matchedpos = hash.get(word,[])
- for p in matchedpos:
- delta = pos - p
- histo[delta]=histo.get(delta,0) + 1
- smax = max(histo.values())
- pmax = [x for x in histo if histo[x]==smax]
- return smax,pmax
-
- def __len__(self):
- return len(self._seq)
-
-
-
diff --git a/obitools/fasta/__init__.py b/obitools/fasta/__init__.py
deleted file mode 100644
index d5b90c5..0000000
--- a/obitools/fasta/__init__.py
+++ /dev/null
@@ -1,384 +0,0 @@
-"""
-fasta module provides functions to read and write sequences in fasta format.
-
-
-"""
-
-#from obitools.format.genericparser import fastGenericEntryIteratorGenerator
-from obitools.format.genericparser import genericEntryIteratorGenerator
-from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence
-from obitools import _default_raw_parser
-
-#from obitools.alignment import alignmentReader
-#from obitools.utils import universalOpen
-
-import re
-from obitools.ecopcr.options import loadTaxonomyDatabase
-from obitools.format import SequenceFileIterator
-
-#from _fasta import parseFastaDescription,fastaParser
-#from _fasta import _fastaJoinSeq
-#from _fasta import _parseFastaTag
-
-
-#fastaEntryIterator=fastGenericEntryIteratorGenerator(startEntry='>')
-fastaEntryIterator=genericEntryIteratorGenerator(startEntry='>')
-rawFastaEntryIterator=genericEntryIteratorGenerator(startEntry='\s*>')
-
-def _fastaJoinSeq(seqarray):
- return ''.join([x.strip() for x in seqarray])
-
-
-def parseFastaDescription(ds,tagparser):
-
- m = tagparser.search(' '+ds)
- if m is not None:
- info=m.group(0)
- definition = ds[m.end(0):].strip()
- else:
- info=None
- definition=ds
-
- return definition,info
-
-def fastaParser(seq,bioseqfactory,tagparser,rawparser,joinseq=_fastaJoinSeq):
- '''
- Parse a fasta record.
-
- @attention: internal purpose function
-
- @param seq: a sequence object containing all lines corresponding
- to one fasta sequence
- @type seq: C{list} or C{tuple} of C{str}
-
- @param bioseqfactory: a callable object return a BioSequence
- instance.
- @type bioseqfactory: a callable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: a C{BioSequence} instance
- '''
- seq = seq.split('\n')
- title = seq[0].strip()[1:].split(None,1)
- id=title[0]
- if len(title) == 2:
- definition,info=parseFastaDescription(title[1], tagparser)
- else:
- info= None
- definition=None
-
- seq=joinseq(seq[1:])
- return bioseqfactory(id, seq, definition,info,rawparser)
-
-
-def fastaNucParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
- return fastaParser(seq,NucSequence,tagparser=tagparser,joinseq=_fastaJoinSeq)
-
-def fastaAAParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
- return fastaParser(seq,AASequence,tagparser=tagparser,joinseq=_fastaJoinSeq)
-
-def fastaIterator(file,bioseqfactory=bioSeqGenerator,
- tagparser=_default_raw_parser,
- joinseq=_fastaJoinSeq):
- '''
- iterate through a fasta file sequence by sequence.
- Returned sequences by this iterator will be BioSequence
- instances
-
- @param file: a line iterator containing fasta data or a filename
- @type file: an iterable object or str
- @param bioseqfactory: a callable object return a BioSequence
- instance.
- @type bioseqfactory: a callable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{BioSequence} instance
-
- @see: L{fastaNucIterator}
- @see: L{fastaAAIterator}
-
- >>> from obitools.format.sequence.fasta import fastaIterator
- >>> f = fastaIterator('monfichier')
- >>> s = f.next()
- >>> print s
- gctagctagcatgctagcatgcta
- >>>
- '''
- rawparser=tagparser
- allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
- tagparser = re.compile('( *%s)+' % allparser)
-
- for entry in fastaEntryIterator(file):
- yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
-
-def rawFastaIterator(file,bioseqfactory=bioSeqGenerator,
- tagparser=_default_raw_parser,
- joinseq=_fastaJoinSeq):
-
- rawparser=tagparser
- allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
- tagparser = re.compile('( *%s)+' % allparser)
-
- for entry in rawFastaEntryIterator(file):
- entry=entry.strip()
- yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
-
-def fastaNucIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fasta file sequence by sequence.
- Returned sequences by this iterator will be NucSequence
- instances
-
- @param file: a line iterator containint fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{NucBioSequence} instance
- @rtype: a generator object
-
- @see: L{fastaIterator}
- @see: L{fastaAAIterator}
- '''
- return fastaIterator(file, NucSequence,tagparser)
-
-def fastaAAIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fasta file sequence by sequence.
- Returned sequences by this iterator will be AASequence
- instances
-
- @param file: a line iterator containing fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{AABioSequence} instance
-
- @see: L{fastaIterator}
- @see: L{fastaNucIterator}
- '''
- return fastaIterator(file, AASequence,tagparser)
-
-def formatFasta(data,gbmode=False,upper=False,restrict=None):
- '''
- Convert a seqence or a set of sequences in a
- string following the fasta format
-
- @param data: sequence or a set of sequences
- @type data: BioSequence instance or an iterable object
- on BioSequence instances
-
- @param gbmode: if set to C{True} identifier part of the title
- line follows recommendation from nbci to allow
- sequence indexing with the blast formatdb command.
- @type gbmode: bool
-
- @param restrict: a set of key name that will be print in the formated
- output. If restrict is set to C{None} (default) then
- all keys are formated.
- @type restrict: any iterable value or None
-
- @return: a fasta formated string
- @rtype: str
- '''
- if isinstance(data, BioSequence):
- data = [data]
-
- if restrict is not None and not isinstance(restrict, set):
- restrict = set(restrict)
-
- rep = []
- for sequence in data:
- seq = str(sequence)
- if sequence.definition is None:
- definition=''
- else:
- definition=sequence.definition
- if upper:
- frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
- else:
- frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
- info='; '.join(['%s=%s' % x
- for x in sequence.rawiteritems()
- if restrict is None or x[0] in restrict])
- if info:
- info=info+';'
- if sequence._rawinfo is not None and sequence._rawinfo:
- info+=" " + sequence._rawinfo.strip()
-
- id = sequence.id
- if gbmode:
- if 'gi' in sequence:
- id = "gi|%s|%s" % (sequence['gi'],id)
- else:
- id = "lcl|%s|" % (id)
- title='>%s %s %s' %(id,info,definition)
- rep.append("%s\n%s" % (title,frgseq))
- return '\n'.join(rep)
-
-def formatSAPFastaGenerator(options):
- loadTaxonomyDatabase(options)
-
- taxonomy=None
- if options.taxonomy is not None:
- taxonomy=options.taxonomy
-
- assert taxonomy is not None,"SAP formating require indication of a taxonomy database"
-
- ranks = ('superkingdom', 'kingdom', 'subkingdom', 'superphylum',
- 'phylum', 'subphylum', 'superclass', 'class', 'subclass',
- 'infraclass', 'superorder', 'order', 'suborder', 'infraorder',
- 'parvorder', 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe',
- 'subtribe', 'supergenus', 'genus', 'subgenus', 'species group',
- 'species subgroup', 'species', 'subspecies')
-
- trank=set(taxonomy._ranks)
- ranks = [taxonomy._ranks.index(x) for x in ranks if x in trank]
-
- strict= options.strictsap
-
- def formatSAPFasta(data,gbmode=False,upper=False,restrict=None):
- '''
- Convert a seqence or a set of sequences in a
- string following the fasta format as recommended for the SAP
- software
-
- http://ib.berkeley.edu/labs/slatkin/munch/StatisticalAssignmentPackage.html
-
- @param data: sequence or a set of sequences
- @type data: BioSequence instance or an iterable object
- on BioSequence instances
-
- @param gbmode: if set to C{True} identifier part of the title
- line follows recommendation from nbci to allow
- sequence indexing with the blast formatdb command.
- @type gbmode: bool
-
- @param restrict: a set of key name that will be print in the formated
- output. If restrict is set to C{None} (default) then
- all keys are formated.
- @type restrict: any iterable value or None
-
- @return: a fasta formated string
- @rtype: str
- '''
- if isinstance(data, BioSequence):
- data = [data]
-
- if restrict is not None and not isinstance(restrict, set):
- restrict = set(restrict)
-
- rep = []
- for sequence in data:
- seq = str(sequence)
-
- if upper:
- frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
- else:
- frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
-
- try:
- taxid = sequence["taxid"]
- except KeyError:
- if strict:
- raise AssertionError('All sequence must have a taxid')
- else:
- continue
-
- definition=' ;'
-
- for r in ranks:
- taxon = taxonomy.getTaxonAtRank(taxid,r)
- if taxon is not None:
- definition+=' %s: %s,' % (taxonomy._ranks[r],taxonomy.getPreferedName(taxon))
-
- definition='%s ; %s' % (definition[0:-1],taxonomy.getPreferedName(taxid))
-
- id = sequence.id
- if gbmode:
- if 'gi' in sequence:
- id = "gi|%s|%s" % (sequence['gi'],id)
- else:
- id = "lcl|%s|" % (id)
- title='>%s%s' %(id,definition)
- rep.append("%s\n%s" % (title,frgseq))
- return '\n'.join(rep)
-
- return formatSAPFasta
-
-class FastaIterator(SequenceFileIterator):
-
-
- entryIterator = genericEntryIteratorGenerator(startEntry='>')
- classmethod(entryIterator)
-
- def __init__(self,inputfile,bioseqfactory=bioSeqGenerator,
- tagparser=_default_raw_parser,
- joinseq=_fastaJoinSeq):
-
- SequenceFileIterator.__init__(self, inputfile, bioseqfactory)
-
- self.__file = FastaIterator.entryIterator(self._inputfile)
-
- self._tagparser = tagparser
- self._joinseq = joinseq
-
- def get_tagparser(self):
- return self.__tagparser
-
-
- def set_tagparser(self, value):
- self._rawparser = value
- allparser = value % '[a-zA-Z][a-zA-Z0-9_]*'
- self.__tagparser = re.compile('( *%s)+' % allparser)
-
- def _parseFastaDescription(self,ds):
-
- m = self._tagparser.search(' '+ds)
- if m is not None:
- info=m.group(0)
- definition = ds[m.end(0):].strip()
- else:
- info=None
- definition=ds
-
- return definition,info
-
-
- def _parser(self):
- '''
- Parse a fasta record.
-
- @attention: internal purpose function
-
- @return: a C{BioSequence} instance
- '''
- seq = self._seq.split('\n')
- title = seq[0].strip()[1:].split(None,1)
- id=title[0]
- if len(title) == 2:
- definition,info=self._parseFastaDescription(title[1])
- else:
- info= None
- definition=None
-
- seq=self._joinseq(seq[1:])
-
- return self._bioseqfactory(id, seq, definition,info,self._rawparser)
-
- _tagparser = property(get_tagparser, set_tagparser, None, "_tagparser's docstring")
diff --git a/obitools/fasta/_fasta.so b/obitools/fasta/_fasta.so
deleted file mode 100755
index de300ce..0000000
Binary files a/obitools/fasta/_fasta.so and /dev/null differ
diff --git a/obitools/fastq/__init__.py b/obitools/fastq/__init__.py
deleted file mode 100644
index 1cf3535..0000000
--- a/obitools/fastq/__init__.py
+++ /dev/null
@@ -1,190 +0,0 @@
-'''
-Created on 29 aout 2009
-
-@author: coissac
-'''
-
-from obitools import BioSequence
-from obitools import _default_raw_parser
-from obitools.format.genericparser import genericEntryIteratorGenerator
-from obitools import bioSeqGenerator,AASequence,NucSequence
-from obitools.fasta import parseFastaDescription
-from _fastq import fastqQualitySangerDecoder,fastqQualitySolexaDecoder
-from _fastq import qualityToSangerError,qualityToSolexaError
-from _fastq import errorToSangerFastQStr
-from _fastq import formatFastq
-from _fastq import fastqParserGenetator
-from obitools.utils import universalOpen
-
-import re
-
-fastqEntryIterator=genericEntryIteratorGenerator(startEntry='^@',endEntry="^\+",strip=True,join=False)
-
-#def fastqParserGenetator(fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_parseFastaTag):
-#
-# qualityDecoder,errorDecoder = {'sanger' : (fastqQualitySangerDecoder,qualityToSangerError),
-# 'solexa' : (fastqQualitySolexaDecoder,qualityToSolexaError),
-# 'illumina' : (fastqQualitySolexaDecoder,qualityToSangerError)}[fastqvariant]
-#
-# def fastqParser(seq):
-# '''
-# Parse a fasta record.
-#
-# @attention: internal purpose function
-#
-# @param seq: a sequence object containing all lines corresponding
-# to one fasta sequence
-# @type seq: C{list} or C{tuple} of C{str}
-#
-# @param bioseqfactory: a callable object return a BioSequence
-# instance.
-# @type bioseqfactory: a callable object
-#
-# @param tagparser: a compiled regular expression usable
-# to identify key, value couples from
-# title line.
-# @type tagparser: regex instance
-#
-# @return: a C{BioSequence} instance
-# '''
-#
-# title = seq[0][1:].split(None,1)
-# id=title[0]
-# if len(title) == 2:
-# definition,info=parseFastaDescription(title[1], tagparser)
-# else:
-# info= {}
-# definition=None
-#
-# quality=errorDecoder(qualityDecoder(seq[3]))
-#
-# seq=seq[1]
-#
-# seq = bioseqfactory(id, seq, definition,False,**info)
-# seq.quality = quality
-#
-# return seq
-#
-# return fastqParser
-
-
-def fastqIterator(file,fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_default_raw_parser):
- '''
- iterate through a fasta file sequence by sequence.
- Returned sequences by this iterator will be BioSequence
- instances
-
- @param file: a line iterator containing fasta data or a filename
- @type file: an iterable object or str
- @param bioseqfactory: a callable object return a BioSequence
- instance.
- @type bioseqfactory: a callable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{BioSequence} instance
-
- @see: L{fastaNucIterator}
- @see: L{fastaAAIterator}
-
- '''
- fastqParser=fastqParserGenetator(fastqvariant, bioseqfactory, tagparser)
- file = universalOpen(file)
- for entry in fastqEntryIterator(file):
- title=entry[0]
- seq="".join(entry[1:-1])
- quality=''
- lenseq=len(seq)
- while (len(quality) < lenseq):
- quality+=file.next().strip()
-
- yield fastqParser([title,seq,'+',quality])
-
-def fastqSangerIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fastq file sequence by sequence.
- Returned sequences by this iterator will be NucSequence
- instances
-
- @param file: a line iterator containint fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{NucBioSequence} instance
-
- @see: L{fastqIterator}
- @see: L{fastqAAIterator}
- '''
- return fastqIterator(file,'sanger',NucSequence,tagparser)
-
-def fastqSolexaIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fastq file sequence by sequence.
- Returned sequences by this iterator will be NucSequence
- instances
-
- @param file: a line iterator containint fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{NucBioSequence} instance
-
- @see: L{fastqIterator}
- @see: L{fastqAAIterator}
- '''
- return fastqIterator(file,'solexa',NucSequence,tagparser)
-
-def fastqIlluminaIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fastq file sequence by sequence.
- Returned sequences by this iterator will be NucSequence
- instances
-
- @param file: a line iterator containint fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{NucBioSequence} instance
-
- @see: L{fastqIterator}
- @see: L{fastqAAIterator}
- '''
- return fastqIterator(file,'illumina',NucSequence,tagparser)
-
-def fastqAAIterator(file,tagparser=_default_raw_parser):
- '''
- iterate through a fastq file sequence by sequence.
- Returned sequences by this iterator will be AASequence
- instances
-
- @param file: a line iterator containing fasta data
- @type file: an iterable object
-
- @param tagparser: a compiled regular expression usable
- to identify key, value couples from
- title line.
- @type tagparser: regex instance
-
- @return: an iterator on C{AABioSequence} instance
-
- @see: L{fastqIterator}
- @see: L{fastqNucIterator}
- '''
- return fastqIterator(file,'sanger',AASequence,tagparser)
-
-
diff --git a/obitools/fastq/_fastq.so b/obitools/fastq/_fastq.so
deleted file mode 100755
index 4e3b942..0000000
Binary files a/obitools/fastq/_fastq.so and /dev/null differ
diff --git a/obitools/fnaqual/__init__.py b/obitools/fnaqual/__init__.py
deleted file mode 100644
index 384eb96..0000000
--- a/obitools/fnaqual/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-
-fnaTag=' %s *= *([^\s]+)'
diff --git a/obitools/fnaqual/fasta.py b/obitools/fnaqual/fasta.py
deleted file mode 100644
index 102a13e..0000000
--- a/obitools/fnaqual/fasta.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from obitools.fasta import fastaNucIterator
-from obitools.fnaqual import fnaTag
-
-def fnaFastaIterator(file):
-
- x = fastaNucIterator(file, fnaTag)
-
- return x
\ No newline at end of file
diff --git a/obitools/fnaqual/quality.py b/obitools/fnaqual/quality.py
deleted file mode 100644
index 092f610..0000000
--- a/obitools/fnaqual/quality.py
+++ /dev/null
@@ -1,137 +0,0 @@
-"""
-
-
-"""
-
-from obitools import _default_raw_parser
-from obitools.fasta import fastaIterator
-from obitools.fnaqual import fnaTag
-from obitools.location import Location
-
-import re
-
-
-class QualitySequence(list):
-
- def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
- '''
-
- @param id:
- @param seq:
- @param definition:
- '''
- list.__init__(self,seq)
- self._info = info
- self.definition=definition
- self.id=id
- self._rawinfo=' ' + rawinfo
- self._rawparser=rawparser
-
- def getDefinition(self):
- '''
- Sequence definition getter
-
- @return: the sequence definition
- @rtype: str
-
- '''
- return self._definition
-
- def setDefinition(self, value):
- self._definition = value
-
- def getId(self):
- return self._id
-
- def setId(self, value):
- self._id = value
-
- def getKey(self,key):
- if key not in self._info:
- p = re.compile(self._rawparser % key)
- m = p.search(self._rawinfo)
- if m is not None:
- v=m.group(1)
- self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
- try:
- v = eval(v)
- except:
- pass
- self._info[key]=v
- else:
- raise KeyError,key
- else:
- v=self._info[key]
- return v
-
- def __getitem__(self,key):
- if isinstance(key,Location):
- return key.extractSequence(self)
- elif isinstance(key, str):
- return self._getKey(key)
- elif isinstance(key, int):
- return list.__getitem__(self,key)
- elif isinstance(key, slice):
- subseq=list.__getitem__(self,key)
- info = dict(self._info)
- if key.start is not None:
- start = key.start +1
- else:
- start = 1
- if key.stop is not None:
- stop = key.stop+1
- else:
- stop = len(self)
- if key.step is not None:
- step = key.step
- else:
- step = 1
-
- info['cut']='[%d,%d,%s]' % (start,stop,step)
- return QualitySequence(self.id, subseq, self.definition,self._rawinfo,self._rawparser,**info)
-
- raise TypeError,'key must be an integer, a str or a slice'
-
- def __setitem__(self,key,value):
- self._info[key]=value
-
- def __delitem__(self,key):
- if isinstance(key, str):
- del self._info[key]
- else:
- raise TypeError,key
-
- def __iter__(self):
- return list.__iter__(self)
-
- def __contains__(self,key):
- return key in self._info
-
- def getTags(self):
- return self._info
-
- def complement(self):
- '''
-
- '''
- cseq = self[::-1]
- rep = QualitySequence(self.id,cseq,self.definition,self._rawinfo,self._rawparser,**self._info)
- rep._info['complemented']=not rep._info.get('complemented',False)
- return rep
-
-
- definition = property(getDefinition, setDefinition, None, "Sequence Definition")
-
- id = property(getId, setId, None, 'Sequence identifier')
-
-
-def _qualityJoinSeq(seqarray):
- text = ' '.join([x.strip() for x in seqarray])
- return [int(x) for x in text.split()]
-
-def qualityIterator(file):
- for q in fastaIterator(file, QualitySequence, fnaTag, _qualityJoinSeq):
- yield q
-
-
-
\ No newline at end of file
diff --git a/obitools/format/__init__.py b/obitools/format/__init__.py
deleted file mode 100644
index a680505..0000000
--- a/obitools/format/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from obitools import bioSeqGenerator
-from obitools.utils import universalOpen
-
-
-class SequenceFileIterator:
-
- def __init__(self,inputfile,bioseqfactory=bioSeqGenerator):
- self._inputfile = universalOpen(inputfile)
- self._bioseqfactory = bioseqfactory
-
- def get_inputfile(self):
- return self.__file
-
-
- def get_bioseqfactory(self):
- return self.__bioseqfactory
-
- def next(self):
- entry = self.inputfile.next()
- return self._parse(entry)
-
- def __iter__(self):
- return self
-
- _inputfile = property(get_inputfile, None, None, "_file's docstring")
- _bioseqfactory = property(get_bioseqfactory, None, None, "_bioseqfactory's docstring")
-
-
\ No newline at end of file
diff --git a/obitools/format/_format.so b/obitools/format/_format.so
deleted file mode 100755
index 92e460d..0000000
Binary files a/obitools/format/_format.so and /dev/null differ
diff --git a/obitools/format/genericparser/__init__.py b/obitools/format/genericparser/__init__.py
deleted file mode 100644
index fecc72f..0000000
--- a/obitools/format/genericparser/__init__.py
+++ /dev/null
@@ -1,217 +0,0 @@
-"""
-G{packagetree format}
-"""
-import re
-
-from obitools.utils import universalOpen
-
-def genericEntryIteratorGenerator(startEntry=None,endEntry=None,
- head=False,tail=False,
- strip=False,join=True):
- '''
- Transfome a text line iterator to an entry oriented iterator.
-
- This iterator converted is useful to implement first stage
- of flat file parsing.
-
- @param startEntry: a regular pattern matching the beginning of
- an entry
- @type startEntry: C{str} or None
- @param endEntry: a regular pattern matching the end of
- an entry
- @type endEntry: C{str} or None
- @param head: indicate if an header is present before
- the first entry (as in many original genbank
- files)
- @type head: C{bool}
- @param tail: indicate if some extra informations are present
- after the last entry.
- @type tail: C{bool}
-
- @return: an iterator on entries in text format
- @rtype: an iterator on C{str}
- '''
-
- def isBeginning(line):
- return startEntry is None or startEntry.match(line) is not None
-
- def isEnding(line):
- return ((endEntry is not None and endEntry.match(line) is not None) or
- (endEntry is None and startEntry is not None and startEntry.match(line) is not None))
-
- def transparentIteratorEntry(file):
- file = universalOpen(file)
- return file
-
- def genericEntryIterator(file):
- file = universalOpen(file)
- entry = []
- line = file.next()
- started = head or isBeginning(line)
-
- try:
- while 1:
- while not started:
- line = file.next()
- started = isBeginning(line)
-
- if endEntry is None:
- entry.append(line)
- line = file.next()
-
- while started:
- end = isEnding(line)
- if end:
- if endEntry is not None:
- entry.append(line)
- if join:
- e = ''.join(entry)
- if strip:
- e=e.strip()
- else:
- e=entry
- if strip:
- e=[x.strip() for x in e]
- entry=[]
- yield e
- started=False
- if endEntry is not None:
- line = file.next()
- else:
- entry.append(line)
- line = file.next()
-
- started = isBeginning(line)
-
- except StopIteration:
- if entry and (endEntry is None or tail):
- if join:
- e = ''.join(entry)
- if strip:
- e=e.strip()
- else:
- e=entry
- if strip:
- e=[x.strip() for x in e]
- yield e
-
-
-
- if startEntry is not None:
- startEntry = re.compile(startEntry)
- if endEntry is not None:
- endEntry = re.compile(endEntry)
-
- if startEntry is None and endEntry is None:
- return transparentIteratorEntry
-
- return genericEntryIterator
-
-
-class GenericParser(object):
-
- def __init__(self,
- startEntry=None,
- endEntry=None,
- head=False,
- tail=False,
- strip=False,
- **parseAction):
- """
- @param startEntry: a regular pattern matching the beginning of
- an entry
- @type startEntry: C{str} or None
- @param endEntry: a regular pattern matching the end of
- an entry
- @type endEntry: C{str} or None
- @param head: indicate if an header is present before
- the first entry (as in many original genbank
- files)
- @type head: C{bool}
- @param tail: indicate if some extra informations are present
- after the last entry.
- @type tail: C{bool}
-
- @param parseAction:
-
- """
- self.flatiterator= genericEntryIteratorGenerator(startEntry,
- endEntry,
- head,
- tail,
- strip)
-
- self.action={}
-
- for k in parseAction:
- self.addParseAction(k,*parseAction[k])
-
- def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
- '''
- Add a parse action to the generic parser. A parse action
- allows to extract one information from an entry. A parse
- action is defined by a name and a method to extract this
- information from the full text entry.
-
- A parse action can be defined following two ways.
-
- - via regular expression patterns
-
- - via dedicated function.
-
- In the first case, you have to indicate at least the
- dataMatcher regular pattern. This pattern should match exactly
- the data part you want to retrieve. If cleanning of extra
- characters is needed. The second pattern dataCLeanner can be
- used to specifyed these characters.
-
- In the second case you must provide a callable object (function)
- that extract and clean data from the text entry. This function
- should return an array containing all data retrevied even if
- no data or only one data is retrevied.
-
- @summary: Add a parse action to the generic parser.
-
- @param name: name of the data extracted
- @type name: C{str}
- @param dataMatcher: a regular pattern matching the data
- or a callable object parsing the
- entry and returning a list of marched data
- @type dataMatcher: C{str} or C{SRE_Pattern} instance or a callable
- object
- @param dataCleaner: a regular pattern matching part of the data
- to suppress.
- @type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None}
- @param cleanSub: string used to replace dataCleaner matches.
- Default is an empty string
- @type cleanSub: C{str}
-
- '''
- if callable(dataMatcher):
- self.action[name]=dataMatcher
- else :
- if isinstance(dataMatcher, str):
- dataMatcher=re.compile(dataMatcher)
- if isinstance(dataCleaner, str):
- dataCleaner=re.compile(dataCleaner)
- self.action[name]=self._buildREParser(dataMatcher,
- dataCleaner,
- cleanSub)
-
- def _buildREParser(self,dataMatcher,dataCleaner,cleanSub):
- def parser(data):
- x = dataMatcher.findall(data)
- if dataCleaner is not None:
- x = [dataCleaner.sub(cleanSub,y) for y in x]
- return x
- return parser
-
- def __call__(self,file):
- for e in self.flatiterator(file):
- pe = {'fullentry':e}
- for k in self.action:
- pe[k]=self.action[k](e)
- yield pe
-
-
-
\ No newline at end of file
diff --git a/obitools/format/ontology/__init__.py b/obitools/format/ontology/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/format/ontology/go_obo.py b/obitools/format/ontology/go_obo.py
deleted file mode 100644
index cd1d87e..0000000
--- a/obitools/format/ontology/go_obo.py
+++ /dev/null
@@ -1,274 +0,0 @@
-__docformat__ = 'restructuredtext'
-
-import re
-import string
-import textwrap
-
-
-from obitools.obo.go.parser import GOEntryIterator
-from obitools.obo.go.parser import GOTerm
-from obitools.obo.go.parser import GOEntry
-
-"""
-go_obo.py : gene_ontology_edit.obo file parser:
-----------------------------------------------------
-
-- OBOFile class: open a flat file and return an entry.
-
-"""
-class OBOFile(object):
-    """
-    Iterator over all entries of an OBO file.
-
-    Each record produced by the underlying GOEntryIterator is wrapped
-    into a Term, an Entry or a Header instance according to its class.
-    After each call to next(), the isaterm attribute tells whether
-    the last returned value is a Term.
-    """
-
-    def __init__(self,_path):
-        self.file = GOEntryIterator(_path)
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        record = self.file.next()
-
-        # remember the nature of the last record before wrapping it
-        self.isaterm = isinstance(record, GOTerm)
-        if self.isaterm:
-            return Term(record)
-        if isinstance(record, GOEntry):
-            return Entry(record)
-        return Header(record)
-
-
-############# everything below should move down to obitools/ogo/go/parser.py ##########
-
-# define an XRef into a go_obo.py script in the microbi pylib
-class Xref(object):
-    """
-    Class Xref
-    Xref.db    Xref database
-    Xref.id    Xref identifier
-    """
-
-    def __init__(self,description):
-        """
-        Build a cross reference from a 'database:identifier' string.
-
-        @param description: the cross reference text
-        @type description: C{str}
-
-        @raise IndexError: if description contains no ':' separator
-        """
-        # split on the first colon only: an identifier may itself contain
-        # colons (an URL for example) and must not be truncated
-        data = description.split(':',1)
-        self.db = data[0].strip()
-        self.id = data[1].strip()
-
-# define a RelatedTerm into a go_obo.py script in the microbi pylib
-class RelatedTerm(object):
-    """
-    Class RelatedTerm
-    RelatedTerm.relation       RelatedTerm relation
-    RelatedTerm.related_term   RelatedTerm GO identifier (without 'GO:' prefix)
-    RelatedTerm.comment        all terms have 0 or 1 comment
-    """
-
-    def __init__(self,relation,value,comment):
-        """
-        @param relation: name of the relation
-        @param value: identifier of the related term
-        @type value: C{str}
-        @param comment: comment attached to the relation
-        """
-        self.relation = relation
-        # str.strip('GO:') would remove every leading and trailing 'G', 'O'
-        # or ':' character; only the exact 'GO:' prefix has to be removed
-        if value.startswith('GO:'):
-            value = value[3:]
-        self.related_term = value
-        self.comment = comment
-
-
-# define into a go_obo.py script in the microbi pylib
-#class Term(object):
-# """
-# class representing an OBO term (entry).
-# """
-#
-# def __init__(self):
-# raise RuntimeError('biodb.go_obo is an abstract class')
-#
-# def __checkEntry__(self):
-# minimum=(hasattr(self,'goid') )
-# if not minimum:
-# raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
-
-class Term(object):
-    """
-    Class Term
-    representing a GO term.
-
-    The constructor copies the fields of a parsed term record into plain
-    attributes: goid, name, comment, list_synonyms, definition, is_a,
-    part_of, regulates, negatively_regulates, positively_regulates,
-    related_term, considers, replaces, is_obsolete, alt_ids, xrefs and
-    subsets. GO identifiers are stored without their 'GO:' prefix.
-    """
-
-    def __init__(self,data=None):
-        """
-        @param data: the parsed term record; when it is false no
-                     attribute other than data and isaterm is set
-        """
-        self.data=data
-        self.isaterm = True
-
-        if data:
-            self.__filtreGoid__()
-            self.__filtreName__()
-            self.__filtreComment__()
-            self.__filtreSynonyms__()
-            self.__filtreDef__()
-            self.__filtreParents__()
-            self.__filtreRelationships__()
-            self.__filtreRelation__()
-            self.__filtreObsolete__()
-            self.__filtreAltIds__()
-            self.__filtreXRefs__()
-            self.__filtreSubsets__()
-
-            # check if all required attributes were valued
-            # NOTE(review): the check is assumed to belong to the
-            # 'if data' branch - confirm against the original layout
-            self.__checkEntry__()
-
-
-    @staticmethod
-    def _bareGoId(value):
-        """
-        Return value without its leading 'GO:' prefix.
-
-        str.strip('GO:') is not used because it removes every leading and
-        trailing 'G', 'O' or ':' character instead of the prefix.
-        """
-        if value.startswith('GO:'):
-            return value[3:]
-        return value
-
-
-    def __checkEntry__(self):
-        """
-        @raise AssertionError: if the goid attribute is missing
-        """
-        minimum=(hasattr(self,'goid') )
-        if not minimum:
-            raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
-
-
-    def __filtreGoid__(self):
-        """
-        Extract GO id.
-        """
-        self.goid = self._bareGoId(self.data.id.value)
-
-    def __filtreName__(self):
-        """
-        Extract GO name.
-        """
-        self.name = self.data.name.value
-
-    def __filtreSynonyms__(self):
-        """
-        Extract GO synonym(s) as a dictionary mapping each synonym
-        to its scope.
-        """
-        self.list_synonyms = {}
-        if self.data.synonyms:
-            for y in self.data.synonyms:
-                self.list_synonyms[y.value] = y.scope
-
-
-    def __filtreComment__(self):
-        """
-        manage None comments
-        """
-        if self.data.comment is not None:
-            self.comment = self.data.comment.value
-        else:
-            self.comment = ""
-
-    def __filtreDef__(self):
-        """
-        Extract GO definition.
-        """
-        if self.data.definition is not None:
-            self.definition = self.data.definition.value
-        else:
-            self.definition = ""
-
-    def __filtreParents__(self):
-        """
-        To make the is_a hierarchy
-        """
-        if self.data.is_a is not None:
-            self.is_a = set([self._bareGoId(isa.value) for isa in self.data.is_a])
-        else:
-            self.is_a = set()
-
-    def __filtreRelation__(self):
-        """
-        To make the part_of hierarchy
-        """
-        self.part_of = set()
-        self.regulates = set()
-        self.negatively_regulates = set()
-        self.positively_regulates = set()
-
-        # relations of any other kind are ignored here
-        targets = {"part_of":self.part_of,
-                   "regulates":self.regulates,
-                   "negatively_regulates":self.negatively_regulates,
-                   "positively_regulates":self.positively_regulates}
-
-        if self.data.relationship is not None:
-            for rel in self.data.relationship:
-                if rel.relationship in targets:
-                    targets[rel.relationship].add(self._bareGoId(rel.value))
-
-
-    def __filtreRelationships__(self):
-        """
-        Relation list with other GO Terms (is_a, part_of or some regulates relation)
-        """
-        self.related_term =[]
-        if self.data.relationship is not None:
-            for x in self.data.relationship:
-                self.related_term.append(RelatedTerm(x.relationship,x.value,x.__doc__))
-                #self.related_term.append(RelatedTerm(x.relationship,x.value,x.comment))
-        if self.data.is_a is not None:
-            for x in self.data.is_a:
-                self.related_term.append(RelatedTerm('is_a',x.value,x.__doc__))
-                #self.related_term.append(RelatedTerm('is_a',x.value,x.comment))
-
-
-
-    def __filtreObsolete__(self):
-        """
-        for each obsolete terms corresponds a set of GO Identifiers
-        so that this GO term is consider as others GO Terms
-        """
-        self.considers = set()
-        self.replaces = set()
-        self.is_obsolete = self.data.is_obsolete
-        if self.data.is_obsolete:
-            if self.data.consider:
-                self.considers = set([self._bareGoId(considered.value) for considered in self.data.consider])
-            if self.data.replaced_by:
-                self.replaces = set([self._bareGoId(replaced.value) for replaced in self.data.replaced_by])
-
-
-    def __filtreAltIds__(self):
-        """
-        alternate(s) id(s) for this term (= alias in the geneontology schema model!)
-        """
-        if self.data.alt_ids:
-            self.alt_ids = set([self._bareGoId(x.value) for x in self.data.alt_ids])
-        else:
-            self.alt_ids = set()
-
-    def __filtreXRefs__(self):
-        """
-        cross references to other databases
-        """
-        self.xrefs = set()
-        if self.data.xrefs:
-            self.xrefs = set([Xref(x.value.reference) for x in self.data.xrefs])
-
-
-    def __filtreSubsets__(self):
-        """
-        subset label to make smaller sets of GO Terms
-        """
-        self.subsets = set()
-        if self.data.subsets:
-            self.subsets = set([x.value for x in self.data.subsets])
-
-
-class Entry(object):
-    """
-    A stanza entry other than a term, like [Typedef] for example.
-
-    Entry.data       the wrapped record
-    Entry.isaterm    always False
-    Entry.isanentry  always True
-    """
-    def __init__(self,data=None):
-        self.isaterm, self.isanentry = False, True
-        self.data = data
-
-
-class Header(object):
-    """
-    Class representing a GO header.
-
-    Header.data     the wrapped record
-    Header.isaterm  always False
-    """
-
-    def __init__(self,data=None):
-        """
-        @param data: the header record to wrap
-        """
-        self.isaterm = False
-        self.data = data
-
-
-
diff --git a/obitools/format/options.py b/obitools/format/options.py
deleted file mode 100644
index c42a23f..0000000
--- a/obitools/format/options.py
+++ /dev/null
@@ -1,284 +0,0 @@
-'''
-Created on 13 oct. 2009
-
-@author: coissac
-'''
-
-from obitools.format.sequence.embl import emblIterator
-from obitools.format.sequence.genbank import genbankIterator
-from obitools.format.sequence.fnaqual import fnaFastaIterator
-from obitools.format.sequence.fasta import fastaAAIterator,fastaNucIterator,fastaIterator
-from obitools.format.sequence.fastq import fastqIlluminaIterator,fastqSolexaIterator
-from obitools.fastq import fastqSangerIterator
-from obitools.fnaqual.quality import qualityIterator
-from obitools.fasta import formatFasta, rawFastaIterator,\
- formatSAPFastaGenerator
-from obitools.fastq import formatFastq
-
-from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
-from obitools.ecopcr.options import loadTaxonomyDatabase
-
-#from obitools.format._format import printOutput
-
-from array import array
-from itertools import chain
-import sys
-
-import re
-from obitools.ecopcr import EcoPCRFile
-
-
-def addInputFormatOption(optionManager):
-    '''
-    Register the command line options describing the input data.
-
-    Format flags store a constant in the 'seqinformat' destination,
-    --nuc and --prot store the molecule type in 'moltype' and --qual
-    stores a file name in 'withqualfile'. Every destination defaults
-    to None.
-
-    @param optionManager: an object providing an optparse-like
-                          add_option method
-    '''
-#    optionManager.add_option('--rank',
-#                             action="store_true", dest='addrank',
-#                             default=False,
-#                             help="add a rank attribute to the sequence "
-#                                  "indicating the sequence position in the input data")
-
-    def constFlag(flag,dest,const,text):
-        # all the flags but --qual follow the same store_const scheme
-        optionManager.add_option(flag,
-                                 action="store_const", dest=dest,
-                                 default=None,
-                                 const=const,
-                                 help=text)
-
-    constFlag('--genbank','seqinformat','genbank',
-              "input file is in genbank format")
-    constFlag('--embl','seqinformat','embl',
-              "input file is in embl format")
-    constFlag('--fasta','seqinformat','fasta',
-              "input file is in fasta nucleic format (including obitools fasta extentions)")
-    # this help text used to be a copy of the --fasta one
-    constFlag('--ecopcr','seqinformat','ecopcr',
-              "input file is in ecoPCR format")
-    constFlag('--raw-fasta','seqinformat','rawfasta',
-              "input file is in fasta format (but more tolerant to format variant)")
-    constFlag('--fna','seqinformat','fna',
-              "input file is in fasta nucleic format produced by 454 sequencer pipeline")
-
-    optionManager.add_option('--qual',
-                             action="store", dest="withqualfile",
-                             type='str',
-                             default=None,
-                             help="Specify the name of a quality file produced by 454 sequencer pipeline")
-
-    constFlag('--sanger','seqinformat','sanger',
-              "input file is in sanger fastq nucleic format (standard fastq)")
-    constFlag('--solexa','seqinformat','solexa',
-              "input file is in fastq nucleic format produced by solexa sequencer")
-    constFlag('--illumina','seqinformat','illumina',
-              "input file is in fastq nucleic format produced by old solexa sequencer")
-
-    constFlag('--nuc','moltype','nuc',
-              "input file is nucleic sequences")
-    constFlag('--prot','moltype','pep',
-              "input file is protein sequences")
-
-
-def addOutputFormatOption(optionManager):
-    '''
-    Register the command line options describing the output data.
-
-    The three format flags store a formatting function in the 'output'
-    destination; --ecopcr-output stores its argument in 'ecopcroutput';
-    --strict-sap and --uppercase are boolean flags.
-
-    @param optionManager: an object providing an optparse-like
-                          add_option method
-    '''
-    optionManager.add_option('--fastq-output',
-                             action="store_const", dest="output",
-                             default=None,
-                             const=formatFastq,
-                             help="output sequences in sanger fastq format")
-    optionManager.add_option('--fasta-output',
-                             action="store_const", dest="output",
-                             default=None,
-                             const=formatFasta,
-                             help="output sequences in obitools fasta format")
-    optionManager.add_option('--sap-output',
-                             action="store_const", dest="output",
-                             default=None,
-                             const=formatSAPFastaGenerator,
-                             help="output sequences in sap fasta format")
-    # NOTE(review): this help text duplicates the --uppercase one and
-    # probably does not describe what --strict-sap does - confirm
-    optionManager.add_option('--strict-sap',
-                             action='store_true',dest='strictsap',
-                             default=False,
-                             help="Print sequences in upper case (default is lower case)")
-    optionManager.add_option('--ecopcr-output',
-                             action="store", dest="ecopcroutput",
-                             default=None,
-                             help="output sequences in obitools ecopcr format")
-    optionManager.add_option('--uppercase',
-                             action='store_true',dest='uppercase',
-                             default=False,
-                             help="Print sequences in upper case (default is lower case)")
-
-
-
-def addInOutputOption(optionManager):
-    '''
-    Register both the input and the output format options.
-
-    @param optionManager: the option manager passed to
-                          addInputFormatOption then addOutputFormatOption
-    '''
-    for register in (addInputFormatOption,addOutputFormatOption):
-        register(optionManager)
-
-
-
-
-
-def autoEntriesIterator(options):
-    '''
-    Select the entry reader corresponding to the input format options.
-
-    As a side effect the outputFormater and outputFormat attributes of
-    options are set to the default output format matching the input
-    one (fasta unless the input carries quality data). When no input
-    format is specified, they are set when the returned reader is
-    called, from the first line of the data.
-
-    @param options: the option values; seqinformat, moltype and
-                    withqualfile are read
-
-    @return: a function taking a line iterator and returning an
-             iterator over the entries
-
-    @raise AssertionError: raised by the automatic reader when the
-                           first line matches no known format
-    '''
-    options.outputFormater=formatFasta
-    options.outputFormat="fasta"
-
-    # A data line made of '|' separated columns, the second and the third
-    # ones being numeric. The separators have to be escaped: unescaped
-    # they turn the pattern into an alternation whose last branch matches
-    # any line containing a space.
-    ecopcr_pattern = re.compile(r'^[^ ]+ +\| +[0-9]+ +\| +[0-9]+ +\| +')
-
-    def annotatedIterator(formatIterator):
-        options.outputFormater=formatFasta
-        options.outputFormat="fasta"
-        def iterator(lineiterator):
-            for s in formatIterator(lineiterator):
-                s.extractTaxon()
-                yield s
-
-        return iterator
-
-    def withQualIterator(qualityfile):
-        options.outputFormater=formatFastq
-        options.outputFormat="fastq"
-        def iterator(lineiterator):
-            for s in fnaFastaIterator(lineiterator):
-                # one quality record is consumed per sequence
-                q = qualityfile.next()
-                quality = array('d',(10.**(-x/10.) for x in q))
-                s.quality=quality
-                yield s
-
-        return iterator
-
-    def autoSequenceIterator(lineiterator):
-        options.outputFormater=formatFasta
-        options.outputFormat="fasta"
-        first = lineiterator.next()
-        if first.startswith(">"):
-            if options.withqualfile is not None:
-                qualfile=qualityIterator(options.withqualfile)
-                reader=withQualIterator(qualfile)
-                options.outputFormater=formatFastq
-                options.outputFormat="fastq"
-            elif options.moltype=='nuc':
-                reader=fastaNucIterator
-            elif options.moltype=='pep':
-                reader=fastaAAIterator
-            else:
-                reader=fastaIterator
-        elif first.startswith('@'):
-            reader=fastqSangerIterator
-            options.outputFormater=formatFastq
-            options.outputFormat="fastq"
-        elif first.startswith('ID '):
-            reader=emblIterator
-        elif first.startswith('LOCUS '):
-            reader=genbankIterator
-        elif first.startswith("#") or ecopcr_pattern.search(first):
-            reader=EcoPCRFile
-        else:
-            raise AssertionError('file is not in fasta, fastq, embl, genbank or ecoPCR format')
-
-        # the line read for the detection is pushed back in front of the data
-        input = reader(chain([first],lineiterator))
-
-        return input
-
-    if options.seqinformat is None:
-        reader = autoSequenceIterator
-    else:
-        if options.seqinformat=='fasta':
-            if options.moltype=='nuc':
-                reader=fastaNucIterator
-            elif options.moltype=='pep':
-                reader=fastaAAIterator
-            else:
-                reader=fastaIterator
-        elif options.seqinformat=='rawfasta':
-            reader=annotatedIterator(rawFastaIterator)
-        elif options.seqinformat=='genbank':
-            reader=annotatedIterator(genbankIterator)
-        elif options.seqinformat=='embl':
-            reader=annotatedIterator(emblIterator)
-        elif options.seqinformat=='fna':
-            reader=fnaFastaIterator
-        elif options.seqinformat=='sanger':
-            options.outputFormater=formatFastq
-            options.outputFormat="fastq"
-            reader=fastqSangerIterator
-        elif options.seqinformat=='solexa':
-            options.outputFormater=formatFastq
-            options.outputFormat="fastq"
-            reader=fastqSolexaIterator
-        elif options.seqinformat=='illumina':
-            options.outputFormater=formatFastq
-            options.outputFormat="fastq"
-            reader=fastqIlluminaIterator
-        elif options.seqinformat=='ecopcr':
-            reader=EcoPCRFile
-
-    if options.seqinformat=='fna' and options.withqualfile is not None:
-        qualfile=qualityIterator(options.withqualfile)
-        reader=withQualIterator(qualfile)
-        options.outputFormater=formatFastq
-        options.outputFormat="fastq"
-
-#    if options.addrank:
-#        reader = withRankIterator(reader)
-    return reader
-
-def sequenceWriterGenerator(options,output=sys.stdout):
-    '''
-    Build the function used to write sequences.
-
-    When options.ecopcroutput is set, sequences are handed over to an
-    EcoPCRDBSequenceWriter, otherwise they are formatted and written to
-    output followed by a newline.
-
-    @param options: the option values; uppercase, output, outputFormater
-                    and ecopcroutput are read
-    @param output: the file the formatted sequences are written to
-
-    @return: a function taking one sequence and writing it
-    '''
-    class SequenceWriter:
-        def __init__(self,options,file=sys.stdout):
-            self._format=None
-            self._file=file
-            self._upper=options.uppercase
-
-        def put(self,seq):
-            # the formatting function is resolved when the first sequence
-            # is written: explicit output option first, then the formatter
-            # associated to the input format, then fasta
-            if self._format is None:
-                self._format=formatFasta
-                requested = options.output
-                if requested is None:
-                    if options.outputFormater is not None:
-                        self._format=options.outputFormater
-                else:
-                    self._format=requested
-                    if requested is formatSAPFastaGenerator:
-                        self._format=formatSAPFastaGenerator(options)
-            text = self._format(seq,upper=self._upper)
-            try:
-                self._file.write(text)
-                self._file.write("\n")
-            except IOError:
-                sys.exit(0)
-
-    if options.ecopcroutput is None:
-        writer=SequenceWriter(options,output)
-    else:
-        taxo = loadTaxonomyDatabase(options)
-        writer=EcoPCRDBSequenceWriter(options.ecopcroutput,taxonomy=taxo)
-
-    def sequenceWriter(sequence):
-        writer.put(sequence)
-
-    return sequenceWriter
-
-
\ No newline at end of file
diff --git a/obitools/format/sequence/__init__.py b/obitools/format/sequence/__init__.py
deleted file mode 100644
index 3918761..0000000
--- a/obitools/format/sequence/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from obitools.fasta import fastaIterator
-from obitools.fastq import fastqSangerIterator
-from obitools.seqdb.embl.parser import emblIterator
-from obitools.seqdb.genbank.parser import genbankIterator
-from itertools import chain
-from obitools.utils import universalOpen
-
-def autoSequenceIterator(file):
-    '''
-    Open a sequence file and select its reader from its first line.
-
-    A line starting with '>' selects the fasta reader, '@' the sanger
-    fastq one, 'ID ' the embl one and 'LOCUS ' the genbank one.
-
-    @param file: the value handed over to universalOpen
-
-    @return: the value returned by the selected reader
-
-    @raise AssertionError: if the first line matches no known format
-    '''
-    lineiterator = universalOpen(file)
-    first = lineiterator.next()
-    if first.startswith(">"):
-        reader=fastaIterator
-    elif first.startswith('@'):
-        reader=fastqSangerIterator
-    elif first.startswith('ID '):
-        reader=emblIterator
-    elif first.startswith('LOCUS '):
-        reader=genbankIterator
-    else:
-        raise AssertionError('file is not in fasta, fastq, embl, or genbank format')
-
-    # the line read for the detection is pushed back in front of the data
-    input = reader(chain([first],lineiterator))
-
-    return input
diff --git a/obitools/format/sequence/embl.py b/obitools/format/sequence/embl.py
deleted file mode 100644
index f59f14a..0000000
--- a/obitools/format/sequence/embl.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from obitools.seqdb.embl.parser import emblIterator,emblParser
-
diff --git a/obitools/format/sequence/fasta.py b/obitools/format/sequence/fasta.py
deleted file mode 100644
index 1d7bd49..0000000
--- a/obitools/format/sequence/fasta.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from obitools.fasta import fastaIterator,fastaParser
-from obitools.fasta import fastaAAIterator,fastaAAParser
-from obitools.fasta import fastaNucIterator,fastaNucParser
-from obitools.fasta import formatFasta
diff --git a/obitools/format/sequence/fastq.py b/obitools/format/sequence/fastq.py
deleted file mode 100644
index 54fdf89..0000000
--- a/obitools/format/sequence/fastq.py
+++ /dev/null
@@ -1,13 +0,0 @@
-'''
-Created on 15 janv. 2010
-
-@author: coissac
-'''
-
-from obitools.fastq import fastqIterator,fastqParserGenetator
-from obitools.fastq import fastqSangerIterator,fastqSolexaIterator, \
- fastqIlluminaIterator
-from obitools.fastq import fastqAAIterator
-from obitools.fastq import formatFastq
-
-
diff --git a/obitools/format/sequence/fnaqual.py b/obitools/format/sequence/fnaqual.py
deleted file mode 100644
index ab69916..0000000
--- a/obitools/format/sequence/fnaqual.py
+++ /dev/null
@@ -1,8 +0,0 @@
-'''
-Created on 12 oct. 2009
-
-@author: coissac
-'''
-
-from obitools.fnaqual.fasta import fnaFastaIterator
-from obitools.fnaqual.quality import qualityIterator
diff --git a/obitools/format/sequence/genbank.py b/obitools/format/sequence/genbank.py
deleted file mode 100644
index 8524b6f..0000000
--- a/obitools/format/sequence/genbank.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from obitools.seqdb.genbank.parser import genpepIterator,genpepParser
-from obitools.seqdb.genbank.parser import genbankIterator,genbankParser
-
-
diff --git a/obitools/format/sequence/tagmatcher.py b/obitools/format/sequence/tagmatcher.py
deleted file mode 100644
index 60ad8d8..0000000
--- a/obitools/format/sequence/tagmatcher.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from obitools.tagmatcher.parser import tagMatcherParser
-from obitools.tagmatcher.parser import TagMatcherIterator
-from obitools.tagmatcher.parser import formatTagMatcher
-
-tagMatcherIterator=TagMatcherIterator
diff --git a/obitools/goa/__init__.py b/obitools/goa/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/goa/parser.py b/obitools/goa/parser.py
deleted file mode 100644
index 8ffd1e3..0000000
--- a/obitools/goa/parser.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from itertools import imap
-from obitools import utils
-
-class GoAFileIterator(utils.ColumnFile):
-    '''
-    Iterator over the lines of a tab separated annotation file,
-    returning each line as a dictionary indexed by column name.
-    '''
-
-    # names given to the columns, in file order
-    _colname = ['database',
-                'ac',
-                'symbol',
-                'qualifier',
-                'goid',
-                'origin',
-                'evidence',
-                'evidnce_origine',
-                'namespace',
-                'db_object_name',
-                'gene',
-                'object_type',
-                'taxid',
-                'date',
-                'assigned_by']
-
-    def __init__(self,stream):
-        utils.ColumnFile.__init__(self,
-                                  stream, '\t', True,
-                                  (str,))
-
-    def next(self):
-        fields = utils.ColumnFile.next(self)
-        # pairing stops at the shortest of the two sequences
-        return dict(zip(GoAFileIterator._colname,fields))
-
-
-
diff --git a/obitools/graph/__init__.py b/obitools/graph/__init__.py
deleted file mode 100644
index fbc5253..0000000
--- a/obitools/graph/__init__.py
+++ /dev/null
@@ -1,962 +0,0 @@
-'''
-**obitool.graph** for representing graph structure in obitools
---------------------------------------------------------------
-
-.. codeauthor:: Eric Coissac
-
-
-This module offers classes to manipulate graphs, mainly through the
-:py:class:`obitools.graph.Graph` class.
-
-.. inheritance-diagram:: Graph DiGraph UndirectedGraph
- :parts: 2
-
-'''
-
-import sys
-
-
-from obitools.utils import progressBar
-
-
-class Indexer(dict):
-    '''
-    Allow to manage conversion between an arbitrary hashable python
-    value and a unique integer key.
-
-    Indices are allocated consecutively from 0, in the order the
-    values are first requested.
-    '''
-
-    def __init__(self):
-        dict.__init__(self)
-        # next index to allocate
-        self.__max=0
-        # list of the indexed values, at the position of their index
-        self.__reverse=[]
-
-    def getLabel(self,index):
-        '''
-        Return the python value associated to an integer index.
-
-        :param index: an index value
-        :type index: int
-
-        :raises: IndexError if the index is not used in this
-                 Indexer instance
-        '''
-        return self.__reverse[index]
-
-    def getIndex(self,key,strict=False):
-        '''
-        Return the index associated to a **key** in the indexer. Two
-        modes are available :
-
-            - strict mode :
-
-                if the key is not known by the :py:class:`Indexer` instance
-                a :py:exc:`KeyError` exception is raised.
-
-            - non strict mode :
-
-                in this mode if the requested **key** is absent, it is added to
-                the :py:class:`Indexer` instance and the new index is returned
-
-        :param key: the requested key
-        :type key: a hashable python value
-
-        :param strict: select the lookup mode
-        :type strict: bool
-
-        :return: the index corresponding to the key
-        :rtype: int
-
-        :raises: - :py:exc:`KeyError` in strict mode if key is absent
-                   of the :py:class:`Indexer` instance
-
-                 - :py:exc:`TypeError` if key is not a hashable value.
-        '''
-        if dict.__contains__(self,key):
-            return dict.__getitem__(self,key)
-        elif strict:
-            raise KeyError(key)
-        else:
-            value = self.__max
-            self[key]= value
-            self.__reverse.append(key)
-            self.__max+=1
-            return value
-
-    def __getitem__(self,key):
-        '''
-        Implement the [] operator to emulate the standard dictionary
-        behaviour on :py:class:`Indexer` and returns the integer key
-        associated to a python value.
-
-        Actually this method calls the :py:meth:`getIndex` method in
-        non strict mode so it only raises a :py:exc:`TypeError`
-        if key is not a hashable value.
-
-        :param key: the value to index
-        :type key: a hashable python value
-
-        :return: a unique integer value associated to the key
-        :rtype: int
-
-        :raises: :py:exc:`TypeError` if **key** is not a hashable value.
-
-        '''
-        return self.getIndex(key)
-
-    def __equal__(self,index):
-        '''
-        Identity comparison of two :py:class:`Indexer` instances.
-        Kept for the callers using this name explicitly; the **==**
-        operator is served by :py:meth:`__eq__`.
-
-        :param index: the second Indexer
-        :type index: an :py:class:`Indexer` instance
-
-        :return: True if the two :py:class:`Indexer` instances are the same
-        :rtype: bool
-        '''
-        return id(self)==id(index)
-
-    def __eq__(self,index):
-        '''
-        Implement equal operator **==** for comparing two :py:class:`Indexer` instances.
-        Two :py:class:`Indexer` instances are equal only if they are physically
-        the same instance. Without this method the comparison inherited
-        from dict, based on the content, would be used.
-        '''
-        return self is index
-
-    def __ne__(self,index):
-        '''
-        Implement the **!=** operator as the negation of **==**.
-        '''
-        return self is not index
-
-
-class Graph(object):
-    '''
-    Class used to represent directed or undirected graph.
-
-    .. warning::
-
-        Only one edge can connect two nodes in a given direction.
-
-    .. warning::
-
-        Specifying nodes through their index speeds up your code but as no check
-        is done on index value, it may result in inconsistency. So prefer the
-        use of node label to specify a node.
-
-    Nodes are stored in a dictionary mapping each node index to the set of
-    its neighbour indices. Node attributes are indexed by node index and
-    edge attributes by (index1,index2) tuples; in an undirected graph both
-    orientations of an edge share the same attribute dictionary.
-    '''
-    def __init__(self,label='G',directed=False,indexer=None,nodes=None,edges=None):
-        '''
-        :param label: Graph name, set to 'G' by default
-        :type label: str
-
-        :param directed: true for directed graph, set to False by default
-        :type directed: boolean
-
-        :param indexer: node label indexer. This allows to define several graphs
-                        sharing the same indexer (see : :py:meth:`newEmpty`)
-        :type indexer: :py:class:`Indexer`
-
-        :param nodes: accepted but not used by this constructor
-        :type nodes: iterable value
-
-        :param edges: accepted but not used by this constructor
-        :type edges: iterable value
-        '''
-
-        self._directed=directed
-        if indexer is None:
-            indexer = Indexer()
-        self._index = indexer
-        self._node = {}
-        self._node_attrs = {}
-        self._edge_attrs = {}
-        self._label=label
-
-    def newEmpty(self):
-        """
-        Build a new empty graph using the same :py:class:`Indexer` instance.
-        This allows two graphs to share their vertices through their indices.
-        """
-        n = Graph(self._label+"_compact",self._directed,self._index)
-
-        return n
-
-    def addNode(self,node=None,index=None,**data):
-        '''
-        Add a new node or update an existing one.
-
-        :param node: the new node label or the label of an existing node
-                     for updating it.
-        :type node: a hashable python value
-
-        :param index: the index of an existing node for updating it.
-        :type index: int
-
-        :param data: attributes added to the node ones
-
-        :return: the index of the node
-        :rtype: int
-
-        :raises: :py:exc:`IndexError` if index is not **None** and
-                 corresponds to a not used index in this graph.
-        '''
-        if index is None:
-            index = self._index[node]
-
-            if index not in self._node:
-                self._node[index]=set()
-        else:
-            if index not in self._node:
-                raise IndexError("This index is not used in this graph")
-
-        if data:
-            if index in self._node_attrs:
-                self._node_attrs[index].update(data)
-            else:
-                self._node_attrs[index]=dict(data)
-
-        return index
-
-    def __contains__(self,node):
-        '''
-        Test if a node label corresponds to a node of this graph.
-        '''
-        try:
-            index = self._index.getIndex(node,strict=True)
-            r = index in self._node
-        except KeyError:
-            r=False
-        return r
-
-    def getNode(self,node=None,index=None):
-        """
-        :param node: a node label.
-        :type node: a hashable python value
-
-        :param index: the index of an existing node.
-        :type index: int
-
-        .. note:: Index value are prevalent over node label.
-
-        :return: the looked for node
-        :rtype: :py:class:`Node`
-
-        :raises: :py:exc:`KeyError` if specified node label
-                 is not known by the indexer.
-
-        .. warning:: no check on index value
-        """
-        if index is None:
-            index = self._index.getIndex(node, True)
-        return Node(index,self)
-
-    def getBestNode(self,estimator):
-        '''
-        Select the node maximizing the estimator function
-
-        :param estimator: the function to maximize
-        :type estimator: a function returning a numerical value and accepting one
-                         argument of type :py:class:`Node`
-
-        :return: the best node, **None** for an empty graph
-        :rtype: py:class:`Node`
-        '''
-
-        bestScore=0
-        best=None
-        for n in self:
-            score = estimator(n)
-            if best is None or score > bestScore:
-                bestScore = score
-                best=n
-        return best
-
-
-    def delNode(self,node=None,index=None):
-        """
-        Delete a node from a graph and all associated edges.
-
-        :param node: a node label.
-        :type node: a hashable python value
-
-        :param index: the index of an existing node.
-        :type index: int
-
-        .. note:: Index value are prevalent over node label.
-
-        :raises: :py:exc:`KeyError` if the specified node
-                 does not belong to the graph.
-
-        .. warning:: no check on index value
-        """
-        if index is None:
-            # strict lookup: an unknown label must not be registered in
-            # the (possibly shared) indexer by a deletion
-            index = self._index.getIndex(node, True)
-
-        # edges pointing to the deleted node
-        for n in self._node:
-            if n!=index:
-                e = self._node[n]
-                if index in e:
-                    if (n,index) in self._edge_attrs:
-                        del self._edge_attrs[(n,index)]
-                    e.remove(index)
-
-        # edges leaving the deleted node
-        e = self._node[index]
-
-        for n in e:
-            if (index,n) in self._edge_attrs:
-                del self._edge_attrs[(index,n)]
-
-        del self._node[index]
-        if index in self._node_attrs:
-            del self._node_attrs[index]
-
-
-    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
-        '''
-        Create a new edge in the graph between both the specified nodes.
-
-        .. note:: Nodes can be specified using their label or their index in the graph
-                  if both values are indicated the index is used.
-
-        :param node1: The first vertex label
-        :type node1: a hashable python value
-        :param node2: The second vertex label
-        :type node2: a hashable python value
-        :param index1: The first vertex index
-        :type index1: int
-        :param index2: The second vertex index
-        :type index2: int
-        :param data: attributes added to the edge ones
-
-        :return: the indices of both the nodes
-        :rtype: tuple of two int
-
-        :raises: :py:exc:`IndexError` if one of both the specified indices
-                 corresponds to a non-existing node.
-        '''
-
-        index1=self.addNode(node1, index1)
-        index2=self.addNode(node2, index2)
-
-        self._node[index1].add(index2)
-
-        if not self._directed:
-            self._node[index2].add(index1)
-
-        if data:
-            if (index1,index2) not in self._edge_attrs:
-                data =dict(data)
-                self._edge_attrs[(index1,index2)]=data
-                if not self._directed:
-                    # both orientations share the same dictionary
-                    self._edge_attrs[(index2,index1)]=data
-            else:
-                # update through the key tested above: in a directed
-                # graph the reverse key may not exist
-                self._edge_attrs[(index1,index2)].update(data)
-
-        return (index1,index2)
-
-    def getEdge(self,node1=None,node2=None,index1=None,index2=None):
-        '''
-        Extract the :py:class:`Edge` instance linking two nodes of the graph.
-
-        .. note:: Nodes can be specified using their label or their index in the graph
-                  if both values are indicated the index is used.
-
-        :param node1: The first vertex label
-        :type node1: a hashable python value
-        :param node2: The second vertex label
-        :type node2: a hashable python value
-        :param index1: The first vertex index
-        :type index1: int
-        :param index2: The second vertex index
-        :type index2: int
-
-        :raises: :py:exc:`KeyError` if one of both the specified node labels
-                 is not known by the indexer.
-
-        .. warning:: no check on index value
-        '''
-        node1=self.getNode(node1, index1)
-        node2=self.getNode(node2, index2)
-        return Edge(node1,node2)
-
-    def delEdge(self,node1=None,node2=None,index1=None,index2=None):
-        """
-        Delete the edge linking node 1 to node 2, and its attributes.
-        Nothing is done if this edge does not exist.
-
-        .. note:: Nodes can be specified using their label or their index in the graph
-                  if both values are indicated the index is used.
-
-        :param node1: The first vertex label
-        :type node1: a hashable python value
-        :param node2: The second vertex label
-        :type node2: a hashable python value
-        :param index1: The first vertex index
-        :type index1: int
-        :param index2: The second vertex index
-        :type index2: int
-
-        .. warning:: no check on index value
-        """
-        if index1 is None:
-            index1 = self._index[node1]
-        if index2 is None:
-            index2 = self._index[node2]
-        if index1 in self._node and index2 in self._node[index1]:
-            self._node[index1].remove(index2)
-            # edge attributes are stored in _edge_attrs, not in _node_attrs
-            if (index1,index2) in self._edge_attrs:
-                del self._edge_attrs[(index1,index2)]
-            if not self._directed:
-                # discard: for a self loop the index is already removed
-                self._node[index2].discard(index1)
-                if (index2,index1) in self._edge_attrs:
-                    del self._edge_attrs[(index2,index1)]
-
-    def edgeIterator(self,predicate=None):
-        """
-        Iterate through a set of selected edges. In an undirected graph
-        each edge is returned once.
-
-        :param predicate: a function allowing edge selection. Default value
-                          is **None** and indicate that all edges are selected.
-        :type predicate:  a function returning a boolean value
-                          and accepting one argument of class :py:class:`Edge`
-
-        :return: an iterator over selected edge
-        :rtype: iterator over :py:class:`Edge` instances
-
-        .. seealso::
-            function :py:func:`selectEdgeAttributeFactory` for simple predicate.
-
-        """
-        for n1 in self._node:
-            for n2 in self._node[n1]:
-                if self._directed or n1 <= n2:
-                    e = self.getEdge(index1=n1, index2=n2)
-                    if predicate is None or predicate(e):
-                        yield e
-
-
-    def nodeIterator(self,predicate=None):
-        """
-        Iterate through a set of selected vertices.
-
-        :param predicate: a function allowing node selection. Default value
-                          is **None** and indicate that all nodes are selected.
-        :type predicate:  a function returning a boolean value
-                          and accepting one argument of class :py:class:`Node`
-
-        :return: an iterator over selected nodes.
-        :rtype: iterator over :py:class:`Node` instances
-
-        """
-        for n in self._node:
-            node = self.getNode(index=n)
-            if predicate is None or predicate(node):
-                yield node
-
-    def nodeIndexIterator(self,predicate=None):
-        """
-        Iterate through the indexes of a set of selected vertices.
-
-        :param predicate: a function allowing node selection. Default value
-                          is **None** and indicate that all nodes are selected.
-        :type predicate:  a function returning a boolean value
-                          and accepting one argument of class :py:class:`Node`
-
-        :return: an iterator over selected node indices.
-        :rtype: iterator over `int`
-
-        """
-        for n in self._node:
-            node = self.getNode(index=n)
-            if predicate is None or predicate(node):
-                yield n
-
-    def neighbourIndexSet(self,node=None,index=None):
-        '''
-        Return the set of the indices of the neighbours of a node.
-        The returned set is the one stored in the graph, not a copy.
-
-        :raises: :py:exc:`KeyError` if the node is unknown
-        '''
-        if index is None:
-            index=self._index.getIndex(node, True)
-        return self._node[index]
-
-    def edgeCount(self):
-        '''
-        Return the sum of the sizes of the neighbour sets, halved
-        for an undirected graph.
-        '''
-        n = sum(len(z) for z in self._node.values())
-        if not self._directed:
-            n=n//2
-        return n
-
-    def subgraph(self,nodes,name='G'):
-        '''
-        Build the graph induced by a set of node indices. The new graph
-        shares the indexer of this graph and gets copies of the node and
-        edge attributes.
-
-        :param nodes: the indices of the selected nodes
-        :type nodes: iterable of int
-        :param name: label of the new graph
-        :type name: str
-
-        :raises: :py:exc:`KeyError` if an index is not used in this graph
-        '''
-        sub = Graph(name,self._directed,self._index)
-        if not isinstance(nodes, set):
-            nodes = set(nodes)
-        for n in nodes:
-            sub._node[n]=nodes & self._node[n]
-            if n in self._node_attrs:
-                sub._node_attrs[n]=dict(self._node_attrs[n])
-            for n2 in sub._node[n]:
-                if not self._directed:
-                    # copy once and share between both orientations
-                    if n <= n2:
-                        if (n,n2) in self._edge_attrs:
-                            data=dict(self._edge_attrs[(n,n2)])
-                            sub._edge_attrs[(n,n2)]=data
-                            sub._edge_attrs[(n2,n)]=data
-                else:
-                    if (n,n2) in self._edge_attrs:
-                        data=dict(self._edge_attrs[(n,n2)])
-                        sub._edge_attrs[(n,n2)]=data
-        return sub
-
-    def __len__(self):
-        return len(self._node)
-
-    def __getitem__(self,key):
-        return self.getNode(node=key)
-
-    def __delitem__(self,key):
-        self.delNode(node=key)
-
-    def __iter__(self):
-        return self.nodeIterator()
-
-    def __str__(self):
-        if self._directed:
-            kw ='digraph'
-        else:
-            kw='graph'
-
-        nodes = "\n ".join([str(x) for x in self])
-        edges = "\n ".join([str(x) for x in self.edgeIterator()])
-
-        return "%s %s {\n %s\n\n %s\n}" % (kw,self._label,nodes,edges)
-
-class Node(object):
- """
- Class used for representing one node or vertex in a graph
-
- """
- def __init__(self,index,graph):
- '''
- .. warning::
-
- :py:class:`Node` constructor is usualy called through the :py:class:`Graph` methods
-
- :param index: Index of the node in the graph
- :type index: int
- :param graph: graph instance owning the node
- :type graph: :py:class:`obitools.graph.Graph`
- '''
- self.index = index
- self.__graph = graph
-
- def getGraph(self):
- '''
- return graph owning this node.
-
- :rtype: :py:class:`obitools.graph.Graph`
- '''
- return self.__graph
-
-
- def getLabel(self):
- '''
- return label associated to this node.
- '''
- return self.__graph._index.getLabel(self.index)
-
-
- def has_key(self,key):
- '''
- test is the node instance has a property named 'key'.
-
- :param key: the name of a property
- :type key: str
-
- :return: True if the nade has a property named
- :rtype: bool
- '''
- if self.index in self.__graph._node_attrs:
- return key in self.__graph._node_attrs[self.index]
- else:
- return False
-
- def neighbourIterator(self,nodePredicat=None,edgePredicat=None):
- '''
- iterate through the nodes directly connected to
- this node.
-
- :param nodePredicat: a function accepting one node as parameter
- and returning **True** if this node must be
- returned by the iterator.
- :type nodePredicat: function
-
- :param edgePredicat: a function accepting one edge as parameter
- and returning True if the edge linking self and
- the current must be considered.
- :type edgePredicat: function
-
-
- :rtype: iterator on Node instances
- '''
- for n in self.neighbourIndexIterator(nodePredicat, edgePredicat):
- node = self.graph.getNode(index=n)
- yield node
-
- def neighbourIndexSet(self):
- '''
- Return a set of node indexes directely connected
- to this node.
-
- .. warning::
-
- do not change this set unless you know
- exactly what you do.
-
- @rtype: set of int
- '''
- return self.__graph._node[self.index]
-
- def neighbourIndexIterator(self,nodePredicat=None,edgePredicat=None):
- '''
- iterate through the node indexes directly connected to
- this node.
-
- :param nodePredicat: a function accepting one node as parameter
- and returning True if this node must be
- returned by the iterator.
- :type nodePredicat: function
-
- :param edgePredicat: a function accepting one edge as parameter
- and returning True if the edge linking self and
- the current must be considered.
- :type edgePredicat: function
-
- :rtype: iterator on int
- '''
- for n in self.neighbourIndexSet():
- if nodePredicat is None or nodePredicat(self.__graph.getNode(index=n)):
- if edgePredicat is None or edgePredicat(self.__graph.getEdge(index1=self.index,index2=n)):
- yield n
-
- def degree(self,nodeIndexes=None):
- '''
- return count of edges linking this node to the
- set of nodes describes by their index in nodeIndexes
-
- :param nodeIndexes: set of node indexes.
- if set to None, all nodes of the
- graph are take into account.
- Set to None by default.
- :type nodeIndexes: set of int
-
- :rtype: int
- '''
- if nodeIndexes is None:
- return len(self.__graph._node[self.index])
- else:
- return len(self.__graph._node[self.index] & nodeIndexes)
-
- def componentIndexSet(self,nodePredicat=None,edgePredicat=None):
- '''
- Return the set of node index in the same connected component.
-
- :param nodePredicat: a function accepting one node as parameter
- and returning True if this node must be
- returned by the iterator.
- :type nodePredicat: function
-
- :param edgePredicat: a function accepting one edge as parameter
- and returning True if the edge linking self and
- the current must be considered.
- :type edgePredicat: function
-
-
- :rtype: set of int
- '''
- cc=set([self.index])
- added = set(x for x in self.neighbourIndexIterator(nodePredicat, edgePredicat))
- while added:
- cc |= added
- added = reduce(lambda x,y : x | y,
- (set(z for z in self.graph.getNode(index=c).neighbourIndexIterator(nodePredicat, edgePredicat))
- for c in added),
- set())
- added -= cc
- return cc
-
- def componentIterator(self,nodePredicat=None,edgePredicat=None):
- '''
- Iterate through the nodes in the same connected
- component.
-
- :rtype: iterator on :py:class:`Node` instance
- '''
- for c in self.componentIndexSet(nodePredicat, edgePredicat):
- yield self.graph.getNode(c)
-
- def shortestPathIterator(self,nodes=None):
- '''
- Iterate through the shortest path sourcing
- from this node. if nodes is not None, iterates
- only path linkink this node to one node listed in
- nodes
-
- :param nodes: set of node index
- :type nodes: iterable on int
-
- :return: an iterator on list of int describing path
- :rtype: iterator on list of int
- '''
- if nodes is not None:
- nodes = set(nodes)
-
-
- Q=[(self.index,-1)]
-
- gray = set([self.index])
- paths = {}
-
- while Q and (nodes is None or nodes):
- u,p = Q.pop()
- paths[u]=p
- next = self.graph._node[u] - gray
- gray|=next
- Q.extend((x,u) for x in next)
- if nodes is None or u in nodes:
- if nodes:
- nodes.remove(u)
- path = [u]
- while p >= 0:
- path.append(p)
- p = paths[p]
- path.reverse()
- yield path
-
- def shortestPathTo(self,node=None,index=None):
- '''
- return one of the shortest path linking this
- node to specified node.
-
- :param node: a node label or None
- :param index: a node index or None. the parameter index
- has a priority on the parameter node.
- :type index: int
-
- :return: list of node index corresponding to the path or None
- if no path exists.
- :rtype: list of int or None
- '''
- if index is None:
- index=self.graph.getNode(node).index
- for p in self.shortestPathIterator([index]):
- return p
-
-
- def __getitem__(self,key):
- '''
- return the value of the property of this node
-
- :param key: the name of a property
- :type key: str
- '''
- return self.__graph._node_attrs.get(self.index,{})[key]
-
- def __setitem__(self,key,value):
- '''
- set the value of a node property. In the property doesn't
- already exist a new property is added to this node.
-
- :param key: the name of a property
- :type key: str
- :param value: the value of the property
-
- .. seealso::
-
- :py:meth:`Node.__getitem__`
- '''
- if self.index in self.__graph._node_attrs:
- data = self.__graph._node_attrs[self.index]
- data[key]=value
- else:
- self.graph._node_attrs[self.index]={key:value}
-
- def __len__(self):
- '''
- Count neighbour of this node
-
- :rtype: int
-
- .. seealso::
-
- :py:meth:`Node.degree`
- '''
- return len(self.__graph._node[self.index])
-
- def __iter__(self):
- '''
- iterate through neighbour of this node
-
- :rtype: iterator in :py:class:`Node` instances
-
- .. seealso::
-
- :py:meth:`Node.neighbourIterator`
- '''
- return self.neighbourIterator()
-
- def __contains__(self,key):
- return self.has_key(key)
-
- def __str__(self):
-
- if self.index in self.__graph._node_attrs:
- keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"').replace('\n','\\n'))
- for x in self.__graph._node_attrs[self.index].iteritems()]
- )
- else:
- keys=''
-
- return '%d [label="%s" %s]' % (self.index,
- str(self.label).replace('"','\\"').replace('\n','\\n'),
- keys)
-
- def keys(self):
- if self.index in self.__graph._node_attrs:
- k = self.__graph._node_attrs[self.index].keys()
- else:
- k=[]
- return k
-
- label = property(getLabel, None, None, "Label of the node")
-
- graph = property(getGraph, None, None, "Graph owning this node")
-
-
-
-class Edge(object):
- """
- Class used for representing one edge of a graph
-
- """
-
- def __init__(self,node1,node2):
- '''
- .. warning::
-
- :py:class:`Edge` constructor is usualy called through the :py:class:`Graph` methods
-
- :param node1: First node likend by the edge
- :type node1: :py:class:`Node`
- :param node2: Seconde node likend by the edge
- :type node2: :py:class:`Node`
- '''
- self.node1 = node1
- self.node2 = node2
-
- def getGraph(self):
- """
- Return the :py:class:`Graph` instance owning this edge.
- """
- return self.node1.graph
-
- def has_key(self,key):
- '''
- test is the :py:class:`Edge` instance has a property named **key**.
-
- :param key: the name of a property
- :type key: str
-
- :return: True if the edge has a property named
- :rtype: bool
- '''
- if (self.node1.index,self.node2.index) in self.graph._edge_attrs:
- return key in self.graph._edge_attrs[(self.node1.index,self.node2.index)]
- else:
- return False
-
-
- def getDirected(self):
- return self.node1.graph._directed
-
- def __getitem__(self,key):
- return self.graph._edge_attrs.get((self.node1.index,self.node2.index),{})[key]
-
- def __setitem__(self,key,value):
- e = (self.node1.index,self.node2.index)
- if e in self.graph._edge_attrs:
- data = self.graph._edge_attrs[e]
- data[key]=value
- else:
- self.graph._edge_attrs[e]={key:value}
-
- def __str__(self):
- e = (self.node1.index,self.node2.index)
- if e in self.graph._edge_attrs:
- keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
- for x in self.graph._edge_attrs[e].iteritems()]
- )
- else:
- keys = ""
-
- if self.directed:
- link='->'
- else:
- link='--'
-
- return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys)
-
- def __contains__(self,key):
- return self.has_key(key)
-
-
- graph = property(getGraph, None, None, "Graph owning this edge")
-
- directed = property(getDirected, None, None, "Directed's Docstring")
-
-
-class DiGraph(Graph):
- """
- :py:class:`DiGraph class`is a specialisation of the :py:class:`Graph` class
- dedicated to directed graph representation
-
- .. seealso::
-
- :py:class:`UndirectedGraph`
-
- """
- def __init__(self,label='G',indexer=None,nodes=None,edges=None):
- '''
- :param label: Graph name, set to 'G' by default
- :type label: str
- :param indexer: node label indexer
- :type indexer: Indexer instance
- :param nodes: set of nodes to add to the graph
- :type nodes: iterable value
- :param edges: set of edges to add to the graph
- :type edges: iterable value
- '''
-
- Graph.__init__(self, label, True, indexer, nodes, edges)
-
-class UndirectedGraph(Graph):
- """
- :py:class:`UndirectGraph class`is a specialisation of the :py:class:`Graph` class
- dedicated to undirected graph representation
-
- .. seealso::
-
- :py:class:`DiGraph`
-
- """
- def __init__(self,label='G',indexer=None,nodes=None,edges=None):
- '''
- :param label: Graph name, set to 'G' by default
- :type label: str
- :param indexer: node label indexer
- :type indexer: Indexer instance
- :param nodes: set of nodes to add to the graph
- :type nodes: iterable value
- :param edges: set of edges to add to the graph
- :type edges: iterable value
- '''
-
- Graph.__init__(self, label, False, indexer, nodes, edges)
-
-
-
-def selectEdgeAttributeFactory(attribut,value):
- """
- This function help in building predicat function usable for selecting edge
- in the folowing :py:class:`Graph` methods :
-
- - :py:meth:`Graph.edgeIterator`
-
- """
- def selectEdge(e):
- return attribut in e and e[attribut]==value
- return selectEdge
diff --git a/obitools/graph/__init__.pyc b/obitools/graph/__init__.pyc
deleted file mode 100644
index 397e5c0..0000000
Binary files a/obitools/graph/__init__.pyc and /dev/null differ
diff --git a/obitools/graph/algorithms/__init__.py b/obitools/graph/algorithms/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/graph/algorithms/__init__.pyc b/obitools/graph/algorithms/__init__.pyc
deleted file mode 100644
index 1f2edcc..0000000
Binary files a/obitools/graph/algorithms/__init__.pyc and /dev/null differ
diff --git a/obitools/graph/algorithms/clique.py b/obitools/graph/algorithms/clique.py
deleted file mode 100644
index 2007c1a..0000000
--- a/obitools/graph/algorithms/clique.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import time
-import sys
-
-
-
-_maxsize=0
-_solution=0
-_notbound=0
-_sizebound=0
-_lastyield=0
-_maxclique=None
-
-def cliqueIterator(graph,minsize=1,node=None,timeout=None):
- global _maxsize,_solution,_notbound,_sizebound,_lastyield
- _maxsize=0
- _solution=0
- _notbound=0
- _sizebound=0
- starttime = time.time()
-
- if node:
- node = graph.getNode(node)
- index = node.index
- clique= set([index])
- candidates= set(graph.neighbourIndexSet(index=index))
- else:
- clique=set()
- candidates = set(x.index for x in graph)
-
-
-# candidates = set(x for x in candidates
-# if len(graph.neighbourIndexSet(index=x) & candidates) >= (minsize - 1))
-
- _lastyield=time.time()
- for c in _cliqueIterator(graph,clique,candidates,set(),minsize,start=starttime,timeout=timeout):
- yield c
-
-
-
-
-
-def _cliqueIterator(graph,clique,candidates,notlist,minsize=0,start=None,timeout=None):
- global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield
-
- # Speed indicator
- lclique = len(clique)
- lcandidates = len(candidates)
- notmin = lcandidates
- notfix = None
-
- for n in notlist:
- nnc = candidates - graph.neighbourIndexSet(index=n)
- nc = len(nnc)
- if nc < notmin:
- notmin=nc
- notfix=n
- notfixneib = nnc
-
- if lclique > _maxsize or not _solution % 1000 :
- if start is not None:
- top = time.time()
- delta = top - start
- if delta==0:
- delta=1e-6
- speed = _solution / delta
- start = top
- else:
- speed = 0
- print >>sys.stderr,"\rCandidates : %-5d Maximum clique size : %-5d Solutions explored : %10d speed = %5.2f solutions/sec sizebound=%10d notbound=%10d " % (lcandidates,_maxsize,_solution,speed,_sizebound,_notbound),
- sys.stderr.flush()
- if lclique > _maxsize:
- _maxsize=lclique
-
-# print >>sys.stderr,'koukou'
-
- timer = time.time() - _lastyield
-
- if not candidates and not notlist:
- if lclique==_maxsize:
- _maxclique=set(clique)
- if lclique >= minsize:
- yield set(clique)
- if timeout is not None and timer > timeout and _maxclique is not None:
- yield _maxclique
- _maxclique=None
-
- else:
- while notmin and candidates and ((lclique + len(candidates)) >= minsize or (timeout is not None and timer > timeout)):
- # count explored solution
- _solution+=1
-
- if notfix is None:
- nextcandidate = candidates.pop()
- else:
- nextcandidate = notfixneib.pop()
- candidates.remove(nextcandidate)
-
- clique.add(nextcandidate)
-
- neighbours = graph.neighbourIndexSet(index=nextcandidate)
-
- nextcandidates = candidates & neighbours
- nextnot = notlist & neighbours
-
- nnc = candidates - neighbours
- lnnc=len(nnc)
-
- for c in _cliqueIterator(graph,
- set(clique),
- nextcandidates,
- nextnot,
- minsize,
- start,
- timeout=timeout):
- yield c
-
-
- clique.remove(nextcandidate)
-
- notmin-=1
-
- if lnnc < notmin:
- notmin = lnnc
- notfix = nextcandidate
- notfixneib = nnc
-
- if notmin==0:
- _notbound+=1
-
- notlist.add(nextcandidate)
- else:
- if (lclique + len(candidates)) < minsize:
- _sizebound+=1
-
diff --git a/obitools/graph/algorithms/compact.py b/obitools/graph/algorithms/compact.py
deleted file mode 100644
index 8065a93..0000000
--- a/obitools/graph/algorithms/compact.py
+++ /dev/null
@@ -1,8 +0,0 @@
-
-def compactGraph(graph,nodeSetIterator):
- compact = graph.newEmpty()
- for ns in nodeSetIterator(graph):
- nlabel = "\n".join([str(graph.getNode(index=x).label) for x in ns])
- compact.addNode(nlabel)
- print
- print compact
diff --git a/obitools/graph/algorithms/component.py b/obitools/graph/algorithms/component.py
deleted file mode 100644
index a17c8dd..0000000
--- a/obitools/graph/algorithms/component.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""
-Iterate through the connected components of a graph
----------------------------------------------------
-
-the module :py:mod:`obitools.graph.algorithm.component` provides
-two functions to deal with the connected component of a graph
-represented as a :py:class:`obitools.graph.Graph` instance.
-
-The whole set of connected component of a graph is a partition of this graph.
-So a node cannot belongs to two distinct connected component.
-
-Two nodes are in the same connected component if it exits a path through
-the graph edges linking them.
-
-TODO: THere is certainly a bug with DirectedGraph
-
-"""
-
-def componentIterator(graph,nodePredicat=None,edgePredicat=None):
- '''
- Build an iterator over the connected component of a graph.
- Each connected component returned by the iterator is represented
- as a `set` of node indices.
-
- :param graph: the graph to partitionne
- :type graph: :py:class:`obitools.graph.Graph`
-
- :param predicate: a function allowing edge selection. Default value
- is **None** and indicate that all edges are selected.
- :type predicate: a function returning a boolean value
- and accepting one argument of class :py:class:`Node`
-
- :param predicate: a function allowing node selection. Default value
- is **None** and indicate that all nodes are selected.
- :type predicate: a function returning a boolean value
- and accepting one argument of class :py:class:`Edge`
-
- :return: an iterator over the connected component set
- :rtype: an iterator over `set` of `int`
-
- .. seealso::
- the :py:meth:`obitools.graph.Graph.componentIndexSet` method
- on which is based this function.
- '''
- seen = set()
- for n in graph.nodeIterator(nodePredicat):
- if n.index not in seen:
- cc=n.componentIndexSet(nodePredicat, edgePredicat)
- yield cc
- seen |= cc
-
-def componentCount(graph,nodePredicat=None,edgePredicat=None):
- '''
- Count the connected componnent in a graph.
-
- :param graph: the graph to partitionne
- :type graph: :py:class:`obitools.graph.Graph`
-
- :param predicate: a function allowing edge selection. Default value
- is **None** and indicate that all edges are selected.
- :type predicate: a function returning a boolean value
- and accepting one argument of class :py:class:`Node`
-
- :param predicate: a function allowing node selection. Default value
- is **None** and indicate that all nodes are selected.
- :type predicate: a function returning a boolean value
- and accepting one argument of class :py:class:`Edge`
-
- :return: an iterator over the connected component set
- :rtype: an iterator over `set` of `int`
-
- .. seealso::
- the :py:func:`componentIterator` function
- on which is based this function.
- '''
- n=0
- for c in componentIterator(graph,nodePredicat, edgePredicat):
- n+=1
- return n
-
-
-
\ No newline at end of file
diff --git a/obitools/graph/algorithms/component.pyc b/obitools/graph/algorithms/component.pyc
deleted file mode 100644
index a3b6298..0000000
Binary files a/obitools/graph/algorithms/component.pyc and /dev/null differ
diff --git a/obitools/graph/dag.py b/obitools/graph/dag.py
deleted file mode 100644
index f9a7a96..0000000
--- a/obitools/graph/dag.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from obitools.graph import DiGraph,Node
-from obitools.graph.algorithms.component import componentIterator
-
-class DAG(DiGraph):
- def __init__(self,label='G',indexer=None,nodes=None,edges=None):
- '''
- Directed Graph constructor.
-
- @param label: Graph name, set to 'G' by default
- @type label: str
- @param indexer: node label indexer
- @type indexer: Indexer instance
- @param nodes: set of nodes to add to the graph
- @type nodes: iterable value
- @param edges: set of edges to add to the graph
- @type edges: iterable value
- '''
-
- self._parents={}
- DiGraph.__init__(self, label, indexer, nodes, edges)
-
- def getNode(self,node=None,index=None):
- if index is None:
- index = self._index.getIndex(node, True)
- return DAGNode(index,self)
-
- def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
- indexp=self.addNode(parent, indexp)
- index =self.addNode(node , index)
-
- pindex = set(n.index
- for n in self.getNode(index=indexp).ancestorIterator())
-
- assert index not in pindex,'Child node cannot be a parent node'
-
- DiGraph.addEdge(self,index1=indexp,index2=index,**data)
-
- if index in self._parents:
- self._parents[index].add(indexp)
- else:
- self._parents[index]=set([indexp])
-
-
- return (indexp,index)
-
- def getRoots(self):
- return [self.getNode(index=cc.pop()).getRoot()
- for cc in componentIterator(self)]
-
-
-
-
-class DAGNode(Node):
-
- def ancestorIterator(self):
- if self.index in self.graph._parents:
- for p in self.graph._parents[self.index]:
- parent = DAGNode(p,self.graph)
- yield parent
- for pnode in parent.ancestorIterator():
- yield pnode
-
- def getRoot(self):
- for x in self.ancestorIterator():
- pass
- return x
-
- def leavesIterator(self):
- if not self:
- yield self
- for n in self:
- for nn in n.leavesIterator():
- yield nn
-
- def subgraphIterator(self):
- yield self
- for n in self:
- for nn in n.subgraphIterator():
- yield nn
-
diff --git a/obitools/graph/layout/__init__.py b/obitools/graph/layout/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/graph/layout/radialtree.py b/obitools/graph/layout/radialtree.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/graph/rootedtree.py b/obitools/graph/rootedtree.py
deleted file mode 100644
index 803316d..0000000
--- a/obitools/graph/rootedtree.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from obitools.graph.dag import DAG,DAGNode
-
-class RootedTree(DAG):
-
- def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
- indexp=self.addNode(parent, indexp)
- index =self.addNode(node , index)
-
- assert index not in self._parents or indexp in self._parents[index], \
- 'Child node cannot have more than one parent node'
-
- return DAG.addEdge(self,indexp=indexp,index=index,**data)
-
- def getNode(self,node=None,index=None):
- if index is None:
- index = self._index.getIndex(node, True)
- return RootedTreeNode(index,self)
-
-
-
-class RootedTreeNode(DAGNode):
-
- def subTreeSize(self):
- n=1
- for subnode in self:
- n+=subnode.subTreeSize()
- return n
-
- def subTreeLeaves(self):
- if not self:
- return 1
- n=0
- for subnode in self:
- n+=subnode.subTreeLeaves()
- return n
-
-
-def nodeWriter(node,deep=0,label=None,distance="distance", bootstrap="bootstrap",cartoon=None,collapse=None):
-
- ks = node.keys()
-
-
- if label is None:
- name=node.label
- elif callable(label):
- name=label(node)
- elif isinstance(label, str) and label in node:
- name=node[label]
- ks.remove(label)
- else:
- name=''
-
- if distance in node:
- dist=':%6.5f' % node[distance]
- ks.remove(distance)
- else:
- dist=''
-
- ks = ["%s=%s" % (k,node[k]) for k in ks]
-
- if cartoon is not None and cartoon(node):
- ks.append("!cartoon={%d,0.0}" % node.subTreeLeaves())
-
- if collapse is not None and collapse(node):
- ks.append('!collapse={"collapsed",0.0}')
-
- if ks:
- ks="[&"+",".join(ks)+"]"
- else:
- ks=''
-
-
- nodeseparator = ',\n' + ' ' * (deep+1)
-
- subnodes = nodeseparator.join([nodeWriter(x, deep+1,label,distance,bootstrap,cartoon=cartoon,collapse=collapse)
- for x in node])
- if subnodes:
- subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
-
- return '%s"%s"%s%s' % (subnodes,name,ks,dist)
-
-
-def nexusFormat(tree,startnode=None,label=None,blocks="",cartoon=None,collapse=None):
- head="#NEXUS\n"
-
- tx = []
-
- for n in tree:
- if label is None:
- name=n.label
- elif callable(label):
- name=label(n)
- elif isinstance(label, str) and label in n:
- name=n[label]
- else:
- name=''
-
- if name:
- tx.append('"%s"' % name)
-
- taxa = "begin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels\n\t" % len(tx)
-
- taxa+="\n\t".join(tx)
-
- taxa+="\n;\nend;\n\n"
-
-
-
- if startnode is not None:
- roots =[startnode]
- else:
- roots = tree.getRoots()
- trees = nodeWriter(roots[0],0,label,cartoon=cartoon,collapse=collapse)
- trees = "begin trees;\n\ttree tree_1 = [&R] "+ trees +";\nend;\n\n"
- return head+taxa+trees+"\n\n"+blocks+"\n"
-
-
\ No newline at end of file
diff --git a/obitools/graph/tree.py b/obitools/graph/tree.py
deleted file mode 100644
index 940ee44..0000000
--- a/obitools/graph/tree.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from obitools.graph import UndirectedGraph,Node
-from obitools.graph.algorithms.component import componentCount
-
-
-class Forest(UndirectedGraph):
-
-
- def getNode(self,node=None,index=None):
- if index is None:
- index = self._index.getIndex(node, True)
- return TreeNode(index,self)
-
- def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
- index1=self.addNode(node1, index1)
- index2=self.addNode(node2, index2)
-
- cc = set(n.index for n in self.getNode(index=index2).componentIterator())
-
- assert index1 in self._node[index2] or index1 not in cc, \
- "No more than one path is alloed between two nodes in a tree"
-
- UndirectedGraph.addEdge(self, index1=index1, index2=index2,**data)
-
- return (index1,index2)
-
- def isASingleTree(self):
- return componentCount(self)==1
-
-class TreeNode(Node):
-
- def componentIterator(self):
- for c in self:
- yield c
- for cc in c:
- yield cc
-
-
\ No newline at end of file
diff --git a/obitools/gzip.py b/obitools/gzip.py
deleted file mode 100644
index 841641a..0000000
--- a/obitools/gzip.py
+++ /dev/null
@@ -1,504 +0,0 @@
-"""Functions that read and write gzipped files.
-
-The user of the file doesn't have to worry about the compression,
-but random access is not allowed.
-
-This consisted on a patched version of of standard gzip python
-module based on Andrew Kuchling's minigzip.py distributed with the zlib module
-
-"""
-
-# based on Andrew Kuchling's minigzip.py distributed with the zlib module
-
-import struct, sys, time
-import zlib
-import __builtin__
-
-__all__ = ["GzipFile","open"]
-
-FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
-
-READ, WRITE = 1, 2
-
-def U32(i):
- """Return i as an unsigned integer, assuming it fits in 32 bits.
-
- If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
- """
- if i < 0:
- i += 1L << 32
- return i
-
-def LOWU32(i):
- """Return the low-order 32 bits of an int, as a non-negative int."""
- return i & 0xFFFFFFFFL
-
-def write32(output, value):
- output.write(struct.pack("'
-
- def _init_write(self, filename):
- if filename[-3:] != '.gz':
- filename = filename + '.gz'
- self.filename = filename
- self.crc = zlib.crc32("")
- self.size = 0
- self.writebuf = []
- self.bufsize = 0
-
- def _write_gzip_header(self):
- self.fileobj.write('\037\213') # magic header
- self.fileobj.write('\010') # compression method
- fname = self.filename[:-3]
- flags = 0
- if fname:
- flags = FNAME
- self.fileobj.write(chr(flags))
- write32u(self.fileobj, long(time.time()))
- self.fileobj.write('\002')
- self.fileobj.write('\377')
- if fname:
- self.fileobj.write(fname + '\000')
-
- def _init_read(self):
- self.crc = zlib.crc32("")
- self.size = 0
-
- def _read_internal(self, size):
- if len(self.inputbuf) < size:
- self.inputbuf += self.fileobj.read(size-len(self.inputbuf))
- chunk = self.inputbuf[:size]
- # need to use len(chunk) bellow instead of size in case it's EOF.
- if len(chunk) < 8:
- self.last8 = self.last8[len(chunk):] + chunk
- else:
- self.last8 = chunk[-8:]
- self.inputbuf = self.inputbuf[size:]
- return chunk
-
- def _read_gzip_header(self):
- magic = self._read_internal(2)
- if len(magic) != 2:
- raise EOFError, "Reached EOF"
- if magic != '\037\213':
- raise IOError, 'Not a gzipped file'
- method = ord( self._read_internal(1) )
- if method != 8:
- raise IOError, 'Unknown compression method'
- flag = ord( self._read_internal(1) )
- # modtime = self.fileobj.read(4)
- # extraflag = self.fileobj.read(1)
- # os = self.fileobj.read(1)
- self._read_internal(6)
-
- if flag & FEXTRA:
- # Read & discard the extra field, if present
- xlen = ord(self._read_internal(1))
- xlen = xlen + 256*ord(self._read_internal(1))
- self._read_internal(xlen)
- if flag & FNAME:
- # Read and discard a null-terminated string containing the filename
- while True:
- s = self._read_internal(1)
- if not s or s=='\000':
- break
- if flag & FCOMMENT:
- # Read and discard a null-terminated string containing a comment
- while True:
- s = self._read_internal(1)
- if not s or s=='\000':
- break
- if flag & FHCRC:
- self._read_internal(2) # Read & discard the 16-bit header CRC
-
-
- def write(self,data):
- if self.mode != WRITE:
- import errno
- raise IOError(errno.EBADF, "write() on read-only GzipFile object")
-
- if self.fileobj is None:
- raise ValueError, "write() on closed GzipFile object"
- if len(data) > 0:
- self.size = self.size + len(data)
- self.crc = zlib.crc32(data, self.crc)
- self.fileobj.write( self.compress.compress(data) )
- self.offset += len(data)
-
- def read(self, size=-1):
- if self.mode != READ:
- import errno
- raise IOError(errno.EBADF, "read() on write-only GzipFile object")
-
- if self.extrasize <= 0 and self.fileobj is None:
- return ''
-
- readsize = 1024
- if size < 0: # get the whole thing
- try:
- while True:
- self._read(readsize)
- readsize = min(self.max_read_chunk, readsize * 2)
- except EOFError:
- size = self.extrasize
- else: # just get some more of it
- try:
- while size > self.extrasize:
- self._read(readsize)
- readsize = min(self.max_read_chunk, readsize * 2)
- except EOFError:
- if size > self.extrasize:
- size = self.extrasize
-
- chunk = self.extrabuf[:size]
- self.extrabuf = self.extrabuf[size:]
- self.extrasize = self.extrasize - size
-
- self.offset += size
- return chunk
-
- def _unread(self, buf):
- self.extrabuf = buf + self.extrabuf
- self.extrasize = len(buf) + self.extrasize
- self.offset -= len(buf)
-
- def _read(self, size=1024):
- if self.fileobj is None:
- raise EOFError, "Reached EOF"
-
- if self._new_member:
- # If the _new_member flag is set, we have to
- # jump to the next member, if there is one.
- #
- # _read_gzip_header will raise EOFError exception
- # if there no more members to read.
- self._init_read()
- self._read_gzip_header()
- self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
- self._new_member = False
-
- # Read a chunk of data from the file
- buf = self._read_internal(size)
-
- # If the EOF has been reached, flush the decompression object
- # and mark this object as finished.
-
- if buf == "":
- uncompress = self.decompress.flush()
- self._read_eof()
- self._add_read_data( uncompress )
- raise EOFError, 'Reached EOF'
-
- uncompress = self.decompress.decompress(buf)
- self._add_read_data( uncompress )
-
- if self.decompress.unused_data != "":
- # Ending case: we've come to the end of a member in the file,
- # so put back unused_data and initialize last8 by reading them.
- self.inputbuf = self.decompress.unused_data + self.inputbuf
- self._read_internal(8)
-
- # Check the CRC and file size, and set the flag so we read
- # a new member on the next call
- self._read_eof()
- self._new_member = True
-
- def _add_read_data(self, data):
- self.crc = zlib.crc32(data, self.crc)
- self.extrabuf = self.extrabuf + data
- self.extrasize = self.extrasize + len(data)
- self.size = self.size + len(data)
-
- def _read_eof(self):
- # We've read to the end of the file, so we have to rewind in order
- # to reread the 8 bytes containing the CRC and the file size.
- # We check the that the computed CRC and size of the
- # uncompressed data matches the stored values. Note that the size
- # stored is the true file size mod 2**32.
- crc32 = unpack32(self.last8[:4])
- isize = U32(unpack32(self.last8[4:])) # may exceed 2GB
- if U32(crc32) != U32(self.crc):
- raise IOError, "CRC check failed"
- elif isize != LOWU32(self.size):
- raise IOError, "Incorrect length of data produced"
-
- def close(self):
- if self.mode == WRITE:
- self.fileobj.write(self.compress.flush())
- # The native zlib crc is an unsigned 32-bit integer, but
- # the Python wrapper implicitly casts that to a signed C
- # long. So, on a 32-bit box self.crc may "look negative",
- # while the same crc on a 64-bit box may "look positive".
- # To avoid irksome warnings from the `struct` module, force
- # it to look positive on all boxes.
- write32u(self.fileobj, LOWU32(self.crc))
- # self.size may exceed 2GB, or even 4GB
- write32u(self.fileobj, LOWU32(self.size))
- self.fileobj = None
- elif self.mode == READ:
- self.fileobj = None
- if self.myfileobj:
- self.myfileobj.close()
- self.myfileobj = None
-
- def __del__(self):
- try:
- if (self.myfileobj is None and
- self.fileobj is None):
- return
- except AttributeError:
- return
- self.close()
-
- def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
- if self.mode == WRITE:
- # Ensure the compressor's buffer is flushed
- self.fileobj.write(self.compress.flush(zlib_mode))
- self.fileobj.flush()
-
- def fileno(self):
- """Invoke the underlying file object's fileno() method.
-
- This will raise AttributeError if the underlying file object
- doesn't support fileno().
- """
- return self.fileobj.fileno()
-
- def isatty(self):
- return False
-
- def tell(self):
- return self.offset
-
- def rewind(self):
- '''Return the uncompressed stream file position indicator to the
- beginning of the file'''
- if self.mode != READ:
- raise IOError("Can't rewind in write mode")
- self.fileobj.seek(0)
- self._new_member = True
- self.extrabuf = ""
- self.extrasize = 0
- self.offset = 0
-
- def seek(self, offset):
- if self.mode == WRITE:
- if offset < self.offset:
- raise IOError('Negative seek in write mode')
- count = offset - self.offset
- for i in range(count // 1024):
- self.write(1024 * '\0')
- self.write((count % 1024) * '\0')
- elif self.mode == READ:
- if offset < self.offset:
- # for negative seek, rewind and do positive seek
- self.rewind()
- count = offset - self.offset
- for i in range(count // 1024):
- self.read(1024)
- self.read(count % 1024)
-
- def readline(self, size=-1):
- if size < 0:
- size = sys.maxint
- readsize = self.min_readsize
- else:
- readsize = size
- bufs = []
- while size != 0:
- c = self.read(readsize)
- i = c.find('\n')
-
- # We set i=size to break out of the loop under two
- # conditions: 1) there's no newline, and the chunk is
- # larger than size, or 2) there is a newline, but the
- # resulting line would be longer than 'size'.
- if (size <= i) or (i == -1 and len(c) > size):
- i = size - 1
-
- if i >= 0 or c == '':
- bufs.append(c[:i + 1]) # Add portion of last chunk
- self._unread(c[i + 1:]) # Push back rest of chunk
- break
-
- # Append chunk to list, decrease 'size',
- bufs.append(c)
- size = size - len(c)
- readsize = min(size, readsize * 2)
- if readsize > self.min_readsize:
- self.min_readsize = min(readsize, self.min_readsize * 2, 512)
- return ''.join(bufs) # Return resulting line
-
- def readlines(self, sizehint=0):
- # Negative numbers result in reading all the lines
- if sizehint <= 0:
- sizehint = sys.maxint
- L = []
- while sizehint > 0:
- line = self.readline()
- if line == "":
- break
- L.append(line)
- sizehint = sizehint - len(line)
-
- return L
-
- def writelines(self, L):
- for line in L:
- self.write(line)
-
- def __iter__(self):
- return self
-
- def next(self):
- line = self.readline()
- if line:
- return line
- else:
- raise StopIteration
-
-
-def _test():
- # Act like gzip; with -d, act like gunzip.
- # The input file is not deleted, however, nor are any other gzip
- # options or features supported.
- args = sys.argv[1:]
- decompress = args and args[0] == "-d"
- if decompress:
- args = args[1:]
- if not args:
- args = ["-"]
- for arg in args:
- if decompress:
- if arg == "-":
- f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
- g = sys.stdout
- else:
- if arg[-3:] != ".gz":
- print "filename doesn't end in .gz:", repr(arg)
- continue
- f = open(arg, "rb")
- g = __builtin__.open(arg[:-3], "wb")
- else:
- if arg == "-":
- f = sys.stdin
- g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
- else:
- f = __builtin__.open(arg, "rb")
- g = open(arg + ".gz", "wb")
- while True:
- chunk = f.read(1024)
- if not chunk:
- break
- g.write(chunk)
- if g is not sys.stdout:
- g.close()
- if f is not sys.stdin:
- f.close()
-
-if __name__ == '__main__':
- _test()
diff --git a/obitools/gzip.pyc b/obitools/gzip.pyc
deleted file mode 100644
index 9c44a43..0000000
Binary files a/obitools/gzip.pyc and /dev/null differ
diff --git a/obitools/location/__init__.py b/obitools/location/__init__.py
deleted file mode 100644
index b5463b0..0000000
--- a/obitools/location/__init__.py
+++ /dev/null
@@ -1,538 +0,0 @@
-import obitools
-import re
-import array
-
-class Location(object):
- """
- Define a location on a sequence.
- """
-
- def extractSequence(self,sequence):
- '''
- Extract subsequence corresponding to a Location.
-
- @param sequence:
- @type sequence: C{BioSequence} or C{str}
- '''
- assert isinstance(sequence, (obitools.BioSequence,str)), \
- "sequence must be an instance of str or BioSequence"
-
- if isinstance(sequence, str):
- seq = self._extractSequence(sequence)
- else:
- if isinstance(sequence, obitools.AASequence):
- assert not self.needNucleic(), \
- "This location can be used only with Nucleic sequences"
- seq = self._extractSequence(str(sequence))
-
- if isinstance(sequence, obitools.AASequence):
- st = obitools.AASequence
- else:
- st = obitools.NucSequence
-
- seq = st(sequence.id,
- seq,
- sequence.definition,
- **sequence.getTags())
- seq['location']=str(self)
-
- if 'length' in sequence.getTags():
- seq['length']=len(seq)
-
- if hasattr(sequence, 'quality'):
- quality = self._extractQuality(sequence)
- seq.quality=quality
-
- return seq
-
- def isDirect(self):
- return None
-
- def isSimple(self):
- '''
- Indicate if a location is composed of a single continuous
- region or is composed by the junction of several locations
- by the C{join} operator.
-
- @return: C{True} if the location is composed of a single
- continuous region.
- @rtype: bool
- '''
-
- return None
-
- def isFullLength(self):
- return None
-
- def needNucleic(self):
- '''
- If a location contains a complement operator, it can be use
- only on nucleic sequence.
-
- @return: C{True} if location contains a complement operator
- @rtype: bool
- '''
- return None
-
- def getGloc(self):
- loc = self.simplify()
- assert loc.isDirect() is not None,"Gloc cannot be created for multi oriented location : %s" % str(loc)
- positions = ','.join([str(x) for x in loc._getglocpos()])
- return "(%s,%s)" % ({True:'T',False:'F'}[loc.isDirect()],
- positions)
-
- def shift(self,s):
- return None
-
- def getBegin(self):
- return None
-
- def getEnd(self):
- return None
-
- def getFivePrime(self):
- return self.getBegin()
-
- def getThreePrime(self):
- return self.getEnd()
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
- fivePrime=property(getFivePrime,None,None,"5' position of the location")
- threePrime=property(getThreePrime,None,None,"3' position of the location")
-
- def __abs__(self):
- assert self.isDirect() is not None,"Abs operator cannot be applied on non oriented location"
- if self.isDirect():
- return self
- else:
- return ComplementLocation(self).simplify()
-
- def __cmp__(self,y):
- if self.begin < y.begin:
- return -1
- if self.begin > y.begin:
- return 1
- if self.isDirect() == y.isDirect():
- return 0
- if self.isDirect() and not y.isDirect():
- return -1
- return 1
-
-class SimpleLocation(Location):
- """
- A simple location is describe a continuous region of
- a sequence define by a C{begin} and a C{end} position.
- """
-
- def __init__(self,begin,end):
- '''
- Build a new C{SimpleLocation} instance. Valid
- position are define on M{[1,N]} with N the length
- of the sequence.
-
- @param begin: start position of the location
- @type begin: int
- @param end: end position of the location
- @type end: int
- '''
- assert begin > 0 and end > 0
-
- self._begin = begin
- self._end = end
- self._before=False
- self._after=False
-
- def _extractSequence(self,sequence):
-
- assert ( self._begin < len(sequence)
- and self._end <= len(sequence)), \
- "Sequence length %d is too short" % len(sequence)
-
- return sequence[self._begin-1:self._end]
-
- def _extractQuality(self,sequence):
-
- assert ( self._begin < len(sequence)
- and self._end <= len(sequence)), \
- "Sequence length %d is too short" % len(sequence)
-
- return sequence.quality[self._begin-1:self._end]
-
-
- def isDirect(self):
- return True
-
- def isSimple(self):
- return True
-
- def isFullLength(self):
- return not (self.before or self.after)
-
- def simplify(self):
- if self._begin == self._end:
- return PointLocation(self._begin)
- else:
- return self
-
- def needNucleic(self):
- return False
-
- def __str__(self):
- before = {True:'<',False:''}[self.before]
- after = {True:'>',False:''}[self.after]
- return "%s%d..%s%d" % (before,self._begin,after,self._end)
-
- def shift(self,s):
- assert (self._begin + s) > 0,"shift to large (%d)" % s
- if s == 0:
- return self
- return SimpleLocation(self._begin + s, self._end + s)
-
- def _getglocpos(self):
- return (self.begin,self.end)
-
- def getGloc(self):
- positions = ','.join([str(x) for x in self._getglocpos()])
- return "(%s,%s)" % ({True:'T',False:'F'}[self.isDirect()],
- positions)
-
- def getBegin(self):
- return self._begin
-
- def getEnd(self):
- return self._end
-
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
-
- def getBefore(self):
- return self._before
-
- def getAfter(self):
- return self._after
-
- def setBefore(self,value):
- assert isinstance(value, bool)
- self._before=value
-
- def setAfter(self,value):
- assert isinstance(value, bool)
- self._after=value
-
- before=property(getBefore,setBefore,None)
- after=property(getAfter,setAfter,None)
-
-
-
-
-class PointLocation(Location):
- """
- A point location describes a location on a sequence
- limited to a single position
- """
-
- def __init__(self,position):
- assert position > 0
- self._pos=position
-
- def _extractSequence(self,sequence):
-
- assert self._end <= len(sequence), \
- "Sequence length %d is too short" % len(sequence)
-
- return sequence[self._pos-1]
-
- def _extractQuality(self,sequence):
-
- assert self._end <= len(sequence), \
- "Sequence length %d is too short" % len(sequence)
-
- return sequence[self._pos-1:self._pos]
-
- def isDirect(self):
- return True
-
- def isSimple(self):
- return True
-
- def isFullLength(self):
- return True
-
- def simplify(self):
- return self
-
- def needNucleic(self):
- return False
-
- def shift(self,s):
- assert (self._pos + s) > 0,"shift to large (%d)" % s
- if s == 0:
- return self
- return PointLocation(self._pos + s)
-
- def _getglocpos(self):
- return (self._pos,self._pos)
-
- def getBegin(self):
- return self._pos
-
- def getEnd(self):
- return self._pos
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
-
- def __str__(self):
- return str(self._pos)
-
-class CompositeLocation(Location):
- """
- """
- def __init__(self,locations):
- self._locs = tuple(locations)
-
-
- def _extractSequence(self,sequence):
- seq = ''.join([x._extractSequence(sequence)
- for x in self._locs])
- return seq
-
- def _extractQuality(self,sequence):
- rep=array.array('d',[])
- for x in self._locs:
- rep.extend(x._extractQuality(sequence))
- return rep
-
- def isDirect(self):
- hasDirect,hasReverse = reduce(lambda x,y: (x[0] or y,x[1] or not y),
- (z.isDirect() for z in self._locs),(False,False))
-
- if hasDirect and not hasReverse:
- return True
- if hasReverse and not hasDirect:
- return False
-
- return None
-
-
- def isSimple(self):
- return False
-
-
- def simplify(self):
- if len(self._locs)==1:
- return self._locs[0]
-
- rep = CompositeLocation(x.simplify() for x in self._locs)
-
- if reduce(lambda x,y : x and y,
- (isinstance(z, ComplementLocation)
- for z in self._locs)):
- rep = ComplementLocation(CompositeLocation(x._loc.simplify()
- for x in rep._locs[::-1]))
-
- return rep
-
- def isFullLength(self):
- return reduce(lambda x,y : x and y, (z.isFullLength() for z in self._locs),1)
-
- def needNucleic(self):
- return reduce(lambda x,y : x or y,
- (z.needNucleic for z in self._locs),
- False)
-
- def _getglocpos(self):
- return reduce(lambda x,y : x + y,
- (z._getglocpos() for z in self._locs))
-
-
- def getBegin(self):
- return min(x.getBegin() for x in self._locs)
-
- def getEnd(self):
- return max(x.getEnd() for x in self._locs)
-
- def shift(self,s):
- assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
- if s == 0:
- return self
- return CompositeLocation(x.shift(s) for x in self._locs)
-
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
-
-
- def __str__(self):
- return "join(%s)" % ','.join([str(x)
- for x in self._locs])
-
-class ComplementLocation(Location):
- """
- """
-
- _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
- 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
- 's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
- 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
- '-': '-'}
-
- def __init__(self,location):
- self._loc = location
-
- def _extractSequence(self,sequence):
- seq = self._loc._extractSequence(sequence)
- seq = ''.join([ComplementLocation._comp.get(x.lower(),'n') for x in seq[::-1]])
- return seq
-
- def _extractQuality(self,sequence):
- return sequence.quality[::-1]
-
- def isDirect(self):
- return False
-
- def isSimple(self):
- return self._loc.isSimple()
-
- def isFullLength(self):
- return self._loc.isFullLength()
-
- def simplify(self):
- if isinstance(self._loc, ComplementLocation):
- return self._loc._loc.simplify()
- else:
- return self
-
- def needNucleic(self):
- return True
-
- def __str__(self):
- return "complement(%s)" % self._loc
-
- def shift(self,s):
- assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
- if s == 0:
- return self
- return ComplementLocation(self._loc.shift(s))
-
- def _getglocpos(self):
- return self._loc._getglocpos()
-
- def getBegin(self):
- return self._loc.getBegin()
-
- def getEnd(self):
- return self._loc.getEnd()
-
- def getFivePrime(self):
- return self.getEnd()
-
- def getThreePrime(self):
- return self.getBegin()
-
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
- fivePrime=property(getFivePrime,None,None,"5' potisition of the location")
- threePrime=property(getThreePrime,None,None,"3' potisition of the location")
-
-
- #
- # Internal functions used for location parsing
- #
-
-def __sublocationIterator(text):
- sl = []
- plevel=0
- for c in text:
- assert plevel>=0,"Misformated location : %s" % text
- if c == '(':
- plevel+=1
- sl.append(c)
- elif c==')':
- plevel-=1
- sl.append(c)
- elif c==',' and plevel == 0:
- assert sl,"Misformated location : %s" % text
- yield ''.join(sl)
- sl=[]
- else:
- sl.append(c)
- assert sl and plevel==0,"Misformated location : %s" % text
- yield ''.join(sl)
-
-
-
- #
- # Internal functions used for location parsing
- #
-
-__simplelocparser = re.compile('(?P)(?P[0-9]+)(\.\.(?P>?)(?P[0-9]+))?')
-
-
-def __locationParser(text):
- text=text.strip()
- if text[0:5]=='join(':
- assert text[-1]==')',"Misformated location : %s" % text
- return CompositeLocation(__locationParser(sl) for sl in __sublocationIterator(text[5:-1]))
- elif text[0:11]=='complement(':
- assert text[-1]==')',"Misformated location : %s" % text
- subl = tuple(__locationParser(sl) for sl in __sublocationIterator(text[11:-1]))
- if len(subl)>1:
- subl = CompositeLocation(subl)
- else:
- subl = subl[0]
- return ComplementLocation(subl)
- else:
- data = __simplelocparser.match(text)
- assert data is not None,"Misformated location : %s" % text
- data = data.groupdict()
- if not data['to'] :
- sl = PointLocation(int(data['from']))
- else:
- sl = SimpleLocation(int(data['from']),int(data['to']))
- sl.before=data['before']=='<'
- sl.after=data['after']=='>'
- return sl
-
-def locationGenerator(locstring):
- '''
- Parse a location string as present in genbank or embl file.
-
- @param locstring: string description of the location in embl/gb format
- @type locstring: str
-
- @return: a Location instance
- @rtype: C{Location} subclass instance
- '''
- return __locationParser(locstring)
-
-
-_matchExternalRef = re.compile('[A-Za-z0-9_|]+(\.[0-9]+)?(?=:)')
-
-def extractExternalRefs(locstring):
- '''
- When a location describe external references (ex: D28156.1:1..>1292)
- separate the external reference part of the location and the location
- by itself.
-
- @param locstring: text representation of the location.
- @type locstring: str
-
- @return: a tuple with a set of string describing accession number
- of the referred sequences and a C{Location} instance.
-
- @rtype: tuple(set,Location)
- '''
- m = set(x.group() for x in _matchExternalRef.finditer(locstring))
- clean = re.compile(':|'.join([re.escape(x) for x in m])+':')
- cloc = locationGenerator(clean.sub('',locstring))
-
- return m,cloc
-
-
-
-
-
diff --git a/obitools/location/__init__.pyc b/obitools/location/__init__.pyc
deleted file mode 100644
index 545f024..0000000
Binary files a/obitools/location/__init__.pyc and /dev/null differ
diff --git a/obitools/location/feature.py b/obitools/location/feature.py
deleted file mode 100644
index 89a183f..0000000
--- a/obitools/location/feature.py
+++ /dev/null
@@ -1,177 +0,0 @@
-from obitools.location import Location,locationGenerator
-import logging
-import re
-
-
-
-
-_featureMatcher = re.compile('^(FT| ) [^ ].+\n((FT| ) .+\n)+',re.M)
-_featureCleaner = re.compile('^FT',re.M)
-
-
-def textFeatureIterator(fttable):
- '''
- Iterate through a textual description of a feature table in a genbank
- or embl format. Return at each step a text representation of each individual
- feature composing the table.
-
- @param fttable: a string corresponding to the feature table of a genbank
- or an embl entry
-
- @type fttable: C{str}
-
- @return: an iterator on str
- @rtype: iterator
-
- @see: L{ftParser}
- '''
- for m in _featureMatcher.finditer(fttable):
- t = m.group()
- t = _featureCleaner.sub(' ',t)
- yield t
-
-_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M)
-_qualifierCleanner= re.compile("^ +",re.M)
-
-def qualifierIterator(qualifiers):
- '''
- Parse a textual description of a feature in embl or genbank format
- as returned by the textFeatureIterator iterator and iterate through
- the key, value qualified defining this location.
-
- @param qualifiers: substring containing qualifiers
- @type qualifiers: str
-
- @return: an iterator on tuple (key,value), where keys are C{str}
- @rtype: iterator
- '''
- for m in _qualifierMatcher.finditer(qualifiers):
- t = m.group()
- t = _qualifierCleanner.sub('',t)
- t = t.split('=',1)
- if len(t)==1:
- t = (t[0],None)
- else:
- if t[0]=='translation':
- value = t[1].replace('\n','')
- else:
- value = t[1].replace('\n',' ')
- try:
- value = eval(value)
- except:
- pass
- t = (t[0],value)
- yield t
-
-
-_ftmatcher = re.compile('(?<=^ {5})\S+')
-_locmatcher= re.compile('(?<=^.{21})[^/]+',re.DOTALL)
-_cleanloc = re.compile('[\s\n]+')
-_qualifiersMatcher = re.compile('^ +/.+',re.M+re.DOTALL)
-
-def ftParser(feature):
- fttype = _ftmatcher.search(feature).group()
- location=_locmatcher.search(feature).group()
- location=_cleanloc.sub('',location)
- qualifiers=_qualifiersMatcher.search(feature)
- if qualifiers is not None:
- qualifiers=qualifiers.group()
- else:
- qualifiers=""
- logging.debug("Qualifiers regex not matching on \n=====\n%s\n========" % feature)
-
- return fttype,location,qualifiers
-
-
-class Feature(dict,Location):
- def __init__(self,type,location):
- self._fttype=type
- self._loc=location
-
- def getFttype(self):
- return self._fttype
-
-
- def extractSequence(self,sequence,withQualifier=False):
- seq = self._loc.extractSequence(sequence)
- if withQualifier:
- seq.getInfo().update(self)
- return seq
-
- def isDirect(self):
- return self._loc.isDirect()
-
- def isSimple(self):
- return self._loc.isSimple()
-
- def isFullLength(self):
- return self._loc.isFullLength()
-
- def simplify(self):
- f = Feature(self._fttype,self._loc.simplify())
- f.update(self)
- return f
-
- def locStr(self):
- return str(self._loc)
-
- def needNucleic(self):
- return self._loc.needNucleic()
-
- def __str__(self):
- return repr(self)
-
- def __repr__(self):
- return str((self.ftType,str(self._loc),dict.__repr__(self)))
-
- def __cmp__(self,y):
- return self._loc.__cmp__(y)
-
- def _getglocpos(self):
- return self._loc._getglocpos()
-
- ftType = property(getFttype, None, None, "Feature type name")
-
- def shift(self,s):
- assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
- if s == 0:
- return self
- f = Feature(self._fttype,self._loc.shift(s))
- f.update(self)
- return f
-
-
- def getBegin(self):
- return self._loc.getBegin()
-
- def getEnd(self):
- return self._loc.getEnd()
-
- begin = property(getBegin,None,None,"beginning position of the location")
- end = property(getEnd,None,None,"ending position of the location")
-
-
-def featureFactory(featureDescription):
- fttype,location,qualifiers = ftParser(featureDescription)
- location = locationGenerator(location)
- feature = Feature(fttype,location)
- feature.raw = featureDescription
-
- for k,v in qualifierIterator(qualifiers):
- feature.setdefault(k,[]).append(v)
-
- return feature
-
-def featureIterator(featureTable,skipError=False):
- for tft in textFeatureIterator(featureTable):
- try:
- feature = featureFactory(tft)
- except AssertionError,e:
- logging.debug("Parsing error on feature :\n===============\n%s\n===============" % tft)
- if not skipError:
- raise e
- logging.debug("\t===> Error skipped")
- continue
-
- yield feature
-
\ No newline at end of file
diff --git a/obitools/metabarcoding/__init__.py b/obitools/metabarcoding/__init__.py
deleted file mode 100644
index 3b29b17..0000000
--- a/obitools/metabarcoding/__init__.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from obitools.ecopcr.options import addTaxonomyFilterOptions,\
- loadTaxonomyDatabase
-from obitools.graph import UndirectedGraph
-from obitools.align import lenlcs,isLCSReachable
-from obitools.graph.algorithms.component import componentIterator
-from obitools.utils.bioseq import uniqSequence
-from obitools.utils import progressBar
-import math
-import sys
-from obitools.graph.rootedtree import RootedTree
-
-def average(x):
- x=list(x)
- s = sum(i*j for (i,j) in x)
- n = sum(i[1] for i in x)
- return (float(s)/float(n),n)
-
-def minimum(x):
- x=list(x)
- m = min(i[0] for i in x)
- n = sum(i[1] for i in x)
- return (float(m),n)
-
-def ecoPCRReader(entries,options):
-
- taxonomy = loadTaxonomyDatabase(options)
-
- norankid =options.taxonomy.findRankByName('no rank')
- speciesid=options.taxonomy.findRankByName('species')
- genusid =options.taxonomy.findRankByName('genus')
- familyid =options.taxonomy.findRankByName('family')
-
- minrankseq = set([speciesid,genusid,familyid])
-
- usedrankid = {}
-
- ingroup = []
- outgroup= []
-
- for s in entries:
- if 'taxid' in s :
- taxid = s['taxid']
- if taxid in taxonomy:
- allrank = set()
- for p in options.taxonomy.parentalTreeIterator(taxid):
- if p[1]!=norankid:
- allrank.add(p[1])
- if len(minrankseq & allrank) == 3:
- for r in allrank:
- usedrankid[r]=usedrankid.get(r,0) + 1
-
- if taxonomy.isAncestor(options.ingroup,taxid):
- ingroup.append(s)
- else:
- outgroup.append(s)
-
- keptrank = set(r for r in usedrankid
- if float(usedrankid[r])/float(len(ingroup)) > options.rankthresold)
-
- return { 'ingroup' : ingroup,
- 'outgroup': outgroup,
- 'ranks' : keptrank
- }
-
-def buildSimilarityGraph(dbseq,ranks,taxonomy,dcmax=5):
-
- ldbseq = len(dbseq)
- pos = 1
- digit = int(math.ceil(math.log10(ldbseq)))
- header = "Alignment : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit)
- aligncount = ldbseq*(ldbseq+1)/2
- edgecount = 0
- print >>sys.stderr
-
- progressBar(1,aligncount,True,"Alignment : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit))
-
-
- sim = UndirectedGraph()
-
- i=0
- for s in dbseq:
- taxid = s['taxid']
-
- rtaxon = dict((rid,taxonomy.getTaxonAtRank(taxid,rid))
- for rid in ranks)
-
- sim.addNode(i, seq=s,taxid=taxid,rtaxon=rtaxon)
-
- i+=1
-
-# aligner = LCS()
-
- for is1 in xrange(ldbseq):
- s1 = dbseq[is1]
- ls1= len(s1)
-# aligner.seqA=s1
-
- for is2 in xrange(is1+1,ldbseq):
-
- s2=dbseq[is2]
- ls2=len(s2)
-
- lm = max(ls1,ls2)
- lcsmin = lm - dcmax
-
- if isLCSReachable(s1,s2,lcsmin):
- llcs,lali=lenlcs(s1,s2)
- ds1s2 = lali - llcs
-
- if ds1s2 <= dcmax:
- sim.addEdge(node1=is1, node2=is2,ds1s2=ds1s2,label=ds1s2)
- edgecount+=1
-
- progressBar(pos,aligncount,head=header % (is1,is2,edgecount))
- pos+=(ldbseq-is1-1)
-
- return sim
-
-def buildTsr(component):
- '''
- Build for each consider taxonomic rank the list of taxa
- present in the connected component
-
- :param component: the analyzed connected component
- :type component: :py:class:`UndirectedGraph`
-
- :return: a dictionary indexed by rankid containing a `dict` indexed by taxid and containing count of sequences for this taxid
- :rtype: `dict` indexed by `int` containing `dict` indexed by `int` and containing of `int`
-
- '''
- taxalist = {}
- for n in component:
- for r in n['rtaxon']:
- rtaxid = n['rtaxon'][r]
- if rtaxid is not None:
- ts = taxalist.get(r,{})
- ts[rtaxid]=ts.get(rtaxid,0)+1
- taxalist[r]=ts
-
- return taxalist
-
-def edgeDistSelector(dcmax):
- def predicate(e):
- return e['ds1s2'] <= dcmax
- return predicate
-
-def distanceOfConfusion(simgraph,dcmax=5,aggregate=average):
-
- alltaxa = set()
-
- for n in simgraph:
- alltaxa|=set(n['rtaxon'].values())
-
- taxacount = len(alltaxa)
-
- result = {}
-
- pos = [1]
- header = "Component : %-5d Identified : %-8d "
- progressBar(1,taxacount,True,header % (0,0))
-
- def _idc(cc,dcmax):
- composante=[]
- for x in cc:
- composante.extend(simgraph.subgraph(c)
- for c in componentIterator(x,
- edgePredicat=edgeDistSelector(dcmax)))
-
- good = set()
- bad = {}
-
- complexe = []
-
- for c in composante:
- tsr = buildTsr(c)
- newbad=False
- for r in tsr:
- if len(tsr[r]) == 1:
- taxid = tsr[r].keys()[0]
- good.add((taxid,tsr[r][taxid]))
- else:
- newbad=True
- for taxid in tsr[r]:
- bad[taxid]=bad.get(taxid,0)+tsr[r][taxid]
- if newbad:
- complexe.append(c)
-
-# good = good - bad
-
- for taxid,weight in good:
- if taxid not in result:
- result[taxid]=[]
- result[taxid].append((dcmax+1,weight))
-
-
- progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
- pos[0]=len(result)
-
- if dcmax > 0:
- dcmax-=1
- _idc(complexe,dcmax)
-
- else:
- for taxid in bad:
- if taxid not in result:
- result[taxid]=[]
- result[taxid].append((0,bad[taxid]))
-
- progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
- pos[0]=len(result)
-
- _idc([simgraph],dcmax)
-
- for taxid in result:
- result[taxid]=aggregate(result[taxid])
- return result
-
-def propagateDc(tree,node=None,aggregate=min):
- if node is None:
- node = tree.getRoots()[0]
- dca=aggregate(n['dc'] for n in node.leavesIterator())
- node['dc']=dca
- for n in node:
- propagateDc(tree, n, aggregate)
-
-def confusionTree(distances,ranks,taxonomy,aggregate=min,bsrank='species',dcmax=1):
-
- def Bs(node,rank,dcmax):
- n = len(node)
- if n:
- g = [int(x['dc']>=dcmax) for x in node.subgraphIterator() if x['rank']==bsrank]
- n = len(g)
- g = sum(g)
- bs= float(g)/float(n)
- node['bs']=bs
- node['bs_label']="%3.2f (%d)" % (bs,n)
-
- for n in node:
- Bs(n,rank,dcmax)
-
- tree = RootedTree()
- ranks = set(ranks)
- tset = set(distances)
-
- for taxon in distances:
- tree.addNode(taxon, rank=taxonomy.getRank(taxon),
- name=taxonomy.getScientificName(taxon),
- dc=float(distances[taxon][0]),
- n=distances[taxon][1],
- dc_label="%4.2f (%d)" % (float(distances[taxon][0]),distances[taxon][1])
- )
-
- for taxon in distances:
- piter = taxonomy.parentalTreeIterator(taxon)
- taxon = piter.next()
- for parent in piter:
- if taxon[0] in tset and parent[0] in distances:
- tset.remove(taxon[0])
- tree.addEdge(parent[0], taxon[0])
- taxon=parent
-
- root = tree.getRoots()[0]
- Bs(root,bsrank,dcmax)
-
- return tree
diff --git a/obitools/metabarcoding/options.py b/obitools/metabarcoding/options.py
deleted file mode 100644
index 08ff423..0000000
--- a/obitools/metabarcoding/options.py
+++ /dev/null
@@ -1,34 +0,0 @@
-'''
-Created on 30 oct. 2011
-
-@author: coissac
-'''
-
-from obitools.ecopcr.options import addTaxonomyDBOptions
-
-
-def addMetabarcodingOption(optionManager):
-
- addTaxonomyDBOptions(optionManager)
-
- optionManager.add_option('--dcmax',
- action="store", dest="dc",
- metavar="###",
- type="int",
- default=0,
- help="Maximum confusion distance considered")
-
- optionManager.add_option('--ingroup',
- action="store", dest="ingroup",
- metavar="###",
- type="int",
- default=1,
- help="ncbi taxid delimitation the in group")
-
- optionManager.add_option('--rank-thresold',
- action="store", dest="rankthresold",
- metavar="#.##",
- type="float",
- default=0.5,
- help="minimum fraction of the ingroup sequences "
- "for concidering the rank")
diff --git a/obitools/obischemas/__init__.py b/obitools/obischemas/__init__.py
deleted file mode 100644
index 6bcafde..0000000
--- a/obitools/obischemas/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from obitools.obischemas import kb
-__connection__ = None
-
-def initConnection(options):
- global __connection__
- param = {}
- if hasattr(options, "dbname") and options.dbname is not None:
- param["database"]=options.dbname
- if hasattr(options, "dbhost") and options.dbhost is not None:
- param["host"]=options.dbhost
- if hasattr(options, "dbuser") and options.dbuser is not None:
- param["username"]=options.dbuser
- if hasattr(options, "dbpassword") and options.dbpassword is not None:
- param["password"]=options.dbpassword
-
- __connection__=kb.getConnection(**param)
- __connection__.autocommit=options.autocommit
-
-def getConnection(options=None):
- global __connection__
-
- if options is not None:
- initConnection(options)
-
- assert __connection__ is not None,"database connection is not initialized"
-
- return __connection__
-
\ No newline at end of file
diff --git a/obitools/obischemas/kb/__init__.py b/obitools/obischemas/kb/__init__.py
deleted file mode 100644
index 7d35dcb..0000000
--- a/obitools/obischemas/kb/__init__.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
- kb package is devoted to manage access to postgresql database from python
- script
-"""
-
-
-class Connection(object):
-
- def __init__(self):
- raise RuntimeError('pyROM.KB.Connection is an abstract class')
-
- def cursor(self):
- raise RuntimeError('pyROM.KB.Connection.cursor is an abstract function')
-
- def commit(self):
- raise RuntimeError('pyROM.KB.Connection.commit is an abstract function')
-
- def rollback(self):
- raise RuntimeError('pyROM.KB.Connection.rollback is an abstract function')
-
- def __call__(self,query):
- return self.cursor().execute(query)
-
-
-class Cursor(object):
-
- def __init__(self,db):
- raise RuntimeError('pyROM.KB.Cursor is an abstract class')
-
- def execute(self,query):
- raise RuntimeError('pyROM.KB.Cursor.execute is an abstract function')
-
- __call__=execute
-
-
-_current_connection = None # Static variable used to store connection to KB
-
-def getConnection(*args,**kargs):
- """
- return a connection to the database.
- When call from database backend no argument are needed.
- All connection returned by this function
- """
- global _current_connection
-
- if _current_connection==None or args or kargs :
- try:
- from obischemas.kb import backend
- _current_connection = backend.Connection()
- except ImportError:
- from obischemas.kb import extern
- _current_connection = extern.Connection(*args,**kargs)
- return _current_connection
-
-
diff --git a/obitools/obischemas/kb/extern.py b/obitools/obischemas/kb/extern.py
deleted file mode 100644
index ce2ff84..0000000
--- a/obitools/obischemas/kb/extern.py
+++ /dev/null
@@ -1,78 +0,0 @@
-"""
-Module : KB.extern
-Author : Eric Coissac
-Date : 03/05/2004
-
-Module wrapping psycopg interface module to allow connection
-to a postgresql databases with the same interface from
-backend and external script.
-
-This module define a class usable from external script
-"""
-
-
-import psycopg2
-import sys
-from obischemas import kb
-
-class Connection(kb.Connection):
-
- def __init__(self,*connectParam,**kconnectParam):
- if connectParam:
- self.connectParam=={'dsn':connectParam}
- else:
- self.connectParam=kconnectParam
- print self.connectParam
- self.db = psycopg2.connect(**(self.connectParam))
-
- def restart(self):
- ok=1
- while (ok and ok < 1000):
- try:
- self.db = psycopg2.connect(**self.connectParam)
- except:
- ok+=1
- else:
- ok=0
-
-
- def cursor(self):
- curs = Cursor(self.db)
- if hasattr(self,'autocommit') and self.autocommit:
- curs.autocommit = self.autocommit
- return curs
-
- def commit(self):
- self.db.commit()
-
- def rollback(self):
- if hasattr(self,'db'):
- self.db.rollback()
-
- def __del__(self):
- if hasattr(self,'db'):
- self.rollback()
-
-class Cursor(kb.Cursor):
-
- def __init__(self,db):
- self.db = db
- self.curs = db.cursor()
-
- def execute(self,query):
- try:
- self.curs.execute(query)
- if hasattr(self,'autocommit') and self.autocommit:
- self.db.commit()
- except psycopg2.ProgrammingError,e:
- print >>sys.stderr,"===> %s" % query
- raise e
- except psycopg2.IntegrityError,e:
- print >>sys.stderr,"---> %s" % query
- raise e
- try:
- label = [x[0] for x in self.curs.description]
- return [dict(map(None,label,y))
- for y in self.curs.fetchall()]
- except TypeError:
- return []
diff --git a/obitools/obischemas/options.py b/obitools/obischemas/options.py
deleted file mode 100644
index 66f5138..0000000
--- a/obitools/obischemas/options.py
+++ /dev/null
@@ -1,31 +0,0 @@
-def addConnectionOptions(optionManager):
-
- optionManager.add_option('-d','--dbname',
- action="store", dest="dbname",
- metavar="",
- type="string",
- help="OBISchema database name containing"
- "taxonomical data")
-
- optionManager.add_option('-H','--host',
- action="store", dest="dbhost",
- metavar="",
- type="string",
- help="host hosting OBISchema database")
-
- optionManager.add_option('-U','--user',
- action="store", dest="dbuser",
- metavar="",
- type="string",
- help="user for OBISchema database connection")
-
- optionManager.add_option('-W','--password',
- action="store", dest="dbpassword",
- metavar="",
- type="string",
- help="password for OBISchema database connection")
-
- optionManager.add_option('-A','--autocommit',
- action="store_true",dest="autocommit",
- default=False,
- help="add commit action after each query")
\ No newline at end of file
diff --git a/obitools/obo/__init__.py b/obitools/obo/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/obo/go/__init__.py b/obitools/obo/go/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/obo/go/parser.py b/obitools/obo/go/parser.py
deleted file mode 100644
index 6902974..0000000
--- a/obitools/obo/go/parser.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from obitools.obo.parser import OBOTerm
-from obitools.obo.parser import OBOEntry
-from obitools.obo.parser import stanzaIterator
-from logging import debug
-
-class GOEntry(OBOEntry):
- '''
- An entry of a GeneOntology .obo file. It can be a header (without a stanza name) or
- a stanza (with a stanza name between brackets). It inherits from the class dict.
- '''
-
-
-class GOTerm(OBOTerm):
-
- '''
- A stanza named 'Term'. It inherits from the class OBOTerm.
- '''
-
- def __init__(self,stanza):
-
- ## use of the OBOEntry constructor.
- OBOTerm.__init__(self, stanza)
-
- assert 'namespace' in self and len(self['namespace'])==1, "An OBOTerm must belong to one of the cell_component, molecular_function or biological_process namespace"
-
-
-def GOEntryFactory(stanza):
- '''
- Dispatcher of stanza.
-
- @param stanza: a stanza composed of several lines.
- @type stanza: text
-
- @return: an C{OBOTerm} | C{OBOEntry} instance
-
- @note: The dispatcher treats differently the stanza which are OBO "Term"
- and the others.
- '''
-
- stanzaType = OBOEntry.parseStanzaName(stanza)
-
- if stanzaType=="Term":
- return GOTerm(stanza)
- else:
- return OBOEntry(stanza)
-
-
-def GOEntryIterator(file):
- entries = stanzaIterator(file)
- for e in entries:
- debug(e)
- yield GOEntryFactory(e)
-
diff --git a/obitools/obo/parser.py b/obitools/obo/parser.py
deleted file mode 100644
index f6f05f3..0000000
--- a/obitools/obo/parser.py
+++ /dev/null
@@ -1,707 +0,0 @@
-from obitools.utils import skipWhiteLineIterator,multiLineWrapper
-from obitools.utils import universalOpen
-from obitools.format.genericparser import genericEntryIteratorGenerator
-from logging import debug,warning
-
-import re
-
-
-#################################################################################
-## Stanza preparation area ##
-#################################################################################
-
-
-class FileFormatError(Exception):
- '''
- An error derived from the class Exception.
- '''
- pass
-
-_oboEntryIterator = genericEntryIteratorGenerator(endEntry='^ *$',
- strip=True)
-
-def stanzaIterator(inputfile):
- '''
- Iterator of stanza. The stanza are the basic units of OBO files.
-
- @param inputfile: a stream of strings from an opened OBO file.
- @type inputfile: a stream of strings
-
- @return: a stream of stanza
- @rtype: a stream of aggregated strings
-
- @note: The iterator constructs stanza by aggregate strings from the
- OBO file.
- '''
- inputfile = universalOpen(inputfile)
- inputfile = multiLineWrapper(inputfile)
- return _oboEntryIterator(inputfile)
-
-
-
-#################################################################################
-## Trailing Modifiers treatment area ##
-#################################################################################
-
-
-class TrailingModifier(dict):
- '''
- A class object which inherits from the class dict. Trailing modifiers can be found
- at the end of TaggedValue objects when they exist.
- '''
-
- _match_brace = re.compile('(?<=\ {)[^\]]*(\}) *( !|$)')
-
- def __init__(self,string):
-
- ## search for trailing modifiers signals
- trailing_modifiers = TrailingModifier._match_brace.search(string)
-
- ## the trailing modifiers exist
- if trailing_modifiers:
- trailing_modifiers=trailing_modifiers.group(0).strip()
- print trailing_modifiers
- ## creates and feeds the dictionary of trailing modifiers
- dict.__init__(self,(x.strip().split('=',1) for x in trailing_modifiers.split(',')))
-
-
-def trailingModifierFactory(string):
- '''
- Dispatcher of trailing modifiers.
-
- @param string: a string from a TaggedValue object with a trailing modifiers signal.
- @type string: string
-
- @return: a class object
-
- @note: The dispatcher is currently very simple. Only one case is treated by the function.
- `the function returns a class object inherited from the class dict if the trailing modifiers
- exist, None if they don't.
- '''
-
- trailing_modifiers = TrailingModifier(string)
- if not trailing_modifiers:
- trailing_modifiers=None
- return trailing_modifiers
-
-
-#################################################################################
-## TaggedValue treatment area ##
-#################################################################################
-
-
-class TaggedValue(object):
- '''
- A couple 'tag:value' of an OBOEntry.
- '''
-
- _match_value = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)')
- _split_comment = re.compile('^!| !')
- _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")')
- _match_bracket = re.compile('\[[^\]]*\]')
-
- def __init__(self,line):
- '''
- Constructor of the class TaggedValue.
-
- @param line: a line of an OBOEntry composed of a tag and a value.
- @type line: string
-
- @note: The constructor separates tags from right terms. 'value' is extracted
- from right terms using a regular expression (value is at the beginning of the
- string, between quotes or not). Then, 'comment' is extracted from the rest of the
- string using another regular expression ('comment' is at the end of the string
- after a '!'. By default, 'comment' is set to None). Finally, 'trailing_modifiers'
- are extracted from the last string using another regular expression.
- The tag, the value, the comment and the trailing_modifiers are saved.
- '''
-
- debug("tagValueParser : %s" % line)
-
- ## by default :
- trailing_modifiers = None
- comment = None
-
- ## the tag is saved. 'right' is composed of the value, the comment and the trailing modifiers
- tag,rigth = line.split(':',1)
-
- ## the value is saved
- value = TaggedValue._match_value.search(rigth).group(0)
- debug("Extracted value : %s" % value)
-
- ## if there is a value AND a sign of a comment or trailing modifiers
- if value and value[-1] in '!{':
- lvalue = len(value)
- ## whatever it is a comment or trailing modifiers, it is saved into 'extra'
- extra = rigth[lvalue-1:].strip()
- ## a comment is extracted
- extra =TaggedValue._split_comment.split(extra,1)
- ## and saved if it exists
- if len(extra)==2:
- comment=extra[1].strip()
- ## trailing modifiers are extracted
- extra=extra[0]
- trailing_modifiers = trailingModifierFactory(extra)
- ## the value is cleaned of any comment or trailing modifiers signals
- value = value[0:-1]
-
- if tag=='use_term':
- tag='consider'
- raise DeprecationWarning,"user_term is a deprecated tag, you should instead use consider"
-
- ## recording zone
- self.value =value.strip()
- self.tag = tag
- self.__doc__=comment
- self.trailing_modifiers=trailing_modifiers
-
- def __str__(self):
- return str(self.value)
-
- def __repr__(self):
- return '''"""%s"""''' % str(self)
-
-
-class NameValue(TaggedValue):
- '''
- A couple 'name:value' inherited from the class TaggedValue. Used to manage name tags.
- '''
-
- def __init__(self,line):
-
- ## no use of the TaggedValue constructor. The NameValue is very simple.
- tag,rigth = line.split(':',1)
-
- ## recording zone
- self.value = rigth.strip()
- self.tag = 'name'
- self.__doc__=None
- self.trailing_modifiers=None
-
-
-
-class DefValue(TaggedValue):
- '''
- A couple 'def:value' inherited from the class TaggedValue. Used to manage def tags.
- '''
-
- def __init__(self,line):
- '''
- Constructor of the class DefValue.
-
- @param line: a line of an OBOEntry composed of a tag named 'def' and a value.
- @type line: string
-
- @note: The constructor calls the TaggedValue constructor. A regular expression
- is used to extract the 'definition' from TaggedValue.value (definition is a not
- quoted TaggedValue.value). A regular expression is used to extract 'dbxrefs'
- from the aggedValue.value without the definition (dbxrefs are between brackets
- and definition can be so). Definition is saved as the new value of the DefValue.
- dbxrefs are saved.
- '''
-
- ## use of the TaggedValue constructor
- TaggedValue.__init__(self, line)
-
- ## definition, which is quoted, is extracted from the standard value of a TaggedValue.
- definition = TaggedValue._match_quotedString.search(self.value).group(0)
-
- ## the standard value is cleaned of the definition.
- cleanvalue = self.value.replace(definition,'')
- cleanvalue = cleanvalue.replace(' ',' ')
-
- ## dbxrefs are searched into the rest of the standard value.
- dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)
-
- ## recording zone
- self.tag = 'def'
- ## the value of a DefValue is not the standard value but the definition.
- self.value=definition
- self.dbxrefs=xrefFactory(dbxrefs)
-
-
-class SynonymValue(TaggedValue):
- '''
- A couple 'synonym:value' inherited from the class TaggedValue. Used to manage
- synonym tags, exact_synonym tags, broad_synonym tags and narrow_synonym tags.
- '''
-
- _match_scope = re.compile('(?<="")[^\[]*(?=\[|$)')
-
- def __init__(self,line):
- '''
- Constructor of the class SynonymValue.
-
- @param line: a line of an OBOEntry composed of a tag named 'synonym' or
- 'exact_synonym' or 'broad_synonym' or 'narrow_synonym' and a value.
- @type line: string
-
- @note: SynonymValue is composed of a tag, a value, a scope, a list of types and
- dbxrefs.
- The constructor calls the TaggedValue constructor. A regular expression
- is used to extract 'definition' from TaggedValue.value (definition is a not
- quoted TaggedValue.value). Definition is saved as the new value of the class
- SynonymValue.
- A regular expression is used to extract 'attributes' from the rest of the
- string. Attributes may contain an optional synonym scope and an optional list
- of synonym types. The scope is extracted from attributes or set by default to
- 'RELATED'. It is saved as the scope of the class. The types are the rest of the
- attributes and are saved as the list of types of the class.
- For deprecated tags 'exact_synonym', 'broad_synonym' and 'narrow_synonym', tag
- is set to 'synonym' and scope is set respectively to 'EXACT', 'BROAD' and 'NARROW'.
- A regular expression is used to extract 'dbxrefs' from the TaggedValue.value
- without the definition (dbxrefs are between brackets and definition can be so).
- dbxrefs are saved.
- '''
-
- ## use of the TaggedValue constructor
- TaggedValue.__init__(self, line)
-
- ## definition, which is quoted, is extracted from the standard value of a TaggedValue.
- definition = TaggedValue._match_quotedString.search(self.value).group(0)
-
- ## the standard value is cleaned of the definition.
- cleanvalue = self.value.replace(definition,'')
- cleanvalue = cleanvalue.replace(' ',' ')
-
- ## 1) attributes are searched into the rest of the standard value.
- ## 2) then they are stripped.
- ## 3) then they are split on every ' '.
- ## 4) finally they are ordered into a set.
- attributes = set(SynonymValue._match_scope.search(cleanvalue).group(0).strip().split())
-
- ## the scopes are the junction between the attributes and a set of specific terms.
- scopes = attributes & set(['RELATED','EXACT','BROAD','NARROW'])
-
- ## the types are the rest of the attributes.
- types = attributes - scopes
-
- ## this is a constraint of the OBO format
- assert len(scopes)< 2,"Only one synonym scope allowed"
-
- ## the scope of the SynonymValue is into scopes or set by default to RELATED
- if scopes:
- scope = scopes.pop()
- else:
- scope = 'RELATED'
-
- ## Specific rules are defined for the following tags :
- if self.tag == 'exact_synonym':
- raise DeprecationWarning,'exact_synonym is a deprecated tag use instead synonym tag'
- self.tag = 'synonym'
- scope = 'EXACT'
-
- if self.tag == 'broad_synonym':
- raise DeprecationWarning,'broad_synonym is a deprecated tag use instead synonym tag'
- self.tag = 'synonym'
- scope = 'BROAD'
-
- if self.tag == 'narrow_synonym':
- raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead synonym tag'
- self.tag = 'synonym'
- scope = 'NARROW'
-
- if self.tag == 'systematic_synonym':
- #raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead sysnonym tag'
- self.tag = 'synonym'
- scope = 'SYSTEMATIC'
-
- ## this is our own constraint. deprecated tags are not saved by this parser.
- assert self.tag =='synonym',"%s synonym type is not managed" % self.tag
-
- ## dbxrefs are searched into the rest of the standard value.
- dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)
-
- ## recording zone
- ## the value of a SynonymValue is not the standard value but the definition.
- self.value = definition
- self.dbxrefs = xrefFactory(dbxrefs)
- self.scope = scope
- self.types = list(types)
-
- def __eq__(self,b):
- return ((self.value==b.value) and (self.dbxrefs==b.dbxrefs)
- and (self.scope==b.scope) and (self.types==b.types)
- and (self.__doc__==b.__doc__) and (self.tag==b.tag)
- and (self.trailing_modifiers==b.trailing_modifiers))
-
- def __hash__(self):
- return (reduce(lambda x,y:x+y,(hash(z) for z in [self.__doc__,
- self.value,
- frozenset(self.dbxrefs),
- self.scope,
- frozenset(self.types),
- self.tag,
- self.trailing_modifiers]),0)) % (2**31)
-
-
-class XrefValue(TaggedValue):
- '''
- A couple 'xref:value' inherited from the class TaggedValue. Used to manage
- xref tags.
- '''
-
- def __init__(self,line):
-
- ## use of the TaggedValue constructor
- TaggedValue.__init__(self, line)
-
- ## use the same function as the dbxrefs
- self.value=xrefFactory(self.value)
-
- if self.tag in ('xref_analog','xref_unk'):
- raise DeprecationWarning,'%s is a deprecated tag use instead sysnonym tag' % self.tag
- self.tag='xref'
-
- ## this is our own constraint. deprecated tags are not saved by this parser.
- assert self.tag=='xref'
-
-
-class RelationshipValue(TaggedValue):
- '''
- A couple 'xref:value' inherited from the class TaggedValue. Used to manage
- xref tags.
- '''
-
- def __init__(self,line):
-
- ## use of the TaggedValue constructor
- TaggedValue.__init__(self, line)
-
- ## the value is split on the first ' '.
- value = self.value.split(None,1)
-
- ## succesful split !
- if len(value)==2:
- relationship=value[0]
- term=value[1]
- ## unsuccesful split. The relationship is set by default to IS_A
- else:
- relationship='is_a'
- term=value[0]
-
- ## recording zone
- self.value=term
- self.relationship=relationship
-
-
-class NamespaceValue(TaggedValue):
- def __init__(self,line):
- TaggedValue.__init__(self, line)
-
-class RemarkValue(TaggedValue):
- def __init__(self,line):
- TaggedValue.__init__(self, line)
- label,value = self.value.split(':',1)
- label = label.strip()
- value = value.strip()
- self.value=value
- self.label=label
-
-
-def taggedValueFactory(line):
- '''
- A function used to dispatch lines of an OBOEntry between the class TaggedValue
- and its inherited classes.
-
- @param line: a line of an OBOEntry composed of a tag and a value.
- @type line: string
-
- @return: a class object
- '''
-
- if (line[0:9]=='namespace' or
- line[0:17]=='default-namespace'):
- return NamespaceValue(line)
- ## DefValue is an inherited class of TaggedValue
- elif line[0:3]=='def':
- return DefValue(line)
- ## SynonymValue is an inherited class of TaggedValue
- elif ((line[0:7]=="synonym" and line[0:14]!="synonymtypedef") or
- line[0:13]=="exact_synonym" or
- line[0:13]=="broad_synonym" or
- line[0:14]=="narrow_synonym"):
- return SynonymValue(line)
- ## XrefValue is an inherited class of TaggedValue
- elif line[0:4]=='xref':
- return XrefValue(line)
- ## NameValue is an inherited class of TaggedValue
- elif line[0:4]=='name':
- return NameValue(line)
- ## RelationshipValue is an inherited class of TaggedValue
- elif (line[0:15]=='intersection_of' or
- line[0:8] =='union_of' or
- line[0:12]=='relationship'):
- return RelationshipValue(line)
- elif (line[0:6]=='remark'):
- return RemarkValue(line)
- ## each line is a couple : tag / value (and some more features)
- else:
- return TaggedValue(line)
-
-
-#################################################################################
-## Xref treatment area ##
-#################################################################################
-
-
-
-class Xref(object):
- '''
- A xref object of an OBOentry. It may be the 'dbxrefs' of SynonymValue and
- DefValue objects or the 'value' of XrefValue objects.
- '''
-
- __splitdata__ = re.compile(' +(?=["{])')
-
- def __init__(self,ref):
- if ref == '' : #
- ref = None #
- data = '' #
- else : # Modifs JJ sinon erreur : list index out of range
- data = Xref.__splitdata__.split(ref,1) #
- ref = data[0] #
- description=None
- trailing_modifiers = None
- if len(data)> 1:
- extra = data[1]
- description = TaggedValue._match_quotedString.search(extra)
- if description is not None:
- description = description.group(0)
- extra.replace(description,'')
- trailing_modifiers=trailingModifierFactory(extra)
- self.reference=ref
- self.description=description
- self.trailing_modifiers=trailing_modifiers
-
- def __eq__(self,b):
- return ((self.reference==b.reference) and (self.description==b.description)
- and (self.trailing_modifiers==b.trailing_modifiers))
-
- def __hash__(self):
- return (reduce(lambda x,y:x+y,(hash(z) for z in [self.reference,
- self.description,
- self.trailing_modifiers]),0)) % (2**31)
-
-
-def xrefFactory(string):
- '''
- Dispatcher of xrefs.
-
- @param string: a string (between brackets) from an inherited TaggedValue object with a dbxrefs
- signal (actually, the signal can only be found into SynonymValue and DefValue
- objects) or a string (without brackets) from a XrefValue object.
- @type string: string
-
- @return: a class object
-
- @note: The dispatcher treats differently the strings between brackets (from SynonymValue and
- DefValue objects) and without brackets (from XrefValue objects).
- '''
-
- string = string.strip()
- if string[0]=='[':
- return [Xref(x.strip()) for x in string[1:-1].split(',')]
- else:
- return Xref(string)
-
-
-#################################################################################
-## Stanza treatment area ##
-#################################################################################
-
-
-class OBOEntry(dict):
- '''
- An entry of an OBOFile. It can be a header (without a stanza name) or
- a stanza (with a stanza name between brackets). It inherits from the class dict.
- '''
- _match_stanza_name = re.compile('(?<=^\[)[^\]]*(?=\])')
-
- def __init__(self,stanza):
- ## tests if it is the header of the OBO file (returns TRUE) or not (returns FALSE)
- self.isHeader = stanza[0]!='['
- lines = stanza.split('\n')
- ## not the header : there is a [stanzaName]
- if not self.isHeader:
- self.stanzaName = lines[0].strip()[1:-1]
- lines=lines[1:]
- self["stanza"] = [stanza.strip()]
-
- ## whatever the stanza is.
- for line in lines:
- ## each line is a couple : tag / value
- taggedvalue = taggedValueFactory(line)
- if taggedvalue.tag in self:
- self[taggedvalue.tag].append(taggedvalue)
- else:
- self[taggedvalue.tag]=[taggedvalue]
-
-
- def parseStanzaName(stanza):
- sm = OBOEntry._match_stanza_name.search(stanza)
- if sm:
- return sm.group(0)
- else:
- return None
-
- parseStanzaName=staticmethod(parseStanzaName)
-
-
-
-class OBOTerm(OBOEntry):
- '''
- A stanza named 'Term'. It inherits from the class OBOEntry.
- '''
- def __init__(self,stanza):
-
- ## use of the OBOEntry constructor.
- OBOEntry.__init__(self, stanza)
-
- assert self.stanzaName=='Term'
- assert 'stanza' in self
- assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id"
- assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name"
- assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term"
-
- assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term"
- assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term"
-
- assert 'union_of' not in self or len(self['union_of'])>=2,"Only one union relationship is allowed for an OBO term"
- assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"Only one intersection relationship is allowed for an OBO term"
-
- if self._isObsolete():
- #assert 'is_a' not in self
- assert 'relationship' not in self
- assert 'inverse_of' not in self
- assert 'disjoint_from' not in self
- assert 'union_of' not in self
- assert 'intersection_of' not in self
-
- assert 'replaced_by' not in self or self._isObsolete()
- assert 'consider' not in self or self._isObsolete()
-
- def _getStanza(self):
- return self['stanza'][0]
-
- ## make-up functions.
- def _getDefinition(self):
- if 'def' in self:
- return self['def'][0]
- return None
-
- def _getId(self):
- return self['id'][0]
-
- def _getNamespace(self):
- return self['namespace'][0]
-
- def _getName(self):
- return self['name'][0]
-
- def _getComment(self):
- if 'comment' in self:
- return self['comment'][0]
- return None
-
- def _getAltIds(self):
- if 'alt_id' in self:
- return list(set(self.get('alt_id',None)))
- return None
-
- def _getIsA(self):
- if 'is_a' in self:
- return list(set(self.get('is_a',None)))
- return None
-
- def _getSynonym(self):
- if 'synonym' in self :
- return list(set(self.get('synonym',None)))
- return None
-
- def _getSubset(self):
- if self.get('subset',None) != None:
- return list(set(self.get('subset',None)))
- else:
- return None
-
- def _getXref(self):
- if 'xref' in self:
- return list(set(self.get('xref',None)))
- return None
-
- def _getRelationShip(self):
- if 'relationship' in self:
- return list(set(self.get('relationship',None)))
- return None
-
- def _getUnion(self):
- return list(set(self.get('union_of',None)))
-
- def _getIntersection(self):
- return list(set(self.get('intersection_of',None)))
-
- def _getDisjonction(self):
- return list(set(self.get('disjoint_from',None)))
-
- def _isObsolete(self):
- return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true'
-
- def _getReplacedBy(self):
- if 'replaced_by' in self:
- return list(set(self.get('replaced_by',None)))
- return None
-
- def _getConsider(self):
- if 'consider' in self:
- return list(set(self.get('consider',None)))
- return None
-
- ## automatically make-up !
- stanza = property(_getStanza,None,None)
- definition = property(_getDefinition,None,None)
- id = property(_getId,None,None)
- namespace = property(_getNamespace,None,None)
- name = property(_getName,None,None)
- comment = property(_getComment,None,None)
- alt_ids = property(_getAltIds,None,None)
- is_a = property(_getIsA,None,None)
- synonyms = property(_getSynonym,None,None)
- subsets = property(_getSubset,None,None)
- xrefs = property(_getXref,None,None)
- relationship = property(_getRelationShip,None,None)
- union_of = property(_getUnion,None,None)
- intersection_of = property(_getIntersection,None,None)
- disjoint_from = property(_getDisjonction,None,None)
- is_obsolete = property(_isObsolete,None,None)
- replaced_by = property(_getReplacedBy,None,None)
- consider = property(_getConsider,None,None)
-
-
-def OBOEntryFactory(stanza):
- '''
- Dispatcher of stanza.
-
- @param stanza: a stanza composed of several lines.
- @type stanza: text
-
- @return: an C{OBOTerm} | C{OBOEntry} instance
-
- @note: The dispatcher treats differently the stanza which are OBO "Term"
- and the others.
- '''
-
- stanzaType = OBOEntry.parseStanzaName(stanza)
-
- if stanzaType=="Term":
- return OBOTerm(stanza)
- else:
- return OBOEntry(stanza)
-
-def OBOEntryIterator(file):
- entries = stanzaIterator(file)
- for e in entries:
- debug(e)
- yield OBOEntryFactory(e)
-
-
\ No newline at end of file
diff --git a/obitools/options/__init__.py b/obitools/options/__init__.py
deleted file mode 100644
index d6793d6..0000000
--- a/obitools/options/__init__.py
+++ /dev/null
@@ -1,137 +0,0 @@
-"""
- Module providing high level functions to manage command line options.
-"""
-import logging
-import sys
-
-from logging import debug
-
-from optparse import OptionParser
-
-from obitools.utils import universalOpen
-from obitools.utils import fileSize
-from obitools.utils import universalTell
-from obitools.utils import progressBar
-from obitools.format.options import addInputFormatOption, addInOutputOption,\
- autoEntriesIterator
-import time
-
-
-
-def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None):
- '''
- Build an option manager fonction. that is able to parse
- command line options of the script.
-
- @param optionDefinitions: list of function describing a set of
- options. Each function must allows as
- unique parametter an instance of OptionParser.
- @type optionDefinitions: list of functions.
-
- @param entryIterator: an iterator generator function returning
- entries from the data files.
-
- @type entryIterator: an iterator generator function with only one
- parametter of type file
- '''
- parser = OptionParser(progdoc)
- parser.add_option('--DEBUG',
- action="store_true", dest="debug",
- default=False,
- help="Set logging in debug mode")
-
- parser.add_option('--no-psyco',
- action="store_true", dest="noPsyco",
- default=False,
- help="Don't use psyco even if it installed")
-
- parser.add_option('--without-progress-bar',
- action="store_false", dest="progressbar",
- default=True,
- help="desactivate progress bar")
-
- checkFormat=False
- for f in optionDefinitions:
- if f == addInputFormatOption or f == addInOutputOption:
- checkFormat=True
- f(parser)
-
- def commandLineAnalyzer():
- options,files = parser.parse_args()
- if options.debug:
- logging.root.setLevel(logging.DEBUG)
-
- if checkFormat:
- ei=autoEntriesIterator(options)
- else:
- ei=entryIterator
-
- i = allEntryIterator(files,ei,with_progress=options.progressbar)
- return options,i
-
- return commandLineAnalyzer
-
-_currentInputFileName=None
-_currentFile = None
-_currentFileSize = None
-
-def currentInputFileName():
- return _currentInputFileName
-
-def currentInputFile():
- return _currentFile
-
-def currentFileSize():
- return _currentFileSize
-
-def currentFileTell():
- return universalTell(_currentFile)
-
-def fileWithProgressBar(file,step=100):
- try:
- size = currentFileSize()
- except:
- size = None
-
- def fileBar():
- pos=1
- progressBar(pos, size, True,currentInputFileName())
- for l in file:
- progressBar(currentFileTell,size,head=currentInputFileName())
- yield l
- print >>sys.stderr,''
- if size is None:
- return file
- else:
- f = fileBar()
- return f
-
-
-def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102):
- global _currentFile
- global _currentInputFileName
- global _currentFileSize
- if files :
- for f in files:
- _currentInputFileName=f
- f = universalOpen(f)
- _currentFile=f
- _currentFileSize=fileSize(_currentFile)
- debug(f)
- if with_progress:
- f=fileWithProgressBar(f,step=histo_step)
- if entryIterator is None:
- for line in f:
- yield line
- else:
- for entry in entryIterator(f):
- yield entry
- else:
- if entryIterator is None:
- for line in sys.stdin:
- yield line
- else:
- for entry in entryIterator(sys.stdin):
- yield entry
-
-
\ No newline at end of file
diff --git a/obitools/options/bioseqcutter.py b/obitools/options/bioseqcutter.py
deleted file mode 100644
index 77189af..0000000
--- a/obitools/options/bioseqcutter.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from logging import debug
-
-def _beginOptionCallback(options,opt,value,parser):
- def beginCutPosition(seq):
- debug("begin = %s" % value )
- if hasattr(options, 'taxonomy') and options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq}
- else:
- environ = {'sequence':seq}
-
- return eval(value,environ,seq) - 1
-
- parser.values.beginCutPosition=beginCutPosition
-
-def _endOptionCallback(options,opt,value,parser):
- def endCutPosition(seq):
- if hasattr(options, 'taxonomy') and options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq}
- else:
- environ = {'sequence':seq}
-
- return eval(value,environ,seq)
-
- parser.values.endCutPosition=endCutPosition
-
-
-
-
-def addSequenceCuttingOptions(optionManager):
-
- optionManager.add_option('-b','--begin',
- action="callback", callback=_beginOptionCallback,
- metavar="",
- type="string",
- help="python expression to be evaluated in the "
- "sequence context. The attribute name can be "
- "used in the expression as variable name. "
- "An extra variable named 'sequence' refers "
- "to the sequence object itself. ")
-
- optionManager.add_option('-e','--end',
- action="callback", callback=_endOptionCallback,
- metavar="",
- type="string",
- help="python expression to be evaluated in the "
- "sequence context. The attribute name can be "
- "used in the expression as variable name ."
- "An extra variable named 'sequence' refers"
- "to the sequence object itself. ")
-
-
-def cutterGenerator(options):
-
- def sequenceCutter(seq):
-
- lseq = len(seq)
-
- if hasattr(options, 'beginCutPosition'):
- begin = int(options.beginCutPosition(seq))
- else:
- begin = 0
-
- if hasattr(options, 'endCutPosition'):
- end = int(options.endCutPosition(seq))
- else:
- end = lseq
-
- if begin > 0 or end < lseq:
- seq = seq[begin:end]
- seq['subsequence']="%d..%d" % (begin+1,end)
-
- return seq
-
- return sequenceCutter
-
-def cutterIteratorGenerator(options):
- _cutter = cutterGenerator(options)
-
- def sequenceCutterIterator(seqIterator):
- for seq in seqIterator:
- yield _cutter(seq)
-
- return sequenceCutterIterator
-
-
diff --git a/obitools/options/bioseqedittag.py b/obitools/options/bioseqedittag.py
deleted file mode 100644
index 6eb1c36..0000000
--- a/obitools/options/bioseqedittag.py
+++ /dev/null
@@ -1,237 +0,0 @@
-import sys
-from obitools.options.taxonomyfilter import loadTaxonomyDatabase
-def addSequenceEditTagOptions(optionManager):
-
- optionManager.add_option('--rank',
- action="store_true", dest='addrank',
- default=False,
- help="add a rank attribute to the sequence "
- "indicating the sequence position in the input data")
-
- optionManager.add_option('-R','--rename-tag',
- action="append",
- dest='renameTags',
- metavar="",
- type="string",
- default=[],
- help="change tag name from OLD_NAME to NEW_NAME")
-
- optionManager.add_option('--delete-tag',
- action="append",
- dest='deleteTags',
- metavar="",
- type="string",
- default=[],
- help="delete tag TAG_NAME")
-
- optionManager.add_option('-S','--set-tag',
- action="append",
- dest='setTags',
- metavar="",
- type="string",
- default=[],
- help="Add a new tag named TAG_NAME with "
- "a value computed from PYTHON_EXPRESSION")
-
- optionManager.add_option('--set-identifier',
- action="store",
- dest='setIdentifier',
- metavar="",
- type="string",
- default=None,
- help="Set sequence identifier with "
- "a value computed from PYTHON_EXPRESSION")
-
- optionManager.add_option('--set-sequence',
- action="store",
- dest='setSequence',
- metavar="",
- type="string",
- default=None,
- help="Change the sequence itself with "
- "a value computed from PYTHON_EXPRESSION")
-
- optionManager.add_option('-T','--set-definition',
- action="store",
- dest='setDefinition',
- metavar="",
- type="string",
- default=None,
- help="Set sequence definition with "
- "a value computed from PYTHON_EXPRESSION")
-
- optionManager.add_option('-O','--only-valid-python',
- action="store_true",
- dest='onlyValid',
- default=False,
- help="only valid python expressions are allowed")
-
- optionManager.add_option('-C','--clear',
- action="store_true",
- dest='clear',
- default=False,
- help="clear all tags associated to the sequences")
-
- optionManager.add_option('-k','--keep',
- action='append',
- dest='keep',
- default=[],
- type="string",
- help="only keep this tag")
-
- optionManager.add_option('--length',
- action="store_true",
- dest='length',
- default=False,
- help="add seqLength tag with sequence length")
-
- optionManager.add_option('--with-taxon-at-rank',
- action='append',
- dest='taxonrank',
- default=[],
- type="string",
- help="add taxonomy annotation at a speciefied rank level")
-
- optionManager.add_option('-m','--mcl',
- action="store", dest="mcl",
- metavar="",
- type="string",
- default=None,
- help="split following mcl graph clustering partition")
-
-
-def readMCLFile(file):
- partition=1
- parts = {}
- for l in file:
- for seq in l.strip().split():
- parts[seq]=partition
- partition+=1
- return parts
-
-
-
-
-def sequenceTaggerGenerator(options):
- toDelete = options.deleteTags[:]
- toRename = [x.split(':',1) for x in options.renameTags if len(x.split(':',1))==2]
- toSet = [x.split(':',1) for x in options.setTags if len(x.split(':',1))==2]
- newId = options.setIdentifier
- newDef = options.setDefinition
- newSeq = options.setSequence
- clear = options.clear
- keep = set(options.keep)
- length = options.length
- counter = [0]
- loadTaxonomyDatabase(options)
- if options.taxonomy is not None:
- annoteRank=options.taxonrank
- else:
- annoteRank=[]
-
- if options.mcl is not None:
- parts = readMCLFile(open(options.mcl))
- else:
- parts = False
-
- def sequenceTagger(seq):
-
- if counter[0]>=0:
- counter[0]+=1
-
- if clear or keep:
- ks = seq.keys()
- for k in ks:
- if k not in keep:
- del seq[k]
- else:
- for i in toDelete:
- if i in seq:
- del seq[i]
- for o,n in toRename:
- if o in seq:
- seq[n]=seq[o]
- del seq[o]
-
- for rank in annoteRank:
- if 'taxid' in seq:
- taxid = seq['taxid']
- if taxid is not None:
- rtaxid = options.taxonomy.getTaxonAtRank(taxid,rank)
- if rtaxid is not None:
- scn = options.taxonomy.getScientificName(rtaxid)
- else:
- scn=None
- seq[rank]=rtaxid
- seq["%s_name"%rank]=scn
-
- if parts and seq.id in parts:
- seq['cluster']=parts[seq.id]
-
- if options.addrank:
- seq['rank']=counter[0]
-
- for i,v in toSet:
- try:
- if options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]}
- else:
- environ = {'sequence':seq, 'counter':counter[0]}
-
- val = eval(v,environ,seq)
- except Exception,e:
- if options.onlyValid:
- raise e
- val = v
- seq[i]=val
-
- if length:
- seq['seqLength']=len(seq)
-
- if newId is not None:
- try:
- if options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]}
- else:
- environ = {'sequence':seq, 'counter':counter[0]}
-
- val = eval(newId,environ,seq)
- except Exception,e:
- if options.onlyValid:
- raise e
- val = newId
- seq.id=val
- if newDef is not None:
- try:
- if options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]}
- else:
- environ = {'sequence':seq, 'counter':counter[0]}
-
- val = eval(newDef,environ,seq)
- except Exception,e:
- if options.onlyValid:
- raise e
- val = newDef
- seq.definition=val
-
- if newSeq is not None:
- try:
- if options.taxonomy is not None:
- environ = {'taxonomy' : options.taxonomy,'sequence':seq, 'counter':counter[0]}
- else:
- environ = {'sequence':seq, 'counter':counter[0]}
-
- val = eval(newSeq,environ,seq)
- except Exception,e:
- if options.onlyValid:
- raise e
- val = newSeq
- if hasattr(seq, '_seq'):
- seq._seq=str(val).lower()
- if 'seqLength' in seq:
- seq['seqLength']=len(seq)
-
- return seq
-
- return sequenceTagger
\ No newline at end of file
diff --git a/obitools/options/bioseqfilter.py b/obitools/options/bioseqfilter.py
deleted file mode 100644
index d52c9b5..0000000
--- a/obitools/options/bioseqfilter.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import re
-
-from obitools.options.taxonomyfilter import addTaxonomyFilterOptions
-from obitools.options.taxonomyfilter import taxonomyFilterGenerator
-
-def _sequenceOptionCallback(options,opt,value,parser):
- parser.values.sequencePattern = re.compile(value,re.I)
-
-def _defintionOptionCallback(options,opt,value,parser):
- parser.values.definitionPattern = re.compile(value)
-
-def _identifierOptionCallback(options,opt,value,parser):
- parser.values.identifierPattern = re.compile(value)
-
-def _attributeOptionCallback(options,opt,value,parser):
- if not hasattr(options, 'attributePatterns'):
- parser.values.attributePatterns={}
- attribute,pattern=value.split(':',1)
- parser.values.attributePatterns[attribute]=re.compile(pattern)
-
-def _predicatOptionCallback(options,opt,value,parser):
- if not hasattr(options, 'predicats'):
- options.predicats=[]
- parser.values.predicats.append(value)
-
-
-def addSequenceFilteringOptions(optionManager):
-
- optionManager.add_option('-s','--sequence',
- action="callback", callback=_sequenceOptionCallback,
- metavar="",
- type="string",
- help="regular expression pattern used to select "
- "the sequence. The pattern is case insensitive")
-
- optionManager.add_option('-D','--definition',
- action="callback", callback=_defintionOptionCallback,
- type="string",
- metavar="",
- help="regular expression pattern matched against "
- "the definition of the sequence. "
- "The pattern is case sensitive")
-
- optionManager.add_option('-I','--identifier',
- action="callback", callback=_identifierOptionCallback,
- type="string",
- metavar="",
- help="regular expression pattern matched against "
- "the identifier of the sequence. "
- "The pattern is case sensitive")
-
- optionManager.add_option('-a','--attribute',
- action="callback", callback=_attributeOptionCallback,
- type="string",
- metavar=":",
- help="regular expression pattern matched against "
- "the attributes of the sequence. "
- "the value of this atribute is of the form : "
- "attribute_name:regular_pattern. "
- "The pattern is case sensitive."
- "Several -a option can be used on the same "
- "commande line.")
-
- optionManager.add_option('-A','--has-attribute',
- action="append",
- type="string",
- dest="has_attribute",
- default=[],
- metavar="",
- help="select sequence with attribute "
- "defined")
-
- optionManager.add_option('-p','--predicat',
- action="append", dest="predicats",
- metavar="",
- help="python boolean expression to be evaluated in the "
- "sequence context. The attribute name can be "
- "used in the expression as variable name ."
- "An extra variable named 'sequence' refers"
- "to the sequence object itself. "
- "Several -p option can be used on the same "
- "commande line.")
-
- optionManager.add_option('-L','--lmax',
- action='store',
- metavar="<##>",
- type="int",dest="lmax",
- help="keep sequences shorter than lmax")
-
- optionManager.add_option('-l','--lmin',
- action='store',
- metavar="<##>",
- type="int",dest="lmin",
- help="keep sequences longer than lmin")
-
- optionManager.add_option('-v','--inverse-match',
- action='store_true',
- default=False,
- dest="invertedFilter",
- help="revert the sequence selection "
- "[default : %default]")
-
- addTaxonomyFilterOptions(optionManager)
-
-
-
-
-
-def filterGenerator(options):
- taxfilter = taxonomyFilterGenerator(options)
-
- def sequenceFilter(seq):
- good = True
-
- if hasattr(options, 'sequencePattern'):
- good = bool(options.sequencePattern.search(str(seq)))
-
- if good and hasattr(options, 'identifierPattern'):
- good = bool(options.identifierPattern.search(seq.id))
-
- if good and hasattr(options, 'definitionPattern'):
- good = bool(options.definitionPattern.search(seq.definition))
-
- if good :
- good = reduce(lambda x,y:x and y,
- (k in seq for k in options.has_attribute),
- True)
-
- if good and hasattr(options, 'attributePatterns'):
- good = (reduce(lambda x,y : x and y,
- (bool(options.attributePatterns[p].search(str(seq[p])))
- for p in options.attributePatterns
- if p in seq),True)
- and
- reduce(lambda x,y : x and y,
- (bool(p in seq)
- for p in options.attributePatterns),True)
- )
-
- if good and hasattr(options, 'predicats') and options.predicats is not None:
- if options.taxonomy is not None:
- e = {'taxonomy' : options.taxonomy,'sequence':seq}
- else:
- e = {'sequence':seq}
-
- good = (reduce(lambda x,y: x and y,
- (bool(eval(p,e,seq))
- for p in options.predicats),True)
- )
-
- if good and hasattr(options, 'lmin') and options.lmin is not None:
- good = len(seq) >= options.lmin
-
- if good and hasattr(options, 'lmax') and options.lmax is not None:
- good = len(seq) <= options.lmax
-
- if good:
- good = taxfilter(seq)
-
- if hasattr(options, 'invertedFilter') and options.invertedFilter:
- good=not good
-
-
- return good
-
- return sequenceFilter
-
-def sequenceFilterIteratorGenerator(options):
- filter = filterGenerator(options)
-
- def sequenceFilterIterator(seqIterator):
- for seq in seqIterator:
- if filter(seq):
- yield seq
-
- return sequenceFilterIterator
-
-
-
\ No newline at end of file
diff --git a/obitools/options/taxonomyfilter.py b/obitools/options/taxonomyfilter.py
deleted file mode 100644
index 5526c79..0000000
--- a/obitools/options/taxonomyfilter.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from obitools.ecopcr.options import addTaxonomyDBOptions, \
- addTaxonomyFilterOptions, \
- loadTaxonomyDatabase, \
- taxonomyFilterGenerator, \
- taxonomyFilterIteratorGenerator
-
diff --git a/obitools/parallel/__init__.py b/obitools/parallel/__init__.py
deleted file mode 100644
index 2aa1b07..0000000
--- a/obitools/parallel/__init__.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import threading
-
-class TaskPool(object):
-
- def __init__(self,iterable,function,count=2):
- self.pool = []
- self.queue= []
- self.plock= threading.Lock()
- self.qlock= threading.Lock()
- self.function=function
- self.event=threading.Event()
- self.iterable=iterable
- for i in xrange(count):
- Task(self)
-
- def register(self,task):
- self.plock.acquire()
- self.pool.append(task)
- self.plock.release()
- self.ready(task)
-
- def unregister(self,task):
- task.thread.join()
- self.plock.acquire()
- self.pool.remove(task)
- self.plock.release()
-
-
- def ready(self,task):
- self.qlock.acquire()
- self.queue.append(task)
- self.qlock.release()
- self.event.set()
-
- def __iter__(self):
- for data in self.iterable:
- while not self.queue:
- self.event.wait()
- self.event.clear()
- self.qlock.acquire()
- task=self.queue.pop(0)
- self.qlock.release()
- if hasattr(task, 'rep'):
- yield task.rep
- #print "send ",data
- if isinstance(data,dict):
- task.submit(**data)
- else:
- task.submit(*data)
-
- while self.pool:
- self.pool[0].finish()
- while self.queue:
- self.event.clear()
- self.qlock.acquire()
- task=self.queue.pop(0)
- self.qlock.release()
- if hasattr(task, 'rep'):
- yield task.rep
-
-
-
-
-
-class Task(object):
- def __init__(self,pool):
- self.pool = pool
- self.lock = threading.Lock()
- self.dataOk = threading.Event()
- self.repOk = threading.Event()
- self.args = None
- self.kwargs=None
- self.stop=False
- self.thread = threading.Thread(target=self)
- self.thread.start()
- self.pool.register(self)
-
- def __call__(self):
- self.dataOk.wait()
- while(not self.stop):
- self.lock.acquire()
- self.dataOk.clear()
- self.rep=self.pool.function(*self.args,**self.kwargs)
- self.pool.ready(self)
- self.lock.release()
- self.dataOk.wait()
-
- def submit(self,*args,**kwargs):
- self.args=args
- self.kwargs=kwargs
- self.dataOk.set()
-
- def finish(self):
- self.lock.acquire()
- self.stop=True
- self.dataOk.set()
- self.pool.unregister(self)
-
-
diff --git a/obitools/parallel/jobqueue.py b/obitools/parallel/jobqueue.py
deleted file mode 100644
index 9df4804..0000000
--- a/obitools/parallel/jobqueue.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import threading
-from logging import warning,info
-from time import sleep,time
-
-from obitools.parallel import TaskPool
-
-
-class JobPool(dict):
- '''
- JobPool is dedicated to manage a job queue. These jobs
- will run in a limited number of thread.
- '''
-
- def __init__(self,count,precision=0.01):
- '''
-
- @param count: number of thread dedicated to this JobPool
- @type count: int
- @param precision: delay between two check for new job (in second)
- @type precision: float
- '''
- self._iterator = JobIterator(self)
- self._taskPool = TaskPool(self._iterator,
- self._runJob,
- count)
- self._precision=precision
- self._toRun=set()
- self._runnerThread = threading.Thread(target=self._runner)
- self._runnerThread.start()
- self._finalyzed=False
-
- def _runner(self):
- for rep in self._taskPool:
- info('Job %d finnished' % id(rep))
- info('All jobs in %d JobPool finished' % id(self))
-
- def _jobIterator(self):
- return self._iterator
-
- def _runJob(self,job):
- job.started= time()
- info('Job %d started' % id(job))
- job.result = job()
- job.ended = time()
- job.finished=True
- return job
-
- def submit(self,job,priority=1.0,userid=None):
- '''
- Submit a new job to the JobPool.
-
- @param job: the new submited job
- @type job: Job instance
- @param priority: priority level of this job (higher is better)
- @type priority: float
- @param userid: a user identifier (Default is None)
-
- @return: job identifier
- @rtype: int
- '''
-
- assert not self._finalyzed,\
- "This jobPool does not accept new job"
- if job.submitted is not None:
- warning('Job %d was already submitted' % id(job))
- return id(job)
-
- job.submitted = time()
- job.priority = priority
- job.userid = userid
- i=id(job)
- job.id=id
- self[i]=job
- self._toRun.add(job)
-
- info('Job %d submitted' % i)
-
- return i
-
- def finalyze(self):
- '''
- Indicate to the JobPool, that no new jobs will
- be submitted.
- '''
- self._iterator.finalyze()
- self._finalyzed=True
-
- def __del__(self):
- self.finalyze()
-
-
-class JobIterator(object):
- def __init__(self,pool):
- self._pool = pool
- self._finalyze=False
- self._nextLock=threading.Lock()
-
-
- def __iter__(self):
- return self
-
- def finalyze(self):
- '''
- Indicate to the JobIterator, that no new jobs will
- be submitted.
- '''
- self._finalyze=True
-
-
- def next(self):
- '''
-
- @return: the next job to run
- @rtype: Job instance
- '''
- self._nextLock.acquire()
- while self._pool._toRun or not self._finalyze:
- rep = None
- maxScore=0
- for k in self._pool._toRun:
- s = k.runScore()
- if s > maxScore:
- maxScore=s
- rep=k
- if rep is not None:
- self._pool._toRun.remove(rep)
- self._nextLock.release()
- return (rep,)
- sleep(self._pool._precision)
- self._nextLock.release()
- info('No more jobs in %d JobPool' % id(self._pool))
- raise StopIteration
-
-
-
-class Job(object):
-
- def __init__(self,pool=None,function=None,*args,**kwargs):
- '''
- Create a new job
-
- @param pool: the jobpool used to run job. Can be None to not
- execute the job immediately.
- @type pool: JobPool instance
-
- @param function: the function to run for the job
- @type function: callable object
-
- @param args: parametters for function call
- @param kwargs: named parametters for function call
-
- @precondition: function cannot be None
- '''
- assert function is not None
- self._args=args
- self._kwargs = kwargs
- self._function = function
- self.running = False
- self.finished= False
- self.submitted = None
- self.priority = None
- self.userid = None
-
- if pool is not None:
- pool.submit(self)
-
- def runScore(self):
- '''
- @return: the score used to ordonnance job in the queue
- @rtype: C{float}
- '''
-
- return (time() - self.submitted) * self.priority
-
- def __call__(self):
- return self._function(*self._args,**self._kwargs)
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/phylogeny/__init__.py b/obitools/phylogeny/__init__.py
deleted file mode 100644
index 8eb1587..0000000
--- a/obitools/phylogeny/__init__.py
+++ /dev/null
@@ -1,119 +0,0 @@
-
-from obitools.graph.tree import Forest,TreeNode
-from obitools.graph import Edge
-
-
-
-class PhylogenicTree(Forest):
-
- def __init__(self,label='G',indexer=None,nodes=None,edges=None):
- Forest.__init__(self, label, indexer, nodes, edges)
- self.root=None
- self.comment=None
-
- def addNode(self,node=None,index=None,**data):
- if node is None and index is None:
- node = '__%d' % (len(self._node)+1)
-
- return Forest.addNode(self, node, index, **data)
-
- def getNode(self,node=None,index=None):
- if index is None:
- index = self._index.getIndex(node, True)
- return PhylogenicNode(index,self)
-
- def getEdge(self,node1=None,node2=None,index1=None,index2=None):
- '''
-
- @param node1:
- @type node1:
- @param node2:
- @type node2:
- @param index1:
- @type index1:
- @param index2:
- @type index2:
- '''
- node1=self.getNode(node1, index1)
- node2=self.getNode(node2, index2)
- return PhylogenicEdge(node1,node2)
-
-
-
-class PhylogenicNode(TreeNode):
-
- def getLabel(self):
- label = TreeNode.getLabel(self)
- if label[0:2]=='__':
- return None
- else:
- return label
-
- def __str__(self):
-
- if self.index in self.graph._node_attrs:
- keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
- for x in self.graph._node_attrs[self.index].iteritems()]
- )
- else:
- keys=''
-
- if self.label is None:
- label=''
- shape='point'
- else:
- label=self.label
- shape='box'
-
- return '%d [label="%s" shape="%s" %s]' % (self.index,str(label).replace('"','\\"'),shape,keys)
-
- def distanceTo(self,node=None,index=None):
- '''
- compute branch length between the two nodes.
- If distances are not secified for this tree, None is returned.
-
- @param node: a node label or None
- @param index: a node index or None. the parameter index
- has a priority on the parameter node.
- @type index: int
-
- @return: the evolutive distance between the two nodes
- @rtype: int, float or None
- '''
- path = self.shortestPathTo(node, index)
-
- start = path.pop(0)
- dist=0
- for dest in path:
- edge = self.graph.getEdge(index1=start,index2=dest)
- if 'distance' in edge:
- dist+=edge['distance']
- else:
- return None
- start=dest
-
- return dist
-
- label = property(getLabel, None, None, "Label of the node")
-
-class PhylogenicEdge(Edge):
-
- def __str__(self):
- e = (self.node1.index,self.node2.index)
- if e in self.graph._edge_attrs:
- keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
- for x in self.graph._edge_attrs[e].iteritems()
- if x[0] not in ('distance','bootstrap')]
- )
- else:
- keys = ""
-
-
-
- if self.directed:
- link='->'
- else:
- link='--'
-
- return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys)
-
diff --git a/obitools/phylogeny/newick.py b/obitools/phylogeny/newick.py
deleted file mode 100644
index cf0330c..0000000
--- a/obitools/phylogeny/newick.py
+++ /dev/null
@@ -1,123 +0,0 @@
-import re
-import sys
-
-from obitools.utils import universalOpen
-from obitools.phylogeny import PhylogenicTree
-
-def subNodeIterator(data):
- level=0
- start = 1
- if data[0]=='(':
- for i in xrange(1,len(data)):
- c=data[i]
- if c=='(':
- level+=1
- elif c==')':
- level-=1
- if c==',' and not level:
- yield data[start:i]
- start = i+1
- yield data[start:i]
- else:
- yield data
-
-
-_nodeParser=re.compile('\s*(?P\(.*\))?(?P[^ :]+)? *(?P[0-9.]+)?(:(?P-?[0-9.]+))?')
-
-def nodeParser(data):
- parsedNode = _nodeParser.match(data).groupdict(0)
- if not parsedNode['name']:
- parsedNode['name']=None
-
- if not parsedNode['bootstrap']:
- parsedNode['bootstrap']=None
- else:
- parsedNode['bootstrap']=float(parsedNode['bootstrap'])
-
- if not parsedNode['distance']:
- parsedNode['distance']=None
- else:
- parsedNode['distance']=float(parsedNode['distance'])
-
- if not parsedNode['subnodes']:
- parsedNode['subnodes']=None
-
- return parsedNode
-
-_cleanTreeData=re.compile('\s+')
-
-def treeParser(data,tree=None,parent=None):
- if tree is None:
- tree = PhylogenicTree()
- data = _cleanTreeData.sub(' ',data).strip()
-
- parsedNode = nodeParser(data)
-
- if parent is not None:
- son,parent = tree.addEdge(node1=parsedNode['name'],
- index2=parent,
- distance=parsedNode['distance'],
- bootstrap=parsedNode['bootstrap'])
- else:
- son = tree.addNode(node1=parsedNode['name'])
- tree.root=son
-
-
-
- if parsedNode['subnodes']:
- for subnode in subNodeIterator(parsedNode['subnodes']):
- treeParser(subnode,tree,son)
-
- return tree
-
-_treecomment=re.compile('\[.*\]')
-
-def treeIterator(file):
- file = universalOpen(file)
- data = file.read()
-
- comment = _treecomment.findall(data)
- data=_treecomment.sub('',data).strip()
-
- if comment:
- comment=comment[0]
- else:
- comment=None
- for tree in data.split(';'):
- t = treeParser(tree)
- if comment:
- t.comment=comment
- yield t
-
-def nodeWriter(tree,node,deep=0):
- name = node._name
- if name is None:
- name=''
-
- distance=node._dist
- if distance is None:
- distance=''
- else:
- distance = ':%6.5f' % distance
-
- bootstrap=node._bootstrap
- if bootstrap is None:
- bootstrap=''
- else:
- bootstrap=' %d' % int(bootstrap)
-
- nodeseparator = ',\n' + ' ' * (deep+1)
-
- subnodes = nodeseparator.join([nodeWriter(tree, x, deep+1)
- for x in tree.childNodeIterator(node)])
- if subnodes:
- subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
-
- return '%s%s%s%s' % (subnodes,name,bootstrap,distance)
-
-def treeWriter(tree,startnode=None):
- if startnode is not None:
- root=startnode
- else:
- root = tree.getRoot()
- return nodeWriter(tree,root)+';'
diff --git a/obitools/profile/__init__.py b/obitools/profile/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/profile/_profile.so b/obitools/profile/_profile.so
deleted file mode 100755
index 7f52483..0000000
Binary files a/obitools/profile/_profile.so and /dev/null differ
diff --git a/obitools/sample.py b/obitools/sample.py
deleted file mode 100644
index 4894c94..0000000
--- a/obitools/sample.py
+++ /dev/null
@@ -1,76 +0,0 @@
-'''
-Created on 31 oct. 2009
-
-@author: coissac
-'''
-from random import shuffle, randrange
-
-def lookfor(x,cumsum):
- lmax=len(cumsum)
- lmin=0
-
- assert x < cumsum[-1],"x must be smaller then cumulative sum"
-
- while((lmax - lmin) > 0):
-
- i=(lmax+lmin)/2
- #print i,lmin,lmax
- if (xcumsum[i-1])):
- #print "return 1 :",i,cumsum[i-1],"<",x,"<",cumsum[i]
- return i
- elif cumsum[i]==x:
- while cumsum[i]==x:
- i+=1
- #print "return 2 :",i,cumsum[i],"<",x,"<",cumsum[i+1]
- return i
- elif x0]
- shuffle(entries)
- cumul=[]
- s=0
- for e in entries:
- s+=events[e]
- cumul.append(s)
-
- #print cumul
- result={}
-
- for t in xrange(size):
- e=lookfor(randrange(s), cumul)
- k=entries[e]
- result[k]=result.get(k,0)+1
-
- return result
-
-def weigthedSampleWithoutReplacement(events,size):
- entries = [k for k in events.iterkeys() if events[k]>0]
- shuffle(entries)
- cumul=[]
- s=0
- for e in entries:
- s+=events[e]
- cumul.append(s)
-
- #print cumul
- result={}
-
- for t in xrange(size):
- # print s,cumul,
- e=lookfor(randrange(s), cumul)
- # print e
- k=entries[e]
- for x in xrange(e,len(cumul)):
- cumul[x]-=1
- s-=1
- result[k]=result.get(k,0)+1
-
- return result
\ No newline at end of file
diff --git a/obitools/seqdb/__init__.py b/obitools/seqdb/__init__.py
deleted file mode 100644
index 274cbad..0000000
--- a/obitools/seqdb/__init__.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from obitools import NucSequence,AASequence
-from obitools.format.genericparser import genericEntryIteratorGenerator
-from obitools.location.feature import featureIterator
-
-from itertools import chain
-
-class AnnotatedSequence(object):
-
- def __init__(self,header,featureTable,secondaryAcs):
- self._header = header
- self._featureTableText = featureTable
- self._featureTable=None
- self._secondaryAcs=secondaryAcs
- self._hasTaxid=None
-
- def getHeader(self):
- return self._header
-
-
- def getFeatureTable(self,skipError=False):
- if self._featureTable is None:
- self._featureTable = [x for x in featureIterator(self._featureTableText,skipError)]
- return self._featureTable
-
-
- def getSecondaryAcs(self):
- return self._secondaryAcs
-
- def extractTaxon(self):
- if self._hasTaxid is None:
-
- if self._featureTable is not None:
- s = [f for f in self._featureTable if f.ftType=='source']
- else:
- s = featureIterator(self._featureTableText).next()
- if s.ftType=='source':
- s = [s]
- else:
- s = [f for f in self.featureTable if f.ftType=='source']
-
- t =set(int(v[6:]) for v in chain(*tuple(f['db_xref'] for f in s if 'db_xref' in f))
- if v[0:6]=='taxon:')
-
- self._hasTaxid=False
-
- if len(t)==1 :
- taxid=t.pop()
- if taxid >=0:
- self['taxid']=taxid
- self._hasTaxid=True
-
-
- t =set(chain(*tuple(f['organism'] for f in s if 'organism' in f)))
-
- if len(t)==1:
- self['organism']=t.pop()
-
-
- header = property(getHeader, None, None, "Header's Docstring")
-
- featureTable = property(getFeatureTable, None, None, "FeatureTable's Docstring")
-
- secondaryAcs = property(getSecondaryAcs, None, None, "SecondaryAcs's Docstring")
-
-class AnnotatedNucSequence(AnnotatedSequence,NucSequence):
- '''
-
- '''
- def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
- NucSequence.__init__(self, id, seq, de,**info)
- AnnotatedSequence.__init__(self, header, featureTable, secondaryAcs)
-
-
-class AnnotatedAASequence(AnnotatedSequence,AASequence):
- '''
-
- '''
- def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
- AASequence.__init__(self, id, seq, de,**info)
- AnnotatedSequence.__init__(self, header, featureTable, secondaryAcs)
-
-
-
-nucEntryIterator=genericEntryIteratorGenerator(endEntry='^//')
-aaEntryIterator=genericEntryIteratorGenerator(endEntry='^//')
-
-
-
diff --git a/obitools/seqdb/blastdb/__init__.py b/obitools/seqdb/blastdb/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/seqdb/dnaparser.py b/obitools/seqdb/dnaparser.py
deleted file mode 100644
index 85b82a2..0000000
--- a/obitools/seqdb/dnaparser.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from obitools.format.sequence import embl,fasta,genbank
-
-class UnknownFormatError(Exception):
- pass
-
-def whichParser(seq):
- if seq[0]=='>':
- return fasta.fastaNucParser
- if seq[0:2]=='ID':
- return embl.emblParser
- if seq[0:5]=='LOCUS':
- return genbank.genbankParser
- raise UnknownFormatError,"Unknown nucleic format"
-
-def nucleicParser(seq):
- return whichParser(seq)(seq)
diff --git a/obitools/seqdb/embl/__init__.py b/obitools/seqdb/embl/__init__.py
deleted file mode 100644
index 94f9efc..0000000
--- a/obitools/seqdb/embl/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
-from obitools.location import locationGenerator,extractExternalRefs
-
-
-
-class EmblSequence(AnnotatedNucSequence):
- '''
- Class used to represent a nucleic sequence issued from EMBL.
- '''
-
-
-
-
diff --git a/obitools/seqdb/embl/parser.py b/obitools/seqdb/embl/parser.py
deleted file mode 100644
index 2e3624f..0000000
--- a/obitools/seqdb/embl/parser.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import re
-import sys
-
-from obitools.seqdb import embl
-from obitools.seqdb import nucEntryIterator
-
-_featureMatcher = re.compile('(^FT .*\n)+', re.M)
-_cleanFT = re.compile('^FT',re.M)
-
-_headerMatcher = re.compile('^ID.+(?=\nFH )', re.DOTALL)
-_seqMatcher = re.compile('(^ ).+(?=//\n)', re.DOTALL + re.M)
-_cleanSeq = re.compile('[ \n0-9]+')
-_acMatcher = re.compile('(?<=^AC ).+',re.M)
-_deMatcher = re.compile('(^DE .+\n)+',re.M)
-_cleanDe = re.compile('(^|\n)DE +')
-
-def __emblparser(text):
- try:
- header = _headerMatcher.search(text).group()
-
- ft = _featureMatcher.search(text).group()
- ft = _cleanFT.sub(' ',ft)
-
- seq = _seqMatcher.search(text).group()
- seq = _cleanSeq.sub('',seq).upper()
-
- acs = _acMatcher.search(text).group()
- acs = acs.split()
- ac = acs[0]
- acs = acs[1:]
-
- de = _deMatcher.search(header).group()
- de = _cleanDe.sub(' ',de).strip().strip('.')
- except AttributeError,e:
- print >>sys.stderr,'======================================================='
- print >>sys.stderr,text
- print >>sys.stderr,'======================================================='
- raise e
-
- return (ac,seq,de,header,ft,acs)
-
-def emblParser(text):
- return embl.EmblSequence(*__emblparser(text))
-
-
-def emblIterator(file):
- for e in nucEntryIterator(file):
- yield emblParser(e)
-
-
\ No newline at end of file
diff --git a/obitools/seqdb/genbank/__init__.py b/obitools/seqdb/genbank/__init__.py
deleted file mode 100644
index fb5b622..0000000
--- a/obitools/seqdb/genbank/__init__.py
+++ /dev/null
@@ -1,84 +0,0 @@
-from obitools.seqdb import AnnotatedNucSequence, AnnotatedAASequence
-from obitools.location import locationGenerator,extractExternalRefs
-
-
-
-class GbSequence(AnnotatedNucSequence):
- '''
- Class used to represent a nucleic sequence issued from Genbank.
- '''
-
-
-class GpepSequence(AnnotatedAASequence):
- '''
- Class used to represent a peptidic sequence issued from Genpep.
- '''
-
- def __init__(self,id,seq,de,header,featureTable,secondaryAcs,**info):
- AnnotatedAASequence.__init__(self,id, seq, de, header, featureTable, secondaryAcs,**info)
- self.__hasNucRef=None
-
- def __getGeneRef(self):
- if self.__hasNucRef is None:
- self.__hasNucRef=False
- cds = [x for x in self.featureTable
- if x.ftType=='CDS'
- and 'coded_by' in x]
-
- if cds:
- source = cds[0]['coded_by'][0]
- if 'transl_table' in cds[0]:
- tt = cds[0]['transl_table'][0]
- else:
- tt=None
- ac,loc = extractExternalRefs(source)
-
- if len(ac)==1:
- ac = ac.pop()
- self.__hasNucRef=True
- self.__nucRef = (ac,loc,tt)
-
-
-
- def geneAvailable(self):
- '''
- Predicat indicating if reference to the nucleic sequence encoding
- this protein is available in feature table.
-
- @return: True if gene description is available
- @rtype: bool
- '''
- self.__getGeneRef()
- return self.__hasNucRef is not None and self.__hasNucRef
-
-
- def getCDS(self,database):
- '''
- Return the nucleic sequence coding for this protein if
- data are available.
-
- @param database: a database object where looking for the sequence
- @type database: a C{dict} like object
-
- @return: a NucBioseq instance carreponding to the CDS
- @rtype: NucBioSeq
-
- @raise AssertionError: if no gene references are available
- @see: L{geneAvailable}
-
- '''
-
- assert self.geneAvailable(), \
- "No information available to retreive gene sequence"
-
- ac,loc,tt = self.__nucRef
- seq = database[ac]
- seq.extractTaxon()
- gene = seq[loc]
- if tt is not None:
- gene['transl_table']=tt
- return gene
-
-
-
-
diff --git a/obitools/seqdb/genbank/ncbi.py b/obitools/seqdb/genbank/ncbi.py
deleted file mode 100644
index 40ddf91..0000000
--- a/obitools/seqdb/genbank/ncbi.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from urllib2 import urlopen
-import sys
-import re
-
-import cStringIO
-
-from obitools.eutils import EFetch
-from parser import genbankParser,genpepParser
-from parser import genbankIterator,genpepIterator
-
-from obitools.utils import CachedDB
-
-
-class NCBIGenbank(EFetch):
- def __init__(self):
- EFetch.__init__(self,db='nucleotide',
- rettype='gbwithparts')
-
- def __getitem__(self,ac):
- if isinstance(ac,str):
- text = self.get(id=ac)
- seq = genbankParser(text)
- return seq
- else:
- query = ','.join([x for x in ac])
- data = cStringIO.StringIO(self.get(id=query))
- return genbankIterator(data)
-
-
-
-
-class NCBIGenpep(EFetch):
- def __init__(self):
- EFetch.__init__(self,db='protein',
- rettype='gbwithparts')
-
- def __getitem__(self,ac):
- if isinstance(ac,str):
- text = self.get(id=ac)
- seq = genpepParser(text)
- return seq
- else:
- query = ','.join([x for x in ac])
- data = cStringIO.StringIO(self.get(id=query))
- return genpepIterator(data)
-
-class NCBIAccession(EFetch):
-
- _matchACS = re.compile(' +accession +"([^"]+)"')
-
- def __init__(self):
- EFetch.__init__(self,db='nucleotide',
- rettype='seqid')
-
- def __getitem__(self,ac):
- if isinstance(ac,str):
- text = self.get(id=ac)
- rep = NCBIAccession._matchACS.search(text).group(1)
- return rep
- else:
- query = ','.join([x for x in ac])
- text = self.get(id=query)
- rep = (ac.group(1) for ac in NCBIAccession._matchACS.finditer(text))
- return rep
-
-def Genbank(cache=None):
- gb = NCBIGenbank()
- if cache is not None:
- gb = CachedDB(cache, gb)
- return gb
-
-
-def Genpep(cache=None):
- gp = NCBIGenpep()
- if cache is not None:
- gp = CachedDB(cache, gp)
- return gp
-
-
diff --git a/obitools/seqdb/genbank/parser.py b/obitools/seqdb/genbank/parser.py
deleted file mode 100644
index b52fe59..0000000
--- a/obitools/seqdb/genbank/parser.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import re
-import sys
-
-import obitools.seqdb.genbank as gb
-from obitools.seqdb import nucEntryIterator,aaEntryIterator
-
-_featureMatcher = re.compile('^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M)
-
-_headerMatcher = re.compile('^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
-_seqMatcher = re.compile('(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M)
-_cleanSeq = re.compile('[ \n0-9]+')
-_acMatcher = re.compile('(?<=^ACCESSION ).+',re.M)
-_deMatcher = re.compile('(?<=^DEFINITION ).+\n( .+\n)*',re.M)
-_cleanDe = re.compile('\n *')
-
-def __gbparser(text):
- try:
- header = _headerMatcher.search(text).group()
- ft = _featureMatcher.search(text).group()
- seq = _seqMatcher.search(text).group()
- seq = _cleanSeq.sub('',seq).upper()
- acs = _acMatcher.search(text).group()
- acs = acs.split()
- ac = acs[0]
- acs = acs[1:]
- de = _deMatcher.search(header).group()
- de = _cleanDe.sub(' ',de).strip().strip('.')
- except AttributeError,e:
- print >>sys.stderr,'======================================================='
- print >>sys.stderr,text
- print >>sys.stderr,'======================================================='
- raise e
-
- return (ac,seq,de,header,ft,acs)
-
-def genbankParser(text):
- return gb.GbSequence(*__gbparser(text))
-
-
-def genbankIterator(file):
- for e in nucEntryIterator(file):
- yield genbankParser(e)
-
-
-def genpepParser(text):
- return gb.GpepSequence(*__gbparser(text))
-
-
-def genpepIterator(file):
- for e in aaEntryIterator(file):
- yield genpepParser(e)
-
-
\ No newline at end of file
diff --git a/obitools/sequenceencoder/__init__.py b/obitools/sequenceencoder/__init__.py
deleted file mode 100644
index 89a8a59..0000000
--- a/obitools/sequenceencoder/__init__.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from obitools import location
-
-class SequenceEncoder(object):
- pass
-
-class DNAComplementEncoder(SequenceEncoder):
- _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
- 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
- 's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
- 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
- '-': '-'}
-
- _info={'complemented':True}
-
- @staticmethod
- def _encode(seq,position=slice(None, None, -1)):
- cseq = [DNAComplementEncoder._comp.get(x.lower(),'n') for x in seq[position]]
- return ''.join(cseq)
-
- @staticmethod
- def _check(seq):
- assert seq.isNucleotide()
-
- @staticmethod
- def _convertpos(position):
- if isinstance(position, int):
- return -(position+1)
- elif isinstance(position, slice):
- return slice(-(position.stop+1),
- -(position.start+1),
- -position.step)
- elif isinstance(position, location.Location):
- return location.ComplementLocation(position).simplify()
-
- raise TypeError,"position must be an int, slice or Location instance"
-
- @staticmethod
- def complement(seq):
- return seq
-
-class SeqFragmentEncoder(SequenceEncoder):
- def __init__(self,begin,end):
- assert begin < end and begin >=0
- self._limits = slice(begin,end)
- self._info = {'cut' : [begin,end,1]}
- self._len = end - begin + 1
-
- def _check(self,seq):
- lseq = len(seq)
- assert self._limits.stop <= lseq
-
- def _encode(self,seq,position=None):
- return str(seq)[self._limits]
-
- def _convertpos(self,position):
- if isinstance(position, int):
- if position < -self._len or position >= self._len:
- raise IndexError,position
- if position >=0:
- return self._limits.start + position
- else:
- return self._limits.stop + position + 1
- elif isinstance(position, slice):
- return slice(-(position.stop+1),
- -(position.start+1),
- -position.step)
- elif isinstance(position, location.Location):
- return location.ComplementLocation(position).simplify()
-
- raise TypeError,"position must be an int, slice or Location instance"
-
-
-
\ No newline at end of file
diff --git a/obitools/sequenceencoder/__init__.pyc b/obitools/sequenceencoder/__init__.pyc
deleted file mode 100644
index 463f84f..0000000
Binary files a/obitools/sequenceencoder/__init__.pyc and /dev/null differ
diff --git a/obitools/solexa/__init__.py b/obitools/solexa/__init__.py
deleted file mode 100644
index 60e35f8..0000000
--- a/obitools/solexa/__init__.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from obitools import utils
-from obitools import NucSequence
-from obitools.dnahash import hashCodeIterator
-
-
-class SolexaSequence(NucSequence):
- def __init__(self,id,seq,definition=None,quality=None,**info):
- NucSequence.__init__(self, id, seq, definition,**info)
- self._quality=quality
- self._hash=None
-
- def getQuality(self):
- if isinstance(self._quality, str):
- self._quality=[int(x) for x in self._quality.split()]
- return self._quality
-
-
- def __hash__(self):
- if self._hash is None:
- self._hash = hashCodeIterator(str(self), len(str(self)), 16, 0).next()[1].pop()
- return self._hash
-
-class SolexaFile(utils.ColumnFile):
- def __init__(self,stream):
- utils.ColumnFile.__init__(self,
- stream, ':', True,
- (str,
- int,int,int,int,
- str,
- str), "#")
-
-
- def next(self):
- data = utils.ColumnFile.next(self)
- seq = SolexaSequence('%d_%d_%d_%d'%(data[1],data[2],data[3],data[4]),
- data[5],
- quality=data[6])
- seq['machine']=data[0]
- seq['channel']=data[1]
- seq['tile']=data[2]
- seq['pos_x']=data[3]
- seq['pos_y']=data[4]
-
- #assert len(seq['quality'])==len(seq),"Error in file format"
- return seq
diff --git a/obitools/statistics/__init__.py b/obitools/statistics/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/statistics/hypergeometric.py b/obitools/statistics/hypergeometric.py
deleted file mode 100644
index 9a9b812..0000000
--- a/obitools/statistics/hypergeometric.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
- Module de calcules statistiques.
-
- Le module `statistics` contient des fonctions permettant le calcule
- des probabilités associées à la loi hypergéométrique et
- hypergéométrique cumulée, ainsi d'une méthode de correction pour les
- tests multiples.
-
-"""
-
-from decimal import *
-
-getcontext().prec = 28
-
-
-def _hyper0(N,n,r):
- """
- Fonction interne permetant le calcule du terme 0 de la loi hypergéométrique.
-
- Le calcule est réalisé selon la méthode décrite dans l'article
-
- Trong Wu, An accurate computation of the hypergeometric distribution function,
- ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
- Paramètres:
-
- - `N` : La taille de la population
- - `n` : Le nombre d'éléments marqués
- - `r` : La taille de l'echantillon
-
- Retourne un *float* indiquant la probabilité de récupérer 0 élément
- marqué parmi *n* dans une population de taille *N* lors du tirage
- d'un échantillon de taille *r*
- """
-
- #
- # au numerateur nous avons :
- # [N -r + 1 -n;N - n + 1[
- #
- # au denominateur :
- # [N - r + 1; N + 1]
- #
- # avec X = N - r + 1
- # et Y = N + 1
- #
- # Numerateur -> [ X - n; Y - n [
- # Denominateur -> [ X ; Y [
- #
- # On peut donc siplifier
- #
- # Numerateur -> [X - n; X [
- # Denominateur -> [Y - n; Y [
-
- numerateur = xrange(N - r + 1 - n, N - r + 1)
- denominateur= xrange(N + 1 - n, N + 1)
-#
-# version original
-#
-# m = N - n
-# numerateur = set(range(m-r+1,m+1))
-# denominateur = set(range(N-r+1,N+1))
-# simplification = numerateur & denominateur
-# numerateur -= simplification
-# denominateur -= simplification
-# numerateur = list(numerateur)
-# denominateur=list(denominateur)
-# numerateur.sort()
-# denominateur.sort()
-
-
- p = reduce(lambda x,y:x*y,map(lambda i,j:Decimal(i)/Decimal(j),numerateur,denominateur))
- return p
-
-
-def hypergeometric(x,N,n,r):
- """
- Calcule le terme *x* d'une loi hypergéométrique
-
- Le calcule est réalisé selon la méthode décrite dans l'article
-
- Trong Wu, An accurate computation of the hypergeometric distribution function,
- ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
- Paramètres:
-
- - `x` : Nombre d'éléments marqués attendu
- - `N` : La taille de la population
- - `n` : Le nombre d'éléments marqués
- - `r` : La taille de l'echantillon
-
- Retourne un *float* indiquant la probabilité de récupérer *x* éléments
- marqués parmi *n* dans une population de taille *N* lors du tirage
- d'un échantillon de taille *r*
- """
- if n < r:
- s = n
- n = r
- r = s
- assert x>=0 and x <= r,"x out of limits"
- if x > 0 :
- return hypergeometric(x-1,N,n,r) * (n - x + 1)/x * (r - x + 1)/(N-n-r+x)
- else:
- return _hyper0(N,n,r)
-
-def chypergeometric(xmin,xmax,N,n,r):
- """
- Calcule le terme *x* d'une loi hypergéométrique
-
- Le calcule est réalisé selon la méthode décrite dans l'article
-
- Trong Wu, An accurate computation of the hypergeometric distribution function,
- ACM Trans. Math. Softw. 19 (1993), no. 1, 33–43.
-
- Paramètres:
-
- - `xmin` : Nombre d'éléments marqués minimum attendu
- - `xmax` : Nombre d'éléments marqués maximum attendu
- - `N` : La taille de la population
- - `n` : Le nombre d'éléments marqués
- - `r` : La taille de l'echantillon
-
- Retourne un *float* indiquant la probabilité de récupérer entre
- *xmin* et *xmax* éléments marqués parmi *n* dans une population
- de taille *N* lors du tirage d'un échantillon de taille *r*
- """
- if n < r:
- s = n
- n = r
- r = s
- assert xmin>=0 and xmin <= r and xmax>=0 and xmax <= r and xmin <=xmax,"x out of limits"
- hg = hypergeometric(xmin,N,n,r)
- rep = hg
- for x in xrange(xmin+1,xmax+1):
- hg = hg * (n - x + 1)/x * (r - x + 1)/(N-n-r+x)
- rep+=hg
- return rep
-
-def multipleTest(globalPvalue,testList):
- """
- Correction pour les tests multiples.
-
- Séléctionne parmis un ensemble de test le plus grand sous ensemble
- telque le risque global soit inférieur à une pvalue déterminée.
-
- Paramètres:
-
- - `globalPvalue` : Risque global à prendre pour l'ensemble des tests
- - `testList` : un élément itérable sur un ensemble de tests.
- Chaque test est une liste ou un tuple dont le dernier élément
- est la pvalue associée au test
-
- Retourne une liste contenant le sous ensemble des tests selectionnés dans
- `testList`
- """
- testList=list(testList)
- testList.sort(lambda x,y:cmp(x[-1],y[-1]))
- h0=1.0-globalPvalue
- p=1.0
- rep = []
- for t in testList:
- p*=1.0-t[-1]
- if p > h0:
- rep.append(t)
- return rep
-
\ No newline at end of file
diff --git a/obitools/statistics/noncentralhypergeo.py b/obitools/statistics/noncentralhypergeo.py
deleted file mode 100644
index e6a96ce..0000000
--- a/obitools/statistics/noncentralhypergeo.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from decimal import *
-from math import log
-
-#from obitools.utils import moduleInDevelopment
-
-#moduleInDevelopment(__name__)
-
-# from : http://www.programmish.com/?p=25
-
-def dec_log(self, base=10):
- cur_prec = getcontext().prec
- getcontext().prec += 2
- baseDec = Decimal(10)
- retValue = self
-
- if isinstance(base, Decimal):
- baseDec = base
- elif isinstance(base, float):
- baseDec = Decimal("%f" % (base))
- else:
- baseDec = Decimal(base)
-
- integer_part = Decimal(0)
- while retValue < 1:
- integer_part = integer_part - 1
- retValue = retValue * baseDec
- while retValue >= baseDec:
- integer_part = integer_part + 1
- retValue = retValue / baseDec
-
- retValue = retValue ** 10
- decimal_frac = Decimal(0)
- partial_part = Decimal(1)
- while cur_prec > 0:
- partial_part = partial_part / Decimal(10)
- digit = Decimal(0)
- while retValue >= baseDec:
- digit += 1
- retValue = retValue / baseDec
- decimal_frac = decimal_frac + digit * partial_part
- retValue = retValue ** 10
- cur_prec -= 1
- getcontext().prec -= 2
-
- return integer_part + decimal_frac
-
-class Interval(object):
- def __init__(self,begin,end,facteur=1):
- self._begin = begin
- self._end = end
- self._facteur=facteur
-
- def __str__(self):
- return '[%d,%d] ^ %d' % (self._begin,self._end,self._facteur)
-
- def __repr__(self):
- return 'Interval(%d,%d,%d)' % (self._begin,self._end,self._facteur)
-
- def begin(self):
- return (self._begin,self._facteur,True)
-
- def end(self):
- return (self._end,-self._facteur,False)
-
-
-def cmpb(i1,i2):
- x= cmp(i1[0],i2[0])
- if x==0:
- x = cmp(i2[2],i1[2])
- return x
-
-class Product(object):
- def __init__(self,i=None):
- if i is not None:
- self.prod=[i]
- else:
- self.prod=[]
- self._simplify()
-
- def _simplify(self):
- bornes=[]
- prod =[]
-
- if self.prod:
-
- for i in self.prod:
- bornes.append(i.begin())
- bornes.append(i.end())
- bornes.sort(cmpb)
-
-
- j=0
- r=len(bornes)
- for i in xrange(1,len(bornes)):
- if bornes[i][0]==bornes[j][0] and bornes[i][2]==bornes[j][2]:
- bornes[j]=(bornes[j][0],bornes[j][1]+bornes[i][1],bornes[i][2])
- r-=1
- else:
- j+=1
- bornes[j]=bornes[i]
-
- bornes=bornes[0:r]
-
- facteur=0
- close=1
-
- for b,level,open in bornes:
- if not open:
- close=0
- else:
- close=1
- if facteur:
- prod.append(Interval(debut,b-close,facteur))
- debut=b+1-close
- facteur+=level
-
- self.prod=prod
-
-
-
-
- def __mul__(self,p):
- res = Product()
- res.prod=list(self.prod)
- res.prod.extend(p.prod)
- res._simplify()
- return res
-
- def __div__(self,p):
- np = Product()
- np.prod = [Interval(x._begin,x._end,-x._facteur) for x in p.prod]
- return self * np
-
- def __str__(self):
- return str(self.prod)
-
- def log(self):
- p=Decimal(0)
- for k in self.prod:
- p+= Decimal(k._facteur) * reduce(lambda x,y:x+dec_log(Decimal(y),Decimal(10)),xrange(k._begin,k._end+1),Decimal(0))
- return p
-
- def product(self):
- p=Decimal(1)
- for k in self.prod:
- p*= reduce(lambda x,y:x*Decimal(y),xrange(k._begin,k._end+1),Decimal(1)) ** Decimal(k._facteur)
- return p
-
- def __call__(self,log=True):
- if log:
- return self.log()
- else:
- return self.product()
-
-
-def fact(n):
- return Product(Interval(1,n))
-
-def cnp(n,p):
- return fact(n)/fact(p)/fact(n-p)
-
-def hypergeometic(x,n,M,N):
- '''
-
- @param x: Variable aleatoire
- @type x: int
- @param n: taille du tirage
- @type n: int
- @param M: boule gagnante
- @type M: int
- @param N: nombre total dans l'urne
- @type N: int
-
- p(x)= cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
- '''
- return cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
-
-def nchypergeometique(x,n,M,N,r):
- '''
-
- @param x: Variable aleatoire
- @type x: int
- @param n: taille du tirage
- @type n: int
- @param M: boule gagnante
- @type M: int
- @param N: nombre total dans l'urne
- @type N: int
- @param r: odd ratio
- @type r: float
-
- p(x)= cnp(M,x) * cnp(N-M,n-x) / cnp(N,n)
- '''
-
- xmin = max(0,n-N+M)
- xmax = min(n,M)
- lr = dec_log(r)
- xlr = x * lr
- num = cnp(M,x) * cnp(N-M,n-x)
- den = [cnp(M,y) * cnp(N-M,n-y) / num for y in xrange(xmin,xmax+1)]
- fden = [lr * y - xlr for y in xrange(xmin,xmax+1)]
-
- inverse=reduce(lambda x,y : x+y,
- map(lambda i,j: i(False) * 10**j ,den,fden))
- return 1/inverse
-
-
-
\ No newline at end of file
diff --git a/obitools/svg.py b/obitools/svg.py
deleted file mode 100644
index c42e3ef..0000000
--- a/obitools/svg.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-"""\
-SVG.py - Construct/display SVG scenes.
-
-The following code is a lightweight wrapper around SVG files. The metaphor
-is to construct a scene, add objects to it, and then write it to a file
-to display it.
-
-This program uses ImageMagick to display the SVG files. ImageMagick also
-does a remarkable job of converting SVG files into other formats.
-"""
-
-import os
-display_prog = 'display' # Command to execute to display images.
-
-class Scene:
- def __init__(self,name="svg",height=400,width=400):
- self.name = name
- self.items = []
- self.height = height
- self.width = width
- return
-
- def add(self,item): self.items.append(item)
-
- def strarray(self):
- var = ["\n",
- "\n"]
- return var
-
- def write_svg(self,filename=None):
- if filename:
- self.svgname = filename
- else:
- self.svgname = self.name + ".svg"
- file = open(self.svgname,'w')
- file.writelines(self.strarray())
- file.close()
- return
-
- def display(self,prog=display_prog):
- os.system("%s %s" % (prog,self.svgname))
- return
-
-
-class Line:
- def __init__(self,start,end):
- self.start = start #xy tuple
- self.end = end #xy tuple
- return
-
- def strarray(self):
- return [" \n" %\
- (self.start[0],self.start[1],self.end[0],self.end[1])]
-
-
-class Circle:
- def __init__(self,center,radius,color):
- self.center = center #xy tuple
- self.radius = radius #xy tuple
- self.color = color #rgb tuple in range(0,256)
- return
-
- def strarray(self):
- return [" \n" % colorstr(self.color)]
-
-class Rectangle:
- def __init__(self,origin,height,width,color):
- self.origin = origin
- self.height = height
- self.width = width
- self.color = color
- return
-
- def strarray(self):
- return [" \n" %\
- (self.width,colorstr(self.color))]
-
-class Text:
- def __init__(self,origin,text,size=24):
- self.origin = origin
- self.text = text
- self.size = size
- return
-
- def strarray(self):
- return [" \n" %\
- (self.origin[0],self.origin[1],self.size),
- " %s\n" % self.text,
- " \n"]
-
-
-def colorstr(rgb): return "#%x%x%x" % (rgb[0]/16,rgb[1]/16,rgb[2]/16)
-
-def test():
- scene = Scene('test')
- scene.add(Rectangle((100,100),200,200,(0,255,255)))
- scene.add(Line((200,200),(200,300)))
- scene.add(Line((200,200),(300,200)))
- scene.add(Line((200,200),(100,200)))
- scene.add(Line((200,200),(200,100)))
- scene.add(Circle((200,200),30,(0,0,255)))
- scene.add(Circle((200,300),30,(0,255,0)))
- scene.add(Circle((300,200),30,(255,0,0)))
- scene.add(Circle((100,200),30,(255,255,0)))
- scene.add(Circle((200,100),30,(255,0,255)))
- scene.add(Text((50,50),"Testing SVG"))
- scene.write_svg()
- scene.display()
- return
-
-if __name__ == '__main__': test()
diff --git a/obitools/table/__init__.py b/obitools/table/__init__.py
deleted file mode 100644
index 41e00bd..0000000
--- a/obitools/table/__init__.py
+++ /dev/null
@@ -1,633 +0,0 @@
-'''
-
-'''
-
-from itertools import imap,count,chain
-
-from itertools import imap,count,chain
-
-class Table(list):
- """
- Tables are list of rows of the same model
- """
- def __init__(self, headers=None,
- types=None,
- colcount=None,
- rowFactory=None,
- subrowFactory=None):
- '''
-
- @param headers: the list of column header.
-
- if this parametter is C{None}, C{colcount}
- parametter must be set.
-
- @type headers: C{list}, C{tuple} or and iterable object
-
- @param types: the list of data type associated to each column.
-
- If this parametter is specified its length must be
- equal to the C{headers} length or to C{colcount}.
-
- @type types: C{list}, C{tuple} or and iterable object
-
- @param colcount: number of column in the created table.
-
- If C{headers} parametter is not C{None} this
- parametter is ignored
-
- @type colcount: int
- '''
-
- assert headers is not None or colcount is not None,\
- 'headers or colcount parametter must be not None value'
-
- if headers is None:
- headers = tuple('Col_%d' % x for x in xrange(colcount))
-
- self.headers = headers
- self.types = types
- self.colcount= len(self.headers)
-
- if rowFactory is None:
- self.rowFactory=TableRow
- else:
- self.rowFactory=rowFactory
-
- if subrowFactory is None:
- self.subrowFactory=TableRow
- else:
- self.subrowFactory=rowFactory
-
-
- self.likedTo=set()
-
-
-
- def isCompatible(self,data):
- assert isinstance(data,(Table,TableRow))
- return (self.colcount == data.colcount and
- (id(self.types)==id(data.types) or
- self.types==data.types
- )
- )
-
- def __setitem__ (self,key,value):
- '''
-
- @param key:
- @type key: C{int}, C{slice} or C{str}
- @param value:
- @type value:
- '''
-
- if isintance(key,int):
- if not isinstance(value, TableRow):
- value = self.rowFactory(self,value)
- else:
- assert self.isCompatible(value)
- list.__setitem__(self,key,value.row)
-
- elif isinstance(key,slice):
- indices = xrange(key.indices(len(self)))
- for i,d in imap(None,indices,value):
- self[i]=d
-
- else:
- raise TypeError, "Key must be an int or slice value"
-
- def __getitem__(self,key):
- '''
- this function has different comportements depending
- of the data type of C{key} and the table used.
-
- @param key: description of the table part to return
- @type key: C{int} or C{slice}
-
- @return: return a TableRow (if key is C{int})
- or a subpart of the table (if key is C{slice}).
- '''
- if isinstance(key,int):
- return self.rowFactory(self,
- list.__getitem__(self,key))
-
- if isinstance(key,slice):
- newtable=Table(self.headers,self.types)
- indices = xrange(key.indices(len(self)))
- for i in indices:
- list.append(newtable,list.__getitem__(self,i))
- self.likedTo.add(newtable)
- return newtable
-
- raise TypeError
-
-
- def __getslice__(self,x,y):
- return self.__getitem__(slice(x,y))
-
- def __iter__(self):
- return TableIterator(self)
-
- def __hash__(self):
- return id(self)
-
- def __add__(self,itable):
- return concatTables(self,itable)
-
- def _setTypes(self,types):
- if types is not None and not isinstance(type,tuple):
- types = tuple(x for x in types)
-
- assert types is None or len(types)==len(self._headers)
-
- self._types = types
-
- if types is not None:
- for row in self:
- row.castRow()
-
- def _getTypes(self):
- return self._types
-
- types = property(_getTypes,_setTypes)
-
- def _getHeaders(self):
- return self._headers
-
- def _setHeaders(self,headers):
- if not isinstance(headers, tuple):
- headers = tuple(x for x in headers)
-
- self._hindex = dict((k,i) for i,k in imap(None,count(),headers))
- self._headers=headers
- self.colcount=len(headers)
-
- headers=property(_getHeaders,_setHeaders)
-
- def append(self,value):
- if not isinstance(value, TableRow):
- value = self.rowFactory(self,value)
- else:
- assert self.isCompatible(value)
- list.append(self,value.row)
-
-
-
-class _Row(list):
- def __init__(self,data,size):
- if data is None:
- list.__init__(self,(None for x in xrange(size)))
- else:
- list.__init__(self,data)
- assert len(self)==size, \
- "Size of data is not correct (%d instead of %d)" % (len(self),size)
-
- def append(self,value):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
- def pop(self,key=None):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
- def extend(self,values):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
-
-
-
-class TableRow(object):
- '''
-
- '''
- def __init__(self, table,
- data=None,
- ):
-
- self.table = table
-
- if isinstance(data,_Row):
- self.row=row
- else:
- data = self._castRow(data)
- self.row=_Row(data,self._colcount)
-
- def getType(self):
- return self.table.types
-
- def getHeaders(self):
- return self.table.headers
-
- def getHIndex(self):
- return self.table._hindex
-
- def getColCount(self):
- return self.table.colcount
-
- types = property(getType,None,None,
- "List of types associated to this row")
- headers= property(getHeaders,None,None,
- "List of headers associated to this row")
-
- _hindex= property(getHIndex,None,None)
- _colcount = property(getColCount,None,None)
-
- def _castValue(t,x):
- '''
- Cast a value to a specified type, with exception of
- C{None} values that are returned without cast.
-
- @param t: the destination type
- @type t: C{type}
- @param x: the value to cast
-
- @return: the casted value or C{None}
-
- '''
- if x is None or t is None:
- return x
- else:
- return t(x)
-
- _castValue=staticmethod(_castValue)
-
- def _castRow(self,data):
-
- if not isinstance(data, (list,dict)):
- data=[x for x in data]
-
- if isinstance(data,list):
- assert len(data)==self._colcount, \
- 'values has not good length'
- if self.types is not None:
- data=[TableRow._castValue(t, x)
- for t,x in imap(None,self.types,data)]
-
- elif isinstance(data,dict):
- lvalue = [None] * len(self.header)
-
- for k,v in data.items():
- try:
- hindex = self._hindex[k]
- if self.types is not None:
- lvalue[hindex]=TableRow._castValue(self.types[hindex], v)
- else:
- lvalue[hindex]=v
- except KeyError:
- info('%s is not a table column' % k)
-
- data=lvalue
- else:
- raise TypeError
-
- return data
-
- def __getitem__(self,key):
- '''
-
- @param key:
- @type key:
- '''
-
- if isinstance(key,(int,slice)):
- return self.row[key]
-
- if isinstance(key,str):
- i = self._hindex[key]
- return self.row[i]
-
- raise TypeError, "Key must be an int, slice or str value"
-
- def __setitem__(self,key,value):
- '''
-
- @param key:
- @type key:
- @param value:
- @type value:
- '''
-
- if isinstance(key,str):
- key = self._hindex[key]
-
- elif isinstance(key,int):
- if self.types is not None:
- value = TableRow._castValue(self.types[key], value)
- self.row[key]=value
-
- elif isinstance(key,slice):
- indices = xrange(key.indices(len(self.row)))
- for i,v in imap(None,indices,value):
- self[i]=v
- else:
- raise TypeError, "Key must be an int, slice or str value"
-
-
-
- def __iter__(self):
- '''
-
- '''
- return iter(self.row)
-
- def append(self,value):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
- def pop(self,key=None):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
- def extend(self,values):
- raise NotImplementedError, \
- "Rows cannot change of size"
-
- def __len__(self):
- return self._colcount
-
- def __repr__(self):
- return repr(self.row)
-
- def __str__(self):
- return str(self.row)
-
- def castRow(self):
- self.row = _Row(self._castRow(self.row),len(self.row))
-
-
-class iTableIterator(object):
-
- def _getHeaders(self):
- raise NotImplemented
-
- def _getTypes(self):
- raise NotImplemented
-
- def _getRowFactory(self):
- raise NotImplemented
-
- def _getSubrowFactory(self):
- raise NotImplemented
-
- def _getColcount(self):
- return len(self._getTypes())
-
- def __iter__(self):
- return self
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
- colcount = property(_getColcount,None,None)
-
- def columnIndex(self,name):
- if isinstance(name,str):
- return self._reference.headers.index(name)
- elif isinstance(name,int):
- lh = len(self._reference.headers)
- if name < lh and name >=0:
- return name
- elif name < 0 and name >= -lh:
- return lh - name
- raise IndexError
- raise TypeError
-
- def next(self):
- raise NotImplemented
-
-
-class TableIterator(iTableIterator):
-
- def __init__(self,table):
- if not isinstance(table,Table):
- raise TypeError
-
- self._reftable=table
- self._i=0
-
- def _getHeaders(self):
- return self._reftable.headers
-
- def _getTypes(self):
- return self._reftable.types
-
- def _getRowFactory(self):
- return self._reftable.rowFactory
-
- def _getSubrowFactory(self):
- return self._reftable.subrowFactory
-
- def columnIndex(self,name):
- if isinstance(name,str):
- return self._reftable._hindex[name]
- elif isinstance(name,int):
- lh = len(self._reftable._headers)
- if name < lh and name >=0:
- return name
- elif name < 0 and name >= -lh:
- return lh - name
- raise IndexError
- raise TypeError
-
-
- def rewind(self):
- i=0
-
- def next(self):
- if self._i < len(self._reftable):
- rep=self._reftable[self._i]
- self._i+=1
- return rep
- else:
- raise StopIteration
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
-
-
-class ProjectionIterator(iTableIterator):
-
- def __init__(self,tableiterator,*cols):
- self._reference = iter(tableiterator)
-
- assert isinstance(self._reference, iTableIterator)
-
- self._selected = tuple(self._reference.columnIndex(x)
- for x in cols)
- self._headers = tuple(self._reference.headers[x]
- for x in self._selected)
-
- if self._reference.types is not None:
- self._types= tuple(self._reference.types[x]
- for x in self._selected)
- else:
- self._types=None
-
- def _getRowFactory(self):
- return self._reference.subrowFactory
-
- def _getSubrowFactory(self):
- return self._reference.subrowFactory
-
- def _getHeaders(self):
- return self._headers
-
- def _getTypes(self):
- return self._types
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
-
- def next(self):
- value = self._reference.next()
- value = (value[x] for x in self._selected)
- return self.rowFactory(self,value)
-
-class SelectionIterator(iTableIterator):
- def __init__(self,tableiterator,**conditions):
- self._reference = iter(tableiterator)
-
- assert isinstance(self._reference, iTableIterator)
-
- self._conditions=dict((self._reference.columnIndex(i),c)
- for i,c in conditions.iteritems())
-
- def _checkCondition(self,row):
- return reduce(lambda x,y : x and y,
- (bool(self._conditions[i](row[i]))
- for i in self._conditions),
- True)
-
- def _getRowFactory(self):
- return self._reference.rowFactory
-
- def _getSubrowFactory(self):
- return self._reference.subrowFactory
-
- def _getHeaders(self):
- return self._reference.headers
-
- def _getTypes(self):
- return self._reference.types
-
- def next(self):
- row = self._reference.next()
- while not self._checkCondition(row):
- row = self._reference.next()
- return row
-
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
-
-
-class UnionIterator(iTableIterator):
- def __init__(self,*itables):
- self._itables=[iter(x) for x in itables]
- self._types = self._itables[0].types
- self._headers = self._itables[0].headers
-
- assert reduce(lambda x,y: x and y,
- ( isinstance(z,iTableIterator)
- and len(z.headers)==len(self._headers)
- for z in self._itables),
- True)
-
- self._iterator = chain(*self._itables)
-
- def _getRowFactory(self):
- return self._itables[0].rowFactory
-
- def _getSubrowFactory(self):
- return self._itables[0].subrowFactory
-
- def _getHeaders(self):
- return self._headers
-
- def _getTypes(self):
- return self._types
-
- def next(self):
- value = self._iterator.next()
- return self.rowFactory(self,value.row)
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
- rowFactory = property(_getRowFactory,None,None)
- subrowFactory = property(_getSubrowFactory,None,None)
-
-
-
-def tableFactory(tableiterator):
- tableiterator = iter(tableiterator)
- assert isinstance(tableiterator, iTableIterator)
-
- newtable = Table(tableiterator.headers,
- tableiterator.types,
- tableiterator.rowFactory,
- tableiterator.subrowFactory)
-
- for r in tableiterator:
- newtable.append(r)
-
- return newtable
-
-def projectTable(tableiterator,*cols):
- return tableFactory(ProjectionIterator(tableiterator,*cols))
-
-def subTable(tableiterator,**conditions):
- return tableFactory(SelectionIterator(tableiterator,**conditions))
-
-def concatTables(*itables):
- '''
- Concatene severals tables.
-
- concatenation is done using the L{UnionIterator}
-
- @type itables: iTableIterator or Table
-
- @return: a new Table
- @rtype: c{Table}
-
- @see: L{UnionIterator}
- '''
- return tableFactory(UnionIterator(*itables))
-
-class TableIteratorAsDict(object):
-
- def __init__(self,tableiterator):
- self._reference = iter(tableiterator)
-
- assert isinstance(self._reference, iTableIterator)
-
- self._headers = self._reference.headers
- self._types = self._reference.types
- if self._types is not None:
- self._types = dict((n,t)
- for n,t in imap(None,self._headers,self._types))
-
- def __iter__(self):
- return self
-
- def next(self):
- value = self._reference.next()
- return dict((n,t)
- for n,t in imap(None,self._headers,value))
-
- def _getHeaders(self):
- return self._headers
-
- def _getTypes(self):
- return self._types
-
- headers = property(_getHeaders,None,None)
- types = property(_getTypes,None,None)
-
\ No newline at end of file
diff --git a/obitools/table/csv.py b/obitools/table/csv.py
deleted file mode 100644
index 1d9a73d..0000000
--- a/obitools/table/csv.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-obitools.table.csv module provides an iterator adapter
-allowing to parse csv (comma separatted value) file
-"""
-
-import re
-
-def csvIterator(lineIterator,sep=','):
- '''
- Allows easy parsing of a csv file. This function
- convert an iterator on line over a csv text file
- in an iterator on data list. Each list corresponds
- to all values present n one line.
-
- @param lineIterator: iterator on text lines
- @type lineIterator: iterator
- @param sep: string of one letter used as separator
- blank charactere or " is not allowed as
- separator
- @type sep: string
- @return: an iterator on data list
- @rtype: iterator
- '''
- assert len(sep)==1 and not sep.isspace() and sep!='"'
- valueMatcher=re.compile('\s*((")(([^"]|"")*)"|([^%s]*?))\s*(%s|$)' % (sep,sep))
- def iterator():
- for l in lineIterator:
- yield _csvParse(l,valueMatcher)
- return iterator()
-
-
-def _csvParse(line,valueMatcher):
- data=[]
- i = iter(valueMatcher.findall(line))
- m = i.next()
- if m[0]:
- while m[-1]!='':
- if m[1]=='"':
- data.append(m[2].replace('""','"'))
- else:
- data.append(m[0])
- m=i.next()
- if m[1]=='"':
- data.append(m[2].replace('""','"'))
- else:
- data.append(m[0])
- return data
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/tagmatcher/__init__.py b/obitools/tagmatcher/__init__.py
deleted file mode 100644
index 880ead0..0000000
--- a/obitools/tagmatcher/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from obitools import NucSequence
-from obitools.location import locationGenerator,extractExternalRefs
-
-
-
-class TagMatcherSequence(NucSequence):
- '''
- Class used to represent a nucleic sequence issued mapped
- on a genome by the tagMatcher software.
- '''
-
- def __init__(self,seq,cd,locs,dm,rm):
- NucSequence.__init__(self, seq, seq)
- self['locations']=locs
- self['conditions']=cd
- self['dm']=dm
- self['rm']=rm
- self['tm']=dm+rm
-
- def eminEmaxFilter(self,emin=None,emax=None):
- result = [x for x in self['locations']
- if (emin is None or x['error'] >=emin)
- and (emax is None or x['error'] <=emax)]
- self['locations']=result
- dm=0
- rm=0
- for x in result:
- if x.isDirect():
- dm+=1
- else:
- rm+=1
- self['dm']=dm
- self['rm']=rm
- self['tm']=dm+rm
- return self
diff --git a/obitools/tagmatcher/options.py b/obitools/tagmatcher/options.py
deleted file mode 100644
index 45673ce..0000000
--- a/obitools/tagmatcher/options.py
+++ /dev/null
@@ -1,14 +0,0 @@
-def addTagMatcherErrorOptions(optionManager):
- optionManager.add_option('-E','--emax',
- action='store',
- metavar="<##>",
- type="int",dest="emax",
- default=None,
- help="keep match with no more than emax errors")
-
- optionManager.add_option('-e','--emin',
- action='store',
- metavar="<##>",
- type="int",dest="emin",
- default=0,
- help="keep match with at least emin errors")
diff --git a/obitools/tagmatcher/parser.py b/obitools/tagmatcher/parser.py
deleted file mode 100644
index a843e66..0000000
--- a/obitools/tagmatcher/parser.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import re
-import sys
-
-from obitools import tagmatcher
-from obitools.seqdb import nucEntryIterator
-from obitools.location.feature import Feature
-from obitools.location import locationGenerator
-
-_seqMatcher = re.compile('(?<=TG )[acgtrymkwsbdhvnACGTRYMKWSBDHVN]+')
-_cdMatcher = re.compile('(?<=CD ) *([^:]+?) +: +([0-9]+)')
-_loMatcher = re.compile('(?<=LO ) *([ACGTRYMKWSBDHVN]+) +([^ ]+) +([^ ]+) +\(([0-9]+)\)')
-_dmMatcher = re.compile('(?<=DM )[0-9]+')
-_rmMatcher = re.compile('(?<=RM )[0-9]+')
-
-
-def __tagmatcherparser(text):
- try:
- seq = _seqMatcher.search(text).group()
- cd = dict((x[0],int(x[1])) for x in _cdMatcher.findall(text))
- locs = []
-
- for (match,ac,loc,err) in _loMatcher.findall(text):
- feat = Feature('location', locationGenerator(loc))
- feat['error']=int(err)
- feat['match']=match
- feat['contig']=ac
- locs.append(feat)
-
- dm = int(_dmMatcher.search(text).group())
- rm = int(_rmMatcher.search(text).group())
-
- except AttributeError,e:
- print >>sys.stderr,'======================================================='
- print >>sys.stderr,text
- print >>sys.stderr,'======================================================='
- raise e
-
- return (seq,cd,locs,dm,rm)
-
-def tagMatcherParser(text):
- return tagmatcher.TagMatcherSequence(*__tagmatcherparser(text))
-
-
-class TagMatcherIterator(object):
- _cdheadparser = re.compile('condition [0-9]+ : (.+)')
-
- def __init__(self,file):
- self._ni = nucEntryIterator(file)
- self.header=self._ni.next()
- self.conditions=TagMatcherIterator._cdheadparser.findall(self.header)
-
- def next(self):
- return tagMatcherParser(self._ni.next())
-
- def __iter__(self):
- return self
-
-def formatTagMatcher(tmseq,reader=None):
- if isinstance(tmseq, TagMatcherIterator):
- return tmseq.header
-
- assert isinstance(tmseq,tagmatcher.TagMatcherSequence),'Only TagMatcherSequence can be used'
- lo = '\n'.join(['LO %s %s %s (%d)' % (l['match'],l['contig'],l.locStr(),l['error'])
- for l in tmseq['locations']])
- if reader is not None:
- cd = '\n'.join(['CD %s : %d' % (x,tmseq['conditions'][x])
- for x in reader.conditions])
- else:
- cd = '\n'.join(['CD %s : %d' % (x,tmseq['conditions'][x])
- for x in tmseq['conditions']])
-
- tg = 'TG %s' % str(tmseq)
-
- e=[tg]
- if cd:
- e.append(cd)
- if lo:
- e.append(lo)
-
- tm = 'TM %d' % tmseq['tm']
- dm = 'DM %d' % tmseq['dm']
- rm = 'RM %d' % tmseq['rm']
-
- e.extend((tm,dm,rm,'//'))
-
- return '\n'.join(e)
-
-
-
diff --git a/obitools/thermo/__init__.py b/obitools/thermo/__init__.py
deleted file mode 100644
index 492dbb9..0000000
--- a/obitools/thermo/__init__.py
+++ /dev/null
@@ -1,597 +0,0 @@
-from math import log
-from array import array
-from copy import deepcopy
-
-bpencoder={'A':1,'C':2,'G':3,'T':4,
- 'a':1,'c':2,'g':3,'t':4,
- '-':0
- }
-
-rvencoder={'A':4,'C':3,'G':2,'T':1,
- 'a':4,'c':3,'g':2,'t':1,
- '-':0
- }
-
-R = 1.987
-SALT_METHOD_SANTALUCIA = 1
-SALT_METHOD_OWCZARZY = 2
-DEF_CONC_PRIMERS = 8.e-7
-DEF_CONC_SEQUENCES = 0.
-DEF_SALT = 0.05
-forbidden_entropy = 0.
-forbidden_enthalpy = 1.e18
-
-__dH = [[[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]]
- ]
-__dS = [[[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]],
- [[[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]],
- [[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.],[0.,0.,0.,0.,0.,0.]]]
- ]
-
-def initParams(c1, c2, kp, sm,nparm={}):
- global forbidden_entropy
- global dH,dS
-
- dH=deepcopy(__dH)
- dS=deepcopy(__dS)
-
- nparm['Ct1'] = c1;
- nparm['Ct2'] = c2;
- nparm['kplus'] = kp;
- maxCT = 1;
-
- if(nparm['Ct2'] > nparm['Ct1']):
- maxCT = 2
-
- if(nparm['Ct1'] == nparm['Ct2']):
- ctFactor = nparm['Ct1']/2
- elif (maxCT == 1):
- ctFactor = nparm['Ct1']-nparm['Ct2']/2
- else:
- ctFactor = nparm['Ct2']-nparm['Ct1']/2
-
- nparm['rlogc'] = R * log(ctFactor)
- forbidden_entropy = nparm['rlogc']
- nparm['kfac'] = 0.368 * log(nparm['kplus'])
- nparm['saltMethod'] = sm
-
-
- # Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
- for x in xrange(1,5):
- for y in xrange(1,5):
- dH[0][x][y][0]=forbidden_enthalpy;
- dS[0][x][y][0]=forbidden_entropy;
- dH[x][0][0][y]=forbidden_enthalpy;
- dS[x][0][0][y]=forbidden_entropy;
- dH[x][0][y][0]=forbidden_enthalpy;
- dS[x][0][y][0]=forbidden_entropy;
- # forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
- dH[x][5][y][0]=forbidden_enthalpy;
- dS[x][5][y][0]=forbidden_entropy;
- dH[x][0][y][5]=forbidden_enthalpy;
- dS[x][0][y][5]=forbidden_entropy;
- dH[5][x][0][y]=forbidden_enthalpy;
- dS[5][x][0][y]=forbidden_entropy;
- dH[0][x][5][y]=forbidden_enthalpy;
- dS[0][x][5][y]=forbidden_entropy;
-
- #forbid X$/-Y etc.
- dH[x][5][0][y]=forbidden_enthalpy;
- dS[x][5][0][y]=forbidden_entropy;
- dH[x][0][5][y]=forbidden_enthalpy;
- dS[x][0][5][y]=forbidden_entropy;
- dH[5][x][y][0]=forbidden_enthalpy;
- dS[5][x][y][0]=forbidden_entropy;
- dH[0][x][y][5]=forbidden_enthalpy;
- dS[0][x][y][5]=forbidden_entropy;
-
-
-
- #also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
- dH[x][0][0][0]=forbidden_enthalpy;
- dS[x][0][0][0]=forbidden_entropy;
- dH[0][0][x][0]=forbidden_enthalpy;
- dS[0][0][x][0]=forbidden_entropy;
- # x-/-$
- dH[x][0][0][5]=forbidden_enthalpy;
- dS[x][0][0][5]=forbidden_entropy;
- dH[5][0][0][x]=forbidden_enthalpy;
- dS[5][0][0][x]=forbidden_entropy;
- dH[0][5][x][0]=forbidden_enthalpy;
- dS[x][0][0][5]=forbidden_entropy;
- dH[0][x][5][0]=forbidden_enthalpy;
- dS[0][x][5][0]=forbidden_entropy;
-
- # forbid --/--
- dH[0][0][0][0]=forbidden_enthalpy;
- dS[0][0][0][0]=forbidden_entropy;
-
- dH[5][0][0][0]=forbidden_enthalpy;
- dS[5][0][0][0]=forbidden_entropy;
- dH[0][0][5][0]=forbidden_enthalpy;
- dS[0][0][5][0]=forbidden_entropy;
- dH[0][5][5][0]=forbidden_enthalpy;
- dS[0][5][5][0]=forbidden_entropy;
-
- # Interior loops (double Mismatches)
- iloop_entropy=-0.97
- iloop_enthalpy=0.0
-
- for x in xrange(1,5):
- for y in xrange(1,5):
- for a in xrange(1,5):
- for b in xrange(1,5):
- # AT and CG pair, and as A=1, C=2, G=3, T=4 this means
- # we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
- if ( not ((x+a==5) or (y+b==5))):
- # No watson-crick-pair, i.e. double mismatch!
- # set enthalpy/entropy to loop expansion!
- dH[x][y][a][b] = iloop_enthalpy;
- dS[x][y][a][b] = iloop_entropy;
-
-
- # xy/-- and --/xy (Bulge Loops of size > 1)
- bloop_entropy=-1.3
- bloop_enthalpy=0.0
-
- for x in xrange(1,5):
- for y in xrange(1,5):
- dH[x][y][0][0] = bloop_enthalpy;
- dS[x][y][0][0] = bloop_entropy;
- dH[0][0][x][y] = bloop_enthalpy;
- dS[0][0][x][y] = bloop_entropy;
-
-
- # x-/ya abd xa/y- as well as -x/ay and ax/-y
- # bulge opening and closing parameters with
- # adjacent matches / mismatches
- # obulge_mism and cbulge_mism chosen so high to avoid
- # AAAAAAAAA
- # T--G----T
- # being better than
- # AAAAAAAAA
- # TG------T
- obulge_match_H =-2.66e3
- obulge_match_S =-14.22
- cbulge_match_H =-2.66e3
- cbulge_match_S =-14.22
- obulge_mism_H = 0.0
- obulge_mism_S = -6.45
- cbulge_mism_H = 0.0
- cbulge_mism_S =-6.45
-
- for x in xrange(1,5):
- for y in xrange(1,5):
- for a in xrange(1,5):
- if (x+y==5): # other base pair matches!
-
- dH[x][0][y][a]=obulge_match_H; # bulge opening
- dS[x][0][y][a]=obulge_match_S;
- dH[x][a][y][0]=obulge_match_H;
- dS[x][a][y][0]=obulge_match_S;
- dH[0][x][a][y]=cbulge_match_H; # bulge closing
- dS[0][x][a][y]=cbulge_match_S;
- dH[a][x][0][y]=cbulge_match_H;
- dS[a][x][0][y]=cbulge_match_S;
- else:
- # mismatch in other base pair!
- dH[x][0][y][a]=obulge_mism_H; # bulge opening
- dS[x][0][y][a]=obulge_mism_S;
- dH[x][a][y][0]=obulge_mism_H;
- dS[x][a][y][0]=obulge_mism_S;
- dH[0][x][a][y]=cbulge_mism_H; # bulge closing
- dS[0][x][a][y]=cbulge_mism_S;
- dH[a][x][0][y]=cbulge_mism_H;
- dS[a][x][0][y]=cbulge_mism_S;
-
-
-
- # Watson-Crick pairs (note that only ten are unique, as obviously
- # 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
- dH[1][1][4][4]=-7.6e3; dS[1][1][4][4]=-21.3 # AA/TT 04
- dH[1][2][4][3]=-8.4e3; dS[1][2][4][3]=-22.4 # AC/TG adapted GT/CA
- dH[1][3][4][2]=-7.8e3; dS[1][3][4][2]=-21.0 # AG/TC adapted CT/GA
- dH[1][4][4][1]=-7.2e3; dS[1][4][4][1]=-20.4 # AT/TA 04
- dH[2][1][3][4]=-8.5e3; dS[2][1][3][4]=-22.7 # CA/GT 04
- dH[2][2][3][3]=-8.0e3; dS[2][2][3][3]=-19.9 # CC/GG adapted GG/CC
- dH[2][3][3][2]=-10.6e3; dS[2][3][3][2]=-27.2 # CG/GC 04
- dH[2][4][3][1]=-7.8e3; dS[2][4][3][1]=-21.0 # CT/GA 04
- dH[3][1][2][4]=-8.2e3; dS[3][1][2][4]=-22.2 # GA/CT 04
- dH[3][2][2][3]=-9.8e3; dS[3][2][2][3]=-24.4 # GC/CG 04
- dH[3][3][2][2]=-8.0e3; dS[3][3][2][2]=-19.9 # GG/CC 04
- dH[3][4][2][1]=-8.4e3; dS[3][4][2][1]=-22.4 # GT/CA 04
- dH[4][1][1][4]=-7.2e3; dS[4][1][1][4]=-21.3 # TA/AT 04
- dH[4][2][1][3]=-8.2e3; dS[4][2][1][3]=-22.2 # TC/AG adapted GA/CT
- dH[4][3][1][2]=-8.5e3; dS[4][3][1][2]=-22.7 # TG/AC adapted CA/GT
- dH[4][4][1][1]=-7.6e3; dS[4][4][1][1]=-21.3 # TT/AA adapted AA/TT
-
- # A-C Mismatches (Values for pH 7.0)
- dH[1][1][2][4]=7.6e3; dS[1][1][2][4]=20.2 # AA/CT
- dH[1][1][4][2]=2.3e3; dS[1][1][4][2]=4.6 # AA/TC
- dH[1][2][2][3]=-0.7e3; dS[1][2][2][3]=-3.8 # AC/CG
- dH[1][2][4][1]=5.3e3; dS[1][2][4][1]=14.6 # AC/TA
- dH[1][3][2][2]=0.6e3; dS[1][3][2][2]=-0.6 # AG/CC
- dH[1][4][2][1]=5.3e3; dS[1][4][2][1]=14.6 # AT/CA
- dH[2][1][1][4]=3.4e3; dS[2][1][1][4]=8.0 # CA/AT
- dH[2][1][3][2]=1.9e3; dS[2][1][3][2]=3.7 # CA/GC
- dH[2][2][1][3]=5.2e3; dS[2][2][1][3]=14.2 # CC/AG
- dH[2][2][3][1]=0.6e3; dS[2][2][3][1]=-0.6 # CC/GA
- dH[2][3][1][2]=1.9e3; dS[2][3][1][2]=3.7 # CG/AC
- dH[2][4][1][1]=2.3e3; dS[2][4][1][1]=4.6 # CT/AA
- dH[3][1][2][2]=5.2e3; dS[3][1][2][2]=14.2 # GA/CC
- dH[3][2][2][1]=-0.7e3; dS[3][2][2][1]=-3.8 # GC/CA
- dH[4][1][1][2]=3.4e3; dS[4][1][1][2]=8.0 # TA/AC
- dH[4][2][1][1]=7.6e3; dS[4][2][1][1]=20.2 # TC/AA
-
- # C-T Mismatches
- dH[1][2][4][4]=0.7e3; dS[1][2][4][4]=0.2 # AC/TT
- dH[1][4][4][2]=-1.2e3; dS[1][4][4][2]=-6.2 # AT/TC
- dH[2][1][4][4]=1.0e3; dS[2][1][4][4]=0.7 # CA/TT
- dH[2][2][3][4]=-0.8e3; dS[2][2][3][4]=-4.5 # CC/GT
- dH[2][2][4][3]=5.2e3; dS[2][2][4][3]=13.5 # CC/TG
- dH[2][3][4][2]=-1.5e3; dS[2][3][4][2]=-6.1 # CG/TC
- dH[2][4][3][2]=-1.5e3; dS[2][4][3][2]=-6.1 # CT/GC
- dH[2][4][4][1]=-1.2e3; dS[2][4][4][1]=-6.2 # CT/TA
- dH[3][2][2][4]=2.3e3; dS[3][2][2][4]=5.4 # GC/CT
- dH[3][4][2][2]=5.2e3; dS[3][4][2][2]=13.5 # GT/CC
- dH[4][1][2][4]=1.2e3; dS[4][1][2][4]=0.7 # TA/CT
- dH[4][2][2][3]=2.3e3; dS[4][2][2][3]=5.4 # TC/CG
- dH[4][2][1][4]=1.2e3; dS[4][2][1][4]=0.7 # TC/AT
- dH[4][3][2][2]=-0.8e3; dS[4][3][2][2]=-4.5 # TG/CC
- dH[4][4][2][1]=0.7e3; dS[4][4][2][1]=0.2 # TT/CA
- dH[4][4][1][2]=1.0e3; dS[4][4][1][2]=0.7 # TT/AC
-
- # G-A Mismatches
- dH[1][1][3][4]=3.0e3; dS[1][1][3][4]=7.4 # AA/GT
- dH[1][1][4][3]=-0.6e3; dS[1][1][4][3]=-2.3 # AA/TG
- dH[1][2][3][3]=0.5e3; dS[1][2][3][3]=3.2 # AC/GG
- dH[1][3][3][2]=-4.0e3; dS[1][3][3][2]=-13.2 # AG/GC
- dH[1][3][4][1]=-0.7e3; dS[1][3][4][1]=-2.3 # AG/TA
- dH[1][4][3][1]=-0.7e3; dS[1][4][3][1]=-2.3 # AT/GA
- dH[2][1][3][3]=-0.7e3; dS[2][1][3][3]=-2.3 # CA/GG
- dH[2][3][3][1]=-4.0e3; dS[2][3][3][1]=-13.2 # CG/GA
- dH[3][1][1][4]=0.7e3; dS[3][1][1][4]=0.7 # GA/AT
- dH[3][1][2][3]=-0.6e3; dS[3][1][2][3]=-1.0 # GA/CG
- dH[3][2][1][3]=-0.6e3; dS[3][2][1][3]=-1.0 # GC/AG
- dH[3][3][1][2]=-0.7e3; dS[3][3][1][2]=-2.3 # GG/AC
- dH[3][3][2][1]=0.5e3; dS[3][3][2][1]=3.2 # GG/CA
- dH[3][4][1][1]=-0.6e3; dS[3][4][1][1]=-2.3 # GT/AA
- dH[4][1][1][3]=0.7e3; dS[4][1][1][3]=0.7 # TA/AG
- dH[4][3][1][1]=3.0e3; dS[4][3][1][1]=7.4 # TG/AA
-
- # G-T Mismatches
- dH[1][3][4][4]=1.0e3; dS[1][3][4][4]=0.9 # AG/TT
- dH[1][4][4][3]=-2.5e3; dS[1][4][4][3]=-8.3 # AT/TG
- dH[2][3][3][4]=-4.1e3; dS[2][3][3][4]=-11.7 # CG/GT
- dH[2][4][3][3]=-2.8e3; dS[2][4][3][3]=-8.0 # CT/GG
- dH[3][1][4][4]=-1.3e3; dS[3][1][4][4]=-5.3 # GA/TT
- dH[3][2][4][3]=-4.4e3; dS[3][2][4][3]=-12.3 # GC/TG
- dH[3][3][2][4]=3.3e3; dS[3][3][2][4]=10.4 # GG/CT
- dH[3][3][4][2]=-2.8e3; dS[3][3][4][2]=-8.0 # GG/TC
-# dH[3][3][4][4]=5.8e3; dS[3][3][4][4]=16.3 # GG/TT
- dH[3][4][2][3]=-4.4e3; dS[3][4][2][3]=-12.3 # GT/CG
- dH[3][4][4][1]=-2.5e3; dS[3][4][4][1]=-8.3 # GT/TA
-# dH[3][4][4][3]=4.1e3; dS[3][4][4][3]=9.5 # GT/TG
- dH[4][1][3][4]=-0.1e3; dS[4][1][3][4]=-1.7 # TA/GT
- dH[4][2][3][3]=3.3e3; dS[4][2][3][3]=10.4 # TC/GG
- dH[4][3][1][4]=-0.1e3; dS[4][3][1][4]=-1.7 # TG/AT
- dH[4][3][3][2]=-4.1e3; dS[4][3][3][2]=-11.7 # TG/GC
-# dH[4][3][3][4]=-1.4e3; dS[4][3][3][4]=-6.2 # TG/GT
- dH[4][4][1][3]=-1.3e3; dS[4][4][1][3]=-5.3 # TT/AG
- dH[4][4][3][1]=1.0e3; dS[4][4][3][1]=0.9 # TT/GA
-# dH[4][4][3][3]=5.8e3; dS[4][4][3][3]=16.3 # TT/GG
-
- # A-A Mismatches
- dH[1][1][1][4]=4.7e3; dS[1][1][1][4]=12.9 # AA/AT
- dH[1][1][4][1]=1.2e3; dS[1][1][4][1]=1.7 # AA/TA
- dH[1][2][1][3]=-2.9e3; dS[1][2][1][3]=-9.8 # AC/AG
- dH[1][3][1][2]=-0.9e3; dS[1][3][1][2]=-4.2 # AG/AC
- dH[1][4][1][1]=1.2e3; dS[1][4][1][1]=1.7 # AT/AA
- dH[2][1][3][1]=-0.9e3; dS[2][1][3][1]=-4.2 # CA/GA
- dH[3][1][2][1]=-2.9e3; dS[3][1][2][1]=-9.8 # GA/CA
- dH[4][1][1][1]=4.7e3; dS[4][1][1][1]=12.9 # TA/AA
-
- # C-C Mismatches
- dH[1][2][4][2]=0.0e3; dS[1][2][4][2]=-4.4 # AC/TC
- dH[2][1][2][4]=6.1e3; dS[2][1][2][4]=16.4 # CA/CT
- dH[2][2][2][3]=3.6e3; dS[2][2][2][3]=8.9 # CC/CG
- dH[2][2][3][2]=-1.5e3; dS[2][2][3][2]=-7.2 # CC/GC
- dH[2][3][2][2]=-1.5e3; dS[2][3][2][2]=-7.2 # CG/CC
- dH[2][4][2][1]=0.0e3; dS[2][4][2][1]=-4.4 # CT/CA
- dH[3][2][2][2]=3.6e3; dS[3][2][2][2]=8.9 # GC/CC
- dH[4][2][1][2]=6.1e3; dS[4][2][1][2]=16.4 # TC/AC
-
- # G-G Mismatches
- dH[1][3][4][3]=-3.1e3; dS[1][3][4][3]=-9.5 # AG/TG
- dH[2][3][3][3]=-4.9e3; dS[2][3][3][3]=-15.3 # CG/GG
- dH[3][1][3][4]=1.6e3; dS[3][1][3][4]=3.6 # GA/GT
- dH[3][2][3][3]=-6.0e3; dS[3][2][3][3]=-15.8 # GC/GG
- dH[3][3][2][3]=-6.0e3; dS[3][3][2][3]=-15.8 # GG/CG
- dH[3][3][3][2]=-4.9e3; dS[3][3][3][2]=-15.3 # GG/GC
- dH[3][4][3][1]=-3.1e3; dS[3][4][3][1]=-9.5 # GT/GA
- dH[4][3][1][3]=1.6e3; dS[4][3][1][3]=3.6 # TG/AG
-
- # T-T Mismatches
- dH[1][4][4][4]=-2.7e3; dS[1][4][4][4]=-10.8 # AT/TT
- dH[2][4][3][4]=-5.0e3; dS[2][4][3][4]=-15.8 # CT/GT
- dH[3][4][2][4]=-2.2e3; dS[3][4][2][4]=-8.4 # GT/CT
- dH[4][1][4][4]=0.2e3; dS[4][1][4][4]=-1.5 # TA/TT
- dH[4][2][4][3]=-2.2e3; dS[4][2][4][3]=-8.4 # TC/TG
- dH[4][3][4][2]=-5.0e3; dS[4][3][4][2]=-15.8 # TG/TC
- dH[4][4][1][4]=0.2e3; dS[4][4][1][4]=-1.5 # TT/AT
- dH[4][4][4][1]=-2.7e3; dS[4][4][4][1]=-10.8 # TT/TA
-
- # Dangling Eds
- dH[5][1][1][4]=-0.7e3; dS[5][1][1][4]=-0.8 # $A/AT
- dH[5][1][2][4]=4.4e3; dS[5][1][2][4]=14.9 # $A/CT
- dH[5][1][3][4]=-1.6e3; dS[5][1][3][4]=-3.6 # $A/GT
- dH[5][1][4][4]=2.9e3; dS[5][1][4][4]=10.4 # $A/TT
- dH[5][2][1][3]=-2.1e3; dS[5][2][1][3]=-3.9 # $C/AG
- dH[5][2][2][3]=-0.2e3; dS[5][2][2][3]=-0.1 # $C/CG
- dH[5][2][3][3]=-3.9e3; dS[5][2][3][3]=-11.2 # $C/GG
- dH[5][2][4][3]=-4.4e3; dS[5][2][4][3]=-13.1 # $C/TG
- dH[5][3][1][2]=-5.9e3; dS[5][3][1][2]=-16.5 # $G/AC
- dH[5][3][2][2]=-2.6e3; dS[5][3][2][2]=-7.4 # $G/CC
- dH[5][3][3][2]=-3.2e3; dS[5][3][3][2]=-10.4 # $G/GC
- dH[5][3][4][2]=-5.2e3; dS[5][3][4][2]=-15.0 # $G/TC
- dH[5][4][1][1]=-0.5e3; dS[5][4][1][1]=-1.1 # $T/AA
- dH[5][4][2][1]=4.7e3; dS[5][4][2][1]=14.2 # $T/CA
- dH[5][4][3][1]=-4.1e3; dS[5][4][3][1]=-13.1 # $T/GA
- dH[5][4][4][1]=-3.8e3; dS[5][4][4][1]=-12.6 # $T/TA
- dH[1][5][4][1]=-2.9e3; dS[1][5][4][1]=-7.6 # A$/TA
- dH[1][5][4][2]=-4.1e3; dS[1][5][4][2]=-13.0 # A$/TC
- dH[1][5][4][3]=-4.2e3; dS[1][5][4][3]=-15.0 # A$/TG
- dH[1][5][4][4]=-0.2e3; dS[1][5][4][4]=-0.5 # A$/TT
- dH[1][1][5][4]=0.2e3; dS[1][1][5][4]=2.3 # AA/$T
- dH[1][1][4][5]=-0.5e3; dS[1][1][4][5]=-1.1 # AA/T$
- dH[1][2][5][3]=-6.3e3; dS[1][2][5][3]=-17.1 # AC/$G
- dH[1][2][4][5]=4.7e3; dS[1][2][4][5]=14.2 # AC/T$
- dH[1][3][5][2]=-3.7e3; dS[1][3][5][2]=-10.0 # AG/$C
- dH[1][3][4][5]=-4.1e3; dS[1][3][4][5]=-13.1 # AG/T$
- dH[1][4][5][1]=-2.9e3; dS[1][4][5][1]=-7.6 # AT/$A
- dH[1][4][4][5]=-3.8e3; dS[1][4][4][5]=-12.6 # AT/T$
- dH[2][5][3][1]=-3.7e3; dS[2][5][3][1]=-10.0 # C$/GA
- dH[2][5][3][2]=-4.0e3; dS[2][5][3][2]=-11.9 # C$/GC
- dH[2][5][3][3]=-3.9e3; dS[2][5][3][3]=-10.9 # C$/GG
- dH[2][5][3][4]=-4.9e3; dS[2][5][3][4]=-13.8 # C$/GT
- dH[2][1][5][4]=0.6e3; dS[2][1][5][4]=3.3 # CA/$T
- dH[2][1][3][5]=-5.9e3; dS[2][1][3][5]=-16.5 # CA/G$
- dH[2][2][5][3]=-4.4e3; dS[2][2][5][3]=-12.6 # CC/$G
- dH[2][2][3][5]=-2.6e3; dS[2][2][3][5]=-7.4 # CC/G$
- dH[2][3][5][2]=-4.0e3; dS[2][3][5][2]=-11.9 # CG/$C
- dH[2][3][3][5]=-3.2e3; dS[2][3][3][5]=-10.4 # CG/G$
- dH[2][4][5][1]=-4.1e3; dS[2][4][5][1]=-13.0 # CT/$A
- dH[2][4][3][5]=-5.2e3; dS[2][4][3][5]=-15.0 # CT/G$
- dH[3][5][2][1]=-6.3e3; dS[3][5][2][1]=-17.1 # G$/CA
- dH[3][5][2][2]=-4.4e3; dS[3][5][2][2]=-12.6 # G$/CC
- dH[3][5][2][3]=-5.1e3; dS[3][5][2][3]=-14.0 # G$/CG
- dH[3][5][2][4]=-4.0e3; dS[3][5][2][4]=-10.9 # G$/CT
- dH[3][1][5][4]=-1.1e3; dS[3][1][5][4]=-1.6 # GA/$T
- dH[3][1][2][5]=-2.1e3; dS[3][1][2][5]=-3.9 # GA/C$
- dH[3][2][5][3]=-5.1e3; dS[3][2][5][3]=-14.0 # GC/$G
- dH[3][2][2][5]=-0.2e3; dS[3][2][2][5]=-0.1 # GC/C$
- dH[3][3][5][2]=-3.9e3; dS[3][3][5][2]=-10.9 # GG/$C
- dH[3][3][2][5]=-3.9e3; dS[3][3][2][5]=-11.2 # GG/C$
- dH[3][4][5][1]=-4.2e3; dS[3][4][5][1]=-15.0 # GT/$A
- dH[3][4][2][5]=-4.4e3; dS[3][4][2][5]=-13.1 # GT/C$
- dH[4][5][1][1]=0.2e3; dS[4][5][1][1]=2.3 # T$/AA
- dH[4][5][1][2]=0.6e3; dS[4][5][1][2]=3.3 # T$/AC
- dH[4][5][1][3]=-1.1e3; dS[4][5][1][3]=-1.6 # T$/AG
- dH[4][5][1][4]=-6.9e3; dS[4][5][1][4]=-20.0 # T$/AT
- dH[4][1][5][4]=-6.9e3; dS[4][1][5][4]=-20.0 # TA/$T
- dH[4][1][1][5]=-0.7e3; dS[4][1][1][5]=-0.7 # TA/A$
- dH[4][2][5][3]=-4.0e3; dS[4][2][5][3]=-10.9 # TC/$G
- dH[4][2][1][5]=4.4e3; dS[4][2][1][5]=14.9 # TC/A$
- dH[4][3][5][2]=-4.9e3; dS[4][3][5][2]=-13.8 # TG/$C
- dH[4][3][1][5]=-1.6e3; dS[4][3][1][5]=-3.6 # TG/A$
- dH[4][4][5][1]=-0.2e3; dS[4][4][5][1]=-0.5 # TT/$A
- dH[4][4][1][5]=2.9e3; dS[4][4][1][5]=10.4 # TT/A$
-
-
- nparm['dH']=dH
- nparm['dS']=dS
-
- return nparm
-
-
-defaultParm=initParams(DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT, SALT_METHOD_SANTALUCIA)
-
-def seqencoder(seq):
- return [bpencoder[x] for x in seq]
-
-def getInitialEntropy(nparm=defaultParm):
- return -5.9+nparm['rlogc']
-
-def getEnthalpy(x0, x1, y0, y1,nparm=defaultParm):
- return nparm['dH'][x0][x1][y0][y1]
-
-def GetEntropy(x0, x1, y0, y1,nparm=defaultParm):
-
- nx0=x0
- nx1=x1
- ny0=y0
- ny1=y1
- dH=nparm['dH']
- dS=nparm['dS']
- answer = dS[nx0][nx1][ny0][ny1]
-
- if (nparm['saltMethod'] == SALT_METHOD_SANTALUCIA):
- if(nx0!=5 and 1<= nx1 and nx1<=4):
- answer += 0.5*nparm['kfac']
-
- if(ny1!=5 and 1<= ny0 and ny0<=4):
- answer += 0.5*nparm['kfac']
-
- if (nparm['saltMethod'] == SALT_METHOD_OWCZARZY):
- logk = log(nparm['kplus']);
- answer += dH[nx0][nx1][ny0][ny1]*((4.29 * nparm['gcContent']-3.95)* 1e-5 * logk + 0.0000094*logk**2);
-
- return answer;
-
-def CalcTM(entropy,enthalpy):
- tm = 0
- if (enthalpy>=forbidden_enthalpy) :
- return 0;
-
- if (entropy<0) :
- tm = enthalpy/entropy
- if (tm<0):
- return 0;
-
- return tm;
-
-
-
-
-def countGCContent(seq):
- count = 0;
- for k in seq :
- if k in 'cgGC':
- count+=1;
- return count;
-
-
-#def cleanSeq (inseq,outseq,length):
-#
-# seqlen = len(inseq)
-# if (len != 0)
-# seqlen = length;
-#
-# j=0
-# for i in xrange(seqlen):
-# {
-# switch (inseq[i])
-# {
-# case 'a':
-# case '\0':
-# case 'A':
-# outseq[j++] = 'A'; break;
-# case 'c':
-# case '\1':
-# case 'C':
-# outseq[j++] = 'C'; break;
-# case 'g':
-# case '\2':
-# case 'G':
-# outseq[j++] = 'G'; break;
-# case 't':
-# case '\3':
-# case 'T':
-# outseq[j++] = 'T'; break;
-# }
-# }
-# outseq[j] = '\0';
-#}
-
-def calcSelfTM(seq,nparm=defaultParm):
- dH=nparm['dH']
- dS=nparm['dS']
- length=len(seq)
-
- thedH = 0;
- thedS = -5.9+nparm['rlogc']
- for i in xrange(1,length):
- c1 = rvencoder[seq[i-1]];
- c2 = rvencoder[seq[i]];
- c3 = bpencoder[seq[i-1]];
- c4 = bpencoder[seq[i]];
-
- thedH += dH[c3][c4][c1][c2];
- thedS += GetEntropy(c3, c4, c1, c2, nparm)
-
- mtemp = CalcTM(thedS,thedH);
-# print thedH,thedS,nparm['rlogc']
- return mtemp-273.15;
-
-
-def calcTMTwoSeq(seq1,seq2,nparm=defaultParm):
-
- thedH = 0;
- thedS = -5.9+nparm['rlogc']
- dH=nparm['dH']
- dS=nparm['dS']
- length=len(seq1)
-
- for i in xrange(1,length):
- c1 = rvencoder[seq2[i-1]]
- c2 = rvencoder[seq2[i]]
- c3 = bpencoder[seq1[i-1]]
- c4 = bpencoder[seq1[i]]
-
- thedH += dH[c3][c4][c1][c2]
- thedS += GetEntropy(c3, c4, c1, c2, nparm)
-
- mtemp = CalcTM(thedS,thedH);
-# print thedH,thedS,nparm['rlogc']
-
- return mtemp-273.15;
-
-
diff --git a/obitools/tools/__init__.py b/obitools/tools/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/obitools/tools/_solexapairend.so b/obitools/tools/_solexapairend.so
deleted file mode 100755
index 2d9e075..0000000
Binary files a/obitools/tools/_solexapairend.so and /dev/null differ
diff --git a/obitools/tools/solexapairend.py b/obitools/tools/solexapairend.py
deleted file mode 100644
index 609f533..0000000
--- a/obitools/tools/solexapairend.py
+++ /dev/null
@@ -1,51 +0,0 @@
-'''
-Created on 17 mai 2010
-
-@author: coissac
-'''
-
-from obitools.alignment import columnIterator
-
-
-def iterOnAligment(ali):
- pos0=0
- pos1=len(ali[1].wrapped)-1
- begin0=False
- end0=False
- begin1=False
- end1=False
- for nuc0,nuc1 in columnIterator(ali):
- if nuc0=='-':
- if begin0:
- if not end0:
- score0 = ( ali[0].wrapped.quality[pos0-1]
- +ali[0].wrapped.quality[pos0]
- )/2
- else:
- score0 = 1.
- else:
- score0 = 0.
- else:
- begin0=True
- score0 = ali[0].wrapped.quality[pos0]
- pos0+=1
- end0= pos0==len(ali[0].wrapped)
-
- if nuc1=='-':
- if begin1:
- if not end1:
- score1 = ( ali[1].wrapped.wrapped.quality[pos1]
- +ali[1].wrapped.wrapped.quality[pos1+1]
- )/2
- else:
- score1 = 0.
- else:
- score1 = 1.
- else:
- begin1=True
- score1 = ali[1].wrapped.wrapped.quality[pos1]
- pos1-=1
- end1=pos1<0
-
- result = (nuc0,score0,nuc1,score1)
- yield result
diff --git a/obitools/tree/__init__.py b/obitools/tree/__init__.py
deleted file mode 100644
index facb5ff..0000000
--- a/obitools/tree/__init__.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import re
-
-
-class Tree(set):
- def registerNode(self,node):
- assert isinstance(node, TreeNode)
- self.add(node)
-
- def childNodeIterator(self,node):
- assert isinstance(node, TreeNode)
- return (x for x in self if x._parent==node)
-
- def subTreeSize(self,node):
- n=1
- for subnode in self.childNodeIterator(node):
- n+=self.subTreeSize(subnode)
- return n
-
- def getRoot(self):
- roots = [x for x in self if x._parent is None]
- assert len(roots)==1,'Tree cannot have several root node'
- return roots[0]
-
- def ancestorNodeIterator(self,node):
- assert isinstance(node, TreeNode)
- while node._parent is not None:
- yield node
- node=node._parent
- yield node
-
- def terminalNodeIterator(self):
- return (x for x in self if x._isterminal)
-
- def commonAncestor(self,node1,node2):
- anc1 = set(x for x in self.ancestorNodeIterator(node1))
- rep = [x for x in self.ancestorNodeIterator(node2)
- if x in anc1]
- assert len(rep)>=1
- return rep[0]
-
- def getDist(self,node1,node2):
- ca = self.commonAncestor(node1, node2)
- dist = 0
- while node1 != ca:
- dist+=node1._dist
- node1=node1._parent
- while node2 != ca:
- dist+=node2._dist
- node2=node2._parent
- return dist
-
- def farestNodes(self):
- dmax=0
- n1=None
- n2=None
- for node1 in self.terminalNodeIterator():
- for node2 in self.terminalNodeIterator():
- d = self.getDist(node1, node2)
- if d > dmax:
- dmax = d
- n1=node1
- n2=node2
- return node1,node2,dmax
-
- def setRoot(self,node,dist):
- assert node in self
- assert node._parent and node._dist > dist
-
- newroot = TreeNode(self)
- parent = node._parent
- node._parent = newroot
- compdist = node._dist - dist
- node._dist=dist
- node = parent
-
- while node:
- parent = node._parent
- if parent:
- dist = node._dist
-
- node._parent = newroot
- node._dist = compdist
-
- newroot = node
- node = parent
-
- if node:
- compdist=dist
-
- for child in self.childNodeIterator(newroot):
- child._parent = newroot._parent
- child._dist += newroot._dist
-
- self.remove(newroot)
-
-
-class TreeNode(object):
- def __init__(self,tree,name=None,dist=None,bootstrap=None,**info):
- self._parent=None
- self._name=name
- self._dist=dist
- self._bootstrap=bootstrap
- self._info=info
- tree.registerNode(self)
- self._isterminal=True
-
-
- def linkToParent(self,parent):
- assert isinstance(parent, TreeNode) or parent is None
- self._parent=parent
- if parent is not None:
- parent._isterminal=False
-
-
-
-
diff --git a/obitools/tree/dot.py b/obitools/tree/dot.py
deleted file mode 100644
index a21c4a1..0000000
--- a/obitools/tree/dot.py
+++ /dev/null
@@ -1,18 +0,0 @@
-
-from obitools.utils import universalOpen
-from obitools.tree import Tree,TreeNode
-
-def nodeWriter(tree,node,nodes):
- data=[]
- if node._parent:
- data.append('%d -> %d ' % (nodes[node],nodes[node._parent]))
- return "\n".join(data)
-
-
-def treeWriter(tree):
- nodes=dict(map(None,tree,xrange(len(tree))))
- code=[]
- for node in tree:
- code.append(nodeWriter(tree,node,nodes))
- code = "\n".join(code)
- return 'digraph tree { node [shape=point]\n%s\n};' % code
\ No newline at end of file
diff --git a/obitools/tree/layout.py b/obitools/tree/layout.py
deleted file mode 100644
index a39ba77..0000000
--- a/obitools/tree/layout.py
+++ /dev/null
@@ -1,103 +0,0 @@
-
-class NodeLayout(dict):
- '''
- Layout data associated to a tree node.
- '''
- pass
-
-class TreeLayout(dict):
- '''
- Description of a phylogenetic tree layout
-
- @see:
- '''
- def addNode(self,node):
- self[node]=NodeLayout()
-
- def setAttribute(self,node,key,value):
- self[node][key]=value
-
- def hasAttribute(self,node,key):
- return key in self[node]
-
- def getAttribute(self,node,key,default=None):
- return self[node].get(key,default)
-
- def setNodesColor(self,color,predicate=True):
- '''
-
- @param color:
- @type color:
- @param predicat:
- @type predicat:
- '''
- for node in self:
- if callable(predicat):
- change = predicat(node)
- else:
- change = predicat
-
- if change:
- if callable(color):
- c = color(node)
- else:
- c = color
- self.setAttribute(node, 'color', color)
-
- def setCircular(self,iscircularpredicat):
- for node in self:
- if callable(iscircularpredicat):
- change = iscircularpredicat(node)
- else:
- change = iscircularpredicat
-
- if change:
- self.setAttribute(node, 'shape', 'circle')
- else:
- self.setAttribute(node, 'shape', 'square')
-
- def setRadius(self,radius,predicate=True):
- for node in self:
- if callable(predicat):
- change = predicat(node)
- else:
- change = predicat
-
- if change:
- if callable(radius):
- r = radius(node)
- else:
- r = radius
- self.setAttribute(node, 'radius', r)
-
-def predicatGeneratorIsInfoEqual(info,value):
- def isInfoEqual(node):
- data = node._info
- return data is not None and info in data and data[info]==value
-
- return isInfoEqual
-
-def isTerminalNode(node):
- return node._isterminal
-
-def constantColorGenerator(color):
- def colorMaker(node):
- return color
-
- return colorMaker
-
-def constantColorGenerator(color):
- def colorMaker(node):
- return color
-
- return colorMaker
-
-def notPredicatGenerator(predicate):
- def notpred(x):
- return not predicat(x)
- return notpred
-
-
-
-
-
\ No newline at end of file
diff --git a/obitools/tree/newick.py b/obitools/tree/newick.py
deleted file mode 100644
index c69d0d3..0000000
--- a/obitools/tree/newick.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import re
-import sys
-
-from obitools.utils import universalOpen
-from obitools.tree import Tree,TreeNode
-
-def subNodeIterator(data):
- level=0
- start = 1
- if data[0]=='(':
- for i in xrange(1,len(data)):
- c=data[i]
- if c=='(':
- level+=1
- elif c==')':
- level-=1
- if c==',' and not level:
- yield data[start:i]
- start = i+1
- yield data[start:i]
- else:
- yield data
-
-
-_nodeParser=re.compile('\s*(?P\(.*\))?(?P[^ :]+)? *(?P[0-9.]+)?(:(?P-?[0-9.]+))?')
-
-def nodeParser(data):
- parsedNode = _nodeParser.match(data).groupdict(0)
- if not parsedNode['name']:
- parsedNode['name']=None
-
- if not parsedNode['bootstrap']:
- parsedNode['bootstrap']=None
- else:
- parsedNode['bootstrap']=float(parsedNode['bootstrap'])
-
- if not parsedNode['distance']:
- parsedNode['distance']=None
- else:
- parsedNode['distance']=float(parsedNode['distance'])
-
- if not parsedNode['subnodes']:
- parsedNode['subnodes']=None
-
- return parsedNode
-
-_cleanTreeData=re.compile('\s+')
-
-def treeParser(data,tree=None,parent=None):
- if tree is None:
- tree = Tree()
- data = _cleanTreeData.sub(' ',data).strip()
-
- parsedNode = nodeParser(data)
- node = TreeNode(tree,
- parsedNode['name'],
- parsedNode['distance'],
- parsedNode['bootstrap'])
-
- node.linkToParent(parent)
-
- if parsedNode['subnodes']:
- for subnode in subNodeIterator(parsedNode['subnodes']):
- treeParser(subnode,tree,node)
- return tree
-
-_treecomment=re.compile('\[.*\]')
-
-def treeIterator(file):
- file = universalOpen(file)
- data = file.read()
-
- comment = _treecomment.findall(data)
- data=_treecomment.sub('',data).strip()
-
- if comment:
- comment=comment[0]
- else:
- comment=None
- for tree in data.split(';'):
- t = treeParser(tree)
- if comment:
- t.comment=comment
- yield t
-
-def nodeWriter(tree,node,deep=0):
- name = node._name
- if name is None:
- name=''
-
- distance=node._dist
- if distance is None:
- distance=''
- else:
- distance = ':%6.5f' % distance
-
- bootstrap=node._bootstrap
- if bootstrap is None:
- bootstrap=''
- else:
- bootstrap=' %d' % int(bootstrap)
-
- nodeseparator = ',\n' + ' ' * (deep+1)
-
- subnodes = nodeseparator.join([nodeWriter(tree, x, deep+1)
- for x in tree.childNodeIterator(node)])
- if subnodes:
- subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'
-
- return '%s%s%s%s' % (subnodes,name,bootstrap,distance)
-
-def treeWriter(tree,startnode=None):
- if startnode is not None:
- root=startnode
- else:
- root = tree.getRoot()
- return nodeWriter(tree,root)+';'
diff --git a/obitools/tree/svg.py b/obitools/tree/svg.py
deleted file mode 100644
index ff51a8c..0000000
--- a/obitools/tree/svg.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import math
-
-from obitools.svg import Scene,Circle,Line,Rectangle,Text
-from obitools.tree import Tree
-
-def displayTreeLayout(layout,width=400,height=400,radius=3,scale=1.0):
- '''
- Convert a tree layout object in an svg file.
-
- @param layout: the tree layout object
- @type layout: obitools.tree.layout.TreeLayout
- @param width: svg document width
- @type width: int
- @param height: svg document height
- @type height: int
- @param radius: default radius of node in svg unit (default 3)
- @type radius: int
- @param scale: scale factor applied to the svg coordinates (default 1.0)
- @type scale: float
-
- @return: str containing svg code
- '''
- xmin = min(layout.getAttribute(n,'x') for n in layout)
- xmax = max(layout.getAttribute(n,'x') for n in layout)
- ymin = min(layout.getAttribute(n,'y') for n in layout)
- ymax = max(layout.getAttribute(n,'y') for n in layout)
-
- dx = xmax - xmin
- dy = ymax - ymin
-
- xscale = width * 0.95 / dx * scale
- yscale = height * 0.95 / dy * scale
-
- def X(x):
- return (x - xmin ) * xscale + width * 0.025
-
- def Y(y):
- return (y - ymin ) * yscale + height * 0.025
-
- scene = Scene('unrooted', height, width)
-
- for n in layout:
- if n._parent is not None:
- parent = n._parent
- xf = layout.getAttribute(n,'x')
- yf = layout.getAttribute(n,'y')
- xp = layout.getAttribute(parent,'x')
- yp = layout.getAttribute(parent,'y')
- scene.add(Line((X(xf),Y(yf)),(X(xp),Y(yp))))
-
- for n in layout:
- xf = layout.getAttribute(n,'x')
- yf = layout.getAttribute(n,'y')
- cf = layout.getAttribute(n,'color')
- sf = layout.getAttribute(n,'shape')
- if layout.hasAttribute(n,'radius'):
- rf=layout.getAttribute(n,'radius')
- else:
- rf=radius
-
- if sf=='circle':
- scene.add(Circle((X(xf),Y(yf)),rf,cf))
- else:
- scene.add(Rectangle((X(xf)-rf,Y(yf)-rf),2*rf,2*rf,cf))
-
-
- return ''.join(scene.strarray())
-
-
-
\ No newline at end of file
diff --git a/obitools/tree/unrooted.py b/obitools/tree/unrooted.py
deleted file mode 100644
index 9a9f3e6..0000000
--- a/obitools/tree/unrooted.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from obitools.tree.layout import TreeLayout
-import math
-
-def subtreeLayout(tree,node,layout,start,end,x,y,default):
- nbotu = tree.subTreeSize(node)
- delta = (end-start)/(nbotu+1)
-
- layout.addNode(node)
- layout.setAttribute(node,'x',x)
- layout.setAttribute(node,'y',y)
- layout.setAttribute(node,'color',(255,0,0))
- layout.setAttribute(node,'shape','circle')
-
- for subnode in tree.childNodeIterator(node):
- snbotu = tree.subTreeSize(subnode)
- end = start + snbotu * delta
- med = start + snbotu * delta /2
- r = subnode._dist
- if r is None or r <=0:
- r=default
- subx=math.cos(med) * r + x
- suby=math.sin(med) * r + y
- subtreeLayout(tree, subnode, layout, start, end, subx, suby, default)
- start=end
-
- return layout
-
-def treeLayout(tree):
- layout = TreeLayout()
- root = tree.getRoot()
- dmin = min(n._dist for n in tree if n._dist is not None and n._dist > 0)
- return subtreeLayout(tree,root,layout,0,2*math.pi,0,0,dmin / 100)
-
\ No newline at end of file
diff --git a/obitools/unit/__init__.py b/obitools/unit/__init__.py
deleted file mode 100644
index d02c812..0000000
--- a/obitools/unit/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import unittest
-
-from obitools import tests_group as obitools_tests_group
-
-tests_group=obitools_tests_group
-
-
-
diff --git a/obitools/unit/obitools/__init__.py b/obitools/unit/obitools/__init__.py
deleted file mode 100644
index ab1bcec..0000000
--- a/obitools/unit/obitools/__init__.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import unittest
-
-import obitools
-
-class BioseqTest(unittest.TestCase):
-
- sequenceId = 'id1'
- sequenceDefinition = 'sequence definition'
- sequenceQualifier = {'extra':3}
-
- def setUp(self):
- self.bioseq = self.bioseqClass(self.sequenceId,
- self.sequenceString,
- self.sequenceDefinition,
- **self.sequenceQualifier)
-
- title = self.__doc__.strip()
- underline = "=" * len(title)
-
- #print "%s\n%s" % (title,underline)
-
- def tearDown(self):
- pass
- #print "\n"
-
- def testIdAttribute(self):
- '''
- test if id attribute exists
- '''
- self.failUnless(hasattr(self.bioseq, 'id'), 'id missing attribute')
-
- def testIdValue(self):
- '''
- test if id attribute value is 'id1'
- '''
- self.failUnlessEqual(self.bioseq.id, 'id1',
- 'identifier is created with good value')
-
- def testDefinitionAttribute(self):
- '''
- test if definition attribute exists
- '''
- self.failUnless(hasattr(self.bioseq, 'definition'), 'definition missing attribute')
-
- def testSequenceIsLowerCase(self):
- '''
- test if sequence is stored as lower case letter
- '''
- self.failUnlessEqual(str(self.bioseq),
- str(self.bioseq).lower(),
- "Sequence is not stored as lower case string")
-
- def testSequenceQualifier(self):
- '''
- test if the extra qualifier is present and its value is three.
- '''
- self.failUnlessEqual(self.bioseq['extra'],
- 3,
- "Sequence qualifier cannot be successfully retrieve")
-
- def testCreateSequenceQualifier(self):
- self.bioseq['testqualifier']='ok'
- self.failUnlessEqual(self.bioseq['testqualifier'],
- 'ok',
- "Sequence qualifier cannot be successfully created")
-
-
-
-class NucBioseqTest(BioseqTest):
- '''
- Test obitools.NucSequence class
- '''
-
- bioseqClass = obitools.NucSequence
- sequenceString = 'AACGT' * 5
-
-
-class AABioseqTest(BioseqTest):
- '''
- Test obitools.AASequence class
- '''
-
- bioseqClass = obitools.AASequence
- sequenceString = 'MLKCVT' * 5
-
-
-
-
-tests_group = [NucBioseqTest,AABioseqTest]
\ No newline at end of file
diff --git a/obitools/utils/__init__.py b/obitools/utils/__init__.py
deleted file mode 100644
index fd7076f..0000000
--- a/obitools/utils/__init__.py
+++ /dev/null
@@ -1,324 +0,0 @@
-import sys
-
-import time
-import re
-import shelve
-
-from threading import Lock
-from logging import warning
-import urllib2
-
-from obitools.gzip import GzipFile
-from obitools.zipfile import ZipFile
-import os.path
-
-
-class FileFormatError(Exception):
- pass
-
-
-
-def universalOpen(file,*options):
- '''
- Open a file gziped or not.
-
- If file is a C{str} instance, file is
- concidered as a file name. In this case
- the C{.gz} suffixe is tested to eventually
- open it a a gziped file.
-
- If file is an other kind of object, it is assumed
- that this object follow the C{file} interface
- and it is return as is.
-
- @param file: the file to open
- @type file: C{str} or a file like object
-
- @return: an iterator on text lines.
- '''
- if isinstance(file,str):
- if urllib2.urlparse.urlparse(file)[0]=='':
- rep = open(file,*options)
- else:
- rep = urllib2.urlopen(file,timeout=15)
-
- if file[-3:] == '.gz':
- rep = GzipFile(fileobj=rep)
- if file[-4:] == '.zip':
- zip = ZipFile(file=rep)
- data = zip.infolist()
- assert len(data)==1,'Only zipped file containning a single file can be open'
- name = data[0].filename
- rep = zip.open(name)
- else:
- rep = file
- return rep
-
-def universalTell(file):
- '''
- Return the position in the file even if
- it is a gziped one.
-
- @param file: the file to check
- @type file: a C{file} like instance
-
- @return: position in the file
- @rtype: C{int}
- '''
- if isinstance(file, GzipFile):
- file=file.myfileobj
- return file.tell()
-
-def fileSize(file):
- '''
- Return the file size even if it is a
- gziped one.
-
- @param file: the file to check
- @type file: a C{file} like instance
-
- @return: the size of the file
- @rtype: C{int}
- '''
- if isinstance(file, GzipFile):
- file=file.myfileobj
- pos = file.tell()
- file.seek(0,2)
- length = file.tell()
- file.seek(pos,0)
- return length
-
-def progressBar(pos,maxi,reset=False,head='',delta=[],step=[1,0,0]):
- if reset:
- del delta[:]
- if not delta:
- delta.append(time.time())
- delta.append(time.time())
- assert maxi>0
-
- step[1]+=1
- if step[1] % step[0] == 0:
- step[1]=1
- newtime = time.time()
- d = newtime-delta[1]
- if d < 0.2:
- step[0]*=2
- elif d > 0.4 and step[0]>1:
- step[0]/=2
-
- delta[1]=newtime
- elapsed = delta[1]-delta[0]
-
- if callable(pos):
- pos=pos()
- percent = float(pos)/maxi * 100
- remain = time.gmtime(elapsed / percent * (100-percent))
- days = remain.tm_yday - 1
- hour = remain.tm_hour
- minu = remain.tm_min
- sec = remain.tm_sec
- if days:
- remain = "%d days %02d:%02d:%02d" % (days,hour,minu,sec)
- else:
- remain = "%02d:%02d:%02d" % (hour,minu,sec)
- bar = '#' * int(percent/2)
- step[2]=(step[2]+1) % 4
- bar+= '|/-\\'[step[2]]
- bar+= ' ' * (50 - int(percent/2))
- sys.stderr.write('\r%s %5.1f %% |%s] remain : %s' %(head,percent,bar,remain))
- else:
- step[1]+=1
-
-def endLessIterator(endedlist):
- for x in endedlist:
- yield x
- while(1):
- yield endedlist[-1]
-
-
-def multiLineWrapper(lineiterator):
- '''
- Aggregator of strings.
-
- @param lineiterator: a stream of strings from an opened OBO file.
- @type lineiterator: a stream of strings.
-
- @return: an aggregated stanza.
- @rtype: an iterotor on str
-
- @note: The aggregator aggregates strings from an opened OBO file.
- When the length of a string is < 2, the current stanza is over.
- '''
-
- for line in lineiterator:
- rep = [line]
- while len(line)>=2 and line[-2]=='\\':
- rep[-1]=rep[-1][0:-2]
- try:
- line = lineiterator.next()
- except StopIteration:
- raise FileFormatError
- rep.append(line)
- yield ''.join(rep)
-
-
-def skipWhiteLineIterator(lineiterator):
- '''
- Curator of stanza.
-
- @param lineiterator: a stream of strings from an opened OBO file.
- @type lineiterator: a stream of strings.
-
- @return: a stream of strings without blank strings.
- @rtype: a stream strings
-
- @note: The curator skip white lines of the current stanza.
- '''
-
- for line in lineiterator:
- cleanline = line.strip()
- if cleanline:
- yield line
- else:
- print 'skipped'
-
-
-class ColumnFile(object):
-
- def __init__(self,stream,sep=None,strip=True,
- types=None,skip=None,head=None,
- extra=None,
- extraformat='([a-zA-Z]\w*) *= *([^;]+);'):
- self._stream = universalOpen(stream)
- self._delimiter=sep
- self._strip=strip
- self._extra=extra
- self._extraformat = re.compile(extraformat)
-
- if types:
- self._types=[x for x in types]
- for i in xrange(len(self._types)):
- if self._types[i] is bool:
- self._types[i]=ColumnFile.str2bool
- else:
- self._types=None
-
- self._skip = skip
- if skip is not None:
- self._lskip= len(skip)
- else:
- self._lskip= 0
- self._head=head
-
- def str2bool(x):
- return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
-
- str2bool = staticmethod(str2bool)
-
-
- def __iter__(self):
- return self
-
- def next(self):
-
- def cast(txt,type):
- try:
- v = type(txt)
- except:
- v=None
- return v
- ligne = self._stream.next()
- if self._skip is not None:
- while ligne[0:self._lskip]==self._skip:
- ligne = self._stream.next()
- if self._extra is not None:
- try:
- (ligne,extra) = ligne.rsplit(self._extra,1)
- extra = dict(self._extraformat.findall(extra))
- except ValueError:
- extra=None
- else:
- extra = None
- data = ligne.split(self._delimiter)
- if self._strip or self._types:
- data = [x.strip() for x in data]
- if self._types:
- it = endLessIterator(self._types)
- data = [cast(*x) for x in ((y,it.next()) for y in data)]
- if self._head is not None:
- data=dict(map(None, self._head,data))
- if extra is not None:
- data['__extra__']=extra
- else:
- if extra is not None:
- data.append(extra)
- return data
-
- def tell(self):
- return universalTell(self._stream)
-
-
-class CachedDB(object):
-
- def __init__(self,cachefile,masterdb):
- self._cache = shelve.open(cachefile,'c')
- self._db = masterdb
- self._lock=Lock()
-
- def _cacheSeq(self,seq):
- self._lock.acquire()
- self._cache[seq.id]=seq
- self._lock.release()
- return seq
-
- def __getitem__(self,ac):
- if isinstance(ac,str):
- self._lock.acquire()
- if ac in self._cache:
-# print >>sys.stderr,"Use cache for %s" % ac
- data = self._cache[ac]
- self._lock.release()
-
- else:
- self._lock.release()
- data = self._db[ac]
- self._cacheSeq(data)
- return data
- else:
- self._lock.acquire()
- acs = [[x,self._cache.get(x,None)] for x in ac]
- self._lock.release()
- newacs = [ac for ac,cached in acs if cached is None]
- if newacs:
- newseqs = self._db[newacs]
- else:
- newseqs = iter([])
- for r in acs:
- if r[1] is None:
- r[1]=self._cacheSeq(newseqs.next())
-# else:
-# print >>sys.stderr,"Use cache for %s" % r[0]
- return (x[1] for x in acs)
-
-
-def moduleInDevelopment(name):
- Warning('This module %s is under development : use it with caution' % name)
-
-
-def deprecatedScript(newscript):
- current = sys.argv[0]
- print >>sys.stderr," "
- print >>sys.stderr," "
- print >>sys.stderr," "
- print >>sys.stderr,"#########################################################"
- print >>sys.stderr,"# #"
- print >>sys.stderr," W A R N I N G :"
- print >>sys.stderr," %s is a deprecated script " % os.path.split(current)[1]
- print >>sys.stderr," it will disappear in the next obitools version"
- print >>sys.stderr," "
- print >>sys.stderr," The new corresponding command is %s " % newscript
- print >>sys.stderr,"# #"
- print >>sys.stderr,"#########################################################"
- print >>sys.stderr," "
- print >>sys.stderr," "
- print >>sys.stderr," "
diff --git a/obitools/utils/__init__.pyc b/obitools/utils/__init__.pyc
deleted file mode 100644
index 99512dc..0000000
Binary files a/obitools/utils/__init__.pyc and /dev/null differ
diff --git a/obitools/utils/bioseq.py b/obitools/utils/bioseq.py
deleted file mode 100644
index 71337c7..0000000
--- a/obitools/utils/bioseq.py
+++ /dev/null
@@ -1,232 +0,0 @@
-def mergeTaxonomyClassification(uniqSeq,taxonomy):
- for seq in uniqSeq:
- if seq['merged_taxid']:
- seq['taxid']=taxonomy.lastCommonTaxon(*seq['merged_taxid'].keys())
- tsp = taxonomy.getSpecies(seq['taxid'])
- tgn = taxonomy.getGenus(seq['taxid'])
- tfa = taxonomy.getFamily(seq['taxid'])
-
- if tsp is not None:
- sp_sn = taxonomy.getScientificName(tsp)
- else:
- sp_sn="###"
- tsp=-1
-
- if tgn is not None:
- gn_sn = taxonomy.getScientificName(tgn)
- else:
- gn_sn="###"
- tgn=-1
-
- if tfa is not None:
- fa_sn = taxonomy.getScientificName(tfa)
- else:
- fa_sn="###"
- tfa=-1
-
- seq['species']=tsp
- seq['genus']=tgn
- seq['family']=tfa
-
- seq['species_sn']=sp_sn
- seq['genus_sn']=gn_sn
- seq['family_sn']=fa_sn
-
- seq['rank']=taxonomy.getRank(seq['taxid'])
- seq['scientific_name']=fa_sn = taxonomy.getScientificName(seq['taxid'])
-
-def uniqSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None):
- uniques={}
- uniqSeq=[]
-
- if categories is None:
- categories=[]
-
- if mergedKey is not None:
- mergedKey=set(mergedKey)
- else:
- mergedKey=set()
-
- if taxonomy is not None:
- mergedKey.add('taxid')
-
- for seq in seqIterator:
- s = tuple(seq[x] for x in categories) + (str(seq),)
- if s in uniques:
- s = uniques[s]
- if 'count' in seq:
- s['count']+=seq['count']
- else:
- s['count']+=1
-# if taxonomy is not None and 'taxid' in seq:
-# s['merged_taxid'][seq['taxid']]=
- for key in mergedKey:
- if key=='taxid' and mergeIds:
- if 'taxid_dist' in seq:
- s["taxid_dist"].update(seq["taxid_dist"])
- if 'taxid' in seq:
- s["taxid_dist"][seq.id]=seq['taxid']
-
- mkey = "merged_%s" % key
- if key in seq:
- s[mkey][seq[key]]=s[mkey].get(seq[key],0)+1
- if mkey in seq:
- for skey in seq[mkey]:
- if skey in s:
- s[mkey][skey]=s[mkey].get(seq[skey],0)+seq[mkey][skey]
- else:
- s[mkey][skey]=seq[mkey][skey]
-
- for key in seq.iterkeys():
- # Merger proprement l'attribut merged s'il exist
- if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged':
- del(s[key])
-
-
- if mergeIds:
- s['merged'].append(seq.id)
- else:
- uniques[s]=seq
- for key in mergedKey:
- if key=='taxid' and mergeIds:
- if 'taxid_dist' not in seq:
- seq["taxid_dist"]={}
- if 'taxid' in seq:
- seq["taxid_dist"][seq.id]=seq['taxid']
- mkey = "merged_%s" % key
- if mkey not in seq:
- seq[mkey]={}
- if key in seq:
- seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+1
- del(seq[key])
-
- if 'count' not in seq:
- seq['count']=1
- if mergeIds:
- seq['merged']=[seq.id]
- uniqSeq.append(seq)
-
- if taxonomy is not None:
- mergeTaxonomyClassification(uniqSeq, taxonomy)
-
-
-
- return uniqSeq
-
-def uniqPrefixSequence(seqIterator,taxonomy=None,mergedKey=None,mergeIds=False,categories=None):
-
- if categories is None:
- categories=[]
-
- def cmpseq(s1,s2):
- return cmp(str(s1),str(s2))
-
- if mergedKey is not None:
- mergedKey=set(mergedKey)
- else:
- mergedKey=set()
-
- if taxonomy is not None:
- mergedKey.add('taxid')
-
- sequences=list(seqIterator)
-
- if not sequences:
- return []
-
- sequences.sort(cmpseq)
-
-
- old=sequences.pop()
- uniqSeq=[old]
- if 'count' not in old:
- old['count']=1
- for key in mergedKey:
- mkey = "merged_%s" % key
- if mkey not in old:
- old[mkey]={}
- if key in old:
- old[mkey][old[key]]=old[mkey].get(old[key],0)+1
- if mergeIds:
- old['merged']=[old.id]
-
-
- while(sequences):
- seq=sequences.pop()
- lseq=len(seq)
- pold = str(old)[0:lseq]
- if pold==str(seq):
-
- if 'count' in seq:
- old['count']+=seq['count']
- else:
- old['count']+=1
-
- for key in mergedKey:
- mkey = "merged_%s" % key
- if key in seq:
- old[mkey][seq[key]]=old[mkey].get(seq[key],0)+1
- if mkey in seq:
- for skey in seq[mkey]:
- if skey in old:
- old[mkey][skey]=old[mkey].get(seq[skey],0)+seq[mkey][skey]
- else:
- old[mkey][skey]=seq[mkey][skey]
-
- for key in seq.iterkeys():
- if key in old and old[key]!=seq[key]:
- del(old[key])
-
-
- if mergeIds:
- old['merged'].append(seq.id)
- else:
- old=seq
-
- for key in mergedKey:
- mkey = "merged_%s" % key
- if mkey not in seq:
- seq[mkey]={}
- if key in seq:
- seq[mkey][seq[key]]=seq[mkey].get(seq[key],0)+1
- del(seq[key])
-
- if 'count' not in seq:
- seq['count']=1
- if mergeIds:
- seq['merged']=[seq.id]
- uniqSeq.append(seq)
-
- if taxonomy is not None:
- mergeTaxonomyClassification(uniqSeq, taxonomy)
-
- return uniqSeq
-
-
-
-
-def _cmpOnKeyGenerator(key,reverse=False):
- def compare(x,y):
- try:
- c1 = x[key]
- except KeyError:
- c1=None
-
- try:
- c2 = y[key]
- except KeyError:
- c2=None
-
- if reverse:
- s=c1
- c1=c2
- c2=s
- return cmp(c1,c2)
-
- return compare
-
-def sortSequence(seqIterator,key,reverse=False):
- seqs = list(seqIterator)
- seqs.sort(_cmpOnKeyGenerator(key, reverse))
- return seqs
-
\ No newline at end of file
diff --git a/obitools/utils/crc64.py b/obitools/utils/crc64.py
deleted file mode 100644
index 537391e..0000000
--- a/obitools/utils/crc64.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#
-# Code obtained from :
-# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259177/index_txt
-#
-
-# Initialisation
-# 32 first bits of generator polynomial for CRC64
-# the 32 lower bits are assumed to be zero
-
-POLY64REVh = 0xd8000000L
-CRCTableh = [0] * 256
-CRCTablel = [0] * 256
-isInitialized = False
-
-def CRC64(aString):
- global isInitialized
- crcl = 0
- crch = 0
- if (isInitialized is not True):
- isInitialized = True
- for i in xrange(256):
- partl = i
- parth = 0L
- for j in xrange(8):
- rflag = partl & 1L
- partl >>= 1L
- if (parth & 1):
- partl |= (1L << 31L)
- parth >>= 1L
- if rflag:
- parth ^= POLY64REVh
- CRCTableh[i] = parth;
- CRCTablel[i] = partl;
-
- for item in aString:
- shr = 0L
- shr = (crch & 0xFF) << 24
- temp1h = crch >> 8L
- temp1l = (crcl >> 8L) | shr
- tableindex = (crcl ^ ord(item)) & 0xFF
-
- crch = temp1h ^ CRCTableh[tableindex]
- crcl = temp1l ^ CRCTablel[tableindex]
- return (crch, crcl)
-
-def CRC64digest(aString):
- return "%08X%08X" % (CRC64(aString))
-
-if __name__ == '__main__':
- assert CRC64("IHATEMATH") == (3822890454, 2600578513)
- assert CRC64digest("IHATEMATH") == "E3DCADD69B01ADD1"
- print 'CRC64: dumb test successful'
-
diff --git a/obitools/utils/iterator.py b/obitools/utils/iterator.py
deleted file mode 100644
index f53537f..0000000
--- a/obitools/utils/iterator.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from itertools import chain
-
-def uniqueChain(*args):
- see = set()
- for x in chain(*args):
- if x not in see:
- see.add(x)
- yield x
\ No newline at end of file
diff --git a/obitools/utils/iterator.pyc b/obitools/utils/iterator.pyc
deleted file mode 100644
index 88d415e..0000000
Binary files a/obitools/utils/iterator.pyc and /dev/null differ
diff --git a/obitools/word/__init__.py b/obitools/word/__init__.py
deleted file mode 100644
index c1a4b6b..0000000
--- a/obitools/word/__init__.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from itertools import imap
-from _binary import *
-
-def wordCount(liste):
- count = {}
-
- for e in liste:
- count[e]=count.get(e,0) + 1
-
- return count
-
-
-def wordIterator(sequence,lword,step=1,endIncluded=False,circular=False):
-
- assert not (endIncluded and circular), \
- "endIncluded and circular cannot not be set to True at the same time"
-
- L = len(sequence)
- sequence = str(sequence)
- if circular:
- sequence += sequence[0:lword]
- pmax=L
- elif endIncluded:
- pmax=L
- else:
- pmax = L - lword + 1
-
- pos = xrange(0,pmax,step)
-
- for x in pos:
- yield encodeWord(sequence[x:x+lword])
-
-
-
-def wordSelector(words,accept=None,reject=None):
- '''
- Filter over a DNA word iterator.
-
- @param words: an iterable object other a list of DNA words
- @type words: an iterator
- @param accept: a list of predicate. Each predicate is a function
- accepting one str parametter and returning a boolean
- value.
- @type accept: list
- @param reject: a list of predicat. Each predicat is a function
- accepting one str parametter and returning a boolean
- value.
- @type reject: list
-
- @return: an iterator on DNA word (str)
- @rtype: iterator
- '''
- if accept is None:
- accept=[]
- if reject is None:
- reject=[]
- for w in words:
-# print [bool(p(w)) for p in accept]
- accepted = reduce(lambda x,y: bool(x) and bool(y),
- (p(w) for p in accept),
- True)
-# print [(p.__name__,bool(p(w))) for p in reject]
- rejected = reduce(lambda x,y:bool(x) or bool(y),
- (p(w) for p in reject),
- False)
-# print decodeWord(w,5),accepted,rejected,
- if accepted and not rejected:
-# print " conserved"
- yield w
-# else:
-# print
-
diff --git a/obitools/word/_binary.so b/obitools/word/_binary.so
deleted file mode 100755
index 1780762..0000000
Binary files a/obitools/word/_binary.so and /dev/null differ
diff --git a/obitools/word/options.py b/obitools/word/options.py
deleted file mode 100644
index ff44e57..0000000
--- a/obitools/word/options.py
+++ /dev/null
@@ -1,116 +0,0 @@
-from obitools.word import wordSelector
-from obitools.word import allDNAWordIterator,encodeWord
-from obitools.word import predicate
-
-
-
-
-def _acceptedOptionCallback(options,opt,value,parser):
- if not hasattr(parser.values, 'acceptedOligo'):
- parser.values.acceptedOligo=[]
- parser.values.acceptedOligo.append(predicate.predicateMatchPattern(value,))
-
-def _rejectedOptionCallback(options,opt,value,parser):
- if not hasattr(parser.values, 'rejectedOligo'):
- parser.values.rejectedOligo=[]
- parser.values.rejectedOligo.append(predicate.predicateMatchPattern(value))
-
-
-
-def addOligoOptions(optionManager):
-
- optionManager.add_option('-L','--oligo-list',
- action="store", dest="oligoList",
- metavar="",
- type="str",
- help="filename containing a list of oligonucleotide")
-
-
- optionManager.add_option('-s','--oligo-size',
- action="store", dest="oligoSize",
- metavar="<###>",
- type="int",
- help="Size of oligonucleotide to generate")
-
- optionManager.add_option('-f','--family-size',
- action="store", dest="familySize",
- metavar="<###>",
- type="int",
- help="Size of oligonucleotide family to generate")
-
- optionManager.add_option('-d','--distance',
- action="store", dest="oligoDist",
- metavar="<###>",
- type="int",
- default=1,
- help="minimal distance between two oligonucleotides")
-
- optionManager.add_option('-g','--gc-max',
- action="store", dest="gcMax",
- metavar="<###>",
- type="int",
- default=0,
- help="maximum count of G or C nucleotide acceptable in a word")
-
- optionManager.add_option('-a','--accepted',
- action="append",dest="acceptedPattern",
- metavar="",
- default=[],
- type="str",
- help="pattern of accepted oligonucleotide")
-
- optionManager.add_option('-r','--rejected',
- action="append",dest="rejectedPattern",
- metavar="",
- default=[],
- type="str",
- help="pattern of rejected oligonucleotide")
-
- optionManager.add_option('-p','--homopolymer',
- action="store", dest="homopolymere",
- metavar="<###>",
- type="int",
- default=0,
- help="reject oligo with homopolymer longer than.")
-
- optionManager.add_option('-P','--homopolymer-min',
- action="store", dest="homopolymere_min",
- metavar="<###>",
- type="int",
- default=0,
- help="accept only oligo with homopolymer longer or equal to.")
-
-def dnaWordIterator(options):
-
- assert options.oligoSize is not None or options.oligoList is not None,"option -s or --oligo-size must be specified"
- assert options.familySize is not None,"option -f or --family-size must be specified"
- assert options.oligoDist is not None,"option -d or --distance must be specified"
-
- if options.oligoList is not None:
- words = (encodeWord(x.strip().lower()) for x in open(options.oligoList))
- else:
- words = allDNAWordIterator(options.oligoSize)
- #seed = 'a' * options.oligoSize
- options.acceptedOligo=[]
- for p in options.acceptedPattern:
- assert len(p)==options.oligoSize,"Accept pattern with bad lenth : %s" % p
- options.acceptedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize))
-
- options.rejectedOligo=[]
- for p in options.rejectedPattern:
- assert len(p)==options.oligoSize,"Reject pattern with bad lenth : %s" % p
- options.rejectedOligo.append(predicate.predicateMatchPattern(p, options.oligoSize))
-
-
- #options.acceptedOligo.append(predicat.distMinGenerator(seed, options.oligoDist))
-
- if options.homopolymere:
- options.rejectedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere, options.oligoSize))
-
- if options.homopolymere_min:
- options.acceptedOligo.append(predicate.predicateHomoPolymerLarger(options.homopolymere_min-1, options.oligoSize))
-
- if options.gcMax:
- options.rejectedOligo.append(predicate.predicateGCUpperBond(options.gcMax, options.oligoSize))
-
- return wordSelector(words, options.acceptedOligo, options.rejectedOligo)
diff --git a/obitools/word/predicate.py b/obitools/word/predicate.py
deleted file mode 100644
index 082b80f..0000000
--- a/obitools/word/predicate.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#@PydevCodeAnalysisIgnore
-'''
-Created on 14 oct. 2009
-
-@author: coissac
-'''
-
-from _binary import wordDist, \
- homoMax, \
- countCG, \
- matchPattern, \
- encodePattern
-
-def predicateWordDistMin(word,dmin,size):
- def predicate(w):
- return wordDist(word, w) >= dmin
- return predicate
-
-def predicateHomoPolymerLarger(count,size):
- def predicate(w):
- return homoMax(w, size) > count
- return predicate
-
-def predicateHomoPolymerSmaller(count,size):
- def predicate(w):
- return homoMax(w, size) < count
- return predicate
-
-def predicateGCUpperBond(count,size):
- def predicate(w):
- return countCG(w, size) > count
- return predicate
-
-def predicateMatchPattern(pattern,size):
- pattern=encodePattern(pattern)
- def predicate(w):
- return matchPattern(w, pattern)
- return predicate
-
-
-
diff --git a/obitools/zipfile.py b/obitools/zipfile.py
deleted file mode 100644
index 41e4bcb..0000000
--- a/obitools/zipfile.py
+++ /dev/null
@@ -1,1282 +0,0 @@
-"""
-Read and write ZIP files.
-"""
-import struct, os, time, sys, shutil
-import binascii, cStringIO
-
-try:
- import zlib # We may need its compression method
- crc32 = zlib.crc32
-except ImportError:
- zlib = None
- crc32 = binascii.crc32
-
-__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
- "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
-
-class BadZipfile(Exception):
- pass
-
-
-class LargeZipFile(Exception):
- """
- Raised when writing a zipfile, the zipfile requires ZIP64 extensions
- and those extensions are disabled.
- """
-
-error = BadZipfile # The exception raised by this module
-
-ZIP64_LIMIT= (1 << 31) - 1
-
-# constants for Zip file compression methods
-ZIP_STORED = 0
-ZIP_DEFLATED = 8
-# Other ZIP compression methods not supported
-
-# Here are some struct module formats for reading headers
-structEndArchive = "<4s4H2LH" # 9 items, end of archive, 22 bytes
-stringEndArchive = "PK\005\006" # magic number for end of archive record
-structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
-stringCentralDir = "PK\001\002" # magic number for central directory
-structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes
-stringFileHeader = "PK\003\004" # magic number for file header
-structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
-stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
-structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
-stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
-
-
-# indexes of entries in the central directory structure
-_CD_SIGNATURE = 0
-_CD_CREATE_VERSION = 1
-_CD_CREATE_SYSTEM = 2
-_CD_EXTRACT_VERSION = 3
-_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
-_CD_FLAG_BITS = 5
-_CD_COMPRESS_TYPE = 6
-_CD_TIME = 7
-_CD_DATE = 8
-_CD_CRC = 9
-_CD_COMPRESSED_SIZE = 10
-_CD_UNCOMPRESSED_SIZE = 11
-_CD_FILENAME_LENGTH = 12
-_CD_EXTRA_FIELD_LENGTH = 13
-_CD_COMMENT_LENGTH = 14
-_CD_DISK_NUMBER_START = 15
-_CD_INTERNAL_FILE_ATTRIBUTES = 16
-_CD_EXTERNAL_FILE_ATTRIBUTES = 17
-_CD_LOCAL_HEADER_OFFSET = 18
-
-# indexes of entries in the local file header structure
-_FH_SIGNATURE = 0
-_FH_EXTRACT_VERSION = 1
-_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
-_FH_GENERAL_PURPOSE_FLAG_BITS = 3
-_FH_COMPRESSION_METHOD = 4
-_FH_LAST_MOD_TIME = 5
-_FH_LAST_MOD_DATE = 6
-_FH_CRC = 7
-_FH_COMPRESSED_SIZE = 8
-_FH_UNCOMPRESSED_SIZE = 9
-_FH_FILENAME_LENGTH = 10
-_FH_EXTRA_FIELD_LENGTH = 11
-
-def is_zipfile(filename):
- """Quickly see if file is a ZIP file by checking the magic number."""
- try:
- fpin = open(filename, "rb")
- endrec = _EndRecData(fpin)
- fpin.close()
- if endrec:
- return True # file has correct magic number
- except IOError:
- pass
- return False
-
-def _EndRecData64(fpin, offset, endrec):
- """
- Read the ZIP64 end-of-archive records and use that to update endrec
- """
- locatorSize = struct.calcsize(structEndArchive64Locator)
- fpin.seek(offset - locatorSize, 2)
- data = fpin.read(locatorSize)
- sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
- if sig != stringEndArchive64Locator:
- return endrec
-
- if diskno != 0 or disks != 1:
- raise BadZipfile("zipfiles that span multiple disks are not supported")
-
- # Assume no 'zip64 extensible data'
- endArchiveSize = struct.calcsize(structEndArchive64)
- fpin.seek(offset - locatorSize - endArchiveSize, 2)
- data = fpin.read(endArchiveSize)
- sig, sz, create_version, read_version, disk_num, disk_dir, \
- dircount, dircount2, dirsize, diroffset = \
- struct.unpack(structEndArchive64, data)
- if sig != stringEndArchive64:
- return endrec
-
- # Update the original endrec using data from the ZIP64 record
- endrec[1] = disk_num
- endrec[2] = disk_dir
- endrec[3] = dircount
- endrec[4] = dircount2
- endrec[5] = dirsize
- endrec[6] = diroffset
- return endrec
-
-
-def _EndRecData(fpin):
- """Return data from the "End of Central Directory" record, or None.
-
- The data is a list of the nine items in the ZIP "End of central dir"
- record followed by a tenth item, the file seek offset of this record."""
- fpin.seek(-22, 2) # Assume no archive comment.
- filesize = fpin.tell() + 22 # Get file size
- data = fpin.read()
- if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
- endrec = struct.unpack(structEndArchive, data)
- endrec = list(endrec)
- endrec.append("") # Append the archive comment
- endrec.append(filesize - 22) # Append the record start offset
- if endrec[-4] == 0xffffffff:
- return _EndRecData64(fpin, -22, endrec)
- return endrec
- # Search the last END_BLOCK bytes of the file for the record signature.
- # The comment is appended to the ZIP file and has a 16 bit length.
- # So the comment may be up to 64K long. We limit the search for the
- # signature to a few Kbytes at the end of the file for efficiency.
- # also, the signature must not appear in the comment.
- END_BLOCK = min(filesize, 1024 * 4)
- fpin.seek(filesize - END_BLOCK, 0)
- data = fpin.read()
- start = data.rfind(stringEndArchive)
- if start >= 0: # Correct signature string was found
- endrec = struct.unpack(structEndArchive, data[start:start+22])
- endrec = list(endrec)
- comment = data[start+22:]
- if endrec[7] == len(comment): # Comment length checks out
- # Append the archive comment and start offset
- endrec.append(comment)
- endrec.append(filesize - END_BLOCK + start)
- if endrec[-4] == 0xffffffff:
- return _EndRecData64(fpin, - END_BLOCK + start, endrec)
- return endrec
- return # Error, return None
-
-
-class ZipInfo (object):
- """Class with attributes describing each file in the ZIP archive."""
-
- __slots__ = (
- 'orig_filename',
- 'filename',
- 'date_time',
- 'compress_type',
- 'comment',
- 'extra',
- 'create_system',
- 'create_version',
- 'extract_version',
- 'reserved',
- 'flag_bits',
- 'volume',
- 'internal_attr',
- 'external_attr',
- 'header_offset',
- 'CRC',
- 'compress_size',
- 'file_size',
- '_raw_time',
- )
-
- def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
- self.orig_filename = filename # Original file name in archive
-
- # Terminate the file name at the first null byte. Null bytes in file
- # names are used as tricks by viruses in archives.
- null_byte = filename.find(chr(0))
- if null_byte >= 0:
- filename = filename[0:null_byte]
- # This is used to ensure paths in generated ZIP files always use
- # forward slashes as the directory separator, as required by the
- # ZIP format specification.
- if os.sep != "/" and os.sep in filename:
- filename = filename.replace(os.sep, "/")
-
- self.filename = filename # Normalized file name
- self.date_time = date_time # year, month, day, hour, min, sec
- # Standard values:
- self.compress_type = ZIP_STORED # Type of compression for the file
- self.comment = "" # Comment for each file
- self.extra = "" # ZIP extra data
- if sys.platform == 'win32':
- self.create_system = 0 # System which created ZIP archive
- else:
- # Assume everything else is unix-y
- self.create_system = 3 # System which created ZIP archive
- self.create_version = 20 # Version which created ZIP archive
- self.extract_version = 20 # Version needed to extract archive
- self.reserved = 0 # Must be zero
- self.flag_bits = 0 # ZIP flag bits
- self.volume = 0 # Volume number of file header
- self.internal_attr = 0 # Internal attributes
- self.external_attr = 0 # External file attributes
- # Other attributes are set by class ZipFile:
- # header_offset Byte offset to the file header
- # CRC CRC-32 of the uncompressed file
- # compress_size Size of the compressed file
- # file_size Size of the uncompressed file
-
- def FileHeader(self):
- """Return the per-file header as a string."""
- dt = self.date_time
- dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
- dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
- if self.flag_bits & 0x08:
- # Set these to zero because we write them after the file data
- CRC = compress_size = file_size = 0
- else:
- CRC = self.CRC
- compress_size = self.compress_size
- file_size = self.file_size
-
- extra = self.extra
-
- if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
- # File is larger than what fits into a 4 byte integer,
- # fall back to the ZIP64 extension
- fmt = '= 24:
- counts = unpack('> 1) & 0x7FFFFFFF) ^ poly
- else:
- crc = ((crc >> 1) & 0x7FFFFFFF)
- table[i] = crc
- return table
- crctable = _GenerateCRCTable()
-
- def _crc32(self, ch, crc):
- """Compute the CRC32 primitive on one byte."""
- return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
-
- def __init__(self, pwd):
- self.key0 = 305419896
- self.key1 = 591751049
- self.key2 = 878082192
- for p in pwd:
- self._UpdateKeys(p)
-
- def _UpdateKeys(self, c):
- self.key0 = self._crc32(c, self.key0)
- self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
- self.key1 = (self.key1 * 134775813 + 1) & 4294967295
- self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
-
- def __call__(self, c):
- """Decrypt a single character."""
- c = ord(c)
- k = self.key2 | 2
- c = c ^ (((k * (k^1)) >> 8) & 255)
- c = chr(c)
- self._UpdateKeys(c)
- return c
-
-class ZipExtFile:
- """File-like object for reading an archive member.
- Is returned by ZipFile.open().
- """
-
- def __init__(self, fileobj, zipinfo, decrypt=None):
- self.fileobj = fileobj
- self.decrypter = decrypt
- self.bytes_read = 0L
- self.rawbuffer = ''
- self.readbuffer = ''
- self.linebuffer = ''
- self.eof = False
- self.univ_newlines = False
- self.nlSeps = ("\n", )
- self.lastdiscard = ''
-
- self.compress_type = zipinfo.compress_type
- self.compress_size = zipinfo.compress_size
-
- self.closed = False
- self.mode = "r"
- self.name = zipinfo.filename
-
- # read from compressed files in 64k blocks
- self.compreadsize = 64*1024
- if self.compress_type == ZIP_DEFLATED:
- self.dc = zlib.decompressobj(-15)
-
- def set_univ_newlines(self, univ_newlines):
- self.univ_newlines = univ_newlines
-
- # pick line separator char(s) based on universal newlines flag
- self.nlSeps = ("\n", )
- if self.univ_newlines:
- self.nlSeps = ("\r\n", "\r", "\n")
-
- def __iter__(self):
- return self
-
- def next(self):
- nextline = self.readline()
- if not nextline:
- raise StopIteration()
-
- return nextline
-
- def close(self):
- self.closed = True
-
- def _checkfornewline(self):
- nl, nllen = -1, -1
- if self.linebuffer:
- # ugly check for cases where half of an \r\n pair was
- # read on the last pass, and the \r was discarded. In this
- # case we just throw away the \n at the start of the buffer.
- if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
- self.linebuffer = self.linebuffer[1:]
-
- for sep in self.nlSeps:
- nl = self.linebuffer.find(sep)
- if nl >= 0:
- nllen = len(sep)
- return nl, nllen
-
- return nl, nllen
-
- def readline(self, size = -1):
- """Read a line with approx. size. If size is negative,
- read a whole line.
- """
- if size < 0:
- size = sys.maxint
- elif size == 0:
- return ''
-
- # check for a newline already in buffer
- nl, nllen = self._checkfornewline()
-
- if nl >= 0:
- # the next line was already in the buffer
- nl = min(nl, size)
- else:
- # no line break in buffer - try to read more
- size -= len(self.linebuffer)
- while nl < 0 and size > 0:
- buf = self.read(min(size, 100))
- if not buf:
- break
- self.linebuffer += buf
- size -= len(buf)
-
- # check for a newline in buffer
- nl, nllen = self._checkfornewline()
-
- # we either ran out of bytes in the file, or
- # met the specified size limit without finding a newline,
- # so return current buffer
- if nl < 0:
- s = self.linebuffer
- self.linebuffer = ''
- return s
-
- buf = self.linebuffer[:nl]
- self.lastdiscard = self.linebuffer[nl:nl + nllen]
- self.linebuffer = self.linebuffer[nl + nllen:]
-
- # line is always returned with \n as newline char (except possibly
- # for a final incomplete line in the file, which is handled above).
- return buf + "\n"
-
- def readlines(self, sizehint = -1):
- """Return a list with all (following) lines. The sizehint parameter
- is ignored in this implementation.
- """
- result = []
- while True:
- line = self.readline()
- if not line: break
- result.append(line)
- return result
-
- def read(self, size = None):
- # act like file() obj and return empty string if size is 0
- if size == 0:
- return ''
-
- # determine read size
- bytesToRead = self.compress_size - self.bytes_read
-
- # adjust read size for encrypted files since the first 12 bytes
- # are for the encryption/password information
- if self.decrypter is not None:
- bytesToRead -= 12
-
- if size is not None and size >= 0:
- if self.compress_type == ZIP_STORED:
- lr = len(self.readbuffer)
- bytesToRead = min(bytesToRead, size - lr)
- elif self.compress_type == ZIP_DEFLATED:
- if len(self.readbuffer) > size:
- # the user has requested fewer bytes than we've already
- # pulled through the decompressor; don't read any more
- bytesToRead = 0
- else:
- # user will use up the buffer, so read some more
- lr = len(self.rawbuffer)
- bytesToRead = min(bytesToRead, self.compreadsize - lr)
-
- # avoid reading past end of file contents
- if bytesToRead + self.bytes_read > self.compress_size:
- bytesToRead = self.compress_size - self.bytes_read
-
- # try to read from file (if necessary)
- if bytesToRead > 0:
- bytes = self.fileobj.read(bytesToRead)
- self.bytes_read += len(bytes)
- self.rawbuffer += bytes
-
- # handle contents of raw buffer
- if self.rawbuffer:
- newdata = self.rawbuffer
- self.rawbuffer = ''
-
- # decrypt new data if we were given an object to handle that
- if newdata and self.decrypter is not None:
- newdata = ''.join(map(self.decrypter, newdata))
-
- # decompress newly read data if necessary
- if newdata and self.compress_type == ZIP_DEFLATED:
- newdata = self.dc.decompress(newdata)
- self.rawbuffer = self.dc.unconsumed_tail
- if self.eof and len(self.rawbuffer) == 0:
- # we're out of raw bytes (both from the file and
- # the local buffer); flush just to make sure the
- # decompressor is done
- newdata += self.dc.flush()
- # prevent decompressor from being used again
- self.dc = None
-
- self.readbuffer += newdata
-
-
- # return what the user asked for
- if size is None or len(self.readbuffer) <= size:
- bytes = self.readbuffer
- self.readbuffer = ''
- else:
- bytes = self.readbuffer[:size]
- self.readbuffer = self.readbuffer[size:]
-
- return bytes
-
-
-class ZipFile:
- """ Class with methods to open, read, write, close, list zip files.
-
- z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
-
- @var file: Either the path to the file, or a file-like object.
- If it is a path, the file will be opened and closed by ZipFile.
- @var mode: The mode can be either read "r", write "w" or append "a".
- @var compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
- @var allowZip64: if True ZipFile will create files with ZIP64 extensions when
- needed, otherwise it will raise an exception when this would
- be necessary.
-
- """
-
- fp = None # Set here since __del__ checks it
-
- def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
- """Open the ZIP file with mode read "r", write "w" or append "a"."""
- if mode not in ("r", "w", "a"):
- raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
-
- if compression == ZIP_STORED:
- pass
- elif compression == ZIP_DEFLATED:
- if not zlib:
- raise RuntimeError,\
- "Compression requires the (missing) zlib module"
- else:
- raise RuntimeError, "That compression method is not supported"
-
- self._allowZip64 = allowZip64
- self._didModify = False
- self.debug = 0 # Level of printing: 0 through 3
- self.NameToInfo = {} # Find file info given name
- self.filelist = [] # List of ZipInfo instances for archive
- self.compression = compression # Method of compression
- self.mode = key = mode.replace('b', '')[0]
- self.pwd = None
-
- # Check if we were passed a file-like object
- if isinstance(file, basestring):
- self._filePassed = 0
- self.filename = file
- modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
- try:
- self.fp = open(file, modeDict[mode])
- except IOError:
- if mode == 'a':
- mode = key = 'w'
- self.fp = open(file, modeDict[mode])
- else:
- raise
- else:
- self._filePassed = 1
- self.fp = file
- self.filename = getattr(file, 'name', None)
-
- if key == 'r':
- self._GetContents()
- elif key == 'w':
- pass
- elif key == 'a':
- try: # See if file is a zip file
- self._RealGetContents()
- # seek to start of directory and overwrite
- self.fp.seek(self.start_dir, 0)
- except BadZipfile: # file is not a zip file, just append
- self.fp.seek(0, 2)
- else:
- if not self._filePassed:
- self.fp.close()
- self.fp = None
- raise RuntimeError, 'Mode must be "r", "w" or "a"'
-
- def _GetContents(self):
- """Read the directory, making sure we close the file if the format
- is bad."""
- try:
- self._RealGetContents()
- except BadZipfile:
- if not self._filePassed:
- self.fp.close()
- self.fp = None
- raise
-
- def _RealGetContents(self):
- """Read in the table of contents for the ZIP file."""
- fp = self.fp
- endrec = _EndRecData(fp)
- if not endrec:
- raise BadZipfile, "File is not a zip file"
- if self.debug > 1:
- print endrec
- size_cd = endrec[5] # bytes in central directory
- offset_cd = endrec[6] # offset of central directory
- self.comment = endrec[8] # archive comment
- # endrec[9] is the offset of the "End of Central Dir" record
- if endrec[9] > ZIP64_LIMIT:
- x = endrec[9] - size_cd - 56 - 20
- else:
- x = endrec[9] - size_cd
- # "concat" is zero, unless zip was concatenated to another file
- concat = x - offset_cd
- if self.debug > 2:
- print "given, inferred, offset", offset_cd, x, concat
- # self.start_dir: Position of start of central directory
- self.start_dir = offset_cd + concat
- fp.seek(self.start_dir, 0)
- data = fp.read(size_cd)
- fp = cStringIO.StringIO(data)
- total = 0
- while total < size_cd:
- centdir = fp.read(46)
- total = total + 46
- if centdir[0:4] != stringCentralDir:
- raise BadZipfile, "Bad magic number for central directory"
- centdir = struct.unpack(structCentralDir, centdir)
- if self.debug > 2:
- print centdir
- filename = fp.read(centdir[_CD_FILENAME_LENGTH])
- # Create ZipInfo instance to store file information
- x = ZipInfo(filename)
- x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
- x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
- total = (total + centdir[_CD_FILENAME_LENGTH]
- + centdir[_CD_EXTRA_FIELD_LENGTH]
- + centdir[_CD_COMMENT_LENGTH])
- x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
- (x.create_version, x.create_system, x.extract_version, x.reserved,
- x.flag_bits, x.compress_type, t, d,
- x.CRC, x.compress_size, x.file_size) = centdir[1:12]
- x.volume, x.internal_attr, x.external_attr = centdir[15:18]
- # Convert date/time code to (year, month, day, hour, min, sec)
- x._raw_time = t
- x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
- t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
-
- x._decodeExtra()
- x.header_offset = x.header_offset + concat
- self.filelist.append(x)
- self.NameToInfo[x.filename] = x
- if self.debug > 2:
- print "total", total
-
-
- def namelist(self):
- """Return a list of file names in the archive."""
- l = []
- for data in self.filelist:
- l.append(data.filename)
- return l
-
- def infolist(self):
- """Return a list of class ZipInfo instances for files in the
- archive."""
- return self.filelist
-
- def printdir(self):
- """Print a table of contents for the zip file."""
- print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
- for zinfo in self.filelist:
- date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
- print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
-
- def testzip(self):
- """Read all the files and check the CRC."""
- for zinfo in self.filelist:
- try:
- self.read(zinfo.filename) # Check CRC-32
- except BadZipfile:
- return zinfo.filename
-
-
- def getinfo(self, name):
- """Return the instance of ZipInfo given 'name'."""
- info = self.NameToInfo.get(name)
- if info is None:
- raise KeyError(
- 'There is no item named %r in the archive' % name)
-
- return info
-
- def setpassword(self, pwd):
- """Set default password for encrypted files."""
- self.pwd = pwd
-
- def read(self, name, pwd=None):
- """Return file bytes (as a string) for name."""
- return self.open(name, "r", pwd).read()
-
- def open(self, name, mode="r", pwd=None):
- """Return file-like object for 'name'."""
- if mode not in ("r", "U", "rU"):
- raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
- if not self.fp:
- raise RuntimeError, \
- "Attempt to read ZIP archive that was already closed"
-
- # Only open a new file for instances where we were not
- # given a file object in the constructor
- if self._filePassed:
- zef_file = self.fp
- else:
- zef_file = open(self.filename, 'rb')
-
- # Get info object for name
- zinfo = self.getinfo(name)
-
- filepos = zef_file.tell()
-
- zef_file.seek(zinfo.header_offset, 0)
-
- # Skip the file header:
- fheader = zef_file.read(30)
- if fheader[0:4] != stringFileHeader:
- raise BadZipfile, "Bad magic number for file header"
-
- fheader = struct.unpack(structFileHeader, fheader)
- fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
- if fheader[_FH_EXTRA_FIELD_LENGTH]:
- zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
-
- if fname != zinfo.orig_filename:
- raise BadZipfile, \
- 'File name in directory "%s" and header "%s" differ.' % (
- zinfo.orig_filename, fname)
-
- # check for encrypted flag & handle password
- is_encrypted = zinfo.flag_bits & 0x1
- zd = None
- if is_encrypted:
- if not pwd:
- pwd = self.pwd
- if not pwd:
- raise RuntimeError, "File %s is encrypted, " \
- "password required for extraction" % name
-
- zd = _ZipDecrypter(pwd)
- # The first 12 bytes in the cypher stream is an encryption header
- # used to strengthen the algorithm. The first 11 bytes are
- # completely random, while the 12th contains the MSB of the CRC,
- # or the MSB of the file time depending on the header type
- # and is used to check the correctness of the password.
- bytes = zef_file.read(12)
- h = map(zd, bytes[0:12])
- if zinfo.flag_bits & 0x8:
- # compare against the file type from extended local headers
- check_byte = (zinfo._raw_time >> 8) & 0xff
- else:
- # compare against the CRC otherwise
- check_byte = (zinfo.CRC >> 24) & 0xff
- if ord(h[11]) != check_byte:
- raise RuntimeError("Bad password for file", name)
-
- # build and return a ZipExtFile
- if zd is None:
- zef = ZipExtFile(zef_file, zinfo)
- else:
- zef = ZipExtFile(zef_file, zinfo, zd)
-
- # set universal newlines on ZipExtFile if necessary
- if "U" in mode:
- zef.set_univ_newlines(True)
- return zef
-
- def extract(self, member, path=None, pwd=None):
- """Extract a member from the archive to the current working directory,
- using its full name. Its file information is extracted as accurately
- as possible. `member' may be a filename or a ZipInfo object. You can
- specify a different directory using `path'.
- """
- if not isinstance(member, ZipInfo):
- member = self.getinfo(member)
-
- if path is None:
- path = os.getcwd()
-
- return self._extract_member(member, path, pwd)
-
- def extractall(self, path=None, members=None, pwd=None):
- """Extract all members from the archive to the current working
- directory. `path' specifies a different directory to extract to.
- `members' is optional and must be a subset of the list returned
- by namelist().
- """
- if members is None:
- members = self.namelist()
-
- for zipinfo in members:
- self.extract(zipinfo, path, pwd)
-
- def _extract_member(self, member, targetpath, pwd):
- """Extract the ZipInfo object 'member' to a physical
- file on the path targetpath.
- """
- # build the destination pathname, replacing
- # forward slashes to platform specific separators.
- if targetpath[-1:] == "/":
- targetpath = targetpath[:-1]
-
- # don't include leading "/" from file name if present
- if os.path.isabs(member.filename):
- targetpath = os.path.join(targetpath, member.filename[1:])
- else:
- targetpath = os.path.join(targetpath, member.filename)
-
- targetpath = os.path.normpath(targetpath)
-
- # Create all upper directories if necessary.
- upperdirs = os.path.dirname(targetpath)
- if upperdirs and not os.path.exists(upperdirs):
- os.makedirs(upperdirs)
-
- source = self.open(member.filename, pwd=pwd)
- target = file(targetpath, "wb")
- shutil.copyfileobj(source, target)
- source.close()
- target.close()
-
- return targetpath
-
- def _writecheck(self, zinfo):
- """Check for errors before writing a file to the archive."""
- if zinfo.filename in self.NameToInfo:
- if self.debug: # Warning for duplicate names
- print "Duplicate name:", zinfo.filename
- if self.mode not in ("w", "a"):
- raise RuntimeError, 'write() requires mode "w" or "a"'
- if not self.fp:
- raise RuntimeError, \
- "Attempt to write ZIP archive that was already closed"
- if zinfo.compress_type == ZIP_DEFLATED and not zlib:
- raise RuntimeError, \
- "Compression requires the (missing) zlib module"
- if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
- raise RuntimeError, \
- "That compression method is not supported"
- if zinfo.file_size > ZIP64_LIMIT:
- if not self._allowZip64:
- raise LargeZipFile("Filesize would require ZIP64 extensions")
- if zinfo.header_offset > ZIP64_LIMIT:
- if not self._allowZip64:
- raise LargeZipFile("Zipfile size would require ZIP64 extensions")
-
- def write(self, filename, arcname=None, compress_type=None):
- """Put the bytes from filename into the archive under the name
- arcname."""
- if not self.fp:
- raise RuntimeError(
- "Attempt to write to ZIP archive that was already closed")
-
- st = os.stat(filename)
- mtime = time.localtime(st.st_mtime)
- date_time = mtime[0:6]
- # Create ZipInfo instance to store file information
- if arcname is None:
- arcname = filename
- arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
- while arcname[0] in (os.sep, os.altsep):
- arcname = arcname[1:]
- zinfo = ZipInfo(arcname, date_time)
- zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
- if compress_type is None:
- zinfo.compress_type = self.compression
- else:
- zinfo.compress_type = compress_type
-
- zinfo.file_size = st.st_size
- zinfo.flag_bits = 0x00
- zinfo.header_offset = self.fp.tell() # Start of header bytes
-
- self._writecheck(zinfo)
- self._didModify = True
- fp = open(filename, "rb")
- # Must overwrite CRC and sizes with correct data later
- zinfo.CRC = CRC = 0
- zinfo.compress_size = compress_size = 0
- zinfo.file_size = file_size = 0
- self.fp.write(zinfo.FileHeader())
- if zinfo.compress_type == ZIP_DEFLATED:
- cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
- zlib.DEFLATED, -15)
- else:
- cmpr = None
- while 1:
- buf = fp.read(1024 * 8)
- if not buf:
- break
- file_size = file_size + len(buf)
- CRC = crc32(buf, CRC) & 0xffffffff
- if cmpr:
- buf = cmpr.compress(buf)
- compress_size = compress_size + len(buf)
- self.fp.write(buf)
- fp.close()
- if cmpr:
- buf = cmpr.flush()
- compress_size = compress_size + len(buf)
- self.fp.write(buf)
- zinfo.compress_size = compress_size
- else:
- zinfo.compress_size = file_size
- zinfo.CRC = CRC
- zinfo.file_size = file_size
- # Seek backwards and write CRC and file sizes
- position = self.fp.tell() # Preserve current position in file
- self.fp.seek(zinfo.header_offset + 14, 0)
- self.fp.write(struct.pack(" ZIP64_LIMIT \
- or zinfo.compress_size > ZIP64_LIMIT:
- extra.append(zinfo.file_size)
- extra.append(zinfo.compress_size)
- file_size = 0xffffffff #-1
- compress_size = 0xffffffff #-1
- else:
- file_size = zinfo.file_size
- compress_size = zinfo.compress_size
-
- if zinfo.header_offset > ZIP64_LIMIT:
- extra.append(zinfo.header_offset)
- header_offset = 0xffffffffL # -1 32 bit
- else:
- header_offset = zinfo.header_offset
-
- extra_data = zinfo.extra
- if extra:
- # Append a ZIP64 field to the extra's
- extra_data = struct.pack(
- '>sys.stderr, (structCentralDir,
- stringCentralDir, create_version,
- zinfo.create_system, extract_version, zinfo.reserved,
- zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
- zinfo.CRC, compress_size, file_size,
- len(zinfo.filename), len(extra_data), len(zinfo.comment),
- 0, zinfo.internal_attr, zinfo.external_attr,
- header_offset)
- raise
- self.fp.write(centdir)
- self.fp.write(zinfo.filename)
- self.fp.write(extra_data)
- self.fp.write(zinfo.comment)
-
- pos2 = self.fp.tell()
- # Write end-of-zip-archive record
- if pos1 > ZIP64_LIMIT:
- # Need to write the ZIP64 end-of-archive records
- zip64endrec = struct.pack(
- structEndArchive64, stringEndArchive64,
- 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
- self.fp.write(zip64endrec)
-
- zip64locrec = struct.pack(
- structEndArchive64Locator,
- stringEndArchive64Locator, 0, pos2, 1)
- self.fp.write(zip64locrec)
-
- endrec = struct.pack(structEndArchive, stringEndArchive,
- 0, 0, count, count, pos2 - pos1, 0xffffffffL, 0)
- self.fp.write(endrec)
-
- else:
- endrec = struct.pack(structEndArchive, stringEndArchive,
- 0, 0, count, count, pos2 - pos1, pos1, 0)
- self.fp.write(endrec)
- self.fp.flush()
- if not self._filePassed:
- self.fp.close()
- self.fp = None
-
-
-class PyZipFile(ZipFile):
- """Class to create ZIP archives with Python library files and packages."""
-
- def writepy(self, pathname, basename = ""):
- """Add all files from "pathname" to the ZIP archive.
-
- If pathname is a package directory, search the directory and
- all package subdirectories recursively for all *.py and enter
- the modules into the archive. If pathname is a plain
- directory, listdir *.py and enter all modules. Else, pathname
- must be a Python *.py file and the module will be put into the
- archive. Added modules are always module.pyo or module.pyc.
- This method will compile the module.py into module.pyc if
- necessary.
- """
- dir, name = os.path.split(pathname)
- if os.path.isdir(pathname):
- initname = os.path.join(pathname, "__init__.py")
- if os.path.isfile(initname):
- # This is a package directory, add it
- if basename:
- basename = "%s/%s" % (basename, name)
- else:
- basename = name
- if self.debug:
- print "Adding package in", pathname, "as", basename
- fname, arcname = self._get_codename(initname[0:-3], basename)
- if self.debug:
- print "Adding", arcname
- self.write(fname, arcname)
- dirlist = os.listdir(pathname)
- dirlist.remove("__init__.py")
- # Add all *.py files and package subdirectories
- for filename in dirlist:
- path = os.path.join(pathname, filename)
- root, ext = os.path.splitext(filename)
- if os.path.isdir(path):
- if os.path.isfile(os.path.join(path, "__init__.py")):
- # This is a package directory, add it
- self.writepy(path, basename) # Recursive call
- elif ext == ".py":
- fname, arcname = self._get_codename(path[0:-3],
- basename)
- if self.debug:
- print "Adding", arcname
- self.write(fname, arcname)
- else:
- # This is NOT a package directory, add its files at top level
- if self.debug:
- print "Adding files from directory", pathname
- for filename in os.listdir(pathname):
- path = os.path.join(pathname, filename)
- root, ext = os.path.splitext(filename)
- if ext == ".py":
- fname, arcname = self._get_codename(path[0:-3],
- basename)
- if self.debug:
- print "Adding", arcname
- self.write(fname, arcname)
- else:
- if pathname[-3:] != ".py":
- raise RuntimeError, \
- 'Files added with writepy() must end with ".py"'
- fname, arcname = self._get_codename(pathname[0:-3], basename)
- if self.debug:
- print "Adding file", arcname
- self.write(fname, arcname)
-
- def _get_codename(self, pathname, basename):
- """Return (filename, archivename) for the path.
-
- Given a module name path, return the correct file path and
- archive name, compiling if necessary. For example, given
- /python/lib/string, return (/python/lib/string.pyc, string).
- """
- file_py = pathname + ".py"
- file_pyc = pathname + ".pyc"
- file_pyo = pathname + ".pyo"
- if os.path.isfile(file_pyo) and \
- os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
- fname = file_pyo # Use .pyo file
- elif not os.path.isfile(file_pyc) or \
- os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
- import py_compile
- if self.debug:
- print "Compiling", file_py
- try:
- py_compile.compile(file_py, file_pyc, None, True)
- except py_compile.PyCompileError,err:
- print err.msg
- fname = file_pyc
- else:
- fname = file_pyc
- archivename = os.path.split(fname)[1]
- if basename:
- archivename = "%s/%s" % (basename, archivename)
- return (fname, archivename)
-
-
-def main(args = None):
- import textwrap
- USAGE=textwrap.dedent("""\
- Usage:
- zipfile.py -l zipfile.zip # Show listing of a zipfile
- zipfile.py -t zipfile.zip # Test if a zipfile is valid
- zipfile.py -e zipfile.zip target # Extract zipfile into target dir
- zipfile.py -c zipfile.zip src ... # Create zipfile from sources
- """)
- if args is None:
- args = sys.argv[1:]
-
- if not args or args[0] not in ('-l', '-c', '-e', '-t'):
- print USAGE
- sys.exit(1)
-
- if args[0] == '-l':
- if len(args) != 2:
- print USAGE
- sys.exit(1)
- zf = ZipFile(args[1], 'r')
- zf.printdir()
- zf.close()
-
- elif args[0] == '-t':
- if len(args) != 2:
- print USAGE
- sys.exit(1)
- zf = ZipFile(args[1], 'r')
- zf.testzip()
- print "Done testing"
-
- elif args[0] == '-e':
- if len(args) != 3:
- print USAGE
- sys.exit(1)
-
- zf = ZipFile(args[1], 'r')
- out = args[2]
- for path in zf.namelist():
- if path.startswith('./'):
- tgt = os.path.join(out, path[2:])
- else:
- tgt = os.path.join(out, path)
-
- tgtdir = os.path.dirname(tgt)
- if not os.path.exists(tgtdir):
- os.makedirs(tgtdir)
- fp = open(tgt, 'wb')
- fp.write(zf.read(path))
- fp.close()
- zf.close()
-
- elif args[0] == '-c':
- if len(args) < 3:
- print USAGE
- sys.exit(1)
-
- def addToZip(zf, path, zippath):
- if os.path.isfile(path):
- zf.write(path, zippath, ZIP_DEFLATED)
- elif os.path.isdir(path):
- for nm in os.listdir(path):
- addToZip(zf,
- os.path.join(path, nm), os.path.join(zippath, nm))
- # else: ignore
-
- zf = ZipFile(args[1], 'w', allowZip64=True)
- for src in args[2:]:
- addToZip(zf, src, os.path.basename(src))
-
- zf.close()
-
-if __name__ == "__main__":
- main()
diff --git a/obitools/zipfile.pyc b/obitools/zipfile.pyc
deleted file mode 100644
index 35dace0..0000000
Binary files a/obitools/zipfile.pyc and /dev/null differ