remove extra obitools directory from ecoPCR subversion archive
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@418 60f365c0-8329-0410-b2a4-ec073aeeaa1d
obitools/SVGdraw.py: 1054 lines removed (file diff suppressed because it is too large)
@@ -1,711 +0,0 @@
'''
**obitools** main module
------------------------

.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>


The obitools module provides base classes for sequence manipulation.

All biological sequences must be subclasses of :py:class:`obitools.BioSequence`.
Some biological sequences are defined as transformations of other
biological sequences. For example, reverse complemented sequences
are a transformation of a :py:class:`obitools.NucSequence`. This particular
type of sequence is a subclass of :py:class:`obitools.WrappedBioSequence`.

.. inheritance-diagram:: BioSequence NucSequence AASequence WrappedBioSequence SubSequence DNAComplementSequence
   :parts: 1

'''

from weakref import ref

from obitools.utils.iterator import uniqueChain
from itertools import chain
import re

_default_raw_parser = " %s *= *([^;]*);"

try:
    from functools import partial
except:
    #
    # Added for compatibility purposes with Python < 2.5
    #
    def partial(func, *args, **keywords):
        def newfunc(*fargs, **fkeywords):
            newkeywords = keywords.copy()
            newkeywords.update(fkeywords)
            return func(*(args + fargs), **newkeywords)
        newfunc.func = func
        newfunc.args = args
        newfunc.keywords = keywords
        return newfunc


from obitools.sequenceencoder import DNAComplementEncoder
from obitools.location import Location
class WrapperSetIterator(object):
    def __init__(self, s):
        self._i = set.__iter__(s)

    def next(self):
        return self._i.next()()

    def __iter__(self):
        return self


class WrapperSet(set):
    def __iter__(self):
        return WrapperSetIterator(self)

class BioSequence(object):
    '''
    BioSequence class is the base class for biological
    sequence representation.

    It provides storage of:

        - the sequence itself,
        - an identifier,
        - a definition,

    and manages a set of complementary information on a key / value principle.

    .. warning::

        :py:class:`obitools.BioSequence` is an abstract class, this constructor
        can only be called by a subclass constructor.
    '''

    def __init__(self, id, seq, definition=None, rawinfo=None, rawparser=_default_raw_parser, **info):
        '''
        :param id: sequence identifier
        :type id: `str`

        :param seq: the sequence
        :type seq: `str`

        :param definition: sequence definition (optional)
        :type definition: `str`

        :param rawinfo: a text containing a set of key=value; patterns
        :type rawinfo: `str`

        :param rawparser: a text describing a regular pattern template
                          used to parse rawinfo
        :type rawparser: `str`

        :param info: extra named parameters can be added to associate complementary
                     data to the sequence
        '''

        assert type(self) != BioSequence, "obitools.BioSequence is an abstract class"

        self._seq = str(seq).lower()
        self._info = dict(info)
        if rawinfo is not None:
            self._rawinfo = ' ' + rawinfo
        else:
            self._rawinfo = None
        self._rawparser = rawparser
        self.definition = definition
        self.id = id
        self._hasTaxid = None

    def get_seq(self):
        return self.__seq

    def set_seq(self, value):
        if not isinstance(value, str):
            value = str(value)
        self.__seq = value
        self.__len = len(value)

    def clone(self):
        seq = type(self)(self.id,
                         str(self),
                         definition=self.definition
                         )
        seq._info = dict(self.getTags())
        seq._rawinfo = self._rawinfo
        seq._rawparser = self._rawparser
        seq._hasTaxid = self._hasTaxid
        return seq

    def getDefinition(self):
        '''
        Sequence definition getter.

        :return: the sequence definition
        :rtype: str
        '''
        return self._definition

    def setDefinition(self, value):
        '''
        Sequence definition setter.

        :param value: the new sequence definition
        :type value: C{str}
        :return: C{None}
        '''
        self._definition = value

    def getId(self):
        '''
        Sequence identifier getter.

        :return: the sequence identifier
        :rtype: C{str}
        '''
        return self._id

    def setId(self, value):
        '''
        Sequence identifier setter.

        :param value: the new sequence identifier
        :type value: C{str}
        :return: C{None}
        '''
        self._id = value

    def getStr(self):
        '''
        Return the sequence as a string.

        :return: the string representation of the sequence
        :rtype: str
        '''
        return self._seq

    def getSymbolAt(self, position):
        '''
        Return the symbol at C{position} in the sequence.

        :param position: the desired position. Positions start at 0;
                         if position is < 0 it is considered
                         to reference the end of the sequence.
        :type position: `int`

        :return: a one letter string
        :rtype: `str`
        '''
        return str(self)[position]

    def getSubSeq(self, location):
        '''
        Return a subsequence as described by C{location}.

        The C{location} parameter can be a L{obitools.location.Location} instance,
        an integer or a python C{slice} instance. If C{location}
        is an integer this method is equivalent to L{getSymbolAt}.

        :param location: the positions of the subsequence to return
        :type location: C{Location} or C{int} or C{slice}
        :return: the subsequence
        :rtype: a single character as a C{str} if C{location} is an integer,
                a L{obitools.SubSequence} instance otherwise.
        '''
        if isinstance(location, Location):
            return location.extractSequence(self)
        elif isinstance(location, int):
            return self.getSymbolAt(location)
        elif isinstance(location, slice):
            return SubSequence(self, location)

        raise TypeError, 'key must be a Location, an integer or a slice'

    def getKey(self, key):
        if key not in self._info:
            if self._rawinfo is None:
                if key == 'count':
                    return 1
                else:
                    raise KeyError, key
            p = re.compile(self._rawparser % key)
            m = p.search(self._rawinfo)
            if m is not None:
                v = m.group(1)
                self._rawinfo = ' ' + self._rawinfo[0:m.start(0)] + self._rawinfo[m.end(0):]
                try:
                    v = eval(v)
                except:
                    pass
                self._info[key] = v
            else:
                if key == 'count':
                    v = 1
                else:
                    raise KeyError, key
        else:
            v = self._info[key]
        return v

    def extractTaxon(self):
        '''
        Extract taxonomy information from the sequence header.
        This method returns None by default. It should be subclassed
        if necessary, as in L{obitools.seqdb.AnnotatedSequence}.

        :return: None
        '''
        self._hasTaxid = self.hasKey('taxid')
        return None

    def __str__(self):
        return self.getStr()

    def __getitem__(self, key):
        if isinstance(key, str):
            if key == 'taxid' and self._hasTaxid is None:
                self.extractTaxon()
            return self.getKey(key)
        else:
            return self.getSubSeq(key)

    def __setitem__(self, key, value):
        self._info[key] = value
        if key == 'taxid':
            self._hasTaxid = value is not None

    def __delitem__(self, key):
        if isinstance(key, str):
            if key in self:
                del self._info[key]
            else:
                raise KeyError, key

            if key == 'taxid':
                self._hasTaxid = False
        else:
            raise TypeError, key

    def __iter__(self):
        '''
        Iterate through the sequence symbols.
        '''
        return iter(str(self))

    def __len__(self):
        return self.__len

    def hasKey(self, key):
        rep = key in self._info

        if not rep and self._rawinfo is not None:
            p = re.compile(self._rawparser % key)
            m = p.search(self._rawinfo)
            if m is not None:
                v = m.group(1)
                self._rawinfo = ' ' + self._rawinfo[0:m.start(0)] + self._rawinfo[m.end(0):]
                try:
                    v = eval(v)
                except:
                    pass
                self._info[key] = v
                rep = True

        return rep

    def __contains__(self, key):
        '''
        Method allowing use of the C{in} operator on a C{BioSequence}.

        The C{in} operator tests if the C{key} value is defined for this
        sequence.

        :param key: the name of the checked value
        :type key: str
        :return: C{True} if the value is defined, C{False} otherwise.
        :rtype: C{bool}
        '''
        if key == 'taxid' and self._hasTaxid is None:
            self.extractTaxon()
        return self.hasKey(key)

    def rawiteritems(self):
        return self._info.iteritems()

    def iteritems(self):
        '''
        Iterate over the items of the dictionary storing the values
        associated to the sequence. It works similarly to
        the iteritems method of C{dict}.

        :return: an iterator over the (key, value) items
                 linked to a sequence
        :rtype: iterator over tuple
        :see: L{items}
        '''
        if self._rawinfo is not None:
            p = re.compile(self._rawparser % "([a-zA-Z]\w*)")
            for k, v in p.findall(self._rawinfo):
                try:
                    self._info[k] = eval(v)
                except:
                    self._info[k] = v
            self._rawinfo = None
        return self._info.iteritems()

    def items(self):
        return [x for x in self.iteritems()]

    def iterkeys(self):
        return (k for k, v in self.iteritems())

    def keys(self):
        return [x for x in self.iterkeys()]

    def getTags(self):
        self.iteritems()
        return self._info

    def getRoot(self):
        return self

    def getWrappers(self):
        if not hasattr(self, '_wrappers'):
            self._wrappers = WrapperSet()
        return self._wrappers

    def register(self, wrapper):
        self.wrappers.add(ref(wrapper, self._unregister))

    def _unregister(self, ref):
        self.wrappers.remove(ref)

    wrappers = property(getWrappers, None, None, '')

    definition = property(getDefinition, setDefinition, None, "Sequence definition")

    id = property(getId, setId, None, 'Sequence identifier')

    def _getTaxid(self):
        return self['taxid']

    def _setTaxid(self, taxid):
        self['taxid'] = taxid

    taxid = property(_getTaxid, _setTaxid, None, 'NCBI taxonomy identifier')

    _seq = property(get_seq, set_seq, None, None)

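# Illustrative sketch (not part of the original module): values given in
# `rawinfo` are parsed lazily by getKey()/hasKey(), using the
# " key = value;" pattern of _default_raw_parser, and eval'ed when possible.
#
#   >>> from obitools import NucSequence
#   >>> s = NucSequence('seq1', 'ACGTT', 'a toy sequence',
#   ...                 rawinfo='count=3; origin=soil;')
#   >>> str(s)                  # sequences are stored lower-cased
#   'acgtt'
#   >>> s['count']              # parsed from rawinfo on first access
#   3
#   >>> s['origin']             # eval() fails here, the raw string is kept
#   'soil'
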
class NucSequence(BioSequence):
    """
    :py:class:`NucSequence` specializes the :py:class:`BioSequence` class for storing DNA
    sequences.

    The constructor is identical to the :py:class:`BioSequence` constructor.
    """

    def complement(self):
        """
        :return: the reverse complemented sequence as an instance of :py:class:`DNAComplementSequence`
        :rtype: :py:class:`DNAComplementSequence`
        """
        return DNAComplementSequence(self)

    def isNucleotide(self):
        return True


class AASequence(BioSequence):
    """
    :py:class:`AASequence` specializes the :py:class:`BioSequence` class for storing protein
    sequences.

    The constructor is identical to the :py:class:`BioSequence` constructor.
    """

    def isNucleotide(self):
        return False

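# A minimal usage sketch (illustrative, assuming the obitools package is
# importable):
#   >>> s = NucSequence('seq1', 'ACGTT')
#   >>> c = s.complement()      # a DNAComplementSequence view, no copy
#   >>> str(c)                  # the reverse complement
#   'aacgt'
#   >>> c.id                    # identifiers are derived automatically
#   'seq1_CMP'
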
class WrappedBioSequence(BioSequence):
    """
    .. warning::

        :py:class:`obitools.WrappedBioSequence` is an abstract class, this constructor
        can only be called by a subclass constructor.
    """

    def __init__(self, reference, id=None, definition=None, **info):

        assert type(self) != WrappedBioSequence, "obitools.WrappedBioSequence is an abstract class"

        self._wrapped = reference
        reference.register(self)
        self._id = id
        self.definition = definition
        self._info = info

    def clone(self):
        seq = type(self)(self.wrapped,
                         id=self._id,
                         definition=self._definition
                         )
        seq._info = dict(self._info)

        return seq

    def getWrapped(self):
        return self._wrapped

    def getDefinition(self):
        d = self._definition or self.wrapped.definition
        return d

    def getId(self):
        d = self._id or self.wrapped.id
        return d

    def isNucleotide(self):
        return self.wrapped.isNucleotide()

    def iterkeys(self):
        return uniqueChain(self._info.iterkeys(),
                           self.wrapped.iterkeys())

    def rawiteritems(self):
        return chain(self._info.iteritems(),
                     (x for x in self.wrapped.rawiteritems()
                      if x[0] not in self._info))

    def iteritems(self):
        for x in self.iterkeys():
            yield (x, self[x])

    def getKey(self, key):
        if key in self._info:
            return self._info[key]
        else:
            return self.wrapped.getKey(key)

    def hasKey(self, key):
        return key in self._info or self.wrapped.hasKey(key)

    def getSymbolAt(self, position):
        return self.wrapped.getSymbolAt(self.posInWrapped(position))

    def posInWrapped(self, position, reference=None):
        if reference is None or reference is self.wrapped:
            return self._posInWrapped(position)
        else:
            return self.wrapped.posInWrapped(self._posInWrapped(position), reference)

    def getStr(self):
        return str(self.wrapped)

    def getRoot(self):
        return self.wrapped.getRoot()

    def complement(self):
        """
        The :py:meth:`complement` method of the :py:class:`WrappedBioSequence` class
        raises an :py:exc:`AttributeError` exception if it is called while the wrapped
        sequence does not correspond to a nucleic acid sequence.
        """

        if self.wrapped.isNucleotide():
            return DNAComplementSequence(self)
        raise AttributeError

    def _posInWrapped(self, position):
        return position

    definition = property(getDefinition, BioSequence.setDefinition, None)
    id = property(getId, BioSequence.setId, None)

    wrapped = property(getWrapped, None, None, "A pointer to the wrapped sequence")

    def _getWrappedRawInfo(self):
        return self.wrapped._rawinfo

    _rawinfo = property(_getWrappedRawInfo)

class SubSequence(WrappedBioSequence):
    """
    A read-only view on a slice of another :py:class:`BioSequence`.
    """

    @staticmethod
    def _sign(x):
        if x == 0:
            return 0
        elif x < 0:
            return -1
        return 1

    def __init__(self, reference,
                 location=None,
                 start=None, stop=None,
                 id=None, definition=None,
                 **info):
        WrappedBioSequence.__init__(self, reference, id=id, definition=definition, **info)

        if isinstance(location, slice):
            self._location = location
        else:
            step = 1
            if not isinstance(start, int):
                start = 0
            if not isinstance(stop, int):
                stop = len(reference)
            self._location = slice(start, stop, step)

        self._indices = self._location.indices(len(self.wrapped))
        self._xrange = xrange(*self._indices)

        self._info['cut'] = '[%d,%d,%s]' % self._indices

        if hasattr(reference, 'quality'):
            self.quality = reference.quality[self._location]

    def getId(self):
        d = self._id or ("%s_SUB" % self.wrapped.id)
        return d

    def clone(self):
        seq = WrappedBioSequence.clone(self)
        seq._location = self._location
        seq._indices = seq._location.indices(len(seq.wrapped))
        seq._xrange = xrange(*seq._indices)
        return seq

    def __len__(self):
        return len(self._xrange)

    def getStr(self):
        return ''.join([x for x in self])

    def __iter__(self):
        return (self.wrapped.getSymbolAt(x) for x in self._xrange)

    def _posInWrapped(self, position):
        return self._xrange[position]

    id = property(getId, BioSequence.setId, None)

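# Sketch: python slices on any BioSequence produce SubSequence views
# (see getSubSeq() above); the applied slice is recorded in the 'cut' tag.
#   >>> s = NucSequence('seq1', 'ACGTT')
#   >>> sub = s[1:4]
#   >>> str(sub), sub.id, sub['cut']
#   ('cgt', 'seq1_SUB', '[1,4,1]')
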
class DNAComplementSequence(WrappedBioSequence):
    """
    Class used to represent a reverse complemented DNA sequence. Instances
    of this class are usually produced by the :py:meth:`NucSequence.complement` method.
    """

    _comp = {'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
             'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
             's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
             'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
             '-': '-'}

    def __init__(self, reference,
                 id=None, definition=None, **info):
        WrappedBioSequence.__init__(self, reference, id=id, definition=definition, **info)
        assert reference.isNucleotide()
        self._info['complemented'] = True
        if hasattr(reference, 'quality'):
            self.quality = reference.quality[::-1]

    def getId(self):
        d = self._id or ("%s_CMP" % self.wrapped.id)
        return d

    def __len__(self):
        return len(self._wrapped)

    def getStr(self):
        return ''.join([x for x in self])

    def __iter__(self):
        return (self.getSymbolAt(x) for x in xrange(len(self)))

    def _posInWrapped(self, position):
        return -(position + 1)

    def getSymbolAt(self, position):
        return DNAComplementSequence._comp[self.wrapped.getSymbolAt(self.posInWrapped(position))]

    def complement(self):
        """
        The :py:meth:`complement` method of the :py:class:`DNAComplementSequence` class
        returns the wrapped sequence: the reverse complement of a reverse
        complemented sequence is the initial sequence.
        """
        return self.wrapped

    id = property(getId, BioSequence.setId, None)

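# Sketch: complementing twice hands back the wrapped sequence object
# itself, as documented in complement() above.
#   >>> s = NucSequence('seq1', 'ACGTT')
#   >>> s.complement().complement() is s
#   True
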
def _isNucSeq(text):
    acgt = 0
    notnuc = 0
    ltot = len(text) * 0.8
    for c in text.lower():
        if c in 'acgt-':
            acgt += 1
        if c not in DNAComplementEncoder._comp:
            notnuc += 1
    return notnuc == 0 and float(acgt) > ltot


def bioSeqGenerator(id, seq, definition=None, rawinfo=None, rawparser=_default_raw_parser, **info):
    """
    Generate automatically the appropriate class instance between:

        - :py:class:`NucSequence`
        - :py:class:`AASequence`

    Build a new sequence instance. A sequence is instantiated as a :py:class:`NucSequence` if the
    `seq` attribute contains more than 80% of *A*, *C*, *G*, *T* or *-* symbols
    in upper or lower case. Otherwise, the new sequence is instantiated as an
    :py:class:`AASequence`.

    :param id: sequence identifier
    :type id: `str`

    :param seq: the sequence
    :type seq: `str`

    :param definition: sequence definition (optional)
    :type definition: `str`

    :param rawinfo: a text containing a set of key=value; patterns
    :type rawinfo: `str`

    :param rawparser: a text describing a regular pattern template
                      used to parse rawinfo
    :type rawparser: `str`

    :param info: extra named parameters can be added to associate complementary
                 data to the sequence
    """
    if _isNucSeq(seq):
        return NucSequence(id, seq, definition, rawinfo, rawparser, **info)
    else:
        return AASequence(id, seq, definition, rawinfo, rawparser, **info)
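# Dispatch sketch (assuming DNAComplementEncoder._comp covers the IUPAC
# nucleotide codes): classification follows the 80% A/C/G/T/- rule of
# _isNucSeq() documented above.
#   >>> bioSeqGenerator('s1', 'ACGTACGT').__class__.__name__
#   'NucSequence'
#   >>> bioSeqGenerator('s2', 'MKVLITGA').__class__.__name__
#   'AASequence'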
Binary file not shown.
@@ -1,13 +0,0 @@

from _nws import NWS
from _upperbond import indexSequences
from _lcs import LCS, lenlcs
from _assemble import DirectAssemble, ReverseAssemble
from _qsassemble import QSolexaDirectAssemble, QSolexaReverseAssemble
from _rassemble import RightDirectAssemble as RightReverseAssemble
from _qsrassemble import QSolexaRightDirectAssemble, QSolexaRightReverseAssemble
from _freeendgap import FreeEndGap
from _freeendgapfm import FreeEndGapFullMatch
from _upperbond import isLCSReachable
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,56 +0,0 @@

'''
Created on 14 May 2009

@author: coissac
'''

from obitools import WrappedBioSequence

class HomoNucBioSeq(WrappedBioSequence):
    '''
    Wraps a nucleic sequence and exposes it with every homopolymer run
    compressed to a single symbol; the run lengths are stored under the
    'homopolymer' tag.
    '''

    def __init__(self, reference, id=None, definition=None, **info):
        '''
        Constructor
        '''
        assert reference.isNucleotide(), "reference must be a nucleic sequence"
        WrappedBioSequence.__init__(self, reference, id=id, definition=definition, **info)
        self.__cleanHomopolymer()

    def __cleanHomopolymer(self):
        s = []
        c = []
        old = None
        nc = 0
        for n in self._wrapped:
            if old is not None and n != old:
                s.append(old)
                c.append(nc)
                nc = 0
            old = n
            nc += 1
        # flush the last homopolymer run
        if old is not None:
            s.append(old)
            c.append(nc)
        self._cached = ''.join(s)
        self['homopolymer'] = c
        # cumulated run lengths, used to map compressed positions
        # back onto the wrapped sequence
        self._cumulative = []
        sum = 0
        for nc in c:
            sum += nc
            self._cumulative.append(sum)

    def __len__(self):
        return len(self._cached)

    def getStr(self):
        return self._cached

    def __iter__(self):
        return iter(self._cached)

    def _posInWrapped(self, position):
        return self._cumulative[position]
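# Compression sketch: 'aaacggt' collapses to 'acgt' while the run lengths
# are kept in the 'homopolymer' tag.
#   >>> from obitools import NucSequence
#   >>> h = HomoNucBioSeq(NucSequence('s1', 'aaacggt'))
#   >>> str(h), h['homopolymer']
#   ('acgt', [3, 1, 2, 1])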
@@ -1,46 +0,0 @@

import os
import re

from obitools.fasta import formatFasta

class SsearchParser(object):

    _matchQuery = re.compile("^Query:.+\n.+?>+([^ ]+)", re.MULTILINE)
    _matchLQuery = re.compile("^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE)
    _matchProp = re.compile("^The best scores are:.*\n(.+?)>>>", re.DOTALL + re.MULTILINE)

    def __init__(self, file):
        if isinstance(file, str):
            file = open(file, 'rU')
        self.data = file.read()
        self.query = SsearchParser._matchQuery.search(self.data).group(1)
        self.queryLength = int(SsearchParser._matchLQuery.search(self.data).group(1))
        props = SsearchParser._matchProp.search(self.data)
        if props:
            props = props.group(0).split('\n')[1:-2]
            self.props = []
            for line in props:
                subject, tab = line.split('\t')
                tab = tab.split()
                ssp = subject.split()
                ac = ssp[0]
                dbl = int(ssp[-5][:-1])
                ident = float(tab[0])
                matchlen = abs(int(tab[5]) - int(tab[4])) + 1
                self.props.append({"ac": ac,
                                   "identity": ident,
                                   "subjectlength": dbl,
                                   'matchlength': matchlen})

def run(seq, database, program='fasta35', opts=''):
    ssearchin, ssearchout, ssearcherr = os.popen3("%s %s %s" % (program, opts, database))
    print >>ssearchin, formatFasta(seq)
    ssearchin.close()
    result = SsearchParser(ssearchout)

    return seq, result

def ssearchIterator(sequenceIterator, database, program='ssearch35', opts=''):
    for seq in sequenceIterator:
        yield run(seq, database, program, opts)
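# Hypothetical usage sketch: fastaIterator and the two file names below
# are assumptions, not part of this module.
#   >>> from obitools.fasta import fastaIterator
#   >>> for seq, res in ssearchIterator(fastaIterator('queries.fasta'),
#   ...                                 'db.fasta'):
#   ...     print seq.id, [p['ac'] for p in res.props]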
@@ -1,175 +0,0 @@

from obitools import BioSequence
from obitools import WrappedBioSequence
from copy import deepcopy

class GappedPositionException(Exception):
    pass

class AlignedSequence(WrappedBioSequence):

    def __init__(self, reference,
                 id=None, definition=None, **info):
        WrappedBioSequence.__init__(self, reference, id=id, definition=definition, **info)
        self._length = len(reference)
        self._gaps = [[self._length, 0]]

    def clone(self):
        seq = WrappedBioSequence.clone(self)
        seq._gaps = deepcopy(self._gaps)
        seq._length = reduce(lambda x, y: x + y, (z[0] + z[1] for z in self._gaps), 0)
        return seq

    def setGaps(self, value):
        '''
        Set the gap vector of an AlignedSequence.

        A gap vector describes the gap positions on a sequence.
        It is a list of couples. The first member of each couple is a count
        of sequence letters, the second one is the length of the gap that follows them.

        @param value: a list of length 2 lists describing gap positions
        @type value: list of couples
        '''
        assert isinstance(value, list), 'Gap vector must be a list'
        assert reduce(lambda x, y: x and y,
                      (isinstance(z, list) and len(z) == 2 for z in value),
                      True), "Value must be a list of length 2 lists"

        lseq = reduce(lambda x, y: x + y, (z[0] for z in value), 0)
        assert lseq == len(self.wrapped), "Gap vector incompatible with the sequence"
        self._gaps = value
        self._length = reduce(lambda x, y: x + y, (z[0] + z[1] for z in value), 0)

    def getGaps(self):
        return tuple(self._gaps)
    gaps = property(getGaps, setGaps, None, "Gap vector")

    def _getIndice(self, pos):
        i = 0
        cpos = 0
        for s, g in self._gaps:
            cpos += s
            if cpos > pos:
                return i, pos - cpos + s
            cpos += g
            if cpos > pos:
                return i, -pos + cpos - g - 1
            i += 1
        raise IndexError

    def getId(self):
        d = self._id or ("%s_ALN" % self.wrapped.id)
        return d

    def __len__(self):
        return self._length

    def getStr(self):
        return ''.join([x for x in self])

    def __iter__(self):
        def isymb():
            cpos = 0
            for s, g in self._gaps:
                for x in xrange(s):
                    yield self.wrapped[cpos + x]
                for x in xrange(g):
                    yield '-'
                cpos += s
        return isymb()

    def _posInWrapped(self, position):
        i, s = self._getIndice(position)
        if s < 0:
            raise GappedPositionException
        value = self._gaps
        p = reduce(lambda x, y: x + y, (z[0] for z in value[:i]), 0) + s
        return p

    def getSymbolAt(self, position):
        try:
            return self.wrapped.getSymbolAt(self.posInWrapped(position))
        except GappedPositionException:
            return '-'

    def insertGap(self, position, count=1):
        if position == self._length:
            idx = len(self._gaps) - 1
            p = -1
        else:
            idx, p = self._getIndice(position)

        if p >= 0:
            self._gaps.insert(idx, [p, count])
            self._gaps[idx + 1][0] -= p
        else:
            self._gaps[idx][1] += count
        self._length = reduce(lambda x, y: x + y, (z[0] + z[1] for z in self._gaps), 0)

    id = property(getId, BioSequence.setId, None, "Sequence identifier")

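# Illustrative sketch of the gap vector encoding (not part of the original
# file): on the 5-letter sequence 'acgta', the vector [[2, 1], [3, 0]]
# reads "2 letters, a 1-symbol gap, then 3 letters".
#   >>> from obitools import NucSequence
#   >>> a = AlignedSequence(NucSequence('s1', 'acgta'))
#   >>> a.gaps = [[2, 1], [3, 0]]
#   >>> str(a)
#   'ac-gta'
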
class Alignment(list):

    def _assertData(self, data):
        assert isinstance(data, BioSequence), 'You can only add a BioSequence to an alignment'
        if hasattr(self, '_alignlen'):
            assert self._alignlen == len(data), 'All aligned sequences must have the same length'
        else:
            self._alignlen = len(data)
        return data

    def clone(self):
        ali = Alignment(x.clone() for x in self)
        return ali

    def append(self, data):
        data = self._assertData(data)
        list.append(self, data)

    def __setitem__(self, index, data):
        data = self._assertData(data)
        list.__setitem__(self, index, data)

    def getSite(self, key):
        if isinstance(key, int):
            return [x[key] for x in self]

    def insertGap(self, position, count=1):
        for s in self:
            s.insertGap(position, count)

    def isFullGapSite(self, key):
        return reduce(lambda x, y: x and y, (z == '-' for z in self.getSite(key)), True)

    def isGappedSite(self, key):
        return '-' in self.getSite(key)

    def __str__(self):
        l = len(self[0])
        rep = ""
        idmax = max(len(x.id) for x in self) + 2
        template = "%%-%ds %%-60s" % idmax
        for p in xrange(0, l, 60):
            for s in self:
                rep += (template % (s.id, s[p:p + 60])).strip() + '\n'
            rep += "\n"
        return rep


def alignmentReader(file, sequenceIterator):
    seqs = sequenceIterator(file)
    alignement = Alignment()
    for seq in seqs:
        alignement.append(seq)
    return alignement


def columnIterator(alignment):
    lali = len(alignment[0])
    for p in xrange(lali):
        c = [x[p] for x in alignment]
        yield c
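# Usage sketch (the sequences are toy assumptions): insertGap() on an
# Alignment opens the same gap column in every row at once.
#   >>> from obitools import NucSequence
#   >>> ali = Alignment()
#   >>> ali.append(AlignedSequence(NucSequence('s1', 'acgta')))
#   >>> ali.append(AlignedSequence(NucSequence('s2', 'aggta')))
#   >>> ali.insertGap(2, 1)
#   >>> ali.getSite(2)
#   ['-', '-']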
@@ -1,47 +0,0 @@

from obitools.format.genericparser import GenericParser
from obitools.utils import universalOpen
from obitools.fasta import parseFastaDescription
from obitools import NucSequence

import sys

_contigIterator = GenericParser('^CO ')

_contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)')
_contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n')
_contigIterator.addParseAction('DS', '\nDS +(.+)')
_contigIterator.addParseAction('CO', '^CO (\S+)')

def contigIterator(file):
    file = universalOpen(file)
    for entry in _contigIterator(file):
        contig = []
        for rd, ds, af in map(None, entry['RD'], entry['DS'], entry['AF']):
            id = rd[0]
            shift = int(af[2])
            if shift < 0:
                print >> sys.stderr, "Sequence %s in contig %s has a negative padding value %d : skipped" % (id, entry['CO'][0], shift)
                #continue

            definition, info = parseFastaDescription(ds)
            info['shift'] = shift
            seq = rd[4].replace('\n', '').replace('*', '-').strip()
            contig.append(NucSequence(id, seq, definition, **info))

        maxlen = max(len(x) + x['shift'] for x in contig)
        minshift = min(x['shift'] for x in contig)
        rep = []

        for s in contig:
            info = s.getTags()
            info['shift'] -= minshift - 1
            head = '-' * (info['shift'] - 1)

            tail = (maxlen + minshift - len(s) - info['shift'] - 1)
            info['tail'] = tail
            newseq = NucSequence(s.id, head + str(s) + '-' * tail, s.definition, **info)
            rep.append(newseq)

        yield entry['CO'][0], rep
@@ -1,7 +0,0 @@

'''
@author: merciece

Creates the tree representing the coverage of two primers from an ecoPCR output file and an ecoPCR database.
'''
@@ -1,62 +0,0 @@

#!/usr/local/bin/python
'''
Created on 24 Nov. 2011

@author: merciece
'''


def main(amplifiedSeqs, seqsFromDB, keptRanks, errors, tax):
    '''
    The error threshold is set to 3.
    '''

    listtaxabygroupinDB = {}

    for seq in seqsFromDB:
        taxid = seq['taxid']
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p:
            if a != p[0]:
                if a[1] in keptRanks:
                    group = a[0]
                    if group in listtaxabygroupinDB and taxid not in listtaxabygroupinDB[group]:
                        listtaxabygroupinDB[group].add(taxid)
                    elif group not in listtaxabygroupinDB:
                        listtaxabygroupinDB[group] = set([taxid])

    taxabygroup = dict((x, len(listtaxabygroupinDB[x])) for x in listtaxabygroupinDB)

    listamplifiedtaxabygroup = {}

    for seq in amplifiedSeqs:
        if errors[seq.id][2] <= 3:
            taxid = seq['taxid']
            p = [a for a in tax.parentalTreeIterator(taxid)]
            for a in p:
                if a != p[0]:
                    if a[1] in keptRanks:
                        group = a[0]
                        if group in listamplifiedtaxabygroup and taxid not in listamplifiedtaxabygroup[group]:
                            listamplifiedtaxabygroup[group].add(taxid)
                        elif group not in listamplifiedtaxabygroup:
                            listamplifiedtaxabygroup[group] = set([taxid])

    amplifiedtaxabygroup = dict((x, len(listamplifiedtaxabygroup[x])) for x in listamplifiedtaxabygroup)

    BcValues = {}

    groups = [g for g in taxabygroup.keys()]

    for g in groups:
        if g in amplifiedtaxabygroup:
            BcValues[g] = float(amplifiedtaxabygroup[g]) / taxabygroup[g] * 100
            BcValues[g] = round(BcValues[g], 2)
        else:
            BcValues[g] = 0.0

    return BcValues
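# Computation sketch: for each taxonomic group g whose rank is kept,
#   Bc(g) = 100 * |taxa of g amplified with total errors <= 3|
#               / |taxa of g present in the database|
# e.g. 42 amplified taxa out of 56 database taxa gives Bc(g) = 75.0.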
@@ -1,72 +0,0 @@

#!/usr/local/bin/python
'''
Created on 24 Nov. 2011

@author: merciece
'''

import sys


def main(amplifiedSeqs, seqsFromDB, keptRanks, tax):

    BcValues = {}

    #speciesid = tax.findRankByName('species')
    #subspeciesid = tax.findRankByName('subspecies')

    listtaxonbygroup = {}

    for seq in seqsFromDB:
        taxid = seq['taxid']
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p:
            if a != p[0]:
                if a[1] in keptRanks:
                    group = a
                    if group in listtaxonbygroup:
                        listtaxonbygroup[group].add(taxid)
                    else:
                        listtaxonbygroup[group] = set([taxid])

    #stats = dict((x, len(listtaxonbygroup[x])) for x in listtaxonbygroup)

    print >>sys.stderr, listtaxonbygroup

    listtaxonbygroup = {}

    for seq in amplifiedSeqs:
        taxid = seq['taxid']
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p:
            if a != p[0]:
                if a[1] in keptRanks:
                    group = a
                    if group in listtaxonbygroup:
                        listtaxonbygroup[group].add(taxid)
                    else:
                        listtaxonbygroup[group] = set([taxid])

    print >>sys.stderr, listtaxonbygroup

    return BcValues

#    dbstats = dict((x, len(listtaxonbygroup[x])) for x in listtaxonbygroup)
#
#    ranks = [r for r in keptRanks]
#    ranks.sort()
#
#    print '%-20s\t%10s\t%10s\t%7s' % ('rank', 'ecopcr', 'db', 'percent')
#
#    print >>sys.stderr, stats
#    print >>sys.stderr, dbstats
#    print >>sys.stderr, ranks
#
#    for r in ranks:
#        if r in dbstats and dbstats[r]:
#            print '%-20s\t%10d\t%10d\t%8.2f' % (r, dbstats[r], stats[r], float(dbstats[r]) / stats[r] * 100)
@@ -1,108 +0,0 @@

#!/usr/local/bin/python
'''
Created on 25 Nov. 2011

@author: merciece
'''

from obitools.graph.rootedtree import nexusFormat


figtree = """\
begin figtree;
    set appearance.backgroundColorAttribute="User Selection";
    set appearance.backgroundColour=#-1;
    set appearance.branchColorAttribute="bc";
    set appearance.branchLineWidth=2.0;
    set appearance.foregroundColour=#-16777216;
    set appearance.selectionColour=#-2144520576;
    set branchLabels.colorAttribute="User Selection";
    set branchLabels.displayAttribute="errors";
    set branchLabels.fontName="sansserif";
    set branchLabels.fontSize=10;
    set branchLabels.fontStyle=0;
    set branchLabels.isShown=true;
    set branchLabels.significantDigits=4;
    set layout.expansion=2000;
    set layout.layoutType="RECTILINEAR";
    set layout.zoom=0;
    set nodeBars.barWidth=4.0;
    set nodeLabels.colorAttribute="User Selection";
    set nodeLabels.displayAttribute="label";
    set nodeLabels.fontName="sansserif";
    set nodeLabels.fontSize=10;
    set nodeLabels.fontStyle=0;
    set nodeLabels.isShown=true;
    set nodeLabels.significantDigits=4;
    set polarLayout.alignTipLabels=false;
    set polarLayout.angularRange=0;
    set polarLayout.rootAngle=0;
    set polarLayout.rootLength=100;
    set polarLayout.showRoot=true;
    set radialLayout.spread=0.0;
    set rectilinearLayout.alignTipLabels=false;
    set rectilinearLayout.curvature=0;
    set rectilinearLayout.rootLength=100;
    set scale.offsetAge=0.0;
    set scale.rootAge=1.0;
    set scale.scaleFactor=1.0;
    set scale.scaleRoot=false;
    set scaleAxis.automaticScale=true;
    set scaleAxis.fontSize=8.0;
    set scaleAxis.isShown=false;
    set scaleAxis.lineWidth=2.0;
    set scaleAxis.majorTicks=1.0;
    set scaleAxis.origin=0.0;
    set scaleAxis.reverseAxis=false;
    set scaleAxis.showGrid=true;
    set scaleAxis.significantDigits=4;
    set scaleBar.automaticScale=true;
    set scaleBar.fontSize=10.0;
    set scaleBar.isShown=true;
    set scaleBar.lineWidth=1.0;
    set scaleBar.scaleRange=0.0;
    set scaleBar.significantDigits=4;
    set tipLabels.colorAttribute="User Selection";
    set tipLabels.displayAttribute="Names";
    set tipLabels.fontName="sansserif";
    set tipLabels.fontSize=10;
    set tipLabels.fontStyle=0;
    set tipLabels.isShown=true;
    set tipLabels.significantDigits=4;
    set trees.order=false;
    set trees.orderType="increasing";
    set trees.rooting=false;
    set trees.rootingType="User Selection";
    set trees.transform=false;
    set trees.transformType="cladogram";
end;
"""


def cartoonRankGenerator(rank):
    def cartoon(node):
        return 'rank' in node and node['rank'] == rank

    return cartoon


def collapseBcGenerator(Bclimit):
    def collapse(node):
        return 'bc' in node and node['bc'] <= Bclimit
    return collapse


def label(node):
    if 'bc' in node:
        return "(%+3.1f) %s" % (node['bc'], node['name'])
    else:
        return " %s" % node['name']


def main(coverageTree):
    print nexusFormat(coverageTree,
                      label=label,
                      blocks=figtree,
                      cartoon=cartoonRankGenerator('family'))
                      #collapse=collapseBcGenerator(70))
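# Quick sketch: cartoonRankGenerator() returns a predicate closed over the
# chosen rank; a plain dict stands in for a tree node here (assumption).
#   >>> isFamily = cartoonRankGenerator('family')
#   >>> isFamily({'rank': 'family'}), isFamily({'rank': 'genus'})
#   (True, False)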
@@ -1,56 +0,0 @@

#!/usr/local/bin/python
'''
Created on 24 Nov. 2011

@author: merciece
'''


def main(seqs, keptRanks, tax):
    errorsBySeq = getErrorsOnLeaves(seqs)
    errorsByTaxon = propagateErrors(errorsBySeq, keptRanks, tax)
    return errorsBySeq, errorsByTaxon


def getErrorsOnLeaves(seqs):
    errors = {}
    for s in seqs:
        taxid = s['taxid']
        forErrs = s['forward_error']
        revErrs = s['reverse_error']
        total = forErrs + revErrs
        seqNb = 1
        errors[s.id] = [forErrs, revErrs, total, seqNb, taxid]
    return errors


def propagateErrors(errorsOnLeaves, keptRanks, tax):
    allErrors = {}
    for seq in errorsOnLeaves:
        taxid = errorsOnLeaves[seq][4]
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p:
            if a[1] in keptRanks:
                group = a[0]
                if group in allErrors:
                    allErrors[group][0] += errorsOnLeaves[seq][0]
                    allErrors[group][1] += errorsOnLeaves[seq][1]
                    allErrors[group][2] += errorsOnLeaves[seq][2]
                    allErrors[group][3] += 1
                else:
                    # copy the leaf record so the in-place averaging below
                    # does not mutate it nor leak across groups
                    allErrors[group] = list(errorsOnLeaves[seq])

    for group in allErrors:
        allErrors[group][0] /= float(allErrors[group][3])
        allErrors[group][1] /= float(allErrors[group][3])
        allErrors[group][2] /= float(allErrors[group][3])

        allErrors[group][0] = round(allErrors[group][0], 2)
        allErrors[group][1] = round(allErrors[group][1], 2)
        allErrors[group][2] = round(allErrors[group][2], 2)

    return allErrors
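# Averaging sketch with a minimal taxonomy stub (Tax is an assumption,
# not part of this module): two leaves of the same genus, with error
# totals 2 and 4, average to 3.0 for that group. Each record reads
# [forward_errors, reverse_errors, total, sequence_count, taxid].
#   >>> class Tax(object):
#   ...     def parentalTreeIterator(self, taxid):
#   ...         return iter([(taxid, 'species'), (1, 'genus')])
#   >>> leaves = {'s1': [1, 1, 2, 1, 10], 's2': [2, 2, 4, 1, 10]}
#   >>> propagateErrors(leaves, set(['genus']), Tax())[1]
#   [1.5, 1.5, 3.0, 2, 10]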
@@ -1,69 +0,0 @@

#!/usr/local/bin/python
'''
Created on 23 Nov. 2011

@author: merciece
'''

from obitools.ecopcr import sequence
from obitools.ecopcr import taxonomy


def main(entries, options):
    filteredDataFromDB = ecoPCRDatabaseReader(options)
    filteredData = ecoPCRFileReader(entries, filteredDataFromDB)
    return filteredDataFromDB, filteredData


def ecoPCRDatabaseReader(options):

    tax = taxonomy.EcoTaxonomyDB(options.taxonomy)
    seqs = sequence.EcoPCRDBSequenceIterator(options.taxonomy, taxonomy=tax)

    norankid = tax.findRankByName('no rank')
    speciesid = tax.findRankByName('species')
    genusid = tax.findRankByName('genus')
    familyid = tax.findRankByName('family')

    minrankseq = set([speciesid, genusid, familyid])

    usedrankid = {}

    ingroup = {}
    outgroup = {}

    for s in seqs:
        if 'taxid' in s:
            taxid = s['taxid']
            allrank = set()
            for p in tax.parentalTreeIterator(taxid):
                if p[1] != norankid:
                    allrank.add(p[1])
            if len(minrankseq & allrank) == 3:
                for r in allrank:
                    usedrankid[r] = usedrankid.get(r, 0) + 1

                if tax.isAncestor(options.ingroup, taxid):
                    ingroup[s.id] = s
                else:
                    outgroup[s.id] = s

    keptranks = set(r for r in usedrankid
                    if float(usedrankid[r]) / float(len(ingroup)) > options.rankthresold)

    return {'ingroup': ingroup,
            'outgroup': outgroup,
            'ranks': keptranks,
            'taxonomy': tax
            }


def ecoPCRFileReader(entries, filteredDataFromDB):
    filteredData = []
    for s in entries:
        if 'taxid' in s:
            seqId = s.id
            if seqId in filteredDataFromDB['ingroup']:
                filteredData.append(s)
    return filteredData
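# Filter sketch: a sequence is kept only when its non-"no rank" lineage
# carries all three of the species, genus and family ranks; a rank is then
# kept when it annotates more than options.rankthresold of the ingroup,
# e.g. with rankthresold=0.8 a rank seen on 900 of 1000 ingroup sequences
# (0.9) is kept, while one seen on 700 (0.7) is dropped.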
@@ -1,42 +0,0 @@

#!/usr/local/bin/python
'''
Created on 25 Nov. 2011

@author: merciece
'''

from obitools.graph.rootedtree import RootedTree


def main(BcValues, errors, tax):

    tree = RootedTree()
    tset = set(BcValues)

    for taxon in BcValues:
        if taxon in errors:
            forErr = errors[taxon][0]
            revErr = errors[taxon][1]
            totErr = errors[taxon][2]
        else:
            forErr = -1.0
            revErr = -1.0
            totErr = -1.0

        tree.addNode(taxon, rank=tax.getRank(taxon),
                     name=tax.getScientificName(taxon),
                     bc=BcValues[taxon],
                     errors=str(forErr) + ' ' + str(revErr),
                     totError=totErr
                     )

    for taxon in BcValues:
        piter = tax.parentalTreeIterator(taxon)
        taxon = piter.next()
        for parent in piter:
            if taxon[0] in tset and parent[0] in BcValues:
                tset.remove(taxon[0])
                tree.addEdge(parent[0], taxon[0])
            taxon = parent

    return tree
@@ -1,207 +0,0 @@

from os import popen2
from itertools import imap, count

from obitools.table import iTableIterator, TableRow, Table, SelectionIterator
from obitools.utils import ColumnFile
from obitools.location import SimpleLocation
from obitools.fasta import formatFasta
import sys

class Blast(object):
    '''
    Run blast.
    '''

    def __init__(self, mode, db, program='blastall', **options):
        self._mode = mode
        self._db = db
        self._program = program
        self._options = options

    def getMode(self):
        return self._mode

    def getDb(self):
        return self._db

    def getProgram(self):
        return self._program

    def _blastcmd(self):
        tmp = """%(program)s \\
                 -p %(mode)s \\
                 -d %(db)s \\
                 -m 8 \\
                 %(options)s \\
              """
        options = ' '.join(['-%s %s' % (x[0], str(x[1]))
                            for x in self._options.iteritems()])
        data = {
            'program': self.program,
            'db': self.db,
            'mode': self.mode,
            'options': options
        }

        return tmp % data

    def __call__(self, sequence):
        '''
        Run blast with one sequence object.

        @param sequence: the query sequence
        @type sequence: L{obitools.BioSequence}
        '''
        cmd = self._blastcmd()

        (blast_in, blast_out) = popen2(cmd)

        print >>blast_in, formatFasta(sequence)
        blast_in.close()

        blast = BlastResultIterator(blast_out)

        return blast

    mode = property(getMode, None, None, "Blast mode")

    db = property(getDb, None, None, "Blast database")

    program = property(getProgram, None, None, "Blast program")

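# Illustrative sketch (mode, db and the option below are assumed values):
# with Blast('blastn', 'nt', e=0.001), _blastcmd() renders roughly
#
#   blastall \
#       -p blastn \
#       -d nt \
#       -m 8 \
#       -e 0.001 \
#
# i.e. tabular (-m 8) output, which BlastResultIterator parses below.
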
class NetBlast(Blast):
    '''
    Run blast on the NCBI servers.
    '''

    def __init__(self, mode, db, **options):
        '''
        @param mode: the blast program flavour to run
        @param db: the reference database name
        '''
        Blast.__init__(self, mode, db, 'blastcl3', **options)


class BlastResultIterator(iTableIterator):

    def __init__(self, blastoutput, query=None):
        '''
        @param blastoutput: the tabular (-m 8) blast output
        @type blastoutput: an open file-like object
        '''
        self._blast = ColumnFile(blastoutput,
                                 strip=True,
                                 skip="#",
                                 sep="\t",
                                 types=self.types
                                 )
        self._query = query
        self._hindex = dict((k, i) for i, k in imap(None, count(), self._getHeaders()))

    def _getHeaders(self):
        return ('Query id', 'Subject id',
                '% identity', 'alignment length',
                'mismatches', 'gap openings',
                'q. start', 'q. end',
                's. start', 's. end',
                'e-value', 'bit score')

    def _getTypes(self):
        return (str, str,
                float, int,
                int, int,
                int, int,
                int, int,
                float, float)

    def _getRowFactory(self):
        return BlastMatch

    def _getSubrowFactory(self):
        return TableRow

    def _getQuery(self):
        return self._query

    headers = property(_getHeaders, None, None)
    types = property(_getTypes, None, None)
    rowFactory = property(_getRowFactory, None, None)
    subrowFactory = property(_getSubrowFactory, None, None)
    query = property(_getQuery, None, None)

    def next(self):
        '''
        Return the next blast match as a row object.
        '''
        value = self._blast.next()
        return self.rowFactory(self, value)


class BlastResult(Table):
    '''
    Results of a blast run.
    '''

class BlastMatch(TableRow):
    '''
    Blast high scoring pair between two sequences.
    '''

    def getQueryLocation(self):
        l = SimpleLocation(self[6], self[7])
        return l

    def getSubjectLocation(self):
        l = SimpleLocation(self[8], self[9])
        return l

    def getSubjectSequence(self, database):
        return database[self[1]]

    def queryCov(self, query=None):
        '''
        Compute the coverage of the match on the query sequence.

        @param query: the query sequence. Default is None.
                      In this case the query sequence associated
                      to this blast result is used.
        @type query: L{obitools.BioSequence}

        @return: coverage fraction
        @rtype: float
        '''
        if query is None:
            query = self.table.query
        assert query is not None
        return float(self[7] - self[6] + 1) / float(len(query))

    def __getitem__(self, key):
        if key == 'query coverage' and self.table.query is not None:
            return self.queryCov()
        else:
            return TableRow.__getitem__(self, key)

class BlastCovMinFilter(SelectionIterator):

    def __init__(self, blastiterator, covmin, query=None, **conditions):
        if query is None:
            query = blastiterator.table.query
        assert query is not None
        SelectionIterator.__init__(self, blastiterator, **conditions)
        self._query = query
        self._covmin = covmin

    def _covMinPredicat(self, row):
        return row.queryCov(self._query) >= self._covmin

    def _checkCondition(self, row):
        return self._covMinPredicat(row) and SelectionIterator._checkCondition(self, row)
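# Coverage sketch: a hit spanning q. start=11 .. q. end=60 on a 100 nt
# query gives queryCov = (60 - 11 + 1) / 100 = 0.5, and a
# BlastCovMinFilter with covmin=0.5 would keep exactly such rows or better.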
@@ -1,376 +0,0 @@

# -*- coding: latin1 -*-

from obitools import SVGdraw
import math

class Map(object):
    """
    Map represents an instance of a physical genetic map.
    Such a map is defined by the length of the sequence
    associated with it.

    A map is associated with a number of levels (Level),
    themselves divided into sub-levels (SubLevel).
    The sub-levels contain the features.
    """
    def __init__(self, name, seqlength, scale=1):
        """
        Constructor of a new map.

        *Param*:

            name
                name of the map

            seqlength
                length of the sequence associated with the map

            scale
                scale of the map, indicating how many pixels
                correspond to one map unit
        """
        self.name = name
        self.seqlength = seqlength
        self.scale = scale
        self.levels = {}
        self.basicHSize = 10

    def __str__(self):
        return '<%s>' % self.name

    def __getitem__(self, level):
        """
        Return level *level* of the map,
        creating it if it does not exist.
        """
        if not isinstance(level, int):
            raise TypeError('level must be a non-zero integer value')
        elif level == 0:
            raise AssertionError('Level cannot be set to 0')
        try:
            return self.levels[level]
        except KeyError:
            self.levels[level] = Level(level, self)
            return self.levels[level]

    def getBasicHSize(self):
        """
        Return the base height of a map element,
        expressed in pixels.
        """
        return self.basicHSize

    def getScale(self):
        """
        Return the scale of the map as the number of pixels per
        physical map unit.
        """
        return self.scale

    def getNegativeBase(self):
        return reduce(lambda x, y: x - y, [self.levels[z].getHeight()
                                           for z in self.levels
                                           if z < 0], self.getHeight())

    def getPositiveBase(self):
        return self.getNegativeBase() - 3 * self.getBasicHSize()

    def getHeight(self):
        return reduce(lambda x, y: x + y, [z.getHeight() for z in self.levels.values()], 0) \
               + 4 * self.getBasicHSize()

    def toXML(self, file=None, begin=0, end=None):
        dessin = SVGdraw.drawing()
        if end == None:
            end = self.seqlength
        hauteur = self.getHeight()
        largeur = (end - begin + 1) * self.scale
        svg = SVGdraw.svg((begin * self.scale, 0, largeur, hauteur),
                          '%fpx' % (self.seqlength * self.scale),
                          '%dpx' % hauteur)

        centre = self.getPositiveBase() + (1 + 1 / 4) * self.getBasicHSize()
        svg.addElement(SVGdraw.rect(0, centre, self.seqlength * self.scale, self.getBasicHSize() / 2))
        for e in self.levels.values():
            svg.addElement(e.getElement())
        dessin.setSVG(svg)
        return dessin.toXml(file)

class Feature(object):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class Level(object):
|
|
||||||
|
|
||||||
def __init__(self,level,map):
|
|
||||||
if not isinstance(map,Map):
|
|
||||||
raise AssertionError('map is not an instance of class Map')
|
|
||||||
if level in map.levels:
|
|
||||||
raise AssertionError('Level %d already define for map %s' % (level,map))
|
|
||||||
else:
|
|
||||||
map.levels[level] = self
|
|
||||||
self.map = map
|
|
||||||
self.level = level
|
|
||||||
self.sublevels = {}
|
|
||||||
|
|
||||||
def __getitem__(self,sublevel):
|
|
||||||
"""
|
|
||||||
retourne le niveau *sublevel* du niveau en
|
|
||||||
le creant s'il n'existe pas
|
|
||||||
"""
|
|
||||||
if not isinstance(sublevel,int):
|
|
||||||
raise TypeError('sublevel must be a positive integer value')
|
|
||||||
elif sublevel<0:
|
|
||||||
raise AssertionError('Level cannot be negative')
|
|
||||||
try:
|
|
||||||
return self.sublevels[sublevel]
|
|
||||||
except KeyError:
|
|
||||||
self.sublevels[sublevel] = SubLevel(sublevel,self)
|
|
||||||
return self.sublevels[sublevel]
|
|
||||||
|
|
||||||
def getBase(self):
|
|
||||||
if self.level < 0:
|
|
||||||
base = self.map.getNegativeBase()
|
|
||||||
base += reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
|
|
||||||
for z in self.map.levels
|
|
||||||
if z <0 and z >= self.level],0)
|
|
||||||
return base
|
|
||||||
else:
|
|
||||||
base = self.map.getPositiveBase()
|
|
||||||
base -= reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
|
|
||||||
for z in self.map.levels
|
|
||||||
if z >0 and z < self.level],0)
|
|
||||||
return base
|
|
||||||
|
|
||||||
def getElement(self):
|
|
||||||
objet = SVGdraw.group('level%d' % self.level)
|
|
||||||
for e in self.sublevels.values():
|
|
||||||
objet.addElement(e.getElement())
|
|
||||||
return objet
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
return reduce(lambda x,y:x+y,[z.getHeight() for z in self.sublevels.values()],0) \
|
|
||||||
+ 2 * self.map.getBasicHSize()
|
|
||||||
|
|
||||||
class SubLevel(object):
|
|
||||||
|
|
||||||
def __init__(self,sublevel,level):
|
|
||||||
if not isinstance(level,Level):
|
|
||||||
raise AssertionError('level is not an instance of class Level')
|
|
||||||
if level in level.sublevels:
|
|
||||||
raise AssertionError('Sublevel %d already define for level %s' % (sublevel,level))
|
|
||||||
else:
|
|
||||||
level.sublevels[sublevel] = self
|
|
||||||
self.level = level
|
|
||||||
self.sublevel = sublevel
|
|
||||||
self.features = {}
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
return max([x.getHeight() for x in self.features.values()]+[0]) + 4 * self.level.map.getBasicHSize()
|
|
||||||
|
|
||||||
def getBase(self):
|
|
||||||
base = self.level.getBase()
|
|
||||||
if self.level.level < 0:
|
|
||||||
base -= self.level.getHeight() - 2 * self.level.map.getBasicHSize()
|
|
||||||
base += reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
|
|
||||||
for z in self.level.sublevels
|
|
||||||
if z <= self.sublevel],0)
|
|
||||||
base -= 2* self.level.map.getBasicHSize()
|
|
||||||
else:
|
|
||||||
base -= reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
|
|
||||||
for z in self.level.sublevels
|
|
||||||
if z < self.sublevel],0)
|
|
||||||
base -= self.level.map.getBasicHSize()
|
|
||||||
return base
|
|
||||||
|
|
||||||
def getElement(self):
|
|
||||||
base = self.getBase()
|
|
||||||
objet = SVGdraw.group('sublevel%d' % self.sublevel)
|
|
||||||
for e in self.features.values():
|
|
||||||
objet.addElement(e.getElement(base))
|
|
||||||
return objet
|
|
||||||
|
|
||||||
def add(self,feature):
|
|
||||||
if not isinstance(feature,Feature):
|
|
||||||
raise TypeError('feature must be an instance oof Feature')
|
|
||||||
if feature.name in self.features:
|
|
||||||
raise AssertionError('A feature with the same name (%s) have already be insert in this sublevel'
|
|
||||||
% feature.name)
|
|
||||||
self.features[feature.name]=feature
|
|
||||||
feature.sublevel=self
|
|
||||||
|
|
||||||
class SimpleFeature(Feature):
|
|
||||||
|
|
||||||
def __init__(self,name,begin,end,visiblename=False,color=0):
|
|
||||||
self.begin = begin
|
|
||||||
self.end = end
|
|
||||||
self.name = name
|
|
||||||
self.color = color
|
|
||||||
self.sublevel = None
|
|
||||||
self.visiblename=visiblename
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
if not self.sublevel:
|
|
||||||
raise AssertionError('Not affected Simple feature')
|
|
||||||
if self.visiblename:
|
|
||||||
return self.sublevel.level.map.getBasicHSize() * 2
|
|
||||||
else:
|
|
||||||
return self.sublevel.level.map.getBasicHSize()
|
|
||||||
|
|
||||||
def getElement(self,base):
|
|
||||||
scale = self.sublevel.level.map.getScale()
|
|
||||||
y = base - self.sublevel.level.map.getBasicHSize()
|
|
||||||
x = self.begin * scale
|
|
||||||
width = (self.end - self.begin + 1) * scale
|
|
||||||
heigh = self.sublevel.level.map.getBasicHSize()
|
|
||||||
|
|
||||||
objet = SVGdraw.rect(x,y,width,heigh,stroke=self.color)
|
|
||||||
objet.addElement(SVGdraw.description(self.name))
|
|
||||||
|
|
||||||
return objet
|
|
||||||
|
|
||||||
class BoxFeature(SimpleFeature):
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
if not self.sublevel:
|
|
||||||
raise AssertionError('Not affected Box feature')
|
|
||||||
if self.visiblename:
|
|
||||||
return self.sublevel.level.map.getBasicHSize() * 4
|
|
||||||
else:
|
|
||||||
return self.sublevel.level.map.getBasicHSize() * 3
|
|
||||||
|
|
||||||
def getElement(self,base):
|
|
||||||
scale = self.sublevel.level.map.getScale()
|
|
||||||
y = base - self.sublevel.level.map.getBasicHSize() * 2
|
|
||||||
x = self.begin * scale
|
|
||||||
width = (self.end - self.begin + 1) * scale
|
|
||||||
height = self.sublevel.level.map.getBasicHSize() * 3
|
|
||||||
|
|
||||||
objet = SVGdraw.rect(x,y,width,height,stroke=self.color,fill="none")
|
|
||||||
objet.addElement(SVGdraw.description(self.name))
|
|
||||||
|
|
||||||
return objet
|
|
||||||
|
|
||||||
class MultiPartFeature(Feature):
|
|
||||||
|
|
||||||
def __init__(self,name,*args,**kargs):
|
|
||||||
self.limits = args
|
|
||||||
self.name = name
|
|
||||||
try:
|
|
||||||
self.color = kargs['color']
|
|
||||||
except KeyError:
|
|
||||||
self.color = "black"
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.visiblename=kargs['visiblename']
|
|
||||||
except KeyError:
|
|
||||||
self.visiblename=None
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.flatlink=kargs['flatlink']
|
|
||||||
except KeyError:
|
|
||||||
self.flatlink=False
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.roundlink=kargs['roundlink']
|
|
||||||
except KeyError:
|
|
||||||
self.roundlink=False
|
|
||||||
|
|
||||||
self.sublevel = None
|
|
||||||
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
if not self.sublevel:
|
|
||||||
raise AssertionError('Not affected Simple feature')
|
|
||||||
if self.visiblename:
|
|
||||||
return self.sublevel.level.map.getBasicHSize() * 3
|
|
||||||
else:
|
|
||||||
return self.sublevel.level.map.getBasicHSize() * 2
|
|
||||||
|
|
||||||
def getElement(self,base):
|
|
||||||
scale = self.sublevel.level.map.getScale()
|
|
||||||
|
|
||||||
y = base - self.sublevel.level.map.getBasicHSize()
|
|
||||||
height = self.sublevel.level.map.getBasicHSize()
|
|
||||||
objet = SVGdraw.group(self.name)
|
|
||||||
for (debut,fin) in self.limits:
|
|
||||||
x = debut * scale
|
|
||||||
width = (fin - debut + 1) * scale
|
|
||||||
part = SVGdraw.rect(x,y,width,height,fill=self.color)
|
|
||||||
objet.addElement(part)
|
|
||||||
|
|
||||||
debut = self.limits[0][1]
|
|
||||||
for (fin,next) in self.limits[1:]:
|
|
||||||
debut*=scale
|
|
||||||
fin*=scale
|
|
||||||
path = SVGdraw.pathdata(debut,y + height / 2)
|
|
||||||
delta = height / 2
|
|
||||||
if self.roundlink:
|
|
||||||
path.qbezier((debut+fin)/2, y - delta,fin,y + height / 2)
|
|
||||||
else:
|
|
||||||
if self.flatlink:
|
|
||||||
delta = - height / 2
|
|
||||||
path.line((debut+fin)/2, y - delta)
|
|
||||||
path.line(fin,y + height / 2)
|
|
||||||
path = SVGdraw.path(path,fill="none",stroke=self.color)
|
|
||||||
objet.addElement(path)
|
|
||||||
debut = next
|
|
||||||
|
|
||||||
objet.addElement(SVGdraw.description(self.name))
|
|
||||||
|
|
||||||
return objet
|
|
||||||
|
|
||||||
class TagFeature(Feature):
|
|
||||||
|
|
||||||
def __init__(self,name,begin,length,ratio,visiblename=False,color=0):
|
|
||||||
self.begin = begin
|
|
||||||
self.length = length
|
|
||||||
self.ratio = ratio
|
|
||||||
self.name = name
|
|
||||||
self.color = color
|
|
||||||
self.sublevel = None
|
|
||||||
self.visiblename=visiblename
|
|
||||||
|
|
||||||
def getHeight(self):
|
|
||||||
if not self.sublevel:
|
|
||||||
raise AssertionError('Not affected Tag feature')
|
|
||||||
|
|
||||||
return self.sublevel.level.map.getBasicHSize()*11
|
|
||||||
|
|
||||||
def getElement(self,base):
|
|
||||||
scale = self.sublevel.level.map.getScale()
|
|
||||||
height = math.floor(max(1,self.sublevel.level.map.getBasicHSize()* 10 * self.ratio))
|
|
||||||
y = base + self.sublevel.level.map.getBasicHSize() - height
|
|
||||||
x = self.begin * scale
|
|
||||||
width = self.length * scale
|
|
||||||
objet = SVGdraw.rect(x,y,width,height,stroke=self.color)
|
|
||||||
objet.addElement(SVGdraw.description(self.name))
|
|
||||||
|
|
||||||
return objet
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
carte = Map('essai',20000,scale=0.5)
|
|
||||||
carte[-1][0].add(SimpleFeature('toto',100,300))
|
|
||||||
carte[1][0].add(SimpleFeature('toto',100,300))
|
|
||||||
carte[1][1].add(SimpleFeature('toto',200,1000))
|
|
||||||
|
|
||||||
carte[1][0].add(MultiPartFeature('bout',(1400,1450),(1470,1550),(1650,1800),color='red',flatlink=True))
|
|
||||||
carte[1][0].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='red',flatlink=True))
|
|
||||||
carte[-1][1].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='green'))
|
|
||||||
carte[-1][2].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='purple',roundlink=True))
|
|
||||||
|
|
||||||
carte[-1][1].add(BoxFeature('tutu',390,810,color='purple'))
|
|
||||||
carte[1][0].add(BoxFeature('tutu',390,810,color='red'))
|
|
||||||
carte[2][0].add(TagFeature('t1',1400,20,0.8))
|
|
||||||
carte[2][0].add(TagFeature('t2',1600,20,0.2))
|
|
||||||
carte.basicHSize=6
|
|
||||||
print carte.toXML('truc.svg',begin=0,end=1000)
|
|
||||||
print carte.toXML('truc2.svg',begin=460,end=2000)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,29 +0,0 @@
class DistanceMatrix(object):

    def __init__(self,alignment):
        '''
        DistanceMatrix constructor.

        @param alignment: alignment used to compute the distance matrix
        @type alignment: obitools.align.Alignment
        '''
        # note: the historic attribute name 'aligment' (sic) is kept,
        # as it is referenced throughout the code base
        self.aligment = alignment
        # lazily filled lower-triangular matrix of pairwise distances
        self.matrix = [[None] * (x+1) for x in xrange(len(alignment))]

    def evaluateDist(self,x,y):
        raise NotImplementedError

    def __getitem__(self,key):
        assert isinstance(key,(tuple,list)) and len(key)==2, \
            'key must be a tuple or a list of two integers'
        x,y = key
        if y < x:
            x,y = y,x
        rep = self.matrix[y][x]
        if rep is None:
            rep = self.evaluateDist(x,y)
            self.matrix[y][x] = rep

        return rep

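DistanceMatrix is an abstract, lazily evaluated matrix: __getitem__ normalizes any (x,y) key to the lower triangle and memoizes the value returned by evaluateDist(). A minimal sketch of the intended subclassing pattern (HammingMatrix and the toy alignment are hypothetical):

    class HammingMatrix(DistanceMatrix):
        def evaluateDist(self,x,y):
            s1 = self.aligment[x]
            s2 = self.aligment[y]
            return sum(a!=b for a,b in zip(s1,s2))

    m = HammingMatrix(['acgt','acct','acgg'])   # any indexable alignment works
    print m[(0,2)]                              # computed once...
    print m[(2,0)]                              # ...then served from the cache
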
@ -1,77 +0,0 @@
'''
Module dedicated to computing observed divergences from
an alignment. No distance correction is applied at all.
'''

from itertools import imap

from obitools.distances import DistanceMatrix

class PairewiseGapRemoval(DistanceMatrix):
    '''
    Observed divergence matrix from an alignment.
    Gaps are removed from the alignment on a pairwise
    sequence basis.
    '''

    def evaluateDist(self,x,y):
        '''
        Compute the observed divergence between two sequences
        of an alignment.

        @attention: For performance purposes this method should
                    not be used directly; use instead the __getitem__
                    method from DistanceMatrix.

        @see: L{__getitem__}

        @param x: number of the first sequence in the alignment
        @type x: int
        @param y: number of the second sequence in the alignment
        @type y: int
        '''

        seq1 = self.aligment[x]
        seq2 = self.aligment[y]

        diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
                          (z[0]!=z[1] for z in imap(None,seq1,seq2)
                           if '-' not in z),(0,0))
        return float(diff)/tot


class Pairewise(DistanceMatrix):
    '''
    Observed divergence matrix from an alignment.
    Gaps are kept in the alignment.
    '''

    def evaluateDist(self,x,y):
        '''
        Compute the observed divergence between two sequences
        of an alignment.

        @attention: For performance purposes this method should
                    not be used directly; use instead the __getitem__
                    method from DistanceMatrix.

        @see: L{__getitem__}

        @param x: number of the first sequence in the alignment
        @type x: int
        @param y: number of the second sequence in the alignment
        @type y: int
        '''

        seq1 = self.aligment[x]
        seq2 = self.aligment[y]

        diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
                          (z[0]!=z[1] for z in imap(None,seq1,seq2)),
                          (0,0))
        return float(diff)/tot

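A minimal usage sketch of the two matrices above, on a toy alignment of plain strings (an indexable alignment object works the same way):

    ali = ['acgt-acgta',
           'acctaacgta',
           'a-ctaacgtt']

    print Pairewise(ali)[(0,1)]             # prints 0.2     : gapped columns count
    print PairewiseGapRemoval(ali)[(0,1)]   # prints 0.111...: gapped columns are skipped
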
@ -1,35 +0,0 @@
import sys

from itertools import imap,count

def writePhylipMatrix(matrix):
    # PHYLIP names are limited to 10 characters: truncate them, then make
    # duplicated truncated names unique with a numeric suffix.
    names = [x.id for x in matrix.aligment]
    pnames= [x[:10] for x in names]
    unicity={}
    redundent=[]
    for n in pnames:
        unicity[n]=unicity.get(n,0)+1
        redundent.append(unicity[n])

    for i,n,r in imap(None,count(),pnames,redundent):
        alternate = n
        if r > 1:
            while alternate in pnames:
                lcut = 9 - len(str(r))
                alternate = n[:lcut]+ '_%d' % r
                r+=1
        pnames[i]='%-10s' % alternate

    # lower-triangle PHYLIP layout: sequence count, then one row per
    # sequence with its distances to all previous sequences
    firstline = '%5d' % len(matrix.aligment)
    rep = [firstline]
    for i,n in imap(None,count(),pnames):
        line = [n]
        for j in xrange(i):
            line.append('%5.4f' % matrix[(j,i)])
        rep.append(' '.join(line))
    return '\n'.join(rep)

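A minimal sketch of the layout produced by writePhylipMatrix(), reusing the Pairewise matrix from the module above (AlignedSeq is a hypothetical helper giving each toy sequence the `id` attribute the writer expects):

    class AlignedSeq(str):
        pass

    def _seq(name,letters):
        s = AlignedSeq(letters)
        s.id = name
        return s

    ali = [_seq('sequenceA','acgtacgt'),
           _seq('sequenceB','acctacgt'),
           _seq('sequenceA','acgtaact')]    # duplicated name on purpose

    print writePhylipMatrix(Pairewise(ali))
    #     3
    # sequenceA
    # sequenceB  0.1250
    # sequence_2 0.2500 0.3750    <- second 'sequenceA' renamed to stay unique
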
@ -1,25 +0,0 @@
import sys

from itertools import imap,count

def writeRMatrix(matrix):
    names = [x.id for x in matrix.aligment]
    lmax = max(max(len(x) for x in names),5)
    lali = len(matrix.aligment)

    nformat = '%%-%ds' % lmax
    dformat = '%%%d.4f' % lmax

    pnames=[nformat % x for x in names]

    rep = [' '.join(pnames)]

    for i in xrange(lali):
        line=[]
        for j in xrange(lali):
            line.append('%5.4f' % matrix[(j,i)])
        rep.append(' '.join(line))
    return '\n'.join(rep)

@ -1,100 +0,0 @@
_A=[0]
_C=[1]
_G=[2]
_T=[3]
_R= _A + _G
_Y= _C + _T
_M= _C + _A
_K= _T + _G
_W= _T + _A
_S= _C + _G
_B= _C + _G + _T
_D= _A + _G + _T
_H= _A + _C + _T
_V= _A + _C + _G
_N= _A + _C + _G + _T

_dnahash={'a':_A,
          'c':_C,
          'g':_G,
          't':_T,
          'r':_R,
          'y':_Y,
          'm':_M,
          'k':_K,
          'w':_W,
          's':_S,
          'b':_B,
          'd':_D,
          'h':_H,
          'v':_V,
          'n':_N,
          }

def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0):
    # Slide a window of wsize letters over the sequence, encoding each
    # letter on 2 bits. Degenerate IUPAC letters expand the current set
    # of alternative hash codes, up to degeneratemax degenerate letters
    # per window.
    errors = 0
    emask = [0] * wsize
    epointer = 0
    size = 0
    position = offset
    hashs = set([0])
    hashmask = 0
    for i in xrange(wsize):
        hashmask <<= 2
        hashmask +=3

    for l in sequence:
        l = l.lower()
        hl = _dnahash[l]

        if emask[epointer]:
            errors-=1
            emask[epointer]=0

        if len(hl) > 1:
            errors +=1
            emask[epointer]=1

        epointer+=1
        epointer%=wsize

        if errors > degeneratemax:
            hl=set([hl[0]])

        hashs=set((((hc<<2) | cl) & hashmask)
                  for hc in hashs
                  for cl in hl)

        if size < wsize:
            size+=1

        if size==wsize:
            if errors <= degeneratemax:
                yield (position,hashs,errors)
            position+=1

def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None):
    if hashs is None:
        hashs=[[] for x in xrange(4**wsize)]

    for pos,keys,errors in hashCodeIterator(sequence, wsize, degeneratemax, offset):
        for k in keys:
            hashs[k].append(pos)

    return hashs

def hashSequences(sequences,wsize,maxpos,degeneratemax=0):
    hashs=None
    offsets=[]
    offset=0
    for s in sequences:
        offsets.append(offset)
        # keep the table returned by hashSequence: on the first call it
        # allocates the table (the original code dropped this return value,
        # so every sequence was hashed into a fresh, discarded table)
        hashs = hashSequence(s,wsize,degeneratemax=degeneratemax,offset=offset,hashs=hashs)
        offset+=len(s)

    return hashs,offsets

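A minimal sketch: enumerate the 4-mer hash codes of a short sequence. Each nucleotide occupies 2 bits of the code, and the degenerate IUPAC letter 'r' (a or g) doubles the set of codes of every window covering it:

    for position,codes,degenerated in hashCodeIterator('acgtrca',4,degeneratemax=1):
        print position,sorted(codes),degenerated
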
@ -1,32 +0,0 @@
'''
Created on 25 sept. 2010

@author: coissac
'''
from obitools import NucSequence

def referenceDBIterator(options):

    cursor = options.ecobarcodedb.cursor()

    cursor.execute("select id from databases.database where name='%s'" % options.database)
    options.dbid = cursor.fetchone()[0]

    cursor.execute('''
          select s.accession,r.id,r.taxid,r.sequence
          from databases.database d,
               databases.reference r,
               databases.relatedsequences s
          where r.database = d.id
            and s.reference= r.id
            and s.mainac
            and d.name = '%s'
          ''' % options.database
          )

    for ac,id,taxid,sequence in cursor:
        s = NucSequence(ac,sequence)
        s['taxid']=taxid
        s['refdbid']=id
        yield s

@ -1,50 +0,0 @@
'''
Created on 25 sept. 2010

@author: coissac
'''

def alreadyIdentified(seqid,options):
    cursor = options.ecobarcodedb.cursor()
    cursor.execute('''
          select count(*)
          from ecotag.identification
          where sequence=%s
            and database=%s
          ''',(int(seqid),int(options.dbid)))

    return int(cursor.fetchone()[0]) > 0

def storeIdentification(seqid,
                        idstatus,taxid,
                        matches,
                        options
                        ):

    cursor = options.ecobarcodedb.cursor()

    if not options.updatedb:
        cursor.execute('''
             delete from ecotag.identification where sequence=%s and database=%s
             ''',(int(seqid),int(options.dbid)))

    cursor.execute('''
         insert into ecotag.identification (sequence,database,idstatus,taxid)
         values (%s,%s,%s,%s)
         returning id
         ''' , (int(seqid),int(options.dbid),idstatus,int(taxid)))

    idid = cursor.fetchone()[0]

    for seq,identity in matches.iteritems():
        cursor.execute('''
            insert into ecotag.evidence (identification,reference,identity)
            values (%s,
                    %s,
                    %s)
            ''',(idid,seq,identity))

    cursor.close()

    options.ecobarcodedb.commit()

@ -1,64 +0,0 @@
'''
Created on 23 sept. 2010

@author: coissac
'''
import psycopg2

from obitools.ecobarcode.taxonomy import EcoTaxonomyDB

def addEcoBarcodeDBOption(optionManager):
    optionManager.add_option('--dbname',
                             action="store", dest="ecobarcodedb",
                             type='str',
                             default=None,
                             help="Specify the name of the ecobarcode database")

    optionManager.add_option('--server',
                             action="store", dest="dbserver",
                             type='str',
                             default="localhost",
                             help="Specify the address of the ecobarcode database server")

    optionManager.add_option('--user',
                             action="store", dest="dbuser",
                             type='str',
                             default='postgres',
                             help="Specify the user of the ecobarcode database")

    optionManager.add_option('--port',
                             action="store", dest="dbport",
                             type='int',
                             default=5432,
                             help="Specify the port of the ecobarcode database")

    optionManager.add_option('--passwd',
                             action="store", dest="dbpasswd",
                             type='str',
                             default='',
                             help="Specify the passwd of the ecobarcode database")

    optionManager.add_option('--primer',
                             action="store", dest="primer",
                             type='str',
                             default=None,
                             help="Specify the primer used for amplification")


def ecobarcodeDatabaseConnection(options):
    if options.ecobarcodedb is not None:
        connection = psycopg2.connect(database=options.ecobarcodedb,
                                      user=options.dbuser,
                                      password=options.dbpasswd,
                                      host=options.dbserver,
                                      port=options.dbport)
        options.dbname=options.ecobarcodedb
    else:
        connection=None
    if connection is not None:
        options.ecobarcodedb=connection
        taxonomy = EcoTaxonomyDB(connection)
    else:
        taxonomy=None
    return taxonomy

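A minimal wiring sketch, assuming an optparse parser (the connection parameters are examples; ecobarcodeDatabaseConnection() opens a PostgreSQL connection and returns the associated taxonomy, or None when --dbname is not given):

    from optparse import OptionParser

    parser = OptionParser()
    addEcoBarcodeDBOption(parser)
    options,args = parser.parse_args(['--dbname','ecobarcodes',
                                      '--user','postgres'])
    taxonomy = ecobarcodeDatabaseConnection(options)
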
@ -1,38 +0,0 @@
'''
Created on 25 sept. 2010

@author: coissac
'''

from obitools import NucSequence
from obitools.utils import progressBar
from obitools.ecobarcode.ecotag import alreadyIdentified

import sys

def sequenceIterator(options):
    cursor = options.ecobarcodedb.cursor()

    cursor.execute('''
        select s.id,sum(o.count),s.sequence
        from rawdata.sequence s,
             rawdata.occurrences o
        where o.sequence= s.id
          and s.primers = '%s'
        group by s.id,s.sequence
        ''' % options.primer
        )

    nbseq = cursor.rowcount
    progressBar(1, nbseq, True, head=options.dbname)
    for id,count,sequence in cursor:
        progressBar(cursor.rownumber+1, nbseq, head=options.dbname)
        if not options.updatedb or not alreadyIdentified(id,options):
            s = NucSequence(id,sequence)
            s['count']=count
            print >>sys.stderr,' +', cursor.rownumber+1,
            yield s
        else:
            print >>sys.stderr,' @', cursor.rownumber+1,

    print >>sys.stderr

@ -1,120 +0,0 @@
'''
Created on 24 sept. 2010

@author: coissac
'''

from obitools.ecopcr.taxonomy import TaxonomyDump
from obitools.ecopcr.taxonomy import Taxonomy
import sys

class EcoTaxonomyDB(TaxonomyDump):

    def __init__(self,dbconnect):
        self._dbconnect=dbconnect

        print >> sys.stderr,"Reading ecobarcode taxonomy database..."

        self._readNodeTable()
        print >> sys.stderr," ok"

        print >>sys.stderr,"Adding scientific name..."

        self._name=[]
        for taxid,name,classname in self._nameIterator():
            self._name.append((name,classname,self._index[taxid]))
            if classname == 'scientific name':
                self._taxonomy[self._index[taxid]].append(name)

        print >>sys.stderr,"Adding taxid alias..."
        for taxid,current in self._mergedNodeIterator():
            self._index[taxid]=self._index[current]

        print >>sys.stderr,"Adding deleted taxid..."
        for taxid in self._deletedNodeIterator():
            self._index[taxid]=None


        Taxonomy.__init__(self)

    #####
    #
    # Iterator functions
    #
    #####

    def _readNodeTable(self):

        cursor = self._dbconnect.cursor()

        cursor.execute("""
              select taxid,rank,parent
              from ncbitaxonomy.nodes
              """)

        print >>sys.stderr,"Reading taxonomy nodes..."
        taxonomy=[list(n) for n in cursor]

        print >>sys.stderr,"Listing all taxonomy ranks..."
        ranks =list(set(x[1] for x in taxonomy))
        ranks.sort()
        rankidx = dict(map(None,ranks,xrange(len(ranks))))

        print >>sys.stderr,"Sorting taxons..."
        taxonomy.sort(TaxonomyDump._taxonCmp)

        self._taxonomy=taxonomy

        print >>sys.stderr,"Indexing taxonomy..."
        index = {}
        for t in self._taxonomy:
            index[t[0]]=self._bsearchTaxon(t[0])

        print >>sys.stderr,"Indexing parent and rank..."
        for t in self._taxonomy:
            t[1]=rankidx[t[1]]
            t[2]=index[t[2]]

        self._ranks=ranks
        self._index=index

        cursor.close()

    def _nameIterator(self):
        cursor = self._dbconnect.cursor()

        cursor.execute("""
              select taxid,name,nameclass
              from ncbitaxonomy.names
              """)

        for taxid,name,nameclass in cursor:
            yield taxid,name,nameclass

        cursor.close()

    def _mergedNodeIterator(self):
        cursor = self._dbconnect.cursor()

        cursor.execute("""
              select oldtaxid,newtaxid
              from ncbitaxonomy.merged
              """)

        for oldtaxid,newtaxid in cursor:
            yield oldtaxid,newtaxid

        cursor.close()

    def _deletedNodeIterator(self):
        cursor = self._dbconnect.cursor()

        cursor.execute("""
              select taxid
              from ncbitaxonomy.delnodes
              """)

        for taxid in cursor:
            yield taxid[0]

        cursor.close()

@ -1,69 +0,0 @@
from obitools import utils
from obitools import NucSequence
from obitools.utils import universalOpen, universalTell, fileSize, progressBar
import struct
import sys


class EcoPCRFile(utils.ColumnFile):
    def __init__(self,stream):
        utils.ColumnFile.__init__(self,
                                  stream, '|', True,
                                  (str,int,int,
                                   str,int,str,
                                   int,str,int,
                                   str,int,str,
                                   str,str,int,float,
                                   str,int,float,
                                   int,
                                   str,str), "#")


    def next(self):
        data = utils.ColumnFile.next(self)
        seq = NucSequence(data[0],data[20],data[21])
        seq['seq_length_ori']=data[1]
        seq['taxid']=data[2]
        seq['rank']=data[3]
        seq['species']=data[4]
        seq['species_sn']=data[5]
        seq['genus']=data[6]
        seq['genus_sn']=data[7]
        seq['family']=data[8]
        seq['family_sn']=data[9]
        seq['strand']=data[12]
        seq['forward_primer']=data[13]
        seq['forward_error']=data[14]
        seq['forward_tm']=data[15]
        seq['reverse_primer']=data[16]
        seq['reverse_error']=data[17]
        seq['reverse_tm']=data[18]

        return seq


class EcoPCRDBFile(object):

    def _ecoRecordIterator(self,file):
        file = universalOpen(file)
        (recordCount,) = struct.unpack('> I',file.read(4))
        self._recover=False

        if recordCount:
            for i in xrange(recordCount):
                (recordSize,)=struct.unpack('>I',file.read(4))
                record = file.read(recordSize)
                yield record
        else:
            print >> sys.stderr,"\n\n WARNING : EcoPCRDB reading set into recover data mode\n"
            self._recover=True
            ok=True
            while ok:
                try:
                    (recordSize,)=struct.unpack('>I',file.read(4))
                    record = file.read(recordSize)
                    yield record
                except:
                    ok=False

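A minimal reading sketch, assuming ColumnFile implements the iterator protocol ('results.ecopcr' is a hypothetical ecoPCR output file):

    for seq in EcoPCRFile(open('results.ecopcr')):
        print seq.id,len(seq),seq['taxid'],seq['forward_error'],seq['reverse_error']
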
@ -1,104 +0,0 @@
import struct

class EcoPCRDBAnnotationWriter(object):
    '''
    Class used to write annotation descriptions in EcoPCRDB format.

    EcoPCRDBAnnotationWriter is often called through the EcoPCRDBSequenceWriter class.

    @see: L{ecopcr.sequence.EcoPCRDBSequenceWriter}
    '''

    def __init__(self,dbname,id,fileidx=1,type=('CDS',),definition=None):
        '''
        class constructor

        @param dbname: name of the ecoPCR database
        @type dbname: C{str}
        @param id: name of the qualifier used as feature id
        @type id: C{str}
        @param fileidx: index used to build the annotation file name
        @type fileidx: C{int}
        @param type: feature types to store
        @type type: C{list} or C{tuple}
        @param definition: name of the qualifier used as feature definition
        @type definition: C{str}
        '''
        self._type = type
        self._definition = definition
        self._id = id
        self._filename="%s_%03d.adx" % (dbname,fileidx)
        self._file = open(self._filename,'wb')
        self._sequenceIdx=0

        ftname ="%s.fdx" % (dbname)
        ft = open(ftname,'wb')

        self._fttypeidx=dict(map(None,type,xrange(len(type))))

        ft.write(struct.pack('> I',len(type)))

        for t in type:
            ft.write(self._ecoFtTypePacker(t))

        ft.close()

        self._annotationCount=0
        self._file.write(struct.pack('> I',self._annotationCount))


    def _ecoFtTypePacker(self,type):
        totalSize = len(type)
        packed = struct.pack('> I %ds' % totalSize,totalSize,type)

        assert len(packed) == totalSize+4, "error in feature type packing"

        return packed

    def _ecoAnnotationPacker(self,feature,seqidx):
        begin = feature.begin-1
        end = feature.end
        type = self._fttypeidx[feature.ftType]
        strand = feature.isDirect()
        id = feature[self._id][0]
        if self._definition in feature:
            definition = feature[self._definition][0]
        else:
            definition = ''

        assert strand is not None,"Only strand defined features can be stored"

        deflength = len(definition)

        # totalSize excludes the 4 bytes of the size field itself
        totalSize = 4 + 4 + 4 + 4 + 4 + 20 + 4 + deflength

        packed = struct.pack('> I I I I I I 20s I %ds' % (deflength),
                             totalSize,
                             seqidx,
                             begin,
                             end,
                             type,
                             int(strand),
                             id,
                             deflength,
                             definition)

        assert len(packed) == totalSize+4, "error in annotation packing"

        return packed


    def put(self,sequence,seqidx=None):
        if seqidx is None:
            seqidx = self._sequenceIdx
            self._sequenceIdx+=1
        for feature in sequence.getFeatureTable():
            if feature.ftType in self._type:
                self._annotationCount+=1
                self._file.write(self._ecoAnnotationPacker(feature,seqidx))

    def __del__(self):
        self._file.seek(0,0)
        self._file.write(struct.pack('> I',self._annotationCount))
        self._file.close()

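For reference, a record written by _ecoAnnotationPacker() can be decoded symmetrically. A minimal sketch (unpackAnnotation is a hypothetical helper; `record` is a string as yielded by EcoPCRDBFile._ecoRecordIterator, i.e. with the 4-byte size prefix already consumed):

    import struct

    def unpackAnnotation(record):
        # five big-endian uints (seqidx, begin, end, type, strand),
        # the 20-byte feature id, the definition length, the definition
        deflength = len(record) - 44
        (seqidx,begin,end,ftype,strand,
         ftid,_dl,definition) = struct.unpack('> I I I I I 20s I %ds' % deflength, record)
        return seqidx,begin,end,ftype,bool(strand),ftid.strip('\x00'),definition
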
@ -1,129 +0,0 @@
'''
Created on 13 fevr. 2011

@author: coissac
'''

from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter

try:
    from obitools.ecobarcode.options import addEcoBarcodeDBOption,ecobarcodeDatabaseConnection
except ImportError:
    def addEcoBarcodeDBOption(optionmanager):
        pass
    def ecobarcodeDatabaseConnection(options):
        return None

def addTaxonomyDBOptions(optionManager):
    addEcoBarcodeDBOption(optionManager)
    optionManager.add_option('-d','--database',
                             action="store", dest="taxonomy",
                             metavar="<FILENAME>",
                             type="string",
                             help="ecoPCR taxonomy Database "
                                  "name")
    optionManager.add_option('-t','--taxonomy-dump',
                             action="store", dest="taxdump",
                             metavar="<FILENAME>",
                             type="string",
                             help="NCBI Taxonomy dump repository "
                                  "name")


def addTaxonomyFilterOptions(optionManager):
    addTaxonomyDBOptions(optionManager)
    optionManager.add_option('--require-rank',
                             action="append",
                             dest='requiredRank',
                             metavar="<RANK_NAME>",
                             type="string",
                             default=[],
                             help="select sequences whose taxid tag has "
                                  "a parent of rank <RANK_NAME>")

    optionManager.add_option('-r','--required',
                             action="append",
                             dest='required',
                             metavar="<TAXID>",
                             type="int",
                             default=[],
                             help="required taxid")

    optionManager.add_option('-i','--ignore',
                             action="append",
                             dest='ignored',
                             metavar="<TAXID>",
                             type="int",
                             default=[],
                             help="ignored taxid")

def loadTaxonomyDatabase(options):
    if isinstance(options.taxonomy, Taxonomy):
        return options.taxonomy
    taxonomy = ecobarcodeDatabaseConnection(options)
    if (taxonomy is not None or
        options.taxonomy is not None or
        options.taxdump is not None):
        if options.taxdump is not None:
            taxonomy = TaxonomyDump(options.taxdump)
        if taxonomy is not None and isinstance(options.taxonomy, str):
            ecoTaxonomyWriter(options.taxonomy,taxonomy)
            options.ecodb=options.taxonomy
        if isinstance(options.taxonomy, Taxonomy):
            taxonomy = options.taxonomy
        if taxonomy is None and isinstance(options.taxonomy, str):
            taxonomy = EcoTaxonomyDB(options.taxonomy)
            options.ecodb=options.taxonomy
    options.taxonomy=taxonomy
    return options.taxonomy

def taxonomyFilterGenerator(options):
    loadTaxonomyDatabase(options)
    if options.taxonomy is not None:
        taxonomy=options.taxonomy
        def taxonomyFilter(seq):
            def annotateAtRank(seq,rank):
                if 'taxid' in seq and seq['taxid'] is not None:
                    rtaxid= taxonomy.getTaxonAtRank(seq['taxid'],rank)
                    return rtaxid
                return None
            good = True
            if 'taxid' in seq:
                taxid = seq['taxid']
                if options.requiredRank:
                    taxonatrank = reduce(lambda x,y: x and y,
                                         (annotateAtRank(seq,rank) is not None
                                          for rank in options.requiredRank),True)
                    good = good and taxonatrank
                if options.required:
                    good = good and reduce(lambda x,y: x or y,
                                           (taxonomy.isAncestor(r,taxid) for r in options.required),
                                           False)
                if options.ignored:
                    good = good and not reduce(lambda x,y: x or y,
                                               (taxonomy.isAncestor(r,taxid) for r in options.ignored),
                                               False)

            return good


    else:
        def taxonomyFilter(seq):
            return True

    return taxonomyFilter

def taxonomyFilterIteratorGenerator(options):
    taxonomyFilter = taxonomyFilterGenerator(options)

    def filterIterator(seqiterator):
        for seq in seqiterator:
            if taxonomyFilter(seq):
                yield seq

    return filterIterator

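A minimal filtering sketch, assuming an optparse parser ('mytaxo' is a hypothetical ecoPCR taxonomy prefix; 33090 is the NCBI taxid of Viridiplantae; `sequences` stands for any iterator of taxid-annotated sequences):

    from optparse import OptionParser

    parser = OptionParser()
    addTaxonomyFilterOptions(parser)
    options,args = parser.parse_args(['-d','mytaxo',
                                      '--require-rank','species',
                                      '-r','33090'])

    onlyPlants = taxonomyFilterIteratorGenerator(options)
    for seq in onlyPlants(sequences):
        pass        # only sequences passing all taxonomic filters reach this point
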
@ -1,133 +0,0 @@
from obitools import NucSequence
from obitools.ecopcr import EcoPCRDBFile
from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter
from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter
from obitools.utils import universalOpen
from glob import glob
import struct
import gzip
import sys


class EcoPCRDBSequenceIterator(EcoPCRDBFile):
    '''
    Build an iterator over the sequences included in a sequence database
    formatted for ecoPCR
    '''

    def __init__(self,path,taxonomy=None):
        '''
        ecoPCR data iterator constructor

        @param path: path to the ecoPCR database including the database prefix name
        @type path: C{str}
        @param taxonomy: a taxonomy can be given to the reader to decode the taxonomic data
                         associated with the sequences. If no taxonomy is provided, it will
                         be read from the same path before the sequence database files.
        @type taxonomy: L{obitools.ecopcr.taxonomy.Taxonomy}
        '''
        self._path = path

        if taxonomy is not None:
            self._taxonomy=taxonomy
        else:
            self._taxonomy=EcoTaxonomyDB(path)

        self._seqfilesFiles = glob('%s_???.sdx' % self._path)
        self._seqfilesFiles.sort()

    def __ecoSequenceIterator(self,file):
        for record in self._ecoRecordIterator(file):
            lrecord = len(record)
            lnames = lrecord - (4*4+20)
            (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
            seqid=seqid.strip('\x00')
            de = string[:deflength]
            seq = gzip.zlib.decompress(string[deflength:])
            bioseq = NucSequence(seqid,seq,de,taxidx=taxid,taxid=self._taxonomy._taxonomy[taxid][0])
            yield bioseq

    def __iter__(self):
        for seqfile in self._seqfilesFiles:
            for seq in self.__ecoSequenceIterator(seqfile):
                yield seq

class EcoPCRDBSequenceWriter(object):

    def __init__(self,dbname,fileidx=1,taxonomy=None,ftid=None,type=None,definition=None,append=False):
        self._taxonomy=taxonomy
        self._filename="%s_%03d.sdx" % (dbname,fileidx)
        if append:
            mode ='r+b'
            f = universalOpen(self._filename)
            (recordCount,) = struct.unpack('> I',f.read(4))
            self._sequenceCount=recordCount
            del f
            self._file = open(self._filename,mode)
            self._file.seek(0,0)
            self._file.write(struct.pack('> I',0))
            self._file.seek(0,2)
        else:
            self._sequenceCount=0
            mode = 'wb'
            self._file = open(self._filename,mode)
            self._file.write(struct.pack('> I',self._sequenceCount))

        if self._taxonomy is not None:
            print >> sys.stderr,"Writing the taxonomy file...",
            ecoTaxonomyWriter(dbname,self._taxonomy)
            print >> sys.stderr,"Ok"

        if type is not None:
            assert ftid is not None,"You must specify an id attribute for features"
            self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition)
        else:
            self._annotation = None

    def _ecoSeqPacker(self,seq):

        compactseq = gzip.zlib.compress(str(seq).upper(),9)
        cptseqlength = len(compactseq)
        delength = len(seq.definition)

        totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength

        if self._taxonomy is None or 'taxid' not in seq:
            taxon=-1
        else:
            taxon=self._taxonomy.findIndex(seq['taxid'])

        try:
            packed = struct.pack('> I i 20s I I I %ds %ds' % (delength,cptseqlength),
                                 totalSize,
                                 taxon,
                                 seq.id,
                                 delength,
                                 len(seq),
                                 cptseqlength,
                                 seq.definition,
                                 compactseq)
        except struct.error as e:
            print >>sys.stderr,"\n\n============\n\nError on sequence : %s\n\n" % seq.id
            raise e

        assert len(packed) == totalSize+4, "error in sequence packing"

        return packed


    def put(self,sequence):
        if self._taxonomy is not None:
            if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
                sequence.extractTaxon()
        self._file.write(self._ecoSeqPacker(sequence))
        if self._annotation is not None:
            self._annotation.put(sequence, self._sequenceCount)
        self._sequenceCount+=1

    def __del__(self):
        self._file.seek(0,0)
        self._file.write(struct.pack('> I',self._sequenceCount))
        self._file.close()

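A minimal copy sketch: read an ecoPCR sequence database and rewrite the long sequences into a new one ('mydb' and 'subset' are hypothetical database prefixes; the reader's private _taxonomy attribute is reused so both databases share the same taxonomy):

    db = EcoPCRDBSequenceIterator('mydb')
    out = EcoPCRDBSequenceWriter('subset',taxonomy=db._taxonomy)

    for seq in db:
        if len(seq) >= 100:
            out.put(seq)
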
@ -1,630 +0,0 @@
import struct
import sys

from itertools import count,imap

from obitools.ecopcr import EcoPCRDBFile
from obitools.utils import universalOpen
from obitools.utils import ColumnFile

class Taxonomy(object):
    def __init__(self):
        '''
        The base taxonomy constructor. It finalizes the rank and
        name indexes; the actual taxonomy data must have been read
        beforehand by a subclass constructor.
        '''

        self._ranks.append('obi')

        self._speciesidx = self._ranks.index('species')
        self._genusidx = self._ranks.index('genus')
        self._familyidx = self._ranks.index('family')
        self._orderidx = self._ranks.index('order')
        self._nameidx=dict((x[0],x[2]) for x in self._name)
        self._nameidx.update(dict((x[0],x[2]) for x in self._preferedName))
        self._preferedidx=dict((x[2],x[1]) for x in self._preferedName)

        self._bigestTaxid = max(x[0] for x in self._taxonomy)


    def findTaxonByIdx(self,idx):
        if idx is None:
            return None
        return self._taxonomy[idx]

    def findIndex(self,taxid):
        if taxid is None:
            return None
        return self._index[taxid]

    def findTaxonByTaxid(self,taxid):
        return self.findTaxonByIdx(self.findIndex(taxid))

    def findTaxonByName(self,name):
        return self._taxonomy[self._nameidx[name]]

    def findRankByName(self,rank):
        try:
            return self._ranks.index(rank)
        except ValueError:
            return None

    def __contains__(self,taxid):
        return self.findTaxonByTaxid(taxid) is not None


    #####
    #
    # PUBLIC METHODS
    #
    #####


    def subTreeIterator(self, taxid):
        "Return the subtree rooted at the given taxonomic id."
        idx = self.findTaxonByTaxid(taxid)
        yield self._taxonomy[idx]
        for t in self._taxonomy:
            if t[2] == idx:
                for subt in self.subTreeIterator(t[0]):
                    yield subt

    def parentalTreeIterator(self, taxid):
        """
        Return the parental lineage of the given taxonomic id,
        from the taxon itself up to the root.
        """
        taxon=self.findTaxonByTaxid(taxid)
        if taxon is not None:
            while taxon[2]!= 0:
                yield taxon
                taxon = self._taxonomy[taxon[2]]
            yield self._taxonomy[0]
        else:
            raise StopIteration

    def isAncestor(self,parent,taxid):
        return parent in [x[0] for x in self.parentalTreeIterator(taxid)]

    def lastCommonTaxon(self,*taxids):
        if not taxids:
            return None
        if len(taxids)==1:
            return taxids[0]

        if len(taxids)==2:
            t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])]
            t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])]
            t1.reverse()
            t2.reverse()

            count = min(len(t1),len(t2))
            i=0
            while(i < count and t1[i]==t2[i]):
                i+=1
            i-=1

            return t1[i]

        ancetre = taxids[0]
        for taxon in taxids[1:]:
            ancetre = self.lastCommonTaxon(ancetre,taxon)

        return ancetre

    def betterCommonTaxon(self,error=1,*taxids):
        lca = self.lastCommonTaxon(*taxids)
        idx = self._index[lca]
        sublca = [t[0] for t in self._taxonomy if t[2]==idx]
        return sublca


    def getPreferedName(self,taxid):
        idx = self.findIndex(taxid)
        return self._preferedidx.get(idx,self._taxonomy[idx][3])


    def getScientificName(self,taxid):
        return self.findTaxonByTaxid(taxid)[3]

    def getRankId(self,taxid):
        return self.findTaxonByTaxid(taxid)[1]

    def getRank(self,taxid):
        return self._ranks[self.getRankId(taxid)]

    def getTaxonAtRank(self,taxid,rankid):
        if isinstance(rankid, str):
            rankid=self._ranks.index(rankid)
        try:
            return [x[0] for x in self.parentalTreeIterator(taxid)
                    if x[1]==rankid][0]
        except IndexError:
            return None

    def getSpecies(self,taxid):
        return self.getTaxonAtRank(taxid, self._speciesidx)

    def getGenus(self,taxid):
        return self.getTaxonAtRank(taxid, self._genusidx)

    def getFamily(self,taxid):
        return self.getTaxonAtRank(taxid, self._familyidx)

    def getOrder(self,taxid):
        return self.getTaxonAtRank(taxid, self._orderidx)

    def rankIterator(self):
        for x in imap(None,self._ranks,xrange(len(self._ranks))):
            yield x

    def groupTaxa(self,taxa,groupname):
        t=[self.findTaxonByTaxid(x) for x in taxa]
        a=set(x[2] for x in t)
        assert len(a)==1,"All taxa must have the same parent"
        newtaxid=max([2999999]+[x[0] for x in self._taxonomy if x[0]>=3000000 and x[0]<4000000])+1
        newidx=len(self._taxonomy)
        if 'GROUP' not in self._ranks:
            self._ranks.append('GROUP')
        rankid=self._ranks.index('GROUP')
        self._taxonomy.append((newtaxid,rankid,a.pop(),groupname))
        for x in t:
            x[2]=newidx

    def addLocalTaxon(self,name,rank,parent,minimaltaxid=10000000):
        newtaxid = minimaltaxid if (self._bigestTaxid < minimaltaxid) else self._bigestTaxid+1

        rankid=self.findRankByName(rank)
        parentidx = self.findIndex(int(parent))
        tx = (newtaxid,rankid,parentidx,name,'local')
        self._taxonomy.append(tx)
        newidx=len(self._taxonomy)-1
        self._name.append((name,'scientific name',newidx))
        self._nameidx[name]=newidx
        self._index[newtaxid]=newidx

        self._bigestTaxid=newtaxid

        return newtaxid

    def removeLocalTaxon(self,taxid):
        raise NotImplementedError
        txidx = self.findIndex(taxid)
        taxon = self.findTaxonByIdx(txidx)

        assert txidx >= self._localtaxon,"Only local taxa can be deleted"

        for t in self._taxonomy:
            if t[2] == txidx:
                self.removeLocalTaxon(t[0])

        return taxon

    def addPreferedName(self,taxid,name):
        idx = self.findIndex(taxid)
        self._preferedName.append((name,'obi',idx))
        self._preferedidx[idx]=name
        return taxid

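lastCommonTaxon() reverses both parental lineages so that they read root-first, then walks them in parallel and stops at the first disagreement; the last shared taxid is the lowest common ancestor, and the variadic form folds this pairwise operation over all given taxids. A minimal usage sketch (the database prefix and taxids are examples):

    taxonomy = EcoTaxonomyDB('mytaxo')
    lca = taxonomy.lastCommonTaxon(9606,10090)      # e.g. human and mouse
    print taxonomy.getScientificName(lca),taxonomy.getRank(lca)
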
class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
|
|
||||||
'''
|
|
||||||
A taxonomy database class
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self,path):
|
|
||||||
'''
|
|
||||||
        The taxonomy database constructor

        @param path: path to the ecoPCR database including the database prefix name
        @type path: C{str}
        '''
        self._path = path
        self._taxonFile = "%s.tdx" % self._path
        self._localTaxonFile = "%s.ldx" % self._path
        self._ranksFile = "%s.rdx" % self._path
        self._namesFile = "%s.ndx" % self._path
        self._preferedNamesFile = "%s.pdx" % self._path
        self._aliasFile = "%s.adx" % self._path

        print >> sys.stderr,"Reading binary taxonomy database...",

        self.__readNodeTable()

        print >> sys.stderr," ok"

        Taxonomy.__init__(self)


    #####
    #
    # Iterator functions
    #
    #####

    def __ecoNameIterator(self,file):
        for record in self._ecoRecordIterator(file):
            lrecord = len(record)
            lnames = lrecord - 16
            (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
            name=names[:namelength]
            classname=names[namelength:]
            yield (name,classname,indextaxid)


    def __ecoTaxonomicIterator(self):
        for record in self._ecoRecordIterator(self._taxonFile):
            lrecord = len(record)
            lnames = lrecord - 16
            (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
            yield (taxid,rankid,parentidx,name,'ncbi')

        try :
            lt=0
            for record in self._ecoRecordIterator(self._localTaxonFile):
                lrecord = len(record)
                lnames = lrecord - 16
                (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
                lt+=1
                yield (taxid,rankid,parentidx,name,'local')
            print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt
        except:
            print >> sys.stderr, " [INFO : Local taxon file not found] "

    def __ecoRankIterator(self):
        for record in self._ecoRecordIterator(self._ranksFile):
            yield record

    def __ecoAliasIterator(self):
        for record in self._ecoRecordIterator(self._aliasFile):
            (taxid,index) = struct.unpack('> I i',record)
            yield taxid,index

    #####
    #
    # Indexes
    #
    #####

    def __ecoNameIndex(self):
        indexName = [x for x in self.__ecoNameIterator(self._namesFile)]
        return indexName

    def __ecoRankIndex(self):
        rank = [r for r in self.__ecoRankIterator()]
        return rank

    def __ecoTaxonomyIndex(self):
        taxonomy = []

        try :
            index = dict(self.__ecoAliasIterator())
            print >> sys.stderr, " [INFO : Taxon alias file found] "
            buildIndex=False
        except:
            print >> sys.stderr, " [INFO : Taxon alias file not found] "
            index={}
            i = 0
            buildIndex=True

        localtaxon=0
        i=0
        for x in self.__ecoTaxonomicIterator():
            taxonomy.append(x)
            if x[4]=='ncbi':
                # NCBI taxa come first, so they fill the first localtaxon slots
                localtaxon+=1

            if buildIndex or x[4]!='ncbi':
                index[x[0]] = i
            i+=1


        print >> sys.stderr,"Taxonomical tree read",
        return taxonomy, index,localtaxon

    def __readNodeTable(self):
        self._taxonomy, self._index, self._localtaxon= self.__ecoTaxonomyIndex()
        self._ranks = self.__ecoRankIndex()
        self._name = self.__ecoNameIndex()

        # Add local taxon names to the name index
        i=self._localtaxon
        for t in self._taxonomy[self._localtaxon:]:
            self._name.append((t[3],'scientific name',i))
            i+=1

        try :
            self._preferedName = [(x[0],'obi',x[2])
                                  for x in self.__ecoNameIterator(self._preferedNamesFile)]
            print >> sys.stderr, " [INFO : Preferred taxon name file found] : %d added taxa" % len(self._preferedName)
        except:
            print >> sys.stderr, " [INFO : Preferred taxon name file not found]"
            self._preferedName = []
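The iterators above delegate the actual I/O to _ecoRecordIterator, which lives elsewhere in the package. As a rough sketch of the contract they assume, a big-endian uint32 record count at the head of each file, then every record prefixed by its own byte length (exactly what the packers of ecoTaxonomyWriter below emit), a hypothetical stand-in reader could look like this:

import struct

def _sketchRecordIterator(path):
    # hypothetical stand-in for _ecoRecordIterator, for illustration only
    f = open(path, 'rb')
    (count,) = struct.unpack('> I', f.read(4))      # record count written first
    for _ in xrange(count):
        (size,) = struct.unpack('> I', f.read(4))   # byte length of the next record
        yield f.read(size)                          # payload, without its size prefix
    f.close()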
class TaxonomyDump(Taxonomy):

    def __init__(self,taxdir):

        self._path=taxdir
        self._readNodeTable('%s/nodes.dmp' % taxdir)

        print >>sys.stderr,"Adding scientific name..."

        self._name=[]
        for taxid,name,classname in self._nameIterator('%s/names.dmp' % taxdir):
            self._name.append((name,classname,self._index[taxid]))
            if classname == 'scientific name':
                self._taxonomy[self._index[taxid]].extend([name,'ncbi'])

        print >>sys.stderr,"Adding taxid alias..."
        for taxid,current in self._mergedNodeIterator('%s/merged.dmp' % taxdir):
            self._index[taxid]=self._index[current]

        print >>sys.stderr,"Adding deleted taxid..."
        for taxid in self._deletedNodeIterator('%s/delnodes.dmp' % taxdir):
            self._index[taxid]=None

        self._nameidx=dict((x[0],x[2]) for x in self._name)


    def _taxonCmp(t1,t2):
        if t1[0] < t2[0]:
            return -1
        elif t1[0] > t2[0]:
            return +1
        return 0

    _taxonCmp=staticmethod(_taxonCmp)

    def _bsearchTaxon(self,taxid):
        taxCount = len(self._taxonomy)
        begin = 0
        end   = taxCount
        oldcheck=taxCount
        check = (begin + end) / 2   # integer midpoint of the search window
        while check != oldcheck and self._taxonomy[check][0]!=taxid :
            if self._taxonomy[check][0] < taxid:
                begin=check
            else:
                end=check
            oldcheck=check
            check = (begin + end) / 2


        if self._taxonomy[check][0]==taxid:
            return check
        else:
            return None



    def _readNodeTable(self,file):

        file = universalOpen(file)

        nodes = ColumnFile(file,
                           sep='|',
                           types=(int,int,str,
                                  str,str,bool,
                                  int,bool,int,
                                  bool,bool,bool,str))
        print >>sys.stderr,"Reading taxonomy dump file..."
        # (taxid,rank,parent)
        taxonomy=[[n[0],n[2],n[1]] for n in nodes]
        print >>sys.stderr,"List all taxonomy rank..."
        ranks =list(set(x[1] for x in taxonomy))
        ranks.sort()
        rankidx = dict(map(None,ranks,xrange(len(ranks))))

        print >>sys.stderr,"Sorting taxons..."
        taxonomy.sort(TaxonomyDump._taxonCmp)

        self._taxonomy=taxonomy
        self._localtaxon=len(taxonomy)

        print >>sys.stderr,"Indexing taxonomy..."
        index = {}
        for t in self._taxonomy:
            index[t[0]]=self._bsearchTaxon(t[0])

        print >>sys.stderr,"Indexing parent and rank..."
        for t in self._taxonomy:
            t[1]=rankidx[t[1]]
            t[2]=index[t[2]]

        self._ranks=ranks
        self._index=index
        self._preferedName = []

    def _nameIterator(self,file):
        file = universalOpen(file)
        names = ColumnFile(file,
                           sep='|',
                           types=(int,str,
                                  str,str))
        for taxid,name,unique,classname,white in names:
            yield taxid,name,classname

    def _mergedNodeIterator(self,file):
        file = universalOpen(file)
        merged = ColumnFile(file,
                           sep='|',
                           types=(int,int,str))
        for taxid,current,white in merged:
            yield taxid,current

    def _deletedNodeIterator(self,file):
        file = universalOpen(file)
        deleted = ColumnFile(file,
                           sep='|',
                           types=(int,str))
        for taxid,white in deleted:
            yield taxid
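TaxonomyDump and the binary writer below are the two halves of a conversion pipeline: parse the NCBI taxdump flat files, then serialize them as the .tdx/.rdx/.ndx/.adx files that the binary reader above expects. A hypothetical round trip (both paths are placeholders):

taxdump = TaxonomyDump('/data/ncbi/taxdump')     # directory holding nodes.dmp, names.dmp, ...
ecoTaxonomyWriter('/data/ecopcr/mydb', taxdump)  # writes mydb.tdx, mydb.rdx, mydb.ndx, mydb.adx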
#####
#
#
# Binary writer
#
#
#####

def ecoTaxonomyWriter(prefix, taxonomy,onlyLocal=False):

    def ecoTaxPacker(tx):

        namelength = len(tx[3])

        totalSize = 4 + 4 + 4 + 4 + namelength

        packed = struct.pack('> I I I I I %ds' % namelength,
                             totalSize,
                             tx[0],
                             tx[1],
                             tx[2],
                             namelength,
                             tx[3])

        return packed

    def ecoRankPacker(rank):

        namelength = len(rank)

        packed = struct.pack('> I %ds' % namelength,
                             namelength,
                             rank)

        return packed

    def ecoAliasPacker(taxid,index):

        totalSize = 4 + 4
        try:
            packed = struct.pack('> I I i',
                                 totalSize,
                                 taxid,
                                 index)
        except struct.error,e:
            print >>sys.stderr,(totalSize,taxid,index)
            print >>sys.stderr,"Total size : %d taxid : %d index : %d" % (totalSize,taxid,index)
            raise e

        return packed

    def ecoNamePacker(name):

        namelength = len(name[0])
        classlength= len(name[1])
        totalSize = namelength + classlength + 4 + 4 + 4 + 4

        packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
                             totalSize,
                             int(name[1]=='scientific name'),
                             namelength,
                             classlength,
                             name[2],
                             name[0],
                             name[1])

        return packed


    def ecoTaxWriter(file,taxonomy):
        output = open(file,'wb')
        nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]=='ncbi'),0)

        output.write(struct.pack('> I',nbtaxon))

        for tx in taxonomy:
            if tx[4]=='ncbi':
                output.write(ecoTaxPacker(tx))

        output.close()
        return nbtaxon < len(taxonomy)

    def ecoLocalTaxWriter(file,taxonomy):
        nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]!='ncbi'),0)

        if nbtaxon:
            output = open(file,'wb')

            output.write(struct.pack('> I',nbtaxon))

            for tx in taxonomy:
                if tx[4]!='ncbi':
                    output.write(ecoTaxPacker(tx))

            output.close()


    def ecoRankWriter(file,ranks):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(ranks)))

        for rank in ranks:
            output.write(ecoRankPacker(rank))

        output.close()

    def ecoAliasWriter(file,index):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(index)))

        for taxid in index:
            i=index[taxid]
            if i is None:
                i=-1
            output.write(ecoAliasPacker(taxid, i))

        output.close()

    def nameCmp(n1,n2):
        name1=n1[0].upper()
        name2=n2[0].upper()
        if name1 < name2:
            return -1
        elif name1 > name2:
            return 1
        return 0


    def ecoNameWriter(file,names):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(names)))

        names.sort(nameCmp)

        for name in names:
            output.write(ecoNamePacker(name))

        output.close()

    def ecoPreferedNameWriter(file,names):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(names)))
        for name in names:
            output.write(ecoNamePacker(name))

        output.close()

    localtaxon=True
    if not onlyLocal:
        ecoRankWriter('%s.rdx' % prefix, taxonomy._ranks)
        localtaxon = ecoTaxWriter('%s.tdx' % prefix, taxonomy._taxonomy)
        ecoNameWriter('%s.ndx' % prefix, [x for x in taxonomy._name if x[2] < taxonomy._localtaxon])
        ecoAliasWriter('%s.adx' % prefix, taxonomy._index)
    if localtaxon:
        ecoLocalTaxWriter('%s.ldx' % prefix, taxonomy._taxonomy)
    if taxonomy._preferedName:
        ecoNameWriter('%s.pdx' % prefix, taxonomy._preferedName)
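A quick round trip showing that the '> I I I I I %ds' layout written by ecoTaxPacker is what __ecoTaxonomicIterator unpacks once the leading size field has been consumed (the values here are invented):

import struct

packed = struct.pack('> I I I I I %ds' % 5, 21, 9606, 7, 42, 5, 'Homo!')
record = packed[4:]   # the record iterator strips the totalSize prefix
fields = struct.unpack('> I I I I %ds' % (len(record) - 16), record)
# fields == (9606, 7, 42, 5, 'Homo!') : taxid, rank index, parent index, name length, name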
@@ -1,2 +0,0 @@
class EcoTagResult(dict):
    pass
@@ -1,150 +0,0 @@
from itertools import imap
from obitools import utils

from obitools.ecotag import EcoTagResult

class EcoTagFileIterator(utils.ColumnFile):

    @staticmethod
    def taxid(x):
        x = int(x)
        if x < 0:
            return None
        else:
            return x

    @staticmethod
    def scientificName(x):
        if x=='--':
            return None
        else:
            return x

    @staticmethod
    def value(x):
        if x=='--':
            return None
        else:
            return float(x)

    @staticmethod
    def count(x):
        if x=='--':
            return None
        else:
            return int(x)


    def __init__(self,stream):
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,str,str,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.count,
                                   EcoTagFileIterator.count,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   str,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   str
                                   ))
        self._memory=None

    _colname = ['identification',
                'seqid',
                'best_match_ac',
                'max_identity',
                'min_identity',
                'theorical_min_identity',
                'count',
                'match_count',
                'taxid',
                'scientific_name',
                'rank',
                'order_taxid',
                'order_sn',
                'family_taxid',
                'family_sn',
                'genus_taxid',
                'genus_sn',
                'species_taxid',
                'species_sn',
                'sequence']

    def next(self):
        if self._memory is not None:
            data=self._memory
            self._memory=None
        else:
            data = utils.ColumnFile.next(self)
        data = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(data)],data))

        if data['identification']=='ID':
            data.cd=[]
            try:
                nextone = utils.ColumnFile.next(self)
                nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
            except StopIteration:
                nextone = None
            while nextone is not None and nextone['identification']=='CD':
                data.cd.append(nextone)
                try:
                    nextone = utils.ColumnFile.next(self)
                    nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
                except StopIteration:
                    nextone = None
            self._memory=nextone

        return data

def ecoTagIdentifiedFilter(ecoTagIterator):
    for x in ecoTagIterator:
        if x['identification']=='ID':
            yield x


class EcoTagAbstractIterator(utils.ColumnFile):

    _colname = ['scientific_name',
                'taxid',
                'rank',
                'count',
                'max_identity',
                'min_identity']


    @staticmethod
    def taxid(x):
        x = int(x)
        if x < 0:
            return None
        else:
            return x

    def __init__(self,stream):
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,
                                   EcoTagFileIterator.taxid,
                                   str,
                                   int,
                                   float,float,float))

    def next(self):
        data = utils.ColumnFile.next(self)
        data = dict(imap(None,EcoTagAbstractIterator._colname,data))

        return data

def ecoTagAbstractFilter(ecoTagAbsIterator):
    for x in ecoTagAbsIterator:
        if x['taxid'] is not None:
            yield x
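A hypothetical read loop over an ecoTag result file, combining the iterator with the filter above (the file name is a placeholder):

results = EcoTagFileIterator(open('sample.ecotag'))
for hit in ecoTagIdentifiedFilter(results):
    # each 'ID' record carries its companion 'CD' records in hit.cd
    print hit['seqid'], hit['scientific_name'], len(hit.cd)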
@@ -1,54 +0,0 @@
import time
from urllib2 import urlopen
import shelve
from threading import Lock
import sys

class EUtils(object):
    '''

    '''

    _last_request=0
    _interval=3

    def __init__(self):
        self._lock = Lock()

    def wait(self):
        now=time.time()
        delta = now - EUtils._last_request
        while delta < EUtils._interval:
            time.sleep(EUtils._interval - delta)   # sleep only the remaining delay
            now=time.time()
            delta = now - EUtils._last_request

    def _sendRequest(self,url):
        self.wait()
        EUtils._last_request=time.time()
        t = EUtils._last_request
        print >>sys.stderr,"Sending request to NCBI @ %f" % t
        data = urlopen(url).read()
        print >>sys.stderr,"Data read from NCBI @ %f (%f)" % (t,time.time()-t)
        return data

    def setInterval(self,seconde):
        EUtils._interval=seconde


class EFetch(EUtils):
    '''

    '''
    def __init__(self,db,tool='OBITools',
                 retmode='text',rettype="native",
                 server='eutils.ncbi.nlm.nih.gov'):
        EUtils.__init__(self)
        self._url = "http://%s/entrez/eutils/efetch.fcgi?db=%s&tool=%s&retmode=%s&rettype=%s"
        self._url = self._url % (server,db,tool,retmode,rettype)


    def get(self,**args):
        key = "&".join(['%s=%s' % x for x in args.items()])
        return self._sendRequest(self._url +"&" + key)
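A sketch of the intended use of EFetch; the database name and accession are illustrative, and the call performs a real HTTP request throttled by EUtils.wait():

efetch = EFetch('nucleotide', rettype='fasta')
fasta  = efetch.get(id='AJ242600.1')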
@@ -1,56 +0,0 @@
"""
|
|
||||||
implement fastn/fastp sililarity search algorithm for BioSequence.
|
|
||||||
"""
|
|
||||||
|
|
||||||
class Fast(object):
|
|
||||||
|
|
||||||
def __init__(self,seq,kup=2):
|
|
||||||
'''
|
|
||||||
@param seq: sequence to hash
|
|
||||||
@type seq: BioSequence
|
|
||||||
@param kup: word size used for hashing process
|
|
||||||
@type kup: int
|
|
||||||
'''
|
|
||||||
hash={}
|
|
||||||
seq = str(seq)
|
|
||||||
for word,pos in ((seq[i:i+kup].upper(),i) for i in xrange(len(seq)-kup)):
|
|
||||||
if word in hash:
|
|
||||||
hash[word].append(pos)
|
|
||||||
else:
|
|
||||||
hash[word]=[pos]
|
|
||||||
|
|
||||||
self._kup = kup
|
|
||||||
self._hash= hash
|
|
||||||
self._seq = seq
|
|
||||||
|
|
||||||
def __call__(self,seq):
|
|
||||||
'''
|
|
||||||
Align one sequence with the fast hash table.
|
|
||||||
|
|
||||||
@param seq: the sequence to align
|
|
||||||
@type seq: BioSequence
|
|
||||||
|
|
||||||
@return: where smax is the
|
|
||||||
score of the largest diagonal and pmax the
|
|
||||||
associated shift
|
|
||||||
@rtype: a int tuple (smax,pmax)
|
|
||||||
'''
|
|
||||||
histo={}
|
|
||||||
seq = str(seq).upper()
|
|
||||||
hash= self._hash
|
|
||||||
kup = self._kup
|
|
||||||
|
|
||||||
for word,pos in ((seq[i:i+kup],i) for i in xrange(len(seq)-kup)):
|
|
||||||
matchedpos = hash.get(word,[])
|
|
||||||
for p in matchedpos:
|
|
||||||
delta = pos - p
|
|
||||||
histo[delta]=histo.get(delta,0) + 1
|
|
||||||
smax = max(histo.values())
|
|
||||||
pmax = [x for x in histo if histo[x]==smax]
|
|
||||||
return smax,pmax
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self._seq)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
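A small illustration of the diagonal-counting lookup; plain strings are enough here, since the constructor only calls str() on its argument:

reference = Fast('acgtacgtacgt', kup=4)
smax, pmax = reference('ttacgtacg')
# smax is the match count on the best diagonal, pmax the list of shifts reaching it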
@@ -1,384 +0,0 @@
"""
|
|
||||||
fasta module provides functions to read and write sequences in fasta format.
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
#from obitools.format.genericparser import fastGenericEntryIteratorGenerator
|
|
||||||
from obitools.format.genericparser import genericEntryIteratorGenerator
|
|
||||||
from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence
|
|
||||||
from obitools import _default_raw_parser
|
|
||||||
|
|
||||||
#from obitools.alignment import alignmentReader
|
|
||||||
#from obitools.utils import universalOpen
|
|
||||||
|
|
||||||
import re
|
|
||||||
from obitools.ecopcr.options import loadTaxonomyDatabase
|
|
||||||
from obitools.format import SequenceFileIterator
|
|
||||||
|
|
||||||
#from _fasta import parseFastaDescription,fastaParser
|
|
||||||
#from _fasta import _fastaJoinSeq
|
|
||||||
#from _fasta import _parseFastaTag
|
|
||||||
|
|
||||||
|
|
||||||
#fastaEntryIterator=fastGenericEntryIteratorGenerator(startEntry='>')
|
|
||||||
fastaEntryIterator=genericEntryIteratorGenerator(startEntry='>')
|
|
||||||
rawFastaEntryIterator=genericEntryIteratorGenerator(startEntry='\s*>')
|
|
||||||
|
|
||||||
def _fastaJoinSeq(seqarray):
|
|
||||||
return ''.join([x.strip() for x in seqarray])
|
|
||||||
|
|
||||||
|
|
||||||
def parseFastaDescription(ds,tagparser):
|
|
||||||
|
|
||||||
m = tagparser.search(' '+ds)
|
|
||||||
if m is not None:
|
|
||||||
info=m.group(0)
|
|
||||||
definition = ds[m.end(0):].strip()
|
|
||||||
else:
|
|
||||||
info=None
|
|
||||||
definition=ds
|
|
||||||
|
|
||||||
return definition,info
|
|
||||||
|
|
||||||
def fastaParser(seq,bioseqfactory,tagparser,rawparser,joinseq=_fastaJoinSeq):
|
|
||||||
'''
|
|
||||||
Parse a fasta record.
|
|
||||||
|
|
||||||
@attention: internal purpose function
|
|
||||||
|
|
||||||
@param seq: a sequence object containing all lines corresponding
|
|
||||||
to one fasta sequence
|
|
||||||
@type seq: C{list} or C{tuple} of C{str}
|
|
||||||
|
|
||||||
@param bioseqfactory: a callable object return a BioSequence
|
|
||||||
instance.
|
|
||||||
@type bioseqfactory: a callable object
|
|
||||||
|
|
||||||
@param tagparser: a compiled regular expression usable
|
|
||||||
to identify key, value couples from
|
|
||||||
title line.
|
|
||||||
@type tagparser: regex instance
|
|
||||||
|
|
||||||
@return: a C{BioSequence} instance
|
|
||||||
'''
|
|
||||||
seq = seq.split('\n')
|
|
||||||
title = seq[0].strip()[1:].split(None,1)
|
|
||||||
id=title[0]
|
|
||||||
if len(title) == 2:
|
|
||||||
definition,info=parseFastaDescription(title[1], tagparser)
|
|
||||||
else:
|
|
||||||
info= None
|
|
||||||
definition=None
|
|
||||||
|
|
||||||
seq=joinseq(seq[1:])
|
|
||||||
return bioseqfactory(id, seq, definition,info,rawparser)
|
|
||||||
|
|
||||||
|
|
||||||
def fastaNucParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
|
|
||||||
return fastaParser(seq,NucSequence,tagparser=tagparser,joinseq=_fastaJoinSeq)
|
|
||||||
|
|
||||||
def fastaAAParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
|
|
||||||
return fastaParser(seq,AASequence,tagparser=tagparser,joinseq=_fastaJoinSeq)
|
|
||||||
|
|
||||||
def fastaIterator(file,bioseqfactory=bioSeqGenerator,
|
|
||||||
tagparser=_default_raw_parser,
|
|
||||||
joinseq=_fastaJoinSeq):
|
|
||||||
'''
|
|
||||||
iterate through a fasta file sequence by sequence.
|
|
||||||
Returned sequences by this iterator will be BioSequence
|
|
||||||
instances
|
|
||||||
|
|
||||||
@param file: a line iterator containing fasta data or a filename
|
|
||||||
@type file: an iterable object or str
|
|
||||||
@param bioseqfactory: a callable object return a BioSequence
|
|
||||||
instance.
|
|
||||||
@type bioseqfactory: a callable object
|
|
||||||
|
|
||||||
@param tagparser: a compiled regular expression usable
|
|
||||||
to identify key, value couples from
|
|
||||||
title line.
|
|
||||||
@type tagparser: regex instance
|
|
||||||
|
|
||||||
@return: an iterator on C{BioSequence} instance
|
|
||||||
|
|
||||||
@see: L{fastaNucIterator}
|
|
||||||
@see: L{fastaAAIterator}
|
|
||||||
|
|
||||||
>>> from obitools.format.sequence.fasta import fastaIterator
|
|
||||||
>>> f = fastaIterator('monfichier')
|
|
||||||
>>> s = f.next()
|
|
||||||
>>> print s
|
|
||||||
gctagctagcatgctagcatgcta
|
|
||||||
>>>
|
|
||||||
'''
|
|
||||||
rawparser=tagparser
|
|
||||||
allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
|
|
||||||
tagparser = re.compile('( *%s)+' % allparser)
|
|
||||||
|
|
||||||
for entry in fastaEntryIterator(file):
|
|
||||||
yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
|
|
||||||
|
|
||||||
def rawFastaIterator(file,bioseqfactory=bioSeqGenerator,
|
|
||||||
tagparser=_default_raw_parser,
|
|
||||||
joinseq=_fastaJoinSeq):
|
|
||||||
|
|
||||||
rawparser=tagparser
|
|
||||||
allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
|
|
||||||
tagparser = re.compile('( *%s)+' % allparser)
|
|
||||||
|
|
||||||
for entry in rawFastaEntryIterator(file):
|
|
||||||
entry=entry.strip()
|
|
||||||
yield fastaParser(entry,bioseqfactory,tagparser,rawparser,joinseq)
|
|
||||||
|
|
||||||
def fastaNucIterator(file,tagparser=_default_raw_parser):
|
|
||||||
'''
|
|
||||||
iterate through a fasta file sequence by sequence.
|
|
||||||
Returned sequences by this iterator will be NucSequence
|
|
||||||
instances
|
|
||||||
|
|
||||||
@param file: a line iterator containint fasta data
|
|
||||||
@type file: an iterable object
|
|
||||||
|
|
||||||
@param tagparser: a compiled regular expression usable
|
|
||||||
to identify key, value couples from
|
|
||||||
title line.
|
|
||||||
@type tagparser: regex instance
|
|
||||||
|
|
||||||
@return: an iterator on C{NucBioSequence} instance
|
|
||||||
@rtype: a generator object
|
|
||||||
|
|
||||||
@see: L{fastaIterator}
|
|
||||||
@see: L{fastaAAIterator}
|
|
||||||
'''
|
|
||||||
return fastaIterator(file, NucSequence,tagparser)
|
|
||||||
|
|
||||||
def fastaAAIterator(file,tagparser=_default_raw_parser):
|
|
||||||
'''
|
|
||||||
iterate through a fasta file sequence by sequence.
|
|
||||||
Returned sequences by this iterator will be AASequence
|
|
||||||
instances
|
|
||||||
|
|
||||||
@param file: a line iterator containing fasta data
|
|
||||||
@type file: an iterable object
|
|
||||||
|
|
||||||
@param tagparser: a compiled regular expression usable
|
|
||||||
to identify key, value couples from
|
|
||||||
title line.
|
|
||||||
@type tagparser: regex instance
|
|
||||||
|
|
||||||
@return: an iterator on C{AABioSequence} instance
|
|
||||||
|
|
||||||
@see: L{fastaIterator}
|
|
||||||
@see: L{fastaNucIterator}
|
|
||||||
'''
|
|
||||||
return fastaIterator(file, AASequence,tagparser)
|
|
||||||
|
|
||||||
def formatFasta(data,gbmode=False,upper=False,restrict=None):
|
|
||||||
'''
|
|
||||||
Convert a seqence or a set of sequences in a
|
|
||||||
string following the fasta format
|
|
||||||
|
|
||||||
@param data: sequence or a set of sequences
|
|
||||||
@type data: BioSequence instance or an iterable object
|
|
||||||
on BioSequence instances
|
|
||||||
|
|
||||||
@param gbmode: if set to C{True} identifier part of the title
|
|
||||||
line follows recommendation from nbci to allow
|
|
||||||
sequence indexing with the blast formatdb command.
|
|
||||||
@type gbmode: bool
|
|
||||||
|
|
||||||
@param restrict: a set of key name that will be print in the formated
|
|
||||||
output. If restrict is set to C{None} (default) then
|
|
||||||
all keys are formated.
|
|
||||||
@type restrict: any iterable value or None
|
|
||||||
|
|
||||||
@return: a fasta formated string
|
|
||||||
@rtype: str
|
|
||||||
'''
|
|
||||||
if isinstance(data, BioSequence):
|
|
||||||
data = [data]
|
|
||||||
|
|
||||||
if restrict is not None and not isinstance(restrict, set):
|
|
||||||
restrict = set(restrict)
|
|
||||||
|
|
||||||
rep = []
|
|
||||||
for sequence in data:
|
|
||||||
seq = str(sequence)
|
|
||||||
if sequence.definition is None:
|
|
||||||
definition=''
|
|
||||||
else:
|
|
||||||
definition=sequence.definition
|
|
||||||
if upper:
|
|
||||||
frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
|
|
||||||
else:
|
|
||||||
frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
|
|
||||||
info='; '.join(['%s=%s' % x
|
|
||||||
for x in sequence.rawiteritems()
|
|
||||||
if restrict is None or x[0] in restrict])
|
|
||||||
if info:
|
|
||||||
info=info+';'
|
|
||||||
if sequence._rawinfo is not None and sequence._rawinfo:
|
|
||||||
info+=" " + sequence._rawinfo.strip()
|
|
||||||
|
|
||||||
id = sequence.id
|
|
||||||
if gbmode:
|
|
||||||
if 'gi' in sequence:
|
|
||||||
id = "gi|%s|%s" % (sequence['gi'],id)
|
|
||||||
else:
|
|
||||||
id = "lcl|%s|" % (id)
|
|
||||||
title='>%s %s %s' %(id,info,definition)
|
|
||||||
rep.append("%s\n%s" % (title,frgseq))
|
|
||||||
return '\n'.join(rep)
|
|
||||||
|
|
||||||
def formatSAPFastaGenerator(options):
|
|
||||||
loadTaxonomyDatabase(options)
|
|
||||||
|
|
||||||
taxonomy=None
|
|
||||||
if options.taxonomy is not None:
|
|
||||||
taxonomy=options.taxonomy
|
|
||||||
|
|
||||||
assert taxonomy is not None,"SAP formating require indication of a taxonomy database"
|
|
||||||
|
|
||||||
ranks = ('superkingdom', 'kingdom', 'subkingdom', 'superphylum',
|
|
||||||
'phylum', 'subphylum', 'superclass', 'class', 'subclass',
|
|
||||||
'infraclass', 'superorder', 'order', 'suborder', 'infraorder',
|
|
||||||
'parvorder', 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe',
|
|
||||||
'subtribe', 'supergenus', 'genus', 'subgenus', 'species group',
|
|
||||||
'species subgroup', 'species', 'subspecies')
|
|
||||||
|
|
||||||
trank=set(taxonomy._ranks)
|
|
||||||
ranks = [taxonomy._ranks.index(x) for x in ranks if x in trank]
|
|
||||||
|
|
||||||
strict= options.strictsap
|
|
||||||
|
|
||||||
def formatSAPFasta(data,gbmode=False,upper=False,restrict=None):
|
|
||||||
'''
|
|
||||||
Convert a seqence or a set of sequences in a
|
|
||||||
string following the fasta format as recommended for the SAP
|
|
||||||
software
|
|
||||||
|
|
||||||
http://ib.berkeley.edu/labs/slatkin/munch/StatisticalAssignmentPackage.html
|
|
||||||
|
|
||||||
@param data: sequence or a set of sequences
|
|
||||||
@type data: BioSequence instance or an iterable object
|
|
||||||
on BioSequence instances
|
|
||||||
|
|
||||||
@param gbmode: if set to C{True} identifier part of the title
|
|
||||||
line follows recommendation from nbci to allow
|
|
||||||
sequence indexing with the blast formatdb command.
|
|
||||||
@type gbmode: bool
|
|
||||||
|
|
||||||
@param restrict: a set of key name that will be print in the formated
|
|
||||||
output. If restrict is set to C{None} (default) then
|
|
||||||
all keys are formated.
|
|
||||||
@type restrict: any iterable value or None
|
|
||||||
|
|
||||||
@return: a fasta formated string
|
|
||||||
@rtype: str
|
|
||||||
'''
|
|
||||||
if isinstance(data, BioSequence):
|
|
||||||
data = [data]
|
|
||||||
|
|
||||||
if restrict is not None and not isinstance(restrict, set):
|
|
||||||
restrict = set(restrict)
|
|
||||||
|
|
||||||
rep = []
|
|
||||||
for sequence in data:
|
|
||||||
seq = str(sequence)
|
|
||||||
|
|
||||||
if upper:
|
|
||||||
frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
|
|
||||||
else:
|
|
||||||
frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
|
|
||||||
|
|
||||||
try:
|
|
||||||
taxid = sequence["taxid"]
|
|
||||||
except KeyError:
|
|
||||||
if strict:
|
|
||||||
raise AssertionError('All sequence must have a taxid')
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
definition=' ;'
|
|
||||||
|
|
||||||
for r in ranks:
|
|
||||||
taxon = taxonomy.getTaxonAtRank(taxid,r)
|
|
||||||
if taxon is not None:
|
|
||||||
definition+=' %s: %s,' % (taxonomy._ranks[r],taxonomy.getPreferedName(taxon))
|
|
||||||
|
|
||||||
definition='%s ; %s' % (definition[0:-1],taxonomy.getPreferedName(taxid))
|
|
||||||
|
|
||||||
id = sequence.id
|
|
||||||
if gbmode:
|
|
||||||
if 'gi' in sequence:
|
|
||||||
id = "gi|%s|%s" % (sequence['gi'],id)
|
|
||||||
else:
|
|
||||||
id = "lcl|%s|" % (id)
|
|
||||||
title='>%s%s' %(id,definition)
|
|
||||||
rep.append("%s\n%s" % (title,frgseq))
|
|
||||||
return '\n'.join(rep)
|
|
||||||
|
|
||||||
return formatSAPFasta
|
|
||||||
|
|
||||||
class FastaIterator(SequenceFileIterator):
|
|
||||||
|
|
||||||
|
|
||||||
entryIterator = genericEntryIteratorGenerator(startEntry='>')
|
|
||||||
classmethod(entryIterator)
|
|
||||||
|
|
||||||
def __init__(self,inputfile,bioseqfactory=bioSeqGenerator,
|
|
||||||
tagparser=_default_raw_parser,
|
|
||||||
joinseq=_fastaJoinSeq):
|
|
||||||
|
|
||||||
SequenceFileIterator.__init__(self, inputfile, bioseqfactory)
|
|
||||||
|
|
||||||
self.__file = FastaIterator.entryIterator(self._inputfile)
|
|
||||||
|
|
||||||
self._tagparser = tagparser
|
|
||||||
self._joinseq = joinseq
|
|
||||||
|
|
||||||
def get_tagparser(self):
|
|
||||||
return self.__tagparser
|
|
||||||
|
|
||||||
|
|
||||||
def set_tagparser(self, value):
|
|
||||||
self._rawparser = value
|
|
||||||
allparser = value % '[a-zA-Z][a-zA-Z0-9_]*'
|
|
||||||
self.__tagparser = re.compile('( *%s)+' % allparser)
|
|
||||||
|
|
||||||
def _parseFastaDescription(self,ds):
|
|
||||||
|
|
||||||
m = self._tagparser.search(' '+ds)
|
|
||||||
if m is not None:
|
|
||||||
info=m.group(0)
|
|
||||||
definition = ds[m.end(0):].strip()
|
|
||||||
else:
|
|
||||||
info=None
|
|
||||||
definition=ds
|
|
||||||
|
|
||||||
return definition,info
|
|
||||||
|
|
||||||
|
|
||||||
def _parser(self):
|
|
||||||
'''
|
|
||||||
Parse a fasta record.
|
|
||||||
|
|
||||||
@attention: internal purpose function
|
|
||||||
|
|
||||||
@return: a C{BioSequence} instance
|
|
||||||
'''
|
|
||||||
seq = self._seq.split('\n')
|
|
||||||
title = seq[0].strip()[1:].split(None,1)
|
|
||||||
id=title[0]
|
|
||||||
if len(title) == 2:
|
|
||||||
definition,info=self._parseFastaDescription(title[1])
|
|
||||||
else:
|
|
||||||
info= None
|
|
||||||
definition=None
|
|
||||||
|
|
||||||
seq=self._joinseq(seq[1:])
|
|
||||||
|
|
||||||
return self._bioseqfactory(id, seq, definition,info,self._rawparser)
|
|
||||||
|
|
||||||
_tagparser = property(get_tagparser, set_tagparser, None, "_tagparser's docstring")
|
|
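For reference, this is what the tagparser built inside fastaIterator matches on a title line, using the default raw parser pattern (the title string is invented):

import re
from obitools import _default_raw_parser

allparser = _default_raw_parser % '[a-zA-Z][a-zA-Z0-9_]*'
tagparser = re.compile('( *%s)+' % allparser)
m = tagparser.search(' count=3; taxid=9606; Homo sapiens mitochondrion')
# m.group(0) covers the leading 'key=value;' run; the remainder becomes the definition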
Binary file not shown.
@@ -1,190 +0,0 @@
'''
Created on 29 August 2009

@author: coissac
'''

from obitools import BioSequence
from obitools import _default_raw_parser
from obitools.format.genericparser import genericEntryIteratorGenerator
from obitools import bioSeqGenerator,AASequence,NucSequence
from obitools.fasta import parseFastaDescription
from _fastq import fastqQualitySangerDecoder,fastqQualitySolexaDecoder
from _fastq import qualityToSangerError,qualityToSolexaError
from _fastq import errorToSangerFastQStr
from _fastq import formatFastq
from _fastq import fastqParserGenetator
from obitools.utils import universalOpen

import re

fastqEntryIterator=genericEntryIteratorGenerator(startEntry='^@',endEntry="^\+",strip=True,join=False)

#def fastqParserGenetator(fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_parseFastaTag):
#
#    qualityDecoder,errorDecoder = {'sanger'   : (fastqQualitySangerDecoder,qualityToSangerError),
#                                   'solexa'   : (fastqQualitySolexaDecoder,qualityToSolexaError),
#                                   'illumina' : (fastqQualitySolexaDecoder,qualityToSangerError)}[fastqvariant]
#
#    def fastqParser(seq):
#        '''
#        Parse a fasta record.
#
#        @attention: internal purpose function
#
#        @param seq: a sequence object containing all lines corresponding
#                    to one fasta sequence
#        @type seq: C{list} or C{tuple} of C{str}
#
#        @param bioseqfactory: a callable object return a BioSequence
#                              instance.
#        @type bioseqfactory: a callable object
#
#        @param tagparser: a compiled regular expression usable
#                          to identify key, value couples from
#                          title line.
#        @type tagparser: regex instance
#
#        @return: a C{BioSequence} instance
#        '''
#
#        title = seq[0][1:].split(None,1)
#        id=title[0]
#        if len(title) == 2:
#            definition,info=parseFastaDescription(title[1], tagparser)
#        else:
#            info= {}
#            definition=None
#
#        quality=errorDecoder(qualityDecoder(seq[3]))
#
#        seq=seq[1]
#
#        seq = bioseqfactory(id, seq, definition,False,**info)
#        seq.quality = quality
#
#        return seq
#
#    return fastqParser


def fastqIterator(file,fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_default_raw_parser):
    '''
    iterate through a fastq file sequence by sequence.
    Sequences returned by this iterator will be BioSequence
    instances

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str
    @param bioseqfactory: a callable object returning a BioSequence
                          instance.
    @type bioseqfactory: a callable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{BioSequence} instance

    @see: L{fastaNucIterator}
    @see: L{fastaAAIterator}

    '''
    fastqParser=fastqParserGenetator(fastqvariant, bioseqfactory, tagparser)
    file = universalOpen(file)
    for entry in fastqEntryIterator(file):
        title=entry[0]
        seq="".join(entry[1:-1])
        quality=''
        lenseq=len(seq)
        while (len(quality) < lenseq):
            quality+=file.next().strip()

        yield fastqParser([title,seq,'+',quality])

def fastqSangerIterator(file,tagparser=_default_raw_parser):
    '''
    iterate through a fastq file sequence by sequence.
    Sequences returned by this iterator will be NucSequence
    instances

    @param file: a line iterator containing fastq data
    @type file: an iterable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucBioSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,'sanger',NucSequence,tagparser)

def fastqSolexaIterator(file,tagparser=_default_raw_parser):
    '''
    iterate through a fastq file sequence by sequence.
    Sequences returned by this iterator will be NucSequence
    instances

    @param file: a line iterator containing fastq data
    @type file: an iterable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucBioSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,'solexa',NucSequence,tagparser)

def fastqIlluminaIterator(file,tagparser=_default_raw_parser):
    '''
    iterate through a fastq file sequence by sequence.
    Sequences returned by this iterator will be NucSequence
    instances

    @param file: a line iterator containing fastq data
    @type file: an iterable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucBioSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,'illumina',NucSequence,tagparser)

def fastqAAIterator(file,tagparser=_default_raw_parser):
    '''
    iterate through a fastq file sequence by sequence.
    Sequences returned by this iterator will be AASequence
    instances

    @param file: a line iterator containing fastq data
    @type file: an iterable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{AABioSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqNucIterator}
    '''
    return fastqIterator(file,'sanger',AASequence,tagparser)
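A hypothetical read loop over a Sanger-encoded fastq file; the file name is a placeholder, and the example assumes the compiled parser attaches the decoded values as seq.quality, as the commented-out Python version above did:

for seq in fastqSangerIterator('reads.fastq'):
    print seq.id, len(seq), seq.quality[:5]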
Binary file not shown.
@@ -1,2 +0,0 @@

fnaTag=' %s *= *([^\s]+)'
@@ -1,8 +0,0 @@
from obitools.fasta import fastaNucIterator
from obitools.fnaqual import fnaTag

def fnaFastaIterator(file):

    x = fastaNucIterator(file, fnaTag)

    return x
@@ -1,137 +0,0 @@
"""
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from obitools import _default_raw_parser
|
|
||||||
from obitools.fasta import fastaIterator
|
|
||||||
from obitools.fnaqual import fnaTag
|
|
||||||
from obitools.location import Location
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class QualitySequence(list):
|
|
||||||
|
|
||||||
def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
|
|
||||||
'''
|
|
||||||
|
|
||||||
@param id:
|
|
||||||
@param seq:
|
|
||||||
@param definition:
|
|
||||||
'''
|
|
||||||
list.__init__(self,seq)
|
|
||||||
self._info = info
|
|
||||||
self.definition=definition
|
|
||||||
self.id=id
|
|
||||||
self._rawinfo=' ' + rawinfo
|
|
||||||
self._rawparser=rawparser
|
|
||||||
|
|
||||||
def getDefinition(self):
|
|
||||||
'''
|
|
||||||
Sequence definition getter
|
|
||||||
|
|
||||||
@return: the sequence definition
|
|
||||||
@rtype: str
|
|
||||||
|
|
||||||
'''
|
|
||||||
return self._definition
|
|
||||||
|
|
||||||
def setDefinition(self, value):
|
|
||||||
self._definition = value
|
|
||||||
|
|
||||||
def getId(self):
|
|
||||||
return self._id
|
|
||||||
|
|
||||||
def setId(self, value):
|
|
||||||
self._id = value
|
|
||||||
|
|
||||||
def getKey(self,key):
|
|
||||||
if key not in self._info:
|
|
||||||
p = re.compile(self._rawparser % key)
|
|
||||||
m = p.search(self._rawinfo)
|
|
||||||
if m is not None:
|
|
||||||
v=m.group(1)
|
|
||||||
self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
|
|
||||||
try:
|
|
||||||
v = eval(v)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
self._info[key]=v
|
|
||||||
else:
|
|
||||||
raise KeyError,key
|
|
||||||
else:
|
|
||||||
v=self._info[key]
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __getitem__(self,key):
|
|
||||||
if isinstance(key,Location):
|
|
||||||
return key.extractSequence(self)
|
|
||||||
elif isinstance(key, str):
|
|
||||||
return self._getKey(key)
|
|
||||||
elif isinstance(key, int):
|
|
||||||
return list.__getitem__(self,key)
|
|
||||||
elif isinstance(key, slice):
|
|
||||||
subseq=list.__getitem__(self,key)
|
|
||||||
info = dict(self._info)
|
|
||||||
if key.start is not None:
|
|
||||||
start = key.start +1
|
|
||||||
else:
|
|
||||||
start = 1
|
|
||||||
if key.stop is not None:
|
|
||||||
stop = key.stop+1
|
|
||||||
else:
|
|
||||||
stop = len(self)
|
|
||||||
if key.step is not None:
|
|
||||||
step = key.step
|
|
||||||
else:
|
|
||||||
step = 1
|
|
||||||
|
|
||||||
info['cut']='[%d,%d,%s]' % (start,stop,step)
|
|
||||||
return QualitySequence(self.id, subseq, self.definition,self._rawinfo,self._rawparser,**info)
|
|
||||||
|
|
||||||
raise TypeError,'key must be an integer, a str or a slice'
|
|
||||||
|
|
||||||
def __setitem__(self,key,value):
|
|
||||||
self._info[key]=value
|
|
||||||
|
|
||||||
def __delitem__(self,key):
|
|
||||||
if isinstance(key, str):
|
|
||||||
del self._info[key]
|
|
||||||
else:
|
|
||||||
raise TypeError,key
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return list.__iter__(self)
|
|
||||||
|
|
||||||
def __contains__(self,key):
|
|
||||||
return key in self._info
|
|
||||||
|
|
||||||
def getTags(self):
|
|
||||||
return self._info
|
|
||||||
|
|
||||||
def complement(self):
|
|
||||||
'''
|
|
||||||
|
|
||||||
'''
|
|
||||||
cseq = self[::-1]
|
|
||||||
rep = QualitySequence(self.id,cseq,self.definition,self._rawinfo,self._rawparser,**self._info)
|
|
||||||
rep._info['complemented']=not rep._info.get('complemented',False)
|
|
||||||
return rep
|
|
||||||
|
|
||||||
|
|
||||||
definition = property(getDefinition, setDefinition, None, "Sequence Definition")
|
|
||||||
|
|
||||||
id = property(getId, setId, None, 'Sequence identifier')
|
|
||||||
|
|
||||||
|
|
||||||
def _qualityJoinSeq(seqarray):
|
|
||||||
text = ' '.join([x.strip() for x in seqarray])
|
|
||||||
return [int(x) for x in text.split()]
|
|
||||||
|
|
||||||
def qualityIterator(file):
|
|
||||||
for q in fastaIterator(file, QualitySequence, fnaTag, _qualityJoinSeq):
|
|
||||||
yield q
|
|
||||||
|
|
||||||
|
|
||||||
|
|
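Slicing a QualitySequence keeps the per-position values and records the cut coordinates in the tags:

q = QualitySequence('read1', [40, 38, 37, 12, 9, 30])
sub = q[1:4]
# list(sub) == [38, 37, 12] and sub['cut'] == '[2,5,1]'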
@@ -1,28 +0,0 @@
from obitools import bioSeqGenerator
from obitools.utils import universalOpen


class SequenceFileIterator:

    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator):
        self._inputfile = universalOpen(inputfile)
        self._bioseqfactory = bioseqfactory

    def get_inputfile(self):
        return self.__file


    def get_bioseqfactory(self):
        return self.__bioseqfactory

    def next(self):
        entry = self._inputfile.next()
        return self._parse(entry)

    def __iter__(self):
        return self

    _inputfile = property(get_inputfile, None, None, "_file's docstring")
    _bioseqfactory = property(get_bioseqfactory, None, None, "_bioseqfactory's docstring")
Binary file not shown.
@@ -1,217 +0,0 @@
"""
|
|
||||||
G{packagetree format}
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from obitools.utils import universalOpen
|
|
||||||
|
|
||||||
def genericEntryIteratorGenerator(startEntry=None,endEntry=None,
|
|
||||||
head=False,tail=False,
|
|
||||||
strip=False,join=True):
|
|
||||||
'''
|
|
||||||
Transfome a text line iterator to an entry oriented iterator.
|
|
||||||
|
|
||||||
This iterator converted is useful to implement first stage
|
|
||||||
of flat file parsing.
|
|
||||||
|
|
||||||
@param startEntry: a regular pattern matching the beginning of
|
|
||||||
an entry
|
|
||||||
@type startEntry: C{str} or None
|
|
||||||
@param endEntry: a regular pattern matching the end of
|
|
||||||
an entry
|
|
||||||
@type endEntry: C{str} or None
|
|
||||||
@param head: indicate if an header is present before
|
|
||||||
the first entry (as in many original genbank
|
|
||||||
files)
|
|
||||||
@type head: C{bool}
|
|
||||||
@param tail: indicate if some extra informations are present
|
|
||||||
after the last entry.
|
|
||||||
@type tail: C{bool}
|
|
||||||
|
|
||||||
@return: an iterator on entries in text format
|
|
||||||
@rtype: an iterator on C{str}
|
|
||||||
'''
|
|
||||||
|
|
||||||
def isBeginning(line):
|
|
||||||
return startEntry is None or startEntry.match(line) is not None
|
|
||||||
|
|
||||||
def isEnding(line):
|
|
||||||
return ((endEntry is not None and endEntry.match(line) is not None) or
|
|
||||||
(endEntry is None and startEntry is not None and startEntry.match(line) is not None))
|
|
||||||
|
|
||||||
def transparentIteratorEntry(file):
|
|
||||||
file = universalOpen(file)
|
|
||||||
return file
|
|
||||||
|
|
||||||
def genericEntryIterator(file):
|
|
||||||
file = universalOpen(file)
|
|
||||||
entry = []
|
|
||||||
line = file.next()
|
|
||||||
started = head or isBeginning(line)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while 1:
|
|
||||||
while not started:
|
|
||||||
line = file.next()
|
|
||||||
started = isBeginning(line)
|
|
||||||
|
|
||||||
if endEntry is None:
|
|
||||||
entry.append(line)
|
|
||||||
line = file.next()
|
|
||||||
|
|
||||||
while started:
|
|
||||||
end = isEnding(line)
|
|
||||||
if end:
|
|
||||||
if endEntry is not None:
|
|
||||||
entry.append(line)
|
|
||||||
if join:
|
|
||||||
e = ''.join(entry)
|
|
||||||
if strip:
|
|
||||||
e=e.strip()
|
|
||||||
else:
|
|
||||||
e=entry
|
|
||||||
if strip:
|
|
||||||
e=[x.strip() for x in e]
|
|
||||||
entry=[]
|
|
||||||
yield e
|
|
||||||
started=False
|
|
||||||
if endEntry is not None:
|
|
||||||
line = file.next()
|
|
||||||
else:
|
|
||||||
entry.append(line)
|
|
||||||
line = file.next()
|
|
||||||
|
|
||||||
started = isBeginning(line)
|
|
||||||
|
|
||||||
except StopIteration:
|
|
||||||
if entry and (endEntry is None or tail):
|
|
||||||
if join:
|
|
||||||
e = ''.join(entry)
|
|
||||||
if strip:
|
|
||||||
e=e.strip()
|
|
||||||
else:
|
|
||||||
e=entry
|
|
||||||
if strip:
|
|
||||||
e=[x.strip() for x in e]
|
|
||||||
yield e
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if startEntry is not None:
|
|
||||||
startEntry = re.compile(startEntry)
|
|
||||||
if endEntry is not None:
|
|
||||||
endEntry = re.compile(endEntry)
|
|
||||||
|
|
||||||
if startEntry is None and endEntry is None:
|
|
||||||
return transparentIteratorEntry
|
|
||||||
|
|
||||||
return genericEntryIterator
|
|
||||||
|
|
||||||
|
|
||||||
class GenericParser(object):
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
startEntry=None,
|
|
||||||
endEntry=None,
|
|
||||||
head=False,
|
|
||||||
tail=False,
|
|
||||||
strip=False,
|
|
||||||
**parseAction):
|
|
||||||
"""
|
|
||||||
@param startEntry: a regular pattern matching the beginning of
|
|
||||||
an entry
|
|
||||||
@type startEntry: C{str} or None
|
|
||||||
@param endEntry: a regular pattern matching the end of
|
|
||||||
an entry
|
|
||||||
@type endEntry: C{str} or None
|
|
||||||
@param head: indicate if an header is present before
|
|
||||||
the first entry (as in many original genbank
|
|
||||||
files)
|
|
||||||
@type head: C{bool}
|
|
||||||
@param tail: indicate if some extra informations are present
|
|
||||||
after the last entry.
|
|
||||||
@type tail: C{bool}
|
|
||||||
|
|
||||||
@param parseAction:
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.flatiterator= genericEntryIteratorGenerator(startEntry,
|
|
||||||
endEntry,
|
|
||||||
head,
|
|
||||||
tail,
|
|
||||||
strip)
|
|
||||||
|
|
||||||
self.action={}
|
|
||||||
|
|
||||||
for k in parseAction:
|
|
||||||
self.addParseAction(k,*parseAction[k])
|
|
||||||
|
|
||||||
def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
|
|
||||||
'''
|
|
||||||
Add a parse action to the generic parser. A parse action
|
|
||||||
allows to extract one information from an entry. A parse
|
|
||||||
action is defined by a name and a method to extract this
|
|
||||||
information from the full text entry.
|
|
||||||
|
|
||||||
A parse action can be defined following two ways.
|
|
||||||
|
|
||||||
- via regular expression patterns
|
|
||||||
|
|
||||||
- via dedicated function.
|
|
||||||
|
|
||||||
In the first case, you have to indicate at least the
|
|
||||||
dataMatcher regular pattern. This pattern should match exactly
|
|
||||||
the data part you want to retrieve. If cleanning of extra
|
|
||||||
characters is needed. The second pattern dataCLeanner can be
|
|
||||||
used to specifyed these characters.
|
|
||||||
|
|
||||||
In the second case you must provide a callable object (function)
|
|
||||||
that extract and clean data from the text entry. This function
|
|
||||||
should return an array containing all data retrevied even if
|
|
||||||
no data or only one data is retrevied.
|
|
||||||
|
|
||||||
@summary: Add a parse action to the generic parser.
|
|
||||||
|
|
||||||
@param name: name of the data extracted
|
|
||||||
@type name: C{str}
|
|
||||||
@param dataMatcher: a regular pattern matching the data
|
|
||||||
or a callable object parsing the
|
|
||||||
entry and returning a list of marched data
|
|
||||||
@type dataMatcher: C{str} or C{SRE_Pattern} instance or a callable
|
|
||||||
object
|
|
||||||
@param dataCleaner: a regular pattern matching part of the data
|
|
||||||
to suppress.
|
|
||||||
@type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None}
|
|
||||||
@param cleanSub: string used to replace dataCleaner matches.
|
|
||||||
Default is an empty string
|
|
||||||
@type cleanSub: C{str}
|
|
||||||
|
|
||||||
'''
|
|
||||||
if callable(dataMatcher):
|
|
||||||
self.action[name]=dataMatcher
|
|
||||||
else :
|
|
||||||
if isinstance(dataMatcher, str):
|
|
||||||
dataMatcher=re.compile(dataMatcher)
|
|
||||||
if isinstance(dataCleaner, str):
|
|
||||||
dataCleaner=re.compile(dataCleaner)
|
|
||||||
self.action[name]=self._buildREParser(dataMatcher,
|
|
||||||
dataCleaner,
|
|
||||||
cleanSub)
|
|
||||||
|
|
||||||
def _buildREParser(self,dataMatcher,dataCleaner,cleanSub):
|
|
||||||
def parser(data):
|
|
||||||
x = dataMatcher.findall(data)
|
|
||||||
if dataCleaner is not None:
|
|
||||||
x = [dataCleaner.sub(cleanSub,y) for y in x]
|
|
||||||
return x
|
|
||||||
return parser
|
|
||||||
|
|
||||||
def __call__(self,file):
|
|
||||||
for e in self.flatiterator(file):
|
|
||||||
pe = {'fullentry':e}
|
|
||||||
for k in self.action:
|
|
||||||
pe[k]=self.action[k](e)
|
|
||||||
yield pe
|
|
||||||
|
|
||||||
|
|
||||||
|
|
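A sketch of the two-stage parsing this module supports, on an invented EMBL-like flat file; the pattern and file name are illustrative, and the inline (?m) flag makes '^' match at every line inside an entry:

parser = GenericParser(startEntry='^ID',
                       AC=('(?m)^AC +(\S+);',))
for entry in parser('flatfile.dat'):
    print entry['AC'], len(entry['fullentry'])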
@@ -1,274 +0,0 @@
__docformat__ = 'restructuredtext'
|
|
||||||
|
|
||||||
import re
|
|
||||||
import string
|
|
||||||
import textwrap
|
|
||||||
|
|
||||||
|
|
||||||
from obitools.obo.go.parser import GOEntryIterator
|
|
||||||
from obitools.obo.go.parser import GOTerm
|
|
||||||
from obitools.obo.go.parser import GOEntry
|
|
||||||
|
|
||||||
"""
|
|
||||||
go_obo.py : gene_ontology_edit.obo file parser:
|
|
||||||
----------------------------------------------------
|
|
||||||
|
|
||||||
- OBOFile class: open a flat file and return an entry.
|
|
||||||
|
|
||||||
"""
|
|
||||||
class OBOFile(object):
|
|
||||||
"""
|
|
||||||
Iterator over all entries of an OBO file
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,_path):
|
|
||||||
self.file = GOEntryIterator(_path)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def next(self):
|
|
||||||
fiche = self.file.next()
|
|
||||||
|
|
||||||
if isinstance(fiche, GOTerm):
|
|
||||||
self.isaterm=True
|
|
||||||
return Term(fiche)
|
|
||||||
elif isinstance(fiche, GOEntry):
|
|
||||||
self.isaterm=False
|
|
||||||
return Entry(fiche)
|
|
||||||
else:
|
|
||||||
self.isaterm=False
|
|
||||||
return Header(fiche)
|
|
||||||
|
|
||||||
|
|
||||||
############# tout le reste doit descendre a l'etage obitools/ogo/go/parser.py ##########
|
|
||||||
|
|
||||||
# define an XRef into a go_obo.py script in the microbi pylib
|
|
||||||
class Xref(object):
|
|
||||||
"""
|
|
||||||
Class Xref
|
|
||||||
Xref.db Xref database
|
|
||||||
Xref.id Xref identifier
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,description):
|
|
||||||
data = description.split(':')
|
|
||||||
self.db = data[0].strip()
|
|
||||||
self.id = data[1].strip()
|
|
||||||
|
|
||||||
# define a RelatedTerm into a go_obo.py script in the microbi pylib
|
|
||||||
class RelatedTerm(object):
|
|
||||||
"""
|
|
||||||
Class RelatedTerm
|
|
||||||
RelatedTerm.relation RelatedTerm relation
|
|
||||||
RelatedTerm.related_term RelatedTerm GO identifier
|
|
||||||
RelatedTerm.comment all terms have 0 or 1 comment
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,relation,value,comment):
|
|
||||||
self.relation = relation
|
|
||||||
self.related_term = value.strip('GO:')
|
|
||||||
self.comment = comment
|
|
||||||
|
|
||||||
|
|
||||||
# define into a go_obo.py script in the microbi pylib
|
|
||||||
#class Term(object):
|
|
||||||
# """
|
|
||||||
# class representing an OBO term (entry).
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# def __init__(self):
|
|
||||||
# raise RuntimeError('biodb.go_obo is an abstract class')
|
|
||||||
#
|
|
||||||
# def __checkEntry__(self):
|
|
||||||
# minimum=(hasattr(self,'goid') )
|
|
||||||
# if not minimum:
|
|
||||||
# raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
|
|
||||||
|
|
||||||
class Term(object):
    """
    Class Term
    representing a GO term.
    """

    def __init__(self,data=None):
        """
        """
        self.data=data
        self.isaterm = True

        if data:
            self.__filtreGoid__()
            self.__filtreName__()
            self.__filtreComment__()
            self.__filtreSynonyms__()
            self.__filtreDef__()
            self.__filtreParents__()
            self.__filtreRelationships__()
            self.__filtreRelation__()
            self.__filtreObsolete__()
            self.__filtreAltIds__()
            self.__filtreXRefs__()
            self.__filtreSubsets__()

            # check if all required attributes were valued
            self.__checkEntry__()

    def __checkEntry__(self):
        minimum=(hasattr(self,'goid'))
        if not minimum:
            raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])

    def __filtreGoid__(self):
        """
        Extract GO id.
        """
        self.goid = self.data.id.value.strip('GO:')

    def __filtreName__(self):
        """
        Extract GO name.
        """
        self.name = self.data.name.value

    def __filtreSynonyms__(self):
        """
        Extract GO synonym(s).
        """
        self.list_synonyms = {}
        if self.data.synonyms:
            for y in self.data.synonyms:
                self.list_synonyms[y.value] = y.scope

    def __filtreComment__(self):
        """
        manage None comments
        """
        if self.data.comment != None:
            self.comment = self.data.comment.value
        else:
            self.comment = ""

    def __filtreDef__(self):
        """
        Extract GO definition.
        """
        if self.data.definition != None:
            self.definition = self.data.definition.value
        else:
            self.definition = ""

    def __filtreParents__(self):
        """
        To make the is_a hierarchy
        """
        if self.data.is_a != None:
            self.is_a = set([isa.value.strip('GO:') for isa in self.data.is_a])
        else:
            self.is_a = set()

    def __filtreRelation__(self):
        """
        To make the part_of hierarchy
        """
        self.part_of = set()
        self.regulates = set()
        self.negatively_regulates = set()
        self.positively_regulates = set()

        if self.data.relationship != None:
            for rel in self.data.relationship:
                if rel.relationship == "part_of":
                    self.part_of.add(rel.value.strip('GO:'))
                elif rel.relationship == "regulates":
                    self.regulates.add(rel.value.strip('GO:'))
                elif rel.relationship == "negatively_regulates":
                    self.negatively_regulates.add(rel.value.strip('GO:'))
                elif rel.relationship == "positively_regulates":
                    self.positively_regulates.add(rel.value.strip('GO:'))

    def __filtreRelationships__(self):
        """
        Relation list with other GO Terms (is_a, part_of or some regulates relation)
        """
        self.related_term =[]
        if self.data.relationship != None:
            for x in self.data.relationship:
                self.related_term.append(RelatedTerm(x.relationship,x.value,x.__doc__))
                #self.related_term.append(RelatedTerm(x.relationship,x.value,x.comment))
        if self.data.is_a != None:
            for x in self.data.is_a:
                self.related_term.append(RelatedTerm('is_a',x.value,x.__doc__))
                #self.related_term.append(RelatedTerm('is_a',x.value,x.comment))

    def __filtreObsolete__(self):
        """
        to each obsolete term corresponds a set of GO identifiers,
        so that this GO term can be considered as other GO terms
        """
        self.considers = set()
        self.replaces = set()
        self.is_obsolete = self.data.is_obsolete
        if self.data.is_obsolete:
            if self.data.consider:
                self.considers = set([considered.value.strip('GO:') for considered in self.data.consider])
            if self.data.replaced_by:
                self.replaces = set([replaced.value.strip('GO:') for replaced in self.data.replaced_by])

    def __filtreAltIds__(self):
        """
        alternate id(s) for this term (= alias in the geneontology schema model!)
        """
        if self.data.alt_ids:
            self.alt_ids = set([x.value.strip('GO:') for x in self.data.alt_ids])
        else:
            self.alt_ids = set()

    def __filtreXRefs__(self):
        """
        cross references to other databases
        """
        self.xrefs = set()
        if self.data.xrefs:
            self.xrefs = set([Xref(x.value.reference) for x in self.data.xrefs])

    def __filtreSubsets__(self):
        """
        subset label to make smaller sets of GO Terms
        """
        self.subsets = set()
        if self.data.subsets:
            self.subsets = set([x.value for x in self.data.subsets])

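
# Usage sketch (illustrative, not from the original file): Term expects a parsed
# OBO stanza object exposing attributes such as id, name, comment, is_a,
# relationship, is_obsolete... The stub below is hypothetical and only shows the
# attribute protocol the __filtre*__ methods rely on.
#
#     class _Value(object):
#         def __init__(self, value):
#             self.value = value
#
#     class _Stanza(object):
#         def __init__(self):
#             self.id = _Value('GO:0008150')
#             self.name = _Value('biological_process')
#             self.comment = None
#             self.synonyms = None
#             self.definition = None
#             self.is_a = None
#             self.relationship = None
#             self.is_obsolete = False
#             self.consider = None
#             self.replaced_by = None
#             self.alt_ids = None
#             self.xrefs = None
#             self.subsets = None
#
#     term = Term(_Stanza())
#     term.goid    # -> '0008150'
#     term.name    # -> 'biological_process'
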
class Entry(object):
    """
    a Stanza entry, like [Typedef] for example
    """
    def __init__(self,data=None):
        self.data=data
        self.isaterm=False
        self.isanentry=True


class Header(object):
    """
    class representing a GO header.
    """

    def __init__(self,data=None):
        """
        """
        self.data=data
        self.isaterm = False

@ -1,284 +0,0 @@
'''
Created on 13 Oct. 2009

@author: coissac
'''

from obitools.format.sequence.embl import emblIterator
from obitools.format.sequence.genbank import genbankIterator
from obitools.format.sequence.fnaqual import fnaFastaIterator
from obitools.format.sequence.fasta import fastaAAIterator,fastaNucIterator,fastaIterator
from obitools.format.sequence.fastq import fastqIlluminaIterator,fastqSolexaIterator
from obitools.fastq import fastqSangerIterator
from obitools.fnaqual.quality import qualityIterator
from obitools.fasta import formatFasta, rawFastaIterator,\
                           formatSAPFastaGenerator
from obitools.fastq import formatFastq

from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
from obitools.ecopcr.options import loadTaxonomyDatabase

#from obitools.format._format import printOutput

from array import array
from itertools import chain
import sys

import re
from obitools.ecopcr import EcoPCRFile

def addInputFormatOption(optionManager):
#    optionManager.add_option('--rank',
#                             action="store_true", dest='addrank',
#                             default=False,
#                             help="add a rank attribute to the sequence "
#                                  "indicating the sequence position in the input data")
    optionManager.add_option('--genbank',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='genbank',
                             help="input file is in genbank format")
    optionManager.add_option('--embl',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='embl',
                             help="input file is in embl format")

    optionManager.add_option('--fasta',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='fasta',
                             help="input file is in fasta nucleic format (including obitools fasta extensions)")

    optionManager.add_option('--ecopcr',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='ecopcr',
                             help="input file is in ecoPCR output format")

    optionManager.add_option('--raw-fasta',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='rawfasta',
                             help="input file is in fasta format (but more tolerant to format variant)")

    optionManager.add_option('--fna',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='fna',
                             help="input file is in fasta nucleic format produced by 454 sequencer pipeline")

    optionManager.add_option('--qual',
                             action="store", dest="withqualfile",
                             type='str',
                             default=None,
                             help="Specify the name of a quality file produced by 454 sequencer pipeline")

    optionManager.add_option('--sanger',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='sanger',
                             help="input file is in sanger fastq nucleic format (standard fastq)")

    optionManager.add_option('--solexa',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='solexa',
                             help="input file is in fastq nucleic format produced by solexa sequencer")

    optionManager.add_option('--illumina',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='illumina',
                             help="input file is in fastq nucleic format produced by old solexa sequencer")

    optionManager.add_option('--nuc',
                             action="store_const", dest="moltype",
                             default=None,
                             const='nuc',
                             help="input file is nucleic sequences")
    optionManager.add_option('--prot',
                             action="store_const", dest="moltype",
                             default=None,
                             const='pep',
                             help="input file is protein sequences")


def addOutputFormatOption(optionManager):
    optionManager.add_option('--fastq-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFastq,
                             help="output sequences in sanger fastq format")
    optionManager.add_option('--fasta-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFasta,
                             help="output sequences in obitools fasta format")
    optionManager.add_option('--sap-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatSAPFastaGenerator,
                             help="output sequences in sap fasta format")
    optionManager.add_option('--strict-sap',
                             action='store_true',dest='strictsap',
                             default=False,
                             help="Print sequences in upper case (default is lower case)")
    optionManager.add_option('--ecopcr-output',
                             action="store", dest="ecopcroutput",
                             default=None,
                             help="output sequences in obitools ecopcr format")
    optionManager.add_option('--uppercase',
                             action='store_true',dest='uppercase',
                             default=False,
                             help="Print sequences in upper case (default is lower case)")


def addInOutputOption(optionManager):
    addInputFormatOption(optionManager)
    addOutputFormatOption(optionManager)

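
# Usage sketch (illustrative, not from the original file): any object exposing an
# optparse-style add_option() method can serve as the optionManager; an
# optparse.OptionParser instance is a natural fit for the signature used above.
#
#     from optparse import OptionParser
#
#     parser = OptionParser()
#     addInOutputOption(parser)
#     options, args = parser.parse_args(['--fasta', '--nuc'])
#     options.seqinformat   # -> 'fasta'
#     options.moltype       # -> 'nuc'
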
def autoEntriesIterator(options):
    options.outputFormater=formatFasta
    options.outputFormat="fasta"

    ecopcr_pattern = re.compile('^[^ ]+ +| +[0-9]+ +| + [0-9]+ + | +')

    def annotatedIterator(formatIterator):
        options.outputFormater=formatFasta
        options.outputFormat="fasta"
        def iterator(lineiterator):
            for s in formatIterator(lineiterator):
                s.extractTaxon()
                yield s

        return iterator

    def withQualIterator(qualityfile):
        options.outputFormater=formatFastq
        options.outputFormat="fastq"
        def iterator(lineiterator):
            for s in fnaFastaIterator(lineiterator):
                q = qualityfile.next()
                quality = array('d',(10.**(-x/10.) for x in q))
                s.quality=quality
                yield s

        return iterator

    def autoSequenceIterator(lineiterator):
        options.outputFormater=formatFasta
        options.outputFormat="fasta"
        first = lineiterator.next()
        if first[0]==">":
            if options.withqualfile is not None:
                qualfile=qualityIterator(options.withqualfile)
                reader=withQualIterator(qualfile)
                options.outputFormater=formatFastq
                options.outputFormat="fastq"
            elif options.moltype=='nuc':
                reader=fastaNucIterator
            elif options.moltype=='pep':
                reader=fastaAAIterator
            else:
                reader=fastaIterator
        elif first[0]=='@':
            reader=fastqSangerIterator
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
        elif first[0:3]=='ID ':
            reader=emblIterator
        elif first[0:6]=='LOCUS ':
            reader=genbankIterator
        elif first[0]=="#" or ecopcr_pattern.search(first):
            reader=EcoPCRFile
        else:
            raise AssertionError,'file is not in fasta, fastq, embl, genbank or ecoPCR format'

        input = reader(chain([first],lineiterator))

        return input

    if options.seqinformat is None:
        reader = autoSequenceIterator
    else:
        if options.seqinformat=='fasta':
            if options.moltype=='nuc':
                reader=fastaNucIterator
            elif options.moltype=='pep':
                reader=fastaAAIterator
            else:
                reader=fastaIterator
        elif options.seqinformat=='rawfasta':
            reader=annotatedIterator(rawFastaIterator)
        elif options.seqinformat=='genbank':
            reader=annotatedIterator(genbankIterator)
        elif options.seqinformat=='embl':
            reader=annotatedIterator(emblIterator)
        elif options.seqinformat=='fna':
            reader=fnaFastaIterator
        elif options.seqinformat=='sanger':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqSangerIterator
        elif options.seqinformat=='solexa':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqSolexaIterator
        elif options.seqinformat=='illumina':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqIlluminaIterator
        elif options.seqinformat=='ecopcr':
            reader=EcoPCRFile

        if options.seqinformat=='fna' and options.withqualfile is not None:
            qualfile=qualityIterator(options.withqualfile)
            reader=withQualIterator(qualfile)
            options.outputFormater=formatFastq
            options.outputFormat="fastq"

#    if options.addrank:
#        reader = withRankIterator(reader)
    return reader

def sequenceWriterGenerator(options,output=sys.stdout):
    class SequenceWriter:
        def __init__(self,options,file=sys.stdout):
            self._format=None
            self._file=file
            self._upper=options.uppercase
        def put(self,seq):
            if self._format is None:
                self._format=formatFasta
                if options.output is not None:
                    self._format=options.output
                    if self._format is formatSAPFastaGenerator:
                        self._format=formatSAPFastaGenerator(options)
                elif options.outputFormater is not None:
                    self._format=options.outputFormater
            s = self._format(seq,upper=self._upper)
            try:
                self._file.write(s)
                self._file.write("\n")
            except IOError:
                sys.exit(0)

    if options.ecopcroutput is not None:
        taxo = loadTaxonomyDatabase(options)
        writer=EcoPCRDBSequenceWriter(options.ecopcroutput,taxonomy=taxo)
    else:
        writer=SequenceWriter(options,output)

    def sequenceWriter(sequence):
        writer.put(sequence)

    return sequenceWriter

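
# Usage sketch (illustrative, not from the original file; the input file name
# comes from the command line): a typical read/write loop built from the
# helpers above.
#
#     from optparse import OptionParser
#
#     parser = OptionParser()
#     addInOutputOption(parser)
#     options, args = parser.parse_args()
#
#     entries = autoEntriesIterator(options)
#     writer = sequenceWriterGenerator(options)
#     for seq in entries(open(args[0])):
#         writer(seq)
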
@ -1,24 +0,0 @@
from obitools.fasta import fastaIterator
from obitools.fastq import fastqSangerIterator
from obitools.seqdb.embl.parser import emblIterator
from obitools.seqdb.genbank.parser import genbankIterator
from itertools import chain
from obitools.utils import universalOpen

def autoSequenceIterator(file):
    lineiterator = universalOpen(file)
    first = lineiterator.next()
    if first[0]==">":
        reader=fastaIterator
    elif first[0]=='@':
        reader=fastqSangerIterator
    elif first[0:3]=='ID ':
        reader=emblIterator
    elif first[0:6]=='LOCUS ':
        reader=genbankIterator
    else:
        raise AssertionError,'file is not in fasta, fastq, embl, or genbank format'

    input = reader(chain([first],lineiterator))

    return input
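
# Usage sketch (illustrative, not from the original file; the file name is
# hypothetical): the reader is picked from the first character or keyword of
# the file ('>' fasta, '@' fastq, 'ID ' embl, 'LOCUS ' genbank).
#
#     for seq in autoSequenceIterator('reads.fastq'):
#         print seq.id      # seq is a BioSequence produced by the chosen parser
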
@ -1,2 +0,0 @@
from obitools.seqdb.embl.parser import emblIterator,emblParser

@ -1,4 +0,0 @@
from obitools.fasta import fastaIterator,fastaParser
from obitools.fasta import fastaAAIterator,fastaAAParser
from obitools.fasta import fastaNucIterator,fastaNucParser
from obitools.fasta import formatFasta
@ -1,13 +0,0 @@
'''
Created on 15 Jan. 2010

@author: coissac
'''

from obitools.fastq import fastqIterator,fastqParserGenetator
from obitools.fastq import fastqSangerIterator,fastqSolexaIterator, \
                           fastqIlluminaIterator
from obitools.fastq import fastqAAIterator
from obitools.fastq import formatFastq

@ -1,8 +0,0 @@
'''
Created on 12 Oct. 2009

@author: coissac
'''

from obitools.fnaqual.fasta import fnaFastaIterator
from obitools.fnaqual.quality import qualityIterator
@ -1,4 +0,0 @@
from obitools.seqdb.genbank.parser import genpepIterator,genpepParser
from obitools.seqdb.genbank.parser import genbankIterator,genbankParser

@ -1,5 +0,0 @@
from obitools.tagmatcher.parser import tagMatcherParser
from obitools.tagmatcher.parser import TagMatcherIterator
from obitools.tagmatcher.parser import formatTagMatcher

tagMatcherIterator=TagMatcherIterator
@ -1,33 +0,0 @@
from itertools import imap
from obitools import utils

class GoAFileIterator(utils.ColumnFile):
    def __init__(self,stream):
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,))

    _colname = ['database',
                'ac',
                'symbol',
                'qualifier',
                'goid',
                'origin',
                'evidence',
                'evidnce_origine',
                'namespace',
                'db_object_name',
                'gene',
                'object_type',
                'taxid',
                'date',
                'assigned_by']

    def next(self):
        data = utils.ColumnFile.next(self)
        # pair each column name with its value (missing trailing columns map to None)
        data = dict(imap(None,GoAFileIterator._colname,data))

        return data
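
# Usage sketch (illustrative, not from the original file; the file name is
# hypothetical): each iteration yields one GOA annotation line as a
# column-name -> value dict, assuming ColumnFile's iterator protocol.
#
#     annotations = GoAFileIterator(open('gene_association.goa'))
#     for row in annotations:
#         print row['ac'], row['goid']
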
@ -1,962 +0,0 @@
'''
**obitools.graph** for representing graph structure in obitools
---------------------------------------------------------------

.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>


This module offers classes to manipulate graphs, mainly through the
:py:class:`obitools.graph.Graph` class.

.. inheritance-diagram:: Graph DiGraph UndirectedGraph
    :parts: 2

'''

import sys

from obitools.utils import progressBar

class Indexer(dict):
    '''
    Allows to manage the conversion between arbitrary hashable python
    values and unique integer keys
    '''

    def __init__(self):

        self.__max=0
        self.__reverse=[]

    def getLabel(self,index):
        '''
        Return the python value associated to an integer index.

        :param index: an index value
        :type index: int

        :raises: IndexError if the index is not used in this
                 Indexer instance
        '''
        return self.__reverse[index]

    def getIndex(self,key,strict=False):
        '''
        Return the index associated to a **key** in the indexer. Two
        modes are available :

            - strict mode :

                if the key is not known by the :py:class:`Indexer` instance
                a :py:exc:`KeyError` exception is raised.

            - non strict mode :

                in this mode if the requested **key** is absent, it is added to
                the :py:class:`Indexer` instance and the new index is returned

        :param key: the requested key
        :type key: a hashable python value

        :param strict: selects the lookup mode
        :type strict: bool

        :return: the index corresponding to the key
        :rtype: int

        :raises: - :py:exc:`KeyError` in strict mode if the key is absent
                   from the :py:class:`Indexer` instance

                 - :py:exc:`TypeError` if key is not a hashable value.
        '''
        if dict.__contains__(self,key):
            return dict.__getitem__(self,key)
        elif strict:
            raise KeyError,key
        else:
            value = self.__max
            self[key]= value
            self.__reverse.append(key)
            self.__max+=1
            return value

    def __getitem__(self,key):
        '''
        Implement the [] operator to emulate the standard dictionary
        behaviour on :py:class:`Indexer` and return the integer key
        associated to a python value.

        Actually this method calls the :py:meth:`getIndex` method in
        non strict mode so it only raises a :py:exc:`TypeError`
        if key is not a hashable value.

        :param key: the value to index
        :type key: a hashable python value

        :return: a unique integer value associated to the key
        :rtype: int

        :raises: :py:exc:`TypeError` if **key** is not a hashable value.

        '''
        return self.getIndex(key)

    def __eq__(self,index):
        '''
        Implement the equality operator **==** for comparing two :py:class:`Indexer` instances.
        Two :py:class:`Indexer` instances are equal only if they are physically
        the same instance

        :param index: the second Indexer
        :type index: an :py:class:`Indexer` instance

        :return: True if the two :py:class:`Indexer` instances are the same
        :rtype: bool
        '''
        # identity comparison, as documented above
        return id(self)==id(index)

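
# Usage sketch (illustrative, not from the original file): the [] operator
# registers unseen labels on the fly; getIndex(strict=True) only looks up
# existing ones.
#
#     idx = Indexer()
#     i = idx['node-A']                     # first use: a new index is allocated
#     idx.getLabel(i)                       # -> 'node-A'
#     idx.getIndex('node-A', strict=True)   # -> same index, no insertion
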
class Graph(object):
    '''
    Class used to represent directed or undirected graph.

    .. warning::

        Only one edge can connect two nodes in a given direction.

    .. warning::

        Specifying nodes through their index speeds up your code, but as no check
        is done on index values, it may result in inconsistency. So prefer the
        use of node labels to specify a node.

    '''
    def __init__(self,label='G',directed=False,indexer=None,nodes=None,edges=None):
        '''
        :param label: Graph name, set to 'G' by default
        :type label: str

        :param directed: True for directed graph, set to False by default
        :type directed: boolean

        :param indexer: node label indexer. This allows to define several graphs
                        sharing the same indexer (see : :py:meth:`newEmpty`)
        :type indexer: :py:class:`Indexer`

        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value

        :param edges: set of edges to add to the graph
        :type edges: iterable value
        '''

        self._directed=directed
        if indexer is None:
            indexer = Indexer()
        self._index = indexer
        self._node = {}
        self._node_attrs = {}
        self._edge_attrs = {}
        self._label=label

    def newEmpty(self):
        """
        Build a new empty graph using the same :py:class:`Indexer` instance.
        This allows two graphs to share their vertices through their indices.
        """
        n = Graph(self._label+"_compact",self._directed,self._index)

        return n

    def addNode(self,node=None,index=None,**data):
        '''
        Add a new node or update an existing one.

        :param node: the new node label or the label of an existing node
                     for updating it.
        :type node: a hashable python value

        :param index: the index of an existing node for updating it.
        :type index: int

        :return: the index of the node
        :rtype: int

        :raises: :py:exc:`IndexError` if index is not **None** and
                 corresponds to an unused index in this graph.
        '''
        if index is None:
            index = self._index[node]

            if index not in self._node:
                self._node[index]=set()
        else:
            if index not in self._node:
                raise IndexError,"This index is not used in this graph"

        if data:
            if index in self._node_attrs:
                self._node_attrs[index].update(data)
            else:
                self._node_attrs[index]=dict(data)

        return index

    def __contains__(self,node):
        try:
            index = self._index.getIndex(node,strict=True)
            r = index in self._node
        except KeyError:
            r=False
        return r

    def getNode(self,node=None,index=None):
        """
        :param node: a node label.
        :type node: a hashable python value

        :param index: the index of an existing node.
        :type index: int

        .. note:: Index values take precedence over node labels.

        :return: the looked for node
        :rtype: :py:class:`Node`

        :raises: :py:exc:`IndexError` if the specified node label
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index is None:
            index = self._index.getIndex(node, True)
        return Node(index,self)

    def getBestNode(self,estimator):
        '''
        Select the node maximizing the estimator function

        :param estimator: the function to maximize
        :type estimator: a function returning a numerical value and accepting one
                         argument of type :py:class:`Node`

        :return: the best node
        :rtype: :py:class:`Node`
        '''

        bestScore=0
        best=None
        for n in self:
            score = estimator(n)
            if best is None or score > bestScore:
                bestScore = score
                best=n
        return best

    def delNode(self,node=None,index=None):
        """
        Delete a node from a graph and all associated edges.

        :param node: a node label.
        :type node: a hashable python value

        :param index: the index of an existing node.
        :type index: int

        .. note:: Index values take precedence over node labels.

        :raises: :py:exc:`IndexError` if the specified node label
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index is None:
            index = self._index[node]

        for n in self._node:
            if n!=index:
                e = self._node[n]
                if index in e:
                    if (n,index) in self._edge_attrs:
                        del self._edge_attrs[(n,index)]
                    e.remove(index)

        e = self._node[index]

        for n in e:
            if (index,n) in self._edge_attrs:
                del self._edge_attrs[(index,n)]

        del self._node[index]
        if index in self._node_attrs:
            del self._node_attrs[index]

    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
        '''
        Create a new edge in the graph between the two specified nodes.

        .. note:: Nodes can be specified using their label or their index in the graph.
                  If both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: a hashable python value
        :param node2: The second vertex label
        :type node2: a hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of the two specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        '''

        index1=self.addNode(node1, index1)
        index2=self.addNode(node2, index2)

        self._node[index1].add(index2)

        if not self._directed:
            self._node[index2].add(index1)

        if data:
            if (index1,index2) not in self._edge_attrs:
                data =dict(data)
                self._edge_attrs[(index1,index2)]=data
                if not self._directed:
                    self._edge_attrs[(index2,index1)]=data
            else:
                self._edge_attrs[(index1,index2)].update(data)

        return (index1,index2)

    def getEdge(self,node1=None,node2=None,index1=None,index2=None):
        '''
        Extract the :py:class:`Edge` instance linking two nodes of the graph.

        .. note:: Nodes can be specified using their label or their index in the graph.
                  If both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: a hashable python value
        :param node2: The second vertex label
        :type node2: a hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of the two specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        '''
        node1=self.getNode(node1, index1)
        node2=self.getNode(node2, index2)
        return Edge(node1,node2)

    def delEdge(self,node1=None,node2=None,index1=None,index2=None):
        """
        Delete the edge linking node 1 to node 2.

        .. note:: Nodes can be specified using their label or their index in the graph.
                  If both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: a hashable python value
        :param node2: The second vertex label
        :type node2: a hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of the two specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index1 is None:
            index1 = self._index[node1]
        if index2 is None:
            index2 = self._index[node2]
        if index1 in self._node and index2 in self._node[index1]:
            self._node[index1].remove(index2)
            # edge attributes live in _edge_attrs
            if (index1,index2) in self._edge_attrs:
                del self._edge_attrs[(index1,index2)]
            if not self._directed:
                self._node[index2].remove(index1)
                if (index2,index1) in self._edge_attrs:
                    del self._edge_attrs[(index2,index1)]

    def edgeIterator(self,predicate=None):
        """
        Iterate through a set of selected edges.

        :param predicate: a function allowing edge selection. The default value
                          is **None** and indicates that all edges are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Edge`

        :return: an iterator over the selected edges
        :rtype: iterator over :py:class:`Edge` instances

        .. seealso::
            function :py:func:`selectEdgeAttributeFactory` for simple predicates.

        """
        for n1 in self._node:
            for n2 in self._node[n1]:
                if self._directed or n1 <= n2:
                    e = self.getEdge(index1=n1, index2=n2)
                    if predicate is None or predicate(e):
                        yield e

    def nodeIterator(self,predicate=None):
        """
        Iterate through a set of selected vertices.

        :param predicate: a function allowing node selection. The default value
                          is **None** and indicates that all nodes are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Node`

        :return: an iterator over the selected nodes.
        :rtype: iterator over :py:class:`Node` instances

        """
        for n in self._node:
            node = self.getNode(index=n)
            if predicate is None or predicate(node):
                yield node

    def nodeIndexIterator(self,predicate=None):
        """
        Iterate through the indexes of a set of selected vertices.

        :param predicate: a function allowing node selection. The default value
                          is **None** and indicates that all nodes are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Node`

        :return: an iterator over the selected node indices.
        :rtype: iterator over `int`

        """
        for n in self._node:
            node = self.getNode(index=n)
            if predicate is None or predicate(node):
                yield n

    def neighbourIndexSet(self,node=None,index=None):
        if index is None:
            index=self.getNode(node).index
        return self._node[index]

    def edgeCount(self):
        n = reduce(lambda x,y:x+y, (len(z) for z in self._node.itervalues()),0)
        if not self._directed:
            n=n/2
        return n

    def subgraph(self,nodes,name='G'):
        sub = Graph(name,self._directed,self._index)
        if not isinstance(nodes, set):
            nodes = set(nodes)
        for n in nodes:
            sub._node[n]=nodes & self._node[n]
            if n in self._node_attrs:
                sub._node_attrs[n]=dict(self._node_attrs[n])
            for n2 in sub._node[n]:
                if not self._directed:
                    if n <= n2:
                        if (n,n2) in self._edge_attrs:
                            data=dict(self._edge_attrs[(n,n2)])
                            sub._edge_attrs[(n,n2)]=data
                            sub._edge_attrs[(n2,n)]=data
                else:
                    if (n,n2) in self._edge_attrs:
                        data=dict(self._edge_attrs[(n,n2)])
                        sub._edge_attrs[(n,n2)]=data
        return sub

    def __len__(self):
        return len(self._node)

    def __getitem__(self,key):
        return self.getNode(node=key)

    def __delitem__(self,key):
        self.delNode(node=key)

    def __iter__(self):
        return self.nodeIterator()

    def __str__(self):
        if self._directed:
            kw ='digraph'
        else:
            kw='graph'

        nodes = "\n ".join([str(x) for x in self])
        edges = "\n ".join([str(x) for x in self.edgeIterator()])

        return "%s %s {\n %s\n\n %s\n}" % (kw,self._label,nodes,edges)

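
# Usage sketch (illustrative, not from the original file): building a small
# undirected graph and walking its edges; labels are arbitrary hashable values.
#
#     g = Graph('demo')
#     g.addEdge('A', 'B', weight=1)
#     g.addEdge('B', 'C', weight=2)
#     for edge in g.edgeIterator():
#         print edge
#     len(g)           # -> 3 nodes
#     g.edgeCount()    # -> 2
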
class Node(object):
    """
    Class used for representing one node or vertex in a graph

    """
    def __init__(self,index,graph):
        '''
        .. warning::

            The :py:class:`Node` constructor is usually called through the :py:class:`Graph` methods

        :param index: Index of the node in the graph
        :type index: int
        :param graph: graph instance owning the node
        :type graph: :py:class:`obitools.graph.Graph`
        '''
        self.index = index
        self.__graph = graph

    def getGraph(self):
        '''
        return the graph owning this node.

        :rtype: :py:class:`obitools.graph.Graph`
        '''
        return self.__graph

    def getLabel(self):
        '''
        return the label associated to this node.
        '''
        return self.__graph._index.getLabel(self.index)

    def has_key(self,key):
        '''
        test if the node instance has a property named 'key'.

        :param key: the name of a property
        :type key: str

        :return: True if the node has a property named <key>
        :rtype: bool
        '''
        if self.index in self.__graph._node_attrs:
            return key in self.__graph._node_attrs[self.index]
        else:
            return False

    def neighbourIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        iterate through the nodes directly connected to
        this node.

        :param nodePredicat: a function accepting one node as parameter
                             and returning **True** if this node must be
                             returned by the iterator.
        :type nodePredicat: function

        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: iterator on Node instances
        '''
        for n in self.neighbourIndexIterator(nodePredicat, edgePredicat):
            node = self.graph.getNode(index=n)
            yield node

    def neighbourIndexSet(self):
        '''
        Return the set of node indexes directly connected
        to this node.

        .. warning::

            do not change this set unless you know
            exactly what you are doing.

        @rtype: set of int
        '''
        return self.__graph._node[self.index]

    def neighbourIndexIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        iterate through the node indexes directly connected to
        this node.

        :param nodePredicat: a function accepting one node as parameter
                             and returning True if this node must be
                             returned by the iterator.
        :type nodePredicat: function

        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: iterator on int
        '''
        for n in self.neighbourIndexSet():
            if nodePredicat is None or nodePredicat(self.__graph.getNode(index=n)):
                if edgePredicat is None or edgePredicat(self.__graph.getEdge(index1=self.index,index2=n)):
                    yield n

    def degree(self,nodeIndexes=None):
        '''
        return the count of edges linking this node to the
        set of nodes described by their indexes in nodeIndexes

        :param nodeIndexes: set of node indexes.
                            If set to None, all nodes of the
                            graph are taken into account.
                            Set to None by default.
        :type nodeIndexes: set of int

        :rtype: int
        '''
        if nodeIndexes is None:
            return len(self.__graph._node[self.index])
        else:
            return len(self.__graph._node[self.index] & nodeIndexes)

    def componentIndexSet(self,nodePredicat=None,edgePredicat=None):
        '''
        Return the set of node indexes in the same connected component.

        :param nodePredicat: a function accepting one node as parameter
                             and returning True if this node must be
                             returned by the iterator.
        :type nodePredicat: function

        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: set of int
        '''
        cc=set([self.index])
        added = set(x for x in self.neighbourIndexIterator(nodePredicat, edgePredicat))
        while added:
            cc |= added
            added = reduce(lambda x,y : x | y,
                           (set(z for z in self.graph.getNode(index=c).neighbourIndexIterator(nodePredicat, edgePredicat))
                            for c in added),
                           set())
            added -= cc
        return cc

    def componentIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        Iterate through the nodes in the same connected
        component.

        :rtype: iterator on :py:class:`Node` instances
        '''
        for c in self.componentIndexSet(nodePredicat, edgePredicat):
            yield self.graph.getNode(index=c)

    def shortestPathIterator(self,nodes=None):
        '''
        Iterate through the shortest paths sourcing
        from this node. If nodes is not None, iterate
        only over the paths linking this node to one of the
        nodes listed in nodes

        :param nodes: set of node indexes
        :type nodes: iterable on int

        :return: an iterator on lists of int describing paths
        :rtype: iterator on list of int
        '''
        if nodes is not None:
            nodes = set(nodes)

        Q=[(self.index,-1)]

        gray = set([self.index])
        paths = {}

        while Q and (nodes is None or nodes):
            # FIFO queue: breadth-first traversal guarantees shortest paths
            u,p = Q.pop(0)
            paths[u]=p
            next = self.graph._node[u] - gray
            gray|=next
            Q.extend((x,u) for x in next)
            if nodes is None or u in nodes:
                if nodes:
                    nodes.remove(u)
                path = [u]
                while p >= 0:
                    path.append(p)
                    p = paths[p]
                path.reverse()
                yield path

    def shortestPathTo(self,node=None,index=None):
        '''
        return one of the shortest paths linking this
        node to the specified node.

        :param node: a node label or None
        :param index: a node index or None. The parameter index
                      has priority over the parameter node.
        :type index: int

        :return: the list of node indexes corresponding to the path or None
                 if no path exists.
        :rtype: list of int or None
        '''
        if index is None:
            index=self.graph.getNode(node).index
        for p in self.shortestPathIterator([index]):
            return p

    def __getitem__(self,key):
        '''
        return the value of the <key> property of this node

        :param key: the name of a property
        :type key: str
        '''
        return self.__graph._node_attrs.get(self.index,{})[key]

    def __setitem__(self,key,value):
        '''
        set the value of a node property. If the property doesn't
        already exist a new property is added to this node.

        :param key: the name of a property
        :type key: str
        :param value: the value of the property

        .. seealso::

            :py:meth:`Node.__getitem__`
        '''
        if self.index in self.__graph._node_attrs:
            data = self.__graph._node_attrs[self.index]
            data[key]=value
        else:
            self.graph._node_attrs[self.index]={key:value}

    def __len__(self):
        '''
        Count the neighbours of this node

        :rtype: int

        .. seealso::

            :py:meth:`Node.degree`
        '''
        return len(self.__graph._node[self.index])

    def __iter__(self):
        '''
        iterate through the neighbours of this node

        :rtype: iterator on :py:class:`Node` instances

        .. seealso::

            :py:meth:`Node.neighbourIterator`
        '''
        return self.neighbourIterator()

    def __contains__(self,key):
        return self.has_key(key)

    def __str__(self):

        if self.index in self.__graph._node_attrs:
            keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"').replace('\n','\\n'))
                             for x in self.__graph._node_attrs[self.index].iteritems()]
                            )
        else:
            keys=''

        return '%d [label="%s" %s]' % (self.index,
                                       str(self.label).replace('"','\\"').replace('\n','\\n'),
                                       keys)

    def keys(self):
        if self.index in self.__graph._node_attrs:
            k = self.__graph._node_attrs[self.index].keys()
        else:
            k=[]
        return k

    label = property(getLabel, None, None, "Label of the node")

    graph = property(getGraph, None, None, "Graph owning this node")

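
# Usage sketch (illustrative, not from the original file): node-level queries on
# the graph 'g' built in the earlier sketch.
#
#     a = g.getNode('A')
#     a.degree()                                 # -> 1 (only 'B' is adjacent)
#     a.shortestPathTo('C')                      # -> node indices along A-B-C
#     [n.label for n in a.neighbourIterator()]   # -> ['B']
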
class Edge(object):
    """
    Class used for representing one edge of a graph

    """

    def __init__(self,node1,node2):
        '''
        .. warning::

            The :py:class:`Edge` constructor is usually called through the :py:class:`Graph` methods

        :param node1: First node linked by the edge
        :type node1: :py:class:`Node`
        :param node2: Second node linked by the edge
        :type node2: :py:class:`Node`
        '''
        self.node1 = node1
        self.node2 = node2

    def getGraph(self):
        """
        Return the :py:class:`Graph` instance owning this edge.
        """
        return self.node1.graph

    def has_key(self,key):
        '''
        test if the :py:class:`Edge` instance has a property named **key**.

        :param key: the name of a property
        :type key: str

        :return: True if the edge has a property named <key>
        :rtype: bool
        '''
        if (self.node1.index,self.node2.index) in self.graph._edge_attrs:
            return key in self.graph._edge_attrs[(self.node1.index,self.node2.index)]
        else:
            return False

    def getDirected(self):
        return self.node1.graph._directed

    def __getitem__(self,key):
        return self.graph._edge_attrs.get((self.node1.index,self.node2.index),{})[key]

    def __setitem__(self,key,value):
        e = (self.node1.index,self.node2.index)
        if e in self.graph._edge_attrs:
            data = self.graph._edge_attrs[e]
            data[key]=value
        else:
            self.graph._edge_attrs[e]={key:value}

    def __str__(self):
        e = (self.node1.index,self.node2.index)
        if e in self.graph._edge_attrs:
            keys = "[%s]" % " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"'))
                                      for x in self.graph._edge_attrs[e].iteritems()]
                                     )
        else:
            keys = ""

        if self.directed:
            link='->'
        else:
            link='--'

        return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys)

    def __contains__(self,key):
        return self.has_key(key)

    graph = property(getGraph, None, None, "Graph owning this edge")

    directed = property(getDirected, None, None, "True if the graph owning this edge is directed")

class DiGraph(Graph):
    """
    The :py:class:`DiGraph` class is a specialisation of the :py:class:`Graph` class
    dedicated to directed graph representation

    .. seealso::

        :py:class:`UndirectedGraph`

    """
    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        :param label: Graph name, set to 'G' by default
        :type label: str
        :param indexer: node label indexer
        :type indexer: Indexer instance
        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value
        :param edges: set of edges to add to the graph
        :type edges: iterable value
        '''

        Graph.__init__(self, label, True, indexer, nodes, edges)

class UndirectedGraph(Graph):
    """
    The :py:class:`UndirectedGraph` class is a specialisation of the :py:class:`Graph` class
    dedicated to undirected graph representation

    .. seealso::

        :py:class:`DiGraph`

    """
    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        :param label: Graph name, set to 'G' by default
        :type label: str
        :param indexer: node label indexer
        :type indexer: Indexer instance
        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value
        :param edges: set of edges to add to the graph
        :type edges: iterable value
        '''

        Graph.__init__(self, label, False, indexer, nodes, edges)

def selectEdgeAttributeFactory(attribut,value):
    """
    This function helps in building predicate functions usable for selecting edges
    in the following :py:class:`Graph` methods :

        - :py:meth:`Graph.edgeIterator`

    """
    def selectEdge(e):
        return attribut in e and e[attribut]==value
    return selectEdge
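
# Usage sketch (illustrative, not from the original file): filtering the edges
# of the graph 'g' from the earlier sketch on an attribute value.
#
#     heavy = selectEdgeAttributeFactory('weight', 2)
#     for edge in g.edgeIterator(predicate=heavy):
#         print edge     # only edges whose 'weight' attribute equals 2
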
Binary file not shown.
Binary file not shown.
@ -1,134 +0,0 @@
import time
import sys


_maxsize=0
_solution=0
_notbound=0
_sizebound=0
_lastyield=0
_maxclique=None

def cliqueIterator(graph,minsize=1,node=None,timeout=None):
    global _maxsize,_solution,_notbound,_sizebound,_lastyield
    _maxsize=0
    _solution=0
    _notbound=0
    _sizebound=0
    starttime = time.time()

    if node:
        node = graph.getNode(node)
        index = node.index
        clique= set([index])
        candidates= set(graph.neighbourIndexSet(index=index))
    else:
        clique=set()
        candidates = set(x.index for x in graph)

#    candidates = set(x for x in candidates
#                     if len(graph.neighbourIndexSet(index=x) & candidates) >= (minsize - 1))

    _lastyield=time.time()
    for c in _cliqueIterator(graph,clique,candidates,set(),minsize,start=starttime,timeout=timeout):
        yield c

def _cliqueIterator(graph,clique,candidates,notlist,minsize=0,start=None,timeout=None):
    global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield

    # Speed indicator
    lclique = len(clique)
    lcandidates = len(candidates)
    notmin = lcandidates
    notfix = None

    for n in notlist:
        nnc = candidates - graph.neighbourIndexSet(index=n)
        nc = len(nnc)
        if nc < notmin:
            notmin=nc
            notfix=n
            notfixneib = nnc

    if lclique > _maxsize or not _solution % 1000 :
        if start is not None:
            top = time.time()
            delta = top - start
            if delta==0:
                delta=1e-6
            speed = _solution / delta
            start = top
        else:
            speed = 0
        print >>sys.stderr,"\rCandidates : %-5d Maximum clique size : %-5d Solutions explored : %10d speed = %5.2f solutions/sec sizebound=%10d notbound=%10d " % (lcandidates,_maxsize,_solution,speed,_sizebound,_notbound),
        sys.stderr.flush()
        if lclique > _maxsize:
            _maxsize=lclique

#    print >>sys.stderr,'koukou'

    timer = time.time() - _lastyield

    if not candidates and not notlist:
        if lclique==_maxsize:
            _maxclique=set(clique)
        if lclique >= minsize:
            yield set(clique)
    if timeout is not None and timer > timeout and _maxclique is not None:
        yield _maxclique
        _maxclique=None

    else:
        while notmin and candidates and ((lclique + len(candidates)) >= minsize or (timeout is not None and timer > timeout)):
            # count explored solutions
            _solution+=1

            if notfix is None:
                nextcandidate = candidates.pop()
            else:
                nextcandidate = notfixneib.pop()
                candidates.remove(nextcandidate)

            clique.add(nextcandidate)

            neighbours = graph.neighbourIndexSet(index=nextcandidate)

            nextcandidates = candidates & neighbours
            nextnot = notlist & neighbours

            nnc = candidates - neighbours
            lnnc=len(nnc)

            for c in _cliqueIterator(graph,
                                     set(clique),
                                     nextcandidates,
                                     nextnot,
                                     minsize,
                                     start,
                                     timeout=timeout):
                yield c

            clique.remove(nextcandidate)

            notmin-=1

            if lnnc < notmin:
                notmin = lnnc
                notfix = nextcandidate
                notfixneib = nnc

            if notmin==0:
                _notbound+=1

            notlist.add(nextcandidate)
        else:
            if (lclique + len(candidates)) < minsize:
                _sizebound+=1

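
# Usage sketch (illustrative, not from the original file): enumerating cliques
# of at least 3 nodes in an obitools.graph.Graph instance 'g' assumed built
# elsewhere; each clique is yielded as a set of node indices.
#
#     for clique in cliqueIterator(g, minsize=3):
#         print sorted(clique)
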
@ -1,8 +0,0 @@

def compactGraph(graph,nodeSetIterator):
    compact = graph.newEmpty()
    for ns in nodeSetIterator(graph):
        nlabel = "\n".join([str(graph.getNode(index=x).label) for x in ns])
        compact.addNode(nlabel)
    print
    print compact
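
# Usage sketch (illustrative, not from the original file): collapsing each
# connected component of 'g' into one node, using componentIterator as the
# node-set source; the import path below matches the one used by the DAG module.
#
#     from obitools.graph.algorithms.component import componentIterator
#     compactGraph(g, componentIterator)   # prints the compacted graph
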
@ -1,82 +0,0 @@
"""
Iterate through the connected components of a graph
---------------------------------------------------

The module :py:mod:`obitools.graph.algorithms.component` provides
two functions to deal with the connected components of a graph
represented as a :py:class:`obitools.graph.Graph` instance.

The whole set of connected components of a graph is a partition of this
graph, so a node cannot belong to two distinct connected components.

Two nodes are in the same connected component if there exists a path
through the graph edges linking them.

TODO: There is certainly a bug with DirectedGraph

"""

def componentIterator(graph,nodePredicat=None,edgePredicat=None):
    '''
    Build an iterator over the connected components of a graph.
    Each connected component returned by the iterator is represented
    as a `set` of node indices.

    :param graph: the graph to partition
    :type graph: :py:class:`obitools.graph.Graph`

    :param nodePredicat: a function allowing node selection. Default value
                         is **None** and indicates that all nodes are selected.
    :type nodePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Node`

    :param edgePredicat: a function allowing edge selection. Default value
                         is **None** and indicates that all edges are selected.
    :type edgePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Edge`

    :return: an iterator over the connected component sets
    :rtype: an iterator over `set` of `int`

    .. seealso::
        the :py:meth:`obitools.graph.Graph.componentIndexSet` method
        on which this function is based.
    '''
    seen = set()
    for n in graph.nodeIterator(nodePredicat):
        if n.index not in seen:
            cc=n.componentIndexSet(nodePredicat, edgePredicat)
            yield cc
            seen |= cc

def componentCount(graph,nodePredicat=None,edgePredicat=None):
    '''
    Count the connected components in a graph.

    :param graph: the graph to partition
    :type graph: :py:class:`obitools.graph.Graph`

    :param nodePredicat: a function allowing node selection. Default value
                         is **None** and indicates that all nodes are selected.
    :type nodePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Node`

    :param edgePredicat: a function allowing edge selection. Default value
                         is **None** and indicates that all edges are selected.
    :type edgePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Edge`

    :return: the number of connected components
    :rtype: `int`

    .. seealso::
        the :py:func:`componentIterator` function
        on which this function is based.
    '''
    n=0
    for c in componentIterator(graph,nodePredicat, edgePredicat):
        n+=1
    return n
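
A hedged example of the two functions above on a graph with two connected components:

from obitools.graph import UndirectedGraph
from obitools.graph.algorithms.component import componentIterator, componentCount

g = UndirectedGraph()
g.addEdge(node1='a', node2='b')
g.addNode('c')

for cc in componentIterator(g):
    print cc              # a set of node indices, e.g. set([0, 1])
print componentCount(g)   # 2
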
Binary file not shown.
@ -1,80 +0,0 @@
from obitools.graph import DiGraph,Node
from obitools.graph.algorithms.component import componentIterator

class DAG(DiGraph):
    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        Directed acyclic graph (DAG) constructor.

        @param label: Graph name, set to 'G' by default
        @type label: str
        @param indexer: node label indexer
        @type indexer: Indexer instance
        @param nodes: set of nodes to add to the graph
        @type nodes: iterable value
        @param edges: set of edges to add to the graph
        @type edges: iterable value
        '''
        self._parents={}
        DiGraph.__init__(self, label, indexer, nodes, edges)

    def getNode(self,node=None,index=None):
        if index is None:
            index = self._index.getIndex(node, True)
        return DAGNode(index,self)

    def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
        indexp=self.addNode(parent, indexp)
        index =self.addNode(node , index)

        pindex = set(n.index
                     for n in self.getNode(index=indexp).ancestorIterator())

        assert index not in pindex,'Child node cannot be a parent node'

        DiGraph.addEdge(self,index1=indexp,index2=index,**data)

        if index in self._parents:
            self._parents[index].add(indexp)
        else:
            self._parents[index]=set([indexp])

        return (indexp,index)

    def getRoots(self):
        return [self.getNode(index=cc.pop()).getRoot()
                for cc in componentIterator(self)]


class DAGNode(Node):

    def ancestorIterator(self):
        if self.index in self.graph._parents:
            for p in self.graph._parents[self.index]:
                parent = DAGNode(p,self.graph)
                yield parent
                for pnode in parent.ancestorIterator():
                    yield pnode

    def getRoot(self):
        # a node without any ancestor is its own root
        x = self
        for x in self.ancestorIterator():
            pass
        return x

    def leavesIterator(self):
        if not self:
            yield self
        for n in self:
            for nn in n.leavesIterator():
                yield nn

    def subgraphIterator(self):
        yield self
        for n in self:
            for nn in n.subgraphIterator():
                yield nn
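
A short sketch of the DAG API above: edges are declared parent-first, `addEdge` rejects any edge that would make a node its own ancestor, and `getRoot` walks `ancestorIterator` up to a parentless node.

dag = DAG(label='demo')
dag.addEdge(parent='root', node='child')
dag.addEdge(parent='child', node='leaf')

leaf = dag.getNode(node='leaf')
print leaf.getRoot().label                          # 'root'
print [a.index for a in leaf.ancestorIterator()]    # indices of 'child', then 'root'
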
@ -1,117 +0,0 @@
from obitools.graph.dag import DAG,DAGNode

class RootedTree(DAG):

    def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
        indexp=self.addNode(parent, indexp)
        index =self.addNode(node , index)

        assert index not in self._parents or indexp in self._parents[index], \
            'Child node cannot have more than one parent node'

        return DAG.addEdge(self,indexp=indexp,index=index,**data)

    def getNode(self,node=None,index=None):
        if index is None:
            index = self._index.getIndex(node, True)
        return RootedTreeNode(index,self)


class RootedTreeNode(DAGNode):

    def subTreeSize(self):
        n=1
        for subnode in self:
            n+=subnode.subTreeSize()
        return n

    def subTreeLeaves(self):
        if not self:
            return 1
        n=0
        for subnode in self:
            n+=subnode.subTreeLeaves()
        return n


def nodeWriter(node,deep=0,label=None,distance="distance", bootstrap="bootstrap",cartoon=None,collapse=None):

    ks = node.keys()

    if label is None:
        name=node.label
    elif callable(label):
        name=label(node)
    elif isinstance(label, str) and label in node:
        name=node[label]
        ks.remove(label)
    else:
        name=''

    if distance in node:
        dist=':%6.5f' % node[distance]
        ks.remove(distance)
    else:
        dist=''

    ks = ["%s=%s" % (k,node[k]) for k in ks]

    if cartoon is not None and cartoon(node):
        ks.append("!cartoon={%d,0.0}" % node.subTreeLeaves())

    if collapse is not None and collapse(node):
        ks.append('!collapse={"collapsed",0.0}')

    if ks:
        ks="[&"+",".join(ks)+"]"
    else:
        ks=''

    nodeseparator = ',\n' + ' ' * (deep+1)

    subnodes = nodeseparator.join([nodeWriter(x, deep+1,label,distance,bootstrap,cartoon=cartoon,collapse=collapse)
                                   for x in node])
    if subnodes:
        subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'

    return '%s"%s"%s%s' % (subnodes,name,ks,dist)


def nexusFormat(tree,startnode=None,label=None,blocks="",cartoon=None,collapse=None):
    head="#NEXUS\n"

    tx = []

    for n in tree:
        if label is None:
            name=n.label
        elif callable(label):
            name=label(n)
        elif isinstance(label, str) and label in n:
            name=n[label]
        else:
            name=''

        if name:
            tx.append('"%s"' % name)

    taxa = "begin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels\n\t" % len(tx)
    taxa+="\n\t".join(tx)
    taxa+="\n;\nend;\n\n"

    if startnode is not None:
        roots =[startnode]
    else:
        roots = tree.getRoots()
    trees = nodeWriter(roots[0],0,label,cartoon=cartoon,collapse=collapse)
    trees = "begin trees;\n\ttree tree_1 = [&R] "+ trees +";\nend;\n\n"
    return head+taxa+trees+"\n\n"+blocks+"\n"
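
A hedged sketch of the NEXUS writer above on a two-leaf rooted tree; `nexusFormat` emits a `#NEXUS` header, a taxa block, and a trees block with FigTree-style `[&...]` annotations:

tree = RootedTree()
tree.addEdge(parent='root', node='A')
tree.addEdge(parent='root', node='B')

print nexusFormat(tree)
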
@ -1,37 +0,0 @@
from obitools.graph import UndirectedGraph,Node
from obitools.graph.algorithms.component import componentCount


class Forest(UndirectedGraph):

    def getNode(self,node=None,index=None):
        if index is None:
            index = self._index.getIndex(node, True)
        return TreeNode(index,self)

    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
        index1=self.addNode(node1, index1)
        index2=self.addNode(node2, index2)

        cc = set(n.index for n in self.getNode(index=index2).componentIterator())

        assert index1 in self._node[index2] or index1 not in cc, \
            "No more than one path is allowed between two nodes in a tree"

        UndirectedGraph.addEdge(self, index1=index1, index2=index2,**data)

        return (index1,index2)

    def isASingleTree(self):
        return componentCount(self)==1

class TreeNode(Node):

    def componentIterator(self):
        for c in self:
            yield c
            for cc in c:
                yield cc
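
A minimal sketch of the Forest invariant above: the assertion in `addEdge` forbids a second path between two nodes, so each connected component stays a tree.

f = Forest()
f.addEdge(node1='a', node2='b')
f.addEdge(node1='b', node2='c')
print f.isASingleTree()    # True: a-b-c is a single tree
f.addEdge(node1='a', node2='c')   # would close a cycle -> AssertionError
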
504
obitools/gzip.py
@ -1,504 +0,0 @@
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed.

This is a patched version of the standard Python gzip module, based on
Andrew Kuchling's minigzip.py distributed with the zlib module.

"""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import struct, sys, time
import zlib
import __builtin__

__all__ = ["GzipFile","open"]

FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

READ, WRITE = 1, 2

def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
    """
    if i < 0:
        i += 1L << 32
    return i

def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    return i & 0xFFFFFFFFL

def write32(output, value):
    output.write(struct.pack("<l", value))

def write32u(output, value):
    # The L format writes the bit pattern correctly whether signed
    # or unsigned.
    output.write(struct.pack("<L", value))

def read32(input):
    return struct.unpack("<l", input.read(4))[0]

def unpack32(buf):
    return struct.unpack("<l", buf)[0]

def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)

class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    myfileobj = None
    max_read_chunk = 10 * 1024 * 1024 # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file. It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written. The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. The default is 9.

        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'): filename = fileobj.name
            else: filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'): mode = fileobj.mode
            else: mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError, "Mode " + mode + " not supported"

        self.fileobj = fileobj
        self.offset = 0
        self.inputbuf = ''
        self.last8 = ''

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))
        self.fileobj.write('\002')
        self.fileobj.write('\377')
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_internal(self, size):
        if len(self.inputbuf) < size:
            self.inputbuf += self.fileobj.read(size-len(self.inputbuf))
        chunk = self.inputbuf[:size]
        # need to use len(chunk) below instead of size in case it's EOF.
        if len(chunk) < 8:
            self.last8 = self.last8[len(chunk):] + chunk
        else:
            self.last8 = chunk[-8:]
        self.inputbuf = self.inputbuf[size:]
        return chunk

    def _read_gzip_header(self):
        magic = self._read_internal(2)
        if len(magic) != 2:
            raise EOFError, "Reached EOF"
        if magic != '\037\213':
            raise IOError, 'Not a gzipped file'
        method = ord( self._read_internal(1) )
        if method != 8:
            raise IOError, 'Unknown compression method'
        flag = ord( self._read_internal(1) )
        # modtime = self.fileobj.read(4)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self._read_internal(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self._read_internal(1))
            xlen = xlen + 256*ord(self._read_internal(1))
            self._read_internal(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self._read_internal(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self._read_internal(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self._read_internal(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError, "write() on closed GzipFile object"
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        if self.fileobj is None:
            raise EOFError, "Reached EOF"

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # _read_gzip_header will raise EOFError exception
            # if there are no more members to read.
            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self._read_internal(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError, 'Reached EOF'

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so put back unused_data and initialize last8 by reading them.
            self.inputbuf = self.decompress.unused_data + self.inputbuf
            self._read_internal(8)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values. Note that the size
        # stored is the true file size mod 2**32.
        crc32 = unpack32(self.last8[:4])
        isize = U32(unpack32(self.last8[4:]))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError, "CRC check failed"
        elif isize != LOWU32(self.size):
            raise IOError, "Incorrect length of data produced"

    def close(self):
        if self.mode == WRITE:
            self.fileobj.write(self.compress.flush())
            # The native zlib crc is an unsigned 32-bit integer, but
            # the Python wrapper implicitly casts that to a signed C
            # long. So, on a 32-bit box self.crc may "look negative",
            # while the same crc on a 64-bit box may "look positive".
            # To avoid irksome warnings from the `struct` module, force
            # it to look positive on all boxes.
            write32u(self.fileobj, LOWU32(self.crc))
            # self.size may exceed 2GB, or even 4GB
            write32u(self.fileobj, LOWU32(self.size))
            self.fileobj = None
        elif self.mode == READ:
            self.fileobj = None
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            return
        self.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        return False

    def tell(self):
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset):
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        if size < 0:
            size = sys.maxint
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find('\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return ''.join(bufs) # Return resulting line

    def readlines(self, sizehint=0):
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)

        return L

    def writelines(self, L):
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def next(self):
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration


def _test():
    # Act like gzip; with -d, act like gunzip.
    # The input file is not deleted, however, nor are any other gzip
    # options or features supported.
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    print "filename doesn't end in .gz:", repr(arg)
                    continue
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        if g is not sys.stdout:
            g.close()
        if f is not sys.stdin:
            f.close()

if __name__ == '__main__':
    _test()
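
A hedged usage sketch: the patched module keeps the stdlib gzip API, so reading a (possibly multi-member) gzipped file is unchanged; `'data.txt.gz'` is a placeholder path.

from obitools import gzip as obigzip

f = obigzip.open('data.txt.gz', 'rb')
for line in f:
    print line,
f.close()
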
Binary file not shown.
@ -1,538 +0,0 @@
import obitools
import re
import array

class Location(object):
    """
    Define a location on a sequence.
    """

    def extractSequence(self,sequence):
        '''
        Extract the subsequence corresponding to a Location.

        @param sequence: the sequence from which the region is extracted
        @type sequence: C{BioSequence} or C{str}
        '''
        assert isinstance(sequence, (obitools.BioSequence,str)), \
            "sequence must be an instance of str or BioSequence"

        if isinstance(sequence, str):
            seq = self._extractSequence(sequence)
        else:
            if isinstance(sequence, obitools.AASequence):
                assert not self.needNucleic(), \
                    "This location can be used only with Nucleic sequences"
            seq = self._extractSequence(str(sequence))

            if isinstance(sequence, obitools.AASequence):
                st = obitools.AASequence
            else:
                st = obitools.NucSequence

            seq = st(sequence.id,
                     seq,
                     sequence.definition,
                     **sequence.getTags())
            seq['location']=str(self)

            if 'length' in sequence.getTags():
                seq['length']=len(seq)

            if hasattr(sequence, 'quality'):
                quality = self._extractQuality(sequence)
                seq.quality=quality

        return seq

    def isDirect(self):
        return None

    def isSimple(self):
        '''
        Indicate if a location is composed of a single continuous
        region or is composed by the junction of several locations
        by the C{join} operator.

        @return: C{True} if the location is composed of a single
                 continuous region.
        @rtype: bool
        '''
        return None

    def isFullLength(self):
        return None

    def needNucleic(self):
        '''
        If a location contains a complement operator, it can be used
        only on nucleic sequences.

        @return: C{True} if location contains a complement operator
        @rtype: bool
        '''
        return None

    def getGloc(self):
        loc = self.simplify()
        assert loc.isDirect() is not None,"Gloc cannot be created for multi oriented location : %s" % str(loc)
        positions = ','.join([str(x) for x in loc._getglocpos()])
        return "(%s,%s)" % ({True:'T',False:'F'}[loc.isDirect()],
                            positions)

    def shift(self,s):
        return None

    def getBegin(self):
        return None

    def getEnd(self):
        return None

    def getFivePrime(self):
        return self.getBegin()

    def getThreePrime(self):
        return self.getEnd()

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")
    fivePrime=property(getFivePrime,None,None,"5' position of the location")
    threePrime=property(getThreePrime,None,None,"3' position of the location")

    def __abs__(self):
        assert self.isDirect() is not None,"Abs operator cannot be applied on non oriented location"
        if self.isDirect():
            return self
        else:
            return ComplementLocation(self).simplify()

    def __cmp__(self,y):
        if self.begin < y.begin:
            return -1
        if self.begin > y.begin:
            return 1
        if self.isDirect() == y.isDirect():
            return 0
        if self.isDirect() and not y.isDirect():
            return -1
        return 1

class SimpleLocation(Location):
    """
    A simple location describes a continuous region of
    a sequence defined by a C{begin} and an C{end} position.
    """

    def __init__(self,begin,end):
        '''
        Build a new C{SimpleLocation} instance. Valid
        positions are defined on M{[1,N]} with N the length
        of the sequence.

        @param begin: start position of the location
        @type begin: int
        @param end: end position of the location
        @type end: int
        '''
        assert begin > 0 and end > 0

        self._begin = begin
        self._end = end
        self._before=False
        self._after=False

    def _extractSequence(self,sequence):
        assert ( self._begin < len(sequence)
                 and self._end <= len(sequence)), \
            "Sequence length %d is too short" % len(sequence)

        return sequence[self._begin-1:self._end]

    def _extractQuality(self,sequence):
        assert ( self._begin < len(sequence)
                 and self._end <= len(sequence)), \
            "Sequence length %d is too short" % len(sequence)

        return sequence.quality[self._begin-1:self._end]

    def isDirect(self):
        return True

    def isSimple(self):
        return True

    def isFullLength(self):
        return not (self.before or self.after)

    def simplify(self):
        if self._begin == self._end:
            return PointLocation(self._begin)
        else:
            return self

    def needNucleic(self):
        return False

    def __str__(self):
        before = {True:'<',False:''}[self.before]
        after = {True:'>',False:''}[self.after]
        return "%s%d..%s%d" % (before,self._begin,after,self._end)

    def shift(self,s):
        assert (self._begin + s) > 0,"shift too large (%d)" % s
        if s == 0:
            return self
        return SimpleLocation(self._begin + s, self._end + s)

    def _getglocpos(self):
        return (self.begin,self.end)

    def getGloc(self):
        positions = ','.join([str(x) for x in self._getglocpos()])
        return "(%s,%s)" % ({True:'T',False:'F'}[self.isDirect()],
                            positions)

    def getBegin(self):
        return self._begin

    def getEnd(self):
        return self._end

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")

    def getBefore(self):
        return self._before

    def getAfter(self):
        return self._after

    def setBefore(self,value):
        assert isinstance(value, bool)
        self._before=value

    def setAfter(self,value):
        assert isinstance(value, bool)
        self._after=value

    before=property(getBefore,setBefore,None)
    after=property(getAfter,setAfter,None)


class PointLocation(Location):
    """
    A point location describes a location on a sequence
    limited to a single position.
    """

    def __init__(self,position):
        assert position > 0
        self._pos=position

    def _extractSequence(self,sequence):
        assert self._pos <= len(sequence), \
            "Sequence length %d is too short" % len(sequence)

        return sequence[self._pos-1]

    def _extractQuality(self,sequence):
        assert self._pos <= len(sequence), \
            "Sequence length %d is too short" % len(sequence)

        return sequence.quality[self._pos-1:self._pos]

    def isDirect(self):
        return True

    def isSimple(self):
        return True

    def isFullLength(self):
        return True

    def simplify(self):
        return self

    def needNucleic(self):
        return False

    def shift(self,s):
        assert (self._pos + s) > 0,"shift too large (%d)" % s
        if s == 0:
            return self
        return PointLocation(self._pos + s)

    def _getglocpos(self):
        return (self._pos,self._pos)

    def getBegin(self):
        return self._pos

    def getEnd(self):
        return self._pos

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")

    def __str__(self):
        return str(self._pos)

class CompositeLocation(Location):
    """
    A composite location describes the junction of several
    sub-locations, as produced by the C{join} operator.
    """
    def __init__(self,locations):
        self._locs = tuple(locations)

    def _extractSequence(self,sequence):
        seq = ''.join([x._extractSequence(sequence)
                       for x in self._locs])
        return seq

    def _extractQuality(self,sequence):
        rep=array.array('d',[])
        for x in self._locs:
            rep.extend(x._extractQuality(sequence))
        return rep

    def isDirect(self):
        hasDirect,hasReverse = reduce(lambda x,y: (x[0] or y,x[1] or not y),
                                      (z.isDirect() for z in self._locs),(False,False))

        if hasDirect and not hasReverse:
            return True
        if hasReverse and not hasDirect:
            return False

        return None

    def isSimple(self):
        return False

    def simplify(self):
        if len(self._locs)==1:
            return self._locs[0]

        rep = CompositeLocation(x.simplify() for x in self._locs)

        if reduce(lambda x,y : x and y,
                  (isinstance(z, ComplementLocation)
                   for z in self._locs)):
            rep = ComplementLocation(CompositeLocation(x._loc.simplify()
                                                       for x in rep._locs[::-1]))

        return rep

    def isFullLength(self):
        return reduce(lambda x,y : x and y, (z.isFullLength() for z in self._locs),1)

    def needNucleic(self):
        return reduce(lambda x,y : x or y,
                      (z.needNucleic() for z in self._locs),
                      False)

    def _getglocpos(self):
        return reduce(lambda x,y : x + y,
                      (z._getglocpos() for z in self._locs))

    def getBegin(self):
        return min(x.getBegin() for x in self._locs)

    def getEnd(self):
        return max(x.getEnd() for x in self._locs)

    def shift(self,s):
        assert (self.getBegin() + s) > 0,"shift too large (%d)" % s
        if s == 0:
            return self
        return CompositeLocation(x.shift(s) for x in self._locs)

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")

    def __str__(self):
        return "join(%s)" % ','.join([str(x)
                                      for x in self._locs])

class ComplementLocation(Location):
    """
    A complement location describes a region located on the
    reverse complement strand of a nucleic sequence.
    """

    _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
           'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
           's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
           'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
           '-': '-'}

    def __init__(self,location):
        self._loc = location

    def _extractSequence(self,sequence):
        seq = self._loc._extractSequence(sequence)
        seq = ''.join([ComplementLocation._comp.get(x.lower(),'n') for x in seq[::-1]])
        return seq

    def _extractQuality(self,sequence):
        return sequence.quality[::-1]

    def isDirect(self):
        return False

    def isSimple(self):
        return self._loc.isSimple()

    def isFullLength(self):
        return self._loc.isFullLength()

    def simplify(self):
        if isinstance(self._loc, ComplementLocation):
            return self._loc._loc.simplify()
        else:
            return self

    def needNucleic(self):
        return True

    def __str__(self):
        return "complement(%s)" % self._loc

    def shift(self,s):
        assert (self.getBegin() + s) > 0,"shift too large (%d)" % s
        if s == 0:
            return self
        return ComplementLocation(self._loc.shift(s))

    def _getglocpos(self):
        return self._loc._getglocpos()

    def getBegin(self):
        return self._loc.getBegin()

    def getEnd(self):
        return self._loc.getEnd()

    def getFivePrime(self):
        return self.getEnd()

    def getThreePrime(self):
        return self.getBegin()

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")
    fivePrime=property(getFivePrime,None,None,"5' position of the location")
    threePrime=property(getThreePrime,None,None,"3' position of the location")


#
# Internal functions used for location parsing
#

def __sublocationIterator(text):
    sl = []
    plevel=0
    for c in text:
        assert plevel>=0,"Misformatted location : %s" % text
        if c == '(':
            plevel+=1
            sl.append(c)
        elif c==')':
            plevel-=1
            sl.append(c)
        elif c==',' and plevel == 0:
            assert sl,"Misformatted location : %s" % text
            yield ''.join(sl)
            sl=[]
        else:
            sl.append(c)
    assert sl and plevel==0,"Misformatted location : %s" % text
    yield ''.join(sl)


#
# Internal functions used for location parsing
#

__simplelocparser = re.compile('(?P<before><?)(?P<from>[0-9]+)(\.\.(?P<after>>?)(?P<to>[0-9]+))?')


def __locationParser(text):
    text=text.strip()
    if text[0:5]=='join(':
        assert text[-1]==')',"Misformatted location : %s" % text
        return CompositeLocation(__locationParser(sl) for sl in __sublocationIterator(text[5:-1]))
    elif text[0:11]=='complement(':
        assert text[-1]==')',"Misformatted location : %s" % text
        subl = tuple(__locationParser(sl) for sl in __sublocationIterator(text[11:-1]))
        if len(subl)>1:
            subl = CompositeLocation(subl)
        else:
            subl = subl[0]
        return ComplementLocation(subl)
    else:
        data = __simplelocparser.match(text)
        assert data is not None,"Misformatted location : %s" % text
        data = data.groupdict()
        if not data['to'] :
            sl = PointLocation(int(data['from']))
        else:
            sl = SimpleLocation(int(data['from']),int(data['to']))
        sl.before=data['before']=='<'
        sl.after=data['after']=='>'
        return sl

def locationGenerator(locstring):
    '''
    Parse a location string as present in genbank or embl files.

    @param locstring: string description of the location in embl/gb format
    @type locstring: str

    @return: a Location instance
    @rtype: C{Location} subclass instance
    '''
    return __locationParser(locstring)


_matchExternalRef = re.compile('[A-Za-z0-9_|]+(\.[0-9]+)?(?=:)')

def extractExternalRefs(locstring):
    '''
    When a location describes external references (ex: D28156.1:1..>1292),
    separate the external reference part of the location from the location
    itself.

    @param locstring: text representation of the location.
    @type locstring: str

    @return: a tuple with a set of strings describing the accession numbers
             of the referred sequences and a C{Location} instance.

    @rtype: tuple(set,Location)
    '''
    m = set(x.group() for x in _matchExternalRef.finditer(locstring))
    clean = re.compile(':|'.join([re.escape(x) for x in m])+':')
    cloc = locationGenerator(clean.sub('',locstring))

    return m,cloc
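
A hedged example of the location parser above; coordinates are 1-based and inclusive, and a complement location reverse-complements the extracted region:

loc = locationGenerator('complement(join(1..3,7..9))')
print str(loc)                              # complement(join(1..3,7..9))
print loc.needNucleic()                     # True
print loc.extractSequence('aaatttcccggg')   # 'gggttt' (reverse complement of 'aaaccc')
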
Binary file not shown.
@ -1,177 +0,0 @@
from obitools.location import Location,locationGenerator
import logging
import re


# NB: the runs of spaces in these patterns are significant (they were
# collapsed in the page rendering): 'FT' + 3 spaces matches EMBL feature
# lines, 2 + 3 spaces the 5-column GenBank layout.
_featureMatcher = re.compile('^(FT|  )   [^ ].+\n((FT|  )    .+\n)+',re.M)
_featureCleaner = re.compile('^FT',re.M)


def textFeatureIterator(fttable):
    '''
    Iterate through a textual description of a feature table in a genbank
    or embl format. Return at each step a text representation of each individual
    feature composing the table.

    @param fttable: a string corresponding to the feature table of a genbank
                    or an embl entry

    @type fttable: C{str}

    @return: an iterator on str
    @rtype: iterator

    @see: L{ftParser}
    '''
    for m in _featureMatcher.finditer(fttable):
        t = m.group()
        # replace the leading 'FT' of EMBL lines by two spaces to align
        # with the GenBank column layout
        t = _featureCleaner.sub('  ',t)
        yield t

_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M)
_qualifierCleanner= re.compile("^ +",re.M)

def qualifierIterator(qualifiers):
    '''
    Parse a textual description of a feature in embl or genbank format
    as returned by the textFeatureIterator iterator and iterate through
    the key, value qualifiers defining this feature.

    @param qualifiers: substring containing qualifiers
    @type qualifiers: str

    @return: an iterator on tuple (key,value), where keys are C{str}
    @rtype: iterator
    '''
    for m in _qualifierMatcher.finditer(qualifiers):
        t = m.group()
        t = _qualifierCleanner.sub('',t)
        t = t.split('=',1)
        if len(t)==1:
            t = (t[0],None)
        else:
            if t[0]=='translation':
                value = t[1].replace('\n','')
            else:
                value = t[1].replace('\n',' ')
            try:
                value = eval(value)
            except:
                pass
            t = (t[0],value)
        yield t


_ftmatcher = re.compile('(?<=^ {5})\S+')
_locmatcher= re.compile('(?<=^.{21})[^/]+',re.DOTALL)
_cleanloc = re.compile('[\s\n]+')
_qualifiersMatcher = re.compile('^ +/.+',re.M+re.DOTALL)

def ftParser(feature):
    fttype = _ftmatcher.search(feature).group()
    location=_locmatcher.search(feature).group()
    location=_cleanloc.sub('',location)
    qualifiers=_qualifiersMatcher.search(feature)
    if qualifiers is not None:
        qualifiers=qualifiers.group()
    else:
        qualifiers=""
        logging.debug("Qualifiers regex not matching on \n=====\n%s\n========" % feature)

    return fttype,location,qualifiers


class Feature(dict,Location):
    def __init__(self,type,location):
        self._fttype=type
        self._loc=location

    def getFttype(self):
        return self._fttype

    def extractSequence(self,sequence,withQualifier=False):
        seq = self._loc.extractSequence(sequence)
        if withQualifier:
            seq.getInfo().update(self)
        return seq

    def isDirect(self):
        return self._loc.isDirect()

    def isSimple(self):
        return self._loc.isSimple()

    def isFullLength(self):
        return self._loc.isFullLength()

    def simplify(self):
        f = Feature(self._fttype,self._loc.simplify())
        f.update(self)
        return f

    def locStr(self):
        return str(self._loc)

    def needNucleic(self):
        return self._loc.needNucleic()

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return str((self.ftType,str(self._loc),dict.__repr__(self)))

    def __cmp__(self,y):
        return self._loc.__cmp__(y)

    def _getglocpos(self):
        return self._loc._getglocpos()

    ftType = property(getFttype, None, None, "Feature type name")

    def shift(self,s):
        assert (self.getBegin() + s) > 0,"shift too large (%d)" % s
        if s == 0:
            return self
        f = Feature(self._fttype,self._loc.shift(s))
        f.update(self)
        return f

    def getBegin(self):
        return self._loc.getBegin()

    def getEnd(self):
        return self._loc.getEnd()

    begin = property(getBegin,None,None,"beginning position of the location")
    end = property(getEnd,None,None,"ending position of the location")


def featureFactory(featureDescription):
    fttype,location,qualifiers = ftParser(featureDescription)
    location = locationGenerator(location)
    feature = Feature(fttype,location)
    feature.raw = featureDescription

    for k,v in qualifierIterator(qualifiers):
        feature.setdefault(k,[]).append(v)

    return feature

def featureIterator(featureTable,skipError=False):
    for tft in textFeatureIterator(featureTable):
        try:
            feature = featureFactory(tft)
        except AssertionError,e:
            logging.debug("Parsing error on feature :\n===============\n%s\n===============" % tft)
            if not skipError:
                raise e
            logging.debug("\t===> Error skipped")
            continue

        yield feature
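
A minimal sketch of the feature-table parser above on one GenBank-style feature (feature key at column 6, qualifier text starting at column 22):

fttable = (
    '     gene            212..1918\n'
    '                     /gene="xyzA"\n'
)
for feat in featureIterator(fttable):
    print feat.ftType, feat.locStr(), feat['gene']   # gene 212..1918 ['xyzA']
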
@ -1,265 +0,0 @@
|
|||||||
from obitools.ecopcr.options import addTaxonomyFilterOptions,\
|
|
||||||
loadTaxonomyDatabase
|
|
||||||
from obitools.graph import UndirectedGraph
|
|
||||||
from obitools.align import lenlcs,isLCSReachable
|
|
||||||
from obitools.graph.algorithms.component import componentIterator
|
|
||||||
from obitools.utils.bioseq import uniqSequence
|
|
||||||
from obitools.utils import progressBar
|
|
||||||
import math
|
|
||||||
import sys
|
|
||||||
from obitools.graph.rootedtree import RootedTree
|
|
||||||
|
|
||||||
def average(x):
|
|
||||||
x=list(x)
|
|
||||||
s = sum(i*j for (i,j) in x)
|
|
||||||
n = sum(i[1] for i in x)
|
|
||||||
return (float(s)/float(n),n)
|
|
||||||
|
|
||||||
def minimum(x):
|
|
||||||
x=list(x)
|
|
||||||
m = min(i[0] for i in x)
|
|
||||||
n = sum(i[1] for i in x)
|
|
||||||
return (float(m),n)
|
|
||||||
|
|
||||||
def ecoPCRReader(entries,options):
|
|
||||||
|
|
||||||
taxonomy = loadTaxonomyDatabase(options)
|
|
||||||
|
|
||||||
norankid =options.taxonomy.findRankByName('no rank')
|
|
||||||
speciesid=options.taxonomy.findRankByName('species')
|
|
||||||
genusid =options.taxonomy.findRankByName('genus')
|
|
||||||
familyid =options.taxonomy.findRankByName('family')
|
|
||||||
|
|
||||||
minrankseq = set([speciesid,genusid,familyid])
|
|
||||||
|
|
||||||
usedrankid = {}
|
|
||||||
|
|
||||||
ingroup = []
|
|
||||||
outgroup= []
|
|
||||||
|
|
||||||
for s in entries:
|
|
||||||
if 'taxid' in s :
|
|
||||||
taxid = s['taxid']
|
|
||||||
if taxid in taxonomy:
|
|
||||||
allrank = set()
|
|
||||||
for p in options.taxonomy.parentalTreeIterator(taxid):
|
|
||||||
if p[1]!=norankid:
|
|
||||||
allrank.add(p[1])
|
|
||||||
if len(minrankseq & allrank) == 3:
|
|
||||||
for r in allrank:
|
|
||||||
usedrankid[r]=usedrankid.get(r,0) + 1
|
|
||||||
|
|
||||||
if taxonomy.isAncestor(options.ingroup,taxid):
|
|
||||||
ingroup.append(s)
|
|
||||||
else:
|
|
||||||
outgroup.append(s)
|
|
||||||
|
|
||||||
keptrank = set(r for r in usedrankid
|
|
||||||
if float(usedrankid[r])/float(len(ingroup)) > options.rankthresold)
|
|
||||||
|
|
||||||
return { 'ingroup' : ingroup,
|
|
||||||
'outgroup': outgroup,
|
|
||||||
'ranks' : keptrank
|
|
||||||
}
|
|
||||||
|
|
||||||
def buildSimilarityGraph(dbseq,ranks,taxonomy,dcmax=5):
|
|
||||||
|
|
||||||
ldbseq = len(dbseq)
|
|
||||||
pos = 1
|
|
||||||
digit = int(math.ceil(math.log10(ldbseq)))
|
|
||||||
header = "Alignment : %%0%dd x %%0%dd -> %%0%dd " % (digit,digit,digit)
|
|
||||||
aligncount = ldbseq*(ldbseq+1)/2
|
|
||||||
edgecount = 0
|
|
||||||
print >>sys.stderr
|
|
||||||
|
|
||||||
progressBar(1,aligncount,True,"Alignment : %s x %s -> %s " % ('-'*digit,'-'*digit, '0'*digit))
|
|
||||||
|
|
||||||
|
|
||||||
sim = UndirectedGraph()
|
|
||||||
|
|
||||||
i=0
|
|
||||||
for s in dbseq:
|
|
||||||
taxid = s['taxid']
|
|
||||||
|
|
||||||
rtaxon = dict((rid,taxonomy.getTaxonAtRank(taxid,rid))
|
|
||||||
for rid in ranks)
|
|
||||||
|
|
||||||
sim.addNode(i, seq=s,taxid=taxid,rtaxon=rtaxon)
|
|
||||||
|
|
||||||
i+=1
|
|
||||||
|
|
||||||
# aligner = LCS()
|
|
||||||
|
|
||||||
for is1 in xrange(ldbseq):
|
|
||||||
s1 = dbseq[is1]
|
|
||||||
ls1= len(s1)
|
|
||||||
# aligner.seqA=s1
|
|
||||||
|
|
||||||
for is2 in xrange(is1+1,ldbseq):
|
|
||||||
|
|
||||||
s2=dbseq[is2]
|
|
||||||
ls2=len(s2)
|
|
||||||
|
|
||||||
lm = max(ls1,ls2)
|
|
||||||
lcsmin = lm - dcmax
|
|
||||||
|
|
||||||
if isLCSReachable(s1,s2,lcsmin):
|
|
||||||
llcs,lali=lenlcs(s1,s2)
|
|
||||||
ds1s2 = lali - llcs
|
|
||||||
|
|
||||||
if ds1s2 <= dcmax:
|
|
||||||
sim.addEdge(node1=is1, node2=is2,ds1s2=ds1s2,label=ds1s2)
|
|
||||||
edgecount+=1
|
|
||||||
|
|
||||||
progressBar(pos,aligncount,head=header % (is1,is2,edgecount))
|
|
||||||
pos+=(ldbseq-is1-1)
|
|
||||||
|
|
||||||
return sim
|
|
||||||
|
|
||||||
def buildTsr(component):
    '''
    Build, for each considered taxonomic rank, the set of taxa
    present in the connected component, with their sequence counts.

    :param component: the analyzed connected component
    :type component: :py:class:`UndirectedGraph`

    :return: a dictionary indexed by rank id; each value is a `dict` indexed
             by taxid giving the count of sequences for this taxid
    :rtype: `dict` indexed by `int`, containing `dict` of `int` indexed by `int`
    '''
    taxalist = {}
    for n in component:
        for r in n['rtaxon']:
            rtaxid = n['rtaxon'][r]
            if rtaxid is not None:
                ts = taxalist.get(r,{})
                ts[rtaxid]=ts.get(rtaxid,0)+1
                taxalist[r]=ts

    return taxalist

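# Sketch of the returned structure (taxids and counts invented):
#
#   tsr = buildTsr(component)
#   # e.g. {speciesid: {9606: 4, 9598: 1}, genusid: {9605: 5}}
#   # At the species rank this component mixes two taxa, so its sequences
#   # cannot be assigned unambiguously at the current dcmax; at the genus
#   # rank it is homogeneous.
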
def edgeDistSelector(dcmax):
    def predicate(e):
        return e['ds1s2'] <= dcmax
    return predicate

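# The returned closure filters similarity-graph edges on their stored distance:
#
#   keep = edgeDistSelector(2)
#   keep({'ds1s2': 1})   # -> True : edge kept at this confusion radius
#   keep({'ds1s2': 3})   # -> False: edge ignored
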
def distanceOfConfusion(simgraph,dcmax=5,aggregate=average):

    alltaxa = set()

    for n in simgraph:
        alltaxa|=set(n['rtaxon'].values())

    taxacount = len(alltaxa)

    result = {}

    pos = [1]
    header = "Component : %-5d Identified : %-8d "
    progressBar(1,taxacount,True,header % (0,0))

    def _idc(cc,dcmax):
        composante=[]
        for x in cc:
            composante.extend(simgraph.subgraph(c)
                              for c in componentIterator(x,
                                                         edgePredicat=edgeDistSelector(dcmax)))

        good = set()
        bad  = {}

        complexe = []

        for c in composante:
            tsr = buildTsr(c)
            newbad=False
            for r in tsr:
                if len(tsr[r]) == 1:
                    taxid = tsr[r].keys()[0]
                    good.add((taxid,tsr[r][taxid]))
                else:
                    newbad=True
                    for taxid in tsr[r]:
                        bad[taxid]=bad.get(taxid,0)+tsr[r][taxid]
            if newbad:
                complexe.append(c)

        # good = good - bad

        for taxid,weight in good:
            if taxid not in result:
                result[taxid]=[]
            result[taxid].append((dcmax+1,weight))

        progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
        pos[0]=len(result)

        if dcmax > 0:
            dcmax-=1
            _idc(complexe,dcmax)

        else:
            for taxid in bad:
                if taxid not in result:
                    result[taxid]=[]
                result[taxid].append((0,bad[taxid]))

            progressBar(pos[0],taxacount,False,header % (len(composante),pos[0]))
            pos[0]=len(result)

    _idc([simgraph],dcmax)

    for taxid in result:
        result[taxid]=aggregate(result[taxid])
    return result

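# The recursion peels the similarity graph with a shrinking dcmax: a taxon
# whose component is homogeneous at a given dcmax receives the confusion
# distance dcmax + 1, and taxa still ambiguous at dcmax == 0 receive 0.
# A hedged sketch of the aggregated output (taxids invented):
#
#   dc = distanceOfConfusion(sim, dcmax=5, aggregate=minimum)
#   # dc maps taxid -> (confusion distance, sequence count),
#   # e.g. {9606: (3.0, 12), 9598: (0.0, 4)}
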
def propagateDc(tree,node=None,aggregate=min):
    if node is None:
        node = tree.getRoots()[0]
    dca=aggregate(n['dc'] for n in node.leavesIterator())
    node['dc']=dca
    for n in node:
        propagateDc(tree, n, aggregate)


def confusionTree(distances,ranks,taxonomy,aggregate=min,bsrank='species',dcmax=1):

    def Bs(node,rank,dcmax):
        n = len(node)
        if n:
            g = [int(x['dc']>=dcmax) for x in node.subgraphIterator() if x['rank']==bsrank]
            n = len(g)
            g = sum(g)
            if n:   # guard added: a subtree may contain no node at bsrank
                bs= float(g)/float(n)
                node['bs']=bs
                node['bs_label']="%3.2f (%d)" % (bs,n)

        for n in node:
            Bs(n,rank,dcmax)

    tree = RootedTree()
    ranks = set(ranks)
    tset = set(distances)

    for taxon in distances:
        tree.addNode(taxon, rank=taxonomy.getRank(taxon),
                     name=taxonomy.getScientificName(taxon),
                     dc=float(distances[taxon][0]),
                     n=distances[taxon][1],
                     dc_label="%4.2f (%d)" % (float(distances[taxon][0]),distances[taxon][1])
                     )

    for taxon in distances:
        piter = taxonomy.parentalTreeIterator(taxon)
        taxon = piter.next()
        for parent in piter:
            if taxon[0] in tset and parent[0] in distances:
                tset.remove(taxon[0])
                tree.addEdge(parent[0], taxon[0])
            taxon=parent

    root = tree.getRoots()[0]
    Bs(root,bsrank,dcmax)

    return tree

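# bs is the fraction of bsrank-level descendants of a node whose confusion
# distance reaches dcmax: with 3 of 4 species identifiable under a genus, the
# node would carry bs = 0.75 and the label "0.75 (4)" (values invented).
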
@@ -1,34 +0,0 @@
'''
Created on 30 oct. 2011

@author: coissac
'''

from obitools.ecopcr.options import addTaxonomyDBOptions


def addMetabarcodingOption(optionManager):

    addTaxonomyDBOptions(optionManager)

    optionManager.add_option('--dcmax',
                             action="store", dest="dc",
                             metavar="###",
                             type="int",
                             default=0,
                             help="Maximum confusion distance considered")

    optionManager.add_option('--ingroup',
                             action="store", dest="ingroup",
                             metavar="###",
                             type="int",
                             default=1,
                             help="NCBI taxid delimiting the ingroup")

    optionManager.add_option('--rank-thresold',
                             action="store", dest="rankthresold",
                             metavar="#.##",
                             type="float",
                             default=0.5,
                             help="minimum fraction of the ingroup sequences "
                                  "required for considering the rank")
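# A hedged sketch of how these options plug into the option manager defined in
# the options module at the end of this diff (the obitools.options module path
# is an assumption):
#
#   from obitools.options import getOptionManager
#   optionParser = getOptionManager([addMetabarcodingOption])
#   options, entries = optionParser()
#   # options.dc, options.ingroup and options.rankthresold are now set
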
@@ -1,28 +0,0 @@
from obitools.obischemas import kb

__connection__ = None

def initConnection(options):
    global __connection__
    param = {}
    if hasattr(options, "dbname") and options.dbname is not None:
        param["database"]=options.dbname
    if hasattr(options, "dbhost") and options.dbhost is not None:
        param["host"]=options.dbhost
    if hasattr(options, "dbuser") and options.dbuser is not None:
        param["username"]=options.dbuser
    if hasattr(options, "dbpassword") and options.dbpassword is not None:
        param["password"]=options.dbpassword

    __connection__=kb.getConnection(**param)
    __connection__.autocommit=options.autocommit

def getConnection(options=None):
    global __connection__

    if options is not None:
        initConnection(options)

    assert __connection__ is not None,"database connection is not initialized"

    return __connection__
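# A hedged usage sketch; the option attributes match the dest= values declared
# by addConnectionOptions later in this diff:
#
#   initConnection(options)      # options carries dbname, dbhost, dbuser, ...
#   db = getConnection()
#   rows = db('SELECT 1')        # Connection.__call__ delegates to a cursor
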
@@ -1,55 +0,0 @@
"""
The kb package manages access to a PostgreSQL database from Python
scripts.
"""


class Connection(object):

    def __init__(self):
        raise RuntimeError('pyROM.KB.Connection is an abstract class')

    def cursor(self):
        raise RuntimeError('pyROM.KB.Connection.cursor is an abstract function')

    def commit(self):
        raise RuntimeError('pyROM.KB.Connection.commit is an abstract function')

    def rollback(self):
        raise RuntimeError('pyROM.KB.Connection.rollback is an abstract function')

    def __call__(self,query):
        return self.cursor().execute(query)


class Cursor(object):

    def __init__(self,db):
        raise RuntimeError('pyROM.KB.Cursor is an abstract class')

    def execute(self,query):
        raise RuntimeError('pyROM.KB.Cursor.execute is an abstract function')

    __call__=execute


_current_connection = None   # Static variable used to store the connection to the KB

def getConnection(*args,**kargs):
    """
    Return a connection to the database.
    When called from the database backend, no argument is needed.
    Successive calls without arguments return the same cached connection.
    """
    global _current_connection

    if _current_connection is None or args or kargs :
        try:
            from obischemas.kb import backend
            _current_connection = backend.Connection()
        except ImportError:
            from obischemas.kb import extern
            _current_connection = extern.Connection(*args,**kargs)
    return _current_connection
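# The module-level cache means repeated calls share one connection, while any
# explicit argument forces a new one. A hedged sketch (connection parameters
# invented):
#
#   c1 = getConnection(database='obi')   # opens (or replaces) the connection
#   c2 = getConnection()                 # returns the cached object
#   assert c1 is c2
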
@@ -1,78 +0,0 @@
"""
Module : KB.extern
Author : Eric Coissac
Date   : 03/05/2004

Module wrapping the psycopg interface module to allow connection
to a postgresql database with the same interface from the
backend and from external scripts.

This module defines a class usable from external scripts.
"""


import psycopg2
import sys
from obischemas import kb

class Connection(kb.Connection):

    def __init__(self,*connectParam,**kconnectParam):
        if connectParam:
            self.connectParam={'dsn':connectParam}   # fixed: was a comparison (==), not an assignment
        else:
            self.connectParam=kconnectParam
        print self.connectParam
        self.db = psycopg2.connect(**(self.connectParam))

    def restart(self):
        ok=1
        while (ok and ok < 1000):
            try:
                self.db = psycopg2.connect(**self.connectParam)
            except:
                ok+=1
            else:
                ok=0


    def cursor(self):
        curs = Cursor(self.db)
        if hasattr(self,'autocommit') and self.autocommit:
            curs.autocommit = self.autocommit
        return curs

    def commit(self):
        self.db.commit()

    def rollback(self):
        if hasattr(self,'db'):
            self.db.rollback()

    def __del__(self):
        if hasattr(self,'db'):
            self.rollback()

class Cursor(kb.Cursor):

    def __init__(self,db):
        self.db = db
        self.curs = db.cursor()

    def execute(self,query):
        try:
            self.curs.execute(query)
            if hasattr(self,'autocommit') and self.autocommit:
                self.db.commit()
        except psycopg2.ProgrammingError,e:
            print >>sys.stderr,"===> %s" % query
            raise e
        except psycopg2.IntegrityError,e:
            print >>sys.stderr,"---> %s" % query
            raise e
        try:
            label = [x[0] for x in self.curs.description]
            return [dict(map(None,label,y))
                    for y in self.curs.fetchall()]
        except TypeError:
            return []
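# Cursor.execute returns each row as a dict keyed by column name, via the
# Python 2 idiom dict(map(None, label, y)) which zip-pads labels and values.
# A hedged sketch (table and columns invented):
#
#   db = Connection(database='obi', user='postgres')
#   for row in db("SELECT taxid, name FROM taxon LIMIT 2"):
#       print row['taxid'], row['name']
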
@@ -1,31 +0,0 @@
def addConnectionOptions(optionManager):

    optionManager.add_option('-d','--dbname',
                             action="store", dest="dbname",
                             metavar="<DB NAME>",
                             type="string",
                             help="OBISchema database name containing "
                                  "taxonomical data")

    optionManager.add_option('-H','--host',
                             action="store", dest="dbhost",
                             metavar="<DB HOST>",
                             type="string",
                             help="host hosting the OBISchema database")

    optionManager.add_option('-U','--user',
                             action="store", dest="dbuser",
                             metavar="<DB USER>",
                             type="string",
                             help="user for the OBISchema database connection")

    optionManager.add_option('-W','--password',
                             action="store", dest="dbpassword",
                             metavar="<DB PASSWORD>",
                             type="string",
                             help="password for the OBISchema database connection")

    optionManager.add_option('-A','--autocommit',
                             action="store_true",dest="autocommit",
                             default=False,
                             help="add a commit action after each query")
@@ -1,53 +0,0 @@
from obitools.obo.parser import OBOTerm
from obitools.obo.parser import OBOEntry
from obitools.obo.parser import stanzaIterator
from logging import debug

class GOEntry(OBOEntry):
    '''
    An entry of a GeneOntology .obo file. It can be a header (without a stanza name) or
    a stanza (with a stanza name between brackets). It inherits from the class dict.
    '''


class GOTerm(OBOTerm):

    '''
    A stanza named 'Term'. It inherits from the class OBOTerm.
    '''

    def __init__(self,stanza):

        ## use of the OBOTerm constructor.
        OBOTerm.__init__(self, stanza)

        assert 'namespace' in self and len(self['namespace'])==1, \
               "A GOTerm must belong to one of the cellular_component, molecular_function or biological_process namespaces"


def GOEntryFactory(stanza):
    '''
    Dispatcher of stanza.

    @param stanza: a stanza composed of several lines.
    @type stanza: text

    @return: a C{GOTerm} | C{OBOEntry} instance

    @note: The dispatcher treats the stanzas which are OBO "Term"
           differently from the others.
    '''

    stanzaType = OBOEntry.parseStanzaName(stanza)

    if stanzaType=="Term":
        return GOTerm(stanza)
    else:
        return OBOEntry(stanza)


def GOEntryIterator(file):
    entries = stanzaIterator(file)
    for e in entries:
        debug(e)
        yield GOEntryFactory(e)
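# A hedged sketch of iterating over a GeneOntology release ('go.obo' is an
# assumed local file name):
#
#   for entry in GOEntryIterator('go.obo'):
#       if isinstance(entry, GOTerm):
#           print entry.id, entry.namespace, entry.name
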
@@ -1,707 +0,0 @@
from obitools.utils import skipWhiteLineIterator,multiLineWrapper
from obitools.utils import universalOpen
from obitools.format.genericparser import genericEntryIteratorGenerator
from logging import debug,warning

import re


#################################################################################
##                       Stanza preparation area                               ##
#################################################################################


class FileFormatError(Exception):
    '''
    An error derived from the class Exception.
    '''
    pass

_oboEntryIterator = genericEntryIteratorGenerator(endEntry='^ *$',
                                                  strip=True)

def stanzaIterator(inputfile):
    '''
    Iterator over stanzas. Stanzas are the basic units of OBO files.

    @param inputfile: a stream of strings from an opened OBO file.
    @type inputfile: a stream of strings

    @return: a stream of stanzas
    @rtype: a stream of aggregated strings

    @note: The iterator builds each stanza by aggregating strings from the
           OBO file.
    '''
    inputfile = universalOpen(inputfile)
    inputfile = multiLineWrapper(inputfile)
    return _oboEntryIterator(inputfile)
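# A stanza is a blank-line separated block of an OBO file. A minimal sketch of
# the expected shape of one stanza and of the static name parser defined below:
#
#   stanza = '[Term]\nid: GO:0008150\nname: biological_process'
#   OBOEntry.parseStanzaName(stanza)   # -> 'Term'
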

#################################################################################
##                   Trailing Modifiers treatment area                         ##
#################################################################################


class TrailingModifier(dict):
    '''
    A class which inherits from the class dict. Trailing modifiers can be found
    at the end of TaggedValue objects when they exist.
    '''

    _match_brace = re.compile('(?<=\ {)[^\]]*(\}) *( !|$)')

    def __init__(self,string):

        ## search for a trailing modifiers signal
        trailing_modifiers = TrailingModifier._match_brace.search(string)

        ## the trailing modifiers exist
        if trailing_modifiers:
            trailing_modifiers=trailing_modifiers.group(0).strip()
            print trailing_modifiers
            ## creates and feeds the dictionary of trailing modifiers
            dict.__init__(self,(x.strip().split('=',1) for x in trailing_modifiers.split(',')))


def trailingModifierFactory(string):
    '''
    Dispatcher of trailing modifiers.

    @param string: a string from a TaggedValue object with a trailing modifiers signal.
    @type string: string

    @return: a class object

    @note: The dispatcher is currently very simple. Only one case is treated by the function:
           it returns an object inherited from the class dict if the trailing modifiers
           exist, None if they don't.
    '''

    trailing_modifiers = TrailingModifier(string)
    if not trailing_modifiers:
        trailing_modifiers=None
    return trailing_modifiers

#################################################################################
##                      TaggedValue treatment area                             ##
#################################################################################


class TaggedValue(object):
    '''
    A couple 'tag:value' of an OBOEntry.
    '''

    _match_value        = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)')
    _split_comment      = re.compile('^!| !')
    _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")')
    _match_bracket      = re.compile('\[[^\]]*\]')

    def __init__(self,line):
        '''
        Constructor of the class TaggedValue.

        @param line: a line of an OBOEntry composed of a tag and a value.
        @type line: string

        @note: The constructor separates the tag from the right-hand terms. 'value' is
        extracted from the right-hand terms using a regular expression (the value is at
        the beginning of the string, between quotes or not). Then, 'comment' is extracted
        from the rest of the string using another regular expression ('comment' is at the
        end of the string after a '!'; by default, 'comment' is set to None). Finally,
        'trailing_modifiers' are extracted from the last string using a third regular
        expression. The tag, the value, the comment and the trailing_modifiers are saved.
        '''

        debug("tagValueParser : %s" % line)

        ## by default :
        trailing_modifiers = None
        comment = None

        ## the tag is saved. 'right' is composed of the value, the comment and the trailing modifiers
        tag,right = line.split(':',1)

        ## the value is saved
        value = TaggedValue._match_value.search(right).group(0)
        debug("Extracted value : %s" % value)

        ## if there is a value AND a sign of a comment or trailing modifiers
        if value and value[-1] in '!{':
            lvalue = len(value)
            ## whatever it is, a comment or trailing modifiers, it is saved into 'extra'
            extra = right[lvalue-1:].strip()
            ## a comment is extracted
            extra =TaggedValue._split_comment.split(extra,1)
            ## and saved if it exists
            if len(extra)==2:
                comment=extra[1].strip()
            ## trailing modifiers are extracted
            extra=extra[0]
            trailing_modifiers = trailingModifierFactory(extra)
            ## the value is cleaned of any comment or trailing modifier signals
            value = value[0:-1]

        if tag=='use_term':
            tag='consider'
            raise DeprecationWarning,"use_term is a deprecated tag, you should use consider instead"

        ## recording zone
        self.value =value.strip()
        self.tag = tag
        self.__doc__=comment
        self.trailing_modifiers=trailing_modifiers

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return '''"""%s"""''' % str(self)
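# A hedged sketch of the tag/value/comment split on a typical OBO line:
#
#   tv = TaggedValue('is_a: GO:0008150 ! biological_process')
#   tv.tag       # -> 'is_a'
#   tv.value     # -> 'GO:0008150'
#   tv.__doc__   # -> 'biological_process' (the inline comment)
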

class NameValue(TaggedValue):
    '''
    A couple 'name:value' inherited from the class TaggedValue. Used to manage name tags.
    '''

    def __init__(self,line):

        ## no use of the TaggedValue constructor. The NameValue is very simple.
        tag,right = line.split(':',1)

        ## recording zone
        self.value = right.strip()
        self.tag = 'name'
        self.__doc__=None
        self.trailing_modifiers=None


class DefValue(TaggedValue):
    '''
    A couple 'def:value' inherited from the class TaggedValue. Used to manage def tags.
    '''

    def __init__(self,line):
        '''
        Constructor of the class DefValue.

        @param line: a line of an OBOEntry composed of a tag named 'def' and a value.
        @type line: string

        @note: The constructor calls the TaggedValue constructor. A regular expression
        is used to extract the 'definition' from TaggedValue.value (the definition is the
        unquoted TaggedValue.value). Another regular expression is used to extract 'dbxrefs'
        from the TaggedValue.value without the definition (dbxrefs are between brackets,
        and the definition can be so). The definition is saved as the new value of the
        DefValue. dbxrefs are saved.
        '''

        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)

        ## the definition, which is quoted, is extracted from the standard value of a TaggedValue.
        definition = TaggedValue._match_quotedString.search(self.value).group(0)

        ## the standard value is cleaned of the definition.
        cleanvalue = self.value.replace(definition,'')
        cleanvalue = cleanvalue.replace('  ',' ')

        ## dbxrefs are searched for in the rest of the standard value.
        dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)

        ## recording zone
        self.tag = 'def'
        ## the value of a DefValue is not the standard value but the definition.
        self.value=definition
        self.dbxrefs=xrefFactory(dbxrefs)


class SynonymValue(TaggedValue):
    '''
    A couple 'synonym:value' inherited from the class TaggedValue. Used to manage
    synonym tags, exact_synonym tags, broad_synonym tags and narrow_synonym tags.
    '''

    _match_scope = re.compile('(?<="")[^\[]*(?=\[|$)')

    def __init__(self,line):
        '''
        Constructor of the class SynonymValue.

        @param line: a line of an OBOEntry composed of a tag named 'synonym' or
        'exact_synonym' or 'broad_synonym' or 'narrow_synonym' and a value.
        @type line: string

        @note: A SynonymValue is composed of a tag, a value, a scope, a list of types and
        dbxrefs.
        The constructor calls the TaggedValue constructor. A regular expression
        is used to extract the 'definition' from TaggedValue.value (the definition is the
        unquoted TaggedValue.value). The definition is saved as the new value of the class
        SynonymValue.
        A regular expression is used to extract 'attributes' from the rest of the
        string. Attributes may contain an optional synonym scope and an optional list
        of synonym types. The scope is extracted from the attributes or set by default to
        'RELATED'. It is saved as the scope of the class. The types are the rest of the
        attributes and are saved as the list of types of the class.
        For the deprecated tags 'exact_synonym', 'broad_synonym' and 'narrow_synonym', tag
        is set to 'synonym' and scope is set respectively to 'EXACT', 'BROAD' and 'NARROW'.
        A regular expression is used to extract 'dbxrefs' from the TaggedValue.value
        without the definition (dbxrefs are between brackets, and the definition can be so).
        dbxrefs are saved.
        '''

        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)

        ## the definition, which is quoted, is extracted from the standard value of a TaggedValue.
        definition = TaggedValue._match_quotedString.search(self.value).group(0)

        ## the standard value is cleaned of the definition.
        cleanvalue = self.value.replace(definition,'')
        cleanvalue = cleanvalue.replace('  ',' ')

        ## 1) attributes are searched for in the rest of the standard value.
        ## 2) then they are stripped.
        ## 3) then they are split on every ' '.
        ## 4) finally they are gathered into a set.
        attributes = set(SynonymValue._match_scope.search(cleanvalue).group(0).strip().split())

        ## the scopes are the intersection between the attributes and a set of specific terms.
        scopes = attributes & set(['RELATED','EXACT','BROAD','NARROW'])

        ## the types are the rest of the attributes.
        types = attributes - scopes

        ## this is a constraint of the OBO format
        assert len(scopes)< 2,"Only one synonym scope allowed"

        ## the scope of the SynonymValue is taken from scopes or set by default to RELATED
        if scopes:
            scope = scopes.pop()
        else:
            scope = 'RELATED'

        ## Specific rules are defined for the following tags :
        if self.tag == 'exact_synonym':
            raise DeprecationWarning,'exact_synonym is a deprecated tag, use the synonym tag instead'
            self.tag = 'synonym'
            scope = 'EXACT'

        if self.tag == 'broad_synonym':
            raise DeprecationWarning,'broad_synonym is a deprecated tag, use the synonym tag instead'
            self.tag = 'synonym'
            scope = 'BROAD'

        if self.tag == 'narrow_synonym':
            raise DeprecationWarning,'narrow_synonym is a deprecated tag, use the synonym tag instead'
            self.tag = 'synonym'
            scope = 'NARROW'

        if self.tag == 'systematic_synonym':
            #raise DeprecationWarning,'systematic_synonym is a deprecated tag, use the synonym tag instead'
            self.tag = 'synonym'
            scope = 'SYSTEMATIC'

        ## this is our own constraint. Deprecated tags are not saved by this parser.
        assert self.tag =='synonym',"%s synonym type is not managed" % self.tag

        ## dbxrefs are searched for in the rest of the standard value.
        dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)

        ## recording zone
        ## the value of a SynonymValue is not the standard value but the definition.
        self.value = definition
        self.dbxrefs = xrefFactory(dbxrefs)
        self.scope = scope
        self.types = list(types)

    def __eq__(self,b):
        return ((self.value==b.value) and (self.dbxrefs==b.dbxrefs)
                and (self.scope==b.scope) and (self.types==b.types)
                and (self.__doc__==b.__doc__) and (self.tag==b.tag)
                and (self.trailing_modifiers==b.trailing_modifiers))

    def __hash__(self):
        return (reduce(lambda x,y:x+y,(hash(z) for z in [self.__doc__,
                                                         self.value,
                                                         frozenset(self.dbxrefs),
                                                         self.scope,
                                                         frozenset(self.types),
                                                         self.tag,
                                                         self.trailing_modifiers]),0)) % (2**31)


class XrefValue(TaggedValue):
    '''
    A couple 'xref:value' inherited from the class TaggedValue. Used to manage
    xref tags.
    '''

    def __init__(self,line):

        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)

        ## use the same function as for the dbxrefs
        self.value=xrefFactory(self.value)

        if self.tag in ('xref_analog','xref_unk'):
            raise DeprecationWarning,'%s is a deprecated tag, use the xref tag instead' % self.tag
            self.tag='xref'

        ## this is our own constraint. Deprecated tags are not saved by this parser.
        assert self.tag=='xref'


class RelationshipValue(TaggedValue):
    '''
    A couple 'relationship:value' inherited from the class TaggedValue. Used to manage
    relationship tags (relationship, union_of, intersection_of).
    '''

    def __init__(self,line):

        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)

        ## the value is split on the first ' '.
        value = self.value.split(None,1)

        ## successful split !
        if len(value)==2:
            relationship=value[0]
            term=value[1]
        ## unsuccessful split. The relationship is set by default to 'is_a'
        else:
            relationship='is_a'
            term=value[0]

        ## recording zone
        self.value=term
        self.relationship=relationship


class NamespaceValue(TaggedValue):
    def __init__(self,line):
        TaggedValue.__init__(self, line)

class RemarkValue(TaggedValue):
    def __init__(self,line):
        TaggedValue.__init__(self, line)
        label,value = self.value.split(':',1)
        label = label.strip()
        value = value.strip()
        self.value=value
        self.label=label


def taggedValueFactory(line):
    '''
    A function used to dispatch lines of an OBOEntry between the class TaggedValue
    and its inherited classes.

    @param line: a line of an OBOEntry composed of a tag and a value.
    @type line: string

    @return: a class object
    '''

    if (line[0:9]=='namespace' or
        line[0:17]=='default-namespace'):
        return NamespaceValue(line)
    ## DefValue is an inherited class of TaggedValue
    elif line[0:3]=='def':
        return DefValue(line)
    ## SynonymValue is an inherited class of TaggedValue
    elif ((line[0:7]=="synonym" and line[0:14]!="synonymtypedef") or
          line[0:13]=="exact_synonym" or
          line[0:13]=="broad_synonym" or
          line[0:14]=="narrow_synonym"):
        return SynonymValue(line)
    ## XrefValue is an inherited class of TaggedValue
    elif line[0:4]=='xref':
        return XrefValue(line)
    ## NameValue is an inherited class of TaggedValue
    elif line[0:4]=='name':
        return NameValue(line)
    ## RelationshipValue is an inherited class of TaggedValue
    elif (line[0:15]=='intersection_of' or
          line[0:8] =='union_of' or
          line[0:12]=='relationship'):
        return RelationshipValue(line)
    elif (line[0:6]=='remark'):
        return RemarkValue(line)
    ## each line is a couple : tag / value (and some more features)
    else:
        return TaggedValue(line)
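# A dispatch sketch: taggedValueFactory routes each line to the proper class
# from its tag prefix (lines invented):
#
#   taggedValueFactory('name: mitochondrion')            # -> NameValue
#   taggedValueFactory('xref: Wikipedia:Mitochondrion')  # -> XrefValue
#   taggedValueFactory('created_by: jdoe')               # -> plain TaggedValue
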

#################################################################################
##                         Xref treatment area                                 ##
#################################################################################


class Xref(object):
    '''
    A xref object of an OBOEntry. It may be the 'dbxrefs' of SynonymValue and
    DefValue objects or the 'value' of XrefValue objects.
    '''

    __splitdata__ = re.compile(' +(?=["{])')

    def __init__(self,ref):
        if ref == '' :                            # JJ's fix: without this branch,
            ref = None                            # empty refs raise
            data = ''                             # 'list index out of range'
        else :
            data = Xref.__splitdata__.split(ref,1)
            ref = data[0]
        description=None
        trailing_modifiers = None
        if len(data)> 1:
            extra = data[1]
            description = TaggedValue._match_quotedString.search(extra)
            if description is not None:
                description = description.group(0)
                extra = extra.replace(description,'')   # fixed: the cleaned string was previously discarded
            trailing_modifiers=trailingModifierFactory(extra)
        self.reference=ref
        self.description=description
        self.trailing_modifiers=trailing_modifiers

    def __eq__(self,b):
        return ((self.reference==b.reference) and (self.description==b.description)
                and (self.trailing_modifiers==b.trailing_modifiers))

    def __hash__(self):
        return (reduce(lambda x,y:x+y,(hash(z) for z in [self.reference,
                                                         self.description,
                                                         self.trailing_modifiers]),0)) % (2**31)


def xrefFactory(string):
    '''
    Dispatcher of xrefs.

    @param string: a string (between brackets) from an inherited TaggedValue object with a
    dbxrefs signal (actually, the signal can only be found in SynonymValue and DefValue
    objects) or a string (without brackets) from a XrefValue object.
    @type string: string

    @return: a class object

    @note: The dispatcher treats the strings between brackets (from SynonymValue and
    DefValue objects) differently from those without brackets (from XrefValue objects).
    '''

    string = string.strip()
    if string[0]=='[':
        return [Xref(x.strip()) for x in string[1:-1].split(',')]
    else:
        return Xref(string)
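# A hedged sketch of both dispatch branches (identifiers invented):
#
#   xrefFactory('[PMID:11483584, ISBN:0198506732]')  # -> a list of two Xref
#   xrefFactory('EC:2.7.11.1')                       # -> a single Xref
#   # each Xref exposes .reference, .description and .trailing_modifiers
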

#################################################################################
##                        Stanza treatment area                                ##
#################################################################################


class OBOEntry(dict):
    '''
    An entry of an OBOFile. It can be a header (without a stanza name) or
    a stanza (with a stanza name between brackets). It inherits from the class dict.
    '''
    _match_stanza_name = re.compile('(?<=^\[)[^\]]*(?=\])')

    def __init__(self,stanza):
        ## tests whether this is the header of the OBO file (True) or not (False)
        self.isHeader = stanza[0]!='['
        lines = stanza.split('\n')
        ## not the header : there is a [stanzaName]
        if not self.isHeader:
            self.stanzaName = lines[0].strip()[1:-1]
            lines=lines[1:]
            self["stanza"] = [stanza.strip()]

        ## whatever the stanza is.
        for line in lines:
            ## each line is a couple : tag / value
            taggedvalue = taggedValueFactory(line)
            if taggedvalue.tag in self:
                self[taggedvalue.tag].append(taggedvalue)
            else:
                self[taggedvalue.tag]=[taggedvalue]


    def parseStanzaName(stanza):
        sm = OBOEntry._match_stanza_name.search(stanza)
        if sm:
            return sm.group(0)
        else:
            return None

    parseStanzaName=staticmethod(parseStanzaName)


class OBOTerm(OBOEntry):
    '''
    A stanza named 'Term'. It inherits from the class OBOEntry.
    '''
    def __init__(self,stanza):

        ## use of the OBOEntry constructor.
        OBOEntry.__init__(self, stanza)

        assert self.stanzaName=='Term'
        assert 'stanza' in self
        assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id"
        assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name"
        assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term"

        assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term"
        assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term"

        assert 'union_of' not in self or len(self['union_of'])>=2,"A union_of relationship requires at least two terms"
        assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"An intersection_of relationship requires at least two terms"

        if self._isObsolete():
            #assert 'is_a' not in self
            assert 'relationship' not in self
            assert 'inverse_of' not in self
            assert 'disjoint_from' not in self
            assert 'union_of' not in self
            assert 'intersection_of' not in self

        assert 'replaced_by' not in self or self._isObsolete()
        assert 'consider' not in self or self._isObsolete()

    def _getStanza(self):
        return self['stanza'][0]

    ## accessor functions.
    def _getDefinition(self):
        if 'def' in self:
            return self['def'][0]
        return None

    def _getId(self):
        return self['id'][0]

    def _getNamespace(self):
        return self['namespace'][0]

    def _getName(self):
        return self['name'][0]

    def _getComment(self):
        if 'comment' in self:
            return self['comment'][0]
        return None

    def _getAltIds(self):
        if 'alt_id' in self:
            return list(set(self.get('alt_id',None)))
        return None

    def _getIsA(self):
        if 'is_a' in self:
            return list(set(self.get('is_a',None)))
        return None

    def _getSynonym(self):
        if 'synonym' in self :
            return list(set(self.get('synonym',None)))
        return None

    def _getSubset(self):
        if self.get('subset',None) != None:
            return list(set(self.get('subset',None)))
        else:
            return None

    def _getXref(self):
        if 'xref' in self:
            return list(set(self.get('xref',None)))
        return None

    def _getRelationShip(self):
        if 'relationship' in self:
            return list(set(self.get('relationship',None)))
        return None

    def _getUnion(self):
        if 'union_of' in self:   # guard added: set(None) would raise TypeError
            return list(set(self.get('union_of',None)))
        return None

    def _getIntersection(self):
        if 'intersection_of' in self:   # guard added: set(None) would raise TypeError
            return list(set(self.get('intersection_of',None)))
        return None

    def _getDisjonction(self):
        if 'disjoint_from' in self:   # guard added: set(None) would raise TypeError
            return list(set(self.get('disjoint_from',None)))
        return None

    def _isObsolete(self):
        return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true'

    def _getReplacedBy(self):
        if 'replaced_by' in self:
            return list(set(self.get('replaced_by',None)))
        return None

    def _getConsider(self):
        if 'consider' in self:
            return list(set(self.get('consider',None)))
        return None

    ## automatically generated properties !
    stanza          = property(_getStanza,None,None)
    definition      = property(_getDefinition,None,None)
    id              = property(_getId,None,None)
    namespace       = property(_getNamespace,None,None)
    name            = property(_getName,None,None)
    comment         = property(_getComment,None,None)
    alt_ids         = property(_getAltIds,None,None)
    is_a            = property(_getIsA,None,None)
    synonyms        = property(_getSynonym,None,None)
    subsets         = property(_getSubset,None,None)
    xrefs           = property(_getXref,None,None)
    relationship    = property(_getRelationShip,None,None)
    union_of        = property(_getUnion,None,None)
    intersection_of = property(_getIntersection,None,None)
    disjoint_from   = property(_getDisjonction,None,None)
    is_obsolete     = property(_isObsolete,None,None)
    replaced_by     = property(_getReplacedBy,None,None)
    consider        = property(_getConsider,None,None)


def OBOEntryFactory(stanza):
    '''
    Dispatcher of stanza.

    @param stanza: a stanza composed of several lines.
    @type stanza: text

    @return: an C{OBOTerm} | C{OBOEntry} instance

    @note: The dispatcher treats the stanzas which are OBO "Term"
           differently from the others.
    '''

    stanzaType = OBOEntry.parseStanzaName(stanza)

    if stanzaType=="Term":
        return OBOTerm(stanza)
    else:
        return OBOEntry(stanza)

def OBOEntryIterator(file):
    entries = stanzaIterator(file)
    for e in entries:
        debug(e)
        yield OBOEntryFactory(e)
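# A hedged end-to-end sketch ('gene_ontology.obo' is an assumed file name):
#
#   for entry in OBOEntryIterator('gene_ontology.obo'):
#       if isinstance(entry, OBOTerm) and not entry.is_obsolete:
#           print entry.id, entry.name
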
@@ -1,137 +0,0 @@
"""
Module providing high level functions to manage command line options.
"""
import logging
import sys

from logging import debug

from optparse import OptionParser

from obitools.utils import universalOpen
from obitools.utils import fileSize
from obitools.utils import universalTell
from obitools.utils import progressBar
from obitools.format.options import addInputFormatOption, addInOutputOption,\
                                    autoEntriesIterator
import time



def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None):
    '''
    Build an option manager function that is able to parse
    the command line options of the script.

    @param optionDefinitions: list of functions, each describing a set of
                              options. Each function must accept as its
                              unique parameter an instance of OptionParser.
    @type optionDefinitions: list of functions.

    @param entryIterator: an iterator generator function returning
                          entries from the data files.

    @type entryIterator: an iterator generator function with only one
                         parameter of type file
    '''
    parser = OptionParser(progdoc)
    parser.add_option('--DEBUG',
                      action="store_true", dest="debug",
                      default=False,
                      help="Set logging in debug mode")

    parser.add_option('--no-psyco',
                      action="store_true", dest="noPsyco",
                      default=False,
                      help="Don't use psyco even if it is installed")

    parser.add_option('--without-progress-bar',
                      action="store_false", dest="progressbar",
                      default=True,
                      help="deactivate the progress bar")

    checkFormat=False
    for f in optionDefinitions:
        if f == addInputFormatOption or f == addInOutputOption:
            checkFormat=True
        f(parser)

    def commandLineAnalyzer():
        options,files = parser.parse_args()
        if options.debug:
            logging.root.setLevel(logging.DEBUG)

        if checkFormat:
            ei=autoEntriesIterator(options)
        else:
            ei=entryIterator

        i = allEntryIterator(files,ei,with_progress=options.progressbar)
        return options,i

    return commandLineAnalyzer
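# A hedged sketch of the standard script skeleton built on this manager
# (addMetabarcodingOption is the option definition shown earlier in this diff):
#
#   if __name__ == '__main__':
#       optionParser = getOptionManager([addMetabarcodingOption])
#       options, entries = optionParser()
#       for entry in entries:
#           pass   # process each parsed entry
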

_currentInputFileName=None
_currentFile = None
_currentFileSize = None

def currentInputFileName():
    return _currentInputFileName

def currentInputFile():
    return _currentFile

def currentFileSize():
    return _currentFileSize

def currentFileTell():
    return universalTell(_currentFile)

def fileWithProgressBar(file,step=100):
    try:
        size = currentFileSize()
    except:
        size = None

    def fileBar():
        pos=1
        progressBar(pos, size, True,currentInputFileName())
        for l in file:
            progressBar(currentFileTell,size,head=currentInputFileName())
            yield l
        print >>sys.stderr,''
    if size is None:
        return file
    else:
        f = fileBar()
        return f


def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102):
    global _currentFile
    global _currentInputFileName
    global _currentFileSize
    if files :
        for f in files:
            _currentInputFileName=f
            f = universalOpen(f)
            _currentFile=f
            _currentFileSize=fileSize(_currentFile)
            debug(f)
            if with_progress:
                f=fileWithProgressBar(f,step=histo_step)
            if entryIterator is None:
                for line in f:
                    yield line
            else:
                for entry in entryIterator(f):
                    yield entry
    else:
        if entryIterator is None:
            for line in sys.stdin:
                yield line
        else:
            for entry in entryIterator(sys.stdin):
                yield entry