git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@390 60f365c0-8329-0410-b2a4-ec073aeeaa1d

This commit is contained in:
2011-12-05 12:59:34 +00:00
parent 957a59eb5d
commit 4e5d8893e5
155 changed files with 16897 additions and 0 deletions

1054
obitools/SVGdraw.py Normal file

File diff suppressed because it is too large Load Diff

711
obitools/__init__.py Normal file
View File

@ -0,0 +1,711 @@
'''
**obitools** main module
------------------------
.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
obitools module provides base class for sequence manipulation.
All biological sequences must be subclass of :py:class:`obitools.BioSequence`.
Some biological sequences are defined as transformation of other
biological sequences. For example Reversed complemented sequences
are a transformation of a :py:class:`obitools.NucSequence`. This particular
type of sequences are subclasses of the :py:class:`obitools.WrappedBioSequence`.
.. inheritance-diagram:: BioSequence NucSequence AASequence WrappedBioSequence SubSequence DNAComplementSequence
:parts: 1
'''
from weakref import ref
from obitools.utils.iterator import uniqueChain
from itertools import chain
import re
_default_raw_parser = " %s *= *([^;]*);"
try:
    from functools import partial
except ImportError:
    #
    # Pure-python fallback kept for compatibility with Python < 2.5,
    # where functools.partial does not exist.  Only an ImportError is
    # caught: the original bare `except:` would also have swallowed
    # unrelated errors such as KeyboardInterrupt.
    #
    def partial(func, *args, **keywords):
        """Return a callable behaving like *func* with *args*/*keywords* pre-bound."""
        def newfunc(*fargs, **fkeywords):
            newkeywords = keywords.copy()
            newkeywords.update(fkeywords)
            return func(*(args + fargs), **newkeywords)
        # mimic functools.partial's public attributes
        newfunc.func = func
        newfunc.args = args
        newfunc.keywords = keywords
        return newfunc
from obitools.sequenceencoder import DNAComplementEncoder
from obitools.location import Location
class WrapperSetIterator(object):
    # Iterator over a WrapperSet.  The set stores weak references
    # (weakref.ref objects) to wrapper sequences; iterating dereferences
    # each weakref by calling it and yields the referenced object itself.
    def __init__(self,s):
        self._i = set.__iter__(s)
    def next(self):
        # Python 2 iterator protocol entry point; the trailing () call
        # dereferences the weakref returned by the underlying set iterator.
        return self._i.next()()
    def __iter__(self):
        return self
class WrapperSet(set):
    # Set of weak references whose iteration yields the referenced
    # objects themselves (see WrapperSetIterator) instead of the weakrefs.
    def __iter__(self):
        return WrapperSetIterator(self)
class BioSequence(object):
    '''
    BioSequence class is the base class for biological
    sequence representation.
    It provides storage of :
        - the sequence itself,
        - an identifier,
        - a definition,
        - a set of complementary information on a key / value principle.
    .. warning::
        :py:class:`obitools.BioSequence` is an abstract class, this constructor
        can only be called by a subclass constructor.
    '''
    def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
        '''
        :param id: sequence identifier
        :type id: `str`
        :param seq: the sequence
        :type seq: `str`
        :param definition: sequence definition (optional)
        :type definition: `str`
        :param rawinfo: a text containing a set of key=value; patterns
        :type rawinfo: `str`
        :param rawparser: a text describing a regular patterns template
                          used to parse rawinfo
        :type rawparser: `str`
        :param info: extra named parameters can be added to associate complementary
                     data to the sequence
        '''
        # abstract-class guard: only subclasses may be instantiated
        assert type(self)!=BioSequence,"obitools.BioSequence is an abstract class"
        # sequences are normalised to lower case on storage
        self._seq=str(seq).lower()
        self._info = dict(info)
        if rawinfo is not None:
            # the leading space lets the " key=value;" parser pattern match
            # an annotation located at the very start of the raw text
            self._rawinfo=' ' + rawinfo
        else:
            self._rawinfo=None
        self._rawparser=rawparser
        self.definition=definition
        self.id=id
        # tri-state flag: None = taxid not looked up yet, then True/False
        self._hasTaxid=None
    def get_seq(self):
        # raw access to the stored (lower-cased) sequence string
        return self.__seq
    def set_seq(self, value):
        # store the sequence string and cache its length for __len__
        if not isinstance(value, str):
            value=str(value)
        self.__seq = value
        self.__len = len(value)
    def clone(self):
        # Build an independent copy of this sequence, annotations included.
        seq = type(self)(self.id,
                         str(self),
                         definition=self.definition
                         )
        seq._info=dict(self.getTags())
        seq._rawinfo=self._rawinfo
        seq._rawparser=self._rawparser
        seq._hasTaxid=self._hasTaxid
        return seq
    def getDefinition(self):
        '''
        Sequence definition getter.
        :return: the sequence definition
        :rtype: str
        '''
        return self._definition
    def setDefinition(self, value):
        '''
        Sequence definition setter.
        :param value: the new sequence definition
        :type value: C{str}
        :return: C{None}
        '''
        self._definition = value
    def getId(self):
        '''
        Sequence identifier getter
        :return: the sequence identifier
        :rtype: C{str}
        '''
        return self._id
    def setId(self, value):
        '''
        Sequence identifier setter.
        :param value: the new sequence identifier
        :type value: C{str}
        :return: C{None}
        '''
        self._id = value
    def getStr(self):
        '''
        Return the sequence as a string
        :return: the string representation of the sequence
        :rtype: str
        '''
        return self._seq
    def getSymbolAt(self,position):
        '''
        Return the symbol at C{position} in the sequence
        :param position: the desired position. Position start from 0
                         if position is < 0 then they are considered
                         to reference the end of the sequence.
        :type position: `int`
        :return: a one letter string
        :rtype: `str`
        '''
        return str(self)[position]
    def getSubSeq(self,location):
        '''
        return a subsequence as described by C{location}.
        The C{location} parameter can be a L{obitools.location.Location} instance,
        an integer or a python C{slice} instance. If C{location}
        is an integer this method is equivalent to L{getSymbolAt}.
        :param location: the positions of the subsequence to return
        :type location: C{Location} or C{int} or C{slice}
        :return: the subsequence
        :rtype: a single character as a C{str} if C{location} is an integer,
                a L{obitools.SubSequence} instance otherwise.
        '''
        if isinstance(location,Location):
            return location.extractSequence(self)
        elif isinstance(location, int):
            return self.getSymbolAt(location)
        elif isinstance(location, slice):
            return SubSequence(self,location)
        raise TypeError,'key must be a Location, an integer or a slice'
    def getKey(self,key):
        # Lazy annotation lookup: values are first searched in the _info
        # dictionary, then parsed on demand out of the raw annotation text.
        if key not in self._info:
            if self._rawinfo is None:
                if key=='count':
                    # a sequence without a count annotation counts for one read
                    return 1
                else:
                    raise KeyError,key
            p = re.compile(self._rawparser % key)
            m = p.search(self._rawinfo)
            if m is not None:
                v=m.group(1)
                # consume the matched annotation from the raw text so it
                # is parsed only once
                self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
                try:
                    # NOTE(review): eval on annotation text converts numeric
                    # and python-literal values, but is unsafe if sequence
                    # headers come from an untrusted source
                    v = eval(v)
                except:
                    # keep the raw string when it is not a python literal
                    pass
                self._info[key]=v
            else:
                if key=='count':
                    v=1
                else:
                    raise KeyError,key
        else:
            v=self._info[key]
        return v
    def extractTaxon(self):
        '''
        Extract Taxonomy information from the sequence header.
        This method by default return None. It should be subclassed
        if necessary as in L{obitools.seqdb.AnnotatedSequence}.
        :return: None
        '''
        # hasKey may lazily parse a 'taxid' annotation out of _rawinfo
        self._hasTaxid=self.hasKey('taxid')
        return None
    def __str__(self):
        return self.getStr()
    def __getitem__(self,key):
        # string keys are annotations; other keys address the sequence itself
        if isinstance(key, str):
            if key=='taxid' and self._hasTaxid is None:
                self.extractTaxon()
            return self.getKey(key)
        else:
            return self.getSubSeq(key)
    def __setitem__(self,key,value):
        self._info[key]=value
        if key=='taxid':
            # keep the taxid cache flag in sync with the annotation
            self._hasTaxid=value is not None
    def __delitem__(self,key):
        if isinstance(key, str):
            if key in self:
                del self._info[key]
            else:
                raise KeyError,key
            if key=='taxid':
                self._hasTaxid=False
        else:
            raise TypeError,key
    def __iter__(self):
        '''
        Iterate through the sequence symbols
        '''
        return iter(str(self))
    def __len__(self):
        # length cached by set_seq
        return self.__len
    def hasKey(self,key):
        # Like getKey, but only reports existence; a successful lookup in
        # the raw annotation text caches the parsed value in _info.
        rep = key in self._info
        if not rep and self._rawinfo is not None:
            p = re.compile(self._rawparser % key)
            m = p.search(self._rawinfo)
            if m is not None:
                v=m.group(1)
                self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
                try:
                    # NOTE(review): see getKey about eval on annotation text
                    v = eval(v)
                except:
                    pass
                self._info[key]=v
                rep=True
        return rep
    def __contains__(self,key):
        '''
        methods allowing to use the C{in} operator on a C{BioSequence}.
        The C{in} operator test if the C{key} value is defined for this
        sequence.
        :param key: the name of the checked value
        :type key: str
        :return: C{True} if the value is defined, {False} otherwise.
        :rtype: C{bool}
        '''
        if key=='taxid' and self._hasTaxid is None:
            self.extractTaxon()
        return self.hasKey(key)
    def rawiteritems(self):
        # iterate only the already-parsed annotations (no rawinfo parsing)
        return self._info.iteritems()
    def iteritems(self):
        '''
        iterate other items dictionary storing the values
        associated to the sequence. It works similarly to
        the iteritems function of C{dict}.
        :return: an iterator over the items (key,value)
                 link to a sequence
        :rtype: iterator over tuple
        :see: L{items}
        '''
        # force-parse every remaining " key=value;" annotation before
        # iterating, then drop the raw text: it is fully consumed
        if self._rawinfo is not None:
            p = re.compile(self._rawparser % "([a-zA-Z]\w*)")
            for k,v in p.findall(self._rawinfo):
                try:
                    # NOTE(review): see getKey about eval on annotation text
                    self._info[k]=eval(v)
                except:
                    self._info[k]=v
            self._rawinfo=None
        return self._info.iteritems()
    def items(self):
        return [x for x in self.iteritems()]
    def iterkeys(self):
        return (k for k,v in self.iteritems())
    def keys(self):
        return [x for x in self.iterkeys()]
    def getTags(self):
        # calling iteritems forces full parsing of the raw annotations
        self.iteritems()
        return self._info
    def getRoot(self):
        # a plain BioSequence is its own root (see WrappedBioSequence)
        return self
    def getWrappers(self):
        # lazily created set of weak references to the wrapper sequences
        # built on top of this one
        if not hasattr(self, '_wrappers'):
            self._wrappers=WrapperSet()
        return self._wrappers
    def register(self,wrapper):
        # register a wrapper through a weak reference; _unregister is the
        # weakref callback removing dead references from the set
        self.wrappers.add(ref(wrapper,self._unregister))
    def _unregister(self,ref):
        self.wrappers.remove(ref)
    wrappers = property(getWrappers,None,None,'')
    definition = property(getDefinition, setDefinition, None, "Sequence Definition")
    id = property(getId, setId, None, 'Sequence identifier')
    def _getTaxid(self):
        return self['taxid']
    def _setTaxid(self,taxid):
        self['taxid']=taxid
    taxid = property(_getTaxid,_setTaxid,None,'NCBI Taxonomy identifier')
    _seq = property(get_seq, set_seq, None, None)
class NucSequence(BioSequence):
    """
    Specialization of :py:class:`BioSequence` dedicated to DNA sequences.

    The constructor is the same as the :py:class:`BioSequence` one.
    """

    def isNucleotide(self):
        """
        :return: always ``True`` for a DNA sequence
        :rtype: bool
        """
        return True

    def complement(self):
        """
        Build the reverse-complemented view of this sequence.

        :return: a :py:class:`DNAComplementSequence` wrapping this sequence
        :rtype: :py:class:`DNAComplementSequence`
        """
        return DNAComplementSequence(self)
class AASequence(BioSequence):
    """
    Specialization of :py:class:`BioSequence` dedicated to protein sequences.

    The constructor is the same as the :py:class:`BioSequence` one.
    """

    def isNucleotide(self):
        """
        :return: always ``False`` for a protein sequence
        :rtype: bool
        """
        return False
class WrappedBioSequence(BioSequence):
    """
    Base class for sequences defined as a transformation (sub-sequence,
    reverse complement, alignment view...) of another sequence.  Identifier,
    definition and annotations default to the ones of the wrapped sequence.
    .. warning::
        :py:class:`obitools.WrappedBioSequence` is an abstract class, this constructor
        can only be called by a subclass constructor.
    """
    def __init__(self,reference,id=None,definition=None,**info):
        # :param reference: the sequence wrapped by this view
        # :param id: identifier; when None the wrapped id is used
        # :param definition: definition; when None the wrapped one is used
        assert type(self)!=WrappedBioSequence,"obitools.WrappedBioSequence is an abstract class"
        self._wrapped = reference
        # register this view on the wrapped sequence (weak reference)
        reference.register(self)
        self._id=id
        self.definition=definition
        self._info=info
    def clone(self):
        # the clone wraps the same reference sequence
        seq = type(self)(self.wrapped,
                         id=self._id,
                         definition=self._definition
                         )
        seq._info=dict(self._info)
        return seq
    def getWrapped(self):
        return self._wrapped
    def getDefinition(self):
        # fall back on the wrapped definition when none was given
        d = self._definition or self.wrapped.definition
        return d
    def getId(self):
        # fall back on the wrapped identifier when none was given
        d = self._id or self.wrapped.id
        return d
    def isNucleotide(self):
        return self.wrapped.isNucleotide()
    def iterkeys(self):
        # local annotation keys first, then the wrapped ones, no duplicates
        return uniqueChain(self._info.iterkeys(),
                           self.wrapped.iterkeys())
    def rawiteritems(self):
        # local annotations shadow the wrapped ones with the same key
        return chain(self._info.iteritems(),
                     (x for x in self.wrapped.rawiteritems()
                      if x[0] not in self._info))
    def iteritems(self):
        for x in self.iterkeys():
            yield (x,self[x])
    def getKey(self,key):
        # local annotations shadow the wrapped ones
        if key in self._info:
            return self._info[key]
        else:
            return self.wrapped.getKey(key)
    def hasKey(self,key):
        return key in self._info or self.wrapped.hasKey(key)
    def getSymbolAt(self,position):
        return self.wrapped.getSymbolAt(self.posInWrapped(position))
    def posInWrapped(self,position,reference=None):
        # Translate a position in this view into a position in *reference*
        # (default: the directly wrapped sequence), composing the
        # transformations when views are nested.
        if reference is None or reference is self.wrapped:
            return self._posInWrapped(position)
        else:
            return self.wrapped.posInWrapped(self._posInWrapped(position),reference)
    def getStr(self):
        return str(self.wrapped)
    def getRoot(self):
        # follow the chain of wrappers down to the original sequence
        return self.wrapped.getRoot()
    def complement(self):
        """
        The :py:meth:`complement` method of the :py:class:`WrappedBioSequence` class
        raises an exception :py:exc:`AttributeError` if the method is called and the cut
        sequence does not corresponds to a nucleic acid sequence.
        """
        if self.wrapped.isNucleotide():
            return DNAComplementSequence(self)
        raise AttributeError
    def _posInWrapped(self,position):
        # identity transformation; subclasses override this hook
        return position
    definition = property(getDefinition,BioSequence.setDefinition, None)
    id = property(getId,BioSequence.setId, None)
    wrapped = property(getWrapped, None, None, "A pointer to the wrapped sequence")
    def _getWrappedRawInfo(self):
        # the raw annotation text always belongs to the wrapped sequence
        return self.wrapped._rawinfo
    _rawinfo = property(_getWrappedRawInfo)
class SubSequence(WrappedBioSequence):
    """
    Wrapped view exposing a region of another :py:class:`obitools.BioSequence`,
    described either by a python slice or by a (start, stop) couple.
    """
    @staticmethod
    def _sign(x):
        # Return -1, 0 or 1 according to the sign of x.
        if x == 0:
            return 0
        elif x < 0:
            return -1
        return 1
    def __init__(self,reference,
                 location=None,
                 start=None,stop=None,
                 id=None,definition=None,
                 **info):
        '''
        :param reference: the sequence to cut
        :param location: a python slice describing the cut; when it is not
                         a slice the (start, stop) couple is used instead
        :param start: first position of the cut (default 0)
        :param stop: end of the cut (default len(reference))
        :param id: identifier of the subsequence (default: <wrapped id>_SUB)
        :param definition: definition of the subsequence
        '''
        # Bug fix: forward id and definition to the base constructor;
        # they were previously discarded and replaced by literal None,
        # silently ignoring caller-supplied values.
        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
        if isinstance(location, slice):
            self._location = location
        else:
            step = 1
            if not isinstance(start, int):
                start = 0
            if not isinstance(stop,int):
                stop = len(reference)
            self._location=slice(start,stop,step)
        # normalised (start, stop, step) triple for the wrapped length
        self._indices=self._location.indices(len(self.wrapped))
        self._xrange=xrange(*self._indices)
        # record the cut boundaries as an annotation
        self._info['cut']='[%d,%d,%s]' % self._indices
        if hasattr(reference,'quality'):
            # qualities follow the same cut as the sequence
            self.quality = reference.quality[self._location]
    def getId(self):
        d = self._id or ("%s_SUB" % self.wrapped.id)
        return d
    def clone(self):
        seq = WrappedBioSequence.clone(self)
        seq._location=self._location
        seq._indices=seq._location.indices(len(seq.wrapped))
        seq._xrange=xrange(*seq._indices)
        return seq
    def __len__(self):
        return len(self._xrange)
    def getStr(self):
        return ''.join([x for x in self])
    def __iter__(self):
        return (self.wrapped.getSymbolAt(x) for x in self._xrange)
    def _posInWrapped(self,position):
        # the xrange directly maps cut positions onto wrapped positions
        return self._xrange[position]
    id = property(getId,BioSequence.setId, None)
class DNAComplementSequence(WrappedBioSequence):
    """
    Class used to represent a reverse complemented DNA sequence. Usually instances
    of this class are produced by using the :py:meth:`NucSequence.complement` method.
    """
    # IUPAC nucleotide complement table; gaps are preserved and 'u' pairs with 'a'
    _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
           'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
           's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
           'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
           '-': '-'}
    def __init__(self,reference,
                 id=None,definition=None,**info):
        '''
        :param reference: the wrapped DNA sequence
        :param id: identifier (default: <wrapped id>_CMP)
        :param definition: sequence definition
        '''
        # Bug fix: forward id and definition to the base constructor;
        # they were previously discarded and replaced by literal None,
        # silently ignoring caller-supplied values.
        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
        assert reference.isNucleotide()
        self._info['complemented']=True
        if hasattr(reference,'quality'):
            # qualities are reversed along with the sequence
            self.quality = reference.quality[::-1]
    def getId(self):
        d = self._id or ("%s_CMP" % self.wrapped.id)
        return d
    def __len__(self):
        return len(self._wrapped)
    def getStr(self):
        return ''.join([x for x in self])
    def __iter__(self):
        return (self.getSymbolAt(x) for x in xrange(len(self)))
    def _posInWrapped(self,position):
        # position i maps onto the i-th symbol from the end of the wrapped seq
        return -(position+1)
    def getSymbolAt(self,position):
        return DNAComplementSequence._comp[self.wrapped.getSymbolAt(self.posInWrapped(position))]
    def complement(self):
        """
        The :py:meth:`complement` method of the :py:class:`DNAComplementSequence` class actually
        returns the wrapped sequence. Effectively the reversed complemented sequence of a reversed
        complemented sequence is the initial sequence.
        """
        return self.wrapped
    id = property(getId,BioSequence.setId, None)
def _isNucSeq(text):
    """
    Heuristically decide whether *text* looks like a nucleic acid sequence.

    Returns ``True`` when every symbol belongs to the alphabet known by
    :py:class:`DNAComplementEncoder` and strict A/C/G/T/- symbols account
    for more than 80% of the text (case insensitive).
    """
    lowered = text.lower()
    strict = sum(1 for symbol in lowered if symbol in 'acgt-')
    foreign = sum(1 for symbol in lowered
                  if symbol not in DNAComplementEncoder._comp)
    return foreign == 0 and float(strict) > len(text) * 0.8
def bioSeqGenerator(id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
    """
    Factory building either a :py:class:`NucSequence` or an :py:class:`AASequence`.

    The new instance is a :py:class:`NucSequence` when more than 80% of the
    symbols of `seq` are *A*, *C*, *G*, *T* or *-* (upper or lower case) and
    no symbol falls outside the nucleotide alphabet; otherwise an
    :py:class:`AASequence` is returned.

    :param id: sequence identifier
    :type id: `str`
    :param seq: the sequence
    :type seq: `str`
    :param definition: sequence definition (optional)
    :type definition: `str`
    :param rawinfo: a text containing a set of key=value; patterns
    :type rawinfo: `str`
    :param rawparser: a regular expression template used to parse rawinfo
    :type rawparser: `str`
    :param info: extra named parameters attached to the sequence
    """
    if _isNucSeq(seq):
        seqClass = NucSequence
    else:
        seqClass = AASequence
    return seqClass(id,seq,definition,rawinfo,rawparser,**info)

BIN
obitools/__init__.pyc Normal file

Binary file not shown.

View File

@ -0,0 +1,13 @@
from _nws import NWS
from _upperbond import indexSequences
from _lcs import LCS,lenlcs
from _assemble import DirectAssemble, ReverseAssemble
from _qsassemble import QSolexaDirectAssemble,QSolexaReverseAssemble
from _rassemble import RightDirectAssemble as RightReverseAssemble
from _qsrassemble import QSolexaRightDirectAssemble,QSolexaRightReverseAssemble
from _freeendgap import FreeEndGap
from _freeendgapfm import FreeEndGapFullMatch
from _upperbond import isLCSReachable

BIN
obitools/align/_assemble.so Executable file

Binary file not shown.

BIN
obitools/align/_dynamic.so Executable file

Binary file not shown.

BIN
obitools/align/_freeendgap.so Executable file

Binary file not shown.

BIN
obitools/align/_freeendgapfm.so Executable file

Binary file not shown.

BIN
obitools/align/_lcs.so Executable file

Binary file not shown.

BIN
obitools/align/_nws.so Executable file

Binary file not shown.

BIN
obitools/align/_profilenws.so Executable file

Binary file not shown.

BIN
obitools/align/_qsassemble.so Executable file

Binary file not shown.

BIN
obitools/align/_qsrassemble.so Executable file

Binary file not shown.

BIN
obitools/align/_rassemble.so Executable file

Binary file not shown.

BIN
obitools/align/_upperbond.so Executable file

Binary file not shown.

View File

@ -0,0 +1,56 @@
'''
Created on 14 mai 2009
@author: coissac
'''
from obitools import WrappedBioSequence
class HomoNucBioSeq(WrappedBioSequence):
    '''
    Wrapped nucleic sequence in which homopolymers (runs of identical
    nucleotides) are collapsed to a single symbol.  The length of every
    collapsed run is stored in the 'homopolymer' annotation and a
    cumulative index maps collapsed positions back to the wrapped sequence.
    '''
    def __init__(self,reference,id=None,definition=None,**info):
        '''
        :param reference: the wrapped nucleic sequence
        :param id: identifier (default: the wrapped id)
        :param definition: sequence definition
        '''
        assert reference.isNucleotide(),"reference must be a nucleic sequence"
        # Bug fix: forward id and definition to the base constructor
        # (they were previously discarded and replaced by literal None).
        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
        self.__cleanHomopolymer()
    def __cleanHomopolymer(self):
        # Collapse runs of identical symbols, recording each run length.
        symbols = []
        counts = []
        old=None
        nc=0
        for n in self._wrapped:
            if old is not None and n!=old:
                symbols.append(old)
                counts.append(nc)
                nc=0
            old=n
            nc+=1
        if old is not None:
            # Bug fix: flush the final run, which the original loop dropped.
            symbols.append(old)
            counts.append(nc)
        self._cached=''.join(symbols)
        self['homopolymer']=counts
        # cumulative run lengths used to map collapsed positions back
        # onto the wrapped sequence
        self._cumulative=[]
        total=0
        for length in counts:   # Bug fix: iterated undefined self._count before
            total+=length
            self._cumulative.append(total)
    def __len__(self):
        return len(self._cached)
    def getStr(self):
        return self._cached
    def __iter__(self):
        return iter(self._cached)
    def _posInWrapped(self,position):
        # NOTE(review): returns the cumulative length *including* run
        # `position`, i.e. the wrapped position just after the run --
        # looks like an off-by-one; TODO confirm the intended mapping.
        return self._cumulative[position]

46
obitools/align/ssearch.py Normal file
View File

@ -0,0 +1,46 @@
import os
import re
from obitools.fasta import formatFasta
class SsearchParser(object):
    # Parser for the text report produced by the fasta/ssearch programs.
    # Exposes the query name, the query length and, when a "best scores"
    # table is present, a list of best-hit property dictionaries.

    # captures the query name on the line following "Query:"
    _matchQuery = re.compile("^Query:.+\n.+?>+([^ ]+)", re.MULTILINE)
    # captures the query length (the number preceding " nt")
    _matchLQuery = re.compile("^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE)
    # captures the whole "best scores" table, up to the ">>>" marker
    _matchProp = re.compile("^The best scores are:.*\n(.+?)>>>", re.DOTALL+re.MULTILINE)
    def __init__(self,file):
        # :param file: an ssearch report given either as an opened
        #              file-like object or as a file name
        if isinstance(file,str):
            file = open(file,'rU')
        self.data = file.read()
        self.query= SsearchParser._matchQuery.search(self.data).group(1)
        self.queryLength= int(SsearchParser._matchLQuery.search(self.data).group(1))
        props = SsearchParser._matchProp.search(self.data)
        # NOTE(review): self.props only exists when a best-scores table was
        # found in the report -- callers must be prepared for its absence
        if props:
            # drop the header line and the two trailing lines of the table
            props=props.group(0).split('\n')[1:-2]
            self.props=[]
            for line in props:
                # each line is "<subject description>\t<numeric columns>"
                subject,tab = line.split('\t')
                tab=tab.split()
                ssp = subject.split()
                ac = ssp[0]
                # NOTE(review): assumes the 5th field from the end holds the
                # subject length followed by one extra character (e.g. ")")
                # -- confirm against the ssearch report format
                dbl= int(ssp[-5][:-1])
                ident = float(tab[0])
                # match length deduced from the alignment boundary columns
                matchlen = abs(int(tab[5]) - int(tab[4])) +1
                self.props.append({"ac" :ac,
                                   "identity" :ident,
                                   "subjectlength":dbl,
                                   'matchlength' : matchlen})
def run(seq,database,program='fasta35',opts=''):
    # Run one similarity search of *seq* against *database* and parse the
    # report.  Python 2 only (os.popen3 and the `print >>` statement).
    # NOTE(review): the command line is built by string interpolation --
    # unsafe if program/opts/database can come from untrusted input.
    ssearchin,ssearchout,ssearcherr = os.popen3("%s %s %s" % (program,opts,database))
    # feed the query sequence to the program's standard input as FASTA
    print >>ssearchin,formatFasta(seq)
    ssearchin.close()
    result = SsearchParser(ssearchout)
    return seq,result
def ssearchIterator(sequenceIterator,database,program='ssearch35',opts=''):
    """
    Generator running one similarity search per input sequence.

    Yields the (sequence, SsearchParser) couples produced by :py:func:`run`.
    """
    for query in sequenceIterator:
        yield run(query,database,program,opts)

View File

@ -0,0 +1,175 @@
from obitools import BioSequence
from obitools import WrappedBioSequence
from copy import deepcopy
class GappedPositionException(Exception):
    # Raised when a position inside an AlignedSequence falls on a gap,
    # i.e. it has no counterpart in the wrapped (ungapped) sequence.
    pass
class AlignedSequence(WrappedBioSequence):
    """
    View of a sequence inside an alignment: the wrapped (ungapped)
    sequence plus a description of its gaps.
    """
    def __init__(self,reference,
                 id=None,definition=None,**info):
        '''
        :param reference: the ungapped sequence
        :param id: identifier (default: <wrapped id>_ALN)
        :param definition: sequence definition
        '''
        # Bug fix: forward id and definition to the base constructor;
        # they were previously discarded and replaced by literal None,
        # silently ignoring caller-supplied values.
        WrappedBioSequence.__init__(self,reference,id=id,definition=definition,**info)
        self._length=len(reference)
        # gap vector: list of [symbol count, gap length] couples;
        # initially one couple covering the whole sequence with no gap
        self._gaps=[[self._length,0]]
    def clone(self):
        seq = WrappedBioSequence.clone(self)
        seq._gaps=deepcopy(self._gaps)
        seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
        return seq
    def setGaps(self, value):
        '''
        Set gap vector to an AlignedSequence.
        Gap vector describes the gap positions on a sequence.
        It is a list of couples. The first couple member is the count
        of sequence letters, the second one is the gap length.
        @param value: a list of length 2 list describing gap positions
        @type value: list of couple
        '''
        assert isinstance(value, list),'Gap vector must be a list'
        assert reduce(lambda x,y: x and y,
                      (isinstance(z, list) and len(z)==2 for z in value),
                      True),"Value must be a list of length 2 list"
        # the symbol counts must add up to the wrapped sequence length
        lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0)
        assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
        self._gaps = value
        self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0)
    def getGaps(self):
        return tuple(self._gaps)
    gaps = property(getGaps, setGaps, None, "Gaps's Docstring")
    def _getIndice(self,pos):
        # Locate the couple of the gap vector containing gapped position
        # *pos*.  Returns (couple index, offset); the offset is >= 0 inside
        # the symbol part and < 0 inside the gap part of the couple.
        i=0
        cpos=0
        for s,g in self._gaps:
            cpos+=s
            if cpos>pos:
                return i,pos-cpos+s
            cpos+=g
            if cpos>pos:
                return i,-pos+cpos-g-1
            i+=1
        raise IndexError
    def getId(self):
        d = self._id or ("%s_ALN" % self.wrapped.id)
        return d
    def __len__(self):
        return self._length
    def getStr(self):
        return ''.join([x for x in self])
    def __iter__(self):
        # yield the wrapped symbols interleaved with the '-' runs described
        # by the gap vector
        def isymb():
            cpos=0
            for s,g in self._gaps:
                for x in xrange(s):
                    yield self.wrapped[cpos+x]
                for x in xrange(g):
                    yield '-'
                cpos+=s
        return isymb()
    def _posInWrapped(self,position):
        i,s=self._getIndice(position)
        if s<0:
            # the position falls inside a gap: no wrapped counterpart
            raise GappedPositionException
        value=self._gaps
        p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s
        return p
    def getSymbolAt(self,position):
        try:
            return self.wrapped.getSymbolAt(self.posInWrapped(position))
        except GappedPositionException:
            return '-'
    def insertGap(self,position,count=1):
        # Insert *count* gap symbols before *position*, splitting a symbol
        # run or enlarging an existing gap as required.
        if position==self._length:
            idx=len(self._gaps)-1
            p=-1
        else:
            idx,p = self._getIndice(position)
        if p >= 0:
            self._gaps.insert(idx, [p,count])
            self._gaps[idx+1][0]-=p
        else:
            self._gaps[idx][1]+=count
        self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
    id = property(getId,BioSequence.setId, None, "Sequence Identifier")
class Alignment(list):
    """
    List of aligned :py:class:`BioSequence` objects, all constrained to
    share the same length (checked on append / item assignment).
    """

    def _assertData(self,data):
        # Refuse anything that is not a BioSequence of the alignment length;
        # the first accepted sequence fixes that length.
        assert isinstance(data, BioSequence),'You must only add bioseq to an alignement'
        if hasattr(self, '_alignlen'):
            assert self._alignlen==len(data),'All aligned sequences must have the same length'
        else:
            self._alignlen=len(data)
        return data

    def clone(self):
        # clone every member into a brand new alignment
        return Alignment(member.clone() for member in self)

    def append(self,data):
        list.append(self,self._assertData(data))

    def __setitem__(self,index,data):
        list.__setitem__(self,index,self._assertData(data))

    def getSite(self,key):
        # column of the alignment at integer position *key*
        if isinstance(key,int):
            return [member[key] for member in self]

    def insertGap(self,position,count=1):
        # insert the same gap in every aligned sequence
        for member in self:
            member.insertGap(position,count)

    def isFullGapSite(self,key):
        # True when the column contains only gap symbols
        return all(symbol=='-' for symbol in self.getSite(key))

    def isGappedSite(self,key):
        # True when the column contains at least one gap symbol
        return '-' in self.getSite(key)

    def __str__(self):
        # pretty-print the alignment 60 columns at a time
        width = len(self[0])
        idmax = max(len(member.id) for member in self)+2
        template= "%%-%ds %%-60s" % idmax
        chunks = []
        for start in xrange(0,width,60):
            for member in self:
                chunks.append((template % (member.id,member[start:start+60])).strip() + '\n')
            chunks.append("\n")
        return ''.join(chunks)
def alignmentReader(file,sequenceIterator):
    """
    Load a whole alignment from *file*, parsed by *sequenceIterator*.

    :param file: the file to read
    :param sequenceIterator: a parser returning an iterator over the
                             aligned sequences of the file
    :return: an :py:class:`Alignment` instance
    """
    result = Alignment()
    for record in sequenceIterator(file):
        result.append(record)
    return result
def columnIterator(alignment):
    """
    Iterate over the columns of *alignment*.

    Yields, for each position, the list of the symbols observed at this
    position in every aligned sequence.
    """
    width = len(alignment[0])
    for position in xrange(width):
        yield [member[position] for member in alignment]

47
obitools/alignment/ace.py Normal file
View File

@ -0,0 +1,47 @@
from obitools.format.genericparser import GenericParser
from obitools.utils import universalOpen
from obitools.fasta import parseFastaDescription
from obitools import NucSequence
import sys
# Generic parser for ACE assembly files: a new entry starts at each "CO " line.
_contigIterator=GenericParser('^CO ')
# AF line: read name, orientation (U/C) and padded start position
_contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)')
# RD line: read name, three numeric fields, then the padded sequence itself
_contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n')
# DS line: free-text description of the read
_contigIterator.addParseAction('DS', '\nDS +(.+)')
# CO line: contig name
_contigIterator.addParseAction('CO', '^CO (\S+)')
def contigIterator(file):
    '''
    Iterate over the contigs described in an ACE assembly file.

    For each contig, yields a (contig name, list of NucSequence) couple
    where every read is padded with '-' so that all the reads of the
    contig share the same coordinate system.

    .. note:: Python 2 only (`print >>` statement, `map(None, ...)`).
    '''
    file = universalOpen(file)
    for entry in _contigIterator(file):
        contig=[]
        # map(None, ...) zips the RD, DS and AF records of the contig
        for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']):
            id = rd[0]
            shift = int(af[2])
            if shift < 0:
                # Bug fix: corrected typo in the user-facing message
                # ("paddng" -> "padding")
                print >> sys.stderr,"Sequence %s in contig %s has a negative padding value %d : skipped" % (id,entry['CO'][0],shift)
                #continue
            definition,info = parseFastaDescription(ds)
            info['shift']=shift
            # drop line breaks and turn pad symbols '*' into gaps '-'
            seq = rd[4].replace('\n','').replace('*','-').strip()
            contig.append(NucSequence(id,seq,definition,**info))
        maxlen = max(len(x)+x['shift'] for x in contig)
        minshift=min(x['shift'] for x in contig)
        rep = []
        for s in contig:
            info = s.getTags()
            # re-base every shift on the smallest one, then pad both ends
            info['shift']-=minshift-1
            head = '-' * (info['shift']-1)
            tail = (maxlen + minshift - len(s) - info['shift'] - 1)
            info['tail']=tail
            newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info)
            rep.append(newseq)
        yield entry['CO'][0],rep

View File

@ -0,0 +1,7 @@
'''
@author: merciece
Creates the tree representing the coverage of 2 primers from an ecoPCR output file and an ecoPCR database.
'''

View File

@ -0,0 +1,62 @@
#!/usr/local/bin/python
'''
Created on 24 nov. 2011
@author: merciece
'''
def main(amplifiedSeqs, seqsFromDB, keptRanks, errors, tax) :
    '''
    Compute the taxonomic coverage (Bc, in percent) of a primer pair.

    For every taxonomic group of the kept ranks, the number of taxa
    amplified with at most 3 total primer mismatches (error threshold is
    set to 3) is compared to the number of taxa present in the reference
    database.

    :param amplifiedSeqs: sequences amplified during the ecoPCR run
    :param seqsFromDB: sequences of the reference database
    :param keptRanks: set of rank identifiers taken into account
    :param errors: mapping sequence id -> error record (index 2 = total)
    :param tax: taxonomy providing ``parentalTreeIterator``
    :return: mapping group -> coverage percentage rounded to 2 digits
    '''
    def taxaByGroup(sequences):
        # Collect, for every kept-rank ancestor group, the set of taxids.
        groups = {}
        for sequence in sequences:
            taxid = sequence['taxid']
            lineage = [ancestor for ancestor in tax.parentalTreeIterator(taxid)]
            for ancestor in lineage:
                # skip the taxon itself; keep only ancestors of kept ranks
                if ancestor != lineage[0] and ancestor[1] in keptRanks:
                    groups.setdefault(ancestor[0], set()).add(taxid)
        return groups

    dbGroups = taxaByGroup(seqsFromDB)
    # only sequences amplified with at most 3 mismatches are counted
    wellAmplified = (s for s in amplifiedSeqs if errors[s.id][2] <= 3)
    ampGroups = taxaByGroup(wellAmplified)

    BcValues = {}
    for group in dbGroups:
        if group in ampGroups:
            coverage = float(len(ampGroups[group])) / len(dbGroups[group]) * 100
            BcValues[group] = round(coverage, 2)
        else:
            BcValues[group] = 0.0
    return BcValues

View File

@ -0,0 +1,72 @@
#!/usr/local/bin/python
'''
Created on 24 nov. 2011
@author: merciece
'''
import sys
def main(amplifiedSeqs, seqsFromDB, keptRanks, tax) :
    # Debug / incomplete variant of the Bc computation: it only prints the
    # taxa-by-group tables on stderr and returns an empty dictionary.
    # Python 2 only (`print>>` statement).
    BcValues = {}
    #speciesid = tax.findRankByName('species')
    #subspeciesid = tax.findRankByName('subspecies')
    listtaxonbygroup = {}
    for seq in seqsFromDB :
        taxid = seq['taxid']
        # full lineage of the taxon; the first element is the taxon itself
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p :
            if a != p[0] :
                if a[1] in keptRanks :
                    # NOTE(review): here the whole (taxon, rank) couple is
                    # used as the group key, unlike the sibling scripts
                    # that use only the taxon -- confirm which is intended
                    group = a
                    if group in listtaxonbygroup:
                        listtaxonbygroup[group].add(taxid)
                    else:
                        listtaxonbygroup[group]=set([taxid])
    #stats = dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup)
    print>>sys.stderr, listtaxonbygroup
    listtaxonbygroup = {}
    for seq in amplifiedSeqs :
        taxid = seq['taxid']
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p :
            if a != p[0] :
                if a[1] in keptRanks :
                    group = a
                    if group in listtaxonbygroup:
                        listtaxonbygroup[group].add(taxid)
                    else:
                        listtaxonbygroup[group]=set([taxid])
    print>>sys.stderr, listtaxonbygroup
    return BcValues
# dbstats= dict((x,len(listtaxonbygroup[x])) for x in listtaxonbygroup)
#
# ranks = [r for r in keptRanks]
# ranks.sort()
#
# print '%-20s\t%10s\t%10s\t%7s' % ('rank','ecopcr','db','percent')
#
# print>>sys.stderr, stats
# print>>sys.stderr, dbstats
# print>>sys.stderr, ranks
#
# for r in ranks:
# if r in dbstats and dbstats[r]:
# print '%-20s\t%10d\t%10d\t%8.2f' % (r,dbstats[r],stats[r],float(dbstats[r])/stats[r]*100)

View File

@ -0,0 +1,108 @@
#!/usr/local/bin/python
'''
Created on 25 nov. 2011
@author: merciece
'''
from obitools.graph.rootedtree import nexusFormat
# FigTree display-settings block appended verbatim to the generated NEXUS
# output (branch/node/tip label styles, layout, scale bar, ...).
figtree="""\
begin figtree;
set appearance.backgroundColorAttribute="User Selection";
set appearance.backgroundColour=#-1;
set appearance.branchColorAttribute="bc";
set appearance.branchLineWidth=2.0;
set appearance.foregroundColour=#-16777216;
set appearance.selectionColour=#-2144520576;
set branchLabels.colorAttribute="User Selection";
set branchLabels.displayAttribute="errors";
set branchLabels.fontName="sansserif";
set branchLabels.fontSize=10;
set branchLabels.fontStyle=0;
set branchLabels.isShown=true;
set branchLabels.significantDigits=4;
set layout.expansion=2000;
set layout.layoutType="RECTILINEAR";
set layout.zoom=0;
set nodeBars.barWidth=4.0;
set nodeLabels.colorAttribute="User Selection";
set nodeLabels.displayAttribute="label";
set nodeLabels.fontName="sansserif";
set nodeLabels.fontSize=10;
set nodeLabels.fontStyle=0;
set nodeLabels.isShown=true;
set nodeLabels.significantDigits=4;
set polarLayout.alignTipLabels=false;
set polarLayout.angularRange=0;
set polarLayout.rootAngle=0;
set polarLayout.rootLength=100;
set polarLayout.showRoot=true;
set radialLayout.spread=0.0;
set rectilinearLayout.alignTipLabels=false;
set rectilinearLayout.curvature=0;
set rectilinearLayout.rootLength=100;
set scale.offsetAge=0.0;
set scale.rootAge=1.0;
set scale.scaleFactor=1.0;
set scale.scaleRoot=false;
set scaleAxis.automaticScale=true;
set scaleAxis.fontSize=8.0;
set scaleAxis.isShown=false;
set scaleAxis.lineWidth=2.0;
set scaleAxis.majorTicks=1.0;
set scaleAxis.origin=0.0;
set scaleAxis.reverseAxis=false;
set scaleAxis.showGrid=true;
set scaleAxis.significantDigits=4;
set scaleBar.automaticScale=true;
set scaleBar.fontSize=10.0;
set scaleBar.isShown=true;
set scaleBar.lineWidth=1.0;
set scaleBar.scaleRange=0.0;
set scaleBar.significantDigits=4;
set tipLabels.colorAttribute="User Selection";
set tipLabels.displayAttribute="Names";
set tipLabels.fontName="sansserif";
set tipLabels.fontSize=10;
set tipLabels.fontStyle=0;
set tipLabels.isShown=true;
set tipLabels.significantDigits=4;
set trees.order=false;
set trees.orderType="increasing";
set trees.rooting=false;
set trees.rootingType="User Selection";
set trees.transform=false;
set trees.transformType="cladogram";
end;
"""
def cartoonRankGenerator(rank):
    """
    Build a node predicate selecting nodes of taxonomic rank *rank*.

    :return: a function usable as the ``cartoon`` argument of nexusFormat
    """
    def cartoon(node):
        if 'rank' not in node:
            return False
        return node['rank'] == rank
    return cartoon
def collapseBcGenerator(Bclimit):
    """
    Build a node predicate selecting nodes whose Bc value is at most *Bclimit*.

    :return: a function usable as the ``collapse`` argument of nexusFormat
    """
    def collapse(node):
        if 'bc' not in node:
            return False
        return node['bc'] <= Bclimit
    return collapse
def label(node):
    """
    Format the display label of a tree node: its name, prefixed by the
    signed Bc value when one is attached to the node.
    """
    if 'bc' not in node:
        return " %s" % node['name']
    return "(%+3.1f) %s" % (node['bc'], node['name'])
def main(coverageTree) :
    # Print the coverage tree as a NEXUS document decorated with the
    # FigTree settings defined in this module; nodes of rank 'family'
    # are drawn as cartoons.  Python 2 only (`print` statement).
    print nexusFormat(coverageTree,
                      label=label,
                      blocks=figtree,
                      cartoon=cartoonRankGenerator('family'))
    #collapse=collapseBcGenerator(70))

View File

@ -0,0 +1,56 @@
#!/usr/local/bin/python
'''
Created on 24 nov. 2011
@author: merciece
'''
def main(seqs, keptRanks, tax):
    '''
    Compute primer-mismatch statistics per sequence and per taxon.

    :param seqs: annotated ecoPCR sequences
    :param keptRanks: set of rank identifiers used for the aggregation
    :param tax: taxonomy providing ``parentalTreeIterator``
    :return: the (errors by sequence, errors by taxon) couple
    '''
    perSequence = getErrorsOnLeaves(seqs)
    perTaxon = propagateErrors(perSequence, keptRanks, tax)
    return perSequence, perTaxon
def getErrorsOnLeaves(seqs) :
    '''
    Build the per-sequence error table.

    :param seqs: iterable of sequences carrying the 'forward_error',
                 'reverse_error' and 'taxid' annotations
    :return: mapping sequence id -> [forward, reverse, total, 1, taxid]
    '''
    errors = {}
    for sequence in seqs:
        forward = sequence['forward_error']
        reverse = sequence['reverse_error']
        errors[sequence.id] = [forward,
                               reverse,
                               forward + reverse,
                               1,                     # one sequence per leaf
                               sequence['taxid']]
    return errors
def propagateErrors(errorsOnLeaves, keptRanks, tax) :
    '''
    Aggregate the per-sequence mismatch counts at every kept taxonomic rank.

    :param errorsOnLeaves: mapping built by :py:func:`getErrorsOnLeaves` :
                           sequence id -> [forward, reverse, total, count, taxid]
    :param keptRanks: set of rank identifiers on which errors are aggregated
    :param tax: taxonomy providing ``parentalTreeIterator``
    :return: mapping taxon -> [mean forward, mean reverse, mean total,
             sequence count, taxid of the first aggregated sequence],
             means rounded to 2 digits
    '''
    allErrors = {}
    for seq in errorsOnLeaves :
        taxid = errorsOnLeaves[seq][4]
        p = [a for a in tax.parentalTreeIterator(taxid)]
        for a in p :
            if a[1] in keptRanks :
                group = a[0]
                if group in allErrors :
                    allErrors[group][0] += errorsOnLeaves[seq][0]
                    allErrors[group][1] += errorsOnLeaves[seq][1]
                    allErrors[group][2] += errorsOnLeaves[seq][2]
                    allErrors[group][3] += 1
                else :
                    # Bug fix: copy the leaf record instead of aliasing it;
                    # the accumulation and averaging below would otherwise
                    # mutate the caller's errorsOnLeaves values in place.
                    allErrors[group] = list(errorsOnLeaves[seq])
    for group in allErrors :
        # average the accumulated errors over the number of sequences
        allErrors[group][0] /= float(allErrors[group][3])
        allErrors[group][1] /= float(allErrors[group][3])
        allErrors[group][2] /= float(allErrors[group][3])
        allErrors[group][0] = round(allErrors[group][0], 2)
        allErrors[group][1] = round(allErrors[group][1], 2)
        allErrors[group][2] = round(allErrors[group][2], 2)
    return allErrors

View File

@ -0,0 +1,69 @@
#!/usr/local/bin/python
'''
Created on 23 nov. 2011
@author: merciece
'''
from obitools.ecopcr import sequence
from obitools.ecopcr import taxonomy
def main(entries, options):
    """Load the reference database, then keep only the ecoPCR entries
    present in its ingroup.

    @return: (reference data dict, filtered entry list)
    """
    dbData = ecoPCRDatabaseReader(options)
    kept = ecoPCRFileReader(entries, dbData)
    return dbData, kept
def ecoPCRDatabaseReader(options):
    """Read the ecoPCR reference database and split it into ingroup and
    outgroup sequence sets.

    Only sequences whose lineage carries the species, genus AND family
    ranks are kept.  Ranks observed in a fraction of the ingroup larger
    than options.rankthresold are reported as the kept rank set.

    @return: dict with 'ingroup', 'outgroup', 'ranks' and 'taxonomy'
    """
    tax = taxonomy.EcoTaxonomyDB(options.taxonomy)
    seqs = sequence.EcoPCRDBSequenceIterator(options.taxonomy, taxonomy=tax)

    norankid = tax.findRankByName('no rank')
    requiredRanks = set([tax.findRankByName('species'),
                         tax.findRankByName('genus'),
                         tax.findRankByName('family')])

    rankUsage = {}
    ingroup = {}
    outgroup = {}

    for s in seqs:
        if 'taxid' not in s:
            continue
        taxid = s['taxid']
        lineageRanks = set(p[1] for p in tax.parentalTreeIterator(taxid)
                           if p[1] != norankid)
        # Keep only sequences annotated down to species, genus and family.
        if len(requiredRanks & lineageRanks) == 3:
            for r in lineageRanks:
                rankUsage[r] = rankUsage.get(r, 0) + 1
            if tax.isAncestor(options.ingroup, taxid):
                ingroup[s.id] = s
            else:
                outgroup[s.id] = s

    keptranks = set(r for r in rankUsage
                    if float(rankUsage[r]) / float(len(ingroup)) > options.rankthresold)

    return {'ingroup' : ingroup,
            'outgroup': outgroup,
            'ranks'   : keptranks,
            'taxonomy': tax}
def ecoPCRFileReader(entries, filteredDataFromDB):
    """Keep the ecoPCR entries whose id belongs to the reference
    ingroup (entries without a 'taxid' tag are dropped)."""
    ingroup = filteredDataFromDB['ingroup']
    return [s for s in entries if 'taxid' in s and s.id in ingroup]

View File

@ -0,0 +1,42 @@
#!/usr/local/bin/python
'''
Created on 25 nov. 2011
@author: merciece
'''
from obitools.graph.rootedtree import RootedTree
def main(BcValues,errors,tax) :
    """Build a RootedTree over the taxa in *BcValues*.

    Each node carries its rank, scientific name, Bc value and, when
    available in *errors*, the forward/reverse/total error means;
    -1.0 marks missing error data.  Edges link every taxon to its
    closest ancestor that is itself scored in BcValues.
    (Python 2 only: uses the iterator .next() method.)
    """
    tree = RootedTree()
    tset = set(BcValues)

    for taxon in BcValues:
        if taxon in errors :
            forErr = errors[taxon][0]
            revErr = errors[taxon][1]
            totErr = errors[taxon][2]
        else :
            # no error statistics were collected for this taxon
            forErr = -1.0
            revErr = -1.0
            totErr = -1.0
        tree.addNode(taxon, rank=tax.getRank(taxon),
                     name=tax.getScientificName(taxon),
                     bc = BcValues[taxon],
                     errors = str(forErr)+' '+str(revErr),
                     totError = totErr
                     )

    for taxon in BcValues:
        piter = tax.parentalTreeIterator(taxon)
        taxon = piter.next()
        # Walk up the lineage: connect each node to the first scored
        # ancestor, removing it from tset so it receives a single
        # parent edge.
        for parent in piter:
            if taxon[0] in tset and parent[0] in BcValues:
                tset.remove(taxon[0])
                tree.addEdge(parent[0], taxon[0])
            taxon=parent

    return tree

207
obitools/blast/__init__.py Normal file
View File

@ -0,0 +1,207 @@
from os import popen2
from itertools import imap,count
from obitools.table import iTableIterator,TableRow,Table,SelectionIterator
from obitools.utils import ColumnFile
from obitools.location import SimpleLocation
from obitools.fasta import formatFasta
import sys
class Blast(object):
    '''
    Run a local blast search (NCBI ``blastall``) and iterate over the
    tabular results.
    '''
    def __init__(self,mode,db,program='blastall',**options):
        # mode    : blast flavour to run (blastn, blastp, ...)
        # db      : path/name of the formatted blast database
        # program : executable name, defaults to 'blastall'
        # options : extra command-line switches (-e, -W, ...) passed as-is
        self._mode = mode
        self._db = db
        self._program = program
        self._options = options

    def getMode(self):
        return self._mode

    def getDb(self):
        return self._db

    def getProgram(self):
        return self._program

    def _blastcmd(self):
        # Build the command line; '-m 8' requests the tab-separated
        # output format that BlastResultIterator parses.
        tmp = """%(program)s \\
              -p %(mode)s \\
              -d %(db)s \\
              -m 8 \\
              %(options)s \\
              """
        options = ' '.join(['-%s %s' % (x[0],str(x[1]))
                            for x in self._options.iteritems()])
        data = {
            'program' : self.program,
            'db'      : self.db,
            'mode'    : self.mode,
            'options' : options
        }
        return tmp % data

    def __call__(self,sequence):
        '''
        Run blast with one sequence object.

        @param sequence: the query sequence
        @type sequence: L{obitools.BioSequence}
        @return: an iterator over the resulting matches
        @rtype: BlastResultIterator
        '''
        cmd = self._blastcmd()
        # popen2 is Python 2 only (deprecated): blast reads the fasta
        # query on stdin and writes tabular results on stdout.
        (blast_in,blast_out) = popen2(cmd)
        print >>blast_in,formatFasta(sequence)
        blast_in.close()
        blast = BlastResultIterator(blast_out)
        return blast

    mode = property(getMode, None, None, "Mode's Docstring")
    db = property(getDb, None, None, "Db's Docstring")
    program = property(getProgram, None, None, "Program's Docstring")
class NetBlast(Blast):
    '''
    Run blast remotely on the NCBI servers (network blast client).
    '''

    def __init__(self, mode, db, **options):
        '''
        @param mode: blast flavour to run (blastn, blastp, ...)
        @param db: NCBI database name
        '''
        # 'blastcl3' is the NCBI network-blast client executable.
        super(NetBlast, self).__init__(mode, db, 'blastcl3', **options)
class BlastResultIterator(iTableIterator):
    """Iterate over the rows of a tabular ('-m 8') blast output,
    yielding one match object (rowFactory) per high scoring pair."""

    def __init__(self,blastoutput,query=None):
        '''
        @param blastoutput: open stream on the tabular blast output
        @type blastoutput: file-like object
        @param query: the query sequence the matches refer to (optional)
        '''
        # ColumnFile splits each tab separated line and converts the
        # columns with self.types; lines starting with '#' are skipped.
        self._blast = ColumnFile(blastoutput,
                                 strip=True,
                                 skip="#",
                                 sep="\t",
                                 types=self.types
                                 )
        self._query = query
        # header name -> column index (imap(None,...) is Python 2 zip)
        self._hindex = dict((k,i) for i,k in imap(None,count(),self._getHeaders()))

    def _getHeaders(self):
        # Column names of the NCBI '-m 8' tabular format, in file order.
        return ('Query id','Subject id',
                '% identity','alignment length',
                'mismatches', 'gap openings',
                'q. start', 'q. end',
                's. start', 's. end',
                'e-value', 'bit score')

    def _getTypes(self):
        # Python type of each column, matching _getHeaders.
        return (str,str,
                float,int,
                int,int,
                int,int,
                int,int,
                float,float)

    def _getRowFactory(self):
        return BlastMatch

    def _getSubrowFactory(self):
        return TableRow

    def _getQuery(self):
        return self._query

    headers = property(_getHeaders,None,None)
    types = property(_getTypes,None,None)
    rowFactory = property(_getRowFactory,None,None)
    subrowFactory = property(_getSubrowFactory,None,None)
    query = property(_getQuery,None,None)

    def next(self):
        '''
        Return the next blast hit wrapped in rowFactory (BlastMatch).
        (Python 2 iterator protocol.)
        '''
        value = self._blast.next()
        return self.rowFactory(self,value)
class BlastResult(Table):
    '''
    Results of a blast run.

    Empty specialization of Table: rows are BlastMatch instances.
    '''
class BlastMatch(TableRow):
    '''
    One blast high scoring pair (HSP) between a query and a subject
    sequence, as a row of the tabular output.
    '''

    def getQueryLocation(self):
        """Location of the HSP on the query (columns 6-7)."""
        return SimpleLocation(self[6], self[7])

    def getSubjectLocation(self):
        """Location of the HSP on the subject (columns 8-9)."""
        return SimpleLocation(self[8], self[9])

    def getSubjectSequence(self,database):
        """Fetch the matched subject sequence from *database* by id."""
        return database[self[1]]

    def queryCov(self,query=None):
        '''
        Compute coverage of match on query sequence.

        @param query: the query sequence. Default is None.
                      In this case the query sequence associated
                      to this blast result is used.
        @type query: L{obitools.BioSequence}

        @return: coverage fraction
        @rtype: float
        '''
        if query is None:
            query = self.table.query
        assert query is not None
        matchSpan = self[7] - self[6] + 1
        return float(matchSpan) / float(len(query))

    def __getitem__(self,key):
        # 'query coverage' is a virtual column computed on demand.
        if key == 'query coverage' and self.table.query is not None:
            return self.queryCov()
        return TableRow.__getitem__(self,key)
class BlastCovMinFilter(SelectionIterator):
    """Selection iterator keeping only the blast matches whose query
    coverage reaches a minimal threshold, on top of the usual
    SelectionIterator conditions."""

    def __init__(self,blastiterator,covmin,query=None,**conditions):
        if query is None:
            query = blastiterator.table.query
        assert query is not None
        SelectionIterator.__init__(self,blastiterator,**conditions)
        self._query = query
        self._covmin = covmin

    def _covMinPredicat(self,row):
        # True when the row covers at least covmin of the query.
        return row.queryCov(self._query) >= self._covmin

    def _checkCondition(self,row):
        # Coverage test first, then the inherited conditions.
        if not self._covMinPredicat(row):
            return False
        return SelectionIterator._checkCondition(self, row)

376
obitools/carto/__init__.py Normal file
View File

@ -0,0 +1,376 @@
# -*- coding: latin1 -*-
from obitools import SVGdraw
import math
class Map(object):
    """
    Map represents a physical genetic map.

    A map is defined by the length of the sequence associated with it.
    A map owns a number of levels (Level), themselves split into
    sub-levels (SubLevel); the sub-levels hold the features.
    (Docstring translated from the original French.)
    """
    def __init__(self,name,seqlength,scale=1):
        """
        Build a new map.

        *Param*:
            name
                name of the map
            seqlength
                length of the sequence associated with the map
            scale
                scale of the map: how many pixels correspond
                to one map unit
        """
        self.name = name
        self.seqlength = seqlength
        self.scale = scale
        self.levels = {}
        # base height (pixels) of one cartographic element
        self.basicHSize = 10

    def __str__(self):
        return '<%s>' % self.name

    def __getitem__(self,level):
        """
        Return level *level* of the map, creating it if it does not
        exist yet.  Levels are non-zero integers: positive above the
        sequence axis, negative below.
        """
        if not isinstance(level,int):
            raise TypeError('level must be an non Zero integer value')
        elif level==0:
            raise AssertionError('Level cannot be set to 0')
        try:
            return self.levels[level]
        except KeyError:
            self.levels[level] = Level(level,self)
            return self.levels[level]

    def getBasicHSize(self):
        """
        Return the base height of a cartographic element, in pixels.
        """
        return self.basicHSize

    def getScale(self):
        """
        Return the scale of the map as pixels per physical map unit.
        """
        return self.scale

    def getNegativeBase(self):
        # y coordinate below which the negative levels are stacked:
        # total height minus the height of every negative level.
        return reduce(lambda x,y:x-y,[self.levels[z].getHeight()
                                      for z in self.levels
                                      if z < 0],self.getHeight())

    def getPositiveBase(self):
        return self.getNegativeBase() - 3 * self.getBasicHSize()

    def getHeight(self):
        # Sum of all level heights plus a 4-unit margin.
        return reduce(lambda x,y:x+y,[z.getHeight() for z in self.levels.values()],0) \
               + 4 * self.getBasicHSize()

    def toXML(self,file=None,begin=0,end=None):
        # Render the map (or its [begin,end] slice) as SVG; when *file*
        # is given the XML is also written to it.
        dessin = SVGdraw.drawing()
        if end==None:
            end = self.seqlength
        hauteur= self.getHeight()
        largeur=(end-begin+1)*self.scale
        svg = SVGdraw.svg((begin*self.scale,0,largeur,hauteur),
                          '%fpx' % (self.seqlength * self.scale),
                          '%dpx' % hauteur)
        # Central axis representing the sequence itself.
        # NOTE(review): under Python 2, 1/4 is integer division (== 0),
        # so (1 + 1/4) evaluates to 1 -- confirm this is intended.
        centre = self.getPositiveBase() + (1 + 1/4) * self.getBasicHSize()
        svg.addElement(SVGdraw.rect(0,centre,self.seqlength * self.scale,self.getBasicHSize()/2))
        for e in self.levels.values():
            svg.addElement(e.getElement())
        dessin.setSVG(svg)
        return dessin.toXml(file)
class Feature(object):
    # Abstract marker base class for everything that can be added to a
    # SubLevel (SubLevel.add type-checks against it).
    pass
class Level(object):
    """One horizontal drawing level of a Map.  Positive levels sit above
    the sequence axis, negative levels below; a level is split into
    sub-levels that hold the features."""

    def __init__(self,level,map):
        # level : non-zero signed index of this level
        # map   : owner Map instance (registers itself in map.levels)
        if not isinstance(map,Map):
            raise AssertionError('map is not an instance of class Map')
        if level in map.levels:
            raise AssertionError('Level %d already define for map %s' % (level,map))
        else:
            map.levels[level] = self
        self.map = map
        self.level = level
        self.sublevels = {}

    def __getitem__(self,sublevel):
        """
        Return sub-level *sublevel* of this level, creating it if it
        does not exist yet.  (Translated from the original French.)
        """
        if not isinstance(sublevel,int):
            raise TypeError('sublevel must be a positive integer value')
        elif sublevel<0:
            raise AssertionError('Level cannot be negative')
        try:
            return self.sublevels[sublevel]
        except KeyError:
            self.sublevels[sublevel] = SubLevel(sublevel,self)
            return self.sublevels[sublevel]

    def getBase(self):
        # y coordinate where this level starts, obtained by stacking
        # the heights of the levels between the axis and this one.
        if self.level < 0:
            base = self.map.getNegativeBase()
            base += reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
                                           for z in self.map.levels
                                           if z <0 and z >= self.level],0)
            return base
        else:
            base = self.map.getPositiveBase()
            base -= reduce(lambda x,y:x+y,[self.map.levels[z].getHeight()
                                           for z in self.map.levels
                                           if z >0 and z < self.level],0)
            return base

    def getElement(self):
        # SVG group gathering every sub-level of this level.
        objet = SVGdraw.group('level%d' % self.level)
        for e in self.sublevels.values():
            objet.addElement(e.getElement())
        return objet

    def getHeight(self):
        # Sum of the sub-level heights plus a 2-unit margin.
        return reduce(lambda x,y:x+y,[z.getHeight() for z in self.sublevels.values()],0) \
               + 2 * self.map.getBasicHSize()
class SubLevel(object):
    """A slice of a Level holding named features, stacked away from the
    sequence axis."""

    def __init__(self,sublevel,level):
        # sublevel : positive index of this sub-level inside *level*
        # level    : owner Level instance (registers itself in it)
        if not isinstance(level,Level):
            raise AssertionError('level is not an instance of class Level')
        # BUG FIX: the duplicate check used to test
        # 'level in level.sublevels', comparing a Level object against
        # the dict's integer keys, so it could never fire; test the
        # sublevel index instead, as the error message implies.
        if sublevel in level.sublevels:
            raise AssertionError('Sublevel %d already define for level %s' % (sublevel,level))
        else:
            level.sublevels[sublevel] = self
        self.level = level
        self.sublevel = sublevel
        self.features = {}

    def getHeight(self):
        # Tallest feature plus a 4-unit margin.
        return max([x.getHeight() for x in self.features.values()]+[0]) + 4 * self.level.map.getBasicHSize()

    def getBase(self):
        # y coordinate from which this sub-level's features are drawn,
        # stacked away from the axis in both directions.
        base = self.level.getBase()
        if self.level.level < 0:
            base -= self.level.getHeight() - 2 * self.level.map.getBasicHSize()
            base += reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
                                           for z in self.level.sublevels
                                           if z <= self.sublevel],0)
            base -= 2* self.level.map.getBasicHSize()
        else:
            base -= reduce(lambda x,y:x+y,[self.level.sublevels[z].getHeight()
                                           for z in self.level.sublevels
                                           if z < self.sublevel],0)
            base -= self.level.map.getBasicHSize()
        return base

    def getElement(self):
        # SVG group gathering every feature of this sub-level.
        base = self.getBase()
        objet = SVGdraw.group('sublevel%d' % self.sublevel)
        for e in self.features.values():
            objet.addElement(e.getElement(base))
        return objet

    def add(self,feature):
        """Register *feature* (a Feature instance with a unique name)
        in this sub-level and attach it back to the sub-level."""
        if not isinstance(feature,Feature):
            raise TypeError('feature must be an instance oof Feature')
        if feature.name in self.features:
            raise AssertionError('A feature with the same name (%s) have already be insert in this sublevel'
                                 % feature.name)
        self.features[feature.name]=feature
        feature.sublevel=self
class SimpleFeature(Feature):
    """A feature drawn as a single filled rectangle."""

    def __init__(self,name,begin,end,visiblename=False,color=0):
        self.begin = begin
        self.end = end
        self.name = name
        self.color = color
        self.sublevel = None        # set by SubLevel.add
        self.visiblename = visiblename

    def getHeight(self):
        """Height in pixels; doubled when the name is displayed."""
        if not self.sublevel:
            raise AssertionError('Not affected Simple feature')
        unit = self.sublevel.level.map.getBasicHSize()
        if self.visiblename:
            return unit * 2
        return unit

    def getElement(self,base):
        """Build the SVG rectangle for this feature, anchored at *base*."""
        mapobj = self.sublevel.level.map
        scale = mapobj.getScale()
        unit = mapobj.getBasicHSize()
        rect = SVGdraw.rect(self.begin * scale,
                            base - unit,
                            (self.end - self.begin + 1) * scale,
                            unit,
                            stroke=self.color)
        rect.addElement(SVGdraw.description(self.name))
        return rect
class BoxFeature(SimpleFeature):
    """A feature drawn as an unfilled outline rectangle, three basic
    height units tall."""

    def getHeight(self):
        if not self.sublevel:
            raise AssertionError('Not affected Box feature')
        unit = self.sublevel.level.map.getBasicHSize()
        if self.visiblename:
            return unit * 4
        return unit * 3

    def getElement(self,base):
        """Build the outline SVG rectangle, anchored at *base*."""
        mapobj = self.sublevel.level.map
        scale = mapobj.getScale()
        unit = mapobj.getBasicHSize()
        box = SVGdraw.rect(self.begin * scale,
                           base - unit * 2,
                           (self.end - self.begin + 1) * scale,
                           unit * 3,
                           stroke=self.color,
                           fill="none")
        box.addElement(SVGdraw.description(self.name))
        return box
class MultiPartFeature(Feature):
    # A feature made of several rectangles (e.g. exons) joined by link
    # lines drawn between consecutive parts; links can be straight
    # peaks, flat lines (flatlink) or quadratic curves (roundlink).
    def __init__(self,name,*args,**kargs):
        # args : (begin, end) pairs, one per part, in increasing order
        self.limits = args
        self.name = name
        try:
            self.color = kargs['color']
        except KeyError:
            self.color = "black"
        try:
            self.visiblename=kargs['visiblename']
        except KeyError:
            self.visiblename=None
        try:
            self.flatlink=kargs['flatlink']
        except KeyError:
            self.flatlink=False
        try:
            self.roundlink=kargs['roundlink']
        except KeyError:
            self.roundlink=False
        self.sublevel = None

    def getHeight(self):
        if not self.sublevel:
            raise AssertionError('Not affected Simple feature')
        if self.visiblename:
            return self.sublevel.level.map.getBasicHSize() * 3
        else:
            return self.sublevel.level.map.getBasicHSize() * 2

    def getElement(self,base):
        scale = self.sublevel.level.map.getScale()
        y = base - self.sublevel.level.map.getBasicHSize()
        height = self.sublevel.level.map.getBasicHSize()
        objet = SVGdraw.group(self.name)
        # one rectangle per part
        for (debut,fin) in self.limits:
            x = debut * scale
            width = (fin - debut + 1) * scale
            part = SVGdraw.rect(x,y,width,height,fill=self.color)
            objet.addElement(part)
        # Link line from the end of each part to the start of the next.
        # NOTE(review): the loop unpacks the next (begin, end) pair as
        # (fin, next) -- 'next' shadows the builtin -- and 'height / 2'
        # is integer division under Python 2.
        debut = self.limits[0][1]
        for (fin,next) in self.limits[1:]:
            debut*=scale
            fin*=scale
            path = SVGdraw.pathdata(debut,y + height / 2)
            delta = height / 2
            if self.roundlink:
                path.qbezier((debut+fin)/2, y - delta,fin,y + height / 2)
            else:
                if self.flatlink:
                    delta = - height / 2
                path.line((debut+fin)/2, y - delta)
                path.line(fin,y + height / 2)
            path = SVGdraw.path(path,fill="none",stroke=self.color)
            objet.addElement(path)
            debut = next
        objet.addElement(SVGdraw.description(self.name))
        return objet
class TagFeature(Feature):
    """A feature drawn as a vertical bar whose height encodes *ratio*
    (fraction of ten basic-height units, with a minimum of 1 pixel)."""

    def __init__(self,name,begin,length,ratio,visiblename=False,color=0):
        self.begin = begin
        self.length = length
        self.ratio = ratio
        self.name = name
        self.color = color
        self.sublevel = None        # set by SubLevel.add
        self.visiblename = visiblename

    def getHeight(self):
        if not self.sublevel:
            raise AssertionError('Not affected Tag feature')
        return self.sublevel.level.map.getBasicHSize() * 11

    def getElement(self,base):
        """Build the SVG bar for this tag, growing upward from *base*."""
        mapobj = self.sublevel.level.map
        scale = mapobj.getScale()
        unit = mapobj.getBasicHSize()
        barHeight = math.floor(max(1, unit * 10 * self.ratio))
        bar = SVGdraw.rect(self.begin * scale,
                           base + unit - barHeight,
                           self.length * scale,
                           barHeight,
                           stroke=self.color)
        bar.addElement(SVGdraw.description(self.name))
        return bar
if __name__ == '__main__':
    # Demonstration / manual test: build a small map with features on
    # both sides of the sequence axis and dump it as SVG files.
    # (Python 2 print statements.)
    carte = Map('essai',20000,scale=0.5)
    carte[-1][0].add(SimpleFeature('toto',100,300))
    carte[1][0].add(SimpleFeature('toto',100,300))
    carte[1][1].add(SimpleFeature('toto',200,1000))
    carte[1][0].add(MultiPartFeature('bout',(1400,1450),(1470,1550),(1650,1800),color='red',flatlink=True))
    carte[1][0].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='red',flatlink=True))
    carte[-1][1].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='green'))
    carte[-1][2].add(MultiPartFeature('titi',(400,450),(470,550),(650,800),color='purple',roundlink=True))
    carte[-1][1].add(BoxFeature('tutu',390,810,color='purple'))
    carte[1][0].add(BoxFeature('tutu',390,810,color='red'))
    carte[2][0].add(TagFeature('t1',1400,20,0.8))
    carte[2][0].add(TagFeature('t2',1600,20,0.2))
    carte.basicHSize=6
    print carte.toXML('truc.svg',begin=0,end=1000)
    print carte.toXML('truc2.svg',begin=460,end=2000)

0
obitools/decorator.py Normal file
View File

View File

@ -0,0 +1,29 @@
class DistanceMatrix(object):
    '''
    Abstract, lazily-evaluated lower-triangular distance matrix over
    the sequences of an alignment.  Subclasses implement evaluateDist;
    values are computed on first access and cached.
    '''

    def __init__(self,alignment):
        '''
        DistanceMatrix constructor.

        @param alignment: alignment used to compute the distance matrix
        @type alignment: obitools.align.Alignment
        '''
        # NOTE: the attribute name 'aligment' (sic) is part of the
        # public interface used by the matrix writers.
        self.aligment = alignment
        # Lower-triangular storage: row y holds y+1 cells, None meaning
        # "not computed yet".
        self.matrix = [[None] * (row + 1) for row in xrange(len(alignment))]

    def evaluateDist(self,x,y):
        '''Compute the distance between sequences *x* and *y* (abstract).'''
        raise NotImplementedError

    def __getitem__(self,key):
        assert isinstance(key,(tuple,list)) and len(key)==2, \
              'key must be a tuple or a list of two integers'
        x,y = key
        # normalize to the lower triangle (x <= y)
        if y < x:
            x, y = y, x
        cached = self.matrix[y][x]
        if cached is None:
            cached = self.evaluateDist(x,y)
            self.matrix[y][x] = cached
        return cached

View File

@ -0,0 +1,77 @@
'''
Module dedicated to compute observed divergeances from
an alignment. No distance correction is applied at all
'''
from itertools import imap
from obitools.distances import DistanceMatrix
class PairewiseGapRemoval(DistanceMatrix):
    '''
    Observed divergeance matrix from an alignment.
    Gap are removed from the alignemt on a pairewise
    sequence base
    '''

    def evaluateDist(self,x,y):
        '''
        Compute the observed divergeance from two sequences
        of an aligment.

        @attention: For performance purpose this method should
                    be directly used. use instead the __getitem__
                    method from DistanceMatrix.

        @see: L{__getitem__}

        @param x: number of the fisrt sequence in the aligment
        @type x: int
        @param y: umber of the second sequence in the aligment
        @type y: int

        @return: fraction of differing positions, gapped columns excluded
        @rtype: float
        '''
        seq1 = self.aligment[x]
        seq2 = self.aligment[y]
        # imap(None, seq1, seq2) is the Python 2 zip; the reduce folds
        # (differences, total) over all columns where neither sequence
        # carries a gap.  Raises ZeroDivisionError when every column is
        # gapped -- presumably never happens on real alignments.
        diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
                          (z[0]!=z[1] for z in imap(None,seq1,seq2)
                           if '-' not in z),(0,0))
        return float(diff)/tot
class Pairewise(DistanceMatrix):
    '''
    Observed divergeance matrix from an alignment.
    Gap are kept from the alignemt
    '''

    def evaluateDist(self,x,y):
        '''
        Compute the observed divergeance from two sequences
        of an aligment.

        @attention: For performance purpose this method should
                    be directly used. use instead the __getitem__
                    method from DistanceMatrix.

        @see: L{__getitem__}

        @param x: number of the fisrt sequence in the aligment
        @type x: int
        @param y: umber of the second sequence in the aligment
        @type y: int

        @return: fraction of differing positions, gaps counted as
                 ordinary characters
        @rtype: float
        '''
        seq1 = self.aligment[x]
        seq2 = self.aligment[y]
        # Same fold as PairewiseGapRemoval but over every column,
        # including gapped ones (imap(None,...) is the Python 2 zip).
        diff,tot = reduce(lambda x,y: (x[0]+y,x[1]+1),
                          (z[0]!=z[1] for z in imap(None,seq1,seq2)),
                          (0,0))
        return float(diff)/tot

View File

@ -0,0 +1,35 @@
import sys
from itertools import imap,count
def writePhylipMatrix(matrix):
    """Serialize a DistanceMatrix in PHYLIP lower-triangular format.

    PHYLIP names are limited to 10 characters, so ids are truncated;
    duplicated truncated ids are disambiguated with a '_<n>' suffix.
    (Python 2 only: uses itertools.imap/count.)

    @param matrix: a DistanceMatrix exposing .aligment and [(i,j)] access
    @return: the formatted matrix as a single string
    """
    names = [x.id for x in matrix.aligment]
    pnames= [x[:10] for x in names]
    # count how many times each truncated name occurs
    unicity={}
    redundent=[]
    for n in pnames:
        unicity[n]=unicity.get(n,0)+1
        redundent.append(unicity[n])
    # imap(None,...) is the Python 2 idiom for zip over iterables
    for i,n,r in imap(None,count(),pnames,redundent):
        alternate = n
        if r > 1:
            # keep suffixing until the shortened name becomes unique
            while alternate in pnames:
                lcut = 9 - len(str(r))
                alternate = n[:lcut]+ '_%d' % r
                r+=1
        pnames[i]='%-10s' % alternate
    # PHYLIP header: number of sequences
    firstline = '%5d' % len(matrix.aligment)
    rep = [firstline]
    for i,n in imap(None,count(),pnames):
        line = [n]
        # lower triangle only: row i lists distances to sequences 0..i-1
        for j in xrange(i):
            line.append('%5.4f' % matrix[(j,i)])
        rep.append(' '.join(line))
    return '\n'.join(rep)

25
obitools/distances/r.py Normal file
View File

@ -0,0 +1,25 @@
import sys
from itertools import imap,count
def writeRMatrix(matrix):
    """Serialize a DistanceMatrix as a whitespace-separated square
    matrix readable by R (read.table), with the sequence ids as the
    header line.

    @param matrix: a distance matrix exposing .aligment (list of
                   sequences with an .id) and [(i,j)] item access
    @return: the formatted matrix as a single string
    """
    names = [x.id for x in matrix.aligment]
    # column width: widest id, but at least 5 characters
    lmax = max(max(len(x) for x in names), 5)
    lali = len(matrix.aligment)
    nformat = '%%-%ds' % lmax
    dformat = '%%%d.4f' % lmax
    pnames = [nformat % x for x in names]
    rep = [' '.join(pnames)]
    for i in range(lali):
        # BUG FIX: values were formatted with a hard-coded '%5.4f'
        # while dformat was computed and never used, so the data
        # columns drifted out of alignment with the header whenever an
        # id exceeded 5 characters.
        line = [dformat % matrix[(j, i)] for j in range(lali)]
        rep.append(' '.join(line))
    return '\n'.join(rep)

View File

@ -0,0 +1,100 @@
# 2-bit encoding of the four nucleotides (a=0, c=1, g=2, t=3),
# each wrapped in a list so ambiguous codes can be unions of them.
_A=[0]
_C=[1]
_G=[2]
_T=[3]

# IUPAC ambiguity codes expand to the list of nucleotides they match.
_R= _A + _G
_Y= _C + _T
_M= _C + _A
_K= _T + _G
_W= _T + _A
_S= _C + _G

_B= _C + _G + _T
_D= _A + _G + _T
_H= _A + _C + _T
_V= _A + _C + _G

_N= _A + _C + _G + _T

# lower-case nucleotide letter -> list of possible 2-bit codes
_dnahash={'a':_A,
          'c':_C,
          'g':_G,
          't':_T,
          'r':_R,
          'y':_Y,
          'm':_M,
          'k':_K,
          'w':_W,
          's':_S,
          'b':_B,
          'd':_D,
          'h':_H,
          'v':_V,
          'n':_N,
          }
def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0):
    """Slide a window of *wsize* nucleotides along *sequence* and, for
    each complete window, yield (position, set of 2-bit hash codes,
    degenerate-letter count).

    Ambiguous IUPAC letters expand into every matching nucleotide, so a
    window may carry several hash codes; windows with more than
    *degeneratemax* ambiguous positions are not yielded.

    @param offset: value added to the first reported position
    """
    errors = 0            # ambiguous letters in the current window
    emask = [0] * wsize   # circular mask flagging ambiguous cells
    epointer = 0          # cursor into emask
    size = 0              # letters consumed so far (caps at wsize)
    position = offset
    hashs = set([0])
    hashmask = 0
    # hashmask keeps only the 2*wsize low-order bits of the rolling hash
    for i in xrange(wsize):
        hashmask <<= 2
        hashmask +=3

    for l in sequence:
        l = l.lower()
        hl = _dnahash[l]
        # slide the ambiguity window: forget the letter leaving it
        if emask[epointer]:
            errors-=1
            emask[epointer]=0
        if len(hl) > 1:
            errors +=1
            emask[epointer]=1
        epointer+=1
        epointer%=wsize
        if errors > degeneratemax:
            # too many ambiguities: keep a single expansion to bound
            # the size of the hash-code set
            hl=set([hl[0]])
        # extend every current code with each possible encoding of l
        hashs=set((((hc<<2) | cl) & hashmask)
                  for hc in hashs
                  for cl in hl)
        if size < wsize:
            size+=1
        if size==wsize:
            if errors <= degeneratemax:
                yield (position,hashs,errors)
            position+=1
def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None):
    """Index *sequence*: for every complete window of size *wsize*,
    append its (offset-shifted) position to the bucket of each matching
    hash code.

    @param hashs: optional pre-existing bucket table (list of 4**wsize
                  lists) to accumulate into; a fresh table is built
                  when None.
    @return: the bucket table
    """
    if hashs is None:
        hashs = [[] for _ in xrange(4 ** wsize)]
    for position, codes, errors in hashCodeIterator(sequence, wsize,
                                                    degeneratemax, offset):
        for code in codes:
            hashs[code].append(position)
    return hashs
def hashSequences(sequences,wsize,maxpos,degeneratemax=0):
    """Hash several sequences into one shared bucket table.

    Each sequence is indexed with an offset equal to the cumulated
    length of the sequences before it, so positions are unique across
    the whole set.

    @param maxpos: unused here; kept for interface compatibility
    @return: (bucket table, list of per-sequence offsets)
    """
    hashs=None
    offsets=[]
    offset=0
    for s in sequences:
        offsets.append(offset)
        # BUG FIX: hashSequence *returns* the table it fills -- when
        # hashs is None a fresh table is created inside the call, so
        # the result must be captured; previously it was discarded and
        # the function always returned (None, offsets).
        hashs = hashSequence(s,wsize,degeneratemax=degeneratemax,
                             offset=offset,hashs=hashs)
        offset+=len(s)
    return hashs,offsets

View File

View File

@ -0,0 +1,32 @@
'''
Created on 25 sept. 2010
@author: coissac
'''
from obitools import NucSequence
def referenceDBIterator(options):
    """Iterate over the main-accession reference sequences of the
    database named in options.database, yielding NucSequence objects
    annotated with their taxid and reference id.

    Side effect: options.dbid is set to the database primary key.
    """
    cursor = options.ecobarcodedb.cursor()
    # NOTE(review): SQL built by string interpolation -- acceptable for
    # a trusted option value, but parameterized queries would be safer.
    cursor.execute("select id from databases.database where name='%s'" % options.database)

    options.dbid = cursor.fetchone()[0]

    cursor.execute('''
        select s.accession,r.id,r.taxid,r.sequence
        from databases.database d,
             databases.reference r,
             databases.relatedsequences s
        where r.database = d.id
          and s.reference= r.id
          and s.mainac
          and d.name = '%s'
        ''' % options.database
        )

    for ac,id,taxid,sequence in cursor:
        s = NucSequence(ac,sequence)
        s['taxid']=taxid
        s['refdbid']=id
        yield s

View File

@ -0,0 +1,50 @@
'''
Created on 25 sept. 2010
@author: coissac
'''
def alreadyIdentified(seqid,options):
    """Return True when *seqid* already has at least one identification
    recorded for the current reference database (options.dbid)."""
    cursor = options.ecobarcodedb.cursor()
    cursor.execute('''
        select count(*)
        from ecotag.identification
        where sequence=%s
          and database=%s
        ''',(int(seqid),int(options.dbid)))
    count = int(cursor.fetchone()[0])
    return count > 0
def storeIdentification(seqid,
                        idstatus,taxid,
                        matches,
                        options
                        ):
    """Persist one sequence identification and its supporting evidence,
    then commit.

    @param seqid: rawdata sequence primary key
    @param idstatus: identification status label
    @param taxid: assigned taxon id
    @param matches: mapping reference sequence id -> identity score
    @param options: carries the connection (ecobarcodedb), dbid and the
                    updatedb flag
    """
    cursor = options.ecobarcodedb.cursor()

    # outside update mode, any previous identification is replaced
    if not options.updatedb:
        cursor.execute('''
            delete from ecotag.identification where sequence=%s and database=%s
            ''',(int(seqid),int(options.dbid)))

    cursor.execute('''
        insert into ecotag.identification (sequence,database,idstatus,taxid)
        values (%s,%s,%s,%s)
        returning id
        ''' , (int(seqid),int(options.dbid),idstatus,int(taxid)))

    idid = cursor.fetchone()[0]

    # one evidence row per matched reference sequence
    for seq,identity in matches.iteritems():
        cursor.execute('''
            insert into ecotag.evidence (identification,reference,identity)
            values (%s,
                    %s,
                    %s)
            ''',(idid,seq,identity))

    cursor.close()
    options.ecobarcodedb.commit()

View File

@ -0,0 +1,64 @@
'''
Created on 23 sept. 2010
@author: coissac
'''
import psycopg2
from obitools.ecobarcode.taxonomy import EcoTaxonomyDB
def addEcoBarcodeDBOption(optionManager):
    """Register the ecobarcode PostgreSQL connection options on the
    option manager (database name, server, user, port, password and
    amplification primer)."""
    specs = (('--dbname', 'ecobarcodedb', None,
              "Specify the name of the ecobarcode database"),
             ('--server', 'dbserver', "localhost",
              "Specify the adress of the ecobarcode database server"),
             ('--user', 'dbuser', 'postgres',
              "Specify the user of the ecobarcode database"),
             ('--port', 'dbport', 5432,
              "Specify the port of the ecobarcode database"),
             ('--passwd', 'dbpasswd', '',
              "Specify the passwd of the ecobarcode database"),
             ('--primer', 'primer', None,
              "Specify the primer used for amplification"),
             )
    for flag, dest, default, helptext in specs:
        optionManager.add_option(flag,
                                 action="store", dest=dest,
                                 type='str',
                                 default=default,
                                 help=helptext)
def ecobarcodeDatabaseConnection(options):
    """Open the ecobarcode PostgreSQL connection described by *options*
    and return an EcoTaxonomyDB bound to it, or None when no database
    name was supplied.

    Side effects on success: options.dbname receives the database name
    and options.ecobarcodedb is replaced by the live connection.
    """
    if options.ecobarcodedb is None:
        return None
    connection = psycopg2.connect(database=options.ecobarcodedb,
                                  user=options.dbuser,
                                  password=options.dbpasswd,
                                  host=options.dbserver,
                                  port=options.dbport)
    options.dbname = options.ecobarcodedb
    options.ecobarcodedb = connection
    return EcoTaxonomyDB(connection)

View File

@ -0,0 +1,38 @@
'''
Created on 25 sept. 2010
@author: coissac
'''
from obitools import NucSequence
from obitools.utils import progressBar
from obitools.ecobarcode.ecotag import alreadyIdentified
import sys
def sequenceIterator(options):
    """Yield the raw sequences amplified with options.primer, each
    annotated with its total occurrence count, while drawing a progress
    bar on stderr.

    In update mode (options.updatedb) sequences that already have an
    identification in the database are skipped.
    """
    cursor = options.ecobarcodedb.cursor()
    # NOTE(review): the primer is interpolated into the SQL string --
    # safe only for trusted option values.
    cursor.execute('''
        select s.id,sum(o.count),s.sequence
        from rawdata.sequence s,
             rawdata.occurrences o
        where o.sequence= s.id
          and s.primers = '%s'
        group by s.id,s.sequence
        ''' % options.primer
        )

    nbseq = cursor.rowcount
    progressBar(1, nbseq, True, head=options.dbname)
    for id,count,sequence in cursor:
        progressBar(cursor.rownumber+1, nbseq, head=options.dbname)
        if not options.updatedb or not alreadyIdentified(id,options):
            s = NucSequence(id,sequence)
            s['count']=count
            # '+' marks a yielded sequence (Python 2 print)
            print >>sys.stderr,' +', cursor.rownumber+1,
            yield s
        else:
            # '@' marks a sequence skipped because already identified
            print >>sys.stderr,' @', cursor.rownumber+1,
    print >>sys.stderr

View File

@ -0,0 +1,120 @@
'''
Created on 24 sept. 2010
@author: coissac
'''
from obitools.ecopcr.taxonomy import TaxonomyDump
from obitools.ecopcr.taxonomy import Taxonomy
import sys
class EcoTaxonomyDB(TaxonomyDump) :
    """Taxonomy loaded from the ecobarcode PostgreSQL database
    (ncbitaxonomy.* tables) instead of NCBI taxdump flat files."""

    def __init__(self,dbconnect):
        # dbconnect: open DB-API connection to the ecobarcode database
        self._dbconnect=dbconnect

        print >> sys.stderr,"Reading ecobarcode taxonomy database..."
        self._readNodeTable()
        print >> sys.stderr," ok"

        print >>sys.stderr,"Adding scientific name..."
        self._name=[]
        for taxid,name,classname in self._nameIterator():
            self._name.append((name,classname,self._index[taxid]))
            # the scientific name is appended to the taxon record itself
            if classname == 'scientific name':
                self._taxonomy[self._index[taxid]].append(name)

        print >>sys.stderr,"Adding taxid alias..."
        # merged taxids are aliased onto their current replacement
        for taxid,current in self._mergedNodeIterator():
            self._index[taxid]=self._index[current]

        print >>sys.stderr,"Adding deleted taxid..."
        for taxid in self._deletedNodeIterator():
            self._index[taxid]=None

        Taxonomy.__init__(self)

    #####
    #
    # Iterator functions
    #
    #####

    def _readNodeTable(self):
        # Load (taxid, rank, parent) triples, encode ranks as indices,
        # sort the taxa and replace taxid/parent values by positional
        # indices into the sorted table.
        cursor = self._dbconnect.cursor()
        cursor.execute("""
            select taxid,rank,parent
            from ncbitaxonomy.nodes
            """)

        print >>sys.stderr,"Reading taxonomy nodes..."
        taxonomy=[list(n) for n in cursor]

        print >>sys.stderr,"List all taxonomy rank..."
        ranks =list(set(x[1] for x in taxonomy))
        ranks.sort()
        # rank name -> rank index (map(None,...) is Python 2 zip)
        rankidx = dict(map(None,ranks,xrange(len(ranks))))

        print >>sys.stderr,"Sorting taxons..."
        taxonomy.sort(TaxonomyDump._taxonCmp)

        self._taxonomy=taxonomy

        print >>sys.stderr,"Indexing taxonomy..."
        index = {}
        for t in self._taxonomy:
            index[t[0]]=self._bsearchTaxon(t[0])

        print >>sys.stderr,"Indexing parent and rank..."
        for t in self._taxonomy:
            t[1]=rankidx[t[1]]
            t[2]=index[t[2]]

        self._ranks=ranks
        self._index=index
        cursor.close()

    def _nameIterator(self):
        # yields (taxid, name, name class) from ncbitaxonomy.names
        cursor = self._dbconnect.cursor()
        cursor.execute("""
            select taxid,name,nameclass
            from ncbitaxonomy.names
            """)
        for taxid,name,nameclass in cursor:
            yield taxid,name,nameclass
        cursor.close()

    def _mergedNodeIterator(self):
        # yields (old taxid, new taxid) from ncbitaxonomy.merged
        cursor = self._dbconnect.cursor()
        cursor.execute("""
            select oldtaxid,newtaxid
            from ncbitaxonomy.merged
            """)
        for oldtaxid,newtaxid in cursor:
            yield oldtaxid,newtaxid
        cursor.close()

    def _deletedNodeIterator(self):
        # yields deleted taxids from ncbitaxonomy.delnodes
        cursor = self._dbconnect.cursor()
        cursor.execute("""
            select taxid
            from ncbitaxonomy.delnodes
            """)
        for taxid in cursor:
            yield taxid[0]
        cursor.close()

View File

@ -0,0 +1,69 @@
from obitools import utils
from obitools import NucSequence
from obitools.utils import universalOpen, universalTell, fileSize, progressBar
import struct
import sys
class EcoPCRFile(utils.ColumnFile):
    """Parser for ecoPCR '|' separated output files: each data line is
    turned into an annotated NucSequence."""

    def __init__(self,stream):
        # Column types of the ecoPCR tabular output, in file order;
        # lines starting with '#' are comments.
        utils.ColumnFile.__init__(self,
                                  stream, '|', True,
                                  (str,int,int,
                                   str,int,str,
                                   int,str,int,
                                   str,int,str,
                                   str,str,int,float,
                                   str,int,float,
                                   int,
                                   str,str), "#")

    def next(self):
        # Python 2 iterator protocol: build a sequence from the next
        # parsed line.  Column meanings below are assumed from the
        # ecoPCR output order -- TODO confirm against the ecoPCR
        # format documentation.
        data = utils.ColumnFile.next(self)
        seq = NucSequence(data[0],data[20],data[21])
        seq['seq_length_ori']=data[1]
        seq['taxid']=data[2]
        seq['rank']=data[3]
        seq['species']=data[4]
        seq['species_sn']=data[5]
        seq['genus']=data[6]
        seq['genus_sn']=data[7]
        seq['family']=data[8]
        seq['family_sn']=data[9]
        seq['strand']=data[12]
        seq['forward_primer']=data[13]
        seq['forward_error']=data[14]
        seq['forward_tm']=data[15]
        seq['reverse_primer']=data[16]
        seq['reverse_error']=data[17]
        seq['reverse_tm']=data[18]

        return seq
class EcoPCRDBFile(object):
    """Base class providing low-level record iteration over binary
    ecoPCRDB files (big-endian, length-prefixed records)."""

    def _ecoRecordIterator(self,file):
        file = universalOpen(file)
        # header: record count as a big-endian unsigned int
        (recordCount,) = struct.unpack('> I',file.read(4))
        self._recover=False

        if recordCount:
            for i in xrange(recordCount):
                # each record: 4-byte big-endian size, then the payload
                (recordSize,)=struct.unpack('>I',file.read(4))
                record = file.read(recordSize)
                yield record
        else:
            # A zero record count means the header was never finalized
            # (e.g. interrupted write): switch to recovery mode and
            # read records until the first failure.
            print >> sys.stderr,"\n\n WARNING : EcoPCRDB readding set into recover data mode\n"
            self._recover=True
            ok=True
            while(ok):
                try:
                    (recordSize,)=struct.unpack('>I',file.read(4))
                    record = file.read(recordSize)
                    yield record
                except:
                    # NOTE(review): the bare except is deliberate (stop
                    # on the first truncated record) but it also hides
                    # unrelated errors.
                    ok=False

View File

@ -0,0 +1,104 @@
import struct
class EcoPCRDBAnnotationWriter(object):
    '''
    Class used to write Annotation description in EcoPCRDB format.

    EcoPCRDBAnnotationWriter is oftenly called through the EcoPCRDBSequenceWriter class

    @see: L{ecopcr.sequence.EcoPCRDBSequenceWriter}
    '''

    def __init__(self,dbname,id,fileidx=1,type=('CDS',),definition=None):
        '''
        class constructor

        @param dbname: name of ecoPCR database
        @type dbname: C{str}

        @param id: name of the qualifier used as feature id
        @type id: C{str}

        @param fileidx: index used in the annotation file name suffix
        @type fileidx: C{int}

        @param type: feature types to store (a sequence of type names)
        @type type: C{list} or C{tuple}

        @param definition: qualifier used as the feature definition
        @type definition: C{str}
        '''
        # BUG FIX: the default used to be ('CDS'), which is just the
        # string 'CDS' (a tuple needs the trailing comma).  Iterating
        # it registered three one-letter types 'C','D','S' in the .fdx
        # file, and put() then crashed with KeyError('CDS') on the
        # first CDS feature.
        self._type = type
        self._definition = definition
        self._id = id
        self._filename="%s_%03d.adx" % (dbname,fileidx)
        self._file = open(self._filename,'wb')
        self._sequenceIdx=0

        # write the feature-type dictionary file (.fdx)
        ftname ="%s.fdx" % (dbname)
        ft = open(ftname,'wb')
        # feature type name -> index, in declaration order
        self._fttypeidx=dict((t,i) for i,t in enumerate(type))
        ft.write(struct.pack('> I',len(type)))
        for t in type:
            ft.write(self._ecoFtTypePacker(t))
        ft.close()

        self._annotationCount=0
        # placeholder record count; rewritten with the real value by
        # __del__ when the writer is disposed of
        self._file.write(struct.pack('> I',self._annotationCount))

    def _ecoFtTypePacker(self,type):
        # pack one feature type as a <length><name> record
        totalSize = len(type)
        packed = struct.pack('> I %ds' % totalSize,totalSize,type)

        assert len(packed) == totalSize+4, "error in feature type packing"
        return packed

    def _ecoAnnotationPacker(self,feature,seqidx):
        # pack one annotation: sequence index, 0-based begin, end,
        # type index, strand flag, 20-byte id and the definition text
        begin = feature.begin-1
        end   = feature.end
        type  = self._fttypeidx[feature.ftType]
        strand= feature.isDirect()
        id    = feature[self._id][0]
        if self._definition in feature:
            definition = feature[self._definition][0]
        else:
            definition = ''

        assert strand is not None,"Only strand defined features can be stored"

        deflength = len(definition)

        totalSize = 4 + 4 + 4 + 4 + 4 + 20 + 4 + deflength

        packed = struct.pack('> I I I I I 20s I %ds' % (deflength),
                             totalSize,
                             seqidx,
                             begin,
                             end,
                             type,
                             int(strand),
                             id,
                             deflength,
                             definition)

        assert len(packed) == totalSize+4, "error in annotation packing"
        return packed

    def put(self,sequence,seqidx=None):
        """Write the annotations of *sequence* whose feature type is
        registered; sequences are numbered automatically when *seqidx*
        is omitted."""
        if seqidx is None:
            seqidx = self._sequenceIdx
            self._sequenceIdx+=1
        for feature in sequence.getFeatureTable():
            if feature.ftType in self._type:
                self._annotationCount+=1
                self._file.write(self._ecoAnnotationPacker(feature,seqidx))

    def __del__(self):
        # finalize the file: rewrite the real record count at its head
        self._file.seek(0,0)
        self._file.write(struct.pack('> I',self._annotationCount))
        self._file.close()

129
obitools/ecopcr/options.py Normal file
View File

@ -0,0 +1,129 @@
'''
Created on 13 fevr. 2011
@author: coissac
'''
from obitools.ecopcr.taxonomy import Taxonomy, EcoTaxonomyDB, TaxonomyDump, ecoTaxonomyWriter
try:
    from obitools.ecobarcode.options import addEcoBarcodeDBOption,ecobarcodeDatabaseConnection
except ImportError:
    # The ecobarcode subsystem (psycopg2 based) is optional: fall back
    # to no-op stubs so the taxonomy options still work without it.
    def addEcoBarcodeDBOption(optionmanager):
        pass
    def ecobarcodeDatabaseConnection(options):
        return None
def addTaxonomyDBOptions(optionManager):
    """Register the taxonomy-source options (ecoPCR taxonomy database
    or NCBI taxdump directory), plus the optional ecobarcode database
    options."""
    addEcoBarcodeDBOption(optionManager)
    specs = ((('-d', '--database'), 'taxonomy',
              "ecoPCR taxonomy Database name"),
             (('-t', '--taxonomy-dump'), 'taxdump',
              "NCBI Taxonomy dump repository name"),
             )
    for flags, dest, helptext in specs:
        optionManager.add_option(flags[0], flags[1],
                                 action="store", dest=dest,
                                 metavar="<FILENAME>",
                                 type="string",
                                 help=helptext)
def addTaxonomyFilterOptions(optionManager):
    """Register taxonomy filtering options on top of the DB options."""
    addTaxonomyDBOptions(optionManager)
    optionManager.add_option('--require-rank',
                             action="append",
                             type="string",
                             dest='requiredRank',
                             default=[],
                             metavar="<RANK_NAME>",
                             help="select sequence with taxid tag containing "
                                  "a parent of rank <RANK_NAME>")
    optionManager.add_option('-r', '--required',
                             action="append",
                             type="int",
                             dest='required',
                             default=[],
                             metavar="<TAXID>",
                             help="required taxid")
    optionManager.add_option('-i', '--ignore',
                             action="append",
                             type="int",
                             dest='ignored',
                             default=[],
                             metavar="<TAXID>",
                             help="ignored taxid")
def loadTaxonomyDatabase(options):
    # Resolve the taxonomy designated by the command line options and
    # cache it on ``options.taxonomy``.  Resolution order:
    #   1. an already loaded Taxonomy instance is returned as is;
    #   2. an ecobarcode database connection, when available;
    #   3. an NCBI dump directory (--taxonomy-dump), then also converted
    #      to the binary format named by --database;
    #   4. the ecoPCR binary taxonomy named by --database.
    if isinstance(options.taxonomy, Taxonomy):
        return options.taxonomy
    taxonomy = ecobarcodeDatabaseConnection(options)
    if (taxonomy is not None or
        options.taxonomy is not None or
        options.taxdump is not None):
        if options.taxdump is not None:
            taxonomy = TaxonomyDump(options.taxdump)
        if taxonomy is not None and isinstance(options.taxonomy, str):
            # dump the loaded taxonomy into the binary ecoPCR format
            ecoTaxonomyWriter(options.taxonomy,taxonomy)
            options.ecodb=options.taxonomy
        if isinstance(options.taxonomy, Taxonomy):
            taxonomy = options.taxonomy
        if taxonomy is None and isinstance(options.taxonomy, str):
            taxonomy = EcoTaxonomyDB(options.taxonomy)
            options.ecodb=options.taxonomy
    # NOTE(review): indentation reconstructed from a flattened source;
    # the final assignment is assumed to run on every path.
    options.taxonomy=taxonomy
    return options.taxonomy
def taxonomyFilterGenerator(options):
    """Build a predicate selecting sequences on taxonomy criteria.

    The returned callable maps a sequence to a truth value according to
    the --require-rank / --required / --ignore options.  When no
    taxonomy database is available every sequence is accepted.

    @param options: the parsed command line options
    @return: a one-argument predicate usable on sequences
    """
    loadTaxonomyDatabase(options)
    if options.taxonomy is not None:
        taxonomy = options.taxonomy

        # Hoisted out of the filter: the original redefined this helper
        # on every call of taxonomyFilter.
        def annotateAtRank(seq, rank):
            # Ancestor taxid of seq at `rank`, or None when unavailable.
            if 'taxid' in seq and seq['taxid'] is not None:
                return taxonomy.getTaxonAtRank(seq['taxid'], rank)
            return None

        def taxonomyFilter(seq):
            good = True
            if 'taxid' in seq:
                taxid = seq['taxid']
                if options.requiredRank:
                    # every required rank must yield an ancestor
                    # (idiomatic replacement of reduce(lambda x,y: x and y, ...))
                    good = good and all(annotateAtRank(seq, rank) is not None
                                        for rank in options.requiredRank)
                if options.required:
                    # at least one required taxon must be an ancestor
                    good = good and any(taxonomy.isAncestor(r, taxid)
                                        for r in options.required)
                if options.ignored:
                    # none of the ignored taxa may be an ancestor
                    good = good and not any(taxonomy.isAncestor(r, taxid)
                                            for r in options.ignored)
            return good
    else:
        def taxonomyFilter(seq):
            return True
    return taxonomyFilter
def taxonomyFilterIteratorGenerator(options):
    """Wrap taxonomyFilterGenerator into a sequence-iterator filter."""
    accept = taxonomyFilterGenerator(options)

    def filterIterator(seqiterator):
        # Lazily yield only the sequences accepted by the predicate.
        return (candidate for candidate in seqiterator if accept(candidate))

    return filterIterator

133
obitools/ecopcr/sequence.py Normal file
View File

@ -0,0 +1,133 @@
from obitools import NucSequence
from obitools.ecopcr import EcoPCRDBFile
from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter
from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter
from obitools.utils import universalOpen
from glob import glob
import struct
import gzip
import sys
class EcoPCRDBSequenceIterator(EcoPCRDBFile):
    '''
    Build an iterator over the sequences include in a sequence database
    formated for ecoPCR
    '''

    def __init__(self,path,taxonomy=None):
        '''
        ecoPCR data iterator constructor

        @param path: path to the ecoPCR database including the database prefix name
        @type path: C{str}
        @param taxonomy: a taxonomy can be given to the reader to decode the taxonomic data
                         associated to the sequences. If no Taxonomy is furnish, it will be read
                         before the sequence database files using the same path.
        @type taxonomy: L{obitools.ecopcr.taxonomy.Taxonomy}
        '''
        self._path = path
        if taxonomy is not None:
            self._taxonomy=taxonomy
        else:
            # no taxonomy supplied: read the binary taxonomy files
            # sharing the same database prefix
            self._taxonomy=EcoTaxonomyDB(path)
        # sequence files are named <prefix>_NNN.sdx; sorted so the
        # iteration order is deterministic
        self._seqfilesFiles = glob('%s_???.sdx' % self._path)
        self._seqfilesFiles.sort()

    def __ecoSequenceIterator(self,file):
        # Decode each binary record of one .sdx file into a NucSequence.
        # Record layout (big endian): taxon index, 20-byte NUL padded id,
        # definition length, sequence length, compressed length, then the
        # definition followed by the zlib-compressed sequence.
        for record in self._ecoRecordIterator(file):
            lrecord = len(record)
            lnames = lrecord - (4*4+20)
            (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
            seqid=seqid.strip('\x00')      # drop the NUL padding
            de = string[:deflength]
            seq = gzip.zlib.decompress(string[deflength:])
            # taxid here is an *index* in the taxonomy table; the real
            # taxid is looked up through the taxonomy
            bioseq = NucSequence(seqid,seq,de,taxidx=taxid,taxid=self._taxonomy._taxonomy[taxid][0])
            yield bioseq

    def __iter__(self):
        # chain iteration over every sequence file of the database
        for seqfile in self._seqfilesFiles:
            for seq in self.__ecoSequenceIterator(seqfile):
                yield seq
class EcoPCRDBSequenceWriter(object):
    # Write sequences into an ecoPCR binary sequence database file
    # (<dbname>_NNN.sdx), optionally together with the taxonomy files
    # and an annotation file.

    def __init__(self,dbname,fileidx=1,taxonomy=None,ftid=None,type=None,definition=None,append=False):
        # @param dbname:     database prefix path
        # @param fileidx:    index used to build the _NNN.sdx file name
        # @param taxonomy:   optional Taxonomy written alongside the sequences
        # @param ftid:       feature qualifier used as annotation id
        #                    (required when `type` is given)
        # @param type:       feature types exported as annotations
        # @param definition: feature qualifier used as annotation definition
        # @param append:     when True, extend an existing .sdx file
        self._taxonomy=taxonomy
        self._filename="%s_%03d.sdx" % (dbname,fileidx)
        if append:
            mode ='r+b'
            # read the current record count stored in the file header
            f = universalOpen(self._filename)
            (recordCount,) = struct.unpack('> I',f.read(4))
            self._sequenceCount=recordCount
            del f
            self._file = open(self._filename,mode)
            # zero the header while writing; the real count is restored
            # by __del__
            self._file.seek(0,0)
            self._file.write(struct.pack('> I',0))
            self._file.seek(0,2)
        else:
            self._sequenceCount=0
            mode = 'wb'
            self._file = open(self._filename,mode)
            # placeholder record count, fixed up in __del__
            self._file.write(struct.pack('> I',self._sequenceCount))
        if self._taxonomy is not None:
            print >> sys.stderr,"Writing the taxonomy file...",
            ecoTaxonomyWriter(dbname,self._taxonomy)
            print >> sys.stderr,"Ok"
        if type is not None:
            assert ftid is not None,"You must specify an id attribute for features"
            self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition)
        else:
            self._annotation = None

    def _ecoSeqPacker(self,seq):
        # Serialize one sequence record (big endian):
        # uint32 record size, int32 taxon index (-1 = none), 20-byte id,
        # uint32 definition length, uint32 sequence length,
        # uint32 compressed length, definition, zlib-compressed sequence.
        compactseq = gzip.zlib.compress(str(seq).upper(),9)
        cptseqlength = len(compactseq)
        delength = len(seq.definition)
        totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
        if self._taxonomy is None or 'taxid' not in seq:
            taxon=-1          # -1 flags "no taxon associated"
        else:
            taxon=self._taxonomy.findIndex(seq['taxid'])
        try:
            packed = struct.pack('> I i 20s I I I %ds %ds' % (delength,cptseqlength),
                                 totalSize,
                                 taxon,
                                 seq.id,
                                 delength,
                                 len(seq),
                                 cptseqlength,
                                 seq.definition,
                                 compactseq)
        except struct.error as e:
            # report the offending sequence before propagating
            print >>sys.stderr,"\n\n============\n\nError on sequence : %s\n\n" % seq.id
            raise e
        assert len(packed) == totalSize+4, "error in sequence packing"
        return packed

    def put(self,sequence):
        # Write one sequence record (and its annotations when enabled).
        if self._taxonomy is not None:
            if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
                sequence.extractTaxon()
        self._file.write(self._ecoSeqPacker(sequence))
        if self._annotation is not None:
            self._annotation.put(sequence, self._sequenceCount)
        self._sequenceCount+=1

    def __del__(self):
        # patch the record count header and close the file
        self._file.seek(0,0)
        self._file.write(struct.pack('> I',self._sequenceCount))
        self._file.close()

630
obitools/ecopcr/taxonomy.py Normal file
View File

@ -0,0 +1,630 @@
import struct
import sys
from itertools import count,imap
from obitools.ecopcr import EcoPCRDBFile
from obitools.utils import universalOpen
from obitools.utils import ColumnFile
class Taxonomy(object):
    """Base class holding an in-memory taxonomy.

    Subclasses must set, *before* calling this constructor:
      - ``_taxonomy``     list of records ``(taxid, rankidx, parentidx, name, origin)``
      - ``_index``        dict mapping taxid -> index in ``_taxonomy``
      - ``_ranks``        list of rank names (indexed by rankidx)
      - ``_name``         list of ``(name, nameclass, index)``
      - ``_preferedName`` list of ``(name, 'obi', index)``
    """

    def __init__(self):
        '''
        The taxonomy database constructor.
        '''
        self._ranks.append('obi')
        # cache the indices of the commonly requested ranks
        self._speciesidx = self._ranks.index('species')
        self._genusidx   = self._ranks.index('genus')
        self._familyidx  = self._ranks.index('family')
        self._orderidx   = self._ranks.index('order')
        self._nameidx = dict((x[0],x[2]) for x in self._name)
        self._nameidx.update(dict((x[0],x[2]) for x in self._preferedName))
        # BUG FIX: this index must map a taxon index to the *name*
        # (x[0]); the original stored the name class (x[1]), making
        # getPreferedName return 'obi' for every prefered name --
        # addPreferedName below stores the name, confirming the intent.
        self._preferedidx = dict((x[2],x[0]) for x in self._preferedName)
        self._bigestTaxid = max(x[0] for x in self._taxonomy)

    def findTaxonByIdx(self,idx):
        """Return the taxon record stored at position ``idx`` (None safe)."""
        if idx is None:
            return None
        return self._taxonomy[idx]

    def findIndex(self,taxid):
        """Return the position of ``taxid`` in the taxonomy table.

        ``None`` passes through; an unknown taxid raises KeyError.
        """
        if taxid is None:
            return None
        return self._index[taxid]

    def findTaxonByTaxid(self,taxid):
        """Return the taxon record identified by ``taxid``."""
        return self.findTaxonByIdx(self.findIndex(taxid))

    def findTaxonByName(self,name):
        """Return the taxon record carrying ``name`` (any name class)."""
        return self._taxonomy[self._nameidx[name]]

    def findRankByName(self,rank):
        """Return the numeric id of rank ``rank`` or None when unknown."""
        try:
            return self._ranks.index(rank)
        except ValueError:
            return None

    def __contains__(self,taxid):
        # BUG FIX: an unknown taxid made findIndex raise KeyError, so
        # membership tests crashed instead of returning False.
        try:
            return self.findTaxonByTaxid(taxid) is not None
        except KeyError:
            return False

    #####
    #
    # PUBLIC METHODS
    #
    #####

    def subTreeIterator(self, taxid):
        "return subtree for given taxonomic id "
        # BUG FIX: the original used findTaxonByTaxid (a full record) as
        # a list index; the taxon *index* is required here.
        idx = self.findIndex(taxid)
        yield self._taxonomy[idx]
        for t in self._taxonomy:
            if t[2] == idx:
                for subt in self.subTreeIterator(t[0]):
                    yield subt

    def parentalTreeIterator(self, taxid):
        """
        return parental tree for given taxonomic id starting from
        first ancester to the root.
        """
        taxon=self.findTaxonByTaxid(taxid)
        if taxon is not None:
            while taxon[2]!= 0:
                yield taxon
                taxon = self._taxonomy[taxon[2]]
            yield self._taxonomy[0]
        else:
            # portable generator termination (``raise StopIteration``
            # inside a generator is an error from Python 3.7 on)
            return

    def isAncestor(self,parent,taxid):
        """True when taxid ``parent`` lies on the parental path of ``taxid``."""
        return parent in [x[0] for x in self.parentalTreeIterator(taxid)]

    def lastCommonTaxon(self,*taxids):
        """Return the taxid of the deepest common ancestor of ``taxids``."""
        if not taxids:
            return None
        if len(taxids)==1:
            return taxids[0]
        if len(taxids)==2:
            t1 = [x[0] for x in self.parentalTreeIterator(taxids[0])]
            t2 = [x[0] for x in self.parentalTreeIterator(taxids[1])]
            t1.reverse()        # root first
            t2.reverse()
            count = min(len(t1),len(t2))
            i=0
            while(i < count and t1[i]==t2[i]):
                i+=1
            i-=1
            return t1[i]
        # more than two taxids: fold pairwise
        ancetre = taxids[0]
        for taxon in taxids[1:]:
            ancetre = self.lastCommonTaxon(ancetre,taxon)
        return ancetre

    def betterCommonTaxon(self,error=1,*taxids):
        # Return the children of the last common taxon of ``taxids``.
        # NOTE(review): the ``error`` parameter is currently unused.
        lca = self.lastCommonTaxon(*taxids)
        idx = self._index[lca]
        sublca = [t[0] for t in self._taxonomy if t[2]==idx]
        return sublca

    def getPreferedName(self,taxid):
        """Return the prefered name of ``taxid``, falling back on its
        scientific name."""
        idx = self.findIndex(taxid)
        return self._preferedidx.get(idx,self._taxonomy[idx][3])

    def getScientificName(self,taxid):
        """Return the scientific name of ``taxid``."""
        return self.findTaxonByTaxid(taxid)[3]

    def getRankId(self,taxid):
        """Return the rank index of ``taxid``."""
        return self.findTaxonByTaxid(taxid)[1]

    def getRank(self,taxid):
        """Return the rank name of ``taxid``."""
        return self._ranks[self.getRankId(taxid)]

    def getTaxonAtRank(self,taxid,rankid):
        """Return the ancestor taxid of ``taxid`` at rank ``rankid``.

        ``rankid`` may be a rank name or a rank index; None when no
        ancestor carries that rank.
        """
        if isinstance(rankid, str):
            rankid=self._ranks.index(rankid)
        try:
            return [x[0] for x in self.parentalTreeIterator(taxid)
                    if x[1]==rankid][0]
        except IndexError:
            return None

    def getSpecies(self,taxid):
        return self.getTaxonAtRank(taxid, self._speciesidx)

    def getGenus(self,taxid):
        return self.getTaxonAtRank(taxid, self._genusidx)

    def getFamily(self,taxid):
        return self.getTaxonAtRank(taxid, self._familyidx)

    def getOrder(self,taxid):
        return self.getTaxonAtRank(taxid, self._orderidx)

    def rankIterator(self):
        """Iterate over ``(rankname, rankindex)`` couples."""
        # Portable replacement for the Python-2-only
        # ``imap(None, self._ranks, xrange(...))`` idiom.
        for couple in zip(self._ranks, range(len(self._ranks))):
            yield couple

    def groupTaxa(self,taxa,groupname):
        """Group sibling taxa under a new artificial 'GROUP' taxon."""
        t=[self.findTaxonByTaxid(x) for x in taxa]
        a=set(x[2] for x in t)
        assert len(a)==1,"All taxa must have the same parent"
        newtaxid=max([2999999]+[x[0] for x in self._taxonomy if x[0]>=3000000 and x[0]<4000000])+1
        newidx=len(self._taxonomy)
        if 'GROUP' not in self._ranks:
            self._ranks.append('GROUP')
        rankid=self._ranks.index('GROUP')
        self._taxonomy.append((newtaxid,rankid,a.pop(),groupname))
        # NOTE(review): reparenting assumes mutable list records (as
        # built by TaxonomyDump); it fails on tuple records.
        for x in t:
            x[2]=newidx

    def addLocalTaxon(self,name,rank,parent,minimaltaxid=10000000):
        """Add a local taxon and return its newly allocated taxid."""
        newtaxid = minimaltaxid if (self._bigestTaxid < minimaltaxid) else self._bigestTaxid+1
        rankid=self.findRankByName(rank)
        parentidx = self.findIndex(int(parent))
        tx = (newtaxid,rankid,parentidx,name,'local')
        self._taxonomy.append(tx)
        newidx=len(self._taxonomy)-1
        self._name.append((name,'scientific name',newidx))
        self._nameidx[name]=newidx
        self._index[newtaxid]=newidx
        self._bigestTaxid=newtaxid
        return newtaxid

    def removeLocalTaxon(self,taxid):
        """Not implemented; the code below is an unreachable draft."""
        # BUG FIX: ``raise NotImplemented`` raised a TypeError because
        # NotImplemented is not an exception class.
        raise NotImplementedError
        txidx = self.findIndex(taxid)
        taxon = self.findTaxonByIdx(txidx)
        assert txidx >= self._localtaxon,"Only local taxon can be deleted"
        for t in self._taxonomy:
            if t[2] == txidx:
                self.removeLocalTaxon(t[0])
        return taxon

    def addPreferedName(self,taxid,name):
        """Register ``name`` as the prefered name of ``taxid``."""
        idx = self.findIndex(taxid)
        # BUG FIX: list.append takes a single argument; the original
        # ``append(name,'obi',idx)`` raised TypeError.
        self._preferedName.append((name,'obi',idx))
        self._preferedidx[idx]=name
        return taxid
class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
    '''
    A taxonomy database class reading the ecoPCR binary taxonomy files
    (<prefix>.tdx/.ldx/.rdx/.ndx/.pdx/.adx).
    '''

    def __init__(self,path):
        '''
        The taxonomy database constructor

        @param path: path to the ecoPCR database including the database prefix name
        @type path: C{str}
        '''
        self._path = path
        self._taxonFile = "%s.tdx" % self._path
        self._localTaxonFile = "%s.ldx" % self._path
        self._ranksFile = "%s.rdx" % self._path
        self._namesFile = "%s.ndx" % self._path
        self._preferedNamesFile = "%s.pdx" % self._path
        self._aliasFile = "%s.adx" % self._path
        print >> sys.stderr,"Reading binary taxonomy database...",
        self.__readNodeTable()
        print >> sys.stderr," ok"
        Taxonomy.__init__(self)

    #####
    #
    # Iterator functions
    #
    #####

    def __ecoNameIterator(self,file):
        # Yield (name, nameclass, taxon_index) triplets from a name file.
        for record in self._ecoRecordIterator(file):
            lrecord = len(record)
            lnames = lrecord - 16
            (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
            name=names[:namelength]
            classname=names[namelength:]
            yield (name,classname,indextaxid)

    def __ecoTaxonomicIterator(self):
        # Yield (taxid, rankid, parentidx, name, origin) for every ncbi
        # taxon, then for the taxa of the optional local taxon file.
        for record in self._ecoRecordIterator(self._taxonFile):
            lrecord = len(record)
            lnames = lrecord - 16
            (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
            yield (taxid,rankid,parentidx,name,'ncbi')
        try :
            lt=0
            for record in self._ecoRecordIterator(self._localTaxonFile):
                lrecord = len(record)
                lnames = lrecord - 16
                (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
                lt+=1
                yield (taxid,rankid,parentidx,name,'local')
            print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt
        except IOError:
            # BUG FIX: the original bare ``except:`` also swallowed
            # GeneratorExit and struct errors; only a missing/unreadable
            # local taxon file is expected here.
            print >> sys.stderr, " [INFO : Local taxon file not found] "

    def __ecoRankIterator(self):
        # Rank records are bare strings.
        for record in self._ecoRecordIterator(self._ranksFile):
            yield record

    def __ecoAliasIterator(self):
        # The alias file maps every known taxid (including merged ones)
        # to an index in the taxonomy table (-1 for deleted taxids).
        for record in self._ecoRecordIterator(self._aliasFile):
            (taxid,index) = struct.unpack('> I i',record)
            yield taxid,index

    #####
    #
    # Indexes
    #
    #####

    def __ecoNameIndex(self):
        indexName = [x for x in self.__ecoNameIterator(self._namesFile)]
        return indexName

    def __ecoRankIndex(self):
        rank = [r for r in self.__ecoRankIterator()]
        return rank

    def __ecoTaxonomyIndex(self):
        taxonomy = []
        try :
            index = dict(self.__ecoAliasIterator())
            print >> sys.stderr, " [INFO : Taxon alias file found] "
            buildIndex=False
        except IOError:
            # BUG FIX: narrowed from a bare ``except:``; without an
            # alias file the taxid -> index mapping is rebuilt below.
            print >> sys.stderr, " [INFO : Taxon alias file not found] "
            index={}
            buildIndex=True
        localtaxon=0
        i=0
        for x in self.__ecoTaxonomicIterator():
            taxonomy.append(x)
            if x[4]=='ncbi':
                localtaxon+=1
            if buildIndex or x[4]!='ncbi':
                index[x[0]] = i
            i+=1
        print >> sys.stderr,"Taxonomical tree read",
        return taxonomy, index, localtaxon

    def __readNodeTable(self):
        self._taxonomy, self._index, self._localtaxon = self.__ecoTaxonomyIndex()
        self._ranks = self.__ecoRankIndex()
        self._name = self.__ecoNameIndex()
        # Add local taxon name to the name index
        i=self._localtaxon
        for t in self._taxonomy[self._localtaxon:]:
            self._name.append((t[3],'scientific name',i))
            i+=1
        try :
            self._preferedName = [(x[0],'obi',x[2])
                                  for x in self.__ecoNameIterator(self._preferedNamesFile)]
            print >> sys.stderr, " [INFO : Prefered taxon name file found] : %d added taxa" % len(self._preferedName)
        except IOError:
            # the prefered-name file is optional
            print >> sys.stderr, " [INFO : Prefered taxon name file not found]"
            self._preferedName = []
class TaxonomyDump(Taxonomy):
    # Taxonomy loaded from a raw NCBI taxdump directory
    # (nodes.dmp, names.dmp, merged.dmp, delnodes.dmp).

    def __init__(self,taxdir):
        # @param taxdir: path to the NCBI taxonomy dump directory
        self._path=taxdir
        self._readNodeTable('%s/nodes.dmp' % taxdir)
        print >>sys.stderr,"Adding scientific name..."
        self._name=[]
        for taxid,name,classname in self._nameIterator('%s/names.dmp' % taxdir):
            self._name.append((name,classname,self._index[taxid]))
            if classname == 'scientific name':
                # taxon records are lists [taxid,rankidx,parentidx]; the
                # scientific name and the origin tag are appended here
                self._taxonomy[self._index[taxid]].extend([name,'ncbi'])
        print >>sys.stderr,"Adding taxid alias..."
        for taxid,current in self._mergedNodeIterator('%s/merged.dmp' % taxdir):
            # merged taxids are redirected to their current taxon index
            self._index[taxid]=self._index[current]
        print >>sys.stderr,"Adding deleted taxid..."
        for taxid in self._deletedNodeIterator('%s/delnodes.dmp' % taxdir):
            self._index[taxid]=None
        self._nameidx=dict((x[0],x[2]) for x in self._name)
        # NOTE(review): unlike EcoTaxonomyDB, Taxonomy.__init__ is not
        # called here -- confirm this is intended.

    def _taxonCmp(t1,t2):
        # Python 2 cmp-style comparator ordering taxon records by taxid.
        if t1[0] < t2[0]:
            return -1
        elif t1[0] > t2[0]:
            return +1
        return 0
    _taxonCmp=staticmethod(_taxonCmp)

    def _bsearchTaxon(self,taxid):
        # Binary search of a taxid in the taxid-sorted _taxonomy list;
        # returns its index, or None when absent.
        taxCount = len(self._taxonomy)
        begin = 0
        end = taxCount
        oldcheck=taxCount
        # NOTE(review): written ``begin + end / 2`` without parentheses;
        # harmless only because begin is 0 here -- compare with the
        # parenthesised form inside the loop.
        check = begin + end / 2
        while check != oldcheck and self._taxonomy[check][0]!=taxid :
            if self._taxonomy[check][0] < taxid:
                begin=check
            else:
                end=check
            oldcheck=check
            check = (begin + end) / 2
        if self._taxonomy[check][0]==taxid:
            return check
        else:
            return None

    def _readNodeTable(self,file):
        # Parse nodes.dmp: build _taxonomy (sorted by taxid), _ranks and
        # the taxid -> index mapping.
        file = universalOpen(file)
        nodes = ColumnFile(file,
                           sep='|',
                           types=(int,int,str,
                                  str,str,bool,
                                  int,bool,int,
                                  bool,bool,bool,str))
        print >>sys.stderr,"Reading taxonomy dump file..."
        # (taxid,rank,parent)
        taxonomy=[[n[0],n[2],n[1]] for n in nodes]
        print >>sys.stderr,"List all taxonomy rank..."
        ranks =list(set(x[1] for x in taxonomy))
        ranks.sort()
        # map(None, a, b) is the Python-2-only zip idiom:
        # builds {rankname: rankindex}
        rankidx = dict(map(None,ranks,xrange(len(ranks))))
        print >>sys.stderr,"Sorting taxons..."
        taxonomy.sort(TaxonomyDump._taxonCmp)   # Python 2 cmp-based sort
        self._taxonomy=taxonomy
        self._localtaxon=len(taxonomy)
        print >>sys.stderr,"Indexing taxonomy..."
        index = {}
        for t in self._taxonomy:
            index[t[0]]=self._bsearchTaxon(t[0])
        print >>sys.stderr,"Indexing parent and rank..."
        for t in self._taxonomy:
            # replace the rank name by its index and the parent taxid by
            # the parent's position in _taxonomy
            t[1]=rankidx[t[1]]
            t[2]=index[t[2]]
        self._ranks=ranks
        self._index=index
        self._preferedName = []

    def _nameIterator(self,file):
        # Yield (taxid, name, nameclass) from names.dmp.
        file = universalOpen(file)
        names = ColumnFile(file,
                           sep='|',
                           types=(int,str,
                                  str,str))
        for taxid,name,unique,classname,white in names:
            yield taxid,name,classname

    def _mergedNodeIterator(self,file):
        # Yield (old_taxid, current_taxid) from merged.dmp.
        file = universalOpen(file)
        merged = ColumnFile(file,
                            sep='|',
                            types=(int,int,str))
        for taxid,current,white in merged:
            yield taxid,current

    def _deletedNodeIterator(self,file):
        # Yield deleted taxids from delnodes.dmp.
        file = universalOpen(file)
        deleted = ColumnFile(file,
                             sep='|',
                             types=(int,str))
        for taxid,white in deleted:
            yield taxid
#####
#
#
# Binary writer
#
#
#####
def ecoTaxonomyWriter(prefix, taxonomy,onlyLocal=False):
    # Serialize `taxonomy` into the ecoPCR binary files sharing `prefix`:
    # .rdx (ranks), .tdx (ncbi taxa), .ndx (names), .adx (taxid aliases),
    # .ldx (local taxa) and .pdx (prefered names).  When `onlyLocal` is
    # True only the local taxon file is (re)written.

    def ecoTaxPacker(tx):
        # One taxon record: size, taxid, rank id, parent index, name.
        namelength = len(tx[3])
        totalSize = 4 + 4 + 4 + 4 + namelength
        packed = struct.pack('> I I I I I %ds' % namelength,
                             totalSize,
                             tx[0],
                             tx[1],
                             tx[2],
                             namelength,
                             tx[3])
        return packed

    def ecoRankPacker(rank):
        # One rank record: name length then name.
        namelength = len(rank)
        packed = struct.pack('> I %ds' % namelength,
                             namelength,
                             rank)
        return packed

    def ecoAliasPacker(taxid,index):
        # One alias record: size, taxid, signed taxonomy index
        # (-1 encodes a deleted taxid).
        totalSize = 4 + 4
        try:
            packed = struct.pack('> I I i',
                                 totalSize,
                                 taxid,
                                 index)
        except struct.error,e:
            # report the offending values before propagating
            print >>sys.stderr,(totalSize,taxid,index)
            print >>sys.stderr,"Total size : %d taxid : %d index : %d" %(totalSize,taxid,index)
            raise e
        return packed

    def ecoNamePacker(name):
        # One name record: size, scientific-name flag, name length,
        # class length, taxonomy index, then the name and class strings.
        namelength = len(name[0])
        classlength= len(name[1])
        totalSize = namelength + classlength + 4 + 4 + 4 + 4
        packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
                             totalSize,
                             int(name[1]=='scientific name'),
                             namelength,
                             classlength,
                             name[2],
                             name[0],
                             name[1])
        return packed

    def ecoTaxWriter(file,taxonomy):
        # Write all 'ncbi' taxa; return True when local taxa remain.
        output = open(file,'wb')
        nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]=='ncbi'),0)
        output.write(struct.pack('> I',nbtaxon))
        for tx in taxonomy:
            if tx[4]=='ncbi':
                output.write(ecoTaxPacker(tx))
        output.close()
        return nbtaxon < len(taxonomy)

    def ecoLocalTaxWriter(file,taxonomy):
        # Write the non-ncbi (local) taxa, if any.
        nbtaxon = reduce(lambda x,y:x+y,(1 for t in taxonomy if t[4]!='ncbi'),0)
        if nbtaxon:
            output = open(file,'wb')
            output.write(struct.pack('> I',nbtaxon))
            for tx in taxonomy:
                if tx[4]!='ncbi':
                    output.write(ecoTaxPacker(tx))
            output.close()

    def ecoRankWriter(file,ranks):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(ranks)))
        for rank in ranks:
            output.write(ecoRankPacker(rank))
        output.close()

    def ecoAliasWriter(file,index):
        output = open(file,'wb')
        output.write(struct.pack('> I',len(index)))
        for taxid in index:
            i=index[taxid]
            if i is None:
                i=-1          # deleted taxid
            output.write(ecoAliasPacker(taxid, i))
        output.close()

    def nameCmp(n1,n2):
        # Case-insensitive Python 2 cmp-style comparator for name sort.
        name1=n1[0].upper()
        name2=n2[0].upper()
        if name1 < name2:
            return -1
        elif name1 > name2:
            return 1
        return 0

    def ecoNameWriter(file,names):
        # Names are sorted (case-insensitively) before writing.
        output = open(file,'wb')
        output.write(struct.pack('> I',len(names)))
        names.sort(nameCmp)
        for name in names:
            output.write(ecoNamePacker(name))
        output.close()

    def ecoPreferedNameWriter(file,names):
        # Prefered names keep their original order (no sorting).
        # NOTE(review): this writer is defined but the .pdx file below
        # is written with ecoNameWriter instead -- confirm intended.
        output = open(file,'wb')
        output.write(struct.pack('> I',len(names)))
        for name in names:
            output.write(ecoNamePacker(name))
        output.close()

    localtaxon=True
    if not onlyLocal:
        ecoRankWriter('%s.rdx' % prefix, taxonomy._ranks)
        localtaxon = ecoTaxWriter('%s.tdx' % prefix, taxonomy._taxonomy)
        ecoNameWriter('%s.ndx' % prefix, [x for x in taxonomy._name if x[2] < taxonomy._localtaxon])
        ecoAliasWriter('%s.adx' % prefix, taxonomy._index)
    if localtaxon:
        ecoLocalTaxWriter('%s.ldx' % prefix, taxonomy._taxonomy)
    if taxonomy._preferedName:
        ecoNameWriter('%s.pdx' % prefix, taxonomy._preferedName)

View File

@ -0,0 +1,2 @@
class EcoTagResult(dict):
    """One ecotag result line, behaving as a plain mapping.

    Identification ('ID') results additionally carry a ``cd`` attribute
    listing their candidate ('CD') lines (set by the parser).
    """
    pass

150
obitools/ecotag/parser.py Normal file
View File

@ -0,0 +1,150 @@
from itertools import imap
from obitools import utils
from obitools.ecotag import EcoTagResult
class EcoTagFileIterator(utils.ColumnFile):
    # Iterator over the tab separated output of the ecotag program.
    # Each 'ID' line is returned as an EcoTagResult carrying its 'CD'
    # (candidate) lines in a ``cd`` attribute.

    @staticmethod
    def taxid(x):
        # negative taxids encode "no taxon"
        x = int(x)
        if x < 0:
            return None
        else:
            return x

    @staticmethod
    def scientificName(x):
        # '--' encodes a missing name
        if x=='--':
            return None
        else:
            return x

    @staticmethod
    def value(x):
        # '--' encodes a missing float value
        if x=='--':
            return None
        else:
            return float(x)

    @staticmethod
    def count(x):
        # '--' encodes a missing integer value
        if x=='--':
            return None
        else:
            return int(x)

    def __init__(self,stream):
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,str,str,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.value,
                                   EcoTagFileIterator.count,
                                   EcoTagFileIterator.count,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   str,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   EcoTagFileIterator.taxid,
                                   EcoTagFileIterator.scientificName,
                                   str
                                   ))
        # one-record lookahead used to regroup CD lines under their ID
        self._memory=None

    _colname = ['identification',
                'seqid',
                'best_match_ac',
                'max_identity',
                'min_identity',
                'theorical_min_identity',
                'count',
                'match_count',
                'taxid',
                'scientific_name',
                'rank',
                'order_taxid',
                'order_sn',
                'family_taxid',
                'family_sn',
                'genus_taxid',
                'genus_sn',
                'species_taxid',
                'species_sn',
                'sequence']

    def next(self):
        # Return the next result.  For an 'ID' line, the following 'CD'
        # lines are consumed into its ``cd`` list and the first non-CD
        # record is kept aside in self._memory for the next call.
        # imap(None, names, values) pairs column names with values
        # (Python 2 zip idiom).
        if self._memory is not None:
            data=self._memory
            self._memory=None
        else:
            data = utils.ColumnFile.next(self)
            data = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(data)],data))
        if data['identification']=='ID':
            data.cd=[]
            try:
                nextone = utils.ColumnFile.next(self)
                nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
            except StopIteration:
                nextone = None
            while nextone is not None and nextone['identification']=='CD':
                data.cd.append(nextone)
                try:
                    nextone = utils.ColumnFile.next(self)
                    nextone = EcoTagResult(imap(None,EcoTagFileIterator._colname[:len(nextone)],nextone))
                except StopIteration:
                    nextone = None
            self._memory=nextone
        return data
def ecoTagIdentifiedFilter(ecoTagIterator):
    """Keep only the identification ('ID') records of an ecotag stream."""
    return (record for record in ecoTagIterator
            if record['identification'] == 'ID')
class EcoTagAbstractIterator(utils.ColumnFile):
    # Iterator over ecotag "abstract" (per-taxon summary) files.

    _colname = ['scientific_name',
                'taxid',
                'rank',
                'count',
                'max_identity',
                'min_identity']

    @staticmethod
    def taxid(x):
        # negative taxids encode "no taxon"
        x = int(x)
        if x < 0:
            return None
        else:
            return x

    def __init__(self,stream):
        # NOTE(review): the column types reference
        # EcoTagFileIterator.taxid rather than the taxid staticmethod
        # defined above (both behave identically) -- confirm intended.
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,
                                   EcoTagFileIterator.taxid,
                                   str,
                                   int,
                                   float,float,float))

    def next(self):
        # Pair column names with converted values (imap(None,...) is
        # the Python 2 zip-to-tuples idiom).
        data = utils.ColumnFile.next(self)
        data = dict(imap(None,EcoTagAbstractIterator._colname,data))
        return data
def ecoTagAbstractFilter(ecoTagAbsIterator):
    """Drop abstract records lacking a valid taxid."""
    return (row for row in ecoTagAbsIterator if row['taxid'] is not None)

View File

@ -0,0 +1,54 @@
import time
from urllib2 import urlopen
import shelve
from threading import Lock
import sys
class EUtils(object):
    '''
    Base class for NCBI Entrez E-utilities clients.

    Enforces the NCBI courtesy rule of at most one request every
    ``_interval`` seconds, shared across all EUtils instances through
    class attributes.
    '''
    _last_request=0
    _interval=3

    def __init__(self):
        # NOTE(review): the lock is created but never acquired, so the
        # throttling below is not thread safe.
        self._lock = Lock()

    def wait(self):
        """Block until at least ``_interval`` seconds have elapsed
        since the previous request."""
        now=time.time()
        delta = now - EUtils._last_request
        while delta < EUtils._interval:
            # BUG FIX: the original slept for the *elapsed* time
            # (``delta``) instead of the remaining part of the
            # interval, throttling requests incorrectly.
            time.sleep(EUtils._interval - delta)
            now=time.time()
            delta = now - EUtils._last_request

    def _sendRequest(self,url):
        # Throttle, record the request time, then return the raw body.
        self.wait()
        EUtils._last_request=time.time()
        t = EUtils._last_request
        print >>sys.stderr,"Sending request to NCBI @ %f" % t
        data = urlopen(url).read()
        print >>sys.stderr,"Data red from NCBI @ %f (%f)" % (t,time.time()-t)
        return data

    def setInterval(self,seconde):
        """Change the minimal delay (seconds) between two requests."""
        EUtils._interval=seconde
class EFetch(EUtils):
    '''
    Throttled client for the NCBI ``efetch`` E-utility.
    '''

    def __init__(self,db,tool='OBITools',
                 retmode='text',rettype="native",
                 server='eutils.ncbi.nlm.nih.gov'):
        EUtils.__init__(self)
        # Pre-build the base query URL; per-request parameters are
        # appended by get().
        template = "http://%s/entrez/eutils/efetch.fcgi?db=%s&tool=%s&retmode=%s&rettype=%s"
        self._url = template % (server,db,tool,retmode,rettype)

    def get(self,**args):
        """Issue an efetch request with the given CGI parameters."""
        extra = "&".join(['%s=%s' % item for item in args.items()])
        return self._sendRequest(self._url + "&" + extra)

56
obitools/fast.py Normal file
View File

@ -0,0 +1,56 @@
"""
implement fastn/fastp sililarity search algorithm for BioSequence.
"""
class Fast(object):
    """Fastn/fastp-style word-hash similarity search for BioSequence."""

    def __init__(self,seq,kup=2):
        '''
        @param seq: sequence to hash
        @type seq: BioSequence
        @param kup: word size used for hashing process
        @type kup: int
        '''
        # `words` maps each upper-cased kup-word to its positions
        # (renamed from `hash`, which shadowed the builtin).
        words={}
        seq = str(seq)
        # BUG FIX: the original range stopped at len(seq)-kup and thus
        # silently dropped the last word of the sequence; +1 includes it.
        for word,pos in ((seq[i:i+kup].upper(),i) for i in range(len(seq)-kup+1)):
            if word in words:
                words[word].append(pos)
            else:
                words[word]=[pos]
        self._kup = kup
        self._hash= words
        self._seq = seq

    def __call__(self,seq):
        '''
        Align one sequence with the fast hash table.

        @param seq: the sequence to align
        @type seq: BioSequence

        @return: where smax is the score of the largest diagonal and
                 pmax the associated shifts
        @rtype: a tuple (smax,pmax), smax an int and pmax a list
        '''
        histo={}
        seq = str(seq).upper()
        words= self._hash
        kup = self._kup
        # same off-by-one fix as in __init__: include the final word
        for word,pos in ((seq[i:i+kup],i) for i in range(len(seq)-kup+1)):
            for p in words.get(word,[]):
                delta = pos - p
                histo[delta]=histo.get(delta,0) + 1
        # BUG FIX: max() raised ValueError when the two sequences shared
        # no word; report a null score instead.
        if not histo:
            return 0,[]
        smax = max(histo.values())
        pmax = [x for x in histo if histo[x]==smax]
        return smax,pmax

    def __len__(self):
        return len(self._seq)

384
obitools/fasta/__init__.py Normal file
View File

@ -0,0 +1,384 @@
"""
fasta module provides functions to read and write sequences in fasta format.
"""
#from obitools.format.genericparser import fastGenericEntryIteratorGenerator
from obitools.format.genericparser import genericEntryIteratorGenerator
from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence
from obitools import _default_raw_parser
#from obitools.alignment import alignmentReader
#from obitools.utils import universalOpen
import re
from obitools.ecopcr.options import loadTaxonomyDatabase
from obitools.format import SequenceFileIterator
#from _fasta import parseFastaDescription,fastaParser
#from _fasta import _fastaJoinSeq
#from _fasta import _parseFastaTag
#fastaEntryIterator=fastGenericEntryIteratorGenerator(startEntry='>')
# Entry iterators splitting a fasta stream into one string per record;
# records start at the '>' title line (the "raw" variant tolerates
# leading whitespace before '>').
fastaEntryIterator=genericEntryIteratorGenerator(startEntry='>')
rawFastaEntryIterator=genericEntryIteratorGenerator(startEntry='\s*>')
def _fastaJoinSeq(seqarray):
return ''.join([x.strip() for x in seqarray])
def parseFastaDescription(ds,tagparser):
    """Split a fasta title remainder into (definition, key=value info).

    @param ds: the part of the title line following the identifier
    @param tagparser: compiled regex matching the leading run of
                      ``key=value;`` couples (applied to ' '+ds)
    @return: a (definition, info) couple; info is None when no couple
             was found
    """
    hit = tagparser.search(' ' + ds)
    if hit is None:
        return ds, None
    # keep the original slicing convention (offset into ds, not ' '+ds)
    return ds[hit.end(0):].strip(), hit.group(0)
def fastaParser(seq,bioseqfactory,tagparser,rawparser,joinseq=_fastaJoinSeq):
    '''
    Parse a fasta record.

    @attention: internal purpose function

    @param seq: the full text of one fasta entry
    @type seq: C{str}
    @param bioseqfactory: a callable object returning a BioSequence
                          instance
    @param tagparser: a compiled regular expression extracting
                      key=value couples from the title line
    @type tagparser: regex instance
    @param rawparser: raw parser template forwarded to the factory
    @param joinseq: callable merging the sequence lines into one string

    @return: a C{BioSequence} instance
    '''
    lines = seq.split('\n')
    title = lines[0].strip()[1:].split(None,1)
    id = title[0]
    if len(title) == 2:
        definition,info = parseFastaDescription(title[1], tagparser)
    else:
        definition = None
        info = None
    sequence = joinseq(lines[1:])
    return bioseqfactory(id, sequence, definition, info, rawparser)
def fastaNucParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
    """Parse one fasta entry as a NucSequence.

    @param seq: the full text of one fasta entry
    @param tagparser: raw tag parser template (as in fastaIterator)
    @type tagparser: str
    @param joinseq: callable merging the sequence lines
    """
    # BUG FIX: fastaParser requires both the compiled tag parser and
    # the raw template; the original call omitted the `rawparser`
    # argument (TypeError) and ignored the `joinseq` parameter.
    allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
    compiledparser = re.compile('( *%s)+' % allparser)
    return fastaParser(seq,NucSequence,compiledparser,tagparser,joinseq=joinseq)
def fastaAAParser(seq,tagparser=_default_raw_parser,joinseq=_fastaJoinSeq):
    """Parse one fasta entry as an AASequence.

    @param seq: the full text of one fasta entry
    @param tagparser: raw tag parser template (as in fastaIterator)
    @type tagparser: str
    @param joinseq: callable merging the sequence lines
    """
    # BUG FIX: fastaParser requires both the compiled tag parser and
    # the raw template; the original call omitted the `rawparser`
    # argument (TypeError) and ignored the `joinseq` parameter.
    allparser = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
    compiledparser = re.compile('( *%s)+' % allparser)
    return fastaParser(seq,AASequence,compiledparser,tagparser,joinseq=joinseq)
def fastaIterator(file,bioseqfactory=bioSeqGenerator,
                  tagparser=_default_raw_parser,
                  joinseq=_fastaJoinSeq):
    '''
    Iterate through a fasta file sequence by sequence.

    Returned sequences are BioSequence instances built by
    ``bioseqfactory``.

    @param file: a line iterator containing fasta data or a filename
    @type file: an iterable object or str
    @param bioseqfactory: a callable object returning a BioSequence
                          instance
    @param tagparser: raw parser template used to extract ``key=value;``
                      couples from the title line
    @type tagparser: str

    @return: an iterator on C{BioSequence} instances

    @see: L{fastaNucIterator}
    @see: L{fastaAAIterator}
    '''
    rawparser = tagparser
    keypattern = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
    compiledparser = re.compile('( *%s)+' % keypattern)
    for entry in fastaEntryIterator(file):
        yield fastaParser(entry,bioseqfactory,compiledparser,rawparser,joinseq)
def rawFastaIterator(file,bioseqfactory=bioSeqGenerator,
                     tagparser=_default_raw_parser,
                     joinseq=_fastaJoinSeq):
    """Same as L{fastaIterator} but tolerates surrounding whitespace
    around each entry (records may start with ' >')."""
    rawparser = tagparser
    keypattern = tagparser % '[a-zA-Z][a-zA-Z0-9_]*'
    compiledparser = re.compile('( *%s)+' % keypattern)
    for entry in rawFastaEntryIterator(file):
        yield fastaParser(entry.strip(),bioseqfactory,compiledparser,rawparser,joinseq)
def fastaNucIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through a fasta file, returning every record as a
    NucSequence instance.

    @param file: a line iterator containing fasta data
    @type file: an iterable object
    @param tagparser: raw parser template used to extract ``key=value;``
                      couples from the title line
    @type tagparser: str

    @return: an iterator on C{NucBioSequence} instances
    @rtype: a generator object

    @see: L{fastaIterator}
    @see: L{fastaAAIterator}
    '''
    return fastaIterator(file, NucSequence, tagparser)
def fastaAAIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through a fasta file, returning every record as an
    AASequence instance.

    @param file: a line iterator containing fasta data
    @type file: an iterable object
    @param tagparser: raw parser template used to extract ``key=value;``
                      couples from the title line
    @type tagparser: str

    @return: an iterator on C{AABioSequence} instances

    @see: L{fastaIterator}
    @see: L{fastaNucIterator}
    '''
    return fastaIterator(file, AASequence, tagparser)
def formatFasta(data,gbmode=False,upper=False,restrict=None):
    '''
    Convert a sequence or a set of sequences into a fasta formated
    string.

    @param data: a BioSequence instance or an iterable object over
                 BioSequence instances
    @param gbmode: when C{True} the identifier part of the title line
                   follows the ncbi recommendation so the output can be
                   indexed with the blast formatdb command
    @type gbmode: bool
    @param upper: when C{True} sequences are written upper case
    @type upper: bool
    @param restrict: a set of key names printed in the formated output;
                     C{None} (default) formats all keys
    @type restrict: any iterable value or None

    @return: a fasta formated string
    @rtype: str
    '''
    if isinstance(data, BioSequence):
        data = [data]
    if restrict is not None and not isinstance(restrict, set):
        restrict = set(restrict)

    entries = []
    for sequence in data:
        raw = str(sequence)
        definition = sequence.definition
        if definition is None:
            definition = ''

        # wrap the sequence on 60-character lines
        if upper:
            chunks = [raw[i:i+60].upper() for i in range(0,len(raw),60)]
        else:
            chunks = [raw[i:i+60] for i in range(0,len(raw),60)]
        frgseq = '\n'.join(chunks)

        info = '; '.join(['%s=%s' % item
                          for item in sequence.rawiteritems()
                          if restrict is None or item[0] in restrict])
        if info:
            info = info+';'
        if sequence._rawinfo is not None and sequence._rawinfo:
            info += " " + sequence._rawinfo.strip()

        seqid = sequence.id
        if gbmode:
            if 'gi' in sequence:
                seqid = "gi|%s|%s" % (sequence['gi'],seqid)
            else:
                seqid = "lcl|%s|" % (seqid)
        title = '>%s %s %s' %(seqid,info,definition)
        entries.append("%s\n%s" % (title,frgseq))

    return '\n'.join(entries)
def formatSAPFastaGenerator(options):
    # Build a fasta formatter for the SAP software: every title line is
    # annotated with the taxonomic lineage of the sequence.
    loadTaxonomyDatabase(options)
    taxonomy=None
    if options.taxonomy is not None:
        taxonomy=options.taxonomy
    assert taxonomy is not None,"SAP formating require indication of a taxonomy database"
    # candidate ranks, restricted below to those known by the taxonomy
    ranks = ('superkingdom', 'kingdom', 'subkingdom', 'superphylum',
             'phylum', 'subphylum', 'superclass', 'class', 'subclass',
             'infraclass', 'superorder', 'order', 'suborder', 'infraorder',
             'parvorder', 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe',
             'subtribe', 'supergenus', 'genus', 'subgenus', 'species group',
             'species subgroup', 'species', 'subspecies')
    trank=set(taxonomy._ranks)
    # keep the candidate ranks as rank indices
    ranks = [taxonomy._ranks.index(x) for x in ranks if x in trank]
    strict= options.strictsap

    def formatSAPFasta(data,gbmode=False,upper=False,restrict=None):
        '''
        Convert a seqence or a set of sequences in a
        string following the fasta format as recommended for the SAP
        software
        http://ib.berkeley.edu/labs/slatkin/munch/StatisticalAssignmentPackage.html

        @param data: sequence or a set of sequences
        @type data: BioSequence instance or an iterable object
                    on BioSequence instances

        @param gbmode: if set to C{True} identifier part of the title
                       line follows recommendation from nbci to allow
                       sequence indexing with the blast formatdb command.
        @type gbmode: bool

        @param restrict: a set of key name that will be print in the formated
                         output. If restrict is set to C{None} (default) then
                         all keys are formated.
        @type restrict: any iterable value or None

        @return: a fasta formated string
        @rtype: str
        '''
        if isinstance(data, BioSequence):
            data = [data]
        if restrict is not None and not isinstance(restrict, set):
            restrict = set(restrict)
        rep = []
        for sequence in data:
            seq = str(sequence)
            # wrap the sequence on 60 character lines
            if upper:
                frgseq = '\n'.join([seq[x:x+60].upper() for x in xrange(0,len(seq),60)])
            else:
                frgseq = '\n'.join([seq[x:x+60] for x in xrange(0,len(seq),60)])
            try:
                taxid = sequence["taxid"]
            except KeyError:
                # without a taxid the lineage cannot be built:
                # fail in strict mode, skip the sequence otherwise
                if strict:
                    raise AssertionError('All sequence must have a taxid')
                else:
                    continue
            definition=' ;'
            for r in ranks:
                taxon = taxonomy.getTaxonAtRank(taxid,r)
                if taxon is not None:
                    definition+=' %s: %s,' % (taxonomy._ranks[r],taxonomy.getPreferedName(taxon))
            # drop the trailing comma and append the taxon name itself
            definition='%s ; %s' % (definition[0:-1],taxonomy.getPreferedName(taxid))
            id = sequence.id
            if gbmode:
                if 'gi' in sequence:
                    id = "gi|%s|%s" % (sequence['gi'],id)
                else:
                    id = "lcl|%s|" % (id)
            title='>%s%s' %(id,definition)
            rep.append("%s\n%s" % (title,frgseq))
        return '\n'.join(rep)

    return formatSAPFasta
class FastaIterator(SequenceFileIterator):
    # Iterator over the entries of a fasta formatted flat file.

    # One raw text entry per '>' header line.
    entryIterator = genericEntryIteratorGenerator(startEntry='>')
    # NOTE(review): the following statement is a no-op -- its result is
    # discarded.  It was presumably meant to be
    # ``entryIterator = classmethod(entryIterator)``; and given the
    # ``FastaIterator.entryIterator(self._inputfile)`` call below,
    # ``staticmethod`` rather than ``classmethod`` looks required. Confirm.
    classmethod(entryIterator)

    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator,
                 tagparser=_default_raw_parser,
                 joinseq=_fastaJoinSeq):
        SequenceFileIterator.__init__(self, inputfile, bioseqfactory)

        self.__file = FastaIterator.entryIterator(self._inputfile)

        self._tagparser = tagparser
        self._joinseq = joinseq

    def get_tagparser(self):
        # Returns the compiled regular expression built by set_tagparser.
        return self.__tagparser

    def set_tagparser(self, value):
        # *value* is a raw pattern template such as " %s *= *([^;]*);".
        # Keep the raw template and compile a pattern matching a whole run
        # of "key=value" tags.
        self._rawparser = value
        allparser = value % '[a-zA-Z][a-zA-Z0-9_]*'
        self.__tagparser = re.compile('( *%s)+' % allparser)

    def _parseFastaDescription(self,ds):
        # Split the remainder of the title line into (definition, raw
        # "key=value;" info string). The prepended space lets patterns
        # anchored on ' ' match the first tag as well.
        m = self._tagparser.search(' '+ds)
        if m is not None:
            info=m.group(0)
            definition = ds[m.end(0):].strip()
        else:
            info=None
            definition=ds

        return definition,info

    def _parser(self):
        '''
        Parse a fasta record.

        @attention: internal purpose function

        @return: a C{BioSequence} instance
        '''
        # NOTE(review): ``self._seq`` is not assigned anywhere in this
        # class; presumably the base class stores the raw entry text there
        # before calling this method -- confirm.
        seq = self._seq.split('\n')
        title = seq[0].strip()[1:].split(None,1)
        id=title[0]
        if len(title) == 2:
            definition,info=self._parseFastaDescription(title[1])
        else:
            info= None
            definition=None

        seq=self._joinseq(seq[1:])

        return self._bioseqfactory(id, seq, definition,info,self._rawparser)

    _tagparser = property(get_tagparser, set_tagparser, None, "_tagparser's docstring")

BIN
obitools/fasta/_fasta.so Executable file

Binary file not shown.

190
obitools/fastq/__init__.py Normal file
View File

@ -0,0 +1,190 @@
'''
Created on 29 aout 2009
@author: coissac
'''
from obitools import BioSequence
from obitools import _default_raw_parser
from obitools.format.genericparser import genericEntryIteratorGenerator
from obitools import bioSeqGenerator,AASequence,NucSequence
from obitools.fasta import parseFastaDescription
from _fastq import fastqQualitySangerDecoder,fastqQualitySolexaDecoder
from _fastq import qualityToSangerError,qualityToSolexaError
from _fastq import errorToSangerFastQStr
from _fastq import formatFastq
from _fastq import fastqParserGenetator
from obitools.utils import universalOpen
import re
# Entry iterator over fastq records: an entry starts on a '@' line and ends
# on the '+' separator line (included); lines are stripped and returned as a
# list (join=False). Quality lines are read separately by fastqIterator.
fastqEntryIterator=genericEntryIteratorGenerator(startEntry='^@',endEntry="^\+",strip=True,join=False)
#def fastqParserGenetator(fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_parseFastaTag):
#
# qualityDecoder,errorDecoder = {'sanger' : (fastqQualitySangerDecoder,qualityToSangerError),
# 'solexa' : (fastqQualitySolexaDecoder,qualityToSolexaError),
# 'illumina' : (fastqQualitySolexaDecoder,qualityToSangerError)}[fastqvariant]
#
# def fastqParser(seq):
# '''
# Parse a fasta record.
#
# @attention: internal purpose function
#
# @param seq: a sequence object containing all lines corresponding
# to one fasta sequence
# @type seq: C{list} or C{tuple} of C{str}
#
# @param bioseqfactory: a callable object return a BioSequence
# instance.
# @type bioseqfactory: a callable object
#
# @param tagparser: a compiled regular expression usable
# to identify key, value couples from
# title line.
# @type tagparser: regex instance
#
# @return: a C{BioSequence} instance
# '''
#
# title = seq[0][1:].split(None,1)
# id=title[0]
# if len(title) == 2:
# definition,info=parseFastaDescription(title[1], tagparser)
# else:
# info= {}
# definition=None
#
# quality=errorDecoder(qualityDecoder(seq[3]))
#
# seq=seq[1]
#
# seq = bioseqfactory(id, seq, definition,False,**info)
# seq.quality = quality
#
# return seq
#
# return fastqParser
def fastqIterator(file,fastqvariant='sanger',bioseqfactory=NucSequence,tagparser=_default_raw_parser):
    '''
    Iterate through a fastq file record by record.

    Sequences returned by this iterator are BioSequence instances built
    by *bioseqfactory*.

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str

    @param fastqvariant: the quality encoding variant: 'sanger', 'solexa'
                         or 'illumina'
    @type fastqvariant: str

    @param bioseqfactory: a callable object returning a BioSequence
                          instance.
    @type bioseqfactory: a callable object

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{BioSequence} instance

    @see: L{fastqSangerIterator}
    @see: L{fastqSolexaIterator}
    @see: L{fastqIlluminaIterator}
    '''
    recordParser = fastqParserGenetator(fastqvariant, bioseqfactory, tagparser)
    stream = universalOpen(file)
    for record in fastqEntryIterator(stream):
        header = record[0]
        sequence = "".join(record[1:-1])
        # The quality string may span several lines: keep reading until it
        # is as long as the sequence itself.
        seqlength = len(sequence)
        quality = ''
        while len(quality) < seqlength:
            quality += stream.next().strip()
        yield recordParser([header, sequence, '+', quality])
def fastqSangerIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through a Sanger encoded fastq file sequence by sequence.

    Sequences returned by this iterator are NucSequence instances.

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,
                         fastqvariant='sanger',
                         bioseqfactory=NucSequence,
                         tagparser=tagparser)
def fastqSolexaIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through a Solexa encoded fastq file sequence by sequence.

    Sequences returned by this iterator are NucSequence instances.

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,
                         fastqvariant='solexa',
                         bioseqfactory=NucSequence,
                         tagparser=tagparser)
def fastqIlluminaIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through an Illumina encoded fastq file sequence by sequence.

    Sequences returned by this iterator are NucSequence instances.

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{NucSequence} instance

    @see: L{fastqIterator}
    @see: L{fastqAAIterator}
    '''
    return fastqIterator(file,
                         fastqvariant='illumina',
                         bioseqfactory=NucSequence,
                         tagparser=tagparser)
def fastqAAIterator(file,tagparser=_default_raw_parser):
    '''
    Iterate through a Sanger encoded fastq file of protein sequences.

    Sequences returned by this iterator are AASequence instances.

    @param file: a line iterator containing fastq data or a filename
    @type file: an iterable object or str

    @param tagparser: a compiled regular expression usable
                      to identify key, value couples from
                      title line.
    @type tagparser: regex instance

    @return: an iterator on C{AASequence} instance

    @see: L{fastqIterator}
    @see: L{fastqSangerIterator}
    '''
    return fastqIterator(file,
                         fastqvariant='sanger',
                         bioseqfactory=AASequence,
                         tagparser=tagparser)

BIN
obitools/fastq/_fastq.so Executable file

Binary file not shown.

View File

@ -0,0 +1,2 @@
# Tag parser template for 454 .fna/.qual headers: values are whitespace
# delimited rather than ';' terminated as in the default obitools parser.
fnaTag=' %s *= *([^\s]+)'

View File

@ -0,0 +1,8 @@
from obitools.fasta import fastaNucIterator
from obitools.fnaqual import fnaTag
def fnaFastaIterator(file):
    """Iterate over a 454 '.fna' fasta file using the fna tag parser."""
    return fastaNucIterator(file, fnaTag)

137
obitools/fnaqual/quality.py Normal file
View File

@ -0,0 +1,137 @@
"""
"""
from obitools import _default_raw_parser
from obitools.fasta import fastaIterator
from obitools.fnaqual import fnaTag
from obitools.location import Location
import re
class QualitySequence(list):
    '''
    Sequence of per-position quality values read from a 454 ``.qual`` file.

    Behaves as a C{list} of integer scores while mimicking the annotation
    interface of C{BioSequence}: an id, a definition and key/value tags
    lazily parsed from the raw info string.
    '''

    def __init__(self,id,seq,definition=None,rawinfo=None,rawparser=_default_raw_parser,**info):
        '''
        @param id:         sequence identifier
        @param seq:        iterable of integer quality values
        @param definition: definition line associated to the record
        @param rawinfo:    raw, not yet parsed, "key=value" annotation text
        @param rawparser:  pattern template used to extract one tag from
                           the raw info text
        @param info:       already parsed annotations
        '''
        list.__init__(self,seq)
        self._info = info
        self.definition=definition
        self.id=id
        # BUG FIX: the default rawinfo=None used to crash on ' ' + None.
        if rawinfo is None:
            rawinfo = ''
        # A leading space is kept so that tag patterns, which are anchored
        # on a space, can also match the first tag of the raw info string.
        self._rawinfo=' ' + rawinfo
        self._rawparser=rawparser

    def getDefinition(self):
        '''
        Sequence definition getter.

        @return: the sequence definition
        @rtype: str
        '''
        return self._definition

    def setDefinition(self, value):
        self._definition = value

    def getId(self):
        return self._id

    def setId(self, value):
        self._id = value

    def getKey(self,key):
        # Lazily extract *key* from the raw info string on first access,
        # caching the value in self._info.
        if key not in self._info:
            p = re.compile(self._rawparser % key)
            m = p.search(self._rawinfo)
            if m is not None:
                v=m.group(1)
                # Remove the matched tag so it is never parsed twice.
                self._rawinfo=' ' + self._rawinfo[0:m.start(0)]+self._rawinfo[m.end(0):]
                try:
                    v = eval(v)
                except:
                    # Best effort: keep the raw string when the value is
                    # not a python literal.
                    pass
                self._info[key]=v
            else:
                raise KeyError(key)
        else:
            v=self._info[key]
        return v

    def __getitem__(self,key):
        if isinstance(key,Location):
            return key.extractSequence(self)
        elif isinstance(key, str):
            # BUG FIX: used to call the non-existent ``self._getKey``.
            return self.getKey(key)
        elif isinstance(key, int):
            return list.__getitem__(self,key)
        elif isinstance(key, slice):
            subseq=list.__getitem__(self,key)
            info = dict(self._info)
            if key.start is not None:
                start = key.start +1
            else:
                start = 1
            if key.stop is not None:
                stop = key.stop+1
            else:
                stop = len(self)
            if key.step is not None:
                step = key.step
            else:
                step = 1
            # Record the extracted region (1 based) in the annotations.
            info['cut']='[%d,%d,%s]' % (start,stop,step)
            return QualitySequence(self.id, subseq, self.definition,self._rawinfo,self._rawparser,**info)
        raise TypeError('key must be an integer, a str or a slice')

    def __setitem__(self,key,value):
        self._info[key]=value

    def __delitem__(self,key):
        if isinstance(key, str):
            del self._info[key]
        else:
            raise TypeError(key)

    def __iter__(self):
        return list.__iter__(self)

    def __contains__(self,key):
        return key in self._info

    def getTags(self):
        return self._info

    def complement(self):
        '''
        Return a reversed copy of the quality values, flagged as
        complemented in its annotations.
        '''
        cseq = self[::-1]
        rep = QualitySequence(self.id,cseq,self.definition,self._rawinfo,self._rawparser,**self._info)
        rep._info['complemented']=not rep._info.get('complemented',False)
        return rep

    definition = property(getDefinition, setDefinition, None, "Sequence Definition")
    id = property(getId, setId, None, 'Sequence identifier')
def _qualityJoinSeq(seqarray):
text = ' '.join([x.strip() for x in seqarray])
return [int(x) for x in text.split()]
def qualityIterator(file):
    """Iterate over the QualitySequence records of a 454 .qual file."""
    for record in fastaIterator(file, QualitySequence, fnaTag, _qualityJoinSeq):
        yield record

View File

@ -0,0 +1,28 @@
from obitools import bioSeqGenerator
from obitools.utils import universalOpen
class SequenceFileIterator:
    # Base class for sequence flat file iterators.
    #
    # NOTE(review): this is an old-style class (no ``object`` base), so the
    # ``property`` objects declared below are not honoured as descriptors
    # in Python 2; ``self._inputfile = ...`` in __init__ simply creates an
    # instance attribute that shadows the property. Confirm intended.

    def __init__(self,inputfile,bioseqfactory=bioSeqGenerator):
        self._inputfile = universalOpen(inputfile)
        self._bioseqfactory = bioseqfactory

    def get_inputfile(self):
        # NOTE(review): ``self.__file`` (name-mangled to
        # ``_SequenceFileIterator__file``) is never assigned by this class;
        # this getter looks broken as written -- confirm.
        return self.__file

    def get_bioseqfactory(self):
        # NOTE(review): same concern -- ``__bioseqfactory`` is never
        # assigned; __init__ sets ``_bioseqfactory`` instead.
        return self.__bioseqfactory

    def next(self):
        # NOTE(review): neither ``self.inputfile`` (no underscore) nor
        # ``self._parse`` is defined anywhere visible; presumably
        # subclasses are expected to provide them -- confirm.
        entry = self.inputfile.next()
        return self._parse(entry)

    def __iter__(self):
        return self

    _inputfile = property(get_inputfile, None, None, "_file's docstring")
    _bioseqfactory = property(get_bioseqfactory, None, None, "_bioseqfactory's docstring")

BIN
obitools/format/_format.so Executable file

Binary file not shown.

View File

@ -0,0 +1,217 @@
"""
G{packagetree format}
"""
import re
from obitools.utils import universalOpen
def genericEntryIteratorGenerator(startEntry=None,endEntry=None,
                                  head=False,tail=False,
                                  strip=False,join=True):
    '''
    Transfome a text line iterator to an entry oriented iterator.

    This iterator converted is useful to implement first stage
    of flat file parsing.

    @param startEntry: a regular pattern matching the beginning of
                       an entry
    @type startEntry:  C{str} or None
    @param endEntry:   a regular pattern matching the end of
                       an entry
    @type endEntry:    C{str} or None
    @param head:       indicate if an header is present before
                       the first entry (as in many original genbank
                       files)
    @type head:        C{bool}
    @param tail:       indicate if some extra informations are present
                       after the last entry.
    @type tail:        C{bool}
    @param strip:      if C{True}, strip whitespace from the entry text
    @type strip:       C{bool}
    @param join:       if C{True}, yield each entry as one string;
                       otherwise as the list of its lines
    @type join:        C{bool}

    @return: an iterator on entries in text format
    @rtype: an iterator on C{str}
    '''

    def isBeginning(line):
        # True when no start pattern is defined or the line matches it.
        return startEntry is None or startEntry.match(line) is not None

    def isEnding(line):
        # An entry ends on an explicit end pattern, or -- when only a start
        # pattern is defined -- on the beginning of the next entry.
        return ((endEntry is not None and endEntry.match(line) is not None) or
                (endEntry is None and startEntry is not None and startEntry.match(line) is not None))

    def transparentIteratorEntry(file):
        # No delimiters at all: each line of the file is one entry.
        file = universalOpen(file)
        return file

    def genericEntryIterator(file):
        file = universalOpen(file)
        entry = []
        line = file.next()
        # Skip any leading header unless it must be kept (head=True).
        started = head or isBeginning(line)

        try:
            while 1:
                # Advance to the first line of the next entry.
                while not started:
                    line = file.next()
                    started = isBeginning(line)

                if endEntry is None:
                    entry.append(line)
                    line = file.next()

                # Accumulate lines until the end of the current entry.
                while started:
                    end = isEnding(line)
                    if end:
                        if endEntry is not None:
                            entry.append(line)
                        if join:
                            e = ''.join(entry)
                            if strip:
                                e=e.strip()
                        else:
                            e=entry
                            if strip:
                                e=[x.strip() for x in e]
                        entry=[]
                        yield e
                        started=False
                        if endEntry is not None:
                            line = file.next()
                    else:
                        entry.append(line)
                        line = file.next()

                # Without an explicit end pattern, the line that closed the
                # previous entry is the first line of the next one.
                started = isBeginning(line)

        except StopIteration:
            # Flush the last pending entry (or the tail when requested).
            if entry and (endEntry is None or tail):
                if join:
                    e = ''.join(entry)
                    if strip:
                        e=e.strip()
                else:
                    e=entry
                    if strip:
                        e=[x.strip() for x in e]
                yield e

    if startEntry is not None:
        startEntry = re.compile(startEntry)
    if endEntry is not None:
        endEntry = re.compile(endEntry)

    if startEntry is None and endEntry is None:
        return transparentIteratorEntry

    return genericEntryIterator
class GenericParser(object):
    # Configurable flat file parser: entries are produced by a generic
    # entry iterator, then each registered parse action extracts one field
    # from the entry text.

    def __init__(self,
                 startEntry=None,
                 endEntry=None,
                 head=False,
                 tail=False,
                 strip=False,
                 **parseAction):
        """
        @param startEntry: a regular pattern matching the beginning of
                           an entry
        @type startEntry:  C{str} or None
        @param endEntry:   a regular pattern matching the end of
                           an entry
        @type endEntry:    C{str} or None
        @param head:       indicate if an header is present before
                           the first entry (as in many original genbank
                           files)
        @type head:        C{bool}
        @param tail:       indicate if some extra informations are present
                           after the last entry.
        @type tail:        C{bool}
        @param parseAction: named parse actions; each value is the
                            argument tuple forwarded to L{addParseAction}
        """
        self.flatiterator= genericEntryIteratorGenerator(startEntry,
                                                         endEntry,
                                                         head,
                                                         tail,
                                                         strip)

        self.action={}

        for k in parseAction:
            self.addParseAction(k,*parseAction[k])

    def addParseAction(self,name,dataMatcher,dataCleaner=None,cleanSub=''):
        '''
        Add a parse action to the generic parser. A parse action
        allows to extract one information from an entry. A parse
        action is defined by a name and a method to extract this
        information from the full text entry.

        A parse action can be defined following two ways.

            - via regular expression patterns

            - via dedicated function.

        In the first case, you have to indicate at least the
        dataMatcher regular pattern. This pattern should match exactly
        the data part you want to retrieve. If cleaning of extra
        characters is needed, the second pattern dataCleaner can be
        used to specify these characters.

        In the second case you must provide a callable object (function)
        that extracts and cleans data from the text entry. This function
        should return a list containing all data retrieved, even if
        no data or only one data is retrieved.

        @summary: Add a parse action to the generic parser.

        @param name: name of the data extracted
        @type name: C{str}
        @param dataMatcher: a regular pattern matching the data
                            or a callable object parsing the
                            entry and returning a list of matched data
        @type dataMatcher: C{str} or C{SRE_Pattern} instance or a callable
                           object
        @param dataCleaner: a regular pattern matching part of the data
                            to suppress.
        @type dataCleaner: C{str} or C{SRE_Pattern} instance or C{None}
        @param cleanSub: string used to replace dataCleaner matches.
                         Default is an empty string
        @type cleanSub: C{str}
        '''
        if callable(dataMatcher):
            self.action[name]=dataMatcher
        else :
            if isinstance(dataMatcher, str):
                dataMatcher=re.compile(dataMatcher)
            if isinstance(dataCleaner, str):
                dataCleaner=re.compile(dataCleaner)
            self.action[name]=self._buildREParser(dataMatcher,
                                                  dataCleaner,
                                                  cleanSub)

    def _buildREParser(self,dataMatcher,dataCleaner,cleanSub):
        # Build a parser function from the matching / cleaning patterns.
        def parser(data):
            x = dataMatcher.findall(data)
            if dataCleaner is not None:
                x = [dataCleaner.sub(cleanSub,y) for y in x]
            return x
        return parser

    def __call__(self,file):
        # Iterate over entries, yielding a dict holding the raw entry
        # under 'fullentry' plus one key per registered parse action.
        for e in self.flatiterator(file):
            pe = {'fullentry':e}
            for k in self.action:
                pe[k]=self.action[k](e)
            yield pe

View File

View File

@ -0,0 +1,274 @@
__docformat__ = 'restructuredtext'
import re
import string
import textwrap
from obitools.obo.go.parser import GOEntryIterator
from obitools.obo.go.parser import GOTerm
from obitools.obo.go.parser import GOEntry
"""
go_obo.py : gene_ontology_edit.obo file parser:
----------------------------------------------------
- OBOFile class: open a flat file and return an entry.
"""
class OBOFile(object):
    """
    Iterator over all entries of an OBO file.

    Each parsed stanza is wrapped into a Term, Entry or Header object;
    the ``isaterm`` attribute reflects the kind of the last entry read.
    """
    def __init__(self, _path):
        self.file = GOEntryIterator(_path)

    def __iter__(self):
        return self

    def next(self):
        stanza = self.file.next()
        self.isaterm = isinstance(stanza, GOTerm)
        if self.isaterm:
            return Term(stanza)
        if isinstance(stanza, GOEntry):
            return Entry(stanza)
        return Header(stanza)
############# tout le reste doit descendre a l'etage obitools/ogo/go/parser.py ##########
# define an XRef into a go_obo.py script in the microbi pylib
class Xref(object):
    """
    Class Xref
        Xref.db   Xref database
        Xref.id   Xref identifier
    """
    def __init__(self,description):
        # Split on the FIRST colon only, so identifiers that themselves
        # contain ':' (e.g. URLs) are kept intact. Raises IndexError, as
        # before, when no colon is present.
        data = description.split(':', 1)
        self.db = data[0].strip()
        self.id = data[1].strip()
# define a RelatedTerm into a go_obo.py script in the microbi pylib
class RelatedTerm(object):
    """
    Class RelatedTerm
        RelatedTerm.relation        RelatedTerm relation
        RelatedTerm.related_term    RelatedTerm GO identifier
        RelatedTerm.comment         all terms have 0 or 1 comment
    """
    def __init__(self,relation,value,comment):
        self.relation = relation
        # BUG FIX: value.strip('GO:') strips any leading/trailing 'G', 'O'
        # or ':' characters (a character set, not the prefix), which would
        # corrupt identifiers beginning or ending with those characters.
        # Remove the literal 'GO:' prefix instead.
        if value.startswith('GO:'):
            self.related_term = value[3:]
        else:
            self.related_term = value.strip('GO:')
        self.comment = comment
# define into a go_obo.py script in the microbi pylib
#class Term(object):
# """
# class representing an OBO term (entry).
# """
#
# def __init__(self):
# raise RuntimeError('biodb.go_obo is an abstract class')
#
# def __checkEntry__(self):
# minimum=(hasattr(self,'goid') )
# if not minimum:
# raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])
class Term(object):
    """
    Class Term
    representing a GO term.
    """
    def __init__(self,data=None):
        """
        @param data: a parsed GOTerm stanza, or None to build an empty term
        """
        self.data=data
        self.isaterm = True
        if data:
            self.__filtreGoid__()
            self.__filtreName__()
            self.__filtreComment__()
            self.__filtreSynonyms__()
            self.__filtreDef__()
            self.__filtreParents__()
            self.__filtreRelationships__()
            self.__filtreRelation__()
            self.__filtreObsolete__()
            self.__filtreAltIds__()
            self.__filtreXRefs__()
            self.__filtreSubsets__()
            # check if all required attributes were valued
            self.__checkEntry__()

    def __checkEntry__(self):
        # A term is considered well formed as soon as it carries a goid.
        minimum=(hasattr(self,'goid') )
        if not minimum:
            raise AssertionError('Misconstructed GO Term instance %s' % [x for x in dir(self) if x[0]!='_'])

    def __filtreGoid__(self):
        """
        Extract GO id.
        """
        # NOTE(review): str.strip('GO:') removes any leading/trailing 'G',
        # 'O' or ':' characters rather than the literal 'GO:' prefix; this
        # is harmless only because GO identifiers are purely numeric after
        # the prefix. The same idiom is used throughout this class.
        self.goid = self.data.id.value.strip('GO:')

    def __filtreName__(self):
        """
        Extract GO name.
        """
        self.name = self.data.name.value

    def __filtreSynonyms__(self):
        """
        Extract GO synonym(s), mapping each synonym text to its scope.
        """
        self.list_synonyms = {}
        if self.data.synonyms:
            for y in self.data.synonyms:
                self.list_synonyms[y.value] = y.scope

    def __filtreComment__(self):
        """
        manage None comments
        """
        if self.data.comment != None:
            self.comment = self.data.comment.value
        else:
            self.comment = ""

    def __filtreDef__(self):
        """
        Extract GO definition.
        """
        if self.data.definition != None:
            self.definition = self.data.definition.value
        else:
            self.definition = ""

    def __filtreParents__(self):
        """
        To make the is_a hierarchy
        """
        if self.data.is_a != None:
            self.is_a = set([isa.value.strip('GO:') for isa in self.data.is_a])
        else:
            self.is_a = set()

    def __filtreRelation__(self):
        """
        To make the part_of hierarchy (and the regulates relations)
        """
        self.part_of = set()
        self.regulates = set()
        self.negatively_regulates = set()
        self.positively_regulates = set()
        if self.data.relationship != None:
            for rel in self.data.relationship:
                if rel.relationship == "part_of":
                    self.part_of.add(rel.value.strip('GO:'))
                elif rel.relationship == "regulates":
                    self.regulates.add(rel.value.strip('GO:'))
                elif rel.relationship == "negatively_regulates":
                    self.negatively_regulates.add(rel.value.strip('GO:'))
                elif rel.relationship == "positively_regulates":
                    self.positively_regulates.add(rel.value.strip('GO:'))

    def __filtreRelationships__(self):
        """
        Relation list with other GO Terms (is_a, part_of or some regulates relation)
        """
        self.related_term =[]
        if self.data.relationship != None:
            for x in self.data.relationship:
                # NOTE(review): x.__doc__ is passed where a comment is
                # expected (see the commented-out alternative) -- confirm.
                self.related_term.append(RelatedTerm(x.relationship,x.value,x.__doc__))
                #self.related_term.append(RelatedTerm(x.relationship,x.value,x.comment))
        if self.data.is_a != None:
            for x in self.data.is_a:
                self.related_term.append(RelatedTerm('is_a',x.value,x.__doc__))
                #self.related_term.append(RelatedTerm('is_a',x.value,x.comment))

    def __filtreObsolete__(self):
        """
        for each obsolete terms corresponds a set of GO Identifiers
        so that this GO term is consider as others GO Terms
        """
        self.considers = set()
        self.replaces = set()
        self.is_obsolete = self.data.is_obsolete
        if self.data.is_obsolete:
            if self.data.consider:
                self.considers = set([considered.value.strip('GO:') for considered in self.data.consider])
            if self.data.replaced_by:
                self.replaces = set([replaced.value.strip('GO:') for replaced in self.data.replaced_by])

    def __filtreAltIds__(self):
        """
        alternate(s) id(s) for this term (= alias in the geneontology schema model!)
        """
        if self.data.alt_ids:
            self.alt_ids = set([x.value.strip('GO:') for x in self.data.alt_ids])
        else:
            self.alt_ids = set()

    def __filtreXRefs__(self):
        """
        cross references to other databases
        """
        self.xrefs = set()
        if self.data.xrefs:
            self.xrefs = set([Xref(x.value.reference) for x in self.data.xrefs])

    def __filtreSubsets__(self):
        """
        subset label to make smaller sets of GO Terms
        """
        self.subsets = set()
        if self.data.subsets:
            self.subsets = set([x.value for x in self.data.subsets])
class Entry(object):
    """
    A non-[Term] OBO stanza (a [Typedef] stanza, for example).
    """
    def __init__(self, data=None):
        self.data = data
        self.isaterm = False
        self.isanentry = True
class Header(object):
    """
    class representing a GO header.
    """
    def __init__(self, data=None):
        self.data = data
        self.isaterm = False

284
obitools/format/options.py Normal file
View File

@ -0,0 +1,284 @@
'''
Created on 13 oct. 2009
@author: coissac
'''
from obitools.format.sequence.embl import emblIterator
from obitools.format.sequence.genbank import genbankIterator
from obitools.format.sequence.fnaqual import fnaFastaIterator
from obitools.format.sequence.fasta import fastaAAIterator,fastaNucIterator,fastaIterator
from obitools.format.sequence.fastq import fastqIlluminaIterator,fastqSolexaIterator
from obitools.fastq import fastqSangerIterator
from obitools.fnaqual.quality import qualityIterator
from obitools.fasta import formatFasta, rawFastaIterator,\
formatSAPFastaGenerator
from obitools.fastq import formatFastq
from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
from obitools.ecopcr.options import loadTaxonomyDatabase
#from obitools.format._format import printOutput
from array import array
from itertools import chain
import sys
import re
from obitools.ecopcr import EcoPCRFile
def addInputFormatOption(optionManager):
    """
    Register on *optionManager* the command line options describing the
    format of the input sequence file.

    @param optionManager: an optparse-compatible option container
    """
#    optionManager.add_option('--rank',
#                             action="store_true", dest='addrank',
#                             default=False,
#                             help="add a rank attribute to the sequence "
#                                  "indicating the sequence position in the input data")
    optionManager.add_option('--genbank',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='genbank',
                             help="input file is in genbank format")
    optionManager.add_option('--embl',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='embl',
                             help="input file is in embl format")
    optionManager.add_option('--fasta',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='fasta',
                             help="input file is in fasta nucleic format (including obitools fasta extentions)")
    # BUG FIX: the help text was a copy-paste of the --fasta one.
    optionManager.add_option('--ecopcr',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='ecopcr',
                             help="input file is in ecoPCR format")
    optionManager.add_option('--raw-fasta',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='rawfasta',
                             help="input file is in fasta format (but more tolerant to format variant)")
    optionManager.add_option('--fna',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='fna',
                             help="input file is in fasta nucleic format produced by 454 sequencer pipeline")
    optionManager.add_option('--qual',
                             action="store", dest="withqualfile",
                             type='str',
                             default=None,
                             help="Specify the name of a quality file produced by 454 sequencer pipeline")
    optionManager.add_option('--sanger',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='sanger',
                             help="input file is in sanger fastq nucleic format (standard fastq)")
    optionManager.add_option('--solexa',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='solexa',
                             help="input file is in fastq nucleic format produced by solexa sequencer")
    optionManager.add_option('--illumina',
                             action="store_const", dest="seqinformat",
                             default=None,
                             const='illumina',
                             help="input file is in fastq nucleic format produced by old solexa sequencer")
    optionManager.add_option('--nuc',
                             action="store_const", dest="moltype",
                             default=None,
                             const='nuc',
                             help="input file is nucleic sequences")
    optionManager.add_option('--prot',
                             action="store_const", dest="moltype",
                             default=None,
                             const='pep',
                             help="input file is protein sequences")
def addOutputFormatOption(optionManager):
    """
    Register on *optionManager* the command line options describing the
    format of the output sequences.

    @param optionManager: an optparse-compatible option container
    """
    optionManager.add_option('--fastq-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFastq,
                             help="output sequences in sanger fastq format")
    optionManager.add_option('--fasta-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFasta,
                             help="output sequences in obitools fasta format")
    optionManager.add_option('--sap-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatSAPFastaGenerator,
                             help="output sequences in sap fasta format")
    # BUG FIX: the help text was a copy-paste of the --uppercase one; the
    # option actually makes SAP formatting fail on sequences lacking a
    # taxid instead of silently skipping them.
    optionManager.add_option('--strict-sap',
                             action='store_true',dest='strictsap',
                             default=False,
                             help="with --sap-output, stop with an error on "
                                  "sequences without a taxid instead of skipping them")
    optionManager.add_option('--ecopcr-output',
                             action="store", dest="ecopcroutput",
                             default=None,
                             help="output sequences in obitools ecopcr format")
    optionManager.add_option('--uppercase',
                             action='store_true',dest='uppercase',
                             default=False,
                             help="Print sequences in upper case (default is lower case)")
def addInOutputOption(optionManager):
    # Register both the input and the output format options.
    addInputFormatOption(optionManager)
    addOutputFormatOption(optionManager)
def autoEntriesIterator(options):
    # Select a sequence reader according to the input format options; as a
    # side effect, options.outputFormater / options.outputFormat are set
    # to the matching default output format.
    options.outputFormater=formatFasta
    options.outputFormat="fasta"

    # Crude pattern used to recognize an ecoPCR result line.
    ecopcr_pattern = re.compile('^[^ ]+ +| +[0-9]+ +| + [0-9]+ + | +')

    def annotatedIterator(formatIterator):
        # Wrap a reader so that the taxon information embedded in each
        # entry is extracted into the sequence annotations.
        options.outputFormater=formatFasta
        options.outputFormat="fasta"
        def iterator(lineiterator):
            for s in formatIterator(lineiterator):
                s.extractTaxon()
                yield s

        return iterator

    def withQualIterator(qualityfile):
        # Wrap the .fna reader so each sequence receives error
        # probabilities derived from its matching .qual record.
        options.outputFormater=formatFastq
        options.outputFormat="fastq"
        def iterator(lineiterator):
            for s in fnaFastaIterator(lineiterator):
                q = qualityfile.next()
                # Convert phred scores to error probabilities.
                quality = array('d',(10.**(-x/10.) for x in q))
                s.quality=quality
                yield s

        return iterator

    def autoSequenceIterator(lineiterator):
        # Guess the format from the first line of the input.
        options.outputFormater=formatFasta
        options.outputFormat="fasta"
        first = lineiterator.next()
        if first[0]==">":
            if options.withqualfile is not None:
                qualfile=qualityIterator(options.withqualfile)
                reader=withQualIterator(qualfile)
                options.outputFormater=formatFastq
                options.outputFormat="fastq"
            elif options.moltype=='nuc':
                reader=fastaNucIterator
            elif options.moltype=='pep':
                reader=fastaAAIterator
            else:
                reader=fastaIterator
        elif first[0]=='@':
            reader=fastqSangerIterator
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
        elif first[0:3]=='ID ':
            reader=emblIterator
        elif first[0:6]=='LOCUS ':
            reader=genbankIterator
        elif first[0]=="#" or ecopcr_pattern.search(first):
            reader=EcoPCRFile
        else:
            # NOTE(review): 'fasta, fasta' is probably meant to read
            # 'fasta, fastq' ('@' entries are fastq records).
            raise AssertionError,'file is not in fasta, fasta, embl, genbank or ecoPCR format'

        # Re-inject the consumed first line into the reader's input.
        input = reader(chain([first],lineiterator))
        return input

    if options.seqinformat is None:
        reader = autoSequenceIterator
    else:
        if options.seqinformat=='fasta':
            if options.moltype=='nuc':
                reader=fastaNucIterator
            elif options.moltype=='pep':
                reader=fastaAAIterator
            else:
                reader=fastaIterator
        elif options.seqinformat=='rawfasta':
            reader=annotatedIterator(rawFastaIterator)
        elif options.seqinformat=='genbank':
            reader=annotatedIterator(genbankIterator)
        elif options.seqinformat=='embl':
            reader=annotatedIterator(emblIterator)
        elif options.seqinformat=='fna':
            reader=fnaFastaIterator
        elif options.seqinformat=='sanger':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqSangerIterator
        elif options.seqinformat=='solexa':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqSolexaIterator
        elif options.seqinformat=='illumina':
            options.outputFormater=formatFastq
            options.outputFormat="fastq"
            reader=fastqIlluminaIterator
        elif options.seqinformat=='ecopcr':
            reader=EcoPCRFile

        # A .fna input with an associated quality file gets the quality
        # aware wrapper regardless of the explicit --fna option.
        if options.seqinformat=='fna' and options.withqualfile is not None:
            qualfile=qualityIterator(options.withqualfile)
            reader=withQualIterator(qualfile)
            options.outputFormater=formatFastq
            options.outputFormat="fastq"

#    if options.addrank:
#        reader = withRankIterator(reader)
    return reader
def sequenceWriterGenerator(options,output=sys.stdout):
    # Build a one-argument writer function for sequences, targeting either
    # an ecoPCR database or a text stream in the selected output format.
    class SequenceWriter:
        def __init__(self,options,file=sys.stdout):
            self._format=None
            self._file=file
            self._upper=options.uppercase
        def put(self,seq):
            # The output format is resolved lazily on the first sequence.
            if self._format is None:
                self._format=formatFasta
                if options.output is not None:
                    self._format=options.output
                    if self._format is formatSAPFastaGenerator:
                        # SAP output needs a formatter bound to the taxonomy.
                        self._format=formatSAPFastaGenerator(options)
                elif options.outputFormater is not None:
                    self._format=options.outputFormater
            s = self._format(seq,upper=self._upper)
            try:
                self._file.write(s)
                self._file.write("\n")
            except IOError:
                # Downstream pipe closed (e.g. piping into head):
                # terminate quietly.
                sys.exit(0)

    if options.ecopcroutput is not None:
        taxo = loadTaxonomyDatabase(options)
        writer=EcoPCRDBSequenceWriter(options.ecopcroutput,taxonomy=taxo)
    else:
        writer=SequenceWriter(options,output)

    def sequenceWriter(sequence):
        writer.put(sequence)

    return sequenceWriter

View File

@ -0,0 +1,24 @@
from obitools.fasta import fastaIterator
from obitools.fastq import fastqSangerIterator
from obitools.seqdb.embl.parser import emblIterator
from obitools.seqdb.genbank.parser import genbankIterator
from itertools import chain
from obitools.utils import universalOpen
def autoSequenceIterator(file):
    '''
    Iterate over the entries of a sequence file, guessing its format
    (fasta, fastq, embl or genbank) from the first line.

    @param file: a file name or a line iterator
    @return: an iterator on sequence records
    @raise AssertionError: when the format cannot be recognized
    '''
    lineiterator = universalOpen(file)
    first = lineiterator.next()
    if first[0]==">":
        reader=fastaIterator
    elif first[0]=='@':
        reader=fastqSangerIterator
    elif first[0:3]=='ID ':
        reader=emblIterator
    elif first[0:6]=='LOCUS ':
        reader=genbankIterator
    else:
        # BUG FIX: the message used to read 'fasta, fasta'; '@' entries
        # are fastq records.
        raise AssertionError('file is not in fasta, fastq, embl, or genbank format')
    # Re-inject the consumed first line into the reader's input.
    input = reader(chain([first],lineiterator))
    return input

View File

@ -0,0 +1,2 @@
from obitools.seqdb.embl.parser import emblIterator,emblParser

View File

@ -0,0 +1,4 @@
from obitools.fasta import fastaIterator,fastaParser
from obitools.fasta import fastaAAIterator,fastaAAParser
from obitools.fasta import fastaNucIterator,fastaNucParser
from obitools.fasta import formatFasta

View File

@ -0,0 +1,13 @@
'''
Created on 15 janv. 2010
@author: coissac
'''
from obitools.fastq import fastqIterator,fastqParserGenetator
from obitools.fastq import fastqSangerIterator,fastqSolexaIterator, \
fastqIlluminaIterator
from obitools.fastq import fastqAAIterator
from obitools.fastq import formatFastq

View File

@ -0,0 +1,8 @@
'''
Created on 12 oct. 2009
@author: coissac
'''
from obitools.fnaqual.fasta import fnaFastaIterator
from obitools.fnaqual.quality import qualityIterator

View File

@ -0,0 +1,4 @@
from obitools.seqdb.genbank.parser import genpepIterator,genpepParser
from obitools.seqdb.genbank.parser import genbankIterator,genbankParser

View File

@ -0,0 +1,5 @@
from obitools.tagmatcher.parser import tagMatcherParser
from obitools.tagmatcher.parser import TagMatcherIterator
from obitools.tagmatcher.parser import formatTagMatcher
tagMatcherIterator=TagMatcherIterator

0
obitools/goa/__init__.py Normal file
View File

33
obitools/goa/parser.py Normal file
View File

@ -0,0 +1,33 @@
from itertools import imap
from obitools import utils
class GoAFileIterator(utils.ColumnFile):
    """
    Iterator over the records of a GOA (Gene Ontology Annotation)
    tab-separated flat file.

    Each iteration returns one annotation line as a dict keyed by the
    column names listed in ``_colname``.
    """

    def __init__(self,stream):
        # Parse *stream* as tab-separated columns; every field is kept
        # as a plain string (single str converter, recycled).
        utils.ColumnFile.__init__(self,
                                  stream, '\t', True,
                                  (str,))

    # Column names of the GOA file format, in file order.
    # NOTE(review): 'evidnce_origine' is misspelled but is a runtime dict
    # key — renaming it would break downstream consumers.
    _colname = ['database',
                'ac',
                'symbol',
                'qualifier',
                'goid',
                'origin',
                'evidence',
                'evidnce_origine',
                'namespace',
                'db_object_name',
                'gene',
                'object_type',
                'taxid',
                'date',
                'assigned_by']

    def next(self):
        data = utils.ColumnFile.next(self)
        # imap(None, a, b) pairs column names with fields like zip();
        # NOTE(review): unlike py2 map(None, ...) it stops at the shortest
        # sequence, so a short line silently drops the trailing columns —
        # confirm this is the intended behaviour.
        data = dict(imap(None,GoAFileIterator._colname,data))
        return data

962
obitools/graph/__init__.py Normal file
View File

@ -0,0 +1,962 @@
'''
**obitool.graph** for representing graph structure in obitools
--------------------------------------------------------------
.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
This module offers classes to manipulate graphs, mainly through the
:py:class:`obitools.graph.Graph` class.
.. inheritance-diagram:: Graph DiGraph UndirectedGraph
:parts: 2
'''
import sys
from obitools.utils import progressBar
class Indexer(dict):
    '''
    Manage the conversion between arbitrary hashable python
    values and unique integer keys.
    '''

    def __init__(self):
        self.__max=0        # next integer index to allocate
        self.__reverse=[]   # reverse table: index -> label

    def getLabel(self,index):
        '''
        Return the python value associated to an integer index.

        :param index: an index value
        :type index: int

        :raises: IndexError if the index is not used in this
                 Indexer instance
        '''
        return self.__reverse[index]

    def getIndex(self,key,strict=False):
        '''
        Return the index associated to a **key** in the indexer. Two
        modes are available :

            - strict mode :
                if the key is not known by the :py:class:`Indexer` instance
                a :py:exc:`KeyError` exception is raised.

            - non strict mode :
                in this mode if the requested **key** is absent, it is added to
                the :py:class:`Indexer` instance and the new index is returned

        :param key: the requested key
        :type key: a hashable python value
        :param strict: select the looking for mode
        :type strict: bool

        :return: the index corresponding to the key
        :rtype: int

        :raises: - :py:exc:`KeyError` in strict mode if key is absent
                   of the :py:class:`Indexer` instance
                 - :py:exc:`TypeError` if key is not an hashable value.
        '''
        if dict.__contains__(self,key):
            return dict.__getitem__(self,key)
        elif strict:
            # BUG FIX: was the py2-only statement form ``raise KeyError,key``;
            # the call form is valid in both Python 2 and Python 3.
            raise KeyError(key)
        else:
            # Allocate a fresh index and register the reverse mapping.
            value = self.__max
            self[key]= value
            self.__reverse.append(key)
            self.__max+=1
            return value

    def __getitem__(self,key):
        '''
        Implement the [] operator to emulate the standard dictionary
        behaviour on :py:class:`Indexer` and return the integer key
        associated to a python value.

        Actually this method calls :py:meth:`getIndex` in non strict
        mode, so it only raises :py:exc:`TypeError` if key is not an
        hashable value.

        :param key: the value to index
        :type key: an hashable python value
        :return: an unique integer value associated to the key
        :rtype: int
        '''
        return self.getIndex(key)

    def __equal__(self,index):
        '''
        Compare two :py:class:`Indexer` instances by identity.

        .. note:: ``__equal__`` is not a recognised special method name
                  (Python uses ``__eq__``), so this method is never
                  triggered by the ``==`` operator — dict equality applies
                  instead. Kept as-is for backward compatibility with any
                  code calling it explicitly.
        '''
        return id(self)==id(index)
class Graph(object):
    '''
    Class used to represent directed or undirected graph.

    .. warning::
        Only one edge can connect two nodes in a given direction.

    .. warning::
        Specifying nodes through their index speeds up your code but as no
        check is done on index value, it may result in inconsistency. So
        prefer the use of node label to specify a node.
    '''

    def __init__(self,label='G',directed=False,indexer=None,nodes=None,edges=None):
        '''
        :param label: Graph name, set to 'G' by default
        :type label: str
        :param directed: true for directed graph, set to False by default
        :type directed: boolean
        :param indexer: node label indexer. This allows to define several graphs
                        sharing the same indexer (see : :py:meth:`newEmpty`)
        :type indexer: :py:class:`Indexer`
        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value
        :param edges: set of edges to add to the graph
        :type edges: iterable value

        .. note:: NOTE(review): the **nodes** and **edges** parameters are
                  currently accepted but ignored by this constructor —
                  confirm whether they should be honoured.
        '''
        self._directed=directed
        if indexer is None:
            indexer = Indexer()
        self._index = indexer
        # node index -> set of neighbour indexes (successors when directed)
        self._node = {}
        # node index -> dict of node attributes
        self._node_attrs = {}
        # (index1,index2) -> dict of edge attributes; for undirected graphs
        # both orientations reference the *same* dict object.
        self._edge_attrs = {}
        self._label=label

    def newEmpty(self):
        """
        Build a new empty graph using the same :py:class:`Indexer` instance.
        This allows two graphs to share their vertices through their indices.
        """
        n = Graph(self._label+"_compact",self._directed,self._index)
        return n

    def addNode(self,node=None,index=None,**data):
        '''
        Add a new node or update an existing one.

        :param node: the new node label or the label of an existing node
                     for updating it.
        :type node: an hashable python value
        :param index: the index of an existing node for updating it.
        :type index: int

        :return: the index of the node
        :rtype: int

        :raises: :py:exc:`IndexError` if index is not **None** and
                 corresponds to an unused index in this graph.
        '''
        if index is None:
            # Non-strict lookup: an unknown label allocates a fresh index.
            index = self._index[node]
            if index not in self._node:
                self._node[index]=set()
        else:
            if index not in self._node:
                raise IndexError("This index is not used in this graph")

        if data:
            # Merge the keyword attributes into the node attribute dict.
            if index in self._node_attrs:
                self._node_attrs[index].update(data)
            else:
                self._node_attrs[index]=dict(data)

        return index

    def __contains__(self,node):
        # Strict lookup: an unknown label is simply "not in" the graph.
        try:
            index = self._index.getIndex(node,strict=True)
            r = index in self._node
        except KeyError:
            r=False
        return r

    def getNode(self,node=None,index=None):
        """
        :param node: a node label.
        :type node: an hashable python value
        :param index: the index of an existing node.
        :type index: int

        .. note:: Index value is prevalent over node label.

        :return: the looked for node
        :rtype: :py:class:`Node`

        :raises: :py:exc:`IndexError` if the specified node label
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index is None:
            index = self._index.getIndex(node, True)
        return Node(index,self)

    def getBestNode(self,estimator):
        '''
        Select the node maximizing the estimator function.

        :param estimator: the function to maximize
        :type estimator: a function returning a numerical value and accepting one
                         argument of type :py:class:`Node`

        :return: the best node
        :rtype: :py:class:`Node`
        '''
        bestScore=0
        best=None
        for n in self:
            score = estimator(n)
            # "best is None" ensures a node is returned even when every
            # score is <= 0.
            if best is None or score > bestScore:
                bestScore = score
                best=n
        return best

    def delNode(self,node=None,index=None):
        """
        Delete a node from a graph and all associated edges.

        :param node: a node label.
        :type node: an hashable python value
        :param index: the index of an existing node.
        :type index: int

        .. note:: Index value is prevalent over node label.

        :raises: :py:exc:`IndexError` if the specified node label
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index is None:
            index = self._index[node]

        # Remove all incoming edges (and their attributes).
        for n in self._node:
            if n!=index:
                e = self._node[n]
                if index in e:
                    if (n,index) in self._edge_attrs:
                        del self._edge_attrs[(n,index)]
                    e.remove(index)

        # Remove attributes of outgoing edges, then the node itself.
        e = self._node[index]
        for n in e:
            if (index,n) in self._edge_attrs:
                del self._edge_attrs[(index,n)]

        del self._node[index]
        if index in self._node_attrs:
            del self._node_attrs[index]

    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
        '''
        Create a new edge in the graph between both the specified nodes.

        .. note:: Nodes can be specified using their label or their index in
                  the graph; if both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: an hashable python value
        :param node2: The second vertex label
        :type node2: an hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of both the specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        '''
        index1=self.addNode(node1, index1)
        index2=self.addNode(node2, index2)

        self._node[index1].add(index2)
        if not self._directed:
            self._node[index2].add(index1)

        if data:
            if (index1,index2) not in self._edge_attrs:
                data =dict(data)
                self._edge_attrs[(index1,index2)]=data
                if not self._directed:
                    # Both orientations share the same attribute dict.
                    self._edge_attrs[(index2,index1)]=data
            else:
                # BUG FIX: previously updated (index2,index1), which raises
                # KeyError on an existing *directed* edge; updating
                # (index1,index2) is correct for both kinds of graph (for
                # undirected graphs both keys share one dict).
                self._edge_attrs[(index1,index2)].update(data)

        return (index1,index2)

    def getEdge(self,node1=None,node2=None,index1=None,index2=None):
        '''
        Extract the :py:class:`Edge` instance linking two nodes of the graph.

        .. note:: Nodes can be specified using their label or their index in
                  the graph; if both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: an hashable python value
        :param node2: The second vertex label
        :type node2: an hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of both the specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        '''
        node1=self.getNode(node1, index1)
        node2=self.getNode(node2, index2)
        return Edge(node1,node2)

    def delEdge(self,node1=None,node2=None,index1=None,index2=None):
        """
        Delete the edge linking node 1 to node 2.

        .. note:: Nodes can be specified using their label or their index in
                  the graph; if both values are indicated the index is used.

        :param node1: The first vertex label
        :type node1: an hashable python value
        :param node2: The second vertex label
        :type node2: an hashable python value
        :param index1: The first vertex index
        :type index1: int
        :param index2: The second vertex index
        :type index2: int

        :raises: :py:exc:`IndexError` if one of both the specified node labels
                 corresponds to a non-existing node.

        .. warning:: no check on index value
        """
        if index1 is None:
            index1 = self._index[node1]
        if index2 is None:
            index2 = self._index[node2]
        if index1 in self._node and index2 in self._node[index1]:
            self._node[index1].remove(index2)
            # BUG FIX: edge attributes live in _edge_attrs (see addEdge);
            # the original deleted from _node_attrs, leaving stale edge
            # attributes behind.
            if (index1,index2) in self._edge_attrs:
                del self._edge_attrs[(index1,index2)]
            if not self._directed:
                self._node[index2].remove(index1)
                if (index2,index1) in self._edge_attrs:
                    del self._edge_attrs[(index2,index1)]

    def edgeIterator(self,predicate=None):
        """
        Iterate through a set of selected edges.

        :param predicate: a function allowing edge selection. Default value
                          is **None** and indicates that all edges are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Edge`

        :return: an iterator over selected edges
        :rtype: iterator over :py:class:`Edge` instances

        .. seealso::
            function :py:func:`selectEdgeAttributeFactory` for simple predicates.
        """
        for n1 in self._node:
            for n2 in self._node[n1]:
                # For undirected graphs each edge is stored twice; the
                # n1 <= n2 guard yields it only once.
                if self._directed or n1 <= n2:
                    e = self.getEdge(index1=n1, index2=n2)
                    if predicate is None or predicate(e):
                        yield e

    def nodeIterator(self,predicate=None):
        """
        Iterate through a set of selected vertices.

        :param predicate: a function allowing node selection. Default value
                          is **None** and indicates that all nodes are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Node`

        :return: an iterator over selected nodes.
        :rtype: iterator over :py:class:`Node` instances
        """
        for n in self._node:
            node = self.getNode(index=n)
            if predicate is None or predicate(node):
                yield node

    def nodeIndexIterator(self,predicate=None):
        """
        Iterate through the indexes of a set of selected vertices.

        :param predicate: a function allowing node selection. Default value
                          is **None** and indicates that all nodes are selected.
        :type predicate: a function returning a boolean value
                         and accepting one argument of class :py:class:`Node`

        :return: an iterator over selected node indices.
        :rtype: iterator over `int`
        """
        for n in self._node:
            node = self.getNode(index=n)
            if predicate is None or predicate(node):
                yield n

    def neighbourIndexSet(self,node=None,index=None):
        # Return the raw neighbour-index set of a node (no copy: do not
        # mutate the result unless you know exactly what you do).
        if index is None:
            index=self.getNode(node).index
        return self._node[index]

    def edgeCount(self):
        # Total number of edges; each undirected edge is stored twice.
        # (sum/values and // keep the py2 behaviour while staying py3-safe.)
        n = sum(len(z) for z in self._node.values())
        if not self._directed:
            n = n // 2
        return n

    def subgraph(self,nodes,name='G'):
        # Build the subgraph induced by *nodes*, sharing the same indexer.
        sub = Graph(name,self._directed,self._index)
        if not isinstance(nodes, set):
            nodes = set(nodes)
        for n in nodes:
            # Keep only the neighbours that belong to the subgraph.
            sub._node[n]=nodes & self._node[n]
            if n in self._node_attrs:
                sub._node_attrs[n]=dict(self._node_attrs[n])
            for n2 in sub._node[n]:
                if not self._directed:
                    # Copy each undirected edge's attributes once (n <= n2),
                    # sharing the dict between both orientations.
                    if n <= n2:
                        if (n,n2) in self._edge_attrs:
                            data=dict(self._edge_attrs[(n,n2)])
                            sub._edge_attrs[(n,n2)]=data
                            sub._edge_attrs[(n2,n)]=data
                else:
                    if (n,n2) in self._edge_attrs:
                        data=dict(self._edge_attrs[(n,n2)])
                        sub._edge_attrs[(n,n2)]=data
        return sub

    def __len__(self):
        # Number of nodes.
        return len(self._node)

    def __getitem__(self,key):
        return self.getNode(node=key)

    def __delitem__(self,key):
        self.delNode(node=key)

    def __iter__(self):
        return self.nodeIterator()

    def __str__(self):
        # Render the graph in graphviz dot syntax.
        if self._directed:
            kw ='digraph'
        else:
            kw='graph'

        nodes = "\n    ".join([str(x) for x in self])
        edges = "\n    ".join([str(x) for x in self.edgeIterator()])

        return "%s %s {\n    %s\n\n    %s\n}" % (kw,self._label,nodes,edges)
class Node(object):
    """
    Class used for representing one node or vertex in a graph
    """

    def __init__(self,index,graph):
        '''
        .. warning::
            :py:class:`Node` constructor is usually called through the
            :py:class:`Graph` methods

        :param index: Index of the node in the graph
        :type index: int
        :param graph: graph instance owning the node
        :type graph: :py:class:`obitools.graph.Graph`
        '''
        self.index = index
        self.__graph = graph

    def getGraph(self):
        '''
        Return the graph owning this node.

        :rtype: :py:class:`obitools.graph.Graph`
        '''
        return self.__graph

    def getLabel(self):
        '''
        Return the label associated to this node.
        '''
        return self.__graph._index.getLabel(self.index)

    def has_key(self,key):
        '''
        Test whether the node instance has a property named **key**.

        :param key: the name of a property
        :type key: str

        :return: True if the node has a property named <key>
        :rtype: bool
        '''
        if self.index in self.__graph._node_attrs:
            return key in self.__graph._node_attrs[self.index]
        else:
            return False

    def neighbourIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        Iterate through the nodes directly connected to
        this node.

        :param nodePredicat: a function accepting one node as parameter
                             and returning **True** if this node must be
                             returned by the iterator.
        :type nodePredicat: function
        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: iterator on Node instances
        '''
        for n in self.neighbourIndexIterator(nodePredicat, edgePredicat):
            node = self.graph.getNode(index=n)
            yield node

    def neighbourIndexSet(self):
        '''
        Return the set of node indexes directly connected
        to this node.

        .. warning::
            do not change this set unless you know
            exactly what you do.

        :rtype: set of int
        '''
        return self.__graph._node[self.index]

    def neighbourIndexIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        Iterate through the node indexes directly connected to
        this node.

        :param nodePredicat: a function accepting one node as parameter
                             and returning True if this node must be
                             returned by the iterator.
        :type nodePredicat: function
        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: iterator on int
        '''
        for n in self.neighbourIndexSet():
            if nodePredicat is None or nodePredicat(self.__graph.getNode(index=n)):
                if edgePredicat is None or edgePredicat(self.__graph.getEdge(index1=self.index,index2=n)):
                    yield n

    def degree(self,nodeIndexes=None):
        '''
        Return the count of edges linking this node to the
        set of nodes described by their index in nodeIndexes.

        :param nodeIndexes: set of node indexes.
                            If set to None, all nodes of the
                            graph are taken into account.
                            Set to None by default.
        :type nodeIndexes: set of int

        :rtype: int
        '''
        if nodeIndexes is None:
            return len(self.__graph._node[self.index])
        else:
            return len(self.__graph._node[self.index] & nodeIndexes)

    def componentIndexSet(self,nodePredicat=None,edgePredicat=None):
        '''
        Return the set of node indexes in the same connected component.

        :param nodePredicat: a function accepting one node as parameter
                             and returning True if this node must be
                             considered.
        :type nodePredicat: function
        :param edgePredicat: a function accepting one edge as parameter
                             and returning True if the edge linking self and
                             the current node must be considered.
        :type edgePredicat: function

        :rtype: set of int
        '''
        cc=set([self.index])
        added = set(x for x in self.neighbourIndexIterator(nodePredicat, edgePredicat))
        # Breadth-wise expansion until no new node is reached.
        # (explicit loop replaces the former py2-only reduce() expression)
        while added:
            cc |= added
            frontier = set()
            for c in added:
                frontier |= set(self.graph.getNode(index=c).neighbourIndexIterator(nodePredicat, edgePredicat))
            added = frontier - cc
        return cc

    def componentIterator(self,nodePredicat=None,edgePredicat=None):
        '''
        Iterate through the nodes in the same connected
        component.

        :rtype: iterator on :py:class:`Node` instances
        '''
        for c in self.componentIndexSet(nodePredicat, edgePredicat):
            # BUG FIX: c is a node *index*; the original passed it as a
            # node label (getNode(c)), raising KeyError for any label
            # different from its index.
            yield self.graph.getNode(index=c)

    def shortestPathIterator(self,nodes=None):
        '''
        Iterate through the shortest paths sourcing
        from this node. If nodes is not None, iterates
        only over paths linking this node to the nodes listed in
        nodes.

        :param nodes: set of node indexes
        :type nodes: iterable on int

        :return: an iterator on lists of int describing paths
        :rtype: iterator on list of int
        '''
        if nodes is not None:
            nodes = set(nodes)

        Q=[(self.index,-1)]
        gray = set([self.index])
        paths = {}

        while Q and (nodes is None or nodes):
            # BUG FIX: the original used Q.pop() (LIFO), turning the
            # traversal into a depth-first search whose paths are not
            # shortest; FIFO order restores a genuine BFS.
            u,p = Q.pop(0)
            paths[u]=p
            fringe = self.graph._node[u] - gray
            gray|=fringe
            Q.extend((x,u) for x in fringe)
            if nodes is None or u in nodes:
                if nodes:
                    nodes.remove(u)
                # Rebuild the path by walking the predecessor table.
                path = [u]
                while p >= 0:
                    path.append(p)
                    p = paths[p]
                path.reverse()
                yield path

    def shortestPathTo(self,node=None,index=None):
        '''
        Return one of the shortest paths linking this
        node to the specified node.

        :param node: a node label or None
        :param index: a node index or None. The parameter index
                      has priority over the parameter node.
        :type index: int

        :return: list of node indexes corresponding to the path or None
                 if no path exists.
        :rtype: list of int or None
        '''
        if index is None:
            index=self.graph.getNode(node).index
        for p in self.shortestPathIterator([index]):
            return p

    def __getitem__(self,key):
        '''
        Return the value of the <key> property of this node.

        :param key: the name of a property
        :type key: str
        '''
        return self.__graph._node_attrs.get(self.index,{})[key]

    def __setitem__(self,key,value):
        '''
        Set the value of a node property. If the property doesn't
        already exist a new property is added to this node.

        :param key: the name of a property
        :type key: str
        :param value: the value of the property

        .. seealso::
            :py:meth:`Node.__getitem__`
        '''
        if self.index in self.__graph._node_attrs:
            data = self.__graph._node_attrs[self.index]
            data[key]=value
        else:
            self.__graph._node_attrs[self.index]={key:value}

    def __len__(self):
        '''
        Count the neighbours of this node.

        :rtype: int

        .. seealso::
            :py:meth:`Node.degree`
        '''
        return len(self.__graph._node[self.index])

    def __iter__(self):
        '''
        Iterate through the neighbours of this node.

        :rtype: iterator on :py:class:`Node` instances

        .. seealso::
            :py:meth:`Node.neighbourIterator`
        '''
        return self.neighbourIterator()

    def __contains__(self,key):
        return self.has_key(key)

    def __str__(self):
        # Render the node in graphviz dot syntax.
        if self.index in self.__graph._node_attrs:
            keys = " ".join(['%s="%s"' % (x[0],str(x[1]).replace('"','\\"').replace('\n','\\n'))
                             for x in self.__graph._node_attrs[self.index].items()]
                            )
        else:
            keys=''

        return '%d [label="%s" %s]' % (self.index,
                                       str(self.label).replace('"','\\"').replace('\n','\\n'),
                                       keys)

    def keys(self):
        # Names of the properties attached to this node (a fresh list,
        # as with py2 dict.keys(), so callers may remove() from it).
        if self.index in self.__graph._node_attrs:
            k = list(self.__graph._node_attrs[self.index].keys())
        else:
            k=[]
        return k

    label = property(getLabel, None, None, "Label of the node")
    graph = property(getGraph, None, None, "Graph owning this node")
class Edge(object):
    """
    Class used for representing one edge of a graph
    """

    def __init__(self,node1,node2):
        '''
        .. warning::
            :py:class:`Edge` constructor is usually called through the
            :py:class:`Graph` methods

        :param node1: first node linked by the edge
        :type node1: :py:class:`Node`
        :param node2: second node linked by the edge
        :type node2: :py:class:`Node`
        '''
        self.node1 = node1
        self.node2 = node2

    def _key(self):
        # Key of this edge in the graph edge-attribute table.
        return (self.node1.index,self.node2.index)

    def getGraph(self):
        """
        Return the :py:class:`Graph` instance owning this edge.
        """
        return self.node1.graph

    def has_key(self,key):
        '''
        Test whether this edge carries a property named **key**.

        :param key: the name of a property
        :type key: str

        :return: True if the edge has a property named <key>
        :rtype: bool
        '''
        attrs = self.graph._edge_attrs.get(self._key())
        return attrs is not None and key in attrs

    def getDirected(self):
        # An edge is directed iff its owning graph is.
        return self.node1.graph._directed

    def __getitem__(self,key):
        return self.graph._edge_attrs.get(self._key(),{})[key]

    def __setitem__(self,key,value):
        # Create the attribute dict on first assignment.
        attrs = self.graph._edge_attrs.setdefault(self._key(),{})
        attrs[key]=value

    def __str__(self):
        # Render the edge in graphviz dot syntax.
        key = self._key()
        if key in self.graph._edge_attrs:
            attrs = self.graph._edge_attrs[key]
            keys = "[%s]" % " ".join(['%s="%s"' % (k,str(v).replace('"','\\"'))
                                      for k,v in attrs.items()])
        else:
            keys = ""

        link = '->' if self.directed else '--'

        return "%d %s %d %s" % (self.node1.index,link,self.node2.index,keys)

    def __contains__(self,key):
        return self.has_key(key)

    graph = property(getGraph, None, None, "Graph owning this edge")
    directed = property(getDirected, None, None, "True when the owning graph is directed")
class DiGraph(Graph):
    """
    :py:class:`DiGraph` is a specialisation of the :py:class:`Graph` class
    dedicated to directed graph representation.

    .. seealso::
        :py:class:`UndirectedGraph`
    """

    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        :param label: graph name, set to 'G' by default
        :type label: str
        :param indexer: node label indexer
        :type indexer: Indexer instance
        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value
        :param edges: set of edges to add to the graph
        :type edges: iterable value
        '''
        # A DiGraph is simply a Graph built with directed=True.
        Graph.__init__(self, label, directed=True,
                       indexer=indexer, nodes=nodes, edges=edges)
class UndirectedGraph(Graph):
    """
    :py:class:`UndirectedGraph` is a specialisation of the :py:class:`Graph`
    class dedicated to undirected graph representation.

    .. seealso::
        :py:class:`DiGraph`
    """

    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        :param label: graph name, set to 'G' by default
        :type label: str
        :param indexer: node label indexer
        :type indexer: Indexer instance
        :param nodes: set of nodes to add to the graph
        :type nodes: iterable value
        :param edges: set of edges to add to the graph
        :type edges: iterable value
        '''
        # An UndirectedGraph is simply a Graph built with directed=False.
        Graph.__init__(self, label, directed=False,
                       indexer=indexer, nodes=nodes, edges=edges)
def selectEdgeAttributeFactory(attribut,value):
    """
    Build an edge predicate usable with :py:meth:`Graph.edgeIterator`.

    The returned function selects the edges carrying an attribute named
    *attribut* whose value equals *value*.
    """
    def selectEdge(edge):
        return attribut in edge and edge[attribut]==value
    return selectEdge

BIN
obitools/graph/__init__.pyc Normal file

Binary file not shown.

View File

Binary file not shown.

View File

@ -0,0 +1,134 @@
import time
import sys
# Module-level search statistics shared between cliqueIterator and
# _cliqueIterator (reset at the start of each cliqueIterator call).
_maxsize=0       # size of the largest clique found so far
_solution=0      # number of candidate solutions explored
_notbound=0      # number of branches cut by the "not list" bound
_sizebound=0     # number of branches cut by the minimal-size bound
_lastyield=0     # timestamp of the last yield (used for timeout handling)
_maxclique=None  # largest clique seen, yielded when the timeout triggers
def cliqueIterator(graph,minsize=1,node=None,timeout=None):
    """
    Iterate over the cliques of *graph* whose size is at least *minsize*.

    :param graph: the graph to explore
    :param minsize: minimal size of the yielded cliques (1 by default)
    :type minsize: int
    :param node: when not None, only cliques containing this node are
                 enumerated
    :param timeout: delay (in seconds) passed down to the recursive search
    """
    global _maxsize,_solution,_notbound,_sizebound,_lastyield

    # Reset the module-level search statistics for this run.
    _maxsize=0
    _solution=0
    _notbound=0
    _sizebound=0

    starttime = time.time()

    if node:
        # Seed the search with the requested node: only its neighbours
        # can extend the clique.
        seed = graph.getNode(node)
        clique = set([seed.index])
        candidates = set(graph.neighbourIndexSet(index=seed.index))
    else:
        # Empty seed: every node of the graph is a candidate.
        clique = set()
        candidates = set(x.index for x in graph)

    _lastyield = time.time()

    for c in _cliqueIterator(graph,clique,candidates,set(),minsize,
                             start=starttime,timeout=timeout):
        yield c
def _cliqueIterator(graph,clique,candidates,notlist,minsize=0,start=None,timeout=None):
    # Recursive Bron-Kerbosch-style clique enumeration with a "not list"
    # pivot bound and a minimal-size bound.  Mutates the module-level
    # counters declared above; *clique* is the clique built so far,
    # *candidates* the vertices that may still extend it, *notlist* the
    # vertices already fully explored.
    global _maxsize,_maxclique,_solution,_notbound,_sizebound,_lastyield

    # Speed indicator
    lclique = len(clique)
    lcandidates = len(candidates)

    # Pivot selection: pick the notlist vertex with the fewest
    # non-neighbours among the candidates (smallest branching factor).
    notmin = lcandidates
    notfix = None
    for n in notlist:
        nnc = candidates - graph.neighbourIndexSet(index=n)
        nc = len(nnc)
        if nc < notmin:
            notmin=nc
            notfix=n
            notfixneib = nnc

    # Progress report on stderr, refreshed every 1000 explored solutions
    # or whenever a larger clique is found.
    if lclique > _maxsize or not _solution % 1000 :
        if start is not None:
            top = time.time()
            delta = top - start
            if delta==0:
                delta=1e-6
            speed = _solution / delta
            start = top
        else:
            speed = 0
        print >>sys.stderr,"\rCandidates : %-5d Maximum clique size : %-5d Solutions explored : %10d speed = %5.2f solutions/sec sizebound=%10d notbound=%10d " % (lcandidates,_maxsize,_solution,speed,_sizebound,_notbound),
        sys.stderr.flush()
        if lclique > _maxsize:
            _maxsize=lclique

    # Time elapsed since the last yielded clique (timeout handling).
    timer = time.time() - _lastyield

    if not candidates and not notlist:
        # Maximal clique reached: nothing can extend it.
        if lclique==_maxsize:
            _maxclique=set(clique)
        if lclique >= minsize:
            yield set(clique)
        if timeout is not None and timer > timeout and _maxclique is not None:
            # Timed out: emit the best clique seen so far.
            yield _maxclique
            _maxclique=None
    else:
        # Branch on each candidate while the bounds allow a clique of at
        # least *minsize* to be completed.
        while notmin and candidates and ((lclique + len(candidates)) >= minsize or (timeout is not None and timer > timeout)):
            # count explored solution
            _solution+=1

            if notfix is None:
                nextcandidate = candidates.pop()
            else:
                # Branch on a non-neighbour of the pivot first.
                nextcandidate = notfixneib.pop()
                candidates.remove(nextcandidate)

            clique.add(nextcandidate)

            neighbours = graph.neighbourIndexSet(index=nextcandidate)
            nextcandidates = candidates & neighbours
            nextnot = notlist & neighbours
            nnc = candidates - neighbours
            lnnc=len(nnc)

            for c in _cliqueIterator(graph,
                                     set(clique),
                                     nextcandidates,
                                     nextnot,
                                     minsize,
                                     start,
                                     timeout=timeout):
                yield c

            clique.remove(nextcandidate)

            # The explored vertex joins the not list; refresh the pivot.
            notmin-=1
            if lnnc < notmin:
                notmin = lnnc
                notfix = nextcandidate
                notfixneib = nnc
            if notmin==0:
                _notbound+=1
            notlist.add(nextcandidate)
        else:
            # Loop ended on the bound test: record which bound cut it.
            if (lclique + len(candidates)) < minsize:
                _sizebound+=1

View File

@ -0,0 +1,8 @@
def compactGraph(graph,nodeSetIterator):
    # Build a compacted graph whose nodes stand for sets of nodes of
    # *graph*, as produced by *nodeSetIterator* (e.g. connected components);
    # each compact node is labelled with the newline-joined member labels.
    # NOTE(review): this function looks unfinished — it adds the compacted
    # nodes but no edges, and prints the result instead of returning it.
    compact = graph.newEmpty()
    for ns in nodeSetIterator(graph):
        nlabel = "\n".join([str(graph.getNode(index=x).label) for x in ns])
        compact.addNode(nlabel)
    print
    print compact

View File

@ -0,0 +1,82 @@
"""
Iterate through the connected components of a graph
---------------------------------------------------
the module :py:mod:`obitools.graph.algorithm.component` provides
two functions to deal with the connected component of a graph
represented as a :py:class:`obitools.graph.Graph` instance.
The whole set of connected component of a graph is a partition of this graph.
So a node cannot belongs to two distinct connected component.
Two nodes are in the same connected component if it exits a path through
the graph edges linking them.
TODO: There is certainly a bug with DirectedGraph
"""
def componentIterator(graph,nodePredicat=None,edgePredicat=None):
    '''
    Build an iterator over the connected components of a graph.
    Each connected component returned by the iterator is represented
    as a `set` of node indices.

    :param graph: the graph to partition
    :type graph: :py:class:`obitools.graph.Graph`
    :param nodePredicat: a function allowing node selection. Default value
                         is **None** and indicates that all nodes are selected.
    :type nodePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Node`
    :param edgePredicat: a function allowing edge selection. Default value
                         is **None** and indicates that all edges are selected.
    :type edgePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Edge`

    :return: an iterator over the connected component sets
    :rtype: an iterator over `set` of `int`

    .. seealso::
        the :py:meth:`obitools.graph.Graph.componentIndexSet` method
        on which this function is based.
    '''
    visited = set()
    for node in graph.nodeIterator(nodePredicat):
        # Any node already assigned to a component is skipped.
        if node.index in visited:
            continue
        component = node.componentIndexSet(nodePredicat, edgePredicat)
        yield component
        visited |= component
def componentCount(graph,nodePredicat=None,edgePredicat=None):
    '''
    Count the connected components in a graph.

    :param graph: the graph to partition
    :type graph: :py:class:`obitools.graph.Graph`
    :param nodePredicat: a function allowing node selection. Default value
                         is **None** and indicates that all nodes are selected.
    :type nodePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Node`
    :param edgePredicat: a function allowing edge selection. Default value
                         is **None** and indicates that all edges are selected.
    :type edgePredicat: a function returning a boolean value
                        and accepting one argument of class :py:class:`Edge`

    :return: the number of connected components
    :rtype: `int`

    .. seealso::
        the :py:func:`componentIterator` function
        on which this function is based.
    '''
    return sum(1 for _ in componentIterator(graph,nodePredicat,edgePredicat))

Binary file not shown.

80
obitools/graph/dag.py Normal file
View File

@ -0,0 +1,80 @@
from obitools.graph import DiGraph,Node
from obitools.graph.algorithms.component import componentIterator
class DAG(DiGraph):
    """
    Directed acyclic graph: a :py:class:`DiGraph` that rejects edges which
    would create a cycle and records, for each node, the set of its direct
    parents.
    """

    def __init__(self,label='G',indexer=None,nodes=None,edges=None):
        '''
        Directed acyclic graph constructor.

        @param label: Graph name, set to 'G' by default
        @type label: str
        @param indexer: node label indexer
        @type indexer: Indexer instance
        @param nodes: set of nodes to add to the graph
        @type nodes: iterable value
        @param edges: set of edges to add to the graph
        @type edges: iterable value
        '''
        # child node index -> set of direct parent node indexes
        self._parents={}
        DiGraph.__init__(self, label, indexer, nodes, edges)

    def getNode(self,node=None,index=None):
        # Same lookup as Graph.getNode, but wraps the result in a DAGNode.
        if index is None:
            index = self._index.getIndex(node, True)
        return DAGNode(index,self)

    def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
        # Link *parent* -> *node*, refusing any edge that would make the
        # child one of its parent's ancestors (i.e. close a cycle).
        indexp=self.addNode(parent, indexp)
        index =self.addNode(node , index)

        pindex = set(n.index
                     for n in self.getNode(index=indexp).ancestorIterator())

        # NOTE(review): assert-based validation disappears under python -O.
        assert index not in pindex,'Child node cannot be a parent node'

        DiGraph.addEdge(self,index1=indexp,index2=index,**data)

        # Record the parent relationship for ancestor traversals.
        if index in self._parents:
            self._parents[index].add(indexp)
        else:
            self._parents[index]=set([indexp])

        return (indexp,index)

    def getRoots(self):
        # Return one root per connected component: an arbitrary node of
        # each component is walked up to its root via DAGNode.getRoot.
        return [self.getNode(index=cc.pop()).getRoot()
                for cc in componentIterator(self)]
class DAGNode(Node):
    """
    Node of a :py:class:`DAG`.
    """

    def ancestorIterator(self):
        '''
        Iterate over all ancestors (parents, grand-parents, ...) of this
        node, depth-first.

        .. note:: NOTE(review): a node reachable through several paths is
                  yielded once per path — confirm whether deduplication
                  is wanted.
        '''
        if self.index in self.graph._parents:
            for p in self.graph._parents[self.index]:
                parent = DAGNode(p,self.graph)
                yield parent
                for pnode in parent.ancestorIterator():
                    yield pnode

    def getRoot(self):
        '''
        Return the root reached by walking up the ancestor chain
        (the node itself when it has no parent).
        '''
        # BUG FIX: the original returned the bare loop variable, which is
        # undefined when the node has no ancestor (UnboundLocalError);
        # a parentless node is its own root.
        node = self
        for node in self.ancestorIterator():
            pass
        return node

    def leavesIterator(self):
        # Yield the leaves (childless nodes) of the subtree rooted here.
        if not self:
            yield self
        for n in self:
            for nn in n.leavesIterator():
                yield nn

    def subgraphIterator(self):
        # Yield this node then, recursively, every node below it.
        yield self
        for n in self:
            for nn in n.subgraphIterator():
                yield nn

View File

View File

View File

@ -0,0 +1,117 @@
from obitools.graph.dag import DAG,DAGNode
class RootedTree(DAG):
    """
    A :py:class:`DAG` in which every node has at most one parent.
    """

    def addEdge(self,parent=None,node=None,indexp=None,index=None,**data):
        # Link *parent* to *node*, enforcing the single-parent constraint.
        indexp = self.addNode(parent, indexp)
        index = self.addNode(node, index)

        # A child may only be (re)attached to the parent it already has.
        assert index not in self._parents or indexp in self._parents[index], \
               'Child node cannot have more than one parent node'

        return DAG.addEdge(self,indexp=indexp,index=index,**data)

    def getNode(self,node=None,index=None):
        # Same lookup as DAG.getNode, but wraps the result in a
        # RootedTreeNode.
        if index is None:
            index = self._index.getIndex(node, strict=True)
        return RootedTreeNode(index,self)
class RootedTreeNode(DAGNode):
    """
    Node of a C{RootedTree}, adding subtree size and leaf counts.
    """

    def subTreeSize(self):
        '''
        Return the number of nodes of the subtree rooted at this node,
        including the node itself.
        '''
        return 1 + sum(child.subTreeSize() for child in self)

    def subTreeLeaves(self):
        '''
        Return the number of leaves of the subtree rooted at this node.
        A node without children counts as one leaf.
        '''
        if not self:
            return 1
        return sum(child.subTreeLeaves() for child in self)
def nodeWriter(node,deep=0,label=None,distance="distance", bootstrap="bootstrap",cartoon=None,collapse=None):
    '''
    Recursively render *node* and its subtree as a Newick-like string with
    FigTree-style C{[&...]} annotations.

    @param node: the (sub)tree root to render
    @param deep: current nesting depth, used only for indentation
    @param label: None to use the node label, a callable applied to the
        node, or a node-property name whose value becomes the displayed name
    @param distance: node-property name holding the branch length
    @param bootstrap: accepted and passed down recursively but never read
        (apparently unused -- kept for interface compatibility)
    @param cartoon: optional predicate; matching nodes get a !cartoon tag
    @param collapse: optional predicate; matching nodes get a !collapse tag
    @return: the textual representation of the subtree
    @rtype: str
    '''
    # NOTE: node.keys() must return a list here (Python 2 dict semantics),
    # because consumed properties are removed from it below.
    ks = node.keys()
    if label is None:
        name=node.label
    elif callable(label):
        name=label(node)
    elif isinstance(label, str) and label in node:
        name=node[label]
        # Do not repeat the property used as the display name.
        ks.remove(label)
    else:
        name=''
    if distance in node:
        dist=':%6.5f' % node[distance]
        ks.remove(distance)
    else:
        dist=''

    # Remaining properties become key=value annotations.
    ks = ["%s=%s" % (k,node[k]) for k in ks]

    if cartoon is not None and cartoon(node):
        ks.append("!cartoon={%d,0.0}" % node.subTreeLeaves())

    if collapse is not None and collapse(node):
        ks.append('!collapse={"collapsed",0.0}')

    if ks:
        ks="[&"+",".join(ks)+"]"
    else:
        ks=''

    # Children are separated by a comma and indented one space per level.
    nodeseparator = ',\n' + ' ' * (deep+1)

    subnodes = nodeseparator.join([nodeWriter(x, deep+1,label,distance,bootstrap,cartoon=cartoon,collapse=collapse)
                                   for x in node])

    if subnodes:
        subnodes='(\n' + ' ' * (deep+1) + subnodes + '\n' + ' ' * deep + ')'

    return '%s"%s"%s%s' % (subnodes,name,ks,dist)
def nexusFormat(tree,startnode=None,label=None,blocks="",cartoon=None,collapse=None):
    '''
    Serialize *tree* as a NEXUS document containing a taxa block and a
    single rooted tree rendered by L{nodeWriter}.

    @param tree: the tree to serialize (iterating it yields its nodes)
    @param startnode: optional node to use as the tree root; when None the
        first root returned by tree.getRoots() is used
    @param label: same semantics as in L{nodeWriter}
    @param blocks: extra NEXUS blocks appended verbatim at the end
    @param cartoon: optional predicate forwarded to L{nodeWriter}
    @param collapse: optional predicate forwarded to L{nodeWriter}
    @return: the NEXUS document
    @rtype: str
    '''
    head="#NEXUS\n"
    tx = []
    # Collect the displayed name of every node to build the taxa block
    # (same name-resolution rules as nodeWriter).
    for n in tree:
        if label is None:
            name=n.label
        elif callable(label):
            name=label(n)
        elif isinstance(label, str) and label in n:
            name=n[label]
        else:
            name=''
        if name:
            tx.append('"%s"' % name)

    taxa = "begin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels\n\t" % len(tx)
    taxa+="\n\t".join(tx)
    taxa+="\n;\nend;\n\n"

    if startnode is not None:
        roots =[startnode]
    else:
        roots = tree.getRoots()

    # Only the first root is rendered; [&R] marks the tree as rooted.
    trees = nodeWriter(roots[0],0,label,cartoon=cartoon,collapse=collapse)
    trees = "begin trees;\n\ttree tree_1 = [&R] "+ trees +";\nend;\n\n"

    return head+taxa+trees+"\n\n"+blocks+"\n"

37
obitools/graph/tree.py Normal file
View File

@ -0,0 +1,37 @@
from obitools.graph import UndirectedGraph,Node
from obitools.graph.algorithms.component import componentCount
class Forest(UndirectedGraph):
    """
    An undirected graph restricted to forest topology: at most one path
    may exist between any two nodes.
    """

    def getNode(self,node=None,index=None):
        # Wrap the index in a TreeNode for component traversal.
        if index is None:
            index = self._index.getIndex(node, True)
        return TreeNode(index,self)

    def addEdge(self,node1=None,node2=None,index1=None,index2=None,**data):
        '''
        Add an edge, refusing any edge that would create a second path
        (i.e. a cycle) between two nodes.

        @return: the (index1, index2) tuple
        '''
        index1=self.addNode(node1, index1)
        index2=self.addNode(node2, index2)

        # Nodes already connected to node2: adding an edge from node1 to
        # node2 is only legal if node1 is not in that component (unless
        # the edge already exists).
        cc = set(n.index for n in self.getNode(index=index2).componentIterator())

        # Fixed typo in the assertion message ("alloed" -> "allowed").
        assert index1 in self._node[index2] or index1 not in cc, \
            "No more than one path is allowed between two nodes in a tree"

        UndirectedGraph.addEdge(self, index1=index1, index2=index2,**data)
        return (index1,index2)

    def isASingleTree(self):
        # A forest is a single tree iff it has exactly one component.
        return componentCount(self)==1
class TreeNode(Node):
    """
    Node of a C{Forest} graph.
    """

    def componentIterator(self):
        '''
        Iterate over every node of the connected component containing this
        node, excluding the node itself.  Each node is yielded exactly once.

        Bug fix: the previous implementation only explored direct
        neighbours and neighbours of neighbours, so connectivity checks
        failed on trees deeper than two edges and the same node (including
        self) could be yielded several times.
        '''
        seen = set([self.index])
        stack = [self]
        while stack:
            current = stack.pop()
            for neighbour in current:
                if neighbour.index not in seen:
                    seen.add(neighbour.index)
                    yield neighbour
                    stack.append(neighbour)

504
obitools/gzip.py Normal file
View File

@ -0,0 +1,504 @@
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed.

This is a patched version of the standard gzip Python module,
based on Andrew Kuchling's minigzip.py distributed with the zlib module.
"""
# based on Andrew Kuchling's minigzip.py distributed with the zlib module
import struct, sys, time
import zlib
import __builtin__
# Public API of this module.
__all__ = ["GzipFile","open"]

# gzip member header flag bits (see RFC 1952, FLG field).
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Internal markers for GzipFile.mode.
READ, WRITE = 1, 2
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
    """
    if i >= 0:
        return i
    # Negative value: map to the equivalent two's-complement unsigned value.
    return i + (1 << 32)
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    return i & 0xFFFFFFFF
def write32(output, value):
    """Write *value* to *output* as a little-endian signed 32-bit integer."""
    packed = struct.pack("<l", value)
    output.write(packed)
def write32u(output, value):
    """Write *value* to *output* as a little-endian unsigned 32-bit integer.

    The L format writes the bit pattern correctly whether signed
    or unsigned.
    """
    packed = struct.pack("<L", value)
    output.write(packed)
def read32(input):
    """Read four bytes from *input* and decode them as a little-endian
    signed 32-bit integer."""
    raw = input.read(4)
    return struct.unpack("<l", raw)[0]
def unpack32(buf):
    """Decode a 4-byte buffer as a little-endian signed 32-bit integer."""
    (value,) = struct.unpack("<l", buf)
    return value
def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.
    """
    return GzipFile(filename=filename, mode=mode, compresslevel=compresslevel)
class GzipFile:
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.
    """

    # Only set when this object opened the underlying file itself (see
    # __init__); close() then closes it as well.
    myfileobj = None
    max_read_chunk = 10 * 1024 * 1024 # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may includes the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.
        """

        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            if hasattr(fileobj, 'name'): filename = fileobj.name
            else: filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'): mode = fileobj.mode
            else: mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # extrabuf holds decompressed data not yet returned by read().
            self.extrabuf = ""
            self.extrasize = 0
            self.filename = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Raw deflate stream (negative wbits): the gzip header and
            # trailer are written explicitly by this class.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError, "Mode " + mode + " not supported"

        self.fileobj = fileobj
        self.offset = 0
        # inputbuf/last8 support the patched buffered reader: last8 always
        # holds the last 8 compressed bytes seen, i.e. the CRC32 + ISIZE
        # trailer once a member has been fully consumed.
        self.inputbuf = ''
        self.last8 = ''

        if self.mode == WRITE:
            self._write_gzip_header()

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        # Reset the per-member CRC/size accumulators used while writing.
        if filename[-3:] != '.gz':
            filename = filename + '.gz'
        self.filename = filename
        self.crc = zlib.crc32("")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        # Emit a minimal RFC 1952 header; only the FNAME flag is ever set.
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        fname = self.filename[:-3]
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        write32u(self.fileobj, long(time.time()))  # modification time
        self.fileobj.write('\002')                 # extra flags
        self.fileobj.write('\377')                 # OS byte (unknown)
        if fname:
            self.fileobj.write(fname + '\000')     # null-terminated name

    def _init_read(self):
        # Reset the per-member CRC/size accumulators used while reading.
        self.crc = zlib.crc32("")
        self.size = 0

    def _read_internal(self, size):
        # Read up to `size` compressed bytes through inputbuf, keeping
        # last8 up to date with the most recent 8 bytes seen.
        if len(self.inputbuf) < size:
            self.inputbuf += self.fileobj.read(size-len(self.inputbuf))
        chunk = self.inputbuf[:size]
        # need to use len(chunk) below instead of size in case it's EOF.
        if len(chunk) < 8:
            self.last8 = self.last8[len(chunk):] + chunk
        else:
            self.last8 = chunk[-8:]
        self.inputbuf = self.inputbuf[size:]
        return chunk

    def _read_gzip_header(self):
        # Parse and discard one RFC 1952 member header; raises EOFError
        # when no further member exists.
        magic = self._read_internal(2)
        if len(magic) != 2:
            raise EOFError, "Reached EOF"

        if magic != '\037\213':
            raise IOError, 'Not a gzipped file'
        method = ord( self._read_internal(1) )
        if method != 8:
            raise IOError, 'Unknown compression method'
        flag = ord( self._read_internal(1) )
        # modtime = self.fileobj.read(4)
        # extraflag = self.fileobj.read(1)
        # os = self.fileobj.read(1)
        self._read_internal(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self._read_internal(1))
            xlen = xlen + 256*ord(self._read_internal(1))
            self._read_internal(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self._read_internal(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self._read_internal(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self._read_internal(2)     # Read & discard the 16-bit header CRC

    def write(self,data):
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError, "write() on closed GzipFile object"
        if len(data) > 0:
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
            self.offset += len(data)

    def read(self, size=-1):
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        # Serve the request from the decompressed-data buffer.
        chunk = self.extrabuf[:size]
        self.extrabuf = self.extrabuf[size:]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        # Push decompressed data back so the next read() returns it first.
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        if self.fileobj is None:
            raise EOFError, "Reached EOF"

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # _read_gzip_header will raise EOFError exception
            # if there no more members to read.
            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self._read_internal(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data( uncompress )
            raise EOFError, 'Reached EOF'

        uncompress = self.decompress.decompress(buf)
        self._add_read_data( uncompress )

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so put back unused_data and initialize last8 by reading them.
            self.inputbuf = self.decompress.unused_data + self.inputbuf
            self._read_internal(8)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        # Account for newly decompressed data: CRC, buffer and sizes.
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf = self.extrabuf + data
        self.extrasize = self.extrasize + len(data)
        self.size = self.size + len(data)

    def _read_eof(self):
        # The member trailer (CRC32 + ISIZE) is always available in last8,
        # maintained by _read_internal.  We check that the computed CRC
        # and size of the uncompressed data match the stored values.
        # Note that the size stored is the true file size mod 2**32.
        crc32 = unpack32(self.last8[:4])
        isize = U32(unpack32(self.last8[4:]))   # may exceed 2GB
        if U32(crc32) != U32(self.crc):
            raise IOError, "CRC check failed"
        elif isize != LOWU32(self.size):
            raise IOError, "Incorrect length of data produced"

    def close(self):
        if self.mode == WRITE:
            self.fileobj.write(self.compress.flush())
            # The native zlib crc is an unsigned 32-bit integer, but
            # the Python wrapper implicitly casts that to a signed C
            # long.  So, on a 32-bit box self.crc may "look negative",
            # while the same crc on a 64-bit box may "look positive".
            # To avoid irksome warnings from the `struct` module, force
            # it to look positive on all boxes.
            write32u(self.fileobj, LOWU32(self.crc))
            # self.size may exceed 2GB, or even 4GB
            write32u(self.fileobj, LOWU32(self.size))
            self.fileobj = None
        elif self.mode == READ:
            self.fileobj = None
        # Close the underlying file only if this object opened it.
        if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

    def __del__(self):
        try:
            if (self.myfileobj is None and
                self.fileobj is None):
                return
        except AttributeError:
            # __init__ failed before the attributes were set.
            return
        self.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
        self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def isatty(self):
        # A gzip stream is never an interactive terminal.
        return False

    def tell(self):
        # Position in the *uncompressed* stream.
        return self.offset

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.offset = 0

    def seek(self, offset):
        # Only absolute, forward-friendly seeks in the uncompressed stream:
        # writing pads with zero bytes, reading decompresses and discards.
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

    def readline(self, size=-1):
        # Read one '\n'-terminated line, at most `size` bytes when size>=0.
        # min_readsize adapts to typical line length across calls.
        if size < 0:
            size = sys.maxint
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find('\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return ''.join(bufs) # Return resulting line

    def readlines(self, sizehint=0):
        # Negative numbers result in reading all the lines
        if sizehint <= 0:
            sizehint = sys.maxint
        L = []
        while sizehint > 0:
            line = self.readline()
            if line == "":
                break
            L.append(line)
            sizehint = sizehint - len(line)
        return L

    def writelines(self, L):
        for line in L:
            self.write(line)

    def __iter__(self):
        return self

    def next(self):
        # Python 2 iterator protocol: yield lines until EOF.
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration
def _test():
    """Minimal command-line driver: compress each argument like gzip, or
    decompress with -d like gunzip; '-' means stdin/stdout."""
    # Act like gzip; with -d, act like gunzip.
    # The input file is not deleted, however, nor are any other gzip
    # options or features supported.
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        if decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
                g = sys.stdout
            else:
                if arg[-3:] != ".gz":
                    print "filename doesn't end in .gz:", repr(arg)
                    continue
                # module-level open(), i.e. a GzipFile reader
                f = open(arg, "rb")
                g = __builtin__.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
            else:
                f = __builtin__.open(arg, "rb")
                g = open(arg + ".gz", "wb")
        # Stream in 1Kb chunks from f to g.
        while True:
            chunk = f.read(1024)
            if not chunk:
                break
            g.write(chunk)
        if g is not sys.stdout:
            g.close()
        if f is not sys.stdin:
            f.close()

if __name__ == '__main__':
    _test()

BIN
obitools/gzip.pyc Normal file

Binary file not shown.

View File

@ -0,0 +1,538 @@
import obitools
import re
import array
class Location(object):
    """
    Define a location on a sequence.

    Abstract base class: concrete subclasses (C{SimpleLocation},
    C{PointLocation}, C{CompositeLocation}, C{ComplementLocation}) override
    the predicate methods below, which here mostly return C{None}.
    """

    def extractSequence(self,sequence):
        '''
        Extract subsequence corresponding to a Location.

        @param sequence: the sequence to extract from
        @type sequence: C{BioSequence} or C{str}

        @return: a C{str} when a C{str} was given, otherwise a new
            C{BioSequence} carrying the original tags plus a 'location' tag
        '''
        assert isinstance(sequence, (obitools.BioSequence,str)), \
            "sequence must be an instance of str or BioSequence"
        if isinstance(sequence, str):
            seq = self._extractSequence(sequence)
        else:
            # A location containing a complement operator only makes sense
            # on a nucleic sequence.
            if isinstance(sequence, obitools.AASequence):
                assert not self.needNucleic(), \
                    "This location can be used only with Nucleic sequences"
            seq = self._extractSequence(str(sequence))
            # Rebuild a sequence object of the same kind as the input.
            if isinstance(sequence, obitools.AASequence):
                st = obitools.AASequence
            else:
                st = obitools.NucSequence
            seq = st(sequence.id,
                     seq,
                     sequence.definition,
                     **sequence.getTags())
            seq['location']=str(self)
            if 'length' in sequence.getTags():
                seq['length']=len(seq)
            # Propagate per-base quality values when available.
            if hasattr(sequence, 'quality'):
                quality = self._extractQuality(sequence)
                seq.quality=quality
        return seq

    def isDirect(self):
        # Overridden by subclasses; None means "unknown / mixed".
        return None

    def isSimple(self):
        '''
        Indicate if a location is composed of a single continuous
        region or is composed by the junction of several locations
        by the C{join} operator.

        @return: C{True} if the location is composed of a single
                 continuous region.
        @rtype: bool
        '''
        return None

    def isFullLength(self):
        # True when the location carries no '<' / '>' partial markers.
        return None

    def needNucleic(self):
        '''
        If a location contains a complement operator, it can be use
        only on nucleic sequence.

        @return: C{True} if location contains a complement operator
        @rtype: bool
        '''
        return None

    def getGloc(self):
        # Compact single-strand representation: (T|F,pos,pos,...) where
        # T/F encodes the strand of the simplified location.
        loc = self.simplify()
        assert loc.isDirect() is not None,"Gloc cannot be created for multi oriented location : %s" % str(loc)
        positions = ','.join([str(x) for x in loc._getglocpos()])
        return "(%s,%s)" % ({True:'T',False:'F'}[loc.isDirect()],
                            positions)

    def shift(self,s):
        # Overridden by subclasses: translate the location by s positions.
        return None

    def getBegin(self):
        return None

    def getEnd(self):
        return None

    def getFivePrime(self):
        # On the direct strand the 5' side is the beginning.
        return self.getBegin()

    def getThreePrime(self):
        return self.getEnd()

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")
    fivePrime=property(getFivePrime,None,None,"5' position of the location")
    threePrime=property(getThreePrime,None,None,"3' position of the location")

    def __abs__(self):
        # abs(loc) returns the location projected onto the direct strand.
        assert self.isDirect() is not None,"Abs operator cannot be applied on non oriented location"
        if self.isDirect():
            return self
        else:
            return ComplementLocation(self).simplify()

    def __cmp__(self,y):
        # Python 2 ordering: by begin position, direct strand first on ties.
        if self.begin < y.begin:
            return -1
        if self.begin > y.begin:
            return 1
        if self.isDirect() == y.isDirect():
            return 0
        if self.isDirect() and not y.isDirect():
            return -1
        return 1
class SimpleLocation(Location):
    """
    A simple location is describe a continuous region of
    a sequence define by a C{begin} and a C{end} position.
    """

    def __init__(self,begin,end):
        '''
        Build a new C{SimpleLocation} instance. Valid
        position are define on M{[1,N]} with N the length
        of the sequence.

        @param begin: start position of the location
        @type begin: int
        @param end: end position of the location
        @type end: int
        '''
        assert begin > 0 and end > 0
        self._begin = begin
        self._end = end
        # before/after record the EMBL/GenBank '<' and '>' partial-location
        # markers (see __str__ and isFullLength).
        self._before=False
        self._after=False

    def _extractSequence(self,sequence):
        # NOTE(review): the first test uses '<' while the second uses '<=',
        # so a location whose begin equals len(sequence) is rejected --
        # confirm whether this asymmetry is intentional.
        assert ( self._begin < len(sequence)
                 and self._end <= len(sequence)), \
            "Sequence length %d is too short" % len(sequence)
        return sequence[self._begin-1:self._end]

    def _extractQuality(self,sequence):
        # Same bounds check as _extractSequence, applied to the quality
        # vector slice.
        assert ( self._begin < len(sequence)
                 and self._end <= len(sequence)), \
            "Sequence length %d is too short" % len(sequence)
        return sequence.quality[self._begin-1:self._end]

    def isDirect(self):
        return True

    def isSimple(self):
        return True

    def isFullLength(self):
        # Full length unless a '<' or '>' partial marker is present.
        return not (self.before or self.after)

    def simplify(self):
        # A one-base range degenerates to a PointLocation.
        if self._begin == self._end:
            return PointLocation(self._begin)
        else:
            return self

    def needNucleic(self):
        return False

    def __str__(self):
        before = {True:'<',False:''}[self.before]
        after  = {True:'>',False:''}[self.after]
        return "%s%d..%s%d" % (before,self._begin,after,self._end)

    def shift(self,s):
        # Translate the location by s positions (s may be negative but
        # must keep the begin position strictly positive).
        assert (self._begin + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        return SimpleLocation(self._begin + s, self._end + s)

    def _getglocpos(self):
        return (self.begin,self.end)

    def getGloc(self):
        positions = ','.join([str(x) for x in self._getglocpos()])
        return "(%s,%s)" % ({True:'T',False:'F'}[self.isDirect()],
                            positions)

    def getBegin(self):
        return self._begin

    def getEnd(self):
        return self._end

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")

    def getBefore(self):
        return self._before

    def getAfter(self):
        return self._after

    def setBefore(self,value):
        assert isinstance(value, bool)
        self._before=value

    def setAfter(self,value):
        assert isinstance(value, bool)
        self._after=value

    before=property(getBefore,setBefore,None)
    after =property(getAfter,setAfter,None)
class PointLocation(Location):
    """
    A point location describes a location on a sequence
    limited to a single position
    """

    def __init__(self,position):
        # Positions are 1-based, as in EMBL/GenBank feature tables.
        assert position > 0
        self._pos=position

    def _extractSequence(self,sequence):
        # Bug fix: this method referenced self._end, an attribute that
        # PointLocation never defines (AttributeError at runtime); the
        # single stored position is self._pos.
        assert self._pos <= len(sequence), \
            "Sequence length %d is too short" % len(sequence)
        return sequence[self._pos-1]

    def _extractQuality(self,sequence):
        # Bug fix: self._end -> self._pos, and slice the quality vector
        # (as SimpleLocation._extractQuality does) rather than the
        # sequence itself, so CompositeLocation can concatenate the
        # resulting quality values.
        assert self._pos <= len(sequence), \
            "Sequence length %d is too short" % len(sequence)
        return sequence.quality[self._pos-1:self._pos]

    def isDirect(self):
        return True

    def isSimple(self):
        return True

    def isFullLength(self):
        # A single position carries no partial-location markers.
        return True

    def simplify(self):
        return self

    def needNucleic(self):
        return False

    def shift(self,s):
        # Translate the position by s (must stay strictly positive).
        assert (self._pos + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        return PointLocation(self._pos + s)

    def _getglocpos(self):
        return (self._pos,self._pos)

    def getBegin(self):
        return self._pos

    def getEnd(self):
        return self._pos

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")

    def __str__(self):
        return str(self._pos)
class CompositeLocation(Location):
    """
    A location built by the C{join} operator from several sub-locations.
    """

    def __init__(self,locations):
        self._locs = tuple(locations)

    def _extractSequence(self,sequence):
        # Concatenate the text extracted by each sub-location, in order.
        seq = ''.join([x._extractSequence(sequence)
                       for x in self._locs])
        return seq

    def _extractQuality(self,sequence):
        # Concatenate the quality values of each sub-location.
        rep=array.array('d',[])
        for x in self._locs:
            rep.extend(x._extractQuality(sequence))
        return rep

    def isDirect(self):
        # True/False only when every sub-location agrees; None for a mix.
        hasDirect,hasReverse = reduce(lambda x,y: (x[0] or y,x[1] or not y),
                                      (z.isDirect() for z in self._locs),(False,False))
        if hasDirect and not hasReverse:
            return True
        if hasReverse and not hasDirect:
            return False
        return None

    def isSimple(self):
        return False

    def simplify(self):
        # A join of one element is that element; a join made only of
        # complements is rewritten as the complement of the reversed join.
        if len(self._locs)==1:
            return self._locs[0]
        rep = CompositeLocation(x.simplify() for x in self._locs)
        if reduce(lambda x,y : x and y,
                  (isinstance(z, ComplementLocation)
                   for z in self._locs)):
            rep = ComplementLocation(CompositeLocation(x._loc.simplify()
                                                       for x in rep._locs[::-1]))
        return rep

    def isFullLength(self):
        # Full length only when every sub-location is.
        return reduce(lambda x,y : x and y, (z.isFullLength() for z in self._locs),1)

    def needNucleic(self):
        # Bug fix: the generator previously yielded the *bound method*
        # z.needNucleic (always truthy) instead of calling it, so this
        # method returned a truthy value for any non-empty join.
        return reduce(lambda x,y : x or y,
                      (z.needNucleic() for z in self._locs),
                      False)

    def _getglocpos(self):
        return reduce(lambda x,y : x + y,
                      (z._getglocpos() for z in self._locs))

    def getBegin(self):
        return min(x.getBegin() for x in self._locs)

    def getEnd(self):
        return max(x.getEnd() for x in self._locs)

    def shift(self,s):
        # Translate every sub-location by s positions.
        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        return CompositeLocation(x.shift(s) for x in self._locs)

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")

    def __str__(self):
        return "join(%s)" % ','.join([str(x)
                                      for x in self._locs])
class ComplementLocation(Location):
    """
    A location on the reverse strand: the wrapped location is extracted
    and reverse-complemented (IUPAC-aware).
    """

    # IUPAC nucleotide complement table; unknown characters map to 'n'.
    _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
           'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
           's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
           'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
           '-': '-'}

    def __init__(self,location):
        self._loc = location

    def _extractSequence(self,sequence):
        # Extract the wrapped region, then reverse-complement it.
        seq = self._loc._extractSequence(sequence)
        seq = ''.join([ComplementLocation._comp.get(x.lower(),'n') for x in seq[::-1]])
        return seq

    def _extractQuality(self,sequence):
        # Bug fix: previously returned the quality of the *whole* sequence
        # reversed, ignoring the wrapped sub-location; extract the wrapped
        # location's quality and reverse it to match the
        # reverse-complemented sequence returned by _extractSequence.
        return self._loc._extractQuality(sequence)[::-1]

    def isDirect(self):
        return False

    def isSimple(self):
        return self._loc.isSimple()

    def isFullLength(self):
        return self._loc.isFullLength()

    def simplify(self):
        # complement(complement(x)) == x
        if isinstance(self._loc, ComplementLocation):
            return self._loc._loc.simplify()
        else:
            return self

    def needNucleic(self):
        # Complementing only makes sense on nucleic sequences.
        return True

    def __str__(self):
        return "complement(%s)" % self._loc

    def shift(self,s):
        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        return ComplementLocation(self._loc.shift(s))

    def _getglocpos(self):
        return self._loc._getglocpos()

    def getBegin(self):
        return self._loc.getBegin()

    def getEnd(self):
        return self._loc.getEnd()

    def getFivePrime(self):
        # On the reverse strand the 5' side is the end position.
        return self.getEnd()

    def getThreePrime(self):
        return self.getBegin()

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")
    fivePrime=property(getFivePrime,None,None,"5' position of the location")
    threePrime=property(getThreePrime,None,None,"3' position of the location")
#
# Internal functions used for location parsing
#

def __sublocationIterator(text):
    '''
    Split a location string on the commas that sit outside any
    parentheses, yielding one sub-location string at a time
    (e.g. 'join(1..2,3..4),7' yields 'join(1..2,3..4)' then '7').
    '''
    current = []
    depth = 0
    for char in text:
        # Unbalanced closing parenthesis seen earlier.
        assert depth>=0,"Misformated location : %s" % text
        if char==',' and depth == 0:
            # Top-level separator: emit the accumulated sub-location.
            assert current,"Misformated location : %s" % text
            yield ''.join(current)
            current = []
            continue
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
        current.append(char)
    # The final sub-location must be non-empty and all parentheses closed.
    assert current and depth==0,"Misformated location : %s" % text
    yield ''.join(current)
#
# Internal functions used for location parsing
#

# Matches a simple range '<from..>to' (partial markers optional) or a
# bare position 'from'.
__simplelocparser = re.compile('(?P<before><?)(?P<from>[0-9]+)(\.\.(?P<after>>?)(?P<to>[0-9]+))?')

def __locationParser(text):
    # Recursive-descent parser for EMBL/GenBank location strings:
    # join(...) -> CompositeLocation, complement(...) -> ComplementLocation,
    # 'a..b' -> SimpleLocation, 'a' -> PointLocation.
    text=text.strip()
    if text[0:5]=='join(':
        assert text[-1]==')',"Misformated location : %s" % text
        return CompositeLocation(__locationParser(sl) for sl in __sublocationIterator(text[5:-1]))
    elif text[0:11]=='complement(':
        assert text[-1]==')',"Misformated location : %s" % text
        subl = tuple(__locationParser(sl) for sl in __sublocationIterator(text[11:-1]))
        # Several comma-separated sub-locations inside complement() are
        # treated as an implicit join.
        if len(subl)>1:
            subl = CompositeLocation(subl)
        else:
            subl = subl[0]
        return ComplementLocation(subl)
    else:
        data = __simplelocparser.match(text)
        assert data is not None,"Misformated location : %s" % text
        data = data.groupdict()
        if not data['to'] :
            sl = PointLocation(int(data['from']))
        else:
            sl = SimpleLocation(int(data['from']),int(data['to']))
        # Record the '<' / '>' partial-location markers.
        sl.before=data['before']=='<'
        sl.after=data['after']=='>'
        return sl
def locationGenerator(locstring):
    '''
    Parse a location string as present in genbank or embl file.

    @param locstring: string description of the location in embl/gb format
    @type locstring: str

    @return: a Location instance
    @rtype: C{Location} subclass instance
    '''
    # Thin public wrapper around the module-private recursive parser.
    return __locationParser(locstring)
# Accession-like token immediately followed by ':' (e.g. 'D28156.1:').
_matchExternalRef = re.compile('[A-Za-z0-9_|]+(\.[0-9]+)?(?=:)')

def extractExternalRefs(locstring):
    '''
    When a location describe external references (ex: D28156.1:1..>1292)
    separate the external reference part of the location and the location
    by itself.

    @param locstring: text representation of the location.
    @type locstring: str

    @return: a tuple with a set of string describing accession number
             of the referred sequences and a C{Location} instance.
    @rtype: tuple(set,Location)
    '''
    m = set(x.group() for x in _matchExternalRef.finditer(locstring))
    # Bug fix: with no external reference the old code built the pattern
    # ':' and stripped *every* colon from the location string; only strip
    # the accession prefixes when at least one reference was found.
    if m:
        clean = re.compile(':|'.join([re.escape(x) for x in m])+':')
        locstring = clean.sub('',locstring)
    cloc = locationGenerator(locstring)

    return m,cloc

Binary file not shown.

View File

@ -0,0 +1,177 @@
from obitools.location import Location,locationGenerator
import logging
import re
# One feature = a first line tagged 'FT' (EMBL) or starting with spaces
# (GenBank) followed by its continuation lines.
# NOTE(review): the exact space counts in these patterns may have been
# altered by whitespace collapsing when this file was copied -- verify
# against the EMBL (21-column) / GenBank (feature key at column 6) layouts.
_featureMatcher = re.compile('^(FT| ) [^ ].+\n((FT| ) .+\n)+',re.M)
_featureCleaner = re.compile('^FT',re.M)

def textFeatureIterator(fttable):
    '''
    Iterate through a textual description of a feature table in a genbank
    or embl format. Return at each step a text representation of each individual
    feature composing the table.

    @param fttable: a string corresponding to the feature table of a genbank
                    or an embl entry
    @type fttable: C{str}

    @return: an iterator on str
    @rtype: iterator

    @see: L{ftParser}
    '''
    for m in _featureMatcher.finditer(fttable):
        t = m.group()
        # Normalize the EMBL 'FT' prefix to spaces so both formats share
        # the same downstream parsing code.
        t = _featureCleaner.sub('  ',t)
        yield t
# A qualifier starts with '/' at column 22 and may continue on following
# 21-space-indented lines that do not start a new qualifier.
_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M)
_qualifierCleanner= re.compile("^ +",re.M)

def qualifierIterator(qualifiers):
    '''
    Parse a textual description of a feature in embl or genbank format
    as returned by the textFeatureIterator iterator and iterate through
    the key, value qualified defining this location.

    @param qualifiers: substring containing qualifiers
    @type qualifiers: str

    @return: an iterator on tuple (key,value), where keys are C{str}
    @rtype: iterator
    '''
    for m in _qualifierMatcher.finditer(qualifiers):
        t = m.group()
        t = _qualifierCleanner.sub('',t)
        t = t.split('=',1)
        if len(t)==1:
            # Valueless qualifier (e.g. /pseudo).
            t = (t[0],None)
        else:
            # /translation values keep no line breaks at all; other
            # multi-line values get their newlines replaced by spaces.
            if t[0]=='translation':
                value = t[1].replace('\n','')
            else:
                value = t[1].replace('\n',' ')
            try:
                # Convert quoted strings / numbers to Python values.
                # SECURITY NOTE(review): eval() on text read from the data
                # file -- a crafted entry could execute arbitrary code;
                # consider ast.literal_eval instead.
                value = eval(value)
            except:
                # Keep the raw string when it is not a Python literal.
                pass
            t = (t[0],value)
        yield t
# Feature key at column 6, location text from column 22 up to the first
# qualifier, qualifier block from the first '/" line onward.
_ftmatcher  = re.compile('(?<=^ {5})\S+')
_locmatcher = re.compile('(?<=^.{21})[^/]+',re.DOTALL)
_cleanloc   = re.compile('[\s\n]+')
_qualifiersMatcher = re.compile('^ +/.+',re.M+re.DOTALL)

def ftParser(feature):
    # Split one textual feature (see textFeatureIterator) into its three
    # parts: feature type, location string, raw qualifier block.
    fttype = _ftmatcher.search(feature).group()
    location=_locmatcher.search(feature).group()
    # The location may span several lines; strip all whitespace.
    location=_cleanloc.sub('',location)
    qualifiers=_qualifiersMatcher.search(feature)
    if qualifiers is not None:
        qualifiers=qualifiers.group()
    else:
        qualifiers=""
        logging.debug("Qualifiers regex not matching on \n=====\n%s\n========" % feature)

    return fttype,location,qualifiers
class Feature(dict,Location):
    """
    A feature table entry: a qualifier dictionary (key -> list of values)
    combined with the sequence C{Location} it annotates.
    """

    def __init__(self,type,location):
        self._fttype=type
        self._loc=location

    def getFttype(self):
        return self._fttype

    def extractSequence(self,sequence,withQualifier=False):
        # Delegate to the location; optionally copy the qualifiers into
        # the extracted sequence's info dictionary.
        seq = self._loc.extractSequence(sequence)
        if withQualifier:
            seq.getInfo().update(self)
        return seq

    def isDirect(self):
        return self._loc.isDirect()

    def isSimple(self):
        return self._loc.isSimple()

    def isFullLength(self):
        return self._loc.isFullLength()

    def simplify(self):
        # Simplify the location but keep type and qualifiers.
        f = Feature(self._fttype,self._loc.simplify())
        f.update(self)
        return f

    def locStr(self):
        return str(self._loc)

    def needNucleic(self):
        return self._loc.needNucleic()

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return str((self.ftType,str(self._loc),dict.__repr__(self)))

    def __cmp__(self,y):
        # Order features by their location (y exposes begin/isDirect too).
        return self._loc.__cmp__(y)

    def _getglocpos(self):
        return self._loc._getglocpos()

    ftType = property(getFttype, None, None, "Feature type name")

    def shift(self,s):
        # Translate the feature's location, keeping type and qualifiers.
        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        f = Feature(self._fttype,self._loc.shift(s))
        f.update(self)
        return f

    def getBegin(self):
        return self._loc.getBegin()

    def getEnd(self):
        return self._loc.getEnd()

    begin = property(getBegin,None,None,"beginning position of the location")
    end   = property(getEnd,None,None,"ending position of the location")
def featureFactory(featureDescription):
    '''
    Build a L{Feature} from one textual feature description (as yielded
    by L{textFeatureIterator}): parse type/location/qualifiers, then
    accumulate each qualifier value in a list under its key.
    '''
    fttype,location,qualifiers = ftParser(featureDescription)
    location = locationGenerator(location)
    feature = Feature(fttype,location)
    # Keep the raw text for debugging / round-tripping.
    feature.raw = featureDescription

    for k,v in qualifierIterator(qualifiers):
        # A qualifier key may occur several times; collect values in a list.
        feature.setdefault(k,[]).append(v)

    return feature
def featureIterator(featureTable,skipError=False):
    '''
    Iterate over the parsed features of a raw feature table.

    :param featureTable: raw feature-table text
    :param skipError: when True, entries that fail to parse are logged
                      at debug level and skipped instead of raising
    :raises AssertionError: the original parse error, when skipError is False
    '''
    for tft in textFeatureIterator(featureTable):
        try:
            feature = featureFactory(tft)
        except AssertionError,e:
            # parse failure: either propagate or trace-and-skip
            logging.debug("Parsing error on feature :\n===============\n%s\n===============" % tft)
            if not skipError:
                raise e
            logging.debug("\t===> Error skipped")
            continue
        yield feature

View File

@ -0,0 +1,265 @@
from obitools.ecopcr.options import addTaxonomyFilterOptions,\
loadTaxonomyDatabase
from obitools.graph import UndirectedGraph
from obitools.align import lenlcs,isLCSReachable
from obitools.graph.algorithms.component import componentIterator
from obitools.utils.bioseq import uniqSequence
from obitools.utils import progressBar
import math
import sys
from obitools.graph.rootedtree import RootedTree
def average(x):
    """
    Weighted mean of an iterable of (value, weight) pairs.

    @param x: iterable of (value, weight) couples
    @return: (weighted mean as float, total weight)
    """
    pairs = list(x)
    weighted_sum = sum(value * weight for value, weight in pairs)
    total_weight = sum(pair[1] for pair in pairs)
    return (float(weighted_sum) / float(total_weight), total_weight)
def minimum(x):
    """
    Smallest value of an iterable of (value, weight) pairs.

    @param x: iterable of (value, weight) couples
    @return: (minimum value as float, total weight)
    """
    pairs = list(x)
    smallest = min(pair[0] for pair in pairs)
    total_weight = sum(pair[1] for pair in pairs)
    return (float(smallest), total_weight)
def ecoPCRReader(entries, options):
    '''
    Load ecoPCR sequence records and split them into ingroup/outgroup.

    Only records whose taxid is known to the taxonomy and whose lineage
    carries at least the species, genus and family ranks are kept.

    @param entries: iterable of ecoPCR sequence records (dict-like,
                    optionally carrying a 'taxid' key)
    @param options: parsed command-line options; must provide a loadable
                    taxonomy plus ``ingroup`` (taxid) and ``rankthresold``
    @return: dict with keys 'ingroup' and 'outgroup' (record lists) and
             'ranks' (set of rank ids present in more than
             options.rankthresold of the ingroup sequences)
    '''
    taxonomy = loadTaxonomyDatabase(options)
    # numeric identifiers of the ranks used for filtering
    norankid = options.taxonomy.findRankByName('no rank')
    speciesid = options.taxonomy.findRankByName('species')
    genusid = options.taxonomy.findRankByName('genus')
    familyid = options.taxonomy.findRankByName('family')
    minrankseq = set([speciesid, genusid, familyid])

    # per-rank count of sequences annotated at that rank
    usedrankid = {}

    ingroup = []
    outgroup = []

    for s in entries:
        if 'taxid' in s:
            taxid = s['taxid']
            if taxid in taxonomy:
                # collect every informative (non-'no rank') rank of the lineage
                allrank = set()
                for p in options.taxonomy.parentalTreeIterator(taxid):
                    if p[1] != norankid:
                        allrank.add(p[1])
                # keep only sequences resolved at species, genus AND family
                if len(minrankseq & allrank) == 3:
                    for r in allrank:
                        usedrankid[r] = usedrankid.get(r, 0) + 1
                    # ingroup membership = descendant of options.ingroup
                    if taxonomy.isAncestor(options.ingroup, taxid):
                        ingroup.append(s)
                    else:
                        outgroup.append(s)

    # ranks represented in a sufficient fraction of the ingroup
    keptrank = set(r for r in usedrankid
                   if float(usedrankid[r]) / float(len(ingroup)) > options.rankthresold)

    return {'ingroup': ingroup,
            'outgroup': outgroup,
            'ranks': keptrank
            }
def buildSimilarityGraph(dbseq, ranks, taxonomy, dcmax=5):
    '''
    Build an undirected graph linking sequences whose edit distance,
    estimated from the length of their longest common subsequence (LCS),
    is at most dcmax.

    @param dbseq: indexable collection of sequences, each carrying a 'taxid'
    @param ranks: iterable of rank ids used to annotate every node
    @param taxonomy: taxonomy used to resolve the taxon at each rank
    @param dcmax: maximum LCS-based distance for which an edge is created
    @return: an UndirectedGraph with one node per sequence
    '''
    ldbseq = len(dbseq)
    pos = 1
    # digit width of the counters shown in the progress-bar header
    digit = int(math.ceil(math.log10(ldbseq)))
    header = "Alignment : %%0%dd x %%0%dd -> %%0%dd " % (digit, digit, digit)
    # number of pairwise comparisons (upper triangle incl. diagonal)
    aligncount = ldbseq * (ldbseq + 1) / 2
    edgecount = 0
    print >>sys.stderr
    progressBar(1, aligncount, True, "Alignment : %s x %s -> %s " % ('-' * digit, '-' * digit, '0' * digit))

    sim = UndirectedGraph()

    # one node per sequence, annotated with its taxon at every kept rank
    i = 0
    for s in dbseq:
        taxid = s['taxid']
        rtaxon = dict((rid, taxonomy.getTaxonAtRank(taxid, rid))
                      for rid in ranks)
        sim.addNode(i, seq=s, taxid=taxid, rtaxon=rtaxon)
        i += 1

    # aligner = LCS()

    # all-against-all comparison of the upper triangle
    for is1 in xrange(ldbseq):
        s1 = dbseq[is1]
        ls1 = len(s1)
        # aligner.seqA=s1
        for is2 in xrange(is1 + 1, ldbseq):
            s2 = dbseq[is2]
            ls2 = len(s2)
            lm = max(ls1, ls2)
            lcsmin = lm - dcmax
            # cheap length-based bound before running the full LCS computation
            if isLCSReachable(s1, s2, lcsmin):
                llcs, lali = lenlcs(s1, s2)
                ds1s2 = lali - llcs
                if ds1s2 <= dcmax:
                    sim.addEdge(node1=is1, node2=is2, ds1s2=ds1s2, label=ds1s2)
                    edgecount += 1
        progressBar(pos, aligncount, head=header % (is1, is2, edgecount))
        # advance by the number of comparisons done for row is1
        pos += (ldbseq - is1 - 1)

    return sim
def buildTsr(component):
    '''
    Tally, for every considered taxonomic rank, the taxa present in a
    connected component.

    :param component: the analyzed connected component
    :type component: :py:class:`UndirectedGraph`
    :return: a dictionary indexed by rankid containing a `dict` indexed
             by taxid giving the number of sequences for this taxid
    :rtype: `dict` of `dict` of `int`
    '''
    counts = {}
    for node in component:
        rtaxon = node['rtaxon']
        for rank in rtaxon:
            taxid = rtaxon[rank]
            if taxid is None:
                continue
            perrank = counts.setdefault(rank, {})
            perrank[taxid] = perrank.get(taxid, 0) + 1
    return counts
def edgeDistSelector(dcmax):
    """
    Return a predicate keeping only edges whose 'ds1s2' distance
    attribute does not exceed *dcmax*.
    """
    def _keep(edge):
        return edge['ds1s2'] <= dcmax
    return _keep
def distanceOfConfusion(simgraph, dcmax=5, aggregate=average):
    '''
    Estimate, for every taxon present in the similarity graph, the
    distance at which it stops being confused with another taxon.

    The graph is repeatedly split into connected components using only
    edges of distance <= d, with d decreasing from dcmax to 0.  A taxon
    whose component contains no other taxon at the same rank is recorded
    as identified at distance d+1; taxa still mixed at d == 0 are
    recorded with distance 0.

    @param simgraph: similarity graph built by buildSimilarityGraph
    @param dcmax: largest edge distance initially considered
    @param aggregate: reducer applied to the (distance, weight) pairs
                      collected per taxon (e.g. average or minimum)
    @return: dict mapping taxid to the aggregate() result
    '''
    alltaxa = set()
    for n in simgraph:
        alltaxa |= set(n['rtaxon'].values())
    taxacount = len(alltaxa)
    result = {}
    # one-element list: mutable cell shared with the nested function
    pos = [1]
    header = "Component : %-5d Identified : %-8d "
    progressBar(1, taxacount, True, header % (0, 0))

    def _idc(cc, dcmax):
        # split every subgraph into components linked by edges <= dcmax
        composante = []
        for x in cc:
            composante.extend(simgraph.subgraph(c)
                              for c in componentIterator(x,
                                                         edgePredicat=edgeDistSelector(dcmax)))

        good = set()      # (taxid, weight) alone at their rank in a component
        bad = {}          # taxid -> weight for taxa still mixed with others
        complexe = []     # components that need a finer (smaller-d) split

        for c in composante:
            tsr = buildTsr(c)
            newbad = False
            for r in tsr:
                if len(tsr[r]) == 1:
                    # a single taxon at this rank: unambiguous at dcmax+1
                    taxid = tsr[r].keys()[0]
                    good.add((taxid, tsr[r][taxid]))
                else:
                    newbad = True
                    for taxid in tsr[r]:
                        bad[taxid] = bad.get(taxid, 0) + tsr[r][taxid]
            if newbad:
                complexe.append(c)

        # good = good - bad

        for taxid, weight in good:
            if taxid not in result:
                result[taxid] = []
            result[taxid].append((dcmax + 1, weight))

        progressBar(pos[0], taxacount, False, header % (len(composante), pos[0]))
        pos[0] = len(result)

        if dcmax > 0:
            # refine the still-ambiguous components with a stricter distance
            dcmax -= 1
            _idc(complexe, dcmax)
        else:
            # distance 0 reached: remaining taxa stay confused
            for taxid in bad:
                if taxid not in result:
                    result[taxid] = []
                result[taxid].append((0, bad[taxid]))
            progressBar(pos[0], taxacount, False, header % (len(composante), pos[0]))
            pos[0] = len(result)

    _idc([simgraph], dcmax)

    for taxid in result:
        result[taxid] = aggregate(result[taxid])

    return result
def propagateDc(tree, node=None, aggregate=min):
    """
    Propagate leaf 'dc' values up the tree: every node receives the
    aggregate (min by default) of the 'dc' of all its leaves, and the
    same rule is applied recursively to each of its children.

    Called without a node, the propagation starts at the first root.
    """
    if node is None:
        node = tree.getRoots()[0]
    node['dc'] = aggregate(leaf['dc'] for leaf in node.leavesIterator())
    for child in node:
        propagateDc(tree, child, aggregate)
def confusionTree(distances, ranks, taxonomy, aggregate=min, bsrank='species', dcmax=1):
    '''
    Build a taxonomic tree whose nodes are annotated with confusion
    distances ('dc') and a per-node resolution score ('bs').

    @param distances: dict taxid -> (distance, weight), as produced by
                      distanceOfConfusion after aggregation
    @param ranks: taxonomic rank ids to consider
    @param taxonomy: taxonomy providing names, ranks and lineages
    @param aggregate: unused here; kept for interface compatibility
    @param bsrank: rank whose nodes are scored by Bs (default 'species')
    @param dcmax: distance threshold separating resolved from confused taxa
    @return: a RootedTree rooted at the common ancestor of all taxa
    '''
    def Bs(node, rank, dcmax):
        # fraction of bsrank-level descendants whose dc reaches dcmax
        n = len(node)
        if n:
            g = [int(x['dc'] >= dcmax) for x in node.subgraphIterator() if x['rank'] == bsrank]
            n = len(g)
            g = sum(g)
            bs = float(g) / float(n)
            node['bs'] = bs
            node['bs_label'] = "%3.2f (%d)" % (bs, n)
        for n in node:
            Bs(n, rank, dcmax)

    tree = RootedTree()
    ranks = set(ranks)
    tset = set(distances)

    # one node per taxon with its confusion-distance annotations
    for taxon in distances:
        tree.addNode(taxon, rank=taxonomy.getRank(taxon),
                     name=taxonomy.getScientificName(taxon),
                     dc=float(distances[taxon][0]),
                     n=distances[taxon][1],
                     dc_label="%4.2f (%d)" % (float(distances[taxon][0]), distances[taxon][1])
                     )

    # link every taxon to its closest ancestor also present in distances
    for taxon in distances:
        piter = taxonomy.parentalTreeIterator(taxon)
        taxon = piter.next()
        for parent in piter:
            if taxon[0] in tset and parent[0] in distances:
                tset.remove(taxon[0])
                tree.addEdge(parent[0], taxon[0])
            taxon = parent

    root = tree.getRoots()[0]
    Bs(root, bsrank, dcmax)

    return tree

View File

@ -0,0 +1,34 @@
'''
Created on 30 oct. 2011
@author: coissac
'''
from obitools.ecopcr.options import addTaxonomyDBOptions
def addMetabarcodingOption(optionManager):
    """
    Register the metabarcoding-specific command line options on
    *optionManager*, on top of the taxonomy database options.

    @param optionManager: an optparse.OptionParser-like option manager
    """
    addTaxonomyDBOptions(optionManager)

    optionManager.add_option('--dcmax',
                             action="store", dest="dc",
                             metavar="###",
                             type="int",
                             default=0,
                             help="Maximum confusion distance considered")

    optionManager.add_option('--ingroup',
                             action="store", dest="ingroup",
                             metavar="###",
                             type="int",
                             default=1,
                             # fixed garbled wording ("delimitation the in group")
                             help="ncbi taxid delimiting the ingroup")

    optionManager.add_option('--rank-thresold',
                             action="store", dest="rankthresold",
                             metavar="#.##",
                             type="float",
                             default=0.5,
                             # fixed typo ("concidering")
                             help="minimum fraction of the ingroup sequences "
                                  "for considering the rank")

View File

@ -0,0 +1,28 @@
from obitools.obischemas import kb
__connection__ = None
def initConnection(options):
    """
    (Re)open the module-wide database connection from command line options.

    Recognized option attributes (used only when present and not None):
    dbname, dbhost, dbuser, dbpassword.  options.autocommit is copied
    onto the new connection.
    """
    global __connection__

    param = {}
    # map option attributes to the keyword names expected by kb.getConnection
    option_to_key = (("dbname", "database"),
                     ("dbhost", "host"),
                     ("dbuser", "username"),
                     ("dbpassword", "password"))
    for attr, key in option_to_key:
        value = getattr(options, attr, None)
        if value is not None:
            param[key] = value

    __connection__ = kb.getConnection(**param)
    __connection__.autocommit = options.autocommit
def getConnection(options=None):
    """
    Return the shared database connection, reinitializing it first when
    *options* are supplied.

    @raise AssertionError: if no connection has ever been initialized
    """
    global __connection__

    if options is not None:
        initConnection(options)

    assert __connection__ is not None, "database connection is not initialized"
    return __connection__

View File

@ -0,0 +1,55 @@
"""
kb package is devoted to manage access to postgresql database from python
script
"""
class Connection(object):
    """
    Abstract base class of database connections.

    Concrete backends must override cursor(), commit() and rollback();
    instantiating or using this class directly raises RuntimeError.
    """

    def __init__(self):
        raise RuntimeError('pyROM.KB.Connection is an abstract class')

    def cursor(self):
        raise RuntimeError('pyROM.KB.Connection.cursor is an abstract function')

    def commit(self):
        raise RuntimeError('pyROM.KB.Connection.commit is an abstract function')

    def rollback(self):
        raise RuntimeError('pyROM.KB.Connection.rollback is an abstract function')

    def __call__(self, query):
        # shortcut: connection(query) == connection.cursor().execute(query)
        return self.cursor().execute(query)
class Cursor(object):
    """
    Abstract base class of database cursors.

    Concrete backends must override execute(); calling the cursor is an
    alias for execute().
    """

    def __init__(self, db):
        raise RuntimeError('pyROM.KB.Cursor is an abstract class')

    def execute(self, query):
        raise RuntimeError('pyROM.KB.Cursor.execute is an abstract function')

    __call__ = execute
_current_connection = None # Static variable used to store connection to KB
def getConnection(*args, **kargs):
    """
    Return a connection to the database.

    When called from the database backend no argument is needed.  The
    connection is cached in the module-level ``_current_connection``; a
    new one is created on the first call or whenever explicit connection
    arguments are supplied.
    """
    global _current_connection
    # Fixed: compare to None with 'is', not '==' (identity, not equality).
    if _current_connection is None or args or kargs:
        try:
            # inside the database backend a native connection is available
            from obischemas.kb import backend
            _current_connection = backend.Connection()
        except ImportError:
            # external script: go through the psycopg2 wrapper
            from obischemas.kb import extern
            _current_connection = extern.Connection(*args, **kargs)
    return _current_connection

View File

@ -0,0 +1,78 @@
"""
Module : KB.extern
Author : Eric Coissac
Date : 03/05/2004
Module wrapping psycopg interface module to allow connection
to a postgresql databases with the same interface from
backend and external script.
This module define a class usable from external script
"""
import psycopg2
import sys
from obischemas import kb
class Connection(kb.Connection):
    """
    Concrete connection wrapping psycopg2 for use from external scripts.
    """

    def __init__(self,*connectParam,**kconnectParam):
        """
        Store the connection parameters and open the psycopg2 connection.

        Positional arguments are interpreted as a DSN; keyword arguments
        are passed verbatim to psycopg2.connect.
        """
        if connectParam:
            # BUG FIX: was 'self.connectParam=={...}' — a no-op comparison
            # instead of an assignment, leaving the attribute undefined
            # and crashing on the connect() call below.
            # NOTE(review): connectParam is a tuple here; psycopg2 expects
            # a DSN string — confirm callers pass a single DSN argument.
            self.connectParam={'dsn':connectParam}
        else:
            self.connectParam=kconnectParam
        # removed leftover debug print of the parameters: it leaked the
        # database password to stdout
        self.db = psycopg2.connect(**(self.connectParam))

    def restart(self):
        """Try to reopen the connection, giving up after 1000 attempts."""
        ok=1
        while (ok and ok < 1000):
            try:
                self.db = psycopg2.connect(**self.connectParam)
            except:
                ok+=1
            else:
                ok=0

    def cursor(self):
        """Return a new Cursor, propagating the autocommit flag if set."""
        curs = Cursor(self.db)
        if hasattr(self,'autocommit') and self.autocommit:
            curs.autocommit = self.autocommit
        return curs

    def commit(self):
        self.db.commit()

    def rollback(self):
        if hasattr(self,'db'):
            self.db.rollback()

    def __del__(self):
        # roll back any pending transaction before the object goes away
        if hasattr(self,'db'):
            self.rollback()
class Cursor(kb.Cursor):
    '''
    Concrete cursor wrapping a psycopg2 cursor.  execute() returns the
    result set as a list of dictionaries keyed by column label.
    '''
    def __init__(self,db):
        # keep the connection too: autocommit mode must commit on it
        self.db = db
        self.curs = db.cursor()

    def execute(self,query):
        '''
        Run *query* and return its rows as a list of dicts (one per row,
        keyed by column label); statements without a result set yield [].

        @raise psycopg2.ProgrammingError: re-raised after echoing the query to stderr
        @raise psycopg2.IntegrityError: re-raised after echoing the query to stderr
        '''
        try:
            self.curs.execute(query)
            # in autocommit mode every statement is committed immediately
            if hasattr(self,'autocommit') and self.autocommit:
                self.db.commit()
        except psycopg2.ProgrammingError,e:
            print >>sys.stderr,"===> %s" % query
            raise e
        except psycopg2.IntegrityError,e:
            print >>sys.stderr,"---> %s" % query
            raise e
        try:
            # description is None for statements returning no rows,
            # which makes the iteration below raise TypeError
            label = [x[0] for x in self.curs.description]
            # Python 2 idiom: map(None, ...) zips labels with row values
            return [dict(map(None,label,y))
                    for y in self.curs.fetchall()]
        except TypeError:
            return []

View File

@ -0,0 +1,31 @@
def addConnectionOptions(optionManager):
    """
    Register the OBISchema database connection options (-d/-H/-U/-W/-A)
    on *optionManager*.

    @param optionManager: an optparse.OptionParser-like object
    """
    optionManager.add_option('-d','--dbname',
                             action="store", dest="dbname",
                             metavar="<DB NAME>",
                             type="string",
                             # fixed: missing space between the adjacent string
                             # literals produced "...containingtaxonomical data"
                             help="OBISchema database name containing "
                                  "taxonomical data")
    optionManager.add_option('-H','--host',
                             action="store", dest="dbhost",
                             metavar="<DB HOST>",
                             type="string",
                             help="host hosting OBISchema database")
    optionManager.add_option('-U','--user',
                             action="store", dest="dbuser",
                             metavar="<DB USER>",
                             type="string",
                             help="user for OBISchema database connection")
    optionManager.add_option('-W','--password',
                             action="store", dest="dbpassword",
                             metavar="<DB PASSWORD>",
                             type="string",
                             help="password for OBISchema database connection")
    optionManager.add_option('-A','--autocommit',
                             action="store_true",dest="autocommit",
                             default=False,
                             help="add commit action after each query")

0
obitools/obo/__init__.py Normal file
View File

View File

53
obitools/obo/go/parser.py Normal file
View File

@ -0,0 +1,53 @@
from obitools.obo.parser import OBOTerm
from obitools.obo.parser import OBOEntry
from obitools.obo.parser import stanzaIterator
from logging import debug
class GOEntry(OBOEntry):
    '''
    An entry of a GeneOntology .obo file: either the header (no stanza
    name) or a bracketed stanza.  Behaves like a dict, as OBOEntry does.
    '''
    pass
class GOTerm(OBOTerm):
    '''
    A GeneOntology 'Term' stanza.  Adds to OBOTerm the constraint that
    the term belongs to exactly one GO namespace.
    '''

    def __init__(self, stanza):
        # generic OBO validation happens in the OBOTerm constructor
        OBOTerm.__init__(self, stanza)
        assert 'namespace' in self and len(self['namespace'])==1, "An OBOTerm must belong to one of the cell_component, molecular_function or biological_process namespace"
def GOEntryFactory(stanza):
    '''
    Dispatcher of stanza.

    @param stanza: a stanza composed of several lines.
    @type stanza: text

    @return: a C{GOTerm} | C{OBOEntry} instance

    @note: 'Term' stanzas are promoted to GOTerm; every other stanza
    (including the header) stays a plain OBOEntry.
    '''
    if OBOEntry.parseStanzaName(stanza) == "Term":
        return GOTerm(stanza)
    return OBOEntry(stanza)
def GOEntryIterator(file):
    """Yield GOEntry/GOTerm objects parsed from an OBO stream."""
    for stanza in stanzaIterator(file):
        debug(stanza)
        yield GOEntryFactory(stanza)

707
obitools/obo/parser.py Normal file
View File

@ -0,0 +1,707 @@
from obitools.utils import skipWhiteLineIterator,multiLineWrapper
from obitools.utils import universalOpen
from obitools.format.genericparser import genericEntryIteratorGenerator
from logging import debug,warning
import re
#################################################################################
## Stanza preparation area ##
#################################################################################
class FileFormatError(Exception):
    '''
    Raised when an OBO file does not conform to the expected format.
    '''
    pass
_oboEntryIterator = genericEntryIteratorGenerator(endEntry='^ *$',
strip=True)
def stanzaIterator(inputfile):
    '''
    Iterate over the stanza of an OBO file (the basic units of the format).

    @param inputfile: a stream of strings from an opened OBO file.
    @type inputfile: a stream of strings

    @return: a stream of stanza
    @rtype: a stream of aggregated strings
    '''
    stream = universalOpen(inputfile)
    stream = multiLineWrapper(stream)
    return _oboEntryIterator(stream)
#################################################################################
## Trailing Modifiers treatment area ##
#################################################################################
class TrailingModifier(dict):
    '''
    A class object which inherits from the class dict. Trailing modifiers can be found
    at the end of TaggedValue objects when they exist.
    '''
    # matches the '{...}' block (optionally followed by a ' !' comment)
    # at the end of a tagged-value line
    _match_brace = re.compile('(?<=\ {)[^\]]*(\}) *( !|$)')

    def __init__(self,string):
        '''
        Parse the '{key=value, ...}' trailing-modifier block of *string*
        and load it into this dict; when no block is found the dict
        stays empty.
        '''
        ## search for trailing modifiers signals
        trailing_modifiers = TrailingModifier._match_brace.search(string)
        ## the trailing modifiers exist
        if trailing_modifiers:
            trailing_modifiers=trailing_modifiers.group(0).strip()
            # NOTE(review): leftover debug output (Python 2 print statement)
            print trailing_modifiers
            ## creates and feeds the dictionary of trailing modifiers
            # NOTE(review): group(0) still includes the closing brace and any
            # trailing ' !', so the last parsed value may carry '} !' — confirm
            dict.__init__(self,(x.strip().split('=',1) for x in trailing_modifiers.split(',')))
def trailingModifierFactory(string):
    '''
    Dispatcher of trailing modifiers.

    @param string: a string from a TaggedValue object with a trailing modifiers signal.
    @type string: string

    @return: a TrailingModifier instance when modifiers were parsed,
             None when the parsed dict is empty.
    '''
    modifiers = TrailingModifier(string)
    return modifiers if modifiers else None
#################################################################################
## TaggedValue treatment area ##
#################################################################################
class TaggedValue(object):
    '''
    A couple 'tag:value' of an OBOEntry.
    '''
    # value part: quoted or unquoted text up to an unescaped ' !' (comment),
    # ' {' (trailing modifiers) or the end of line
    _match_value = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)')
    # separates the trailing comment introduced by '!'
    _split_comment = re.compile('^!| !')
    # content of a double-quoted string (quotes excluded)
    _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")')
    # a '[...]' bracketed block (used for dbxref lists)
    _match_bracket = re.compile('\[[^\]]*\]')

    def __init__(self,line):
        '''
        Constructor of the class TaggedValue.

        @param line: a line of an OBOEntry composed of a tag and a value.
        @type line: string

        @note: The constructor separates tags from right terms. 'value' is extracted
        from right terms using a regular expression (value is at the beginning of the
        string, between quotes or not). Then, 'comment' is extracted from the rest of the
        string using another regular expression ('comment' is at the end of the string
        after a '!'. By default, 'comment' is set to None). Finally, 'trailing_modifiers'
        are extracted from the last string using another regular expression.
        The tag, the value, the comment and the trailing_modifiers are saved.
        '''
        debug("tagValueParser : %s" % line)
        ## by default :
        trailing_modifiers = None
        comment = None
        ## the tag is saved. 'rigth' is composed of the value, the comment and the trailing modifiers
        tag,rigth = line.split(':',1)
        ## the value is saved
        value = TaggedValue._match_value.search(rigth).group(0)
        debug("Extracted value : %s" % value)
        ## if there is a value AND a sign of a comment or trailing modifiers
        if value and value[-1] in '!{':
            lvalue = len(value)
            ## whatever it is a comment or trailing modifiers, it is saved into 'extra'
            extra = rigth[lvalue-1:].strip()
            ## a comment is extracted
            extra =TaggedValue._split_comment.split(extra,1)
            ## and saved if it exists
            if len(extra)==2:
                comment=extra[1].strip()
            ## trailing modifiers are extracted
            extra=extra[0]
            trailing_modifiers = trailingModifierFactory(extra)
            ## the value is cleaned of any comment or trailing modifiers signals
            value = value[0:-1]
        if tag=='use_term':
            tag='consider'
            # NOTE(review): raising here aborts __init__, so the renamed tag
            # above is never observable; warnings.warn was probably intended
            raise DeprecationWarning,"user_term is a deprecated tag, you should instead use consider"

        ## recording zone
        self.value =value.strip()
        self.tag = tag
        self.__doc__=comment
        self.trailing_modifiers=trailing_modifiers

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return '''"""%s"""''' % str(self)
class NameValue(TaggedValue):
    '''
    A couple 'name:value' inherited from the class TaggedValue. Used to manage name tags.
    '''

    def __init__(self, line):
        # A name line is trivial: everything after the first ':' is the
        # value; no comment or trailing-modifier parsing is performed,
        # so the TaggedValue constructor is deliberately bypassed.
        tag, remainder = line.split(':', 1)
        self.value = remainder.strip()
        self.tag = 'name'
        self.__doc__ = None
        self.trailing_modifiers = None
class DefValue(TaggedValue):
    '''
    A couple 'def:value' inherited from the class TaggedValue. Used to manage def tags.
    '''

    def __init__(self, line):
        '''
        Constructor of the class DefValue.

        @param line: a line of an OBOEntry composed of a tag named 'def' and a value.
        @type line: string

        @note: After generic TaggedValue parsing, the quoted definition
        text becomes the value of this object, and the bracketed dbxrefs
        found next to it are parsed and stored in self.dbxrefs.
        '''
        TaggedValue.__init__(self, line)
        # the definition is the quoted part of the generic value
        quoted = TaggedValue._match_quotedString.search(self.value).group(0)
        # drop the definition, then look for the '[...]' dbxref block
        remainder = self.value.replace(quoted, '')
        remainder = remainder.replace('  ', ' ')
        references = TaggedValue._match_bracket.search(remainder).group(0)
        # the value of a DefValue is the definition, not the generic value
        self.tag = 'def'
        self.value = quoted
        self.dbxrefs = xrefFactory(references)
class SynonymValue(TaggedValue):
    '''
    A couple 'synonym:value' inherited from the class TaggedValue. Used to manage
    synonym tags, exact_synonym tags, broad_synonym tags and narrow_synonym tags.
    '''
    # scope/type attributes: the text between the closing quote and the
    # optional '[' starting the dbxref list
    _match_scope = re.compile('(?<="")[^\[]*(?=\[|$)')

    def __init__(self,line):
        '''
        Constructor of the class SynonymValue.

        @param line: a line of an OBOEntry composed of a tag named 'synonym' or
        'exact_synonym' or 'broad_synonym' or 'narrow_synonym' and a value.
        @type line: string

        @note: SynonymValue is composed of a tag, a value, a scope, a list of types and
        dbxrefs.  The quoted definition becomes the value; the optional scope
        (RELATED by default) and synonym types are read from the attributes
        following the definition; the bracketed dbxrefs are parsed separately.
        Deprecated *_synonym tags map to tag 'synonym' with a fixed scope.
        '''
        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)
        ## definition, which is quoted, is extracted from the standard value of a TaggedValue.
        definition = TaggedValue._match_quotedString.search(self.value).group(0)
        ## the standard value is cleaned of the definition.
        cleanvalue = self.value.replace(definition,'')
        cleanvalue = cleanvalue.replace('  ',' ')
        ## 1) attributes are searched into the rest of the standard value.
        ## 2) then they are stripped.
        ## 3) then they are split on every ' '.
        ## 4) finally they are ordered into a set.
        attributes = set(SynonymValue._match_scope.search(cleanvalue).group(0).strip().split())
        ## the scopes are the junction between the attributes and a set of specific terms.
        scopes = attributes & set(['RELATED','EXACT','BROAD','NARROW'])
        ## the types are the rest of the attributes.
        types = attributes - scopes
        ## this is a constraint of the OBO format
        assert len(scopes)< 2,"Only one synonym scope allowed"
        ## the scope of the SynonymValue is into scopes or set by default to RELATED
        if scopes:
            scope = scopes.pop()
        else:
            scope = 'RELATED'
        ## Specific rules are defined for the following tags :
        if self.tag == 'exact_synonym':
            raise DeprecationWarning,'exact_synonym is a deprecated tag use instead synonym tag'
            # NOTE(review): unreachable after the raise above — warnings.warn
            # was probably intended so the normalization below could run
            self.tag = 'synonym'
            scope = 'EXACT'
        if self.tag == 'broad_synonym':
            raise DeprecationWarning,'broad_synonym is a deprecated tag use instead synonym tag'
            # NOTE(review): unreachable after the raise above
            self.tag = 'synonym'
            scope = 'BROAD'
        if self.tag == 'narrow_synonym':
            raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead synonym tag'
            # NOTE(review): unreachable after the raise above
            self.tag = 'synonym'
            scope = 'NARROW'
        if self.tag == 'systematic_synonym':
            #raise DeprecationWarning,'narrow_synonym is a deprecated tag use instead sysnonym tag'
            self.tag = 'synonym'
            scope = 'SYSTEMATIC'
        ## this is our own constraint. deprecated tags are not saved by this parser.
        assert self.tag =='synonym',"%s synonym type is not managed" % self.tag
        ## dbxrefs are searched into the rest of the standard value.
        dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)
        ## recording zone
        ## the value of a SynonymValue is not the standard value but the definition.
        self.value = definition
        self.dbxrefs = xrefFactory(dbxrefs)
        self.scope = scope
        self.types = list(types)

    def __eq__(self,b):
        return ((self.value==b.value) and (self.dbxrefs==b.dbxrefs)
                and (self.scope==b.scope) and (self.types==b.types)
                and (self.__doc__==b.__doc__) and (self.tag==b.tag)
                and (self.trailing_modifiers==b.trailing_modifiers))

    def __hash__(self):
        # sum of member hashes folded into a 31-bit value
        # (Python 2: reduce is a builtin)
        return (reduce(lambda x,y:x+y,(hash(z) for z in [self.__doc__,
                                                         self.value,
                                                         frozenset(self.dbxrefs),
                                                         self.scope,
                                                         frozenset(self.types),
                                                         self.tag,
                                                         self.trailing_modifiers]),0)) % (2**31)
class XrefValue(TaggedValue):
    '''
    A couple 'xref:value' inherited from the class TaggedValue. Used to manage
    xref tags.
    '''
    def __init__(self,line):
        ## use of the TaggedValue constructor
        TaggedValue.__init__(self, line)
        ## use the same function as the dbxrefs
        self.value=xrefFactory(self.value)
        if self.tag in ('xref_analog','xref_unk'):
            raise DeprecationWarning,'%s is a deprecated tag use instead sysnonym tag' % self.tag
            # NOTE(review): unreachable — the raise above aborts before the
            # tag can be normalized; warnings.warn was probably intended
            self.tag='xref'
        ## this is our own constraint. deprecated tags are not saved by this parser.
        assert self.tag=='xref'
class RelationshipValue(TaggedValue):
    '''
    A tagged value describing a relationship to another term; used for
    the intersection_of, union_of and relationship tags.
    '''

    def __init__(self, line):
        TaggedValue.__init__(self, line)
        # the generic value reads "relationship_name TERM", or just
        # "TERM" in which case the relationship defaults to is_a
        parts = self.value.split(None, 1)
        if len(parts) == 2:
            relationship, term = parts
        else:
            relationship = 'is_a'
            term = parts[0]
        self.value = term
        self.relationship = relationship
class NamespaceValue(TaggedValue):
    '''A tagged value holding a namespace name; plain TaggedValue parsing.'''

    def __init__(self, line):
        TaggedValue.__init__(self, line)
class RemarkValue(TaggedValue):
    '''
    A 'remark' tagged value whose content is itself a 'label: text'
    couple; the label and the text are stored as separate attributes.
    '''

    def __init__(self, line):
        TaggedValue.__init__(self, line)
        label, text = self.value.split(':', 1)
        self.label = label.strip()
        self.value = text.strip()
def taggedValueFactory(line):
    '''
    Build the appropriate TaggedValue subclass for one 'tag: value' line
    of an OBOEntry.

    @param line: a line of an OBOEntry composed of a tag and a value.
    @type line: string

    @return: a TaggedValue (or subclass) instance
    '''
    if line.startswith('namespace') or line.startswith('default-namespace'):
        return NamespaceValue(line)
    ## DefValue is an inherited class of TaggedValue
    elif line.startswith('def'):
        return DefValue(line)
    ## SynonymValue is an inherited class of TaggedValue
    elif ((line.startswith('synonym') and not line.startswith('synonymtypedef')) or
          line.startswith('exact_synonym') or
          line.startswith('broad_synonym') or
          line.startswith('narrow_synonym')):
        return SynonymValue(line)
    ## XrefValue is an inherited class of TaggedValue
    elif line.startswith('xref'):
        return XrefValue(line)
    ## NameValue is an inherited class of TaggedValue
    elif line.startswith('name'):
        return NameValue(line)
    ## RelationshipValue is an inherited class of TaggedValue
    elif (line.startswith('intersection_of') or
          line.startswith('union_of') or
          line.startswith('relationship')):
        return RelationshipValue(line)
    elif line.startswith('remark'):
        return RemarkValue(line)
    ## every other line is a generic tag / value couple
    else:
        return TaggedValue(line)
#################################################################################
## Xref treatment area ##
#################################################################################
class Xref(object):
    '''
    A xref of an OBO entry: a reference, an optional quoted description
    and optional trailing modifiers.  Used both for the dbxrefs of
    SynonymValue/DefValue and for the value of XrefValue.
    '''
    # splits the bare reference from the '"description" {modifiers}' part
    __splitdata__ = re.compile(' +(?=["{])')

    def __init__(self, ref):
        description = None
        trailing_modifiers = None
        if ref == '':
            # empty bracket content: no reference at all
            # (Modifs JJ sinon erreur : list index out of range)
            ref = None
            pieces = []
        else:
            pieces = Xref.__splitdata__.split(ref, 1)
            ref = pieces[0]
        if len(pieces) > 1:
            extra = pieces[1]
            found = TaggedValue._match_quotedString.search(extra)
            if found is not None:
                description = found.group(0)
                extra.replace(description, '')
            trailing_modifiers = trailingModifierFactory(extra)
        self.reference = ref
        self.description = description
        self.trailing_modifiers = trailing_modifiers

    def __eq__(self, b):
        return ((self.reference == b.reference)
                and (self.description == b.description)
                and (self.trailing_modifiers == b.trailing_modifiers))

    def __hash__(self):
        # sum of the member hashes, folded into a 31-bit value
        return sum(hash(z) for z in [self.reference,
                                     self.description,
                                     self.trailing_modifiers]) % (2**31)
def xrefFactory(string):
    '''
    Dispatcher of xrefs.

    @param string: either a bracketed list '[ref1, ref2, ...]' (as found in
    the dbxrefs of SynonymValue and DefValue objects) or a single bare
    reference (as found in XrefValue values).
    @type string: string

    @return: a list of Xref for a bracketed list, a single Xref otherwise
    '''
    text = string.strip()
    if text[0] == '[':
        return [Xref(item.strip()) for item in text[1:-1].split(',')]
    return Xref(text)
#################################################################################
## Stanza treatment area ##
#################################################################################
class OBOEntry(dict):
    '''
    An entry of an OBO file: either the header (no stanza name) or a
    bracketed stanza.  Maps every tag to the list of its TaggedValue
    occurrences; for stanzas the raw text is kept under the 'stanza' key.
    '''
    # name between the square brackets opening a stanza
    _match_stanza_name = re.compile('(?<=^\[)[^\]]*(?=\])')

    def __init__(self, stanza):
        # the header is the only entry that does not open with '[name]'
        self.isHeader = stanza[0] != '['
        lines = stanza.split('\n')
        if not self.isHeader:
            self.stanzaName = lines[0].strip()[1:-1]
            lines = lines[1:]
            self["stanza"] = [stanza.strip()]
        # every remaining line is one tag/value couple
        for line in lines:
            parsed = taggedValueFactory(line)
            self.setdefault(parsed.tag, []).append(parsed)

    @staticmethod
    def parseStanzaName(stanza):
        """Return the bracketed stanza name, or None for a header."""
        found = OBOEntry._match_stanza_name.search(stanza)
        return found.group(0) if found else None
class OBOTerm(OBOEntry):
    '''
    A stanza named 'Term'. It inherits from the class OBOEntry.

    The constructor enforces the OBO cardinality constraints; the parsed
    fields are then exposed read-only through the properties below.
    '''
    def __init__(self,stanza):
        '''
        Parse and validate a [Term] stanza.

        @param stanza: the raw stanza text
        @raise AssertionError: when a cardinality constraint is violated
        '''
        ## use of the OBOEntry constructor.
        OBOEntry.__init__(self, stanza)
        assert self.stanzaName=='Term'
        assert 'stanza' in self
        assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id"
        assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name"
        assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term"
        assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term"
        assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term"
        # NOTE(review): union_of / intersection_of require at least two
        # operands; the assertion messages read oddly but the >=2 checks
        # match the OBO cardinality rules
        assert 'union_of' not in self or len(self['union_of'])>=2,"Only one union relationship is allowed for an OBO term"
        assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"Only one intersection relationship is allowed for an OBO term"

        # an obsolete term must not carry any relationship
        if self._isObsolete():
            #assert 'is_a' not in self
            assert 'relationship' not in self
            assert 'inverse_of' not in self
            assert 'disjoint_from' not in self
            assert 'union_of' not in self
            assert 'intersection_of' not in self
        # replaced_by / consider only make sense on obsolete terms
        assert 'replaced_by' not in self or self._isObsolete()
        assert 'consider' not in self or self._isObsolete()

    def _getStanza(self):
        # raw stanza text stored by OBOEntry.__init__
        return self['stanza'][0]

    ## make-up functions.
    def _getDefinition(self):
        if 'def' in self:
            return self['def'][0]
        return None

    def _getId(self):
        return self['id'][0]

    def _getNamespace(self):
        return self['namespace'][0]

    def _getName(self):
        return self['name'][0]

    def _getComment(self):
        if 'comment' in self:
            return self['comment'][0]
        return None

    def _getAltIds(self):
        # duplicates removed; None when the tag is absent
        if 'alt_id' in self:
            return list(set(self.get('alt_id',None)))
        return None

    def _getIsA(self):
        if 'is_a' in self:
            return list(set(self.get('is_a',None)))
        return None

    def _getSynonym(self):
        if 'synonym' in self :
            return list(set(self.get('synonym',None)))
        return None

    def _getSubset(self):
        if self.get('subset',None) != None:
            return list(set(self.get('subset',None)))
        else:
            return None

    def _getXref(self):
        if 'xref' in self:
            return list(set(self.get('xref',None)))
        return None

    def _getRelationShip(self):
        if 'relationship' in self:
            return list(set(self.get('relationship',None)))
        return None

    def _getUnion(self):
        return list(set(self.get('union_of',None)))

    def _getIntersection(self):
        return list(set(self.get('intersection_of',None)))

    def _getDisjonction(self):
        return list(set(self.get('disjoint_from',None)))

    def _isObsolete(self):
        # the tag holds a TaggedValue whose str() is 'true' when obsolete
        return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true'

    def _getReplacedBy(self):
        if 'replaced_by' in self:
            return list(set(self.get('replaced_by',None)))
        return None

    def _getConsider(self):
        if 'consider' in self:
            return list(set(self.get('consider',None)))
        return None

    ## automatically make-up !
    stanza = property(_getStanza,None,None)
    definition = property(_getDefinition,None,None)
    id = property(_getId,None,None)
    namespace = property(_getNamespace,None,None)
    name = property(_getName,None,None)
    comment = property(_getComment,None,None)
    alt_ids = property(_getAltIds,None,None)
    is_a = property(_getIsA,None,None)
    synonyms = property(_getSynonym,None,None)
    subsets = property(_getSubset,None,None)
    xrefs = property(_getXref,None,None)
    relationship = property(_getRelationShip,None,None)
    union_of = property(_getUnion,None,None)
    intersection_of = property(_getIntersection,None,None)
    disjoint_from = property(_getDisjonction,None,None)
    is_obsolete = property(_isObsolete,None,None)
    replaced_by = property(_getReplacedBy,None,None)
    consider = property(_getConsider,None,None)
def OBOEntryFactory(stanza):
    '''
    Dispatch a raw stanza to the appropriate entry class.

    @param stanza: a stanza composed of several lines.
    @type stanza: text

    @return: an C{OBOTerm} | C{OBOEntry} instance

    @note: "Term" stanzas are wrapped in the richer C{OBOTerm} class;
           every other stanza type falls back to the generic C{OBOEntry}.
    '''
    if OBOEntry.parseStanzaName(stanza) == "Term":
        return OBOTerm(stanza)
    return OBOEntry(stanza)
def OBOEntryIterator(file):
    '''
    Iterate over the stanzas of an OBO file, yielding one parsed
    C{OBOTerm} | C{OBOEntry} instance per stanza.
    '''
    for stanza in stanzaIterator(file):
        debug(stanza)
        yield OBOEntryFactory(stanza)

View File

@ -0,0 +1,137 @@
"""
Module providing high level functions to manage command line options.
"""
import logging
import sys
from logging import debug
from optparse import OptionParser
from obitools.utils import universalOpen
from obitools.utils import fileSize
from obitools.utils import universalTell
from obitools.utils import progressBar
from obitools.format.options import addInputFormatOption, addInOutputOption,\
autoEntriesIterator
import time
def getOptionManager(optionDefinitions,entryIterator=None,progdoc=None):
    '''
    Build an option manager function that is able to parse
    the command line options of the script.

    @param optionDefinitions: list of functions, each describing a set of
                              options. Each function must accept as its
                              unique parameter an instance of OptionParser.
    @type optionDefinitions: list of functions.

    @param entryIterator: an iterator generator function returning
                          entries from the data files.
    @type entryIterator: an iterator generator function with only one
                         parameter of type file

    @param progdoc: usage string displayed in the parser help message.
    @type progdoc: str

    @return: a function of no argument; calling it parses sys.argv and
             returns an (options, entry iterator) couple.
    '''
    parser = OptionParser(progdoc)
    parser.add_option('--DEBUG',
                      action="store_true", dest="debug",
                      default=False,
                      help="Set logging in debug mode")

    parser.add_option('--no-psyco',
                      action="store_true", dest="noPsyco",
                      default=False,
                      help="Don't use psyco even if it installed")

    parser.add_option('--without-progress-bar',
                      action="store_false", dest="progressbar",
                      default=True,
                      help="deactivate progress bar")

    # When one of the format-related option sets is requested, the entry
    # iterator must be chosen at run time from the parsed format options
    # instead of the fixed entryIterator argument.
    checkFormat=False

    for f in optionDefinitions:
        if f == addInputFormatOption or f == addInOutputOption:
            checkFormat=True
        f(parser)

    def commandLineAnalyzer():
        # Parse sys.argv, configure logging, and build the entry iterator
        # over all positional-argument files (or stdin when none given).
        options,files = parser.parse_args()
        if options.debug:
            logging.root.setLevel(logging.DEBUG)

        if checkFormat:
            ei=autoEntriesIterator(options)
        else:
            ei=entryIterator

        i = allEntryIterator(files,ei,with_progress=options.progressbar)
        return options,i

    return commandLineAnalyzer
# Module-level bookkeeping updated by allEntryIterator() so that the
# current* helper functions below can report on the file being read.
_currentInputFileName=None
_currentFile = None
_currentFileSize = None
def currentInputFileName():
    """Return the name of the input file currently being processed."""
    name = _currentInputFileName
    return name
def currentInputFile():
    """Return the file object currently being read."""
    handle = _currentFile
    return handle
def currentFileSize():
    """Return the size of the current input file, as computed when it was opened."""
    size = _currentFileSize
    return size
def currentFileTell():
    """Return the current read offset within the active input file."""
    handle = _currentFile
    return universalTell(handle)
def fileWithProgressBar(file,step=100):
    # Wrap an input file iterator so that a progress bar is drawn on stderr
    # while its lines are consumed.  When the file size cannot be obtained,
    # the file is returned unchanged and no progress is displayed.
    # NOTE(review): the `step` parameter is currently unused — confirm
    # whether it was meant to throttle progressBar refreshes.
    try:
        size = currentFileSize()
    except:
        # NOTE(review): bare except — any failure silently disables the bar.
        size = None
    def fileBar():
        # Generator yielding the lines of `file` while refreshing the bar.
        pos=1
        # Initial draw before any line is read (third positional argument
        # presumably resets the bar — TODO confirm against
        # obitools.utils.progressBar).
        progressBar(pos, size, True,currentInputFileName())
        for l in file:
            # currentFileTell is passed as a callable, not called here —
            # presumably progressBar invokes it to get the offset; verify.
            progressBar(currentFileTell,size,head=currentInputFileName())
            yield l
        # Terminate the progress line (Python 2 print syntax).
        print >>sys.stderr,''
    if size is None:
        return file
    else:
        f = fileBar()
        return f
def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102):
    """
    Iterate over every entry (or raw line) of a list of input files.

    Each file name in `files` is opened with universalOpen; the module
    globals tracking the current file are updated as a side effect so the
    current* helpers report on the file being read.  When `files` is empty,
    entries are read from standard input instead.  When `entryIterator` is
    None, raw lines are yielded; otherwise the entries it produces are.
    """
    global _currentFile
    global _currentInputFileName
    global _currentFileSize

    if not files:
        # No file arguments: fall back to standard input (no progress bar).
        if entryIterator is None:
            for line in sys.stdin:
                yield line
        else:
            for entry in entryIterator(sys.stdin):
                yield entry
        return

    for name in files:
        _currentInputFileName=name
        stream = universalOpen(name)
        _currentFile=stream
        _currentFileSize=fileSize(_currentFile)
        debug(stream)
        if with_progress:
            # Wrap the stream so a progress bar is displayed while reading.
            stream=fileWithProgressBar(stream,step=histo_step)
        if entryIterator is None:
            for line in stream:
                yield line
        else:
            for entry in entryIterator(stream):
                yield entry

Some files were not shown because too many files have changed in this diff Show More