Add some docs to fasta module

This commit is contained in:
2008-03-18 17:02:40 +00:00
parent f5c818bff2
commit de2b771e19

View File

@ -1,3 +1,9 @@
"""
fasta module provides functions to read and write sequences in fasta format.
"""
from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence
from obitools.align import alignmentReader
from obitools.utils import universalOpen
@ -9,6 +15,28 @@ def _fastaJoinSeq(seqarray):
return ''.join([x.strip() for x in seqarray])
def _parseFasta(seq,bioseqfactory,tagparser=_fastaTag,joinseq=_fastaJoinSeq):
'''
Parse a fasta record.
@attention: internal purpose function
@param seq: a sequence object containing all lines corresponding
to one fasta sequence
@type seq: C{list} or C{tuple} of C{str}
@param bioseqfactory: a callable object returning a BioSequence
instance.
@type bioseqfactory: a callable object
@param tagparser: a compiled regular expression usable
to identify key, value couples from
title line.
@type tagparser: regex instance
@return: a C{BioSequence} instance
@see: L{fastaIterator}
'''
title = seq[0].strip()[1:].split(None,1)
id=title[0]
if len(title) == 2:
@ -39,6 +67,17 @@ def fastaIterator(file,bioseqfactory=bioSeqGenerator,tagparser=_fastaTag,joinseq
@param bioseqfactory: a callable object returning a BioSequence
instance.
@type bioseqfactory: a callable object
@param tagparser: a compiled regular expression usable
to identify key, value couples from
title line.
@type tagparser: regex instance
@return: an iterator on C{BioSequence} instances
@see: L{fastaNucIterator}
@see: L{fastaAAIterator}
'''
file = universalOpen(file,'ru')
@ -59,6 +98,16 @@ def fastaNucIterator(file,tagparser=_fastaTag):
@param file: a line iterator containing fasta data
@type file: an iterable object
@param tagparser: a compiled regular expression usable
to identify key, value couples from
title line.
@type tagparser: regex instance
@return: an iterator on C{NucSequence} instances
@see: L{fastaIterator}
@see: L{fastaAAIterator}
'''
return fastaIterator(file, NucSequence,tagparser)
@ -68,8 +117,18 @@ def fastaAAIterator(file,tagparser=_fastaTag):
Returned sequences by this iterator will be AASequence
instances
@param file: a line iterator containint fasta data
@param file: a line iterator containing fasta data
@type file: an iterable object
@param tagparser: a compiled regular expression usable
to identify key, value couples from
title line.
@type tagparser: regex instance
@return: an iterator on C{AASequence} instances
@see: L{fastaIterator}
@see: L{fastaNucIterator}
'''
return fastaIterator(file, AASequence,tagparser)
@ -82,6 +141,11 @@ def formatFasta(data,gbmode=False):
@type data: BioSequence instance or an iterable object
on BioSequence instances
@param gbmode: if set to C{True}, the identifier part of the title
line follows the recommendations from NCBI to allow
sequence indexing with the blast formatdb command.
@type gbmode: bool
@return: a fasta formatted string
@rtype: str
'''