Add some docs to fasta module

This commit is contained in:
2008-03-18 17:02:40 +00:00
parent f5c818bff2
commit de2b771e19

View File

@ -1,3 +1,9 @@
"""
fasta module provides functions to read and write sequences in fasta format.
"""
from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence from obitools import bioSeqGenerator,BioSequence,AASequence,NucSequence
from obitools.align import alignmentReader from obitools.align import alignmentReader
from obitools.utils import universalOpen from obitools.utils import universalOpen
@ -9,6 +15,28 @@ def _fastaJoinSeq(seqarray):
return ''.join([x.strip() for x in seqarray]) return ''.join([x.strip() for x in seqarray])
def _parseFasta(seq,bioseqfactory,tagparser=_fastaTag,joinseq=_fastaJoinSeq): def _parseFasta(seq,bioseqfactory,tagparser=_fastaTag,joinseq=_fastaJoinSeq):
'''
Parse a fasta record.
@attention: internal purpose function
@param seq: a sequence object containing all lines corresponding
to one fasta sequence
@type seq: C{list} or C{tuple} of C{str}
@param bioseqfactory: a callable object returning a BioSequence
instance.
@type bioseqfactory: a callable object
@param tagparser: a compiled regular expression usable
to identify key/value pairs from
the title line.
@type tagparser: regex instance
@return: a C{BioSequence} instance
@see: L{FastaIterator}
'''
title = seq[0].strip()[1:].split(None,1) title = seq[0].strip()[1:].split(None,1)
id=title[0] id=title[0]
if len(title) == 2: if len(title) == 2:
@ -39,6 +67,17 @@ def fastaIterator(file,bioseqfactory=bioSeqGenerator,tagparser=_fastaTag,joinseq
@param bioseqfactory: a callable object returning a BioSequence @param bioseqfactory: a callable object returning a BioSequence
instance. instance.
@type bioseqfactory: a callable object @type bioseqfactory: a callable object
@param tagparser: a compiled regular expression usable
to identify key/value pairs from
the title line.
@type tagparser: regex instance
@return: an iterator on C{BioSequence} instances
@see: L{fastaNucIterator}
@see: L{fastaAAIterator}
''' '''
file = universalOpen(file,'ru') file = universalOpen(file,'ru')
@ -59,6 +98,16 @@ def fastaNucIterator(file,tagparser=_fastaTag):
@param file: a line iterator containing fasta data @param file: a line iterator containing fasta data
@type file: an iterable object @type file: an iterable object
@param tagparser: a compiled regular expression usable
to identify key/value pairs from
the title line.
@type tagparser: regex instance
@return: an iterator on C{NucSequence} instances
@see: L{fastaIterator}
@see: L{fastaAAIterator}
''' '''
return fastaIterator(file, NucSequence,tagparser) return fastaIterator(file, NucSequence,tagparser)
@ -68,8 +117,18 @@ def fastaAAIterator(file,tagparser=_fastaTag):
Returned sequences by this iterator will be AASequence Returned sequences by this iterator will be AASequence
instances instances
@param file: a line iterator containint fasta data @param file: a line iterator containing fasta data
@type file: an iterable object @type file: an iterable object
@param tagparser: a compiled regular expression usable
to identify key/value pairs from
the title line.
@type tagparser: regex instance
@return: an iterator on C{AASequence} instances
@see: L{fastaIterator}
@see: L{fastaNucIterator}
''' '''
return fastaIterator(file, AASequence,tagparser) return fastaIterator(file, AASequence,tagparser)
@ -82,6 +141,11 @@ def formatFasta(data,gbmode=False):
@type data: BioSequence instance or an iterable object @type data: BioSequence instance or an iterable object
on BioSequence instances on BioSequence instances
@param gbmode: if set to C{True}, the identifier part of the title
line follows the NCBI recommendations to allow
sequence indexing with the blast formatdb command.
@type gbmode: bool
@return: a fasta formatted string @return: a fasta formatted string
@rtype: str @rtype: str
''' '''