47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
from obitools.format.genericparser import GenericParser
|
|
from obitools.utils import universalOpen
|
|
from obitools.fasta import parseFastaDescription
|
|
from obitools import NucSequence
|
|
|
|
|
|
import sys
|
|
|
|
# Module-level parser for contig records. The pattern handed to GenericParser
# is presumably the record delimiter (a line starting with "CO ") -- confirm
# against obitools.format.genericparser.
#
# Backslashes that belong to the regular expression are written as '\\S' so
# the literals contain no invalid string escape; the resulting pattern text is
# exactly the same as with a bare '\S'. The '\n' escapes are intentionally
# real newline characters.
_contigIterator = GenericParser('^CO ')

# AF line: a name token, a 'U' or 'C' flag and a (possibly negative) integer.
# contigIterator() below uses the integer as the start offset of the read.
_contigIterator.addParseAction('AF', '\nAF +(\\S+) +([UC]) +(-?[0-9]+)')

# RD block: a name token, three unsigned integers, then the sequence text
# (letters, '*' and newlines) up to the first blank line.
_contigIterator.addParseAction('RD', '\nRD +(\\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n')

# DS line: everything after "DS" on that line.
_contigIterator.addParseAction('DS', '\nDS +(.+)')

# CO line: the first token after "CO ", used as the contig name.
_contigIterator.addParseAction('CO', '^CO (\\S+)')
|
|
|
|
def contigIterator(file):
    """Iterate over the contigs described in *file*.

    *file* is passed to ``universalOpen`` and then parsed with the
    module-level ``_contigIterator`` parser.

    For every parsed entry, yield a ``(contig_name, reads)`` pair where
    *reads* is a list of ``NucSequence`` objects, one per read. Each read is
    padded with '-' on the left and on the right so that all reads of a
    contig have the same length and are positioned according to their AF
    offset. Every returned read carries two tags:

    - ``shift``: 1-based start column inside the padded block,
    - ``tail``: number of '-' characters appended on the right.

    A warning is written to ``sys.stderr`` for reads with a negative offset;
    such reads are kept, not skipped.

    NOTE(review): an entry without any read makes ``max()`` fail on an empty
    sequence -- confirm whether the parser can produce such entries.
    """
    file = universalOpen(file)

    for entry in _contigIterator(file):
        contig_name = entry['CO'][0]
        contig = []

        # Python 2 idiom: map(None, a, b, c) zips the three lists, padding
        # the shorter ones with None.
        for rd, ds, af in map(None, entry['RD'], entry['DS'], entry['AF']):
            read_id = rd[0]
            shift = int(af[2])
            if shift < 0:
                # The read is deliberately kept; the offsets are normalised
                # against the smallest one further down.
                sys.stderr.write(
                    "Sequence %s in contig %s has a negative padding value %d\n"
                    % (read_id, contig_name, shift))

            definition, info = parseFastaDescription(ds)
            info['shift'] = shift
            # Join the sequence lines and turn pad characters '*' into gaps.
            seq = rd[4].replace('\n', '').replace('*', '-').strip()
            contig.append(NucSequence(read_id, seq, definition, **info))

        # Column just past the rightmost base, and leftmost start column,
        # both expressed in the original coordinates.
        maxlen = max(len(x) + x['shift'] for x in contig)
        minshift = min(x['shift'] for x in contig)
        rep = []

        for s in contig:
            info = s.getTags()
            start = info['shift']

            # Re-base the offset so that the leftmost read starts at 1.
            lead = start - minshift
            info['shift'] = lead + 1
            head = '-' * lead

            # Right padding up to the common width (maxlen - minshift).
            # The previous expression
            #     maxlen + minshift - len(s) - info['shift'] - 1
            # only gave this value when minshift == 1.
            tail = maxlen - start - len(s)
            info['tail'] = tail

            newseq = NucSequence(s.id, head + str(s) + '-' * tail,
                                 s.definition, **info)
            rep.append(newseq)

        yield contig_name, rep
|
|
|