git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@67 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -267,6 +267,30 @@ def genbankEntryParser(entry):
|
||||
Tx = None
|
||||
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
||||
|
||||
######################
|
||||
|
||||
_cleanDef = re.compile('[\nDE]')
|
||||
|
||||
def cleanDef(definition):
|
||||
return _cleanDef.sub('',definition)
|
||||
|
||||
_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
|
||||
_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
|
||||
_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
|
||||
_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
|
||||
|
||||
def emblEntryParser(entry):
|
||||
Id = _emblParseID.findall(entry)[0]
|
||||
De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
|
||||
Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
|
||||
try:
|
||||
Tx = int(_emblParseTX.findall(entry)[0])
|
||||
except IndexError:
|
||||
Tx = None
|
||||
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
||||
|
||||
######################
|
||||
|
||||
_fastaParseID = re.compile('(?<=^>)[^ ]+')
|
||||
_fastaParseDE = re.compile('(?<=^>).+',)
|
||||
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
|
||||
@ -464,32 +488,53 @@ def ecoParseOptions(arguments):
|
||||
}
|
||||
|
||||
o,filenames = getopt.getopt(arguments,
|
||||
'ht:n:gf',
|
||||
'ht:n:gfe',
|
||||
['help',
|
||||
'taxonomy=',
|
||||
'name=',
|
||||
'genbank',
|
||||
'fasta'])
|
||||
'fasta',
|
||||
'embl'])
|
||||
|
||||
for name,value in o:
|
||||
if name in ('-h','--help'):
|
||||
pass
|
||||
printHelp()
|
||||
exit()
|
||||
elif name in ('-t','--taxonomy'):
|
||||
opt['taxdir']=value
|
||||
elif name in ('-n','--name'):
|
||||
opt['prefix']=value
|
||||
elif name in ('-g','--genbank'):
|
||||
opt['parser']=sequenceIteratorFactory(genbankEntryParser,
|
||||
entryIterator
|
||||
)
|
||||
entryIterator)
|
||||
|
||||
elif name in ('-f','--fasta'):
|
||||
opt['parser']=sequenceIteratorFactory(fastaEntryParser,
|
||||
fastaEntryIterator)
|
||||
|
||||
elif name in ('-e','--embl'):
|
||||
opt['parser']=sequenceIteratorFactory(emblEntryParser,
|
||||
entryIterator)
|
||||
else:
|
||||
raise ValueError,'Unknown option %s' % name
|
||||
|
||||
return opt,filenames
|
||||
|
||||
def printHelp():
|
||||
print "-----------------------------------"
|
||||
print " ecoPCRFormat.py"
|
||||
print "-----------------------------------"
|
||||
print "ecoPCRFormat.py [option] <argument>"
|
||||
print "-----------------------------------"
|
||||
print "-e --embl :[E]mbl format file name"
|
||||
print "-f --fasta :[F]asta format file name"
|
||||
print "-g --genbank :[G]enbank format file name"
|
||||
print "-h --help :[H]elp - print this help"
|
||||
print "-n --name :[N]ame of the new database created"
|
||||
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
|
||||
print " :bcp-like dump from GenBank taxonomy database."
|
||||
print "-----------------------------------"
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
opt,filenames = ecoParseOptions(sys.argv[1:])
|
||||
|
Reference in New Issue
Block a user