git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@67 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -267,6 +267,30 @@ def genbankEntryParser(entry):
|
|||||||
Tx = None
|
Tx = None
|
||||||
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
||||||
|
|
||||||
|
######################
|
||||||
|
|
||||||
|
# Matches every line break, together with the "DE" line code that directly
# follows it on a continuation line.  The previous pattern '[\nDE]' was a
# character class and therefore stripped every 'D' and 'E' from the text.
_cleanDef = re.compile(r'\n(?:DE(?=\s|$))?')


def cleanDef(definition):
    """Join a multi-line definition into a single line of text.

    Every newline is removed; when the newline is immediately followed by
    a "DE" line code (itself followed by whitespace or the end of the
    string) that code is removed as well.  The blanks that follow the line
    code are left in place, so words on consecutive lines stay separated.

    definition -- the raw definition text, possibly spanning several lines.

    Returns the cleaned string.
    """
    return _cleanDef.sub('', definition)
|
||||||
|
|
||||||
|
# Pre-compiled patterns used by emblEntryParser.  They are written as raw
# strings so that the backslash in '\.' reaches the regex engine unchanged
# instead of relying on Python keeping an unknown string escape.

# Identifier: the run of non-blank characters after a line-initial "ID"
# followed by three spaces, up to (not including) a ';'.
_emblParseID = re.compile(r'(?<=^ID {3})[^ ]+(?=;)', re.MULTILINE)
# Definition: from the first "DE   " line code to the first '.' that ends
# a line (optionally followed by blanks); DOTALL lets it span several lines.
_emblParseDE = re.compile(r'(?<=^DE {3}).+?\. *$(?=[^ ])',
                          re.MULTILINE | re.DOTALL)
# Sequence: everything from the first line starting with a blank up to the
# line consisting only of "//".
_emblParseSQ = re.compile(r'(?<=^ ).+?(?=^//$)', re.MULTILINE | re.DOTALL)
# Taxonomic identifier: the digits of a /db_xref="taxon:NNN" qualifier.
_emblParseTX = re.compile(r'(?<= /db_xref="taxon:)[0-9]+(?=")')


def emblEntryParser(entry):
    """Extract identifier, taxid, definition and sequence from one entry.

    entry -- the text of a single flat-file entry.

    Returns a dict with the keys 'id', 'taxid', 'definition' and
    'sequence'.  'taxid' is an int, or None when the entry carries no
    /db_xref="taxon:..." qualifier.  The definition is passed through
    cleanDef() and its whitespace runs are collapsed to single blanks.
    The sequence is upper-cased and handed to cleanSeq() (defined
    elsewhere in this file; presumably it strips numbering and blanks --
    TODO confirm).

    Raises IndexError when the ID, DE or sequence section cannot be found.
    """
    # findall()[0] is kept for the mandatory fields so that a malformed
    # entry still fails with IndexError, as callers may expect.
    Id = _emblParseID.findall(entry)[0]

    rawDefinition = _emblParseDE.findall(entry)[0]
    De = ' '.join(cleanDef(rawDefinition).split())

    rawSequence = _emblParseSQ.findall(entry)[0]
    Sq = cleanSeq(rawSequence.upper())

    # The taxon cross-reference is optional.
    taxonMatch = _emblParseTX.search(entry)
    if taxonMatch is None:
        Tx = None
    else:
        Tx = int(taxonMatch.group())

    return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
|
||||||
|
|
||||||
|
######################
|
||||||
|
|
||||||
_fastaParseID = re.compile('(?<=^>)[^ ]+')
|
_fastaParseID = re.compile('(?<=^>)[^ ]+')
|
||||||
_fastaParseDE = re.compile('(?<=^>).+',)
|
_fastaParseDE = re.compile('(?<=^>).+',)
|
||||||
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
|
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
|
||||||
@ -464,32 +488,53 @@ def ecoParseOptions(arguments):
|
|||||||
}
|
}
|
||||||
|
|
||||||
o,filenames = getopt.getopt(arguments,
|
o,filenames = getopt.getopt(arguments,
|
||||||
'ht:n:gf',
|
'ht:n:gfe',
|
||||||
['help',
|
['help',
|
||||||
'taxonomy=',
|
'taxonomy=',
|
||||||
'name=',
|
'name=',
|
||||||
'genbank',
|
'genbank',
|
||||||
'fasta'])
|
'fasta',
|
||||||
|
'embl'])
|
||||||
|
|
||||||
for name,value in o:
|
for name,value in o:
|
||||||
if name in ('-h','--help'):
|
if name in ('-h','--help'):
|
||||||
pass
|
printHelp()
|
||||||
|
exit()
|
||||||
elif name in ('-t','--taxonomy'):
|
elif name in ('-t','--taxonomy'):
|
||||||
opt['taxdir']=value
|
opt['taxdir']=value
|
||||||
elif name in ('-n','--name'):
|
elif name in ('-n','--name'):
|
||||||
opt['prefix']=value
|
opt['prefix']=value
|
||||||
elif name in ('-g','--genbank'):
|
elif name in ('-g','--genbank'):
|
||||||
opt['parser']=sequenceIteratorFactory(genbankEntryParser,
|
opt['parser']=sequenceIteratorFactory(genbankEntryParser,
|
||||||
entryIterator
|
entryIterator)
|
||||||
)
|
|
||||||
elif name in ('-f','--fasta'):
|
elif name in ('-f','--fasta'):
|
||||||
opt['parser']=sequenceIteratorFactory(fastaEntryParser,
|
opt['parser']=sequenceIteratorFactory(fastaEntryParser,
|
||||||
fastaEntryIterator)
|
fastaEntryIterator)
|
||||||
|
|
||||||
|
elif name in ('-e','--embl'):
|
||||||
|
opt['parser']=sequenceIteratorFactory(emblEntryParser,
|
||||||
|
entryIterator)
|
||||||
else:
|
else:
|
||||||
raise ValueError,'Unknown option %s' % name
|
raise ValueError,'Unknown option %s' % name
|
||||||
|
|
||||||
return opt,filenames
|
return opt,filenames
|
||||||
|
|
||||||
|
def printHelp():
    """Print the command-line usage summary to standard output.

    Takes no argument and returns None.  The text lists the options
    -e/--embl, -f/--fasta, -g/--genbank, -h/--help, -n/--name and
    -t/--taxonomy.
    """
    helpLines = (
        "-----------------------------------",
        " ecoPCRFormat.py",
        "-----------------------------------",
        "ecoPCRFormat.py [option] <argument>",
        "-----------------------------------",
        "-e --embl :[E]mbl format file name",
        "-f --fasta :[F]asta format file name",
        "-g --genbank :[G]enbank format file name",
        "-h --help :[H]elp - print this help",
        "-n --name :[N]ame of the new database created",
        "-t --taxonomy :[T]axonomy - path to the taxonomy database",
        " :bcp-like dump from GenBank taxonomy database.",
        "-----------------------------------",
    )
    # A parenthesised single-argument print emits the same text whether it
    # is parsed as a Python 2 print statement or a Python 3 function call.
    print("\n".join(helpLines))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
opt,filenames = ecoParseOptions(sys.argv[1:])
|
opt,filenames = ecoParseOptions(sys.argv[1:])
|
||||||
|
Reference in New Issue
Block a user