git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@76 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -171,15 +171,14 @@ def readNodeTable(file):
|
||||
|
||||
return taxonomy,ranks,index
|
||||
|
||||
def scientificNameIterator(file):
|
||||
def nameIterator(file):
|
||||
file = universalOpen(file)
|
||||
names = ColumnFile(file,
|
||||
sep='|',
|
||||
types=(int,str,
|
||||
str,str))
|
||||
for taxid,name,unique,classname,white in names:
|
||||
if classname == 'scientific name':
|
||||
yield taxid,name
|
||||
yield taxid,name,classname
|
||||
|
||||
def mergedNodeIterator(file):
|
||||
file = universalOpen(file)
|
||||
@ -201,8 +200,12 @@ def readTaxonomyDump(taxdir):
|
||||
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
|
||||
|
||||
print >>sys.stderr,"Adding scientific name..."
|
||||
for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir):
|
||||
taxonomy[index[taxid]].append(name)
|
||||
|
||||
alternativeName=[]
|
||||
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
|
||||
alternativeName.append((name,classname,index[taxid]))
|
||||
if classname == 'scientific name':
|
||||
taxonomy[index[taxid]].append(name)
|
||||
|
||||
print >>sys.stderr,"Adding taxid alias..."
|
||||
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
|
||||
@ -212,7 +215,7 @@ def readTaxonomyDump(taxdir):
|
||||
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
|
||||
index[taxid]=None
|
||||
|
||||
return taxonomy,ranks,index
|
||||
return taxonomy,ranks,alternativeName,index
|
||||
|
||||
|
||||
#####
|
||||
@ -405,6 +408,22 @@ def ecoRankPacker(rank):
|
||||
|
||||
return packed
|
||||
|
||||
def ecoNamePacker(name):
|
||||
|
||||
namelength = len(name[0])
|
||||
classlength= len(name[1])
|
||||
totalSize = namelength + classlength + 4 + 4 + 4 + 4
|
||||
|
||||
packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
|
||||
totalSize,
|
||||
int(name[1]=='scientific name'),
|
||||
namelength,
|
||||
classlength,
|
||||
name[2],
|
||||
name[0],
|
||||
name[1])
|
||||
|
||||
return packed
|
||||
|
||||
def ecoSeqWriter(file,input,taxindex,parser):
|
||||
output = open(file,'wb')
|
||||
@ -462,18 +481,40 @@ def ecoRankWriter(file,ranks):
|
||||
output.write(ecoRankPacker(rank))
|
||||
|
||||
output.close()
|
||||
|
||||
def nameCmp(n1,n2):
|
||||
name1=n1[0].upper()
|
||||
name2=n2[0].upper()
|
||||
if name1 < name2:
|
||||
return -1
|
||||
elif name1 > name2:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
def ecoNameWriter(file,names):
|
||||
output = open(file,'wb')
|
||||
output.write(struct.pack('> I',len(names)))
|
||||
|
||||
names.sort(nameCmp)
|
||||
|
||||
for name in names:
|
||||
output.write(ecoNamePacker(name))
|
||||
|
||||
output.close()
|
||||
|
||||
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
|
||||
|
||||
ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
|
||||
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
|
||||
|
||||
ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
|
||||
|
||||
filecount = 0
|
||||
for filename in seqFileNames:
|
||||
filecount+=1
|
||||
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
|
||||
filename,
|
||||
taxonomy[2],
|
||||
taxonomy[3],
|
||||
parser)
|
||||
if sk:
|
||||
print >>sys.stderr,"Skipped entry :"
|
||||
@ -526,9 +567,9 @@ def printHelp():
|
||||
print "-----------------------------------"
|
||||
print "ecoPCRFormat.py [option] <argument>"
|
||||
print "-----------------------------------"
|
||||
print "-e --embl :[E]mbl format file name"
|
||||
print "-f --fasta :[F]asta format file name"
|
||||
print "-g --genbank :[G]enbank format file name"
|
||||
print "-e --embl :[E]mbl format"
|
||||
print "-f --fasta :[F]asta format"
|
||||
print "-g --genbank :[G]enbank format"
|
||||
print "-h --help :[H]elp - print this help"
|
||||
print "-n --name :[N]ame of the new database created"
|
||||
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
|
||||
|
Reference in New Issue
Block a user