git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@76 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -171,15 +171,14 @@ def readNodeTable(file):
|
|||||||
|
|
||||||
return taxonomy,ranks,index
|
return taxonomy,ranks,index
|
||||||
|
|
||||||
def scientificNameIterator(file):
|
def nameIterator(file):
|
||||||
file = universalOpen(file)
|
file = universalOpen(file)
|
||||||
names = ColumnFile(file,
|
names = ColumnFile(file,
|
||||||
sep='|',
|
sep='|',
|
||||||
types=(int,str,
|
types=(int,str,
|
||||||
str,str))
|
str,str))
|
||||||
for taxid,name,unique,classname,white in names:
|
for taxid,name,unique,classname,white in names:
|
||||||
if classname == 'scientific name':
|
yield taxid,name,classname
|
||||||
yield taxid,name
|
|
||||||
|
|
||||||
def mergedNodeIterator(file):
|
def mergedNodeIterator(file):
|
||||||
file = universalOpen(file)
|
file = universalOpen(file)
|
||||||
@ -201,8 +200,12 @@ def readTaxonomyDump(taxdir):
|
|||||||
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
|
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
|
||||||
|
|
||||||
print >>sys.stderr,"Adding scientific name..."
|
print >>sys.stderr,"Adding scientific name..."
|
||||||
for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir):
|
|
||||||
taxonomy[index[taxid]].append(name)
|
alternativeName=[]
|
||||||
|
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
|
||||||
|
alternativeName.append((name,classname,index[taxid]))
|
||||||
|
if classname == 'scientific name':
|
||||||
|
taxonomy[index[taxid]].append(name)
|
||||||
|
|
||||||
print >>sys.stderr,"Adding taxid alias..."
|
print >>sys.stderr,"Adding taxid alias..."
|
||||||
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
|
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
|
||||||
@ -212,7 +215,7 @@ def readTaxonomyDump(taxdir):
|
|||||||
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
|
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
|
||||||
index[taxid]=None
|
index[taxid]=None
|
||||||
|
|
||||||
return taxonomy,ranks,index
|
return taxonomy,ranks,alternativeName,index
|
||||||
|
|
||||||
|
|
||||||
#####
|
#####
|
||||||
@ -405,6 +408,22 @@ def ecoRankPacker(rank):
|
|||||||
|
|
||||||
return packed
|
return packed
|
||||||
|
|
||||||
|
def ecoNamePacker(name):
    """Serialize one taxon-name record to its big-endian binary form.

    name -- indexable record read as (name text, name class, taxon index):
            name[0] and name[1] are strings (byte strings or text),
            name[2] is an unsigned integer.

    Returned byte layout:
        uint32  record size, not counting this size field itself
        uint32  1 when the name class is 'scientific name', otherwise 0
        uint32  byte length of the name
        uint32  byte length of the name class
        uint32  taxon index (name[2])
        bytes   name
        bytes   name class

    Raises struct.error when name[2] does not fit an unsigned 32-bit field.
    """

    def asBytes(text):
        # struct's 's' field needs a byte string; text is encoded as UTF-8 so
        # the stored lengths count bytes, not characters.  Byte strings
        # (plain Python 2 str included) pass through unchanged.
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    namebytes = asBytes(name[0])
    classbytes = asBytes(name[1])

    namelength = len(namebytes)
    classlength = len(classbytes)

    # Four uint32 fields follow the size field, then the two strings.
    totalSize = namelength + classlength + 4 + 4 + 4 + 4

    isScientific = int(classbytes == 'scientific name'.encode('ascii'))

    packed = struct.pack('> I I I I I %ds %ds' % (namelength, classlength),
                         totalSize,
                         isScientific,
                         namelength,
                         classlength,
                         name[2],
                         namebytes,
                         classbytes)

    return packed
|
||||||
|
|
||||||
def ecoSeqWriter(file,input,taxindex,parser):
|
def ecoSeqWriter(file,input,taxindex,parser):
|
||||||
output = open(file,'wb')
|
output = open(file,'wb')
|
||||||
@ -463,17 +482,39 @@ def ecoRankWriter(file,ranks):
|
|||||||
|
|
||||||
output.close()
|
output.close()
|
||||||
|
|
||||||
|
def nameCmp(n1,n2):
    """Three-way, case-insensitive comparison of two name records.

    Only the first item of each record (the name text) is compared, after
    upper-casing both sides.

    Returns -1 when n1 sorts before n2, 1 when it sorts after, and 0 when
    the upper-cased names are equal.
    """
    name1 = n1[0].upper()
    name2 = n2[0].upper()

    # Sign idiom: True/False subtract as 1/0, yielding -1, 0 or 1.
    return (name1 > name2) - (name1 < name2)
|
||||||
|
|
||||||
|
|
||||||
|
def ecoNameWriter(file,names):
    """Write the name index file.

    file  -- path of the file to create; opened in binary write mode.
    names -- list of name records accepted by ecoNamePacker; the first item
             of each record is the name text used for ordering.

    The file starts with a big-endian uint32 holding the number of records,
    followed by each record packed with ecoNamePacker, ordered
    case-insensitively by name.

    Side effect: `names` is sorted in place.
    """
    # Key-based sort gives the same order as comparing upper-cased names
    # pairwise (both sorts are stable) and does not rely on cmp-style
    # sorting.
    names.sort(key=lambda record: record[0].upper())

    output = open(file,'wb')
    try:
        output.write(struct.pack('> I',len(names)))

        for name in names:
            output.write(ecoNamePacker(name))
    finally:
        # Close the handle even when packing or writing fails.
        output.close()
|
||||||
|
|
||||||
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
|
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
|
||||||
|
|
||||||
ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
|
ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
|
||||||
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
|
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
|
||||||
|
ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
|
||||||
|
|
||||||
filecount = 0
|
filecount = 0
|
||||||
for filename in seqFileNames:
|
for filename in seqFileNames:
|
||||||
filecount+=1
|
filecount+=1
|
||||||
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
|
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
|
||||||
filename,
|
filename,
|
||||||
taxonomy[2],
|
taxonomy[3],
|
||||||
parser)
|
parser)
|
||||||
if sk:
|
if sk:
|
||||||
print >>sys.stderr,"Skipped entry :"
|
print >>sys.stderr,"Skipped entry :"
|
||||||
@ -526,9 +567,9 @@ def printHelp():
|
|||||||
print "-----------------------------------"
|
print "-----------------------------------"
|
||||||
print "ecoPCRFormat.py [option] <argument>"
|
print "ecoPCRFormat.py [option] <argument>"
|
||||||
print "-----------------------------------"
|
print "-----------------------------------"
|
||||||
print "-e --embl :[E]mbl format file name"
|
print "-e --embl :[E]mbl format"
|
||||||
print "-f --fasta :[F]asta format file name"
|
print "-f --fasta :[F]asta format"
|
||||||
print "-g --genbank :[G]enbank format file name"
|
print "-g --genbank :[G]enbank format"
|
||||||
print "-h --help :[H]elp - print this help"
|
print "-h --help :[H]elp - print this help"
|
||||||
print "-n --name :[N]ame of the new database created"
|
print "-n --name :[N]ame of the new database created"
|
||||||
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
|
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
|
||||||
|
Reference in New Issue
Block a user