git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@83 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
786
tools/ecoSort.py
Executable file
786
tools/ecoSort.py
Executable file
@ -0,0 +1,786 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import struct
|
||||
import sys
|
||||
import os
|
||||
import gzip
|
||||
import re
|
||||
|
||||
from reportlab.graphics.shapes import *
|
||||
from reportlab.graphics.charts.barcharts import VerticalBarChart
|
||||
from reportlab.graphics.charts.piecharts import Pie
|
||||
from reportlab.graphics import renderPDF
|
||||
|
||||
#####
|
||||
#
|
||||
# Generic file function
|
||||
#
|
||||
#####
|
||||
|
||||
class Filter(object):
|
||||
"""
|
||||
This object provides different iterators and method :
|
||||
* findTaxonByTaxid
|
||||
* subTreeIterator
|
||||
* parentalTreeIterator
|
||||
* ecoPCRResultIterator
|
||||
* rankFilter
|
||||
* lastCommonTaxon
|
||||
|
||||
see each method for more informations
|
||||
"""
|
||||
|
||||
def __init__(self,path):
|
||||
self._path = path
|
||||
self._taxonFile = "%s.tdx" % self._path
|
||||
self._ranksFile = "%s.rdx" % self._path
|
||||
self._namesFile = "%s.ndx" % self._path
|
||||
self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()
|
||||
|
||||
|
||||
def __universalOpen(self,file):
|
||||
if isinstance(file,str):
|
||||
if file[-3:] == '.gz':
|
||||
rep = gzip.open(file)
|
||||
else:
|
||||
rep = open(file)
|
||||
else:
|
||||
rep = file
|
||||
return rep
|
||||
|
||||
def __universalTell(self,file):
|
||||
if isinstance(file, gzip.GzipFile):
|
||||
file=file.myfileobj
|
||||
return file.tell()
|
||||
|
||||
def __fileSize(self,file):
|
||||
if isinstance(file, gzip.GzipFile):
|
||||
file=file.myfileobj
|
||||
pos = file.tell()
|
||||
file.seek(0,2)
|
||||
length = file.tell()
|
||||
file.seek(pos,0)
|
||||
return length
|
||||
|
||||
def __progressBar(self,pos,max,reset=False,delta=[]):
|
||||
if reset:
|
||||
del delta[:]
|
||||
if not delta:
|
||||
delta.append(time.time())
|
||||
delta.append(time.time())
|
||||
|
||||
delta[1]=time.time()
|
||||
elapsed = delta[1]-delta[0]
|
||||
percent = float(pos)/max * 100
|
||||
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
|
||||
bar = '#' * int(percent/2)
|
||||
bar+= '|/-\\-'[pos % 5]
|
||||
bar+= ' ' * (50 - int(percent/2))
|
||||
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
|
||||
|
||||
|
||||
|
||||
|
||||
#####
|
||||
#
|
||||
# Iterator functions
|
||||
#
|
||||
#####
|
||||
|
||||
|
||||
|
||||
def __ecoRecordIterator(self,file):
|
||||
file = self.__universalOpen(file)
|
||||
(recordCount,) = struct.unpack('> I',file.read(4))
|
||||
|
||||
for i in xrange(recordCount):
|
||||
(recordSize,)=struct.unpack('>I',file.read(4))
|
||||
record = file.read(recordSize)
|
||||
yield record
|
||||
|
||||
|
||||
def __ecoNameIterator(self):
|
||||
for record in self.__ecoRecordIterator(self._namesFile):
|
||||
lrecord = len(record)
|
||||
lnames = lrecord - 16
|
||||
(isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
|
||||
name=names[:namelength]
|
||||
classname=names[namelength:]
|
||||
yield (name,classname,indextaxid)
|
||||
|
||||
|
||||
def __ecoTaxonomicIterator(self):
|
||||
for record in self.__ecoRecordIterator(self._taxonFile):
|
||||
lrecord = len(record)
|
||||
lnames = lrecord - 16
|
||||
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
|
||||
yield (taxid,rankid,parentidx,name)
|
||||
|
||||
|
||||
def __ecoSequenceIterator(self,file):
|
||||
for record in self.__ecoRecordIterator(file):
|
||||
lrecord = len(record)
|
||||
lnames = lrecord - (4*4+20)
|
||||
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
|
||||
de = string[:deflength]
|
||||
seq = gzip.zlib.decompress(string[deflength:])
|
||||
yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)
|
||||
|
||||
|
||||
def __ecoRankIterator(self):
|
||||
for record in self.__ecoRecordIterator(self._ranksFile):
|
||||
yield record
|
||||
|
||||
|
||||
#####
|
||||
#
|
||||
# Indexes
|
||||
#
|
||||
#####
|
||||
|
||||
def __ecoNameIndex(self):
|
||||
indexName = [x for x in self.__ecoNameIterator()]
|
||||
return indexName
|
||||
|
||||
def __ecoRankIndex(self):
|
||||
rank = [r for r in self.__ecoRankIterator()]
|
||||
return rank
|
||||
|
||||
def __ecoTaxonomyIndex(self):
|
||||
taxonomy = []
|
||||
index = {}
|
||||
i = 0;
|
||||
for x in self.__ecoTaxonomicIterator():
|
||||
taxonomy.append(x)
|
||||
index[x[0]] = i
|
||||
i = i + 1
|
||||
return taxonomy, index
|
||||
|
||||
def __readNodeTable(self):
|
||||
taxonomy, index = self.__ecoTaxonomyIndex()
|
||||
ranks = self.__ecoRankIndex()
|
||||
name = self.__ecoNameIndex()
|
||||
return taxonomy,index,ranks,name
|
||||
|
||||
|
||||
#####
|
||||
#
|
||||
# PUBLIC METHODS
|
||||
#
|
||||
#####
|
||||
|
||||
def findTaxonByTaxid(self,taxid):
|
||||
"""
|
||||
Returns a list containing [taxid,rankid,parent_index,nameLength,name]
|
||||
It takes one argument : a taxonomic id
|
||||
"""
|
||||
return self._taxonomy[self._index[taxid]]
|
||||
|
||||
|
||||
def subTreeIterator(self, taxid):
|
||||
"""
|
||||
Returns subtree for given taxid from first child
|
||||
to last child. It takes one argument : a taxonomic id
|
||||
"""
|
||||
idx = self._index[taxid]
|
||||
yield self._taxonomy[idx]
|
||||
for t in self._taxonomy:
|
||||
if t[2] == idx:
|
||||
for subt in self.subTreeIterator(t[0]):
|
||||
yield subt
|
||||
|
||||
|
||||
def parentalTreeIterator(self, taxid):
|
||||
"""
|
||||
return parental tree for given taxonomic id starting from
|
||||
first ancester to the root.
|
||||
"""
|
||||
taxon=self.findTaxonByTaxid(taxid)
|
||||
while taxon[2]!= 0:
|
||||
yield taxon
|
||||
taxon = self._taxonomy[taxon[2]]
|
||||
yield self._taxonomy[0]
|
||||
|
||||
|
||||
def ecoPCRResultIterator(self, file):
|
||||
"""
|
||||
iteration on ecoPCR result file"
|
||||
It returns a dictionnary
|
||||
"""
|
||||
file = self.__universalOpen(file)
|
||||
data = ColumnFile(file,
|
||||
sep='|',
|
||||
types=(str,int,int,
|
||||
str,int,str,
|
||||
int,str,int,
|
||||
str,int,str,
|
||||
str,str,int,
|
||||
str,int,int,
|
||||
str,str),skip='#')
|
||||
|
||||
|
||||
for ac, sq_len, taxid,\
|
||||
rank, sp_taxid, species,\
|
||||
ge_taxid, genus, fa_taxid,\
|
||||
family, sk_taxid, s_kgdom,\
|
||||
strand, oligo_1, error_1,\
|
||||
oligo_2, error_2, amp_len,\
|
||||
sq_des, definition in data:
|
||||
|
||||
yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
|
||||
'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
|
||||
'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
|
||||
'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
|
||||
'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
|
||||
'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
|
||||
'sq_des':sq_des, 'definition':definition}
|
||||
|
||||
def rankFilter(self,rankid,filter):
|
||||
"""
|
||||
boolean telling whether rankid match filter
|
||||
takes 2 arguments :
|
||||
1- rankid
|
||||
2- filter (i.e genus)
|
||||
"""
|
||||
return self._ranks[rankid] == filter
|
||||
|
||||
|
||||
def lastCommonTaxon(self,taxid_1, taxid_2):
|
||||
"""
|
||||
returns a last common parent for two given taxon.
|
||||
It starts from the root and goes down the tree
|
||||
until their parents diverge.
|
||||
It takes 2 arguments :
|
||||
1- taxid 1
|
||||
2- taxid 2
|
||||
"""
|
||||
t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
|
||||
t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
|
||||
t1.reverse()
|
||||
t2.reverse()
|
||||
count = t1 < t2 and len(t1) or len(t2)
|
||||
for i in range(count):
|
||||
if t1[i] != t2[i]:
|
||||
return t1[i-1]
|
||||
return t1[count-1]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ColumnFile(object):
|
||||
"""
|
||||
cut an ecoPCR file into a list
|
||||
"""
|
||||
def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
|
||||
if isinstance(stream,str):
|
||||
self._stream = open(stream)
|
||||
elif hasattr(stream,'next'):
|
||||
self._stream = stream
|
||||
else:
|
||||
raise ValueError,'stream must be string or an iterator'
|
||||
self._delimiter=sep
|
||||
self._strip=strip
|
||||
if types:
|
||||
self._types=[x for x in types]
|
||||
for i in xrange(len(self._types)):
|
||||
if self._types[i] is bool:
|
||||
self._types[i]=ColumnFile.str2bool
|
||||
else:
|
||||
self._types=None
|
||||
self._skip = skip
|
||||
self._oligo = {}
|
||||
|
||||
def str2bool(x):
|
||||
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
|
||||
|
||||
str2bool = staticmethod(str2bool)
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
ligne = self._stream.next()
|
||||
while ligne[0] == self._skip:
|
||||
ligne = self._stream.next()
|
||||
data = ligne.split(self._delimiter)
|
||||
if self._strip or self._types:
|
||||
data = [x.strip() for x in data]
|
||||
if self._types:
|
||||
it = self.endLessIterator(self._types)
|
||||
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
|
||||
return data
|
||||
|
||||
def endLessIterator(self,endedlist):
|
||||
for x in endedlist:
|
||||
yield x
|
||||
while(1):
|
||||
yield endedlist[-1]
|
||||
|
||||
def getOligo(self,line):
|
||||
if line[2:8] == 'direct':
|
||||
r = re.compile('(?<=direct strand oligo1 : )[A-Z]+(?= *)')
|
||||
self._oligo['o1'] = r.findall(line)
|
||||
if line[2:9] == 'reverse':
|
||||
r = re.compile('(?<=reverse strand oligo2 : )[A-Z]+(?= *)')
|
||||
self._oligo['o2'] = r.findall(line)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
|
||||
###########
|
||||
#
|
||||
# DATA STRUCTURE
|
||||
#
|
||||
###########
|
||||
|
||||
|
||||
class Table(list):
|
||||
"""
|
||||
Data object inheriting from list
|
||||
"""
|
||||
def __init__(self, headers, types):
|
||||
list.__init__(self)
|
||||
self.headers = headers
|
||||
self.types = types
|
||||
|
||||
|
||||
def __setitem__ (self,key,value):
|
||||
"""
|
||||
method overloaded to check data types
|
||||
"""
|
||||
for e in range(len(value)):
|
||||
value[e] = self.types[e](value[e])
|
||||
list.__setitem__(self,key,value)
|
||||
|
||||
|
||||
|
||||
###########
|
||||
#
|
||||
# GRAPH FUNCTIONS
|
||||
#
|
||||
###########
|
||||
|
||||
|
||||
def _makeHistogram(data,label,title,path):
|
||||
|
||||
drawing = Drawing(350, 450)
|
||||
|
||||
drawing.add(String(35,430,title,fontSize=10))
|
||||
|
||||
bc = VerticalBarChart()
|
||||
bc.x = 40
|
||||
bc.y = 100
|
||||
bc.height = 300
|
||||
bc.width = 300
|
||||
bc.bars.strokeWidth = 1
|
||||
bc.barSpacing = 2
|
||||
bc.barLabelFormat = '%d'
|
||||
bc.barLabels.dy = -5
|
||||
bc.barLabels.fontSize = 9 - (len(data[0])/8)
|
||||
bc.data = data
|
||||
|
||||
bc.valueAxis.valueMin = min(data[0])*0.7
|
||||
bc.valueAxis.valueMax = max(data[0])
|
||||
bc.valueAxis.valueStep = (max(data[0]) - min(data[0])) / 10
|
||||
bc.valueAxis.strokeWidth = 1
|
||||
|
||||
bc.categoryAxis.labels.boxAnchor = 'e'
|
||||
bc.categoryAxis.labels.dx = -40
|
||||
bc.categoryAxis.labels.dy = -50
|
||||
bc.categoryAxis.labels.angle = 45
|
||||
bc.categoryAxis.labels.width = 10
|
||||
bc.categoryAxis.labels.height = 4
|
||||
bc.categoryAxis.categoryNames = label
|
||||
bc.categoryAxis.strokeWidth = 1
|
||||
bc.categoryAxis.labels.fontSize = 9
|
||||
|
||||
|
||||
drawing.add(bc)
|
||||
|
||||
renderPDF.drawToFile(drawing, path )
|
||||
|
||||
def _makePie(data, label, title, path):
|
||||
|
||||
drawing = Drawing(400,400)
|
||||
drawing.add(String(50,50,title,fontSize=10))
|
||||
|
||||
pc = Pie()
|
||||
pc.x = 100
|
||||
pc.y = 100
|
||||
pc.data = data
|
||||
pc.labels = label
|
||||
pc.labels.angle = 45
|
||||
|
||||
drawing.add(pc)
|
||||
|
||||
renderPDF.drawToFile(drawing,path)
|
||||
|
||||
|
||||
###########
|
||||
#
|
||||
# PARSE FUNCTIONS
|
||||
#
|
||||
###########
|
||||
|
||||
def _parseOligoResult(filter,file,strand):
|
||||
seq = {}
|
||||
|
||||
if strand == 'direct':
|
||||
key = 'oligo_1'
|
||||
elif strand == 'reverse':
|
||||
key = 'oligo_2'
|
||||
|
||||
for s in filter.ecoPCRResultIterator(file):
|
||||
o = s[key]
|
||||
taxid = s['taxid']
|
||||
if not seq.has_key(o):
|
||||
seq[o] = [1,taxid]
|
||||
else:
|
||||
seq[o][0] = seq[o][0] + 1
|
||||
seq[o][1] = filter.lastCommonTaxon(seq[o][1],taxid)
|
||||
return seq
|
||||
|
||||
|
||||
def _parseTaxonomyResult(table):
|
||||
tax = {}
|
||||
for l in table:
|
||||
taxid = l[2]
|
||||
scName = l[3]
|
||||
count = l[1]
|
||||
if not tax.has_key(taxid):
|
||||
tax[taxid] = [1,scName,count]
|
||||
else:
|
||||
tax[taxid][0] = tax[taxid][0] + 1
|
||||
tax[taxid][2] = tax[taxid][2] + count
|
||||
return tax
|
||||
|
||||
|
||||
def _sortTable(e1,e2):
|
||||
e1 = e1[1]
|
||||
e2 = e2[1]
|
||||
if e1 < e2:
|
||||
return 1
|
||||
if e1 > e2:
|
||||
return -1
|
||||
return 0
|
||||
|
||||
|
||||
def _countOligoMismatch(o1,o2):
|
||||
"""
|
||||
define mismatch between two oligonucleotids
|
||||
return number of mismatch
|
||||
"""
|
||||
mmatch = 0
|
||||
if len(o1) < len(o2):
|
||||
mmatch = int(len(o2) - len(o1))
|
||||
for i in range(len(o1)):
|
||||
try:
|
||||
if o1[i] != o2[i]:
|
||||
mmatch = mmatch + 1
|
||||
except:
|
||||
mmatch = mmatch + 1
|
||||
|
||||
return mmatch
|
||||
|
||||
###########
|
||||
#
|
||||
# TOOLS FUNCTIONS
|
||||
#
|
||||
###########
|
||||
|
||||
def customSort(table,x,y):
|
||||
"""
|
||||
|
||||
"""
|
||||
x = x-1
|
||||
y = y-1
|
||||
h = (table.headers[x],table.headers[y])
|
||||
t = (table.types[x],table.types[y])
|
||||
cTable = Table(h,t)
|
||||
|
||||
tmp = {}
|
||||
|
||||
for l in table:
|
||||
if tmp.has_key(l[x]):
|
||||
tmp[l[x]] = tmp[l[x]] + l[y]
|
||||
else:
|
||||
tmp[l[x]] = l[y]
|
||||
|
||||
for k,v in tmp.items():
|
||||
cTable.append([k,v])
|
||||
|
||||
return cTable
|
||||
|
||||
|
||||
def countColumnOccurrence(table,x):
|
||||
x = x-1
|
||||
h = (table.headers[x],"count")
|
||||
t = (table.types[x],int)
|
||||
cTable = Table(h,t)
|
||||
|
||||
tmp = {}
|
||||
|
||||
for l in table:
|
||||
if tmp.has_key(l[x]):
|
||||
tmp[l[x]] = tmp[l[x]] + 1
|
||||
else:
|
||||
tmp[l[x]] = 1
|
||||
|
||||
for k,v in tmp.items():
|
||||
cTable.append([k,v])
|
||||
|
||||
return cTable
|
||||
|
||||
|
||||
def buildSpecificityTable(table):
|
||||
header = ("mismatch","taxon","count")
|
||||
type = (int,str,int)
|
||||
speTable = Table(header,type)
|
||||
|
||||
tmp = {}
|
||||
for l in table:
|
||||
if not tmp.has_key(l[5]):
|
||||
tmp[l[5]] = {}
|
||||
if not tmp[l[5]].has_key(l[3]):
|
||||
tmp[l[5]][l[3]] = l[1]
|
||||
else:
|
||||
tmp[l[5]][l[3]] = tmp[l[5]][l[3]] + l[1]
|
||||
|
||||
for mismatch in tmp.items():
|
||||
for taxon,count in mismatch[1].items():
|
||||
speTable.append([mismatch[0],taxon,count])
|
||||
|
||||
return speTable
|
||||
|
||||
|
||||
def buildOligoTable(table, file, filter, oligoRef, strand='direct'):
|
||||
"""
|
||||
Fills and sorts a Table object with ecoPCR result file
|
||||
|
||||
Takes 4 arguments
|
||||
1- Table object
|
||||
2- ecoPCR result file path
|
||||
3- Filter Object
|
||||
4- the oligo used in ecoPCR
|
||||
5- the oligo type : direct or reverse
|
||||
|
||||
"""
|
||||
seq = _parseOligoResult(filter, file, strand)
|
||||
|
||||
i = 0
|
||||
for oligo, info in seq.items():
|
||||
table.append(0)
|
||||
count, lctTaxid = info[0], info[1]
|
||||
scName = filter.findTaxonByTaxid(info[1])[3]
|
||||
rank = filter._ranks[filter.findTaxonByTaxid(info[1])[1]]
|
||||
mismatch = _countOligoMismatch(oligoRef,oligo)
|
||||
table[i]=[oligo,count,lctTaxid,scName,rank,mismatch]
|
||||
i = i + 1
|
||||
|
||||
table.sort(sortTable)
|
||||
|
||||
|
||||
def buildTaxonomicTable(table):
|
||||
"""
|
||||
Fill a Table object with a taxonomic synthesis
|
||||
"""
|
||||
taxHeaders = ("scName","numOfOligo","numOfAmpl","taxid")
|
||||
taxTypes = (str, int, int, int)
|
||||
taxTable = Table(taxHeaders, taxTypes)
|
||||
|
||||
tax = _parseTaxonomyResult(table)
|
||||
|
||||
i = 0
|
||||
for taxid, info in tax.items():
|
||||
taxTable.append(0)
|
||||
numOfOligo, scName, numOfAmpl = info[0], info[1], info[2]
|
||||
taxTable[i]=[scName,numOfOligo,numOfAmpl,taxid]
|
||||
i = i + 1
|
||||
|
||||
taxTable.sort(sortTable)
|
||||
|
||||
return taxTable
|
||||
|
||||
def _parseSequenceResult(filter,file,id):
|
||||
sequences = {}
|
||||
idIndex = {}
|
||||
|
||||
if id == 'family':
|
||||
key = 'fa_taxid'
|
||||
elif id == 'genus':
|
||||
key = 'ge_taxid'
|
||||
else:
|
||||
key = 'taxid'
|
||||
|
||||
for s in filter.ecoPCRResultIterator(file):
|
||||
seq = s['sq_des']
|
||||
id = s[key]
|
||||
if not idIndex.has_key(id):
|
||||
idIndex[id] = []
|
||||
if not sequences.has_key(seq):
|
||||
sequences[seq] = [id]
|
||||
else:
|
||||
sequences[seq].append(id)
|
||||
return sequences, idIndex
|
||||
|
||||
|
||||
def _sameValuesInList(array):
|
||||
for i in range(len(array)-1):
|
||||
if array[i] != array[i+1]:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def buildSequenceTable(table,file,filter):
|
||||
|
||||
sequences, idIndex = _parseSequenceResult(filter,file,'species')
|
||||
|
||||
for s,id in sequences.items():
|
||||
if len(id) == 1 or _sameValuesInList(id):
|
||||
idIndex[id[0]].append(1)
|
||||
else:
|
||||
idIndex[id[0]].append(0)
|
||||
|
||||
|
||||
for id,values in idIndex.items():
|
||||
c = values.count(1)
|
||||
if c == 0:
|
||||
idIndex[id] = -1
|
||||
else:
|
||||
idIndex[id] = len(values) == c and 1 or 0
|
||||
|
||||
identified = {}
|
||||
non_identified = {}
|
||||
ambiguous = {}
|
||||
for sequence...
|
||||
|
||||
|
||||
|
||||
|
||||
###########
|
||||
#
|
||||
# OUTPUT FUNCTIONS
|
||||
#
|
||||
###########
|
||||
|
||||
def printTable(table):
|
||||
"""
|
||||
Displays the content a of Table object
|
||||
Take 1 arguments
|
||||
1- Table object
|
||||
"""
|
||||
|
||||
format = ("%20s | " * len(table.headers))[:-3]
|
||||
print format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers)
|
||||
for l in table:
|
||||
print format % tuple([str(e) for e in l ])
|
||||
print "# %d results" % len(table)
|
||||
|
||||
|
||||
def printColumn(table,n):
|
||||
"""
|
||||
Displays a column of a Table object
|
||||
Takes 2 arguments
|
||||
1- Table object
|
||||
2- Number of the Column to display
|
||||
"""
|
||||
n = n - 1
|
||||
print "\t%s\n" % table.headers[n]
|
||||
for l in table:
|
||||
print "%s" % l[n]
|
||||
|
||||
|
||||
def saveAsCSV(table,path):
|
||||
"""
|
||||
Creates a csv file from a Table object
|
||||
Takes 2 arguments
|
||||
1- Table object
|
||||
2- path of the file-to-be
|
||||
"""
|
||||
file = open(path,'w')
|
||||
file.write(','.join([str(e) for e in table.headers ]) + "\n")
|
||||
for l in table:
|
||||
file.write(','.join([str(e) for e in l ]) + "\n")
|
||||
file.close()
|
||||
|
||||
|
||||
def grepTable(table,col,pattern):
|
||||
"""
|
||||
Filters a Table object with regular expression
|
||||
Takes 3 arguments :
|
||||
1- Table object
|
||||
2- number of column to match with
|
||||
3- regular expression pattern
|
||||
|
||||
Returns a Table object
|
||||
"""
|
||||
col = col -1
|
||||
p = re.compile(pattern, re.IGNORECASE)
|
||||
out = Table(table.headers,table.types)
|
||||
for l in table:
|
||||
if p.search(l[col]):
|
||||
out.append(l)
|
||||
return out
|
||||
|
||||
def drawGraph(table,x,y,path='graph.pdf',title='',treshold=10,graphType=1):
|
||||
"""
|
||||
Creates an histogram as pdf file
|
||||
Takes 5 arguments :
|
||||
1- a Table object
|
||||
2- number of column of Table object for y axis
|
||||
3- number of column of Table object for x axis
|
||||
4- path of the pdf-to-be
|
||||
5- a title for the graph (optional)
|
||||
6- the x first highest results (10 by default)
|
||||
7- the graph type : 1 for histogram, 2 for pie
|
||||
"""
|
||||
count, label = [], []
|
||||
x = x-1
|
||||
y = y-1
|
||||
for l in table:
|
||||
count.append(l[y])
|
||||
label.append(str(l[x]))
|
||||
if graphType == 1:
|
||||
_makeHistogram([tuple(count[:treshold])], label[:treshold], title, path)
|
||||
elif graphType == 2:
|
||||
_makePie(count[:treshold], label[:treshold], title, path)
|
||||
|
||||
|
||||
|
||||
######################
|
||||
|
||||
|
||||
def init():
|
||||
file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
|
||||
oligo = {'o1':'ATGTTTAAAA','o2':'ATGGGGGTATTG'}
|
||||
|
||||
filter = Filter("/ecoPCRDB/gbmam")
|
||||
|
||||
# headers = ("oligo", "Num", "LCT Taxid", "Sc Name", "Rank", "distance")
|
||||
# types = (str, int, int, str, str, int)
|
||||
#
|
||||
# o1Table = Table(headers, types)
|
||||
# o2Table = Table(headers, types)
|
||||
#
|
||||
# buildOligoTable(o1Table, file, filter, oligo['o1'], 'direct')
|
||||
# buildOligoTable(o2Table, file, filter, oligo['o2'], 'reverse')
|
||||
#
|
||||
#
|
||||
# taxTable = buildTaxonomicTable(o1Table)
|
||||
# speTable = buildSpecificityTable(o1Table)
|
||||
#
|
||||
|
||||
seqHeaders = ("sequence","taxid")
|
||||
seqTypes = (str,list)
|
||||
seqTable = Table(seqHeaders,seqTypes)
|
||||
|
||||
return buildSequenceTable(seqTable, file, filter)
|
||||
|
||||
|
||||
|
||||
return seqTable
|
||||
|
||||
|
Reference in New Issue
Block a user