diff --git a/tools/ecoSort.py b/tools/ecoSort.py index e030e60..c7d6ec3 100755 --- a/tools/ecoSort.py +++ b/tools/ecoSort.py @@ -359,125 +359,26 @@ class ecoTable(list): for e in range(len(value)): value[e] = self.types[e](value[e]) list.__setitem__(self,key,value) - - - -########### -# -# GRAPH FUNCTIONS -# -########### - - - - -def drawGraphs(data,path='graph.pdf'): - """ - Generate a pdf file with graph illustrating the data given as - arguments. Takes two arguments : - 1- an array of array. Each element contains : - * Table object - * number of columns 1 and 2 to be ploted - * a Title - * Number of element to be ploted (10 by default) - * type of graph (1 for histogram or 2 for pie) - 2- path of file-to-be (graph.pdf by default) - """ - - def box(flt, center=True): - box_style = [('BOX', (0, 0), (-1, -1), 0.5, colors.lightgrey)] - if center: - box_style += [('ALIGN', (0, 0), (-1, -1), 'CENTER')] - return Table([[flt]], style=box_style) - - def makeHistogram(data,label): - c = VerticalBarChart() - c.x = 10 - c.y = 70 - c.height = 150 - c.width = 300 - c.bars.strokeWidth = 1 - c.barSpacing = 1 - c.barLabels.dy = 5 - c.barLabelFormat = '%d' - c.barLabels.fontSize = 9 - (len(data[0])/10) - c.data = data - - c.categoryAxis.labels.boxAnchor = 'e' - c.categoryAxis.labels.textAnchor = 'start' - c.categoryAxis.labels.dx = -40 - c.categoryAxis.labels.dy = -50 - c.categoryAxis.labels.angle = 45 - c.categoryAxis.labels.width = 10 - c.categoryAxis.labels.height = 4 - c.categoryAxis.categoryNames = label - c.categoryAxis.strokeWidth = 1 - c.categoryAxis.labels.fontSize = 8 - - - c.valueAxis.valueMin = min(data[0])*0.7 - c.valueAxis.valueMax = max(data[0]) - step = (max(data[0]) - min(data[0])) / 10 - c.valueAxis.valueStep = step > 1 and step or 1 - - - return c - - - def makePie(data, label): - - c = Pie() - c.x = 100 - c.y = 100 - c.data = data - c.labels = label - return c - - styles = getSampleStyleSheet() - doc = SimpleDocTemplate(path) - - elements = [] - elements.append(box(Paragraph("EcoPCR report", styles['Title']))) - elements.append(Spacer(0, 0.5 * cm)) - - for e in data: - count, label = [], [] - table = e[0] - col1, col2 = e[1]-1, e[2]-1 - title = e[3] - try: - treshold = e[4] - except: - treshold = 10 - try: - graphType= e[5] - except: - graphType= 1 - - for i in range(treshold): - count.append(table[i][col2]) - label.append(table[i][col1]) - - elements.append(box(Paragraph(title, styles['Normal']))) - if graphType == 2: - chart = makePie(count,label) + def __getitem__(self,index): + newtable = ecoTable(self.headers,self.types) + if isinstance(index,slice): + newtable.extend(list.__getitem__(self,index)) else: - chart = makeHistogram([tuple(count)],label) - drawing = Drawing(300, 250) - drawing.add(chart) - elements.append(box(drawing)) - elements.append(Spacer(0, 2 * cm)) - - doc.build(elements) - - - - + newtable.append(list.__getitem__(self,index)) + + return newtable + def getColumns(self,columnList): + newhead = [self.headers[x] for x in columnList] + newtype = [self.types[x] for x in columnList] + newtable = ecoTable(newhead,newtype) + for line in self: + newtable.append([line[x] for x in columnList]) + + return newtable + - - ########### # # PARSE FUNCTIONS @@ -693,7 +594,7 @@ def _sameValuesInList(array): return True -def buildSequenceTable(table,file,filter): +def _sortSequences(file,filter): sequences, idIndex = _parseSequenceResult(filter,file,'species') @@ -701,24 +602,37 @@ def buildSequenceTable(table,file,filter): if len(id) == 1 or _sameValuesInList(id): idIndex[id[0]].append(1) else: - idIndex[id[0]].append(0) + for e in id: + idIndex[e].append(0) for id,values in idIndex.items(): - c = values.count(1) - if c == 0: - idIndex[id] = -1 - else: - idIndex[id] = len(values) == c and 1 or 0 + idIndex[id] = float(values.count(1)) / float(len(values)) * 100 + identified = {} non_identified = {} ambiguous = {} + + return sequences, idIndex + +def getIntraSpeciesDiversity(table,file,filter): - for sequence in sequences: - pass - - + intraDiv = {} + + seq, idIndex = _sortSequences(file,filter) + + for id,percent in idIndex.items(): + if percent == 100: + intraDiv[id] = [0,[]] + for seq,idList in sequences.items(): + if id in idList: + intraDiv[id][0] = intraDiv[id][0] + 1 + intraDiv[id][1].append(seq) + + for id, values in intraDiv.items(): + table.append(id,values[0],values[1]) + ########### @@ -739,20 +653,7 @@ def printTable(table): for l in table: print format % tuple([str(e) for e in l ]) print "# %d results" % len(table) - - -def printColumn(table,n): - """ - Displays a column of a Table object - Takes 2 arguments - 1- Table object - 2- Number of the Column to display - """ - n = n - 1 - print "\t%s\n" % table.headers[n] - for l in table: - print "%s" % l[n] - + def saveAsCSV(table,path): """ @@ -786,44 +687,90 @@ def grepTable(table,col,pattern): out.append(l) return out -def drawGraph(data,path='graph.pdf'): - """ - Creates an histogram as pdf file - Takes 5 arguments : - 1- a Table object - 2- number of column of Table object for y axis - 3- number of column of Table object for x axis - 4- path of the pdf-to-be - 5- a title for the graph (optional) - 6- the x first highest results (10 by default) - 7- the graph type : 1 for histogram, 2 for pie - """ - drawing = Drawing(350, 450) + +########### +# +# GRAPH FUNCTIONS +# +########### + +class EcoGraph(object): - - - for e in data: - count, label = [], [] - table = e[0] - col1 = e[1]-1 - col2 = e[2]-1 - title = e[3] - treshold = e[4] - graphType= e[5] + def __init__(self): + self._styles = getSampleStyleSheet() - for l in table: - count.append(l[y]) - label.append(str(l[x])) - if graphType == 1: - graph = _makeHistogram([tuple(count[:treshold])], label[:treshold], title, path) - elif graphType == 2: - _makePie(count[:treshold], label[:treshold], title, path) - - drawing.add(String(35,430,title,fontSize=10)) - drawing.add(graph) + self._element = [] + self._element.append(self._box(Paragraph("EcoPCR report", self._styles['Title']))) + self._element.append(Spacer(0, 0.5 * cm)) - renderPDF.drawToFile(drawing, path ) + def _box(self,flt, center=True): + box_style = [('BOX', (0, 0), (-1, -1), 0.5, colors.lightgrey)] + if center: + box_style += [('ALIGN', (0, 0), (-1, -1), 'CENTER')] + return Table([[flt]], style=box_style) + def _addChart(self,chart,title): + drawing = Drawing(300, 250) + drawing.add(chart) + self._element.append(self._box(Paragraph(title, self._styles['Normal']))) + self._element.append(self._box(drawing)) + self._element.append(Spacer(0, 0.5 * cm)) + + def _formatData(self,table): + data, label = [],[] + for i in range(len(table)): + label.append(table[i][0]) + data.append(table[i][1]) + return data, label + + def makePie(self, table, title): + data, label = self._formatData(table) + pie = Pie() + pie.x = 100 + pie.y = 100 + pie.data = data + pie.labels = label + self._addChart(pie, title) + + def makeHistogram(self, table, title): + data, label = self._formatData(table) + data = [tuple(data)] + + histo = VerticalBarChart() + histo.x = 10 + histo.y = 70 + histo.height = 150 + histo.width = 300 + histo.bars.strokeWidth = 1 + histo.barSpacing = 1 + histo.barLabels.dy = 5 + histo.barLabelFormat = '%d' + histo.barLabels.fontSize = 9 - (len(data[0])/10) + histo.data = data + + histo.categoryAxis.labels.boxAnchor = 'e' + histo.categoryAxis.labels.textAnchor = 'start' + histo.categoryAxis.labels.dx = -40 + histo.categoryAxis.labels.dy = -50 + histo.categoryAxis.labels.angle = 45 + histo.categoryAxis.labels.width = 10 + histo.categoryAxis.labels.height = 4 + histo.categoryAxis.categoryNames = label + histo.categoryAxis.strokeWidth = 1 + histo.categoryAxis.labels.fontSize = 8 + + histo.valueAxis.valueMin = min(data[0])*0.7 + histo.valueAxis.valueMax = max(data[0]) + step = (max(data[0]) - min(data[0])) / 10 + histo.valueAxis.valueStep = step > 1 and step or 1 + + self._addChart(histo, title) + + def makeReport(self,path): + doc = SimpleDocTemplate(path) + doc.build(self._element) + + ###################### @@ -845,12 +792,20 @@ def init(): taxTable = buildTaxonomicTable(o1Table) speTable = buildSpecificityTable(o1Table) - -# -# seqHeaders = ("sequence","taxid") -# seqTypes = (str,list) -# seqTable = Table(seqHeaders,seqTypes) - + return o1Table, o2Table, taxTable + +def start(): + file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp" + filter = Filter("/ecoPCRDB/gbmam") + + speHeaders = ("taxid","num of seq","list of seq") + speTypes = (int,int,list) + speTable = ecoTable(speHeaders,speTypes) + + getIntraSpeciesDiversity(speTable, file, filter) + + +