2013-04-19 12:19:53 +00:00
parent 69f7371b50
commit bac706c9c0
1 changed files with 61 additions and 6 deletions
--- a/src/ecotaxspecificity.py
+++ b/src/ecotaxspecificity.py
@@ -1,4 +1,58 @@
 #!/usr/local/bin/python
 '''
 :py:mod:`ecotaxspecificity`: Evaluates barcode resolution
 =========================================================
 .. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
 The :py:mod:`ecotaxspecificity` command evaluates barcode resolution at different 
 taxonomic ranks. 
 As inputs, it takes a sequence record file annotated with taxids in the sequence 
 header, and a database formatted as an ecopcr database (see :doc:`obitaxonomy 
 <obitaxonomy>`) or an NCBI taxdump (see NCBI ftp site).
 An example of output is reported below::
                Number of sequences added in graph: 284
                Number of nodes in all components: 269
                Number of sequences lost: 15!
                rank                      taxon_ok      taxon_total     percent
                order                            8               8        100.00
                superfamily                      1               1        100.00
                parvorder                        1               1        100.00
                subkingdom                       1               1        100.00
                superkingdom                     1               1        100.00
                kingdom                          3               3        100.00
                phylum                           5               5        100.00
                infraorder                       1               1        100.00
                subfamily                        3               3        100.00
                class                            6               6        100.00
                species                         35             176         19.89
                superorder                       1               1        100.00
                suborder                         1               1        100.00
                subtribe                         1               1        100.00
                subclass                         3               3        100.00
                genus                            9              15         60.00
                superclass                       1               1        100.00
                family                          10              10        100.00
                tribe                            2               2        100.00
                subphylum                        1               1        100.00
 In this example, the input sequence file contains 284 sequence records, but only 
 269 have been examined, because taxonomic information was not recovered for the
 15 remaining ones.
 "Taxon_total" refers to the number of different taxa observed at this rank 
 in the sequence record file (when taxonomic information is available at this 
 rank), and "taxon_ok" corresponds to the number of taxa that the barcode sequence
 identifies unambiguously in the taxonomic database. In this example, the sequence 
 records correspond to 176 different species, but only 35 of these have specific 
 barcodes. "percent" is the percentage of unambiguously identified taxa among 
 the total number of taxa (taxon_ok/taxon_total*100).
 '''
 import math
 import sys
@@ -16,13 +70,14 @@ from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
 def addSpecificityOptions(optionManager):
-    optionManager.add_option('-e','--errors',
+    group = optionManager.add_option_group('ecotaxspecificity specific options')
    group.add_option('-e','--errors',
                             action="store", dest="dist",
                             metavar="###",
                             type="int",
                             default=1,
                             help="Maximum errors between two sequences")
-    optionManager.add_option('-q','--quorum',
+    group.add_option('-q','--quorum',
                            action="store", dest="quorum",
                            type="float",
                            default=0.0,
@@ -31,7 +86,7 @@ def addSpecificityOptions(optionManager):
 if __name__=='__main__':
-    optionParser = getOptionManager([addInOutputOption,addTaxonomyDBOptions,addSpecificityOptions])
+    optionParser = getOptionManager([addInputFormatOption,addTaxonomyDBOptions,addSpecificityOptions])
    (options, entries) = optionParser()
@@ -110,9 +165,9 @@ if __name__=='__main__':
                indexbyseq[s].add(seq)
            yy = yy + 1
-    print "Total Sequences added in graph: " + str(xx)
+    print "Number of sequences added in graph: " + str(xx)
-    print "Total nodes in all components: " + str (yy)
+    print "Number of nodes in all components: " + str (yy)
-    print "Lost sequences: " + str (xx-yy) + "!"
+    print "Number of sequences lost: " + str (xx-yy) + "!"
    # since multiple different sequences have one key, we need to know what that key is for each sequence
    indexbykey={} #it will have elements like: {"seq1":key, "seq2":key, ...} where 'key' is the component key to which 'seqx' belongs