Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ffb33f61a9 | |||
| adbeac9684 | |||
| 4aac47e639 | |||
| 2028c69b3f | |||
| 2649cd268c | |||
| 46b446f0c8 | |||
| 6208cbee02 | |||
| 1d4c3f99b2 | |||
| ba30e74dc9 | |||
| 0e8b97ca40 | |||
| a95ae5126b | |||
| 0e2f859839 |
@@ -15,7 +15,7 @@
|
||||
output file. FLOAT is included in a [0,1] interval.
|
||||
|
||||
.. cmdoption:: --minimum-circle=FLOAT
|
||||
|
||||
|
||||
minimum identity considered for the assignment circle.
|
||||
FLOAT is included in a [0,1] interval.
|
||||
|
||||
@@ -40,11 +40,20 @@
|
||||
.. cmdoption:: -E FLOAT, --errors=FLOAT
|
||||
|
||||
FLOAT is the fraction of reference sequences that will
|
||||
be ignored when looking for the most recent common ancestor. This
|
||||
be ignored when looking for the lowest common ancestor. This
|
||||
option is useful when a non-negligible proportion of reference sequences
|
||||
is expected to be assigned to the wrong taxon, for example because of
|
||||
taxonomic misidentification. FLOAT is included in a [0,1] interval.
|
||||
|
||||
|
||||
.. cmdoption:: -M INTEGER, --min-matches=FLOAT
|
||||
|
||||
Define the minimum congruent assignation. If this minimum is reached and
|
||||
the -E option is activated, the lowest common ancestor algorithm tolarated
|
||||
that some sequences do not provide the same taxonomic annotation (see the
|
||||
-E option).
|
||||
|
||||
|
||||
.. cmdoption:: --cache-size=INTEGER
|
||||
|
||||
A cache for computed similarities is maintained by `ecotag`. the default
|
||||
|
||||
@@ -19,7 +19,7 @@ from os import path
|
||||
|
||||
|
||||
PACKAGE = "OBITools"
|
||||
VERSION = "1.2.0"
|
||||
VERSION = "1.2.3"
|
||||
AUTHOR = 'Eric Coissac'
|
||||
EMAIL = 'eric@coissac.eu'
|
||||
URL = 'metabarcoding.org/obitools'
|
||||
|
||||
+18
-6
@@ -1,4 +1,4 @@
|
||||
#!/usr/local/bin/python
|
||||
#!/usr/local/OBITools-1.1.22/bin/python
|
||||
'''
|
||||
:py:mod:`ecotag`: assigns sequences to taxa
|
||||
===========================================
|
||||
@@ -145,9 +145,16 @@ def addSearchOptions(optionManager):
|
||||
|
||||
optionManager.add_option('-E','--errors',
|
||||
action='store',dest='error',
|
||||
type='float',
|
||||
default=0.0,
|
||||
help='Tolerated rate of wrong assignation')
|
||||
|
||||
optionManager.add_option('-M','--min-matches',
|
||||
action='store',dest='minmatches',
|
||||
type="int",
|
||||
default=1,
|
||||
help='Minimum congruent assignation')
|
||||
|
||||
optionManager.add_option('--cache-size',
|
||||
action='store',dest='cache',
|
||||
type='int',
|
||||
@@ -333,10 +340,10 @@ if __name__=='__main__':
|
||||
rankid = taxonomy.findRankByName(options.explain)
|
||||
|
||||
for seq in db:
|
||||
id = seq.id[0:46]
|
||||
seq.id=id
|
||||
assert id not in taxonlink
|
||||
taxonlink[id]=int(seq['taxid'])
|
||||
seqid = seq.id[0:46]
|
||||
seq.id=seqid
|
||||
assert seqid not in taxonlink
|
||||
taxonlink[seqid]=int(seq['taxid'])
|
||||
|
||||
|
||||
if options.uniq:
|
||||
@@ -363,7 +370,12 @@ if __name__=='__main__':
|
||||
|
||||
if best[0]:
|
||||
taxlist = set(taxonlink[p[0].id] for p in match)
|
||||
lca = taxonomy.betterCommonTaxon(options.error,*tuple(taxlist))
|
||||
if options.error > 0.0 and len(match) >= int(options.minmatches / (1.0 - options.error)):
|
||||
lca = taxonomy.betterCommonTaxon(options.error,
|
||||
*tuple(taxlist))
|
||||
else:
|
||||
lca = taxonomy.betterCommonTaxon(0.0,*tuple(taxlist))
|
||||
|
||||
scname = taxonomy.getScientificName(lca)
|
||||
rank = taxonomy.getRank(lca)
|
||||
if len(taxlist) < 15:
|
||||
|
||||
+2
-1
@@ -48,7 +48,7 @@ Otherwise,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
> obiaddtaxids -T species_name -g genus_identified.fasta \\
|
||||
> obiaddtaxids -k species_name -g genus_identified.fasta \\
|
||||
-u unidentified.fasta -d my_ecopcr_database \\
|
||||
my_sequences.fasta > identified.fasta
|
||||
|
||||
@@ -340,6 +340,7 @@ if __name__=='__main__':
|
||||
entries = entryIterator(entries)
|
||||
elif options.db_type == 'UNITE_FULL' :
|
||||
entryIterator = UNITEIterator_FULL
|
||||
entries = entryIterator(entries)
|
||||
elif options.db_type == 'UNITE_GENERAL' :
|
||||
entryIterator = UNITEIterator_GENERAL
|
||||
entries = entryIterator(entries)
|
||||
|
||||
+96
-73
@@ -23,8 +23,11 @@ def minimum(seqs):
|
||||
return min(s['select'] for s in seqs)
|
||||
|
||||
def maximum(seqs):
|
||||
return max(s['select'] for s in seqs)
|
||||
|
||||
try:
|
||||
return max(s['select'] for s in seqs)
|
||||
except TypeError, e:
|
||||
print >>sys.stderr, seqs
|
||||
raise e
|
||||
def mean(seqs):
|
||||
ss= reduce(lambda x,y: x + y,(s['select'] for s in seqs),0)
|
||||
return float(ss) / len(seqs)
|
||||
@@ -101,6 +104,13 @@ def addSelectOptions(optionManager):
|
||||
default=[],
|
||||
help="attributes to merge within each group")
|
||||
|
||||
group.add_option('-s','--sample',
|
||||
action="store", dest="sample",
|
||||
metavar="<TAGNAME>",
|
||||
type="str",
|
||||
default=None,
|
||||
help="Tag containing sample descriptions, the default value is set to *merged_sample*")
|
||||
|
||||
group.add_option('--merge-ids',
|
||||
action="store_true", dest="mergeids",
|
||||
default=False,
|
||||
@@ -129,16 +139,22 @@ if __name__ == '__main__':
|
||||
classes = {}
|
||||
|
||||
print >>sys.stderr,"\nLoading sequences...\n"
|
||||
|
||||
with_taxonomy=hasattr(options, 'taxonomy') and options.taxonomy is not None
|
||||
|
||||
nbseq=0
|
||||
|
||||
for s in entries:
|
||||
nbseq+=1
|
||||
category = []
|
||||
|
||||
if with_taxonomy:
|
||||
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
|
||||
else:
|
||||
environ = {'sequence':s,'random':random()}
|
||||
|
||||
for c in options.categories:
|
||||
try:
|
||||
if hasattr(options, 'taxonomy') and options.taxonomy is not None:
|
||||
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
|
||||
else:
|
||||
environ = {'sequence':s,'random':random()}
|
||||
|
||||
v = eval(c,environ,s)
|
||||
category.append(v)
|
||||
except:
|
||||
@@ -149,11 +165,6 @@ if __name__ == '__main__':
|
||||
group.append(s)
|
||||
classes[category]= group
|
||||
|
||||
if hasattr(options, 'taxonomy') and options.taxonomy is not None:
|
||||
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
|
||||
else:
|
||||
environ = {'sequence':s, 'random':random()}
|
||||
|
||||
try:
|
||||
select = eval(options.function,environ,s)
|
||||
s['select']=select
|
||||
@@ -181,75 +192,87 @@ if __name__ == '__main__':
|
||||
i+=1
|
||||
progressBar(i,lclasses,False,"%15s" % ("/".join(map(str,c)),))
|
||||
seqs = classes[c]
|
||||
sortclass(seqs, options)
|
||||
if len(c)==1:
|
||||
c=c[0]
|
||||
if options.sample is not None:
|
||||
subsets = {}
|
||||
for s in seqs:
|
||||
for sid in s[options.sample]:
|
||||
ss = subsets.get(sid,[])
|
||||
ss.append(s)
|
||||
subsets[sid]=ss
|
||||
else:
|
||||
subsets={"all":seqs}
|
||||
|
||||
if options.number==1:
|
||||
s = seqs[0]
|
||||
for seqs in subsets.values():
|
||||
sortclass(seqs, options)
|
||||
|
||||
for key in mergedKey:
|
||||
if key=='taxid' and mergeIds:
|
||||
if 'taxid_dist' not in s:
|
||||
s["taxid_dist"]={}
|
||||
if 'taxid' in s:
|
||||
s["taxid_dist"][s.id]=s['taxid']
|
||||
mkey = "merged_%s" % key
|
||||
if mkey not in s:
|
||||
if key in s:
|
||||
s[mkey]={s[key]:1}
|
||||
else:
|
||||
s[mkey]={}
|
||||
|
||||
if 'count' not in s:
|
||||
s['count']=1
|
||||
if mergeIds:
|
||||
s['merged']=[s.id]
|
||||
|
||||
for seq in seqs[1:]:
|
||||
if len(c)==1:
|
||||
c=c[0]
|
||||
|
||||
s['count']+=seq['count']
|
||||
if options.number==1 and options.sample is None:
|
||||
s = seqs[0]
|
||||
|
||||
for key in mergedKey:
|
||||
if key=='taxid' and mergeIds:
|
||||
if 'taxid_dist' in seq:
|
||||
s["taxid_dist"].update(seq["taxid_dist"])
|
||||
if 'taxid' in seq:
|
||||
s["taxid_dist"][seq.id]=seq['taxid']
|
||||
|
||||
if 'taxid_dist' not in s:
|
||||
s["taxid_dist"]={}
|
||||
if 'taxid' in s:
|
||||
s["taxid_dist"][s.id]=s['taxid']
|
||||
mkey = "merged_%s" % key
|
||||
if mkey in seq:
|
||||
m = seq[mkey]
|
||||
else:
|
||||
if key in seq:
|
||||
m={seq[key]:1}
|
||||
|
||||
allmkey = set(m.keys()) | set(s[mkey].keys())
|
||||
s[mkey] = dict((k,m.get(k,0)+s[mkey].get(k,0)) for k in allmkey)
|
||||
|
||||
# if mkey in seq:
|
||||
# for skey in seq[mkey]:
|
||||
# if skey in s:
|
||||
# s[mkey][skey]=s[mkey].get(seq[skey],0)+seq[mkey][skey]
|
||||
# else:
|
||||
# s[mkey][skey]=seq[mkey][skey]
|
||||
|
||||
#for key in seq.iterkeys():
|
||||
# # Merger proprement l'attribut merged s'il exist
|
||||
# if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged' and key!='select':
|
||||
# del(s[key])
|
||||
|
||||
|
||||
if mkey not in s:
|
||||
if key in s:
|
||||
s[mkey]={s[key]:1}
|
||||
else:
|
||||
s[mkey]={}
|
||||
|
||||
if 'count' not in s:
|
||||
s['count']=1
|
||||
if mergeIds:
|
||||
s['merged'].append(seq.id)
|
||||
|
||||
if taxonomy is not None:
|
||||
mergeTaxonomyClassification(seqs, taxonomy)
|
||||
s['merged']=[s.id]
|
||||
|
||||
for seq in seqs[1:]:
|
||||
|
||||
|
||||
for s in seqs[0:options.number]:
|
||||
s['class']=c
|
||||
del s['select']
|
||||
writer(s)
|
||||
s['count']+=seq['count']
|
||||
|
||||
for key in mergedKey:
|
||||
if key=='taxid' and mergeIds:
|
||||
if 'taxid_dist' in seq:
|
||||
s["taxid_dist"].update(seq["taxid_dist"])
|
||||
if 'taxid' in seq:
|
||||
s["taxid_dist"][seq.id]=seq['taxid']
|
||||
|
||||
mkey = "merged_%s" % key
|
||||
if mkey in seq:
|
||||
m = seq[mkey]
|
||||
else:
|
||||
if key in seq:
|
||||
m={seq[key]:1}
|
||||
|
||||
allmkey = set(m.keys()) | set(s[mkey].keys())
|
||||
s[mkey] = dict((k,m.get(k,0)+s[mkey].get(k,0)) for k in allmkey)
|
||||
|
||||
if mergeIds:
|
||||
s['merged'].append(seq.id)
|
||||
|
||||
if taxonomy is not None:
|
||||
mergeTaxonomyClassification(seqs, taxonomy)
|
||||
|
||||
|
||||
for s in seqs[0:options.number]:
|
||||
s['class']=c
|
||||
s['__@TOWRITE@__']=True
|
||||
|
||||
print >>sys.stderr,"\Writing sequences...\n"
|
||||
progressBar(1,nbseq,True,'Writing')
|
||||
|
||||
i=0
|
||||
for c in classes:
|
||||
seqs = classes[c]
|
||||
for s in seqs:
|
||||
i+=1
|
||||
progressBar(i,nbseq,False,"Writing")
|
||||
if '__@TOWRITE@__' in s:
|
||||
del s['__@TOWRITE@__']
|
||||
del s['select']
|
||||
writer(s)
|
||||
|
||||
print >>sys.stderr
|
||||
|
||||
+3
-3
@@ -320,9 +320,9 @@ if __name__ == '__main__':
|
||||
parent[3],parent[0],options.taxonomy._ranks[parent[1]])
|
||||
localdata=True
|
||||
|
||||
for n in options.newname:
|
||||
tx = t.split(t,':')
|
||||
taxid = options.taxonomy.addPreferedName(tx[0].strip(),tx[1])
|
||||
for t in options.newname:
|
||||
tx = t.split(':')
|
||||
taxid = options.taxonomy.addPreferedName(int(tx[1]), tx[0].strip())
|
||||
print "name : %8d\t->\t%s" % (taxid,options.taxonomy.getPreferedName(taxid))
|
||||
|
||||
ecoTaxonomyWriter(options.ecodb,options.taxonomy,onlyLocal=True)
|
||||
|
||||
@@ -269,7 +269,7 @@ class Taxonomy(object):
|
||||
|
||||
def addPreferedName(self,taxid,name):
|
||||
idx = self.findIndex(taxid)
|
||||
self._preferedName.append(name,'obi',idx)
|
||||
self._preferedName.append([name,'obi',idx])
|
||||
self._preferedidx[idx]=name
|
||||
return taxid
|
||||
|
||||
@@ -399,10 +399,10 @@ class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
|
||||
for t in self._taxonomy[self._localtaxon:]:
|
||||
self._name.append((t[3],'scientific name',i))
|
||||
i+=1
|
||||
|
||||
|
||||
try :
|
||||
self._preferedName = [(x[0],'obi',x[2])
|
||||
for x in self.__ecoNameIterator(self._preferedNamesFile,noError=True)]
|
||||
for x in self.__ecoNameIterator(self._preferedNamesFile)]
|
||||
print >> sys.stderr, " [INFO : Preferred taxon name file found] : %d added taxa" % len(self._preferedName)
|
||||
except:
|
||||
print >> sys.stderr, " [INFO : Preferred taxon name file not found]"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
major = 1
|
||||
minor = 2
|
||||
serial= '0'
|
||||
serial= '3'
|
||||
|
||||
version = "%2d.%02d %s" % (major,minor,serial)
|
||||
|
||||
Reference in New Issue
Block a user