Restore old obisort and add LCA functionality to obiannotate.

Former-commit-id: aecaacc9dae49f74bd888a8eb8140822d31a42a6
This commit is contained in:
2023-05-02 10:43:22 +02:00
parent e9dcacbf24
commit 60c187404d
5 changed files with 190 additions and 6 deletions

View File

@@ -70,11 +70,11 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
var w obiseq.SeqWorker
w = nil
for a,e := range expression {
for a, e := range expression {
if w == nil {
w = obiseq.EditAttributeWorker(a,e)
w = obiseq.EditAttributeWorker(a, e)
} else {
w.ChainWorkers(obiseq.EditAttributeWorker(a,e))
w.ChainWorkers(obiseq.EditAttributeWorker(a, e))
}
}
@@ -132,6 +132,12 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
annotator = annotator.ChainWorkers(w)
}
if CLIHasAddLCA() {
taxo := obigrep.CLILoadSelectedTaxonomy()
w := obitax.AddLCAWorker(taxo, CLILCASlotName(), CLILCAThreshold())
annotator = annotator.ChainWorkers(w)
}
if CLIHasSetLengthFlag() {
w := AddSeqLengthWorker()
annotator = annotator.ChainWorkers(w)

View File

@@ -22,6 +22,8 @@ var _clearAll = false
var _setSeqLength = false
var _uniqueID = false
var _ahoCorazick = ""
// _lcaSlot receives the value of the --add-lca option: the name of the slot
// that will hold the lowest common ancestor taxid. It defaults to the empty
// string, which CLIHasAddLCA treats as "option not requested".
var _lcaSlot = ""
// _lcaError receives the value of the --lca-error option. CLILCAThreshold
// returns 1 - _lcaError, so the default of 0.0 yields a threshold of 1.
var _lcaError = 0.0
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
// options.BoolVar(&_addRank, "seq-rank", _addRank,
@@ -38,6 +40,20 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))
options.StringVar(&_lcaSlot, "add-lca", _lcaSlot,
options.ArgName("SLOT_NAME"),
options.Description("From the taxonomic annotation of the sequence (taxid slot or merged_taxid slot), "+
"a new slot named <SLOT_NAME> is added with the taxid of the lowest common ancester corresponding "+
"to the current annotation."))
options.Float64Var(&_lcaError, "lca-error", _lcaError,
options.ArgName("#.###"),
options.Description("Error rate tolerated on the taxonomical discription during the lowest common "+
"ancestor. At most a fraction of lca-error of the taxonomic information can disagree with the "+
"estimated LCA."),
)
// options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
// options.Description("Forces sequence record ids to be unique."),
// )
@@ -155,9 +171,6 @@ func CLISetAttributeExpression() map[string]string {
return _evalAttribute
}
func CLIHasAhoCorasick() bool {
_, err := os.Stat(_ahoCorazick)
return err == nil
@@ -182,3 +195,15 @@ func CLIAhoCorazick() []string {
return lines
}
// CLILCASlotName reports the slot name supplied through the --add-lca
// command line option. It returns the empty string when the option has
// not been set.
func CLILCASlotName() string {
	name := _lcaSlot
	return name
}
// CLIHasAddLCA reports whether a slot name was supplied through the
// --add-lca command line option, i.e. whether _lcaSlot is non-empty.
func CLIHasAddLCA() bool {
	return len(_lcaSlot) > 0
}
// CLILCAThreshold converts the error rate given through the --lca-error
// command line option into its complement, 1 - _lcaError, and returns it.
// No range check is applied to the configured error rate.
func CLILCAThreshold() float64 {
	tolerated := _lcaError
	return 1.0 - tolerated
}