Patch a bug in obitag occurring when some reference sequences have a taxid that is absent from the taxonomy

This commit is contained in:
Eric Coissac
2025-03-27 16:45:02 +01:00
parent 2d52322876
commit 03b5ce9397
5 changed files with 27 additions and 11 deletions

View File

@ -68,11 +68,11 @@ if [[ ! -d "${INSTALL_DIR}/bin" ]]; then
exit 1 exit 1
fi fi
INSTALL_DIR="$(cd $INSTALL_DIR && pwd)" INSTALL_DIR="$(cd ${INSTALL_DIR} && pwd)"
echo WORK_DIR=$WORK_DIR 1>&2 echo "WORK_DIR=$WORK_DIR" 1>&2
echo INSTALL_DIR=$INSTALL_DIR 1>&2 echo "INSTALL_DIR=$INSTALL_DIR" 1>&2
echo OBITOOLS_PREFIX=$OBITOOLS_PREFIX 1>&2 echo "OBITOOLS_PREFIX=$OBITOOLS_PREFIX" 1>&2
pushd "$WORK_DIR"|| exit pushd "$WORK_DIR"|| exit

View File

@ -8,7 +8,7 @@ import (
// corresponds to the last commit, and not the one when the file will be // corresponds to the last commit, and not the one when the file will be
// committed // committed
var _Commit = "fd80249" var _Commit = "2d52322"
var _Version = "Release 4.4.0" var _Version = "Release 4.4.0"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.

View File

@ -27,7 +27,7 @@ type Taxon struct {
// Returns: // Returns:
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]". // - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
func (taxon *Taxon) String() string { func (taxon *Taxon) String() string {
if taxon == nil { if taxon == nil || taxon.Node == nil {
return "NA" return "NA"
} }
return taxon.Node.String(taxon.Taxonomy.code) return taxon.Node.String(taxon.Taxonomy.code)

View File

@ -145,3 +145,18 @@ func (slice *TaxonSlice) Set(index int, taxon *Taxon) *TaxonSlice {
return slice return slice
} }
// Push appends the node of taxon at the end of the slice and returns the
// receiver so that calls can be chained.
//
// The taxon must belong to the same taxonomy as the slice; otherwise
// log.Panic is called with an explanatory message.
func (slice *TaxonSlice) Push(taxon *Taxon) *TaxonSlice {
	sameTaxonomy := slice.taxonomy == taxon.Taxonomy

	if !sameTaxonomy {
		log.Panic("Cannot add taxon from a different taxonomy")
	}

	// Only the node is stored; the taxonomy is shared by the whole slice.
	nodes := append(slice.slice, taxon.Node)
	slice.slice = nodes

	return slice
}
// ReduceToSize truncates the slice so that it holds at most size taxa and
// returns the receiver so that calls can be chained.
//
// A size greater than or equal to the current length leaves the slice
// unchanged: re-slicing past the length (but within the capacity) would
// grow the slice and expose backing-array entries beyond its current
// length. A negative size panics, as any out-of-range slice expression does.
func (slice *TaxonSlice) ReduceToSize(size int) *TaxonSlice {
	// Only ever shrink: never let a "reduce" operation extend the slice.
	if size < len(slice.slice) {
		slice.slice = slice.slice[:size]
	}

	return slice
}

View File

@ -249,16 +249,16 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
[]*obikmer.Table4mer, []*obikmer.Table4mer,
len(references)) len(references))
taxa := taxo.NewTaxonSlice(references.Len(), references.Len()) taxa := taxo.NewTaxonSlice(0, references.Len())
buffer := make([]byte, 0, 1000) buffer := make([]byte, 0, 1000)
j := 0 j := 0
for _, seq := range references { for _, seq := range references {
references[j] = seq
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
taxon := seq.Taxon(taxo) taxon := seq.Taxon(taxo)
if taxon != nil { if taxon != nil && taxon.Node != nil {
taxa.Set(j, taxon) references[j] = seq
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
taxa.Push(taxon)
j++ j++
} else { } else {
obilog.Warnf("Taxid %s is not described in the taxonomy %s."+ obilog.Warnf("Taxid %s is not described in the taxonomy %s."+
@ -267,6 +267,7 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
} }
} }
log.Infof("%d reference sequences conserved on %d", j, len(references))
references = references[:j] references = references[:j]
refcounts = refcounts[:j] refcounts = refcounts[:j]