mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Changes to be committed:
modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitag2/main.go modified: go.mod modified: go.sum modified: pkg/obiformats/ncbitaxdump/read.go modified: pkg/obioptions/version.go modified: pkg/obiseq/attributes.go modified: pkg/obiseq/taxonomy_lca.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obiseq/taxonomy_predicate.go modified: pkg/obitax/inner.go modified: pkg/obitax/lca.go new file: pkg/obitax/taxid.go modified: pkg/obitax/taxon.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonslice.go modified: pkg/obitools/obicleandb/obicleandb.go modified: pkg/obitools/obigrep/options.go modified: pkg/obitools/obilandmark/obilandmark.go modified: pkg/obitools/obilandmark/options.go modified: pkg/obitools/obirefidx/famlilyindexing.go modified: pkg/obitools/obirefidx/geomindexing.go modified: pkg/obitools/obirefidx/obirefidx.go modified: pkg/obitools/obirefidx/options.go modified: pkg/obitools/obitag/obigeomtag.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitag/options.go modified: pkg/obiutils/strings.go
This commit is contained in:
@ -7,8 +7,8 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
@ -44,9 +44,9 @@ func main() {
|
|||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||||
|
|
||||||
taxo, error := obifind.CLILoadSelectedTaxonomy()
|
taxo := obitax.DefaultTaxonomy()
|
||||||
if error != nil {
|
if taxo == nil {
|
||||||
log.Panicln(error)
|
log.Panicln("No loaded taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
references := obitag.CLIRefDB()
|
references := obitag.CLIRefDB()
|
||||||
|
@ -7,8 +7,8 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag2"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag2"
|
||||||
|
|
||||||
@ -45,9 +45,9 @@ func main() {
|
|||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||||
|
|
||||||
taxo, error := obifind.CLILoadSelectedTaxonomy()
|
taxo := obitax.DefaultTaxonomy()
|
||||||
if error != nil {
|
if taxo == nil {
|
||||||
log.Panicln(error)
|
log.Panicln("No loaded taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
references := obitag.CLIRefDB()
|
references := obitag.CLIRefDB()
|
||||||
|
1
go.mod
1
go.mod
@ -25,6 +25,7 @@ require (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/Clever/csvlint v0.3.0 // indirect
|
github.com/Clever/csvlint v0.3.0 // indirect
|
||||||
|
github.com/buger/jsonparser v1.1.1 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
||||||
github.com/kr/pretty v0.3.0 // indirect
|
github.com/kr/pretty v0.3.0 // indirect
|
||||||
|
2
go.sum
2
go.sum
@ -8,6 +8,8 @@ github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi
|
|||||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
||||||
|
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||||
|
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||||
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
||||||
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
|
@ -12,6 +12,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
|
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
|
||||||
@ -103,7 +104,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
|||||||
// if any of the files cannot be opened or read.
|
// if any of the files cannot be opened or read.
|
||||||
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", "[[:digit:]]")
|
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
||||||
|
|
||||||
//
|
//
|
||||||
// Load the Taxonomy nodes
|
// Load the Taxonomy nodes
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
// TODO: The version number is extracted from git. This induces that the version
|
// TODO: The version number is extracted from git. This induces that the version
|
||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
var _Commit = "00b0edc"
|
var _Commit = "f41a6fb"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
@ -389,18 +389,6 @@ func (s *BioSequence) SetCount(count int) {
|
|||||||
s.SetAttribute("count", count)
|
s.SetAttribute("count", count)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTaxid sets the taxid for the BioSequence.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
//
|
|
||||||
// taxid - the taxid to set.
|
|
||||||
func (s *BioSequence) SetTaxid(taxid string) {
|
|
||||||
if taxid == "" {
|
|
||||||
taxid = "NA"
|
|
||||||
}
|
|
||||||
s.SetAttribute("taxid", taxid)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *BioSequence) OBITagRefIndex(slot ...string) map[int]string {
|
func (s *BioSequence) OBITagRefIndex(slot ...string) map[int]string {
|
||||||
key := "obitag_ref_index"
|
key := "obitag_ref_index"
|
||||||
|
|
||||||
|
@ -123,7 +123,6 @@ func AddLCAWorker(taxonomy *obitax.Taxonomy, slot_name string, threshold float64
|
|||||||
sequence.SetAttribute(slot_name, lca.String())
|
sequence.SetAttribute(slot_name, lca.String())
|
||||||
sequence.SetAttribute(lca_name, lca.ScientificName())
|
sequence.SetAttribute(lca_name, lca.ScientificName())
|
||||||
sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000)
|
sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000)
|
||||||
|
|
||||||
return BioSequenceSlice{sequence}, nil
|
return BioSequenceSlice{sequence}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -12,6 +16,23 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
|||||||
return taxonomy.Taxon(taxid)
|
return taxonomy.Taxon(taxid)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTaxid sets the taxid for the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
//
|
||||||
|
// taxid - the taxid to set.
|
||||||
|
func (s *BioSequence) SetTaxid(taxid string) {
|
||||||
|
if taxid == "" {
|
||||||
|
taxid = "NA"
|
||||||
|
}
|
||||||
|
s.SetAttribute("taxid", taxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) SetTaxon(taxon *obitax.Taxon) {
|
||||||
|
taxid := taxon.String()
|
||||||
|
s.SetTaxid(taxid)
|
||||||
|
}
|
||||||
|
|
||||||
// Taxid returns the taxonomic ID associated with the BioSequence.
|
// Taxid returns the taxonomic ID associated with the BioSequence.
|
||||||
//
|
//
|
||||||
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
||||||
@ -25,7 +46,20 @@ func (s *BioSequence) Taxid() (taxid string) {
|
|||||||
taxid = s.taxon.String()
|
taxid = s.taxon.String()
|
||||||
ok = true
|
ok = true
|
||||||
} else {
|
} else {
|
||||||
taxid, ok = s.GetStringAttribute("taxid")
|
var ta interface{}
|
||||||
|
ta, ok = s.GetAttribute("taxid")
|
||||||
|
if ok {
|
||||||
|
switch tv := ta.(type) {
|
||||||
|
case string:
|
||||||
|
taxid = tv
|
||||||
|
case int:
|
||||||
|
taxid = fmt.Sprintf("%d", tv)
|
||||||
|
case float64:
|
||||||
|
taxid = fmt.Sprintf("%d", int(tv))
|
||||||
|
default:
|
||||||
|
log.Fatalf("Taxid: %v is not a string or an integer (%T)", ta, ta)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -47,14 +47,7 @@ func IsAValidTaxon(taxonomy *obitax.Taxonomy, withAutoCorrection ...bool) Sequen
|
|||||||
|
|
||||||
// A function that takes a taxonomy and a taxid as arguments and returns a function that takes a
|
// A function that takes a taxonomy and a taxid as arguments and returns a function that takes a
|
||||||
// pointer to a BioSequence as an argument and returns a boolean.
|
// pointer to a BioSequence as an argument and returns a boolean.
|
||||||
func IsSubCladeOf(taxonomy *obitax.Taxonomy, taxid string) SequencePredicate {
|
func IsSubCladeOf(taxonomy *obitax.Taxonomy, parent *obitax.Taxon) SequencePredicate {
|
||||||
parent := taxonomy.Taxon(taxid)
|
|
||||||
|
|
||||||
if parent == nil {
|
|
||||||
log.Fatalf("Cannot find taxon : %s in taxonomy %s",
|
|
||||||
taxid,
|
|
||||||
taxonomy.Name())
|
|
||||||
}
|
|
||||||
|
|
||||||
f := func(sequence *BioSequence) bool {
|
f := func(sequence *BioSequence) bool {
|
||||||
taxon := sequence.Taxon(taxonomy)
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import "sync"
|
import (
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
|
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
|
||||||
// The index map is used to store key-value pairs of strings.
|
// The index map is used to store key-value pairs of strings.
|
||||||
@ -31,10 +34,10 @@ func (i *InnerString) Innerize(value string) *string {
|
|||||||
defer i.lock.Unlock()
|
defer i.lock.Unlock()
|
||||||
s, ok := i.index[value]
|
s, ok := i.index[value]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
value = strings.Clone(value)
|
||||||
s = &value
|
s = &value
|
||||||
i.index[value] = s
|
i.index[value] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,11 +16,24 @@ import (
|
|||||||
// if either of the taxa is nil, if they are not in the same taxonomy, or
|
// if either of the taxa is nil, if they are not in the same taxonomy, or
|
||||||
// if the taxonomy is unrooted.
|
// if the taxonomy is unrooted.
|
||||||
func (t1 *Taxon) LCA(t2 *Taxon) (*Taxon, error) {
|
func (t1 *Taxon) LCA(t2 *Taxon) (*Taxon, error) {
|
||||||
if t1 == nil || t1.Node == nil {
|
if t1 == nil && t2 != nil {
|
||||||
return nil, fmt.Errorf("try to get LCA of nil taxon")
|
return t2, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if t2 == nil || t2.Node == nil {
|
if t2 == nil && t1 != nil {
|
||||||
|
|
||||||
|
return t1, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if t1 == nil && t2 == nil {
|
||||||
|
return nil, fmt.Errorf("try to get LCA of nil taxa")
|
||||||
|
}
|
||||||
|
|
||||||
|
if t1.Node == nil {
|
||||||
|
return nil, fmt.Errorf("try to get LCA of nil taxa")
|
||||||
|
}
|
||||||
|
|
||||||
|
if t2.Node == nil {
|
||||||
return nil, fmt.Errorf("try to get LCA of nil taxon")
|
return nil, fmt.Errorf("try to get LCA of nil taxon")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
60
pkg/obitax/taxid.go
Normal file
60
pkg/obitax/taxid.go
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
package obitax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Taxid represents a taxonomic identifier as a pointer to a string.
|
||||||
|
type Taxid *string
|
||||||
|
|
||||||
|
// TaxidFactory is a factory for creating Taxid instances from strings and integers.
|
||||||
|
type TaxidFactory struct {
|
||||||
|
inner *InnerString
|
||||||
|
code string
|
||||||
|
alphabet obiutils.AsciiSet
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTaxidFactory creates and returns a new instance of TaxidFactory.
|
||||||
|
func NewTaxidFactory(code string, alphabet obiutils.AsciiSet) *TaxidFactory {
|
||||||
|
return &TaxidFactory{
|
||||||
|
inner: NewInnerString(),
|
||||||
|
code: code + ":",
|
||||||
|
alphabet: alphabet,
|
||||||
|
}
|
||||||
|
// Initialize and return a new TaxidFactory.
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromString converts a string representation of a taxonomic identifier into a Taxid.
|
||||||
|
// It extracts the relevant part of the string after the first colon (':') if present.
|
||||||
|
func (f *TaxidFactory) FromString(taxid string) (Taxid, error) {
|
||||||
|
taxid = obiutils.AsciiSpaceSet.TrimLeft(taxid)
|
||||||
|
part1, part2 := obiutils.SplitInTwo(taxid, ':')
|
||||||
|
if len(part2) == 0 {
|
||||||
|
taxid = part1
|
||||||
|
} else {
|
||||||
|
if part1 != f.code {
|
||||||
|
return nil, fmt.Errorf("taxid %s string does not start with taxonomy code %s", taxid, f.code)
|
||||||
|
}
|
||||||
|
taxid = part2
|
||||||
|
}
|
||||||
|
|
||||||
|
taxid, err := f.alphabet.FirstWord(taxid) // Get the first word from the input string.
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a new Taxid by innerizing the extracted taxid string.
|
||||||
|
rep := Taxid(f.inner.Innerize(taxid))
|
||||||
|
return rep, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromInt converts an integer taxonomic identifier into a Taxid.
|
||||||
|
// It first converts the integer to a string and then innerizes it.
|
||||||
|
func (f *TaxidFactory) FromInt(taxid int) (Taxid, error) {
|
||||||
|
s := strconv.Itoa(taxid) // Convert the integer to a string.
|
||||||
|
return f.inner.Innerize(s), nil // Return a new Taxid by innerizing the string.
|
||||||
|
}
|
@ -179,9 +179,9 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Path returns a slice of TaxNode representing the path from the current Taxon
|
// Path returns a slice of TaxNode representing the path from the current Taxon.
|
||||||
// to the root Taxon in the associated Taxonomy. It collects all the nodes in the path
|
// The first element of the slice is the current Taxon, and the last element is the
|
||||||
// using the IPath method and returns them as a TaxonSlice.
|
// to the root Taxon in the associated Taxonomy.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to a TaxonSlice containing the TaxNode instances in the path
|
// - A pointer to a TaxonSlice containing the TaxNode instances in the path
|
||||||
@ -371,3 +371,11 @@ func (taxon *Taxon) MetadataStringValues() []string {
|
|||||||
}
|
}
|
||||||
return values
|
return values
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxon *Taxon) SameAs(other *Taxon) bool {
|
||||||
|
if taxon == nil || other == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
||||||
|
}
|
||||||
|
@ -8,9 +8,10 @@ and retrieving information about taxa.
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -32,13 +33,12 @@ import (
|
|||||||
type Taxonomy struct {
|
type Taxonomy struct {
|
||||||
name string
|
name string
|
||||||
code string
|
code string
|
||||||
ids *InnerString
|
ids *TaxidFactory
|
||||||
ranks *InnerString
|
ranks *InnerString
|
||||||
nameclasses *InnerString
|
nameclasses *InnerString
|
||||||
names *InnerString
|
names *InnerString
|
||||||
nodes *TaxonSet
|
nodes *TaxonSet
|
||||||
root *TaxNode
|
root *TaxNode
|
||||||
matcher *regexp.Regexp
|
|
||||||
index map[*string]*TaxonSet
|
index map[*string]*TaxonSet
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,21 +52,18 @@ type Taxonomy struct {
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the newly created Taxonomy instance.
|
// - A pointer to the newly created Taxonomy instance.
|
||||||
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
func NewTaxonomy(name, code string, codeCharacters obiutils.AsciiSet) *Taxonomy {
|
||||||
set := make(map[*string]*TaxNode)
|
set := make(map[*string]*TaxNode)
|
||||||
|
|
||||||
matcher := regexp.MustCompile(fmt.Sprintf("^[[:blank:]]*(%s:)?(%s+)", code, codeCharacters))
|
|
||||||
|
|
||||||
taxonomy := &Taxonomy{
|
taxonomy := &Taxonomy{
|
||||||
name: name,
|
name: name,
|
||||||
code: code,
|
code: code,
|
||||||
ids: NewInnerString(),
|
ids: NewTaxidFactory(code, codeCharacters),
|
||||||
ranks: NewInnerString(),
|
ranks: NewInnerString(),
|
||||||
nameclasses: NewInnerString(),
|
nameclasses: NewInnerString(),
|
||||||
names: NewInnerString(),
|
names: NewInnerString(),
|
||||||
nodes: &TaxonSet{set: set},
|
nodes: &TaxonSet{set: set},
|
||||||
root: nil,
|
root: nil,
|
||||||
matcher: matcher,
|
|
||||||
index: make(map[*string]*TaxonSet),
|
index: make(map[*string]*TaxonSet),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,23 +82,17 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The taxon identifier as a *string corresponding to the provided taxid.
|
// - The taxon identifier as a *string corresponding to the provided taxid.
|
||||||
// - An error if the taxid is not valid or cannot be converted.
|
// - An error if the taxid is not valid or cannot be converted.
|
||||||
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
|
func (taxonomy *Taxonomy) Id(taxid string) (Taxid, error) {
|
||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
|
|
||||||
if taxonomy == nil {
|
if taxonomy == nil {
|
||||||
return nil, fmt.Errorf("Cannot extract Id from nil Taxonomy")
|
return nil, errors.New("Cannot extract Id from nil Taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
matches := taxonomy.matcher.FindStringSubmatch(taxid)
|
return taxonomy.ids.FromString(taxid)
|
||||||
|
|
||||||
if matches == nil {
|
|
||||||
return nil, fmt.Errorf("taxid %s is not a valid taxid", taxid)
|
|
||||||
}
|
|
||||||
|
|
||||||
return taxonomy.ids.Innerize(matches[2]), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
|
// TaxidString retrieves the string representation of a taxon node identified by the given ID.
|
||||||
// It looks up the node in the taxonomy and returns its formatted string representation
|
// It looks up the node in the taxonomy and returns its formatted string representation
|
||||||
// along with the taxonomy code. If the node does not exist, it returns an error.
|
// along with the taxonomy code. If the node does not exist, it returns an error.
|
||||||
//
|
//
|
||||||
@ -111,7 +102,7 @@ func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
||||||
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
||||||
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
|
func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
|
|
||||||
pid, err := taxonomy.Id(id)
|
pid, err := taxonomy.Id(id)
|
||||||
|
@ -13,6 +13,7 @@ package obitax
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
@ -59,6 +60,16 @@ func (slice *TaxonSlice) Get(i int) *TaxNode {
|
|||||||
return slice.slice[i]
|
return slice.slice[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (slice *TaxonSlice) Taxon(i int) *Taxon {
|
||||||
|
if slice == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &Taxon{
|
||||||
|
Node: slice.slice[i],
|
||||||
|
Taxonomy: slice.taxonomy,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Len returns the number of TaxNode instances in the TaxonSlice.
|
// Len returns the number of TaxNode instances in the TaxonSlice.
|
||||||
// It provides the count of taxon nodes contained within the slice.
|
// It provides the count of taxon nodes contained within the slice.
|
||||||
//
|
//
|
||||||
@ -124,3 +135,13 @@ func (slice *TaxonSlice) Reverse(inplace bool) *TaxonSlice {
|
|||||||
slice: rep,
|
slice: rep,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (slice *TaxonSlice) Set(index int, taxon *Taxon) *TaxonSlice {
|
||||||
|
if slice.taxonomy != taxon.Taxonomy {
|
||||||
|
log.Panic("Cannot add taxon from a different taxonomy")
|
||||||
|
}
|
||||||
|
|
||||||
|
slice.slice[index] = taxon.Node
|
||||||
|
|
||||||
|
return slice
|
||||||
|
}
|
||||||
|
@ -250,22 +250,26 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
if len(obigrep.CLIRequiredRanks()) > 0 {
|
if len(obigrep.CLIRequiredRanks()) > 0 {
|
||||||
rankPredicate = obigrep.CLIHasRankDefinedPredicate()
|
rankPredicate = obigrep.CLIHasRankDefinedPredicate()
|
||||||
} else {
|
} else {
|
||||||
rankPredicate = taxonomy.HasRequiredRank("species").And(taxonomy.HasRequiredRank("genus")).And(taxonomy.HasRequiredRank("family"))
|
rankPredicate = obiseq.HasRequiredRank(
|
||||||
|
taxonomy, "species",
|
||||||
|
).And(obiseq.HasRequiredRank(
|
||||||
|
taxonomy, "genus",
|
||||||
|
)).And(obiseq.HasRequiredRank(taxonomy, "family"))
|
||||||
}
|
}
|
||||||
|
|
||||||
goodTaxa := taxonomy.IsAValidTaxon(CLIUpdateTaxids()).And(rankPredicate)
|
goodTaxa := obiseq.IsAValidTaxon(taxonomy, CLIUpdateTaxids()).And(rankPredicate)
|
||||||
|
|
||||||
usable := unique.FilterOn(goodTaxa,
|
usable := unique.FilterOn(goodTaxa,
|
||||||
obioptions.CLIBatchSize(),
|
obioptions.CLIBatchSize(),
|
||||||
obioptions.CLIParallelWorkers())
|
obioptions.CLIParallelWorkers())
|
||||||
|
|
||||||
annotated := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
annotated := usable.MakeIWorker(obiseq.MakeSetSpeciesWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
|
).MakeIWorker(obiseq.MakeSetGenusWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
|
).MakeIWorker(obiseq.MakeSetFamilyWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
)
|
)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
package obigrep
|
package obigrep
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@ -16,7 +15,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _BelongTaxa = make([]string, 0)
|
var _BelongTaxa = make([]string, 0)
|
||||||
var _NotBelongTaxa = make([]int, 0)
|
var _NotBelongTaxa = make([]string, 0)
|
||||||
var _RequiredRanks = make([]string, 0)
|
var _RequiredRanks = make([]string, 0)
|
||||||
|
|
||||||
var _MinimumLength = 1
|
var _MinimumLength = 1
|
||||||
@ -60,7 +59,7 @@ func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.ArgName("TAXID"),
|
options.ArgName("TAXID"),
|
||||||
options.Description("Require that the actual taxon of the sequence belongs the provided taxid."))
|
options.Description("Require that the actual taxon of the sequence belongs the provided taxid."))
|
||||||
|
|
||||||
options.IntSliceVar(&_NotBelongTaxa, "ignore-taxon", 1, 1,
|
options.StringSliceVar(&_NotBelongTaxa, "ignore-taxon", 1, 1,
|
||||||
options.Alias("i"),
|
options.Alias("i"),
|
||||||
options.ArgName("TAXID"),
|
options.ArgName("TAXID"),
|
||||||
options.Description("Require that the actual taxon of the sequence doesn't belong the provided taxid."))
|
options.Description("Require that the actual taxon of the sequence doesn't belong the provided taxid."))
|
||||||
@ -273,18 +272,18 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
if len(_BelongTaxa) > 0 {
|
if len(_BelongTaxa) > 0 {
|
||||||
taxonomy := CLILoadSelectedTaxonomy()
|
taxonomy := CLILoadSelectedTaxonomy()
|
||||||
|
|
||||||
taxid, err := strconv.Atoi(_BelongTaxa[0])
|
taxon := taxonomy.Taxon(_BelongTaxa[0])
|
||||||
if err != nil {
|
if taxon == nil {
|
||||||
p = taxonomy.IsSubCladeOfSlot(_BelongTaxa[0])
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
||||||
} else {
|
} else {
|
||||||
p = taxonomy.IsSubCladeOf(taxid)
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
for _, staxid := range _BelongTaxa[1:] {
|
for _, staxid := range _BelongTaxa[1:] {
|
||||||
taxid, err := strconv.Atoi(staxid)
|
taxon := taxonomy.Taxon(staxid)
|
||||||
if err != nil {
|
if taxon == nil {
|
||||||
p2 = taxonomy.IsSubCladeOfSlot(staxid)
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
||||||
} else {
|
} else {
|
||||||
p2 = taxonomy.IsSubCladeOf(taxid)
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
p = p.Or(p2)
|
p = p.Or(p2)
|
||||||
@ -297,13 +296,28 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||||
|
var p obiseq.SequencePredicate
|
||||||
|
var p2 obiseq.SequencePredicate
|
||||||
|
|
||||||
if len(_NotBelongTaxa) > 0 {
|
if len(_NotBelongTaxa) > 0 {
|
||||||
taxonomy := CLILoadSelectedTaxonomy()
|
taxonomy := CLILoadSelectedTaxonomy()
|
||||||
p := taxonomy.IsSubCladeOf(_NotBelongTaxa[0])
|
|
||||||
|
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||||
|
if taxon == nil {
|
||||||
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
||||||
|
} else {
|
||||||
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
|
}
|
||||||
|
|
||||||
for _, taxid := range _NotBelongTaxa[1:] {
|
for _, taxid := range _NotBelongTaxa[1:] {
|
||||||
p = p.Or(taxonomy.IsSubCladeOf(taxid))
|
taxon := taxonomy.Taxon(taxid)
|
||||||
|
if taxon == nil {
|
||||||
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
||||||
|
} else {
|
||||||
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
|
}
|
||||||
|
|
||||||
|
p = p.Or(p2)
|
||||||
}
|
}
|
||||||
|
|
||||||
return p.Not()
|
return p.Not()
|
||||||
@ -316,10 +330,10 @@ func CLIHasRankDefinedPredicate() obiseq.SequencePredicate {
|
|||||||
|
|
||||||
if len(_RequiredRanks) > 0 {
|
if len(_RequiredRanks) > 0 {
|
||||||
taxonomy := CLILoadSelectedTaxonomy()
|
taxonomy := CLILoadSelectedTaxonomy()
|
||||||
p := taxonomy.HasRequiredRank(_RequiredRanks[0])
|
p := obiseq.HasRequiredRank(taxonomy, _RequiredRanks[0])
|
||||||
|
|
||||||
for _, rank := range _RequiredRanks[1:] {
|
for _, rank := range _RequiredRanks[1:] {
|
||||||
p = p.And(taxonomy.HasRequiredRank(rank))
|
p = p.And(obiseq.HasRequiredRank(taxonomy, rank))
|
||||||
}
|
}
|
||||||
|
|
||||||
return p
|
return p
|
||||||
|
@ -11,7 +11,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
@ -155,19 +154,20 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if obifind.CLIHasSelectedTaxonomy() {
|
if obioptions.CLIHasSelectedTaxonomy() {
|
||||||
taxo, err := obifind.CLILoadSelectedTaxonomy()
|
taxo := obitax.DefaultTaxonomy()
|
||||||
if err != nil {
|
if taxo == nil {
|
||||||
log.Fatal(err)
|
log.Fatal("No taxonomy available")
|
||||||
}
|
}
|
||||||
|
|
||||||
taxa := make(obitax.TaxonSet, len(library))
|
taxa := obitax.DefaultTaxonomy().NewTaxonSlice(len(library), len(library))
|
||||||
|
|
||||||
for i, seq := range library {
|
for i, seq := range library {
|
||||||
taxa[i], err = taxo.Taxon(seq.Taxid())
|
taxon := seq.Taxon(taxo)
|
||||||
if err != nil {
|
if taxon == nil {
|
||||||
log.Fatal(err)
|
log.Fatal("%s: Cannot identify taxid %s in %s", seq.Id(), seq.Taxid(), taxo.Name())
|
||||||
}
|
}
|
||||||
|
taxa.Set(i, taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
pbopt := make([]progressbar.Option, 0, 5)
|
pbopt := make([]progressbar.Option, 0, 5)
|
||||||
@ -182,7 +182,7 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
|||||||
bar := progressbar.NewOptions(len(library), pbopt...)
|
bar := progressbar.NewOptions(len(library), pbopt...)
|
||||||
|
|
||||||
for i, seq := range library {
|
for i, seq := range library {
|
||||||
idx := obirefidx.GeomIndexSesquence(i, library, &taxa, taxo)
|
idx := obirefidx.GeomIndexSesquence(i, library, taxa, taxo)
|
||||||
seq.SetOBITagGeomRefIndex(idx)
|
seq.SetOBITagGeomRefIndex(idx)
|
||||||
|
|
||||||
if i%10 == 0 {
|
if i%10 == 0 {
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
package obilandmark
|
package obilandmark
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ func LandmarkOptionSet(options *getoptions.GetOpt) {
|
|||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.InputOptionSet(options)
|
obiconvert.InputOptionSet(options)
|
||||||
obiconvert.OutputOptionSet(options)
|
obiconvert.OutputOptionSet(options)
|
||||||
obifind.LoadTaxonomyOptionSet(options, false, false)
|
obioptions.LoadTaxonomyOptionSet(options, false, false)
|
||||||
LandmarkOptionSet(options)
|
LandmarkOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,7 +13,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@ -73,9 +72,7 @@ func MakeIndexingSliceWorker(indexslot, idslot string,
|
|||||||
[]*obikmer.Table4mer,
|
[]*obikmer.Table4mer,
|
||||||
len(sequences))
|
len(sequences))
|
||||||
|
|
||||||
taxa := make(
|
taxa := taxonomy.NewTaxonSlice(sequences.Len(), sequences.Len())
|
||||||
obitax.TaxonSet,
|
|
||||||
len(sequences))
|
|
||||||
|
|
||||||
for i, seq := range sequences {
|
for i, seq := range sequences {
|
||||||
j, ok := seq.GetIntAttribute(idslot)
|
j, ok := seq.GetIntAttribute(idslot)
|
||||||
@ -85,8 +82,11 @@ func MakeIndexingSliceWorker(indexslot, idslot string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
kmercounts[i] = (*kmers)[j]
|
kmercounts[i] = (*kmers)[j]
|
||||||
|
taxon := seq.Taxon(taxonomy)
|
||||||
taxa[i], _ = taxonomy.Taxon(seq.Taxid())
|
if taxon == nil {
|
||||||
|
log.Panicf("%s: Cannot get the taxon %s in %s", seq.Id(), seq.Taxid(), taxonomy.Name())
|
||||||
|
}
|
||||||
|
taxa.Set(i, taxon)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,7 +103,7 @@ func MakeIndexingSliceWorker(indexslot, idslot string,
|
|||||||
f := func() {
|
f := func() {
|
||||||
for l := range limits {
|
for l := range limits {
|
||||||
for i := l[0]; i < l[1]; i++ {
|
for i := l[0]; i < l[1]; i++ {
|
||||||
idx := IndexSequence(i, sequences, &kmercounts, &taxa, taxonomy)
|
idx := IndexSequence(i, sequences, &kmercounts, taxa, taxonomy)
|
||||||
sequences[i].SetAttribute(indexslot, idx)
|
sequences[i].SetAttribute(indexslot, idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -134,7 +134,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
nref := len(references)
|
nref := len(references)
|
||||||
log.Infof("Done. Database contains %d sequences", nref)
|
log.Infof("Done. Database contains %d sequences", nref)
|
||||||
|
|
||||||
taxonomy, error := obifind.CLILoadSelectedTaxonomy()
|
taxonomy, error := obioptions.CLILoadSelectedTaxonomy()
|
||||||
if error != nil {
|
if error != nil {
|
||||||
log.Panicln(error)
|
log.Panicln(error)
|
||||||
}
|
}
|
||||||
@ -155,13 +155,13 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
log.Info("done")
|
log.Info("done")
|
||||||
|
|
||||||
partof := obiiter.IBatchOver(source, references,
|
partof := obiiter.IBatchOver(source, references,
|
||||||
obioptions.CLIBatchSize()).MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
obioptions.CLIBatchSize()).MakeIWorker(obiseq.MakeSetSpeciesWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
|
).MakeIWorker(obiseq.MakeSetGenusWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
|
).MakeIWorker(obiseq.MakeSetFamilyWorker(taxonomy),
|
||||||
false,
|
false,
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
)
|
)
|
||||||
@ -187,14 +187,20 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
|
|
||||||
clusters := obiseq.MakeBioSequenceSlice(0)
|
clusters := obiseq.MakeBioSequenceSlice(0)
|
||||||
kcluster := make([]*obikmer.Table4mer, 0)
|
kcluster := make([]*obikmer.Table4mer, 0)
|
||||||
taxa := make(obitax.TaxonSet, 0)
|
taxa := taxonomy.NewTaxonSlice(references.Len(), references.Len())
|
||||||
|
|
||||||
j := 0
|
j := 0
|
||||||
for i, seq := range references {
|
for i, seq := range references {
|
||||||
if is_centrer, _ := seq.GetBoolAttribute("reffamidx_clusterhead"); is_centrer {
|
if is_centrer, _ := seq.GetBoolAttribute("reffamidx_clusterhead"); is_centrer {
|
||||||
clusters = append(clusters, seq)
|
clusters = append(clusters, seq)
|
||||||
kcluster = append(kcluster, refcounts[i])
|
kcluster = append(kcluster, refcounts[i])
|
||||||
taxa[j], _ = taxonomy.Taxon(seq.Taxid())
|
|
||||||
|
taxon := seq.Taxon(taxonomy)
|
||||||
|
if taxon == nil {
|
||||||
|
log.Panicf("%s: Cannot get the taxon %s in %s", seq.Id(), seq.Taxid(), taxonomy.Name())
|
||||||
|
}
|
||||||
|
taxa.Set(j, taxon)
|
||||||
|
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -225,7 +231,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
f := func() {
|
f := func() {
|
||||||
for l := range limits {
|
for l := range limits {
|
||||||
for i := l[0]; i < l[1]; i++ {
|
for i := l[0]; i < l[1]; i++ {
|
||||||
idx := IndexSequence(i, clusters, &kcluster, &taxa, taxonomy)
|
idx := IndexSequence(i, clusters, &kcluster, taxa, taxonomy)
|
||||||
clusters[i].SetOBITagRefIndex(idx)
|
clusters[i].SetOBITagRefIndex(idx)
|
||||||
bar.Add(1)
|
bar.Add(1)
|
||||||
}
|
}
|
||||||
|
@ -2,10 +2,11 @@ package obirefidx
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
||||||
@ -15,7 +16,7 @@ import (
|
|||||||
|
|
||||||
func GeomIndexSesquence(seqidx int,
|
func GeomIndexSesquence(seqidx int,
|
||||||
references obiseq.BioSequenceSlice,
|
references obiseq.BioSequenceSlice,
|
||||||
taxa *obitax.TaxonSet,
|
taxa *obitax.TaxonSlice,
|
||||||
taxo *obitax.Taxonomy) map[int]string {
|
taxo *obitax.Taxonomy) map[int]string {
|
||||||
|
|
||||||
sequence := references[seqidx]
|
sequence := references[seqidx]
|
||||||
@ -56,28 +57,28 @@ func GeomIndexSesquence(seqidx int,
|
|||||||
|
|
||||||
order := obiutils.Order(sort.IntSlice(seq_dist))
|
order := obiutils.Order(sort.IntSlice(seq_dist))
|
||||||
|
|
||||||
lca := (*taxa)[seqidx]
|
lca := taxa.Taxon(seqidx)
|
||||||
|
|
||||||
index := make(map[int]string)
|
index := make(map[int]string)
|
||||||
index[0] = fmt.Sprintf(
|
index[0] = fmt.Sprintf(
|
||||||
"%d@%s@%s",
|
"%s@%s",
|
||||||
lca.Taxid(),
|
lca.String(),
|
||||||
lca.ScientificName(),
|
lca.Rank(),
|
||||||
lca.Rank())
|
)
|
||||||
|
|
||||||
for _, o := range order {
|
for _, o := range order {
|
||||||
new_lca, _ := lca.LCA((*taxa)[o])
|
new_lca, _ := lca.LCA(taxa.Taxon(o))
|
||||||
if new_lca.Taxid() != lca.Taxid() {
|
if new_lca.SameAs(lca) {
|
||||||
lca = new_lca
|
lca = new_lca
|
||||||
index[int(seq_dist[o])] = fmt.Sprintf(
|
index[int(seq_dist[o])] = fmt.Sprintf(
|
||||||
"%d@%s@%s",
|
"%s@%s",
|
||||||
lca.Taxid(),
|
lca.String(),
|
||||||
lca.ScientificName(),
|
lca.Rank(),
|
||||||
lca.Rank())
|
)
|
||||||
}
|
|
||||||
|
|
||||||
if lca.Taxid() == 1 {
|
if lca.IsRoot() {
|
||||||
break
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,102 +12,171 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// IndexSequence processes a biological sequence and indexes it based on taxonomic information.
|
||||||
|
// It computes the least common ancestors (LCA) for the sequence and its references,
|
||||||
|
// evaluates common k-mers, and calculates differences in alignment scores.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - seqidx: The index of the sequence to process within the references slice.
|
||||||
|
// - references: A slice of biological sequences to compare against.
|
||||||
|
// - kmers: A pointer to a slice of k-mer tables used for common k-mer calculations.
|
||||||
|
// - taxa: A slice of taxonomic information corresponding to the sequences.
|
||||||
|
// - taxo: A taxonomy object used for LCA calculations.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// A map where the keys are integers representing alignment differences,
|
||||||
|
// and the values are strings formatted as "Taxon@Rank" indicating the taxonomic
|
||||||
|
// classification of the sequence based on the computed differences.
|
||||||
func IndexSequence(seqidx int,
|
func IndexSequence(seqidx int,
|
||||||
references obiseq.BioSequenceSlice,
|
references obiseq.BioSequenceSlice,
|
||||||
kmers *[]*obikmer.Table4mer,
|
kmers *[]*obikmer.Table4mer,
|
||||||
taxa *obitax.TaxonSet,
|
taxa *obitax.TaxonSlice,
|
||||||
taxo *obitax.Taxonomy) map[int]string {
|
taxo *obitax.Taxonomy) map[int]string {
|
||||||
|
|
||||||
|
// Retrieve the biological sequence at the specified index from the references slice.
|
||||||
sequence := references[seqidx]
|
sequence := references[seqidx]
|
||||||
|
seq_len := sequence.Len()
|
||||||
|
// Get the taxon corresponding to the current sequence index.
|
||||||
|
tseq := taxa.Taxon(seqidx)
|
||||||
|
|
||||||
|
// Get the taxonomic path for the current sequence.
|
||||||
|
pseq := tseq.Path()
|
||||||
|
path_len := pseq.Len()
|
||||||
|
|
||||||
|
// For each taxonomic ancestor in the path, a biosequence slice is created to store
|
||||||
|
// the reference sequences having that ancestor as their LCA with the current sequence.
|
||||||
|
refs := make(map[*obitax.TaxNode]*[]int, path_len)
|
||||||
|
|
||||||
|
for i := 0; i < path_len; i++ {
|
||||||
|
temp := make([]int, 0, 100)
|
||||||
|
refs[pseq.Taxon(i).Node] = &temp
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Infof("%s length of path: %d", sequence.Id(), len(refs))
|
||||||
|
|
||||||
|
n := taxa.Len()
|
||||||
|
lcaCache := make(map[*obitax.TaxNode]*obitax.TaxNode, n)
|
||||||
|
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
taxon := taxa.Taxon(i) // Get the taxon at index i.
|
||||||
|
// Compute the LCA between the current taxon and the taxon of the sequence.
|
||||||
|
node, ok := lcaCache[taxon.Node]
|
||||||
|
if !ok {
|
||||||
|
lca, err := tseq.LCA(taxon)
|
||||||
|
if err != nil {
|
||||||
|
// Log a fatal error if the LCA computation fails, including the taxon details.
|
||||||
|
log.Fatalf("(%s,%s): %+v", tseq.String(), taxon.String(), err)
|
||||||
|
}
|
||||||
|
node = lca.Node
|
||||||
|
lcaCache[taxon.Node] = node
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Infof("%s Processing taxon: %s x %s -> %s", sequence.Id(), tseq.String(), taxon.String(), node.String(taxo.Code()))
|
||||||
|
|
||||||
|
// Append the current sequence to the LCA's reference sequence slice.
|
||||||
|
*refs[node] = append(*refs[node], i)
|
||||||
|
}
|
||||||
|
|
||||||
|
closest := make([]int, path_len)
|
||||||
|
|
||||||
|
closest[0] = 0
|
||||||
|
|
||||||
|
// Initialize a matrix to store alignment scores
|
||||||
var matrix []uint64
|
var matrix []uint64
|
||||||
|
|
||||||
lca := make(obitax.TaxonSet, len(references))
|
// log.Warnf("%s : %s", sequence.Id(), pseq.String())
|
||||||
tseq := (*taxa)[seqidx]
|
for idx_path := 1; idx_path < path_len; idx_path++ {
|
||||||
|
mini := -1
|
||||||
|
seqidcs := refs[pseq.Taxon(idx_path).Node]
|
||||||
|
|
||||||
for i, taxon := range *taxa {
|
ns := len(*seqidcs)
|
||||||
lca[i], _ = tseq.LCA(taxon)
|
|
||||||
}
|
|
||||||
|
|
||||||
cw := make([]int, len(references))
|
if ns > 0 {
|
||||||
sw := (*kmers)[seqidx]
|
|
||||||
for i, ref := range *kmers {
|
|
||||||
cw[i] = obikmer.Common4Mer(sw, ref)
|
|
||||||
}
|
|
||||||
|
|
||||||
ow := obiutils.Reverse(obiutils.IntOrder(cw), true)
|
shared := make([]int, ns)
|
||||||
pseq, _ := tseq.Path()
|
|
||||||
obiutils.Reverse(*pseq, true)
|
|
||||||
// score := make([]int, len(references))
|
|
||||||
mindiff := make([]int, len(*pseq))
|
|
||||||
/*
|
|
||||||
nseq := make([]int, len(*pseq))
|
|
||||||
nali := make([]int, len(*pseq))
|
|
||||||
nok := make([]int, len(*pseq))
|
|
||||||
nfast := make([]int, len(*pseq))
|
|
||||||
nfastok := make([]int, len(*pseq))
|
|
||||||
*/
|
|
||||||
|
|
||||||
lseq := sequence.Len()
|
for j, is := range *seqidcs {
|
||||||
|
shared[j] = obikmer.Common4Mer((*kmers)[seqidx], (*kmers)[is])
|
||||||
|
}
|
||||||
|
|
||||||
mini := -1
|
ow := obiutils.Reverse(obiutils.IntOrder(shared), true)
|
||||||
wordmin := 0
|
|
||||||
|
for _, order := range ow {
|
||||||
|
is := (*seqidcs)[order]
|
||||||
|
suject := references[is]
|
||||||
|
|
||||||
for i, ancestor := range *pseq {
|
|
||||||
for _, order := range ow {
|
|
||||||
if lca[order] == ancestor {
|
|
||||||
// nseq[i]++
|
|
||||||
if mini != -1 {
|
if mini != -1 {
|
||||||
wordmin = max(sequence.Len(), references[order].Len()) - 3 - 4*mini
|
wordmin := max(seq_len, suject.Len()) - 3 - 4*mini
|
||||||
}
|
|
||||||
|
// If the common k-mer count for the current order is less than the
|
||||||
if cw[order] < wordmin {
|
// minimum word length, break the loop.
|
||||||
break
|
if shared[order] < wordmin {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize variables for Longest Common Subsequence (LCS) score and alignment length.
|
||||||
lcs, alilength := -1, -1
|
lcs, alilength := -1, -1
|
||||||
errs := int(1e9)
|
errs := int(1e9) // Initialize errors to a large number.
|
||||||
if mini != -1 && mini <= 1 {
|
|
||||||
// nfast[i]++
|
// If mini is set and less than or equal to 1, perform a specific alignment.
|
||||||
d, _, _, _ := obialign.D1Or0(sequence, references[order])
|
if mini == 0 || mini == 1 {
|
||||||
if d >= 0 {
|
// Perform a specific alignment and get the distance.
|
||||||
errs = d
|
d, _, _, _ := obialign.D1Or0(sequence, suject)
|
||||||
// nfastok[i]++
|
if d >= 0 { // If the distance is valid (non-negative).
|
||||||
|
errs = d // Update errors with the distance.
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// nali[i]++
|
// Perform a Fast LCS score calculation for the sequence and reference.
|
||||||
lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix)
|
lcs, alilength = obialign.FastLCSScore(sequence, suject, mini, &matrix)
|
||||||
if lcs >= 0 {
|
if lcs >= 0 { // If LCS score is valid (non-negative).
|
||||||
// nok[i]++
|
errs = alilength - lcs // Calculate errors based on alignment length.
|
||||||
errs = alilength - lcs
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update mini with the minimum errors found.
|
||||||
if mini == -1 || errs < mini {
|
if mini == -1 || errs < mini {
|
||||||
mini = errs
|
mini = errs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if mini == 0 {
|
||||||
|
// log.Warnf("%s: %s", sequence.Id(), sequence.String())
|
||||||
|
// log.Warnf("%s: %s", suject.Id(), suject.String())
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if mini == -1 {
|
||||||
|
log.Fatalf("(%s,%s): No alignment found.", sequence.Id(), pseq.Taxon(idx_path).String())
|
||||||
|
}
|
||||||
|
|
||||||
|
closest[idx_path] = mini
|
||||||
|
// insure than closest is strictly increasing
|
||||||
|
for k := idx_path - 1; k >= 0 && mini < closest[k]; k-- {
|
||||||
|
closest[k] = mini
|
||||||
|
// log.Warnf("(%s,%s) Smaller alignment found than previous (%d,%d). Resetting closest.", sequence.Id(), pseq.Taxon(idx_path).String(), mini, closest[k])
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
closest[idx_path] = seq_len
|
||||||
}
|
}
|
||||||
mindiff[i] = mini
|
|
||||||
}
|
}
|
||||||
|
|
||||||
obitag_index := make(map[int]string, len(*pseq))
|
obitag_index := make(map[int]string, pseq.Len())
|
||||||
|
|
||||||
old := lseq
|
// log.Warnf("(%s,%s): %v", sequence.Id(), pseq.Taxon(0).String(), closest)
|
||||||
for i, d := range mindiff {
|
for i, d := range closest {
|
||||||
if d != -1 && d < old {
|
if i < (len(closest)-1) && d < closest[i+1] {
|
||||||
current_taxid := (*pseq)[i]
|
current_taxon := pseq.Taxon(i)
|
||||||
obitag_index[d] = fmt.Sprintf(
|
obitag_index[d] = fmt.Sprintf(
|
||||||
"%d@%s@%s",
|
"%s@%s",
|
||||||
current_taxid.Taxid(),
|
current_taxon.String(),
|
||||||
current_taxid.ScientificName(),
|
current_taxon.Rank(),
|
||||||
current_taxid.Rank())
|
)
|
||||||
old = d
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,22 +197,21 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
source, references := iterator.Load()
|
source, references := iterator.Load()
|
||||||
log.Infof("Done. Database contains %d sequences", len(references))
|
log.Infof("Done. Database contains %d sequences", len(references))
|
||||||
|
|
||||||
taxo, error := obifind.CLILoadSelectedTaxonomy()
|
taxo, error := obioptions.CLILoadSelectedTaxonomy()
|
||||||
if error != nil {
|
if error != nil {
|
||||||
log.Panicln(error)
|
log.Panicln(error)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infoln("Indexing sequence taxids...")
|
log.Infoln("Indexing sequence taxids...")
|
||||||
|
|
||||||
taxa := make(
|
n := len(references)
|
||||||
obitax.TaxonSet,
|
taxa := taxo.NewTaxonSlice(n, n)
|
||||||
len(references))
|
|
||||||
|
|
||||||
j := 0
|
j := 0
|
||||||
for i, seq := range references {
|
for i, seq := range references {
|
||||||
taxon, err := taxo.Taxon(seq.Taxid())
|
taxon := seq.Taxon(taxo)
|
||||||
if err == nil {
|
if taxon != nil {
|
||||||
taxa[j] = taxon
|
taxa.Set(j, taxon)
|
||||||
references[j] = references[i]
|
references[j] = references[i]
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
@ -198,7 +266,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
for l := range limits {
|
for l := range limits {
|
||||||
sl := obiseq.MakeBioSequenceSlice()
|
sl := obiseq.MakeBioSequenceSlice()
|
||||||
for i := l[0]; i < l[1]; i++ {
|
for i := l[0]; i < l[1]; i++ {
|
||||||
idx := IndexSequence(i, references, &refcounts, &taxa, taxo)
|
idx := IndexSequence(i, references, &refcounts, taxa, taxo)
|
||||||
iref := references[i].Copy()
|
iref := references[i].Copy()
|
||||||
iref.SetOBITagRefIndex(idx)
|
iref.SetOBITagRefIndex(idx)
|
||||||
sl = append(sl, iref)
|
sl = append(sl, iref)
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
package obirefidx
|
package obirefidx
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -10,5 +10,5 @@ import (
|
|||||||
// the obiuniq command
|
// the obiuniq command
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OptionSet(options)
|
||||||
obifind.LoadTaxonomyOptionSet(options, true, false)
|
obioptions.LoadTaxonomyOptionSet(options, true, false)
|
||||||
}
|
}
|
||||||
|
@ -48,19 +48,18 @@ func ExtractLandmarkSeqs(references *obiseq.BioSequenceSlice) *obiseq.BioSequenc
|
|||||||
// - taxonomy: a pointer to a Taxonomy object.
|
// - taxonomy: a pointer to a Taxonomy object.
|
||||||
//
|
//
|
||||||
// The function returns a pointer to a TaxonSet, which is a set of taxa.
|
// The function returns a pointer to a TaxonSet, which is a set of taxa.
|
||||||
func ExtractTaxonSet(references *obiseq.BioSequenceSlice, taxonomy *obitax.Taxonomy) *obitax.TaxonSet {
|
func ExtractTaxonSet(references *obiseq.BioSequenceSlice, taxonomy *obitax.Taxonomy) *obitax.TaxonSlice {
|
||||||
var err error
|
taxa := taxonomy.NewTaxonSlice(references.Len(), references.Len())
|
||||||
taxa := make(obitax.TaxonSet, len(*references))
|
|
||||||
|
|
||||||
for i, ref := range *references {
|
for i, ref := range *references {
|
||||||
taxid := ref.Taxid()
|
taxon := ref.Taxon(taxonomy)
|
||||||
taxa[i], err = taxonomy.Taxon(taxid)
|
if taxon == nil {
|
||||||
if err != nil {
|
log.Panicf("%s: Cannot get the taxon %s in %s", ref.Id(), ref.Taxid(), taxonomy.Name())
|
||||||
log.Panicf("Taxid %d, for sequence %s not found in taxonomy", taxid, ref.Id())
|
|
||||||
}
|
}
|
||||||
|
taxa.Set(i, taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &taxa
|
return taxa
|
||||||
}
|
}
|
||||||
|
|
||||||
// MapOnLandmarkSequences calculates the coordinates of landmarks on a given sequence.
|
// MapOnLandmarkSequences calculates the coordinates of landmarks on a given sequence.
|
||||||
@ -149,29 +148,26 @@ func FindGeomClosest(sequence *obiseq.BioSequence,
|
|||||||
func GeomIdentify(sequence *obiseq.BioSequence,
|
func GeomIdentify(sequence *obiseq.BioSequence,
|
||||||
landmarks *obiseq.BioSequenceSlice,
|
landmarks *obiseq.BioSequenceSlice,
|
||||||
references *obiseq.BioSequenceSlice,
|
references *obiseq.BioSequenceSlice,
|
||||||
taxa *obitax.TaxonSet,
|
taxa *obitax.TaxonSlice,
|
||||||
taxo *obitax.Taxonomy,
|
taxo *obitax.Taxonomy,
|
||||||
buffer *[]uint64) *obiseq.BioSequence {
|
buffer *[]uint64) *obiseq.BioSequence {
|
||||||
best_seq, min_dist, best_id, query_location, matches := FindGeomClosest(sequence, landmarks, references, buffer)
|
best_seq, min_dist, best_id, query_location, matches := FindGeomClosest(sequence, landmarks, references, buffer)
|
||||||
|
|
||||||
taxon := (*obitax.TaxNode)(nil)
|
taxon := (*obitax.Taxon)(nil)
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if best_id > 0.5 {
|
if best_id > 0.5 {
|
||||||
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[0].OBITagGeomRefIndex())
|
taxon = MatchDistanceIndex(taxo, min_dist, (*matches)[0].OBITagGeomRefIndex())
|
||||||
taxon, _ = taxo.Taxon(taxid)
|
|
||||||
for i := 1; i < len(*matches); i++ {
|
for i := 1; i < len(*matches); i++ {
|
||||||
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[i].OBITagGeomRefIndex())
|
newTaxon := MatchDistanceIndex(taxo, min_dist, (*matches)[i].OBITagGeomRefIndex())
|
||||||
newTaxon, _ := taxo.Taxon(taxid)
|
|
||||||
taxon, err = newTaxon.LCA(taxon)
|
taxon, err = newTaxon.LCA(taxon)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicf("LCA error: %v", err)
|
log.Panicf("LCA error: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sequence.SetTaxid(taxon.Taxid())
|
sequence.SetTaxon(taxon)
|
||||||
} else {
|
} else {
|
||||||
taxon, _ = taxo.Taxon(1)
|
sequence.SetTaxon(taxo.Root())
|
||||||
sequence.SetTaxid(1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||||
|
@ -2,8 +2,6 @@ package obitag
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"golang.org/x/exp/maps"
|
"golang.org/x/exp/maps"
|
||||||
@ -29,7 +27,9 @@ import (
|
|||||||
// - taxid: The taxid associated with the matched distance.
|
// - taxid: The taxid associated with the matched distance.
|
||||||
// - rank: The rank associated with the matched distance.
|
// - rank: The rank associated with the matched distance.
|
||||||
// - scientificName: The scientific name associated with the matched distance.
|
// - scientificName: The scientific name associated with the matched distance.
|
||||||
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
|
func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map[int]string) *obitax.Taxon {
|
||||||
|
taxonomy = taxonomy.OrDefault(true)
|
||||||
|
|
||||||
keys := maps.Keys(distanceIdx)
|
keys := maps.Keys(distanceIdx)
|
||||||
slices.Sort(keys)
|
slices.Sort(keys)
|
||||||
|
|
||||||
@ -37,24 +37,20 @@ func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string,
|
|||||||
return distance <= keys[i]
|
return distance <= keys[i]
|
||||||
})
|
})
|
||||||
|
|
||||||
var taxid int
|
var taxon *obitax.Taxon
|
||||||
var rank string
|
|
||||||
var scientificName string
|
|
||||||
|
|
||||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||||
taxid = 1
|
taxon = taxonomy.Root()
|
||||||
rank = "no rank"
|
|
||||||
scientificName = "root"
|
|
||||||
} else {
|
} else {
|
||||||
parts := strings.Split(distanceIdx[keys[i]], "@")
|
taxon = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||||
taxid, _ = strconv.Atoi(parts[0])
|
if taxon == nil {
|
||||||
rank = parts[1]
|
log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name())
|
||||||
scientificName = parts[2]
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// log.Info("taxid:", taxid, " rank:", rank, " scientificName:", scientificName)
|
// log.Info("taxid:", taxid, " rank:", rank, " scientificName:", scientificName)
|
||||||
|
|
||||||
return taxid, rank, scientificName
|
return taxon
|
||||||
}
|
}
|
||||||
|
|
||||||
// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences.
|
// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences.
|
||||||
@ -169,12 +165,12 @@ func FindClosests(sequence *obiseq.BioSequence,
|
|||||||
func Identify(sequence *obiseq.BioSequence,
|
func Identify(sequence *obiseq.BioSequence,
|
||||||
references obiseq.BioSequenceSlice,
|
references obiseq.BioSequenceSlice,
|
||||||
refcounts []*obikmer.Table4mer,
|
refcounts []*obikmer.Table4mer,
|
||||||
taxa obitax.TaxonSet,
|
taxa *obitax.TaxonSlice,
|
||||||
taxo *obitax.Taxonomy,
|
taxo *obitax.Taxonomy,
|
||||||
runExact bool) *obiseq.BioSequence {
|
runExact bool) *obiseq.BioSequence {
|
||||||
|
|
||||||
bests, differences, identity, bestmatch, seqidxs := FindClosests(sequence, references, refcounts, runExact)
|
bests, differences, identity, bestmatch, seqidxs := FindClosests(sequence, references, refcounts, runExact)
|
||||||
taxon := (*obitax.TaxNode)(nil)
|
taxon := (*obitax.Taxon)(nil)
|
||||||
|
|
||||||
if identity >= 0.5 && differences >= 0 {
|
if identity >= 0.5 && differences >= 0 {
|
||||||
newidx := 0
|
newidx := 0
|
||||||
@ -183,56 +179,24 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
if idx == nil {
|
if idx == nil {
|
||||||
// log.Debugln("Need of indexing")
|
// log.Debugln("Need of indexing")
|
||||||
newidx++
|
newidx++
|
||||||
idx = obirefidx.IndexSequence(seqidxs[i], references, &refcounts, &taxa, taxo)
|
idx = obirefidx.IndexSequence(seqidxs[i], references, &refcounts, taxa, taxo)
|
||||||
references[seqidxs[i]].SetOBITagRefIndex(idx)
|
references[seqidxs[i]].SetOBITagRefIndex(idx)
|
||||||
log.Debugln(references[seqidxs[i]].Id(), idx)
|
log.Debugln(references[seqidxs[i]].Id(), idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
d := differences
|
d := differences
|
||||||
identification, ok := idx[d]
|
identification, ok := idx[d]
|
||||||
found := false
|
|
||||||
var parts []string
|
|
||||||
|
|
||||||
/*
|
for !ok && d >= 0 {
|
||||||
Here is an horrible hack for xprize challence.
|
d--
|
||||||
With Euka01 the part[0] was equal to "" for at
|
identification, ok = idx[d]
|
||||||
least a sequence consensus. Which is not normal.
|
|
||||||
|
|
||||||
TO BE CHECKED AND CORRECTED
|
|
||||||
|
|
||||||
The problem seems related to idx that doesn't have
|
|
||||||
a 0 distance
|
|
||||||
*/
|
|
||||||
for !found && d >= 0 {
|
|
||||||
for !ok && d >= 0 {
|
|
||||||
identification, ok = idx[d]
|
|
||||||
d--
|
|
||||||
}
|
|
||||||
|
|
||||||
parts = strings.Split(identification, "@")
|
|
||||||
|
|
||||||
found = parts[0] != ""
|
|
||||||
if !found {
|
|
||||||
log.Debugln("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
|
||||||
for !ok && d <= 1000 {
|
|
||||||
identification, ok = idx[d]
|
|
||||||
d++
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match_taxid, err := strconv.Atoi(parts[0])
|
if !ok {
|
||||||
|
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||||
if err != nil {
|
|
||||||
log.Panicln("Cannot extract taxid from :", identification)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match_taxon, err := taxo.Taxon(match_taxid)
|
match_taxon := taxo.Taxon(identification)
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln("Cannot find taxon corresponding to taxid :", match_taxid)
|
|
||||||
}
|
|
||||||
|
|
||||||
if taxon != nil {
|
if taxon != nil {
|
||||||
taxon, _ = taxon.LCA(match_taxon)
|
taxon, _ = taxon.LCA(match_taxon)
|
||||||
@ -241,16 +205,16 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx)
|
log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx)
|
||||||
|
|
||||||
sequence.SetTaxid(taxon.Taxid())
|
sequence.SetTaxon(taxon)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
taxon, _ = taxo.Taxon(1)
|
taxon = taxo.Root()
|
||||||
sequence.SetTaxid(1)
|
sequence.SetTaxon(taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
|
||||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||||
sequence.SetAttribute("obitag_bestid", identity)
|
sequence.SetAttribute("obitag_bestid", identity)
|
||||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||||
@ -262,7 +226,7 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
|
|
||||||
func IdentifySeqWorker(references obiseq.BioSequenceSlice,
|
func IdentifySeqWorker(references obiseq.BioSequenceSlice,
|
||||||
refcounts []*obikmer.Table4mer,
|
refcounts []*obikmer.Table4mer,
|
||||||
taxa obitax.TaxonSet,
|
taxa *obitax.TaxonSlice,
|
||||||
taxo *obitax.Taxonomy,
|
taxo *obitax.Taxonomy,
|
||||||
runExact bool) obiseq.SeqWorker {
|
runExact bool) obiseq.SeqWorker {
|
||||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||||
@ -279,23 +243,21 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
|
|||||||
[]*obikmer.Table4mer,
|
[]*obikmer.Table4mer,
|
||||||
len(references))
|
len(references))
|
||||||
|
|
||||||
taxa := make(obitax.TaxonSet,
|
taxa := taxo.NewTaxonSlice(references.Len(), references.Len())
|
||||||
len(references))
|
|
||||||
|
|
||||||
buffer := make([]byte, 0, 1000)
|
buffer := make([]byte, 0, 1000)
|
||||||
|
|
||||||
var err error
|
|
||||||
j := 0
|
j := 0
|
||||||
for _, seq := range references {
|
for _, seq := range references {
|
||||||
references[j] = seq
|
references[j] = seq
|
||||||
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
|
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
|
||||||
taxa[j], err = taxo.Taxon(seq.Taxid())
|
taxon := seq.Taxon(taxo)
|
||||||
if err == nil {
|
taxa.Set(j, taxon)
|
||||||
|
if taxon != nil {
|
||||||
j++
|
j++
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Taxid %d is not described in the taxonomy."+
|
log.Warnf("Taxid %d is not described in the taxonomy %s."+
|
||||||
" Sequence %s is discared from the reference database",
|
" Sequence %s is discared from the reference database",
|
||||||
seq.Taxid(), seq.Id())
|
seq.Taxid(), taxo.Name(), seq.Id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,7 +8,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -42,7 +41,7 @@ func TagOptionSet(options *getoptions.GetOpt) {
|
|||||||
// the obiuniq command
|
// the obiuniq command
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OptionSet(options)
|
||||||
obifind.LoadTaxonomyOptionSet(options, true, false)
|
obioptions.LoadTaxonomyOptionSet(options, true, false)
|
||||||
TagOptionSet(options)
|
TagOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,12 +1,158 @@
|
|||||||
package obiutils
|
package obiutils
|
||||||
|
|
||||||
import "unsafe"
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
type AsciiSet [256]bool
|
||||||
|
|
||||||
|
var AsciiSpaceSet = AsciiSetFromString("\t\n\v\f\r ")
|
||||||
|
var AsciiDigitSet = AsciiSetFromString("0123456789")
|
||||||
|
var AsciiUpperSet = AsciiSetFromString("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
var AsciiLowerSet = AsciiSetFromString("abcdefghijklmnopqrstuvwxyz")
|
||||||
|
var AsciiAlphaSet = AsciiUpperSet.Union(AsciiLowerSet)
|
||||||
|
var AsciiAlphaNumSet = AsciiAlphaSet.Union(AsciiDigitSet)
|
||||||
|
|
||||||
|
// UnsafeStringFromBytes converts a byte slice into a string without making a copy of the data.
|
||||||
|
// This function is considered unsafe because it directly manipulates memory and does not
|
||||||
|
// perform any checks on the byte slice's contents. It should be used with caution.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - data: A byte slice that contains the data to be converted to a string.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// A string representation of the byte slice. If the input byte slice is empty,
|
||||||
|
// an empty string is returned.
|
||||||
func UnsafeStringFromBytes(data []byte) string {
|
func UnsafeStringFromBytes(data []byte) string {
|
||||||
if len(data) > 0 {
|
if len(data) > 0 {
|
||||||
|
// Convert the byte slice to a string using unsafe operations.
|
||||||
s := unsafe.String(unsafe.SliceData(data), len(data))
|
s := unsafe.String(unsafe.SliceData(data), len(data))
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return "" // Return an empty string if the input slice is empty.
|
||||||
|
}
|
||||||
|
|
||||||
|
func AsciiSetFromString(s string) AsciiSet {
|
||||||
|
r := [256]bool{}
|
||||||
|
for _, c := range s {
|
||||||
|
r[c] = true
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsciiSet) Contains(c byte) bool {
|
||||||
|
return r[c]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsciiSet) Union(s AsciiSet) AsciiSet {
|
||||||
|
for i := 0; i < 256; i++ {
|
||||||
|
s[i] = r[i] || s[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsciiSet) Intersect(s AsciiSet) AsciiSet {
|
||||||
|
for i := 0; i < 256; i++ {
|
||||||
|
s[i] = r[i] && s[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// FirstWord extracts the first word from a given string.
|
||||||
|
// A word is defined as a sequence of non-space characters.
|
||||||
|
// It ignores leading whitespace and stops at the first whitespace character encountered.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - s: The input string from which the first word is to be extracted.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// A string containing the first word found in the input string. If the input string
|
||||||
|
// is empty or contains only whitespace, an empty string is returned.
|
||||||
|
func FirstWord(s string) string {
|
||||||
|
// Fast path for ASCII: look for the first ASCII non-space byte
|
||||||
|
start := 0
|
||||||
|
for ; start < len(s); start++ {
|
||||||
|
c := s[start]
|
||||||
|
if !AsciiSpaceSet.Contains(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stop := start
|
||||||
|
for ; stop < len(s); stop++ {
|
||||||
|
c := s[stop]
|
||||||
|
if AsciiSpaceSet.Contains(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s[start:stop]
|
||||||
|
}
|
||||||
|
|
||||||
|
// FirstRestrictedWord extracts the first word from a given string while enforcing character restrictions.
|
||||||
|
// A word is defined as a sequence of non-space characters. The function checks each character
|
||||||
|
// against the provided restriction array, which indicates whether a character is allowed.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - s: The input string from which the first restricted word is to be extracted.
|
||||||
|
// - restriction: A boolean array of size 256 where each index represents a character's ASCII value.
|
||||||
|
// If restriction[c] is false, the character c is not allowed in the word.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// A string containing the first word found in the input string that does not contain any restricted characters.
|
||||||
|
// If a restricted character is found, an error is returned indicating the invalid character.
|
||||||
|
// If the input string is empty or contains only whitespace, an empty string is returned with no error.
|
||||||
|
func (restriction *AsciiSet) FirstWord(s string) (string, error) {
|
||||||
|
// Fast path for ASCII: look for the first ASCII non-space byte
|
||||||
|
start := 0
|
||||||
|
for ; start < len(s); start++ {
|
||||||
|
c := s[start]
|
||||||
|
if !AsciiSpaceSet.Contains(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stop := start
|
||||||
|
for ; stop < len(s); stop++ {
|
||||||
|
c := s[stop]
|
||||||
|
if AsciiSpaceSet.Contains(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !restriction.Contains(c) {
|
||||||
|
return "", fmt.Errorf("invalid character '%c' in string: %s", c, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s[start:stop], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AsciiSet) TrimLeft(s string) string {
|
||||||
|
i := 0
|
||||||
|
for ; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if !AsciiSpaceSet.Contains(c) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s[i:]
|
||||||
|
}
|
||||||
|
|
||||||
|
func SplitInTwo(s string, sep byte) (string, string) {
|
||||||
|
i := 0
|
||||||
|
for ; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if c == sep {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i == len(s) {
|
||||||
|
return s, ""
|
||||||
|
}
|
||||||
|
return s[:i], s[i+1:]
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user