From 9bf006af93801a1dfcbe81ce957c5f1c34256a7d Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 27 Aug 2023 14:58:55 +0200 Subject: [PATCH] A first prototype for the space of sequences Former-commit-id: 07dc6ef044b5b6a6fb45dc2acb01dffe71a96195 --- pkg/obiseq/attributes.go | 287 ++++++++++++++++++++++-- pkg/obiseq/biosequence.go | 13 ++ pkg/obistats/kmeans.go | 3 +- pkg/obitools/obifind/options.go | 5 +- pkg/obitools/obilandmark/obilandmark.go | 47 +++- pkg/obitools/obilandmark/options.go | 19 +- pkg/obitools/obirefidx/geomindexing.go | 79 +++++++ pkg/obitools/obirefidx/obirefidx.go | 7 +- pkg/obitools/obitag/obigeomtag.go | 209 +++++++++++++++++ pkg/obitools/obitag/obitag.go | 87 ++++++- pkg/obitools/obitag/options.go | 9 + pkg/obiutils/array.go | 49 +++- pkg/obiutils/bytes.go | 14 +- pkg/obiutils/cast_interface.go | 54 +++++ pkg/obiutils/goutils.go | 157 ++++++++----- pkg/obiutils/ranks.go | 4 + pkg/obiutils/slices.go | 43 +++- 17 files changed, 969 insertions(+), 117 deletions(-) create mode 100644 pkg/obitools/obirefidx/geomindexing.go create mode 100644 pkg/obitools/obitag/obigeomtag.go create mode 100644 pkg/obiutils/cast_interface.go diff --git a/pkg/obiseq/attributes.go b/pkg/obiseq/attributes.go index c9be3d6..e5696c4 100644 --- a/pkg/obiseq/attributes.go +++ b/pkg/obiseq/attributes.go @@ -8,35 +8,67 @@ import ( log "github.com/sirupsen/logrus" ) +// HasAttribute checks if the BioSequence has the specified attribute. +// +// Parameters: +// - key: a string representing the attribute key to check. +// +// Returns: +// - a boolean indicating whether the BioSequence has the attribute. func (s *BioSequence) HasAttribute(key string) bool { ok := s.annotations != nil if ok { + defer s.AnnotationsUnlock() + s.AnnotationsLock() _, ok = s.annotations[key] } return ok } -// A method that returns the value of the key in the annotation map. 
+// GetAttribute returns the value associated with the given key in the BioSequence's annotations map and a boolean indicating whether the key exists. +// +// Parameters: +// - key: The key to look up in the annotations map. +// +// Returns: +// - val: The value associated with the given key. +// - ok: A boolean indicating whether the key exists in the annotations map. func (s *BioSequence) GetAttribute(key string) (interface{}, bool) { var val interface{} ok := s.annotations != nil if ok { + defer s.AnnotationsUnlock() + s.AnnotationsLock() val, ok = s.annotations[key] } return val, ok } -// A method that sets the value of the key in the annotation map. +// SetAttribute sets the value of a given key in the BioSequence annotations. +// +// Parameters: +// - key: the key to set the value for. +// - value: the value to set for the given key. func (s *BioSequence) SetAttribute(key string, value interface{}) { annot := s.Annotations() + + defer s.AnnotationsUnlock() + s.AnnotationsLock() annot[key] = value } -// A method that returns the value of the key in the annotation map. +// GetIntAttribute returns an integer attribute value based on the provided key. +// +// It takes a key as a parameter and returns the corresponding integer value along +// with a boolean value indicating whether the key exists in the BioSequence, and if it can be converted to an integer. +// +// If the stored value is convertible to an integer, but was not stored as an integer, then the converted value is stored back as an integer. +// +// The returned boolean value will be true if the key exists and its value can be converted to an integer, and false otherwise. 
func (s *BioSequence) GetIntAttribute(key string) (int, bool) { var val int var err error @@ -44,19 +76,39 @@ func (s *BioSequence) GetIntAttribute(key string) (int, bool) { v, ok := s.GetAttribute(key) if ok { - val, err = obiutils.InterfaceToInt(v) - ok = err == nil + val, ok = v.(int) + if !ok { + val, err = obiutils.InterfaceToInt(v) + ok = err == nil + if ok { + s.SetAttribute(key, val) + } + } } return val, ok } -// Deleting the key from the annotation map. +// DeleteAttribute deletes the attribute with the given key from the BioSequence. +// +// Parameters: +// - key: the key of the attribute to be deleted. +// +// No return value. func (s *BioSequence) DeleteAttribute(key string) { - delete(s.Annotations(), key) + if s.annotations != nil { + defer s.AnnotationsUnlock() + s.AnnotationsLock() + delete(s.annotations, key) + } } -// Renaming the key in the annotation map. +// RenameAttribute renames an attribute in the BioSequence. +// +// It takes two string parameters: +// - newName: the new name for the attribute. +// - oldName: the old name of the attribute to be renamed. +// It does not return anything. func (s *BioSequence) RenameAttribute(newName, oldName string) { val, ok := s.GetAttribute(oldName) @@ -66,7 +118,15 @@ func (s *BioSequence) RenameAttribute(newName, oldName string) { } } -// A method that returns the value of the key in the annotation map. +// GetNumericAttribute returns the numeric value of the specified attribute key +// in the BioSequence object. +// +// Parameters: +// - key: the attribute key to retrieve the numeric value for. +// +// Returns: +// - float64: the numeric value of the attribute key. +// - bool: indicates whether the attribute key exists and can be converted to a float64. 
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) { var val float64 var err error @@ -81,7 +141,14 @@ func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) { return val, ok } -// A method that returns the value of the key in the annotation map. +// GetStringAttribute retrieves the string value of a specific attribute from the BioSequence. +// +// Parameters: +// - key: the key of the attribute to retrieve. +// +// Returns: +// - string: the value of the attribute as a string. +// - bool: a boolean indicating whether the attribute was found or not. func (s *BioSequence) GetStringAttribute(key string) (string, bool) { var val string v, ok := s.GetAttribute(key) @@ -93,7 +160,14 @@ func (s *BioSequence) GetStringAttribute(key string) (string, bool) { return val, ok } -// A method that returns the value of the key in the annotation map. +// GetBoolAttribute returns the boolean attribute value associated with the given key in the BioSequence object. +// +// Parameters: +// - key: The key to retrieve the boolean attribute value. +// +// Return: +// - val: The boolean attribute value associated with the given key and can be converted to a boolean. +// - ok: A boolean value indicating whether the attribute value was successfully retrieved. func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) { var val bool var err error @@ -108,6 +182,14 @@ func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) { return val, ok } +// GetIntMap returns a map[string]int and a boolean value indicating whether the key exists in the BioSequence. +// +// Parameters: +// - key: The key to retrieve the value from the BioSequence. +// +// Returns: +// - val: A map[string]int representing the value associated with the key and can be converted to a map[string]int. +// - ok: A boolean value indicating whether the key exists in the BioSequence. 
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) { var val map[string]int var err error @@ -122,7 +204,41 @@ func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) { return val, ok } -// Returning the number of times the sequence has been observed. +// GetIntSlice returns the integer slice value associated with the given key in the BioSequence object. +// +// Parameters: +// - key: The key used to retrieve the integer slice value. +// +// Returns: +// - []int: The integer slice value associated with the given key. +// - bool: A boolean indicating whether the key exists in the BioSequence object and its value can be converted to a []int. +func (s *BioSequence) GetIntSlice(key string) ([]int, bool) { + var val []int + var err error + + v, ok := s.GetAttribute(key) + + if ok { + val, ok = v.([]int) + if !ok { + val, err = obiutils.InterfaceToIntSlice(v) + ok = err == nil + if ok { + s.SetAttribute(key, val) + } + } + } + + return val, ok +} + +// Count returns the value of the "count" attribute of the BioSequence. +// +// The count of a sequence is the number of times it has been observed in the dataset. +// It is represented in the sequence header as the "count" attribute. +// If the attribute is not found, the function returns 1 as the default count. +// +// It returns an integer representing the count value. func (s *BioSequence) Count() int { count, ok := s.GetIntAttribute("count") @@ -133,13 +249,27 @@ func (s *BioSequence) Count() int { return count } -// Setting the number of times the sequence has been observed. +// SetCount sets the count of the BioSequence. +// +// The count of a sequence is the number of times it has been observed in the dataset. +// The value of the "count" attribute is set to the new count, even if the new count is 1. +// If the count is less than 1, the count is set to 1. +// +// count - the new count to set. 
func (s *BioSequence) SetCount(count int) { - annot := s.Annotations() - annot["count"] = count + if count < 1 { + count = 1 + } + s.SetAttribute("count", count) } -// Returning the taxid of the sequence. +// Taxid returns the taxonomic ID associated with the BioSequence. +// +// It retrieves the "taxid" attribute from the BioSequence's attributes map. +// If the attribute is not found, the function returns 1 as the default taxonomic ID. +// The taxid 1 corresponds to the root taxonomic level. +// +// The function returns an integer representing the taxonomic ID. func (s *BioSequence) Taxid() int { taxid, ok := s.GetIntAttribute("taxid") @@ -150,10 +280,16 @@ func (s *BioSequence) Taxid() int { return taxid } -// Setting the taxid of the sequence. +// SetTaxid sets the taxid for the BioSequence. +// +// Parameters: +// +// taxid - the taxid to set. func (s *BioSequence) SetTaxid(taxid int) { - annot := s.Annotations() - annot["taxid"] = taxid + if taxid < 1 { + taxid = 1 + } + s.SetAttribute("taxid", taxid) } func (s *BioSequence) OBITagRefIndex() map[int]string { @@ -201,4 +337,115 @@ func (s *BioSequence) OBITagRefIndex() map[int]string { func (s *BioSequence) SetOBITagRefIndex(idx map[int]string) { s.SetAttribute("obitag_ref_index", idx) -} \ No newline at end of file +} + +func (s *BioSequence) SetOBITagGeomRefIndex(idx map[int]string) { + s.SetAttribute("obitag_geomref_index", idx) +} + +func (s *BioSequence) OBITagGeomRefIndex() map[int]string { + var val map[int]string + + i, ok := s.GetAttribute("obitag_geomref_index") + + if !ok { + return nil + } + + switch i := i.(type) { + case map[int]string: + val = i + case map[string]interface{}: + val = make(map[int]string, len(i)) + for k, v := range i { + score, err := strconv.Atoi(k) + if err != nil { + log.Panicln(err) + } + + val[score], err = obiutils.InterfaceToString(v) + if err != nil { + log.Panicln(err) + } + } + case map[string]string: + val = make(map[int]string, len(i)) + for k, v := range i { + 
score, err := strconv.Atoi(k) + if err != nil { + log.Panicln(err) + } + val[score] = v + + } + default: + log.Panicln("value of attribute obitag_geomref_index cannot be casted to a map[int]string") + } + + return val +} + +// GetCoordinate returns the coordinate of the BioSequence. +// +// Returns the coordinate of the BioSequence in the space of its reference database landmark sequences. +// if no coordinate is found, it returns nil. +// +// This function does not take any parameters. +// +// It returns a slice of integers ([]int). +func (s *BioSequence) GetCoordinate() []int { + coord, ok := s.GetIntSlice("landmark_coord") + if !ok { + return nil + } + + return coord +} + +// SetCoordinate sets the coordinate of the BioSequence. +// +// coord: An array of integers representing the coordinate. +// This function does not return anything. +func (s *BioSequence) SetCoordinate(coord []int) { + s.SetAttribute("landmark_coord", coord) +} + +// SetLandmarkID sets the landmark ID of the BioSequence. +// +// Trying to set a negative landmark ID leads to a no operation. +// +// Parameters: +// id: The ID of the landmark. +func (s *BioSequence) SetLandmarkID(id int) { + if id < 0 { + return + } + s.SetAttribute("landmark_id", id) +} + +// GetLandmarkID returns the landmark ID associated with the BioSequence. +// +// It retrieves the "landmark_id" attribute from the BioSequence's attributes map. +// If the attribute is not found, the function returns -1 as the default landmark ID. +// The landmark ID is an integer representing the number of the axis in the landmark space. +// +// It does not take any parameters. +// It returns an integer representing the landmark ID. +func (s *BioSequence) GetLandmarkID() int { + val, ok := s.GetIntAttribute("landmark_id") + + if !ok { + return -1 + } + + return val +} + +// IsALandmark checks if the BioSequence is a landmark. +// +// A sequence is a landmark if its landmark ID is set (attribute "landmark_id"). 
+// +// It returns a boolean indicating whether the BioSequence is a landmark or not. +func (s *BioSequence) IsALandmark() bool { + return s.GetLandmarkID() != -1 +} diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 76d0e9d..624d6cf 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -12,6 +12,7 @@ package obiseq import ( "crypto/md5" + "sync" "sync/atomic" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" @@ -58,6 +59,7 @@ type BioSequence struct { feature []byte paired *BioSequence // A pointer to the paired sequence annotations Annotation + annot_lock sync.Mutex } // MakeEmptyBioSequence() creates a new BioSequence object with no data @@ -79,6 +81,7 @@ func MakeEmptyBioSequence(preallocate int) BioSequence { feature: nil, paired: nil, annotations: nil, + annot_lock: sync.Mutex{}, } } @@ -142,6 +145,8 @@ func (s *BioSequence) Copy() *BioSequence { newSeq.feature = CopySlice(s.feature) if len(s.annotations) > 0 { + defer s.annot_lock.Unlock() + s.annot_lock.Lock() newSeq.annotations = GetAnnotation(s.annotations) } @@ -206,6 +211,14 @@ func (s *BioSequence) Annotations() Annotation { return s.annotations } +func (s *BioSequence) AnnotationsLock() { + s.annot_lock.Lock() +} + +func (s *BioSequence) AnnotationsUnlock() { + s.annot_lock.Unlock() +} + // Checking if the BioSequence has a source. func (s *BioSequence) HasSource() bool { return len(s.source) > 0 diff --git a/pkg/obistats/kmeans.go b/pkg/obistats/kmeans.go index 2d1f8d3..3b77b75 100644 --- a/pkg/obistats/kmeans.go +++ b/pkg/obistats/kmeans.go @@ -74,8 +74,7 @@ func AssignToClass(data, centers *obiutils.Matrix[float64]) []int { // Returns: // - centers: a pointer to a matrix of float64 values representing the centers of the clusters. 
func ComputeCenters(data *obiutils.Matrix[float64], k int, classes []int) *obiutils.Matrix[float64] { - centers := obiutils.Make2DArray[float64](k, len((*data)[0])) - centers.Init(0.0) + centers := obiutils.Make2DNumericArray[float64](k, len((*data)[0]), true) ns := make([]int, k) var wg sync.WaitGroup diff --git a/pkg/obitools/obifind/options.go b/pkg/obitools/obifind/options.go index 285745c..9872a94 100644 --- a/pkg/obitools/obifind/options.go +++ b/pkg/obitools/obifind/options.go @@ -48,11 +48,14 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.Description("Restrict output to some subclades.")) } - func CLISelectedNCBITaxDump() string { return __taxdump__ } +func CLIHasSelectedTaxonomy() bool { + return __taxdump__ != "" +} + func CLIAreAlternativeNamesSelected() bool { return __alternative_name__ } diff --git a/pkg/obitools/obilandmark/obilandmark.go b/pkg/obitools/obilandmark/obilandmark.go index 6609995..b0b3005 100644 --- a/pkg/obitools/obilandmark/obilandmark.go +++ b/pkg/obitools/obilandmark/obilandmark.go @@ -10,6 +10,9 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obistats" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obirefidx" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/schollz/progressbar/v3" log "github.com/sirupsen/logrus" @@ -103,14 +106,14 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque library := iterator.Load() library_size := len(library) - n_landmark := NCenter() + n_landmark := CLINCenter() landmark_idx := obistats.SampleIntWithoutReplacement(n_landmark, library_size) log.Infof("Library contains %d sequence", len(library)) var seqworld obiutils.Matrix[float64] - for loop := 0; loop < 5; 
loop++ { + for loop := 0; loop < 2; loop++ { sort.IntSlice(landmark_idx).Sort() log.Debugf("Selected indices : %v", landmark_idx) @@ -154,14 +157,52 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque } classes := obistats.AssignToClass(&seqworld, &initialCenters) + for i, seq := range library { - seq.SetAttribute("landmark_coord", seqworld[i]) + ic, _ := obiutils.InterfaceToIntSlice(seqworld[i]) + seq.SetCoordinate(ic) seq.SetAttribute("landmark_class", classes[i]) if i, ok := seq_landmark[i]; ok { seq.SetAttribute("landmark_id", i) } } + if obifind.CLIHasSelectedTaxonomy() { + taxo, err := obifind.CLILoadSelectedTaxonomy() + if err != nil { + log.Fatal(err) + } + + taxa := make(obitax.TaxonSet, len(library)) + + for i, seq := range library { + taxa[i], err = taxo.Taxon(seq.Taxid()) + if err != nil { + log.Fatal(err) + } + } + + pbopt := make([]progressbar.Option, 0, 5) + pbopt = append(pbopt, + progressbar.OptionSetWriter(os.Stderr), + progressbar.OptionSetWidth(15), + progressbar.OptionShowCount(), + progressbar.OptionShowIts(), + progressbar.OptionSetDescription("[Sequence Indexing]"), + ) + + bar := progressbar.NewOptions(len(library), pbopt...) + + for i, seq := range library { + idx := obirefidx.GeomIndexSesquence(i, library, &taxa, taxo) + seq.SetOBITagGeomRefIndex(idx) + + if i%10 == 0 { + bar.Add(10) + } + } + } + return obiiter.IBatchOver(library, obioptions.CLIBatchSize()) } diff --git a/pkg/obitools/obilandmark/options.go b/pkg/obitools/obilandmark/options.go index bdeff8e..ffb58de 100644 --- a/pkg/obitools/obilandmark/options.go +++ b/pkg/obitools/obilandmark/options.go @@ -2,28 +2,37 @@ package obilandmark import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind" "github.com/DavidGamba/go-getoptions" ) var _nCenter = 200 -// ObilandmarkOptionSet sets the options for Obilandmark. 
+// LandmarkOptionSet sets the options for Obilandmark. // // options: a pointer to the getoptions.GetOpt struct. // Return type: none. -func ObilandmarkOptionSet(options *getoptions.GetOpt) { +func LandmarkOptionSet(options *getoptions.GetOpt) { options.IntVar(&_nCenter, "center", _nCenter, options.Alias("n"), - options.Description("Maximum numbers of differences between two variant sequences (default: %d).")) + options.Description("Number of landmark sequences to be selected.")) } +// OptionSet is a function that sets the options for the GetOpt struct. +// +// It takes a pointer to a GetOpt struct as its parameter and does not return anything. func OptionSet(options *getoptions.GetOpt) { obiconvert.InputOptionSet(options) obiconvert.OutputOptionSet(options) - ObilandmarkOptionSet(options) + obifind.LoadTaxonomyOptionSet(options, false, false) + LandmarkOptionSet(options) } -func NCenter() int { +// CLINCenter returns the desired number of centers as specified by the user. +// +// No parameters. +// Returns an integer value. 
+func CLINCenter() int { return _nCenter } diff --git a/pkg/obitools/obirefidx/geomindexing.go b/pkg/obitools/obirefidx/geomindexing.go new file mode 100644 index 0000000..b5ad40a --- /dev/null +++ b/pkg/obitools/obirefidx/geomindexing.go @@ -0,0 +1,79 @@ +package obirefidx + +import ( + "fmt" + "log" + "sort" + "sync" + + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" +) + +func GeomIndexSesquence(seqidx int, + references obiseq.BioSequenceSlice, + taxa *obitax.TaxonSet, + taxo *obitax.Taxonomy) map[int]string { + + sequence := references[seqidx] + location := sequence.GetCoordinate() + + if location == nil { + log.Fatalf("Sequence %s does not have a coordinate", sequence.Id()) + } + + seq_dist := make([]float64, len(references)) + + var wg sync.WaitGroup + + for i, ref := range references { + wg.Add(1) + go func(i int, ref *obiseq.BioSequence) { + defer wg.Done() + reflocation := ref.GetCoordinate() + if reflocation == nil { + log.Fatalf("Sequence %s does not have a coordinate", ref.Id()) + } + d := 0.0 + for i, x := range location { + diff := float64(x - reflocation[i]) + d += diff * diff + } + seq_dist[i] = d + }(i, ref) + } + + wg.Wait() + + order := obiutils.Order(sort.Float64Slice(seq_dist)) + + lca := (*taxa)[seqidx] + + index := make(map[int]string) + index[0.0] = fmt.Sprintf( + "%d@%s@%s", + lca.Taxid(), + lca.ScientificName(), + lca.Rank()) + + old_dist := 0.0 + for _, o := range order { + new_lca, _ := lca.LCA((*taxa)[o]) + if new_lca.Taxid() != lca.Taxid() || seq_dist[o] != old_dist { + lca = new_lca + old_dist = seq_dist[o] + index[int(seq_dist[o])] = fmt.Sprintf( + "%d@%s@%s", + lca.Taxid(), + lca.ScientificName(), + lca.Rank()) + } + + if lca.Taxid() == 1 { + break + } + } + + return index +} diff --git a/pkg/obitools/obirefidx/obirefidx.go b/pkg/obitools/obirefidx/obirefidx.go index 757bbd5..c42f2f3 100644 --- 
a/pkg/obitools/obirefidx/obirefidx.go +++ b/pkg/obitools/obirefidx/obirefidx.go @@ -24,9 +24,6 @@ func IndexSequence(seqidx int, taxo *obitax.Taxonomy) map[int]string { sequence := references[seqidx] - // matrix := obialign.NewFullLCSMatrix(nil, - // sequence.Length(), - // sequence.Length()) var matrix []uint64 @@ -54,7 +51,9 @@ func IndexSequence(seqidx int, nok := make([]int, len(*pseq)) nfast := make([]int, len(*pseq)) nfastok := make([]int, len(*pseq)) - */lseq := sequence.Len() + */ + + lseq := sequence.Len() mini := -1 wordmin := 0 diff --git a/pkg/obitools/obitag/obigeomtag.go b/pkg/obitools/obitag/obigeomtag.go new file mode 100644 index 0000000..759cdf5 --- /dev/null +++ b/pkg/obitools/obitag/obigeomtag.go @@ -0,0 +1,209 @@ +package obitag + +import ( + "log" + "math" + + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" +) + +// ExtractLandmarkSeqs extracts landmark sequences from the given BioSequenceSlice. +// +// The landmark sequences are extracted from the given BioSequenceSlice and put in a new BioSequenceSlice +// in the order corresponding to their landmark IDs. +// +// references: A pointer to a BioSequenceSlice containing the references. +// Returns: A pointer to a BioSequenceSlice containing the extracted landmark sequences. 
+func ExtractLandmarkSeqs(references *obiseq.BioSequenceSlice) *obiseq.BioSequenceSlice { + landmarks := make(map[int]*obiseq.BioSequence, 100) + + for _, ref := range *references { + if id := ref.GetLandmarkID(); id != -1 { + landmarks[id] = ref + } + } + + ls := obiseq.NewBioSequenceSlice(len(landmarks)) + *ls = (*ls)[0:len(landmarks)] + + for k, l := range landmarks { + (*ls)[k] = l + } + + return ls +} + +// ExtractTaxonSet extracts a set of taxa from the given references and taxonomy. +// +// If a reference sequence has a taxid absent from the taxonomy, the function will panic. +// +// The function takes two parameters: +// - references: a pointer to a BioSequenceSlice, which is a slice of BioSequence objects. +// - taxonomy: a pointer to a Taxonomy object. +// +// The function returns a pointer to a TaxonSet, which is a set of taxa. +func ExtractTaxonSet(references *obiseq.BioSequenceSlice, taxonomy *obitax.Taxonomy) *obitax.TaxonSet { + var err error + taxa := make(obitax.TaxonSet, len(*references)) + + for i, ref := range *references { + taxid := ref.Taxid() + taxa[i], err = taxonomy.Taxon(taxid) + if err != nil { + log.Panicf("Taxid %d, for sequence %s not found in taxonomy", taxid, ref.Id()) + } + } + + return &taxa +} + +// MapOnLandmarkSequences calculates the coordinates of a given sequence relative to the landmark sequences. +// +// It takes in three parameters: +// - sequence: a pointer to a BioSequence object representing the sequence. +// - landmarks: a pointer to a BioSequenceSlice object representing the landmarks. +// - buffer: a pointer to a slice of uint64, used as a buffer for calculations. +// +// It returns a slice of integers with one coordinate per landmark, computed as length - lcs from obialign.FastLCSEGFScore. 
+func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioSequenceSlice, buffer *[]uint64) []int { + + coords := make([]int, len(*landmarks)) + + for i, l := range *landmarks { + lcs, length := obialign.FastLCSEGFScore(sequence, l, -1, buffer) + coords[i] = length - lcs + } + + return coords +} + +// FindGeomClosest finds, in a given set of reference sequences, the sequences geometrically closest to a query sequence. +// +// Parameters: +// - sequence: A pointer to a BioSequence object representing the query sequence. +// - landmarks: A pointer to a BioSequenceSlice object representing the landmarks. +// - references: A pointer to a BioSequenceSlice object representing the reference sequences. +// - buffer: A pointer to a slice of uint64 representing a buffer. +// +// Returns: +// - A pointer to a BioSequence object representing the closest sequence. +// - An int representing the minimum distance (sum of squared coordinate differences). +// - A float64 representing the best identity score. +// - A slice of int holding the coordinates of the query sequence relative to the landmarks. +// - A pointer to a BioSequenceSlice object representing the matched sequences. 
+func FindGeomClosest(sequence *obiseq.BioSequence, + landmarks *obiseq.BioSequenceSlice, + references *obiseq.BioSequenceSlice, + buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) { + + min_dist := math.MaxInt64 + min_idx := make([]int, 0) + + query_location := MapOnLandmarkSequences(sequence, landmarks, buffer) + + for i, l := range *references { + coord := l.GetCoordinate() + if len(coord) == 0 { + log.Panicf("Empty coordinate for reference sequence %s", l.Id()) + } + dist := 0 + for j := 0; j < len(coord); j++ { + diff := query_location[j] - coord[j] + dist += diff * diff + } + + if dist == min_dist { + min_idx = append(min_idx, i) + } + if dist < min_dist { + min_dist = dist + min_idx = make([]int, 0) + min_idx = append(min_idx, i) + } + } + + best_seq := (*references)[min_idx[0]] + best_id := 0.0 + + for _, i := range min_idx { + seq := (*references)[i] + lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer) + ident := float64(lcs) / float64(length) + if ident > best_id { + best_id = ident + best_seq = seq + } + } + + matches := obiseq.MakeBioSequenceSlice(len(min_idx)) + matches = matches[0:len(min_idx)] + for i, j := range min_idx { + matches[i] = (*references)[j] + } + + return best_seq, min_dist, best_id, query_location, &matches +} + +func GeomIdentify(sequence *obiseq.BioSequence, + landmarks *obiseq.BioSequenceSlice, + references *obiseq.BioSequenceSlice, + taxa *obitax.TaxonSet, + taxo *obitax.Taxonomy, + buffer *[]uint64) *obiseq.BioSequence { + best_seq, min_dist, best_id, query_location, matches := FindGeomClosest(sequence, landmarks, references, buffer) + + taxon := (*obitax.TaxNode)(nil) + var err error + + if best_id > 0.5 { + taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[0].OBITagGeomRefIndex()) + taxon, _ = taxo.Taxon(taxid) + for i := 1; i < len(*matches); i++ { + taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[i].OBITagGeomRefIndex()) + newTaxon, _ := taxo.Taxon(taxid) + 
taxon, err = newTaxon.LCA(taxon) + if err != nil { + log.Panicf("LCA error: %v", err) + } + } + sequence.SetTaxid(taxon.Taxid()) + } else { + taxon, _ = taxo.Taxon(1) + sequence.SetTaxid(1) + } + + sequence.SetAttribute("scientific_name", taxon.ScientificName()) + sequence.SetAttribute("obitag_rank", taxon.Rank()) + sequence.SetAttribute("obitag_bestid", best_id) + sequence.SetAttribute("obitag_bestmatch", best_seq.Id()) + sequence.SetAttribute("obitag_min_dist", min_dist) + sequence.SetAttribute("obitag_coord", query_location) + sequence.SetAttribute("obitag_match_count", len(*matches)) + sequence.SetAttribute("obitag_similarity_method", "geometric") + + return sequence +} + +func GeomIdentifySeqWorker(references *obiseq.BioSequenceSlice, + taxo *obitax.Taxonomy) obiseq.SeqWorker { + + landmarks := ExtractLandmarkSeqs(references) + taxa := ExtractTaxonSet(references, taxo) + return func(sequence *obiseq.BioSequence) *obiseq.BioSequence { + buffer := make([]uint64, 100) + return GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer) + } +} + +func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence, + references obiseq.BioSequenceSlice, + taxo *obitax.Taxonomy, +) obiiter.IBioSequence { + + worker := GeomIdentifySeqWorker(&references, taxo) + return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0) +} diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index dae4e55..690f99e 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -1,6 +1,7 @@ package obitag import ( + "sort" "strconv" "strings" @@ -16,6 +17,61 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) +// MatchDistanceIndex returns the taxid, rank, and scientificName based on the given distance and distanceIdx. +// +// Parameters: +// - distance: The distance to match against the keys in distanceIdx. 
+// - distanceIdx: A map containing distances as keys and corresponding values in the format "taxid@rank@scientificName". +// +// Returns: +// - taxid: The taxid associated with the matched distance. +// - rank: The rank associated with the matched distance. +// - scientificName: The scientific name associated with the matched distance. +func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) { + keys := make([]int, 0, len(distanceIdx)) + for k := range distanceIdx { + keys = append(keys, k) + } + sort.Ints(keys) + + i := sort.Search(len(keys), func(i int) bool { + return distance <= keys[i] + }) + + var taxid int + var rank string + var scientificName string + + if i == len(keys) || distance > keys[len(keys)-1] { + taxid = 1 + rank = "no rank" + scientificName = "root" + } else { + parts := strings.Split(distanceIdx[keys[i]], "@") + taxid, _ = strconv.Atoi(parts[0]) + rank = parts[1] + scientificName = parts[2] + } + + // log.Info("taxid:", taxid, " rank:", rank, " scientificName:", scientificName) + + return taxid, rank, scientificName +} + +// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences. +// +// Parameters: +// - sequence: the bio sequence to find the closest matches for. +// - references: a slice of reference sequences to compare against. +// - refcounts: a slice of reference sequence counts. +// - runExact: a boolean flag indicating whether to run an exact match. +// +// Returns: +// - bests: a slice of the closest bio sequences. +// - maxe: the maximum score. +// - bestId: the best ID. +// - bestmatch: the best match. +// - bestidxs: a slice of the best indexes. func FindClosests(sequence *obiseq.BioSequence, references obiseq.BioSequenceSlice, refcounts []*obikmer.Table4mer, @@ -94,6 +150,18 @@ func FindClosests(sequence *obiseq.BioSequence, return bests, maxe, bestId, bestmatch, bestidxs } +// Identify makes the taxonomic identification of a BioSequence. 
+// +// Parameters: +// - sequence: A pointer to a BioSequence to identify. +// - references: A BioSequenceSlice. +// - refcounts: A slice of pointers to Table4mer. +// - taxa: A TaxonSet. +// - taxo: A pointer to a Taxonomy. +// - runExact: A boolean value indicating whether to run exact matching. +// +// Returns: +// - A pointer to a BioSequence. func Identify(sequence *obiseq.BioSequence, references obiseq.BioSequenceSlice, refcounts []*obikmer.Table4mer, @@ -171,24 +239,19 @@ func Identify(sequence *obiseq.BioSequence, log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx) sequence.SetTaxid(taxon.Taxid()) - sequence.SetAttribute("scientific_name", taxon.ScientificName()) - sequence.SetAttribute("obitag_rank", taxon.Rank()) - sequence.SetAttribute("obitag_bestid", identity) - sequence.SetAttribute("obitag_difference", differences) - sequence.SetAttribute("obitag_bestmatch", bestmatch) - sequence.SetAttribute("obitag_match_count", len(bests)) } else { taxon, _ = taxo.Taxon(1) sequence.SetTaxid(1) - sequence.SetAttribute("scientific_name", taxon.ScientificName()) - sequence.SetAttribute("obitag_rank", taxon.Rank()) - sequence.SetAttribute("obitag_bestid", identity) - sequence.SetAttribute("obitag_difference", differences) - sequence.SetAttribute("obitag_bestmatch", bestmatch) - sequence.SetAttribute("obitag_match_count", len(bests)) } + sequence.SetAttribute("scientific_name", taxon.ScientificName()) + sequence.SetAttribute("obitag_rank", taxon.Rank()) + sequence.SetAttribute("obitag_bestid", identity) + sequence.SetAttribute("obitag_bestmatch", bestmatch) + sequence.SetAttribute("obitag_match_count", len(bests)) + sequence.SetAttribute("obitag_similarity_method", "lcs") + return sequence } diff --git a/pkg/obitools/obitag/options.go b/pkg/obitools/obitag/options.go index 15116b7..349d1aa 100644 --- a/pkg/obitools/obitag/options.go +++ b/pkg/obitools/obitag/options.go @@ -15,6 +15,7 @@ import ( var _RefDB = "" var _SaveRefDB = "" var 
_RunExact = false +var _GeomSim = false func TagOptionSet(options *getoptions.GetOpt) { options.StringVar(&_RefDB, "reference-db", _RefDB, @@ -27,6 +28,10 @@ func TagOptionSet(options *getoptions.GetOpt) { options.ArgName("FILENAME"), options.Description("The name of a file where to save the reference DB with its indices")) + options.BoolVar(&_GeomSim, "geometric", _GeomSim, + options.Alias("G"), + options.Description("Activate the experimental geometric similarity heuristic")) + // options.BoolVar(&_RunExact, "exact", _RunExact, // options.Alias("E"), // options.Description("Unactivate the heuristic limatitating the sequence comparisons")) @@ -55,6 +60,10 @@ func CLIRefDB() obiseq.BioSequenceSlice { return refdb.Load() } +func CLIGeometricMode() bool { + return _GeomSim +} + func CLIShouldISaveRefDB() bool { return _SaveRefDB != "" } diff --git a/pkg/obiutils/array.go b/pkg/obiutils/array.go index e1635a5..cc20f80 100644 --- a/pkg/obiutils/array.go +++ b/pkg/obiutils/array.go @@ -1,7 +1,18 @@ package obiutils -// Matrix is a generic type representing a matrix. -type Matrix[T any] [][]T +type Integer interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 +} + +type Float interface { + ~float32 | ~float64 +} +type Numeric interface { + Integer | Float +} + +type Vector[T any] []T +type Matrix[T any] []Vector[T] // Make2DArray generates a 2D array of type T with the specified number of rows and columns. // @@ -22,15 +33,20 @@ func Make2DArray[T any](rows, cols int) Matrix[T] { return matrix } -// Init initializes the Matrix with the given value. -// -// value: the value to initialize the Matrix elements with. 
-func (matrix *Matrix[T]) Init(value T) { - data := (*matrix)[0] - data = data[0:cap(data)] - for i := range data { - data[i] = value +func Make2DNumericArray[T Numeric](rows, cols int, zeroed bool) Matrix[T] { + matrix := make(Matrix[T], rows) + data := make([]T, cols*rows) + + if zeroed { + for i := range data { + data[i] = 0 + } } + + for i := 0; i < rows; i++ { + matrix[i] = data[i*cols : (i+1)*cols] + } + return matrix } // Row returns the i-th row of the matrix. @@ -50,6 +66,19 @@ func (matrix *Matrix[T]) Column(i int) []T { return r } +// Rows returns the specified rows of the matrix. +// +// The function takes one or more integer arguments representing the indices of the rows to be returned. +// It returns a new matrix containing the specified rows. +func (matrix *Matrix[T]) Rows(i ...int) Matrix[T] { + res := make([]Vector[T], len(i)) + + for j, idx := range i { + res[j] = (*matrix)[idx] + } + return res +} + // Dim returns the dimensions of the Matrix. // // It takes no parameters. diff --git a/pkg/obiutils/bytes.go b/pkg/obiutils/bytes.go index b17d226..5dc8748 100644 --- a/pkg/obiutils/bytes.go +++ b/pkg/obiutils/bytes.go @@ -1,11 +1,17 @@ package obiutils +// InPlaceToLower converts all uppercase letters in the input byte slice to lowercase in place. +// +// It takes a single parameter: +// - data: a byte slice representing the input data +// +// It returns the modified byte slice. func InPlaceToLower(data []byte) []byte { - for i,l := range data { - if l >= 'A' && l <='Z' { - data[i]|=32 + for i, l := range data { + if l >= 'A' && l <= 'Z' { + data[i] |= 32 } } return data -} \ No newline at end of file +} diff --git a/pkg/obiutils/cast_interface.go b/pkg/obiutils/cast_interface.go new file mode 100644 index 0000000..f7ba7bd --- /dev/null +++ b/pkg/obiutils/cast_interface.go @@ -0,0 +1,54 @@ +package obiutils + +// CastableToInt checks if the given input can be casted to an integer. +// +// i: the value to check for castability. 
+// bool: true if the value can be casted to an integer, false otherwise. +func CastableToInt(i interface{}) bool { + switch i.(type) { + case int, + int8, int16, int32, int64, + float32, float64, + uint8, uint16, uint32, uint64: + return true + default: + return false + } +} + +// InterfaceToBool converts an interface value to a boolean. +// +// It takes an interface{} as a parameter and returns a boolean value and an error. +func InterfaceToBool(i interface{}) (val bool, err error) { + + err = nil + val = false + + switch t := i.(type) { + case int: + val = t != 0 + case int8: + val = t != 0 // standardizes across systems + case int16: + val = t != 0 // standardizes across systems + case int32: + val = t != 0 // standardizes across systems + case int64: + val = t != 0 // standardizes across systems + case float32: + val = t != 0 // standardizes across systems + case float64: + val = t != 0 // standardizes across systems + case uint8: + val = t != 0 // standardizes across systems + case uint16: + val = t != 0 // standardizes across systems + case uint32: + val = t != 0 // standardizes across systems + case uint64: + val = t != 0 // standardizes across systems + default: + err = &NotABoolean{"value attribute cannot be casted to a boolean"} + } + return +} diff --git a/pkg/obiutils/goutils.go b/pkg/obiutils/goutils.go index 36c603b..10e5790 100644 --- a/pkg/obiutils/goutils.go +++ b/pkg/obiutils/goutils.go @@ -196,6 +196,69 @@ func InterfaceToFloat64Map(i interface{}) (val map[string]float64, err error) { return } +// InterfaceToFloat64Slice converts an interface{} to a []float64 slice. +// +// It takes an interface{} parameter and returns a slice of float64 values and an error. 
+func InterfaceToFloat64Slice(i interface{}) ([]float64, error) { + switch i := i.(type) { + case []float64: + return i, nil + case []interface{}: + val := make([]float64, len(i)) + for k, v := range i { + if x, err := InterfaceToFloat64(v); err != nil { + return nil, err + } else { + val[k] = x + } + } + return val, nil + case []int: + val := make([]float64, len(i)) + for k, v := range i { + val[k] = float64(v) + } + return val, nil + default: + return nil, &NotAMapFloat64{"value attribute cannot be casted to a []float64"} + } +} + +// InterfaceToIntSlice converts an interface{} to a []int slice. +// +// It takes an interface{} parameter and returns a slice of int values and an error. +func InterfaceToIntSlice(i interface{}) ([]int, error) { + + switch i := i.(type) { + case []int: + return i, nil + case []interface{}: + val := make([]int, len(i)) + for k, v := range i { + if x, err := InterfaceToInt(v); err != nil { + return nil, err + } else { + val[k] = x + } + } + return val, nil + case []float64: + val := make([]int, len(i)) + for k, v := range i { + val[k] = int(v + 0.5) + } + return val, nil + case Vector[float64]: + val := make([]int, len(i)) + for k, v := range i { + val[k] = int(v + 0.5) + } + return val, nil + default: + return nil, &NotAMapInt{"value attribute cannot be casted to a []int"} + } +} + // NotABoolean defines a new type of Error : "NotABoolean" type NotABoolean struct { message string @@ -208,56 +271,6 @@ func (m *NotABoolean) Error() string { return m.message } -// It converts an interface{} to a bool, and returns an error if the interface{} cannot be converted -// to a bool -func InterfaceToBool(i interface{}) (val bool, err error) { - - err = nil - val = false - - switch t := i.(type) { - case int: - val = t != 0 - case int8: - val = t != 0 // standardizes across systems - case int16: - val = t != 0 // standardizes across systems - case int32: - val = t != 0 // standardizes across systems - case int64: - val = t != 0 // standardizes 
across systems - case float32: - val = t != 0 // standardizes across systems - case float64: - val = t != 0 // standardizes across systems - case uint8: - val = t != 0 // standardizes across systems - case uint16: - val = t != 0 // standardizes across systems - case uint32: - val = t != 0 // standardizes across systems - case uint64: - val = t != 0 // standardizes across systems - default: - err = &NotABoolean{"value attribute cannot be casted to a boolean"} - } - return -} - -// If the interface{} can be cast to an int, return true. -func CastableToInt(i interface{}) bool { - - switch i.(type) { - case int, - int8, int16, int32, int64, - float32, float64, - uint8, uint16, uint32, uint64: - return true - default: - return false - } -} - // > It copies the contents of the `src` map into the `dest` map, but if the value is a map, slice, or // array, it makes a deep copy of it func MustFillMap(dest, src map[string]interface{}) { @@ -270,8 +283,15 @@ func MustFillMap(dest, src map[string]interface{}) { } } +// ReadLines reads the lines from a file specified by the given path. +// // Read a whole file into the memory and store it as array of lines // It reads a file line by line, and returns a slice of strings, one for each line +// +// It takes a single parameter: +// - path: a string representing the path of the file to read. +// +// It returns a slice of strings containing the lines read from the file, and an error if any occurred. func ReadLines(path string) (lines []string, err error) { var ( file *os.File @@ -301,6 +321,14 @@ func ReadLines(path string) (lines []string, err error) { return } +// AtomicCounter creates and returns a function that generates a unique integer value each time it is called. +// +// The function takes an optional initial value as a parameter. If an initial value is provided, the generated +// integers will start from that value. If no initial value is provided, the generated integers will start from 0. +// +// The function is thread safe. 
+// +// The function returns a closure that can be called to retrieve the next integer in the sequence. func AtomicCounter(initial ...int) func() int { counterMutex := sync.Mutex{} counter := 0 @@ -320,12 +348,16 @@ func AtomicCounter(initial ...int) func() int { return nextCounter } -// Marshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8 +// JsonMarshal marshals an interface into JSON format. +// +// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8 // friendly because it replaces the valid UTF-8 and JSON characters "&". "<", // ">" with the "slash u" unicode escaped forms (e.g. \u0026). It preemptively // escapes for HTML friendliness. Where text may include any of these // characters, json.Marshal should not be used. Playground of Go breaking a // title: https://play.golang.org/p/o2hiX0c62oN +// +// It takes an interface as a parameter and returns a byte slice and an error. func JsonMarshal(i interface{}) ([]byte, error) { buffer := &bytes.Buffer{} encoder := json.NewEncoder(buffer) @@ -334,22 +366,45 @@ func JsonMarshal(i interface{}) ([]byte, error) { return bytes.TrimRight(buffer.Bytes(), "\n"), err } +// IsAMap checks if the given value is a map. +// +// value: the value to be checked. +// returns: a boolean indicating if the value is a map. func IsAMap(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Map } +// IsAnArray checks if the given value is an array. +// +// value: The value to be checked. +// Returns: true if the value is an array, false otherwise. func IsAnArray(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Array } +// IsASlice determines if the given value is a slice. +// +// value: the value to check. +// bool: true if the value is a slice, false otherwise. func IsASlice(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Slice } +// HasLength checks if the given value has a length. +// +// value: The value to be checked. 
+// bool: Returns true if the value has a length, false otherwise. func HasLength(value interface{}) bool { _, ok := value.(interface{ Len() int }) return IsAMap(value) || IsAnArray(value) || IsASlice(value) || ok } + +// Len returns the length of the given value. +// +// It accepts a single parameter: +// - value: an interface{} that represents the value whose length is to be determined. +// +// It returns an int, which represents the length of the value. func Len(value interface{}) int { l := 1 diff --git a/pkg/obiutils/ranks.go b/pkg/obiutils/ranks.go index ce4014f..7e2ed66 100644 --- a/pkg/obiutils/ranks.go +++ b/pkg/obiutils/ranks.go @@ -65,6 +65,10 @@ func (r Ranker[_]) Len() int { return len(r.r) } func (r Ranker[T]) Less(i, j int) bool { return r.x.Less(r.r[i], r.r[j]) } func (r Ranker[_]) Swap(i, j int) { r.r[i], r.r[j] = r.r[j], r.r[i] } +// Order sorts the given data using the provided sort.Interface and returns the sorted indices. +// +// data: The data to be sorted. +// Returns: A slice of integers representing the sorted indices. func Order[T sort.Interface](data T) []int { ldata := data.Len() if ldata == 0 { diff --git a/pkg/obiutils/slices.go b/pkg/obiutils/slices.go index 9073abf..8aa85fe 100644 --- a/pkg/obiutils/slices.go +++ b/pkg/obiutils/slices.go @@ -1,5 +1,13 @@ package obiutils +// Contains checks if the given element is present in the given array. +// +// Parameters: +// - arr: The array to search in. +// - x: The element to search for. +// +// Return type: +// - bool: Returns true if the element is found, false otherwise. func Contains[T comparable](arr []T, x T) bool { for _, v := range arr { if v == x { @@ -9,6 +17,14 @@ func Contains[T comparable](arr []T, x T) bool { return false } +// LookFor searches for the first occurrence of a given element in an array and returns its index. 
+// +// Parameters: +// - arr: the array to be searched +// - x: the element to search for +// +// Return: +// - int: the index of the first occurrence of the element in the array, or -1 if not found func LookFor[T comparable](arr []T, x T) int { for i, v := range arr { if v == x { @@ -18,19 +34,36 @@ func LookFor[T comparable](arr []T, x T) int { return -1 } +// RemoveIndex removes an element at a specified index from a slice. +// +// Parameters: +// - s: The slice from which the element will be removed. +// - index: The index of the element to be removed. +// +// Returns: +// A new slice with the element removed. func RemoveIndex[T comparable](s []T, index int) []T { return append(s[:index], s[index+1:]...) } +// Reverse reverses the elements of a slice. +// +// The function takes a slice `s` and a boolean `inplace` parameter. If `inplace` +// is `true`, the function modifies the input slice directly. If `inplace` is +// `false`, the function creates a new slice `c` and copies the elements of `s` +// into `c`. The function then reverses the elements of `s` in-place or `c` +// depending on the `inplace` parameter. +// +// The function returns the reversed slice. func Reverse[S ~[]E, E any](s S, inplace bool) S { if !inplace { - c := make([]E,len(s)) - copy(c,s) + c := make([]E, len(s)) + copy(c, s) s = c } - for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { - s[i], s[j] = s[j], s[i] - } + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } return s }