mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
A first prototype for the space of sequences
Former-commit-id: 07dc6ef044b5b6a6fb45dc2acb01dffe71a96195
This commit is contained in:
@ -8,35 +8,67 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// HasAttribute checks if the BioSequence has the specified attribute.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: a string representing the attribute key to check.
|
||||
//
|
||||
// Returns:
|
||||
// - a boolean indicating whether the BioSequence has the attribute.
|
||||
func (s *BioSequence) HasAttribute(key string) bool {
|
||||
ok := s.annotations != nil
|
||||
|
||||
if ok {
|
||||
defer s.AnnotationsUnlock()
|
||||
s.AnnotationsLock()
|
||||
_, ok = s.annotations[key]
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
// GetAttribute returns the value associated with the given key in the BioSequence's annotations map and a boolean indicating whether the key exists.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: The key to look up in the annotations map.
|
||||
//
|
||||
// Returns:
|
||||
// - val: The value associated with the given key.
|
||||
// - ok: A boolean indicating whether the key exists in the annotations map.
|
||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||
var val interface{}
|
||||
ok := s.annotations != nil
|
||||
|
||||
if ok {
|
||||
defer s.AnnotationsUnlock()
|
||||
s.AnnotationsLock()
|
||||
val, ok = s.annotations[key]
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that sets the value of the key in the annotation map.
|
||||
// SetAttribute sets the value of a given key in the BioSequence annotations.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the key to set the value for.
|
||||
// - value: the value to set for the given key.
|
||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||
annot := s.Annotations()
|
||||
|
||||
defer s.AnnotationsUnlock()
|
||||
s.AnnotationsLock()
|
||||
annot[key] = value
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
// GetIntAttribute returns an integer attribute value based on the provided key.
|
||||
//
|
||||
// It takes a key as a parameter and returns the corresponding integer value along
|
||||
// with a boolean value indicating whether the key exists in the BioSequence, and if it can be converted to an integer.
|
||||
//
|
||||
// If the stored value is convertible to an integer but was not stored as an integer, it is replaced by its integer conversion.
|
||||
//
|
||||
// The returned boolean value will be true if the key exists and its value can be converted to an integer, and false otherwise.
|
||||
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
||||
var val int
|
||||
var err error
|
||||
@ -44,19 +76,39 @@ func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = obiutils.InterfaceToInt(v)
|
||||
ok = err == nil
|
||||
val, ok = v.(int)
|
||||
if !ok {
|
||||
val, err = obiutils.InterfaceToInt(v)
|
||||
ok = err == nil
|
||||
if ok {
|
||||
s.SetAttribute(key, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Deleting the key from the annotation map.
|
||||
// DeleteAttribute deletes the attribute with the given key from the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the key of the attribute to be deleted.
|
||||
//
|
||||
// No return value.
|
||||
func (s *BioSequence) DeleteAttribute(key string) {
|
||||
delete(s.Annotations(), key)
|
||||
if s.annotations != nil {
|
||||
defer s.AnnotationsUnlock()
|
||||
s.AnnotationsLock()
|
||||
delete(s.annotations, key)
|
||||
}
|
||||
}
|
||||
|
||||
// Renaming the key in the annotation map.
|
||||
// RenameAttribute renames an attribute in the BioSequence.
|
||||
//
|
||||
// It takes two string parameters:
|
||||
// - newName: the new name for the attribute.
|
||||
// - oldName: the old name of the attribute to be renamed.
|
||||
// It does not return anything.
|
||||
func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
||||
val, ok := s.GetAttribute(oldName)
|
||||
|
||||
@ -66,7 +118,15 @@ func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
||||
}
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
// GetNumericAttribute returns the numeric value of the specified attribute key
|
||||
// in the BioSequence object.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the attribute key to retrieve the numeric value for.
|
||||
//
|
||||
// Returns:
|
||||
// - float64: the numeric value of the attribute key.
|
||||
// - bool: indicates whether the attribute key exists and can be converted to a float64.
|
||||
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
||||
var val float64
|
||||
var err error
|
||||
@ -81,7 +141,14 @@ func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
// GetStringAttribute retrieves the string value of a specific attribute from the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the key of the attribute to retrieve.
|
||||
//
|
||||
// Returns:
|
||||
// - string: the value of the attribute as a string.
|
||||
// - bool: a boolean indicating whether the attribute was found or not.
|
||||
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
||||
var val string
|
||||
v, ok := s.GetAttribute(key)
|
||||
@ -93,7 +160,14 @@ func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
// GetBoolAttribute returns the boolean attribute value associated with the given key in the BioSequence object.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: The key to retrieve the boolean attribute value.
|
||||
//
|
||||
// Return:
|
||||
// - val: The boolean attribute value associated with the given key and can be converted to a boolean.
|
||||
// - ok: A boolean value indicating whether the attribute value was successfully retrieved.
|
||||
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
||||
var val bool
|
||||
var err error
|
||||
@ -108,6 +182,14 @@ func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// GetIntMap returns a map[string]int and a boolean value indicating whether the key exists in the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: The key to retrieve the value from the BioSequence.
|
||||
//
|
||||
// Returns:
|
||||
// - val: A map[string]int representing the value associated with the key and can be converted to a map[string]int.
|
||||
// - ok: A boolean value indicating whether the key exists in the BioSequence.
|
||||
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
var val map[string]int
|
||||
var err error
|
||||
@ -122,7 +204,41 @@ func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Returning the number of times the sequence has been observed.
|
||||
// GetIntSlice returns the integer slice value associated with the given key in the BioSequence object.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: The key used to retrieve the integer slice value.
|
||||
//
|
||||
// Returns:
|
||||
// - []int: The integer slice value associated with the given key.
|
||||
// - bool: A boolean indicating whether the key exists in the BioSequence object.
|
||||
func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
|
||||
var val []int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, ok = v.([]int)
|
||||
if !ok {
|
||||
val, err = obiutils.InterfaceToIntSlice(v)
|
||||
ok = err == nil
|
||||
if ok {
|
||||
s.SetAttribute(key, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Count returns the value of the "count" attribute of the BioSequence.
|
||||
//
|
||||
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||
// It is represented in the sequence header as the "count" attribute.
|
||||
// If the attribute is not found, the function returns 1 as the default count.
|
||||
//
|
||||
// It returns an integer representing the count value.
|
||||
func (s *BioSequence) Count() int {
|
||||
count, ok := s.GetIntAttribute("count")
|
||||
|
||||
@ -133,13 +249,27 @@ func (s *BioSequence) Count() int {
|
||||
return count
|
||||
}
|
||||
|
||||
// Setting the number of times the sequence has been observed.
|
||||
// SetCount sets the count of the BioSequence.
|
||||
//
|
||||
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||
// The value of the "count" attribute is set to the new count, event if the new count is 1.
|
||||
// If the count is less than 1, the count is set to 1.
|
||||
//
|
||||
// count - the new count to set.
|
||||
func (s *BioSequence) SetCount(count int) {
|
||||
annot := s.Annotations()
|
||||
annot["count"] = count
|
||||
if count < 1 {
|
||||
count = 1
|
||||
}
|
||||
s.SetAttribute("count", count)
|
||||
}
|
||||
|
||||
// Returning the taxid of the sequence.
|
||||
// Taxid returns the taxonomic ID associated with the BioSequence.
|
||||
//
|
||||
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
||||
// If the attribute is not found, the function returns 1 as the default taxonomic ID.
|
||||
// The taxid 1 corresponds to the root taxonomic level.
|
||||
//
|
||||
// The function returns an integer representing the taxonomic ID.
|
||||
func (s *BioSequence) Taxid() int {
|
||||
taxid, ok := s.GetIntAttribute("taxid")
|
||||
|
||||
@ -150,10 +280,16 @@ func (s *BioSequence) Taxid() int {
|
||||
return taxid
|
||||
}
|
||||
|
||||
// Setting the taxid of the sequence.
|
||||
// SetTaxid sets the taxid for the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// taxid - the taxid to set.
|
||||
func (s *BioSequence) SetTaxid(taxid int) {
|
||||
annot := s.Annotations()
|
||||
annot["taxid"] = taxid
|
||||
if taxid < 1 {
|
||||
taxid = 1
|
||||
}
|
||||
s.SetAttribute("taxid", taxid)
|
||||
}
|
||||
|
||||
func (s *BioSequence) OBITagRefIndex() map[int]string {
|
||||
@ -201,4 +337,115 @@ func (s *BioSequence) OBITagRefIndex() map[int]string {
|
||||
|
||||
func (s *BioSequence) SetOBITagRefIndex(idx map[int]string) {
|
||||
s.SetAttribute("obitag_ref_index", idx)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *BioSequence) SetOBITagGeomRefIndex(idx map[int]string) {
|
||||
s.SetAttribute("obitag_geomref_index", idx)
|
||||
}
|
||||
|
||||
func (s *BioSequence) OBITagGeomRefIndex() map[int]string {
|
||||
var val map[int]string
|
||||
|
||||
i, ok := s.GetAttribute("obitag_geomref_index")
|
||||
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[int]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
val[score], err = obiutils.InterfaceToString(v)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
}
|
||||
case map[string]string:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
val[score] = v
|
||||
|
||||
}
|
||||
default:
|
||||
log.Panicln("value of attribute obitag_geomref_index cannot be casted to a map[int]string")
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
// GetCoordinate returns the coordinate of the BioSequence.
|
||||
//
|
||||
// Returns the coordinate of the BioSequence in the space of its reference database landmark sequences.
|
||||
// if no coordinate is found, it returns nil.
|
||||
//
|
||||
// This function does not take any parameters.
|
||||
//
|
||||
// It returns a slice of integers ([]int).
|
||||
func (s *BioSequence) GetCoordinate() []int {
|
||||
coord, ok := s.GetIntSlice("landmark_coord")
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
return coord
|
||||
}
|
||||
|
||||
// SetCoordinate sets the coordinate of the BioSequence.
|
||||
//
|
||||
// coord: An array of integers representing the coordinate.
|
||||
// This function does not return anything.
|
||||
func (s *BioSequence) SetCoordinate(coord []int) {
|
||||
s.SetAttribute("landmark_coord", coord)
|
||||
}
|
||||
|
||||
// SetLandmarkID sets the landmark ID of the BioSequence.
|
||||
//
|
||||
// Trying to set a negative landmark ID leads to a no operation.
|
||||
//
|
||||
// Parameters:
|
||||
// id: The ID of the landmark.
|
||||
func (s *BioSequence) SetLandmarkID(id int) {
|
||||
if id < 0 {
|
||||
return
|
||||
}
|
||||
s.SetAttribute("landmark_id", id)
|
||||
}
|
||||
|
||||
// GetLandmarkID returns the landmark ID associated with the BioSequence.
|
||||
//
|
||||
// It retrieves the "landmark_id" attribute from the BioSequence's attributes map.
|
||||
// If the attribute is not found, the function returns -1 as the default landmark ID.
|
||||
// The landmark ID is an integer representing the number of the axis in the landmark space.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
// It returns an integer representing the landmark ID.
|
||||
func (s *BioSequence) GetLandmarkID() int {
|
||||
val, ok := s.GetIntAttribute("landmark_id")
|
||||
|
||||
if !ok {
|
||||
return -1
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
// IsALandmark checks if the BioSequence is a landmark.
|
||||
//
|
||||
// A sequence is a landmark if its landmark ID is set (attribute "landmark_id").
|
||||
//
|
||||
// It returns a boolean indicating whether the BioSequence is a landmark or not.
|
||||
func (s *BioSequence) IsALandmark() bool {
|
||||
return s.GetLandmarkID() != -1
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ package obiseq
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
@ -58,6 +59,7 @@ type BioSequence struct {
|
||||
feature []byte
|
||||
paired *BioSequence // A pointer to the paired sequence
|
||||
annotations Annotation
|
||||
annot_lock sync.Mutex
|
||||
}
|
||||
|
||||
// MakeEmptyBioSequence() creates a new BioSequence object with no data
|
||||
@ -79,6 +81,7 @@ func MakeEmptyBioSequence(preallocate int) BioSequence {
|
||||
feature: nil,
|
||||
paired: nil,
|
||||
annotations: nil,
|
||||
annot_lock: sync.Mutex{},
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,6 +145,8 @@ func (s *BioSequence) Copy() *BioSequence {
|
||||
newSeq.feature = CopySlice(s.feature)
|
||||
|
||||
if len(s.annotations) > 0 {
|
||||
defer s.annot_lock.Unlock()
|
||||
s.annot_lock.Lock()
|
||||
newSeq.annotations = GetAnnotation(s.annotations)
|
||||
}
|
||||
|
||||
@ -206,6 +211,14 @@ func (s *BioSequence) Annotations() Annotation {
|
||||
return s.annotations
|
||||
}
|
||||
|
||||
func (s *BioSequence) AnnotationsLock() {
|
||||
s.annot_lock.Lock()
|
||||
}
|
||||
|
||||
func (s *BioSequence) AnnotationsUnlock() {
|
||||
s.annot_lock.Unlock()
|
||||
}
|
||||
|
||||
// Checking if the BioSequence has a source.
|
||||
func (s *BioSequence) HasSource() bool {
|
||||
return len(s.source) > 0
|
||||
|
@ -74,8 +74,7 @@ func AssignToClass(data, centers *obiutils.Matrix[float64]) []int {
|
||||
// Returns:
|
||||
// - centers: a pointer to a matrix of float64 values representing the centers of the clusters.
|
||||
func ComputeCenters(data *obiutils.Matrix[float64], k int, classes []int) *obiutils.Matrix[float64] {
|
||||
centers := obiutils.Make2DArray[float64](k, len((*data)[0]))
|
||||
centers.Init(0.0)
|
||||
centers := obiutils.Make2DNumericArray[float64](k, len((*data)[0]), true)
|
||||
ns := make([]int, k)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
@ -48,11 +48,14 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||
options.Description("Restrict output to some subclades."))
|
||||
}
|
||||
|
||||
|
||||
func CLISelectedNCBITaxDump() string {
|
||||
return __taxdump__
|
||||
}
|
||||
|
||||
func CLIHasSelectedTaxonomy() bool {
|
||||
return __taxdump__ != ""
|
||||
}
|
||||
|
||||
func CLIAreAlternativeNamesSelected() bool {
|
||||
return __alternative_name__
|
||||
}
|
||||
|
@ -10,6 +10,9 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obistats"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obirefidx"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
log "github.com/sirupsen/logrus"
|
||||
@ -103,14 +106,14 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
||||
library := iterator.Load()
|
||||
|
||||
library_size := len(library)
|
||||
n_landmark := NCenter()
|
||||
n_landmark := CLINCenter()
|
||||
|
||||
landmark_idx := obistats.SampleIntWithoutReplacement(n_landmark, library_size)
|
||||
log.Infof("Library contains %d sequence", len(library))
|
||||
|
||||
var seqworld obiutils.Matrix[float64]
|
||||
|
||||
for loop := 0; loop < 5; loop++ {
|
||||
for loop := 0; loop < 2; loop++ {
|
||||
sort.IntSlice(landmark_idx).Sort()
|
||||
log.Debugf("Selected indices : %v", landmark_idx)
|
||||
|
||||
@ -154,14 +157,52 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
||||
}
|
||||
|
||||
classes := obistats.AssignToClass(&seqworld, &initialCenters)
|
||||
|
||||
for i, seq := range library {
|
||||
seq.SetAttribute("landmark_coord", seqworld[i])
|
||||
ic, _ := obiutils.InterfaceToIntSlice(seqworld[i])
|
||||
seq.SetCoordinate(ic)
|
||||
seq.SetAttribute("landmark_class", classes[i])
|
||||
if i, ok := seq_landmark[i]; ok {
|
||||
seq.SetAttribute("landmark_id", i)
|
||||
}
|
||||
}
|
||||
|
||||
if obifind.CLIHasSelectedTaxonomy() {
|
||||
taxo, err := obifind.CLILoadSelectedTaxonomy()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
taxa := make(obitax.TaxonSet, len(library))
|
||||
|
||||
for i, seq := range library {
|
||||
taxa[i], err = taxo.Taxon(seq.Taxid())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("[Sequence Indexing]"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(len(library), pbopt...)
|
||||
|
||||
for i, seq := range library {
|
||||
idx := obirefidx.GeomIndexSesquence(i, library, &taxa, taxo)
|
||||
seq.SetOBITagGeomRefIndex(idx)
|
||||
|
||||
if i%10 == 0 {
|
||||
bar.Add(10)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return obiiter.IBatchOver(library, obioptions.CLIBatchSize())
|
||||
|
||||
}
|
||||
|
@ -2,28 +2,37 @@ package obilandmark
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var _nCenter = 200
|
||||
|
||||
// ObilandmarkOptionSet sets the options for Obilandmark.
|
||||
// LandmarkOptionSet sets the options for Obilandmark.
|
||||
//
|
||||
// options: a pointer to the getoptions.GetOpt struct.
|
||||
// Return type: none.
|
||||
func ObilandmarkOptionSet(options *getoptions.GetOpt) {
|
||||
func LandmarkOptionSet(options *getoptions.GetOpt) {
|
||||
|
||||
options.IntVar(&_nCenter, "center", _nCenter,
|
||||
options.Alias("n"),
|
||||
options.Description("Maximum numbers of differences between two variant sequences (default: %d)."))
|
||||
options.Description("Number of landmark sequences to be selected."))
|
||||
}
|
||||
|
||||
// OptionSet is a function that sets the options for the GetOpt struct.
|
||||
//
|
||||
// It takes a pointer to a GetOpt struct as its parameter and does not return anything.
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.InputOptionSet(options)
|
||||
obiconvert.OutputOptionSet(options)
|
||||
ObilandmarkOptionSet(options)
|
||||
obifind.LoadTaxonomyOptionSet(options, false, false)
|
||||
LandmarkOptionSet(options)
|
||||
}
|
||||
|
||||
func NCenter() int {
|
||||
// CLINCenter returns desired number of centers as specified by user.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns an integer value.
|
||||
func CLINCenter() int {
|
||||
return _nCenter
|
||||
}
|
||||
|
79
pkg/obitools/obirefidx/geomindexing.go
Normal file
79
pkg/obitools/obirefidx/geomindexing.go
Normal file
@ -0,0 +1,79 @@
|
||||
package obirefidx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
func GeomIndexSesquence(seqidx int,
|
||||
references obiseq.BioSequenceSlice,
|
||||
taxa *obitax.TaxonSet,
|
||||
taxo *obitax.Taxonomy) map[int]string {
|
||||
|
||||
sequence := references[seqidx]
|
||||
location := sequence.GetCoordinate()
|
||||
|
||||
if location == nil {
|
||||
log.Fatalf("Sequence %s does not have a coordinate", sequence.Id())
|
||||
}
|
||||
|
||||
seq_dist := make([]float64, len(references))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i, ref := range references {
|
||||
wg.Add(1)
|
||||
go func(i int, ref *obiseq.BioSequence) {
|
||||
defer wg.Done()
|
||||
reflocation := ref.GetCoordinate()
|
||||
if reflocation == nil {
|
||||
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
|
||||
}
|
||||
d := 0.0
|
||||
for i, x := range location {
|
||||
diff := float64(x - reflocation[i])
|
||||
d += diff * diff
|
||||
}
|
||||
seq_dist[i] = d
|
||||
}(i, ref)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
order := obiutils.Order(sort.Float64Slice(seq_dist))
|
||||
|
||||
lca := (*taxa)[seqidx]
|
||||
|
||||
index := make(map[int]string)
|
||||
index[0.0] = fmt.Sprintf(
|
||||
"%d@%s@%s",
|
||||
lca.Taxid(),
|
||||
lca.ScientificName(),
|
||||
lca.Rank())
|
||||
|
||||
old_dist := 0.0
|
||||
for _, o := range order {
|
||||
new_lca, _ := lca.LCA((*taxa)[o])
|
||||
if new_lca.Taxid() != lca.Taxid() || seq_dist[o] != old_dist {
|
||||
lca = new_lca
|
||||
old_dist = seq_dist[o]
|
||||
index[int(seq_dist[o])] = fmt.Sprintf(
|
||||
"%d@%s@%s",
|
||||
lca.Taxid(),
|
||||
lca.ScientificName(),
|
||||
lca.Rank())
|
||||
}
|
||||
|
||||
if lca.Taxid() == 1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return index
|
||||
}
|
@ -24,9 +24,6 @@ func IndexSequence(seqidx int,
|
||||
taxo *obitax.Taxonomy) map[int]string {
|
||||
|
||||
sequence := references[seqidx]
|
||||
// matrix := obialign.NewFullLCSMatrix(nil,
|
||||
// sequence.Length(),
|
||||
// sequence.Length())
|
||||
|
||||
var matrix []uint64
|
||||
|
||||
@ -54,7 +51,9 @@ func IndexSequence(seqidx int,
|
||||
nok := make([]int, len(*pseq))
|
||||
nfast := make([]int, len(*pseq))
|
||||
nfastok := make([]int, len(*pseq))
|
||||
*/lseq := sequence.Len()
|
||||
*/
|
||||
|
||||
lseq := sequence.Len()
|
||||
|
||||
mini := -1
|
||||
wordmin := 0
|
||||
|
209
pkg/obitools/obitag/obigeomtag.go
Normal file
209
pkg/obitools/obitag/obigeomtag.go
Normal file
@ -0,0 +1,209 @@
|
||||
package obitag
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||
)
|
||||
|
||||
// ExtractLandmarkSeqs extracts landmark sequences from the given BioSequenceSlice.
|
||||
//
|
||||
// The landmark sequences are extracted from the given BioSequenceSlice and put in a new BioSequenceSlice
|
||||
// in the order corresponding to their landmark IDs.
|
||||
//
|
||||
// references: A pointer to a BioSequenceSlice containing the references.
|
||||
// Returns: A pointer to a BioSequenceSlice containing the extracted landmark sequences.
|
||||
func ExtractLandmarkSeqs(references *obiseq.BioSequenceSlice) *obiseq.BioSequenceSlice {
|
||||
landmarks := make(map[int]*obiseq.BioSequence, 100)
|
||||
|
||||
for _, ref := range *references {
|
||||
if id := ref.GetLandmarkID(); id != -1 {
|
||||
landmarks[id] = ref
|
||||
}
|
||||
}
|
||||
|
||||
ls := obiseq.NewBioSequenceSlice(len(landmarks))
|
||||
*ls = (*ls)[0:len(landmarks)]
|
||||
|
||||
for k, l := range landmarks {
|
||||
(*ls)[k] = l
|
||||
}
|
||||
|
||||
return ls
|
||||
}
|
||||
|
||||
// ExtractTaxonSet extracts a set of taxa from the given references and taxonomy.
|
||||
//
|
||||
// If a reference sequence has a taxid absent from the taxonomy, the function will panic.
|
||||
//
|
||||
// The function takes two parameters:
|
||||
// - references: a pointer to a BioSequenceSlice, which is a slice of BioSequence objects.
|
||||
// - taxonomy: a pointer to a Taxonomy object.
|
||||
//
|
||||
// The function returns a pointer to a TaxonSet, which is a set of taxa.
|
||||
func ExtractTaxonSet(references *obiseq.BioSequenceSlice, taxonomy *obitax.Taxonomy) *obitax.TaxonSet {
|
||||
var err error
|
||||
taxa := make(obitax.TaxonSet, len(*references))
|
||||
|
||||
for i, ref := range *references {
|
||||
taxid := ref.Taxid()
|
||||
taxa[i], err = taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
log.Panicf("Taxid %d, for sequence %s not found in taxonomy", taxid, ref.Id())
|
||||
}
|
||||
}
|
||||
|
||||
return &taxa
|
||||
}
|
||||
|
||||
// MapOnLandmarkSequences calculates the coordinates of landmarks on a given sequence.
|
||||
//
|
||||
// It takes in three parameters:
|
||||
// - sequence: a pointer to a BioSequence object representing the sequence.
|
||||
// - landmarks: a pointer to a BioSequenceSlice object representing the landmarks.
|
||||
// - buffer: a pointer to a slice of uint64, used as a buffer for calculations.
|
||||
//
|
||||
// It returns a slice of integers representing the coordinates of the landmarks on the sequence.
|
||||
func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioSequenceSlice, buffer *[]uint64) []int {
|
||||
|
||||
coords := make([]int, len(*landmarks))
|
||||
|
||||
for i, l := range *landmarks {
|
||||
lcs, length := obialign.FastLCSEGFScore(sequence, l, -1, buffer)
|
||||
coords[i] = length - lcs
|
||||
}
|
||||
|
||||
return coords
|
||||
}
|
||||
|
||||
// FindGeomClosest finds the closest geometric sequence in a given set of reference sequences to a query sequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: A pointer to a BioSequence object representing the query sequence.
|
||||
// - landmarks: A pointer to a BioSequenceSlice object representing the landmarks.
|
||||
// - references: A pointer to a BioSequenceSlice object representing the reference sequences.
|
||||
// - buffer: A pointer to a slice of uint64 representing a buffer.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to a BioSequence object representing the closest sequence.
|
||||
// - An int representing the minimum distance.
|
||||
// - A float64 representing the best identity score.
|
||||
// - An array of int representing the indices of the closest sequences.
|
||||
// - A pointer to a BioSequenceSlice object representing the matched sequences.
|
||||
func FindGeomClosest(sequence *obiseq.BioSequence,
|
||||
landmarks *obiseq.BioSequenceSlice,
|
||||
references *obiseq.BioSequenceSlice,
|
||||
buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) {
|
||||
|
||||
min_dist := math.MaxInt64
|
||||
min_idx := make([]int, 0)
|
||||
|
||||
query_location := MapOnLandmarkSequences(sequence, landmarks, buffer)
|
||||
|
||||
for i, l := range *references {
|
||||
coord := l.GetCoordinate()
|
||||
if len(coord) == 0 {
|
||||
log.Panicf("Empty coordinate for reference sequence %s", l.Id())
|
||||
}
|
||||
dist := 0
|
||||
for j := 0; j < len(coord); j++ {
|
||||
diff := query_location[j] - coord[j]
|
||||
dist += diff * diff
|
||||
}
|
||||
|
||||
if dist == min_dist {
|
||||
min_idx = append(min_idx, i)
|
||||
}
|
||||
if dist < min_dist {
|
||||
min_dist = dist
|
||||
min_idx = make([]int, 0)
|
||||
min_idx = append(min_idx, i)
|
||||
}
|
||||
}
|
||||
|
||||
best_seq := (*references)[min_idx[0]]
|
||||
best_id := 0.0
|
||||
|
||||
for _, i := range min_idx {
|
||||
seq := (*references)[i]
|
||||
lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer)
|
||||
ident := float64(lcs) / float64(length)
|
||||
if ident > best_id {
|
||||
best_id = ident
|
||||
best_seq = seq
|
||||
}
|
||||
}
|
||||
|
||||
matches := obiseq.MakeBioSequenceSlice(len(min_idx))
|
||||
matches = matches[0:len(min_idx)]
|
||||
for i, j := range min_idx {
|
||||
matches[i] = (*references)[j]
|
||||
}
|
||||
|
||||
return best_seq, min_dist, best_id, query_location, &matches
|
||||
}
|
||||
|
||||
func GeomIdentify(sequence *obiseq.BioSequence,
|
||||
landmarks *obiseq.BioSequenceSlice,
|
||||
references *obiseq.BioSequenceSlice,
|
||||
taxa *obitax.TaxonSet,
|
||||
taxo *obitax.Taxonomy,
|
||||
buffer *[]uint64) *obiseq.BioSequence {
|
||||
best_seq, min_dist, best_id, query_location, matches := FindGeomClosest(sequence, landmarks, references, buffer)
|
||||
|
||||
taxon := (*obitax.TaxNode)(nil)
|
||||
var err error
|
||||
|
||||
if best_id > 0.5 {
|
||||
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[0].OBITagGeomRefIndex())
|
||||
taxon, _ = taxo.Taxon(taxid)
|
||||
for i := 1; i < len(*matches); i++ {
|
||||
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[i].OBITagGeomRefIndex())
|
||||
newTaxon, _ := taxo.Taxon(taxid)
|
||||
taxon, err = newTaxon.LCA(taxon)
|
||||
if err != nil {
|
||||
log.Panicf("LCA error: %v", err)
|
||||
}
|
||||
}
|
||||
sequence.SetTaxid(taxon.Taxid())
|
||||
} else {
|
||||
taxon, _ = taxo.Taxon(1)
|
||||
sequence.SetTaxid(1)
|
||||
}
|
||||
|
||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||
sequence.SetAttribute("obitag_bestid", best_id)
|
||||
sequence.SetAttribute("obitag_bestmatch", best_seq.Id())
|
||||
sequence.SetAttribute("obitag_min_dist", min_dist)
|
||||
sequence.SetAttribute("obitag_coord", query_location)
|
||||
sequence.SetAttribute("obitag_match_count", len(*matches))
|
||||
sequence.SetAttribute("obitag_similarity_method", "geometric")
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
||||
func GeomIdentifySeqWorker(references *obiseq.BioSequenceSlice,
|
||||
taxo *obitax.Taxonomy) obiseq.SeqWorker {
|
||||
|
||||
landmarks := ExtractLandmarkSeqs(references)
|
||||
taxa := ExtractTaxonSet(references, taxo)
|
||||
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
buffer := make([]uint64, 100)
|
||||
return GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)
|
||||
}
|
||||
}
|
||||
|
||||
func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||
references obiseq.BioSequenceSlice,
|
||||
taxo *obitax.Taxonomy,
|
||||
) obiiter.IBioSequence {
|
||||
|
||||
worker := GeomIdentifySeqWorker(&references, taxo)
|
||||
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
package obitag
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -16,6 +17,61 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
// MatchDistanceIndex returns the taxid, rank, and scientific name stored in
// distanceIdx for the smallest indexed distance that is greater than or equal
// to distance.
//
// Parameters:
//   - distance: the distance to look up.
//   - distanceIdx: a map from distance to a "taxid@rank@scientificName" string.
//
// Returns:
//   - taxid: the taxid of the selected entry.
//   - rank: the rank of the selected entry.
//   - scientificName: the scientific name of the selected entry; it may itself
//     contain "@" characters, only the first two separators are significant.
//
// The root triple (1, "no rank", "root") is returned when the index is empty,
// when distance is larger than every indexed distance, or when the selected
// entry is malformed (fewer than three fields or a non numeric taxid).
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
	const (
		rootTaxid = 1
		rootRank  = "no rank"
		rootName  = "root"
	)

	keys := make([]int, 0, len(distanceIdx))
	for k := range distanceIdx {
		keys = append(keys, k)
	}
	sort.Ints(keys)

	// Index of the first key >= distance, or len(keys) when there is none.
	i := sort.SearchInts(keys, distance)
	if i == len(keys) {
		return rootTaxid, rootRank, rootName
	}

	// Split on the first two separators only, so that a scientific name
	// containing '@' is kept whole.
	parts := strings.SplitN(distanceIdx[keys[i]], "@", 3)
	if len(parts) != 3 {
		return rootTaxid, rootRank, rootName
	}

	taxid, err := strconv.Atoi(parts[0])
	if err != nil {
		return rootTaxid, rootRank, rootName
	}

	return taxid, parts[1], parts[2]
}
|
||||
|
||||
// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: the bio sequence to find the closest matches for.
|
||||
// - references: a slice of reference sequences to compare against.
|
||||
// - refcounts: a slice of reference sequence counts.
|
||||
// - runExact: a boolean flag indicating whether to run an exact match.
|
||||
//
|
||||
// Returns:
|
||||
// - bests: a slice of the closest bio sequences.
|
||||
// - maxe: the maximum score.
|
||||
// - bestId: the best ID.
|
||||
// - bestmatch: the best match.
|
||||
// - bestidxs: a slice of the best indexes.
|
||||
func FindClosests(sequence *obiseq.BioSequence,
|
||||
references obiseq.BioSequenceSlice,
|
||||
refcounts []*obikmer.Table4mer,
|
||||
@ -94,6 +150,18 @@ func FindClosests(sequence *obiseq.BioSequence,
|
||||
return bests, maxe, bestId, bestmatch, bestidxs
|
||||
}
|
||||
|
||||
// Identify makes the taxonomic identification of a BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: A pointer to a BioSequence to identify.
|
||||
// - references: A BioSequenceSlice.
|
||||
// - refcounts: A slice of pointers to Table4mer.
|
||||
// - taxa: A TaxonSet.
|
||||
// - taxo: A pointer to a Taxonomy.
|
||||
// - runExact: A boolean value indicating whether to run exact matching.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to a BioSequence.
|
||||
func Identify(sequence *obiseq.BioSequence,
|
||||
references obiseq.BioSequenceSlice,
|
||||
refcounts []*obikmer.Table4mer,
|
||||
@ -171,24 +239,19 @@ func Identify(sequence *obiseq.BioSequence,
|
||||
log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx)
|
||||
|
||||
sequence.SetTaxid(taxon.Taxid())
|
||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||
sequence.SetAttribute("obitag_bestid", identity)
|
||||
sequence.SetAttribute("obitag_difference", differences)
|
||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||
sequence.SetAttribute("obitag_match_count", len(bests))
|
||||
|
||||
} else {
|
||||
taxon, _ = taxo.Taxon(1)
|
||||
sequence.SetTaxid(1)
|
||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||
sequence.SetAttribute("obitag_bestid", identity)
|
||||
sequence.SetAttribute("obitag_difference", differences)
|
||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||
sequence.SetAttribute("obitag_match_count", len(bests))
|
||||
}
|
||||
|
||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||
sequence.SetAttribute("obitag_bestid", identity)
|
||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||
sequence.SetAttribute("obitag_match_count", len(bests))
|
||||
sequence.SetAttribute("obitag_similarity_method", "lcs")
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
var _RefDB = ""
|
||||
var _SaveRefDB = ""
|
||||
var _RunExact = false
|
||||
var _GeomSim = false
|
||||
|
||||
func TagOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_RefDB, "reference-db", _RefDB,
|
||||
@ -27,6 +28,10 @@ func TagOptionSet(options *getoptions.GetOpt) {
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("The name of a file where to save the reference DB with its indices"))
|
||||
|
||||
options.BoolVar(&_GeomSim, "geometric", _GeomSim,
|
||||
options.Alias("G"),
|
||||
options.Description("Activate the experimental geometric similarity heuristic"))
|
||||
|
||||
// options.BoolVar(&_RunExact, "exact", _RunExact,
|
||||
// options.Alias("E"),
|
||||
// options.Description("Deactivate the heuristic limiting the sequence comparisons"))
|
||||
@ -55,6 +60,10 @@ func CLIRefDB() obiseq.BioSequenceSlice {
|
||||
return refdb.Load()
|
||||
}
|
||||
|
||||
func CLIGeometricMode() bool {
|
||||
return _GeomSim
|
||||
}
|
||||
|
||||
func CLIShouldISaveRefDB() bool {
|
||||
return _SaveRefDB != ""
|
||||
}
|
||||
|
@ -1,7 +1,18 @@
|
||||
package obiutils
|
||||
|
||||
// Integer is a constraint satisfied by every type whose underlying type is a
// signed integer.
type Integer interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64
}

// Float is a constraint satisfied by every type whose underlying type is a
// floating point number.
type Float interface {
	~float32 | ~float64
}

// Numeric is a constraint satisfied by every Integer or Float type.
type Numeric interface {
	Integer | Float
}

// Vector is a generic type representing a one dimensional sequence of values.
type Vector[T any] []T

// Matrix is a generic type representing a matrix as a slice of row Vectors.
type Matrix[T any] []Vector[T]
|
||||
|
||||
// Make2DArray generates a 2D array of type T with the specified number of rows and columns.
|
||||
//
|
||||
@ -22,15 +33,20 @@ func Make2DArray[T any](rows, cols int) Matrix[T] {
|
||||
return matrix
|
||||
}
|
||||
|
||||
// Init initializes the Matrix with the given value.
|
||||
//
|
||||
// value: the value to initialize the Matrix elements with.
|
||||
func (matrix *Matrix[T]) Init(value T) {
|
||||
data := (*matrix)[0]
|
||||
data = data[0:cap(data)]
|
||||
for i := range data {
|
||||
data[i] = value
|
||||
func Make2DNumericArray[T Numeric](rows, cols int, zeroed bool) Matrix[T] {
|
||||
matrix := make(Matrix[T], rows)
|
||||
data := make([]T, cols*rows)
|
||||
|
||||
if zeroed {
|
||||
for i := range data {
|
||||
data[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < rows; i++ {
|
||||
matrix[i] = data[i*cols : (i+1)*cols]
|
||||
}
|
||||
return matrix
|
||||
}
|
||||
|
||||
// Row returns the i-th row of the matrix.
|
||||
@ -50,6 +66,19 @@ func (matrix *Matrix[T]) Column(i int) []T {
|
||||
return r
|
||||
}
|
||||
|
||||
// Rows returns the specified rows of the matrix.
|
||||
//
|
||||
// The function takes one or more integer arguments representing the indices of the rows to be returned.
|
||||
// It returns a new matrix containing the specified rows.
|
||||
func (matrix *Matrix[T]) Rows(i ...int) Matrix[T] {
|
||||
res := make([]Vector[T], len(i))
|
||||
|
||||
for j, idx := range i {
|
||||
res[j] = (*matrix)[idx]
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// Dim returns the dimensions of the Matrix.
|
||||
//
|
||||
// It takes no parameters.
|
||||
|
@ -1,11 +1,17 @@
|
||||
package obiutils
|
||||
|
||||
// InPlaceToLower converts all the ASCII uppercase letters of the input byte
// slice to lowercase, in place.
//
// It takes a single parameter:
//   - data: a byte slice representing the input data
//
// Bytes outside of 'A'..'Z' are left untouched.
//
// It returns the same, modified, byte slice.
func InPlaceToLower(data []byte) []byte {
	for i, l := range data {
		if l >= 'A' && l <= 'Z' {
			// Setting bit 5 maps an ASCII uppercase letter to its lowercase.
			data[i] |= 32
		}
	}

	return data
}
|
||||
|
54
pkg/obiutils/cast_interface.go
Normal file
54
pkg/obiutils/cast_interface.go
Normal file
@ -0,0 +1,54 @@
|
||||
package obiutils
|
||||
|
||||
// CastableToInt reports whether the dynamic type of i is one of the numeric
// types accepted for a conversion to an integer.
//
// The accepted types are int, int8, int16, int32, int64, float32, float64,
// uint8, uint16, uint32 and uint64. Any other type, including plain uint and
// a nil interface, yields false.
func CastableToInt(i interface{}) bool {
	switch i.(type) {
	case int, int8, int16, int32, int64:
		return true
	case float32, float64:
		return true
	case uint8, uint16, uint32, uint64:
		return true
	}

	return false
}
|
||||
|
||||
// InterfaceToBool converts an interface value to a boolean.
|
||||
//
|
||||
// It takes an interface{} as a parameter and returns a boolean value and an error.
|
||||
func InterfaceToBool(i interface{}) (val bool, err error) {
|
||||
|
||||
err = nil
|
||||
val = false
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = t != 0
|
||||
case int8:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int16:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int64:
|
||||
val = t != 0 // standardizes across systems
|
||||
case float32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case float64:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint8:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint16:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint64:
|
||||
val = t != 0 // standardizes across systems
|
||||
default:
|
||||
err = &NotABoolean{"value attribute cannot be casted to a boolean"}
|
||||
}
|
||||
return
|
||||
}
|
@ -196,6 +196,69 @@ func InterfaceToFloat64Map(i interface{}) (val map[string]float64, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// InterfaceToFloat64Slice converts an interface{} to a []float64 slice.
|
||||
//
|
||||
// It takes an interface{} parameter and returns a slice of float64 values and an error.
|
||||
func InterfaceToFloat64Slice(i interface{}) ([]float64, error) {
|
||||
switch i := i.(type) {
|
||||
case []float64:
|
||||
return i, nil
|
||||
case []interface{}:
|
||||
val := make([]float64, len(i))
|
||||
for k, v := range i {
|
||||
if x, err := InterfaceToFloat64(v); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
val[k] = x
|
||||
}
|
||||
}
|
||||
return val, nil
|
||||
case []int:
|
||||
val := make([]float64, len(i))
|
||||
for k, v := range i {
|
||||
val[k] = float64(v)
|
||||
}
|
||||
return val, nil
|
||||
default:
|
||||
return nil, &NotAMapFloat64{"value attribute cannot be casted to a []float64"}
|
||||
}
|
||||
}
|
||||
|
||||
// InterfaceToIntSlice converts an interface{} to a []int slice.
|
||||
//
|
||||
// It takes an interface{} parameter and returns a slice of int values and an error.
|
||||
func InterfaceToIntSlice(i interface{}) ([]int, error) {
|
||||
|
||||
switch i := i.(type) {
|
||||
case []int:
|
||||
return i, nil
|
||||
case []interface{}:
|
||||
val := make([]int, len(i))
|
||||
for k, v := range i {
|
||||
if x, err := InterfaceToInt(v); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
val[k] = x
|
||||
}
|
||||
}
|
||||
return val, nil
|
||||
case []float64:
|
||||
val := make([]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k] = int(v + 0.5)
|
||||
}
|
||||
return val, nil
|
||||
case Vector[float64]:
|
||||
val := make([]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k] = int(v + 0.5)
|
||||
}
|
||||
return val, nil
|
||||
default:
|
||||
return nil, &NotAMapInt{"value attribute cannot be casted to a []int"}
|
||||
}
|
||||
}
|
||||
|
||||
// NotABoolean defines a new type of Error : "NotABoolean"
|
||||
type NotABoolean struct {
|
||||
message string
|
||||
@ -208,56 +271,6 @@ func (m *NotABoolean) Error() string {
|
||||
return m.message
|
||||
}
|
||||
|
||||
// It converts an interface{} to a bool, and returns an error if the interface{} cannot be converted
|
||||
// to a bool
|
||||
func InterfaceToBool(i interface{}) (val bool, err error) {
|
||||
|
||||
err = nil
|
||||
val = false
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = t != 0
|
||||
case int8:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int16:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case int64:
|
||||
val = t != 0 // standardizes across systems
|
||||
case float32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case float64:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint8:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint16:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint32:
|
||||
val = t != 0 // standardizes across systems
|
||||
case uint64:
|
||||
val = t != 0 // standardizes across systems
|
||||
default:
|
||||
err = &NotABoolean{"value attribute cannot be casted to a boolean"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// If the interface{} can be cast to an int, return true.
|
||||
func CastableToInt(i interface{}) bool {
|
||||
|
||||
switch i.(type) {
|
||||
case int,
|
||||
int8, int16, int32, int64,
|
||||
float32, float64,
|
||||
uint8, uint16, uint32, uint64:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// > It copies the contents of the `src` map into the `dest` map, but if the value is a map, slice, or
|
||||
// array, it makes a deep copy of it
|
||||
func MustFillMap(dest, src map[string]interface{}) {
|
||||
@ -270,8 +283,15 @@ func MustFillMap(dest, src map[string]interface{}) {
|
||||
}
|
||||
}
|
||||
|
||||
// ReadLines reads the lines from a file specified by the given path.
|
||||
//
|
||||
// Read a whole file into the memory and store it as array of lines
|
||||
// It reads a file line by line, and returns a slice of strings, one for each line
|
||||
//
|
||||
// It takes a single parameter:
|
||||
// - path: a string representing the path of the file to read.
|
||||
//
|
||||
// It returns a slice of strings containing the lines read from the file, and an error if any occurred.
|
||||
func ReadLines(path string) (lines []string, err error) {
|
||||
var (
|
||||
file *os.File
|
||||
@ -301,6 +321,14 @@ func ReadLines(path string) (lines []string, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// AtomicCounter creates and returns a function that generates a unique integer value each time it is called.
|
||||
//
|
||||
// The function takes an optional initial value as a parameter. If an initial value is provided, the generated
|
||||
// integers will start from that value. If no initial value is provided, the generated integers will start from 0.
|
||||
//
|
||||
// The function is thread safe.
|
||||
//
|
||||
// The function returns a closure that can be called to retrieve the next integer in the sequence.
|
||||
func AtomicCounter(initial ...int) func() int {
|
||||
counterMutex := sync.Mutex{}
|
||||
counter := 0
|
||||
@ -320,12 +348,16 @@ func AtomicCounter(initial ...int) func() int {
|
||||
return nextCounter
|
||||
}
|
||||
|
||||
// Marshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
|
||||
// JsonMarshal marshals an interface into JSON format.
|
||||
//
|
||||
// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
|
||||
// friendly because it replaces the valid UTF-8 and JSON characters "&", "<",
|
||||
// ">" with the "slash u" unicode escaped forms (e.g. \u0026). It preemptively
|
||||
// escapes for HTML friendliness. Where text may include any of these
|
||||
// characters, json.Marshal should not be used. Playground of Go breaking a
|
||||
// title: https://play.golang.org/p/o2hiX0c62oN
|
||||
//
|
||||
// It takes an interface as a parameter and returns a byte slice and an error.
|
||||
func JsonMarshal(i interface{}) ([]byte, error) {
|
||||
buffer := &bytes.Buffer{}
|
||||
encoder := json.NewEncoder(buffer)
|
||||
@ -334,22 +366,45 @@ func JsonMarshal(i interface{}) ([]byte, error) {
|
||||
return bytes.TrimRight(buffer.Bytes(), "\n"), err
|
||||
}
|
||||
|
||||
// IsAMap checks if the given value is a map.
//
// value: the value to be checked.
// returns: true if the dynamic type of value is a map type. A nil interface
// is not a map and yields false.
func IsAMap(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid,
	// so a nil interface is handled without panicking.
	return reflect.ValueOf(value).Kind() == reflect.Map
}
|
||||
|
||||
// IsAnArray checks if the given value is an array.
//
// value: The value to be checked.
// Returns: true if the dynamic type of value is an array type (not a slice),
// false otherwise. A nil interface yields false.
func IsAnArray(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid,
	// so a nil interface is handled without panicking.
	return reflect.ValueOf(value).Kind() == reflect.Array
}
|
||||
|
||||
// IsASlice determines if the given value is a slice.
//
// value: the value to check.
// bool: true if the dynamic type of value is a slice type (not an array),
// false otherwise. A nil interface yields false.
func IsASlice(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid,
	// so a nil interface is handled without panicking.
	return reflect.ValueOf(value).Kind() == reflect.Slice
}
|
||||
|
||||
// HasLength checks if the given value has a length.
//
// value: The value to be checked.
// bool: Returns true if value is a map, an array or a slice, or if it
// implements a `Len() int` method; false otherwise. A nil interface has no
// length and yields false.
func HasLength(value interface{}) bool {
	if _, ok := value.(interface{ Len() int }); ok {
		return true
	}

	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid,
	// so a nil interface falls through to false without panicking.
	switch reflect.ValueOf(value).Kind() {
	case reflect.Map, reflect.Array, reflect.Slice:
		return true
	}

	return false
}
|
||||
|
||||
// Len returns the length of the given value.
|
||||
//
|
||||
// It accepts a single parameter:
|
||||
// - value: an interface{} that represents the value whose length is to be determined.
|
||||
//
|
||||
// It returns an int, which represents the length of the value.
|
||||
func Len(value interface{}) int {
|
||||
l := 1
|
||||
|
||||
|
@ -65,6 +65,10 @@ func (r Ranker[_]) Len() int { return len(r.r) }
|
||||
func (r Ranker[T]) Less(i, j int) bool { return r.x.Less(r.r[i], r.r[j]) }
|
||||
func (r Ranker[_]) Swap(i, j int) { r.r[i], r.r[j] = r.r[j], r.r[i] }
|
||||
|
||||
// Order sorts the given data using the provided sort.Interface and returns the sorted indices.
|
||||
//
|
||||
// data: The data to be sorted.
|
||||
// Returns: A slice of integers representing the sorted indices.
|
||||
func Order[T sort.Interface](data T) []int {
|
||||
ldata := data.Len()
|
||||
if ldata == 0 {
|
||||
|
@ -1,5 +1,13 @@
|
||||
package obiutils
|
||||
|
||||
// Contains checks if the given element is present in the given array.
|
||||
//
|
||||
// Parameters:
|
||||
// - arr: The array to search in.
|
||||
// - x: The element to search for.
|
||||
//
|
||||
// Return type:
|
||||
// - bool: Returns true if the element is found, false otherwise.
|
||||
func Contains[T comparable](arr []T, x T) bool {
|
||||
for _, v := range arr {
|
||||
if v == x {
|
||||
@ -9,6 +17,14 @@ func Contains[T comparable](arr []T, x T) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// LookFor searches for the first occurrence of a given element in an array and returns its index.
|
||||
//
|
||||
// Parameters:
|
||||
// - arr: the array to be searched
|
||||
// - x: the element to search for
|
||||
//
|
||||
// Return:
|
||||
// - int: the index of the first occurrence of the element in the array, or -1 if not found
|
||||
func LookFor[T comparable](arr []T, x T) int {
|
||||
for i, v := range arr {
|
||||
if v == x {
|
||||
@ -18,19 +34,36 @@ func LookFor[T comparable](arr []T, x T) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// RemoveIndex removes the element at a specified index from a slice.
//
// Parameters:
//   - s: The slice from which the element will be removed.
//   - index: The index of the element to be removed.
//
// Returns:
// A slice one element shorter than s. It shares the storage of s: the elements
// located after index are shifted one position to the left inside s itself, so
// the content of s is modified by the call.
func RemoveIndex[T comparable](s []T, index int) []T {
	head, tail := s[:index], s[index+1:]
	copy(s[index:], tail)

	return s[:len(head)+len(tail)]
}
|
||||
|
||||
// Reverse reverses the elements of a slice.
//
// The function takes a slice `s` and a boolean `inplace` parameter. If `inplace`
// is `true`, the elements of the input slice are reversed directly. If
// `inplace` is `false`, the input slice is left untouched and the reversal is
// done on a freshly allocated copy of it.
//
// The function returns the reversed slice: `s` itself when `inplace` is
// `true`, the copy otherwise.
func Reverse[S ~[]E, E any](s S, inplace bool) S {
	if !inplace {
		c := make([]E, len(s))
		copy(c, s)
		s = c
	}

	// Swap the two ends and walk toward the middle.
	for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 {
		s[i], s[j] = s[j], s[i]
	}

	return s
}
|
||||
|
Reference in New Issue
Block a user