mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
A first prototype for the space of sequences
Former-commit-id: 07dc6ef044b5b6a6fb45dc2acb01dffe71a96195
This commit is contained in:
@ -8,35 +8,67 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// HasAttribute checks if the BioSequence has the specified attribute.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: a string representing the attribute key to check.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - a boolean indicating whether the BioSequence has the attribute.
|
||||||
func (s *BioSequence) HasAttribute(key string) bool {
|
func (s *BioSequence) HasAttribute(key string) bool {
|
||||||
ok := s.annotations != nil
|
ok := s.annotations != nil
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
|
defer s.AnnotationsUnlock()
|
||||||
|
s.AnnotationsLock()
|
||||||
_, ok = s.annotations[key]
|
_, ok = s.annotations[key]
|
||||||
}
|
}
|
||||||
|
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that returns the value of the key in the annotation map.
|
// GetAttribute returns the value associated with the given key in the BioSequence's annotations map and a boolean indicating whether the key exists.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: The key to look up in the annotations map.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - val: The value associated with the given key.
|
||||||
|
// - ok: A boolean indicating whether the key exists in the annotations map.
|
||||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||||
var val interface{}
|
var val interface{}
|
||||||
ok := s.annotations != nil
|
ok := s.annotations != nil
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
|
defer s.AnnotationsUnlock()
|
||||||
|
s.AnnotationsLock()
|
||||||
val, ok = s.annotations[key]
|
val, ok = s.annotations[key]
|
||||||
}
|
}
|
||||||
|
|
||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that sets the value of the key in the annotation map.
|
// SetAttribute sets the value of a given key in the BioSequence annotations.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: the key to set the value for.
|
||||||
|
// - value: the value to set for the given key.
|
||||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||||
annot := s.Annotations()
|
annot := s.Annotations()
|
||||||
|
|
||||||
|
defer s.AnnotationsUnlock()
|
||||||
|
s.AnnotationsLock()
|
||||||
annot[key] = value
|
annot[key] = value
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that returns the value of the key in the annotation map.
|
// GetIntAttribute returns an integer attribute value based on the provided key.
|
||||||
|
//
|
||||||
|
// It takes a key as a parameter and returns the corresponding integer value along
|
||||||
|
// with a boolean value indicating whether the key exists in the BioSequence, and if it can be converted to an integer.
|
||||||
|
//
|
||||||
|
// If the stored value is convertible to an integer but was not stored as one, the converted integer is stored back under the same key.
|
||||||
|
//
|
||||||
|
// The returned boolean value is true only if the key exists and its value is, or can be converted to, an integer; it is false otherwise.
|
||||||
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
||||||
var val int
|
var val int
|
||||||
var err error
|
var err error
|
||||||
@ -44,19 +76,39 @@ func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
|||||||
v, ok := s.GetAttribute(key)
|
v, ok := s.GetAttribute(key)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
val, err = obiutils.InterfaceToInt(v)
|
val, ok = v.(int)
|
||||||
ok = err == nil
|
if !ok {
|
||||||
|
val, err = obiutils.InterfaceToInt(v)
|
||||||
|
ok = err == nil
|
||||||
|
if ok {
|
||||||
|
s.SetAttribute(key, val)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Deleting the key from the annotation map.
|
// DeleteAttribute deletes the attribute with the given key from the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: the key of the attribute to be deleted.
|
||||||
|
//
|
||||||
|
// No return value.
|
||||||
func (s *BioSequence) DeleteAttribute(key string) {
|
func (s *BioSequence) DeleteAttribute(key string) {
|
||||||
delete(s.Annotations(), key)
|
if s.annotations != nil {
|
||||||
|
defer s.AnnotationsUnlock()
|
||||||
|
s.AnnotationsLock()
|
||||||
|
delete(s.annotations, key)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Renaming the key in the annotation map.
|
// RenameAttribute renames an attribute in the BioSequence.
|
||||||
|
//
|
||||||
|
// It takes two string parameters:
|
||||||
|
// - newName: the new name for the attribute.
|
||||||
|
// - oldName: the old name of the attribute to be renamed.
|
||||||
|
// It does not return anything.
|
||||||
func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
||||||
val, ok := s.GetAttribute(oldName)
|
val, ok := s.GetAttribute(oldName)
|
||||||
|
|
||||||
@ -66,7 +118,15 @@ func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that returns the value of the key in the annotation map.
|
// GetNumericAttribute returns the numeric value of the specified attribute key
|
||||||
|
// in the BioSequence object.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: the attribute key to retrieve the numeric value for.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - float64: the numeric value of the attribute key.
|
||||||
|
// - bool: indicates whether the attribute key exists and can be converted to a float64.
|
||||||
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
||||||
var val float64
|
var val float64
|
||||||
var err error
|
var err error
|
||||||
@ -81,7 +141,14 @@ func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
|||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that returns the value of the key in the annotation map.
|
// GetStringAttribute retrieves the string value of a specific attribute from the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: the key of the attribute to retrieve.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - string: the value of the attribute as a string.
|
||||||
|
// - bool: a boolean indicating whether the attribute was found or not.
|
||||||
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
||||||
var val string
|
var val string
|
||||||
v, ok := s.GetAttribute(key)
|
v, ok := s.GetAttribute(key)
|
||||||
@ -93,7 +160,14 @@ func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
|||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// A method that returns the value of the key in the annotation map.
|
// GetBoolAttribute returns the boolean attribute value associated with the given key in the BioSequence object.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: The key to retrieve the boolean attribute value.
|
||||||
|
//
|
||||||
|
// Return:
|
||||||
|
// - val: The boolean attribute value associated with the given key and can be converted to a boolean.
|
||||||
|
// - ok: A boolean value indicating whether the attribute value was successfully retrieved.
|
||||||
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
||||||
var val bool
|
var val bool
|
||||||
var err error
|
var err error
|
||||||
@ -108,6 +182,14 @@ func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
|||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetIntMap returns a map[string]int and a boolean value indicating whether the key exists in the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: The key to retrieve the value from the BioSequence.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - val: A map[string]int representing the value associated with the key and can be converted to a map[string]int.
|
||||||
|
// - ok: A boolean value indicating whether the key exists in the BioSequence.
|
||||||
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||||
var val map[string]int
|
var val map[string]int
|
||||||
var err error
|
var err error
|
||||||
@ -122,7 +204,41 @@ func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
|||||||
return val, ok
|
return val, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returning the number of times the sequence has been observed.
|
// GetIntSlice returns the integer slice value associated with the given key in the BioSequence object.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - key: The key used to retrieve the integer slice value.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - []int: The integer slice value associated with the given key.
|
||||||
|
// - bool: A boolean indicating whether the key exists in the BioSequence object.
|
||||||
|
func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
|
||||||
|
var val []int
|
||||||
|
var err error
|
||||||
|
|
||||||
|
v, ok := s.GetAttribute(key)
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
val, ok = v.([]int)
|
||||||
|
if !ok {
|
||||||
|
val, err = obiutils.InterfaceToIntSlice(v)
|
||||||
|
ok = err == nil
|
||||||
|
if ok {
|
||||||
|
s.SetAttribute(key, val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return val, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count returns the value of the "count" attribute of the BioSequence.
|
||||||
|
//
|
||||||
|
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||||
|
// It is represented in the sequence header as the "count" attribute.
|
||||||
|
// If the attribute is not found, the function returns 1 as the default count.
|
||||||
|
//
|
||||||
|
// It returns an integer representing the count value.
|
||||||
func (s *BioSequence) Count() int {
|
func (s *BioSequence) Count() int {
|
||||||
count, ok := s.GetIntAttribute("count")
|
count, ok := s.GetIntAttribute("count")
|
||||||
|
|
||||||
@ -133,13 +249,27 @@ func (s *BioSequence) Count() int {
|
|||||||
return count
|
return count
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setting the number of times the sequence has been observed.
|
// SetCount sets the count of the BioSequence.
|
||||||
|
//
|
||||||
|
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||||
|
// The value of the "count" attribute is set to the new count, event if the new count is 1.
|
||||||
|
// If the count is less than 1, the count is set to 1.
|
||||||
|
//
|
||||||
|
// count - the new count to set.
|
||||||
func (s *BioSequence) SetCount(count int) {
|
func (s *BioSequence) SetCount(count int) {
|
||||||
annot := s.Annotations()
|
if count < 1 {
|
||||||
annot["count"] = count
|
count = 1
|
||||||
|
}
|
||||||
|
s.SetAttribute("count", count)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returning the taxid of the sequence.
|
// Taxid returns the taxonomic ID associated with the BioSequence.
|
||||||
|
//
|
||||||
|
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
||||||
|
// If the attribute is not found, the function returns 1 as the default taxonomic ID.
|
||||||
|
// The taxid 1 corresponds to the root taxonomic level.
|
||||||
|
//
|
||||||
|
// The function returns an integer representing the taxonomic ID.
|
||||||
func (s *BioSequence) Taxid() int {
|
func (s *BioSequence) Taxid() int {
|
||||||
taxid, ok := s.GetIntAttribute("taxid")
|
taxid, ok := s.GetIntAttribute("taxid")
|
||||||
|
|
||||||
@ -150,10 +280,16 @@ func (s *BioSequence) Taxid() int {
|
|||||||
return taxid
|
return taxid
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setting the taxid of the sequence.
|
// SetTaxid sets the taxid for the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
//
|
||||||
|
// taxid - the taxid to set.
|
||||||
func (s *BioSequence) SetTaxid(taxid int) {
|
func (s *BioSequence) SetTaxid(taxid int) {
|
||||||
annot := s.Annotations()
|
if taxid < 1 {
|
||||||
annot["taxid"] = taxid
|
taxid = 1
|
||||||
|
}
|
||||||
|
s.SetAttribute("taxid", taxid)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequence) OBITagRefIndex() map[int]string {
|
func (s *BioSequence) OBITagRefIndex() map[int]string {
|
||||||
@ -201,4 +337,115 @@ func (s *BioSequence) OBITagRefIndex() map[int]string {
|
|||||||
|
|
||||||
func (s *BioSequence) SetOBITagRefIndex(idx map[int]string) {
|
func (s *BioSequence) SetOBITagRefIndex(idx map[int]string) {
|
||||||
s.SetAttribute("obitag_ref_index", idx)
|
s.SetAttribute("obitag_ref_index", idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) SetOBITagGeomRefIndex(idx map[int]string) {
|
||||||
|
s.SetAttribute("obitag_geomref_index", idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) OBITagGeomRefIndex() map[int]string {
|
||||||
|
var val map[int]string
|
||||||
|
|
||||||
|
i, ok := s.GetAttribute("obitag_geomref_index")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case map[int]string:
|
||||||
|
val = i
|
||||||
|
case map[string]interface{}:
|
||||||
|
val = make(map[int]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
score, err := strconv.Atoi(k)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
val[score], err = obiutils.InterfaceToString(v)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case map[string]string:
|
||||||
|
val = make(map[int]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
score, err := strconv.Atoi(k)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln(err)
|
||||||
|
}
|
||||||
|
val[score] = v
|
||||||
|
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
log.Panicln("value of attribute obitag_geomref_index cannot be casted to a map[int]string")
|
||||||
|
}
|
||||||
|
|
||||||
|
return val
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCoordinate returns the coordinate of the BioSequence.
|
||||||
|
//
|
||||||
|
// Returns the coordinate of the BioSequence in the space of its reference database landmark sequences.
|
||||||
|
// if no coordinate is found, it returns nil.
|
||||||
|
//
|
||||||
|
// This function does not take any parameters.
|
||||||
|
//
|
||||||
|
// It returns a slice of integers ([]int).
|
||||||
|
func (s *BioSequence) GetCoordinate() []int {
|
||||||
|
coord, ok := s.GetIntSlice("landmark_coord")
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return coord
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetCoordinate sets the coordinate of the BioSequence.
|
||||||
|
//
|
||||||
|
// coord: An array of integers representing the coordinate.
|
||||||
|
// This function does not return anything.
|
||||||
|
func (s *BioSequence) SetCoordinate(coord []int) {
|
||||||
|
s.SetAttribute("landmark_coord", coord)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetLandmarkID sets the landmark ID of the BioSequence.
|
||||||
|
//
|
||||||
|
// Trying to set a negative landmark ID leads to a no operation.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// id: The ID of the landmark.
|
||||||
|
func (s *BioSequence) SetLandmarkID(id int) {
|
||||||
|
if id < 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.SetAttribute("landmark_id", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetLandmarkID returns the landmark ID associated with the BioSequence.
|
||||||
|
//
|
||||||
|
// It retrieves the "landmark_id" attribute from the BioSequence's attributes map.
|
||||||
|
// If the attribute is not found, the function returns -1 as the default landmark ID.
|
||||||
|
// The landmark ID is an integer representing the number of the axis in the landmark space.
|
||||||
|
//
|
||||||
|
// It does not take any parameters.
|
||||||
|
// It returns an integer representing the landmark ID.
|
||||||
|
func (s *BioSequence) GetLandmarkID() int {
|
||||||
|
val, ok := s.GetIntAttribute("landmark_id")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
return val
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsALandmark checks if the BioSequence is a landmark.
|
||||||
|
//
|
||||||
|
// A sequence is a landmark if its landmark ID is set (attribute "landmark_id").
|
||||||
|
//
|
||||||
|
// It returns a boolean indicating whether the BioSequence is a landmark or not.
|
||||||
|
func (s *BioSequence) IsALandmark() bool {
|
||||||
|
return s.GetLandmarkID() != -1
|
||||||
|
}
|
||||||
|
@ -12,6 +12,7 @@ package obiseq
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
@ -58,6 +59,7 @@ type BioSequence struct {
|
|||||||
feature []byte
|
feature []byte
|
||||||
paired *BioSequence // A pointer to the paired sequence
|
paired *BioSequence // A pointer to the paired sequence
|
||||||
annotations Annotation
|
annotations Annotation
|
||||||
|
annot_lock sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
// MakeEmptyBioSequence() creates a new BioSequence object with no data
|
// MakeEmptyBioSequence() creates a new BioSequence object with no data
|
||||||
@ -79,6 +81,7 @@ func MakeEmptyBioSequence(preallocate int) BioSequence {
|
|||||||
feature: nil,
|
feature: nil,
|
||||||
paired: nil,
|
paired: nil,
|
||||||
annotations: nil,
|
annotations: nil,
|
||||||
|
annot_lock: sync.Mutex{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,6 +145,8 @@ func (s *BioSequence) Copy() *BioSequence {
|
|||||||
newSeq.feature = CopySlice(s.feature)
|
newSeq.feature = CopySlice(s.feature)
|
||||||
|
|
||||||
if len(s.annotations) > 0 {
|
if len(s.annotations) > 0 {
|
||||||
|
defer s.annot_lock.Unlock()
|
||||||
|
s.annot_lock.Lock()
|
||||||
newSeq.annotations = GetAnnotation(s.annotations)
|
newSeq.annotations = GetAnnotation(s.annotations)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -206,6 +211,14 @@ func (s *BioSequence) Annotations() Annotation {
|
|||||||
return s.annotations
|
return s.annotations
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) AnnotationsLock() {
|
||||||
|
s.annot_lock.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) AnnotationsUnlock() {
|
||||||
|
s.annot_lock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
// Checking if the BioSequence has a source.
|
// Checking if the BioSequence has a source.
|
||||||
func (s *BioSequence) HasSource() bool {
|
func (s *BioSequence) HasSource() bool {
|
||||||
return len(s.source) > 0
|
return len(s.source) > 0
|
||||||
|
@ -74,8 +74,7 @@ func AssignToClass(data, centers *obiutils.Matrix[float64]) []int {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - centers: a pointer to a matrix of float64 values representing the centers of the clusters.
|
// - centers: a pointer to a matrix of float64 values representing the centers of the clusters.
|
||||||
func ComputeCenters(data *obiutils.Matrix[float64], k int, classes []int) *obiutils.Matrix[float64] {
|
func ComputeCenters(data *obiutils.Matrix[float64], k int, classes []int) *obiutils.Matrix[float64] {
|
||||||
centers := obiutils.Make2DArray[float64](k, len((*data)[0]))
|
centers := obiutils.Make2DNumericArray[float64](k, len((*data)[0]), true)
|
||||||
centers.Init(0.0)
|
|
||||||
ns := make([]int, k)
|
ns := make([]int, k)
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
@ -48,11 +48,14 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Description("Restrict output to some subclades."))
|
options.Description("Restrict output to some subclades."))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func CLISelectedNCBITaxDump() string {
|
func CLISelectedNCBITaxDump() string {
|
||||||
return __taxdump__
|
return __taxdump__
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIHasSelectedTaxonomy() bool {
|
||||||
|
return __taxdump__ != ""
|
||||||
|
}
|
||||||
|
|
||||||
func CLIAreAlternativeNamesSelected() bool {
|
func CLIAreAlternativeNamesSelected() bool {
|
||||||
return __alternative_name__
|
return __alternative_name__
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,9 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obistats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obistats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obirefidx"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@ -103,14 +106,14 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
|||||||
library := iterator.Load()
|
library := iterator.Load()
|
||||||
|
|
||||||
library_size := len(library)
|
library_size := len(library)
|
||||||
n_landmark := NCenter()
|
n_landmark := CLINCenter()
|
||||||
|
|
||||||
landmark_idx := obistats.SampleIntWithoutReplacement(n_landmark, library_size)
|
landmark_idx := obistats.SampleIntWithoutReplacement(n_landmark, library_size)
|
||||||
log.Infof("Library contains %d sequence", len(library))
|
log.Infof("Library contains %d sequence", len(library))
|
||||||
|
|
||||||
var seqworld obiutils.Matrix[float64]
|
var seqworld obiutils.Matrix[float64]
|
||||||
|
|
||||||
for loop := 0; loop < 5; loop++ {
|
for loop := 0; loop < 2; loop++ {
|
||||||
sort.IntSlice(landmark_idx).Sort()
|
sort.IntSlice(landmark_idx).Sort()
|
||||||
log.Debugf("Selected indices : %v", landmark_idx)
|
log.Debugf("Selected indices : %v", landmark_idx)
|
||||||
|
|
||||||
@ -154,14 +157,52 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
|||||||
}
|
}
|
||||||
|
|
||||||
classes := obistats.AssignToClass(&seqworld, &initialCenters)
|
classes := obistats.AssignToClass(&seqworld, &initialCenters)
|
||||||
|
|
||||||
for i, seq := range library {
|
for i, seq := range library {
|
||||||
seq.SetAttribute("landmark_coord", seqworld[i])
|
ic, _ := obiutils.InterfaceToIntSlice(seqworld[i])
|
||||||
|
seq.SetCoordinate(ic)
|
||||||
seq.SetAttribute("landmark_class", classes[i])
|
seq.SetAttribute("landmark_class", classes[i])
|
||||||
if i, ok := seq_landmark[i]; ok {
|
if i, ok := seq_landmark[i]; ok {
|
||||||
seq.SetAttribute("landmark_id", i)
|
seq.SetAttribute("landmark_id", i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if obifind.CLIHasSelectedTaxonomy() {
|
||||||
|
taxo, err := obifind.CLILoadSelectedTaxonomy()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxa := make(obitax.TaxonSet, len(library))
|
||||||
|
|
||||||
|
for i, seq := range library {
|
||||||
|
taxa[i], err = taxo.Taxon(seq.Taxid())
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pbopt := make([]progressbar.Option, 0, 5)
|
||||||
|
pbopt = append(pbopt,
|
||||||
|
progressbar.OptionSetWriter(os.Stderr),
|
||||||
|
progressbar.OptionSetWidth(15),
|
||||||
|
progressbar.OptionShowCount(),
|
||||||
|
progressbar.OptionShowIts(),
|
||||||
|
progressbar.OptionSetDescription("[Sequence Indexing]"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bar := progressbar.NewOptions(len(library), pbopt...)
|
||||||
|
|
||||||
|
for i, seq := range library {
|
||||||
|
idx := obirefidx.GeomIndexSesquence(i, library, &taxa, taxo)
|
||||||
|
seq.SetOBITagGeomRefIndex(idx)
|
||||||
|
|
||||||
|
if i%10 == 0 {
|
||||||
|
bar.Add(10)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return obiiter.IBatchOver(library, obioptions.CLIBatchSize())
|
return obiiter.IBatchOver(library, obioptions.CLIBatchSize())
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,28 +2,37 @@ package obilandmark
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _nCenter = 200
|
var _nCenter = 200
|
||||||
|
|
||||||
// ObilandmarkOptionSet sets the options for Obilandmark.
|
// LandmarkOptionSet sets the options for Obilandmark.
|
||||||
//
|
//
|
||||||
// options: a pointer to the getoptions.GetOpt struct.
|
// options: a pointer to the getoptions.GetOpt struct.
|
||||||
// Return type: none.
|
// Return type: none.
|
||||||
func ObilandmarkOptionSet(options *getoptions.GetOpt) {
|
func LandmarkOptionSet(options *getoptions.GetOpt) {
|
||||||
|
|
||||||
options.IntVar(&_nCenter, "center", _nCenter,
|
options.IntVar(&_nCenter, "center", _nCenter,
|
||||||
options.Alias("n"),
|
options.Alias("n"),
|
||||||
options.Description("Maximum numbers of differences between two variant sequences (default: %d)."))
|
options.Description("Number of landmark sequences to be selected."))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OptionSet is a function that sets the options for the GetOpt struct.
|
||||||
|
//
|
||||||
|
// It takes a pointer to a GetOpt struct as its parameter and does not return anything.
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.InputOptionSet(options)
|
obiconvert.InputOptionSet(options)
|
||||||
obiconvert.OutputOptionSet(options)
|
obiconvert.OutputOptionSet(options)
|
||||||
ObilandmarkOptionSet(options)
|
obifind.LoadTaxonomyOptionSet(options, false, false)
|
||||||
|
LandmarkOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NCenter() int {
|
// CLINCenter returns desired number of centers as specified by user.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns an integer value.
|
||||||
|
func CLINCenter() int {
|
||||||
return _nCenter
|
return _nCenter
|
||||||
}
|
}
|
||||||
|
79
pkg/obitools/obirefidx/geomindexing.go
Normal file
79
pkg/obitools/obirefidx/geomindexing.go
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
package obirefidx
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"sort"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
func GeomIndexSesquence(seqidx int,
|
||||||
|
references obiseq.BioSequenceSlice,
|
||||||
|
taxa *obitax.TaxonSet,
|
||||||
|
taxo *obitax.Taxonomy) map[int]string {
|
||||||
|
|
||||||
|
sequence := references[seqidx]
|
||||||
|
location := sequence.GetCoordinate()
|
||||||
|
|
||||||
|
if location == nil {
|
||||||
|
log.Fatalf("Sequence %s does not have a coordinate", sequence.Id())
|
||||||
|
}
|
||||||
|
|
||||||
|
seq_dist := make([]float64, len(references))
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
for i, ref := range references {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(i int, ref *obiseq.BioSequence) {
|
||||||
|
defer wg.Done()
|
||||||
|
reflocation := ref.GetCoordinate()
|
||||||
|
if reflocation == nil {
|
||||||
|
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
|
||||||
|
}
|
||||||
|
d := 0.0
|
||||||
|
for i, x := range location {
|
||||||
|
diff := float64(x - reflocation[i])
|
||||||
|
d += diff * diff
|
||||||
|
}
|
||||||
|
seq_dist[i] = d
|
||||||
|
}(i, ref)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
order := obiutils.Order(sort.Float64Slice(seq_dist))
|
||||||
|
|
||||||
|
lca := (*taxa)[seqidx]
|
||||||
|
|
||||||
|
index := make(map[int]string)
|
||||||
|
index[0.0] = fmt.Sprintf(
|
||||||
|
"%d@%s@%s",
|
||||||
|
lca.Taxid(),
|
||||||
|
lca.ScientificName(),
|
||||||
|
lca.Rank())
|
||||||
|
|
||||||
|
old_dist := 0.0
|
||||||
|
for _, o := range order {
|
||||||
|
new_lca, _ := lca.LCA((*taxa)[o])
|
||||||
|
if new_lca.Taxid() != lca.Taxid() || seq_dist[o] != old_dist {
|
||||||
|
lca = new_lca
|
||||||
|
old_dist = seq_dist[o]
|
||||||
|
index[int(seq_dist[o])] = fmt.Sprintf(
|
||||||
|
"%d@%s@%s",
|
||||||
|
lca.Taxid(),
|
||||||
|
lca.ScientificName(),
|
||||||
|
lca.Rank())
|
||||||
|
}
|
||||||
|
|
||||||
|
if lca.Taxid() == 1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return index
|
||||||
|
}
|
@ -24,9 +24,6 @@ func IndexSequence(seqidx int,
|
|||||||
taxo *obitax.Taxonomy) map[int]string {
|
taxo *obitax.Taxonomy) map[int]string {
|
||||||
|
|
||||||
sequence := references[seqidx]
|
sequence := references[seqidx]
|
||||||
// matrix := obialign.NewFullLCSMatrix(nil,
|
|
||||||
// sequence.Length(),
|
|
||||||
// sequence.Length())
|
|
||||||
|
|
||||||
var matrix []uint64
|
var matrix []uint64
|
||||||
|
|
||||||
@ -54,7 +51,9 @@ func IndexSequence(seqidx int,
|
|||||||
nok := make([]int, len(*pseq))
|
nok := make([]int, len(*pseq))
|
||||||
nfast := make([]int, len(*pseq))
|
nfast := make([]int, len(*pseq))
|
||||||
nfastok := make([]int, len(*pseq))
|
nfastok := make([]int, len(*pseq))
|
||||||
*/lseq := sequence.Len()
|
*/
|
||||||
|
|
||||||
|
lseq := sequence.Len()
|
||||||
|
|
||||||
mini := -1
|
mini := -1
|
||||||
wordmin := 0
|
wordmin := 0
|
||||||
|
209
pkg/obitools/obitag/obigeomtag.go
Normal file
209
pkg/obitools/obitag/obigeomtag.go
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
package obitag
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExtractLandmarkSeqs extracts landmark sequences from the given BioSequenceSlice.
|
||||||
|
//
|
||||||
|
// The landmark sequences are extracted from the given BioSequenceSlice and put in a new BioSequenceSlice
|
||||||
|
// in the order corresponding to their landmark IDs.
|
||||||
|
//
|
||||||
|
// references: A pointer to a BioSequenceSlice containing the references.
|
||||||
|
// Returns: A pointer to a BioSequenceSlice containing the extracted landmark sequences.
|
||||||
|
func ExtractLandmarkSeqs(references *obiseq.BioSequenceSlice) *obiseq.BioSequenceSlice {
|
||||||
|
landmarks := make(map[int]*obiseq.BioSequence, 100)
|
||||||
|
|
||||||
|
for _, ref := range *references {
|
||||||
|
if id := ref.GetLandmarkID(); id != -1 {
|
||||||
|
landmarks[id] = ref
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ls := obiseq.NewBioSequenceSlice(len(landmarks))
|
||||||
|
*ls = (*ls)[0:len(landmarks)]
|
||||||
|
|
||||||
|
for k, l := range landmarks {
|
||||||
|
(*ls)[k] = l
|
||||||
|
}
|
||||||
|
|
||||||
|
return ls
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractTaxonSet extracts a set of taxa from the given references and taxonomy.
|
||||||
|
//
|
||||||
|
// If a reference sequence has a taxid absent from the taxonomy, the function will panic.
|
||||||
|
//
|
||||||
|
// The function takes two parameters:
|
||||||
|
// - references: a pointer to a BioSequenceSlice, which is a slice of BioSequence objects.
|
||||||
|
// - taxonomy: a pointer to a Taxonomy object.
|
||||||
|
//
|
||||||
|
// The function returns a pointer to a TaxonSet, which is a set of taxa.
|
||||||
|
func ExtractTaxonSet(references *obiseq.BioSequenceSlice, taxonomy *obitax.Taxonomy) *obitax.TaxonSet {
|
||||||
|
var err error
|
||||||
|
taxa := make(obitax.TaxonSet, len(*references))
|
||||||
|
|
||||||
|
for i, ref := range *references {
|
||||||
|
taxid := ref.Taxid()
|
||||||
|
taxa[i], err = taxonomy.Taxon(taxid)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Taxid %d, for sequence %s not found in taxonomy", taxid, ref.Id())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &taxa
|
||||||
|
}
|
||||||
|
|
||||||
|
// MapOnLandmarkSequences calculates the coordinates of landmarks on a given sequence.
|
||||||
|
//
|
||||||
|
// It takes in three parameters:
|
||||||
|
// - sequence: a pointer to a BioSequence object representing the sequence.
|
||||||
|
// - landmarks: a pointer to a BioSequenceSlice object representing the landmarks.
|
||||||
|
// - buffer: a pointer to a slice of uint64, used as a buffer for calculations.
|
||||||
|
//
|
||||||
|
// It returns a slice of integers representing the coordinates of the landmarks on the sequence.
|
||||||
|
func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioSequenceSlice, buffer *[]uint64) []int {
|
||||||
|
|
||||||
|
coords := make([]int, len(*landmarks))
|
||||||
|
|
||||||
|
for i, l := range *landmarks {
|
||||||
|
lcs, length := obialign.FastLCSEGFScore(sequence, l, -1, buffer)
|
||||||
|
coords[i] = length - lcs
|
||||||
|
}
|
||||||
|
|
||||||
|
return coords
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindGeomClosest finds the closest geometric sequence in a given set of reference sequences to a query sequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - sequence: A pointer to a BioSequence object representing the query sequence.
|
||||||
|
// - landmarks: A pointer to a BioSequenceSlice object representing the landmarks.
|
||||||
|
// - references: A pointer to a BioSequenceSlice object representing the reference sequences.
|
||||||
|
// - buffer: A pointer to a slice of uint64 representing a buffer.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - A pointer to a BioSequence object representing the closest sequence.
|
||||||
|
// - An int representing the minimum distance.
|
||||||
|
// - A float64 representing the best identity score.
|
||||||
|
// - An array of int representing the indices of the closest sequences.
|
||||||
|
// - A pointer to a BioSequenceSlice object representing the matched sequences.
|
||||||
|
func FindGeomClosest(sequence *obiseq.BioSequence,
|
||||||
|
landmarks *obiseq.BioSequenceSlice,
|
||||||
|
references *obiseq.BioSequenceSlice,
|
||||||
|
buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) {
|
||||||
|
|
||||||
|
min_dist := math.MaxInt64
|
||||||
|
min_idx := make([]int, 0)
|
||||||
|
|
||||||
|
query_location := MapOnLandmarkSequences(sequence, landmarks, buffer)
|
||||||
|
|
||||||
|
for i, l := range *references {
|
||||||
|
coord := l.GetCoordinate()
|
||||||
|
if len(coord) == 0 {
|
||||||
|
log.Panicf("Empty coordinate for reference sequence %s", l.Id())
|
||||||
|
}
|
||||||
|
dist := 0
|
||||||
|
for j := 0; j < len(coord); j++ {
|
||||||
|
diff := query_location[j] - coord[j]
|
||||||
|
dist += diff * diff
|
||||||
|
}
|
||||||
|
|
||||||
|
if dist == min_dist {
|
||||||
|
min_idx = append(min_idx, i)
|
||||||
|
}
|
||||||
|
if dist < min_dist {
|
||||||
|
min_dist = dist
|
||||||
|
min_idx = make([]int, 0)
|
||||||
|
min_idx = append(min_idx, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
best_seq := (*references)[min_idx[0]]
|
||||||
|
best_id := 0.0
|
||||||
|
|
||||||
|
for _, i := range min_idx {
|
||||||
|
seq := (*references)[i]
|
||||||
|
lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer)
|
||||||
|
ident := float64(lcs) / float64(length)
|
||||||
|
if ident > best_id {
|
||||||
|
best_id = ident
|
||||||
|
best_seq = seq
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
matches := obiseq.MakeBioSequenceSlice(len(min_idx))
|
||||||
|
matches = matches[0:len(min_idx)]
|
||||||
|
for i, j := range min_idx {
|
||||||
|
matches[i] = (*references)[j]
|
||||||
|
}
|
||||||
|
|
||||||
|
return best_seq, min_dist, best_id, query_location, &matches
|
||||||
|
}
|
||||||
|
|
||||||
|
func GeomIdentify(sequence *obiseq.BioSequence,
|
||||||
|
landmarks *obiseq.BioSequenceSlice,
|
||||||
|
references *obiseq.BioSequenceSlice,
|
||||||
|
taxa *obitax.TaxonSet,
|
||||||
|
taxo *obitax.Taxonomy,
|
||||||
|
buffer *[]uint64) *obiseq.BioSequence {
|
||||||
|
best_seq, min_dist, best_id, query_location, matches := FindGeomClosest(sequence, landmarks, references, buffer)
|
||||||
|
|
||||||
|
taxon := (*obitax.TaxNode)(nil)
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if best_id > 0.5 {
|
||||||
|
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[0].OBITagGeomRefIndex())
|
||||||
|
taxon, _ = taxo.Taxon(taxid)
|
||||||
|
for i := 1; i < len(*matches); i++ {
|
||||||
|
taxid, _, _ := MatchDistanceIndex(min_dist, (*matches)[i].OBITagGeomRefIndex())
|
||||||
|
newTaxon, _ := taxo.Taxon(taxid)
|
||||||
|
taxon, err = newTaxon.LCA(taxon)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("LCA error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sequence.SetTaxid(taxon.Taxid())
|
||||||
|
} else {
|
||||||
|
taxon, _ = taxo.Taxon(1)
|
||||||
|
sequence.SetTaxid(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||||
|
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||||
|
sequence.SetAttribute("obitag_bestid", best_id)
|
||||||
|
sequence.SetAttribute("obitag_bestmatch", best_seq.Id())
|
||||||
|
sequence.SetAttribute("obitag_min_dist", min_dist)
|
||||||
|
sequence.SetAttribute("obitag_coord", query_location)
|
||||||
|
sequence.SetAttribute("obitag_match_count", len(*matches))
|
||||||
|
sequence.SetAttribute("obitag_similarity_method", "geometric")
|
||||||
|
|
||||||
|
return sequence
|
||||||
|
}
|
||||||
|
|
||||||
|
func GeomIdentifySeqWorker(references *obiseq.BioSequenceSlice,
|
||||||
|
taxo *obitax.Taxonomy) obiseq.SeqWorker {
|
||||||
|
|
||||||
|
landmarks := ExtractLandmarkSeqs(references)
|
||||||
|
taxa := ExtractTaxonSet(references, taxo)
|
||||||
|
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||||
|
buffer := make([]uint64, 100)
|
||||||
|
return GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||||
|
references obiseq.BioSequenceSlice,
|
||||||
|
taxo *obitax.Taxonomy,
|
||||||
|
) obiiter.IBioSequence {
|
||||||
|
|
||||||
|
worker := GeomIdentifySeqWorker(&references, taxo)
|
||||||
|
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
package obitag
|
package obitag
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -16,6 +17,61 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MatchDistanceIndex returns the taxid, rank, and scientific name stored in
// distanceIdx for the given distance.
//
// The entry selected is the one with the smallest key greater than or equal
// to distance. Values are expected in the format "taxid@rank@scientificName".
//
// Parameters:
//   - distance: the distance to match against the keys of distanceIdx.
//   - distanceIdx: a map from distances to "taxid@rank@scientificName" strings.
//
// Returns:
//   - taxid: the taxid associated with the matched distance.
//   - rank: the rank associated with the matched distance.
//   - scientificName: the scientific name associated with the matched distance.
//
// When no key is greater than or equal to distance (including when the map is
// empty), or when the selected value is malformed (fewer than three "@"
// separated fields, or a non numeric taxid), the root taxon is returned:
// taxid 1, rank "no rank", scientific name "root".
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
	const (
		rootTaxid = 1
		rootRank  = "no rank"
		rootName  = "root"
	)

	keys := make([]int, 0, len(distanceIdx))
	for k := range distanceIdx {
		keys = append(keys, k)
	}
	sort.Ints(keys)

	// Position of the smallest key that is >= distance.
	i := sort.SearchInts(keys, distance)
	if i == len(keys) {
		return rootTaxid, rootRank, rootName
	}

	parts := strings.Split(distanceIdx[keys[i]], "@")
	if len(parts) < 3 {
		// Malformed entry: fall back to the root rather than panicking on a
		// missing field.
		return rootTaxid, rootRank, rootName
	}

	taxid, err := strconv.Atoi(parts[0])
	if err != nil {
		// A non numeric taxid cannot be looked up; treat it as unassigned.
		return rootTaxid, rootRank, rootName
	}

	return taxid, parts[1], parts[2]
}
|
||||||
|
|
||||||
|
// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - sequence: the bio sequence to find the closest matches for.
|
||||||
|
// - references: a slice of reference sequences to compare against.
|
||||||
|
// - refcounts: a slice of reference sequence counts.
|
||||||
|
// - runExact: a boolean flag indicating whether to run an exact match.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - bests: a slice of the closest bio sequences.
|
||||||
|
// - maxe: the maximum score.
|
||||||
|
// - bestId: the best ID.
|
||||||
|
// - bestmatch: the best match.
|
||||||
|
// - bestidxs: a slice of the best indexes.
|
||||||
func FindClosests(sequence *obiseq.BioSequence,
|
func FindClosests(sequence *obiseq.BioSequence,
|
||||||
references obiseq.BioSequenceSlice,
|
references obiseq.BioSequenceSlice,
|
||||||
refcounts []*obikmer.Table4mer,
|
refcounts []*obikmer.Table4mer,
|
||||||
@ -94,6 +150,18 @@ func FindClosests(sequence *obiseq.BioSequence,
|
|||||||
return bests, maxe, bestId, bestmatch, bestidxs
|
return bests, maxe, bestId, bestmatch, bestidxs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Identify makes the taxonomic identification of a BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - sequence: A pointer to a BioSequence to identify.
|
||||||
|
// - references: A BioSequenceSlice.
|
||||||
|
// - refcounts: A slice of pointers to Table4mer.
|
||||||
|
// - taxa: A TaxonSet.
|
||||||
|
// - taxo: A pointer to a Taxonomy.
|
||||||
|
// - runExact: A boolean value indicating whether to run exact matching.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - A pointer to a BioSequence.
|
||||||
func Identify(sequence *obiseq.BioSequence,
|
func Identify(sequence *obiseq.BioSequence,
|
||||||
references obiseq.BioSequenceSlice,
|
references obiseq.BioSequenceSlice,
|
||||||
refcounts []*obikmer.Table4mer,
|
refcounts []*obikmer.Table4mer,
|
||||||
@ -171,24 +239,19 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx)
|
log.Debugln(sequence.Id(), "Best matches:", len(bests), "New index:", newidx)
|
||||||
|
|
||||||
sequence.SetTaxid(taxon.Taxid())
|
sequence.SetTaxid(taxon.Taxid())
|
||||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
|
||||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
|
||||||
sequence.SetAttribute("obitag_bestid", identity)
|
|
||||||
sequence.SetAttribute("obitag_difference", differences)
|
|
||||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
|
||||||
sequence.SetAttribute("obitag_match_count", len(bests))
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
taxon, _ = taxo.Taxon(1)
|
taxon, _ = taxo.Taxon(1)
|
||||||
sequence.SetTaxid(1)
|
sequence.SetTaxid(1)
|
||||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
|
||||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
|
||||||
sequence.SetAttribute("obitag_bestid", identity)
|
|
||||||
sequence.SetAttribute("obitag_difference", differences)
|
|
||||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
|
||||||
sequence.SetAttribute("obitag_match_count", len(bests))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||||
|
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||||
|
sequence.SetAttribute("obitag_bestid", identity)
|
||||||
|
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||||
|
sequence.SetAttribute("obitag_match_count", len(bests))
|
||||||
|
sequence.SetAttribute("obitag_similarity_method", "lcs")
|
||||||
|
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ import (
|
|||||||
var _RefDB = ""
|
var _RefDB = ""
|
||||||
var _SaveRefDB = ""
|
var _SaveRefDB = ""
|
||||||
var _RunExact = false
|
var _RunExact = false
|
||||||
|
var _GeomSim = false
|
||||||
|
|
||||||
func TagOptionSet(options *getoptions.GetOpt) {
|
func TagOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringVar(&_RefDB, "reference-db", _RefDB,
|
options.StringVar(&_RefDB, "reference-db", _RefDB,
|
||||||
@ -27,6 +28,10 @@ func TagOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.ArgName("FILENAME"),
|
options.ArgName("FILENAME"),
|
||||||
options.Description("The name of a file where to save the reference DB with its indices"))
|
options.Description("The name of a file where to save the reference DB with its indices"))
|
||||||
|
|
||||||
|
options.BoolVar(&_GeomSim, "geometric", _GeomSim,
|
||||||
|
options.Alias("G"),
|
||||||
|
options.Description("Activate the experimental geometric similarity heuristic"))
|
||||||
|
|
||||||
// options.BoolVar(&_RunExact, "exact", _RunExact,
|
// options.BoolVar(&_RunExact, "exact", _RunExact,
|
||||||
// options.Alias("E"),
|
// options.Alias("E"),
|
||||||
// options.Description("Deactivate the heuristic limiting the sequence comparisons"))
|
// options.Description("Deactivate the heuristic limiting the sequence comparisons"))
|
||||||
@ -55,6 +60,10 @@ func CLIRefDB() obiseq.BioSequenceSlice {
|
|||||||
return refdb.Load()
|
return refdb.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIGeometricMode() bool {
|
||||||
|
return _GeomSim
|
||||||
|
}
|
||||||
|
|
||||||
func CLIShouldISaveRefDB() bool {
|
func CLIShouldISaveRefDB() bool {
|
||||||
return _SaveRefDB != ""
|
return _SaveRefDB != ""
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,18 @@
|
|||||||
package obiutils
|
package obiutils
|
||||||
|
|
||||||
// Matrix is a generic type representing a matrix.
|
type Integer interface {
|
||||||
type Matrix[T any] [][]T
|
~int | ~int8 | ~int16 | ~int32 | ~int64
|
||||||
|
}
|
||||||
|
|
||||||
|
type Float interface {
|
||||||
|
~float32 | ~float64
|
||||||
|
}
|
||||||
|
type Numeric interface {
|
||||||
|
Integer | Float
|
||||||
|
}
|
||||||
|
|
||||||
|
type Vector[T any] []T
|
||||||
|
type Matrix[T any] []Vector[T]
|
||||||
|
|
||||||
// Make2DArray generates a 2D array of type T with the specified number of rows and columns.
|
// Make2DArray generates a 2D array of type T with the specified number of rows and columns.
|
||||||
//
|
//
|
||||||
@ -22,15 +33,20 @@ func Make2DArray[T any](rows, cols int) Matrix[T] {
|
|||||||
return matrix
|
return matrix
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init initializes the Matrix with the given value.
|
func Make2DNumericArray[T Numeric](rows, cols int, zeroed bool) Matrix[T] {
|
||||||
//
|
matrix := make(Matrix[T], rows)
|
||||||
// value: the value to initialize the Matrix elements with.
|
data := make([]T, cols*rows)
|
||||||
func (matrix *Matrix[T]) Init(value T) {
|
|
||||||
data := (*matrix)[0]
|
if zeroed {
|
||||||
data = data[0:cap(data)]
|
for i := range data {
|
||||||
for i := range data {
|
data[i] = 0
|
||||||
data[i] = value
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for i := 0; i < rows; i++ {
|
||||||
|
matrix[i] = data[i*cols : (i+1)*cols]
|
||||||
|
}
|
||||||
|
return matrix
|
||||||
}
|
}
|
||||||
|
|
||||||
// Row returns the i-th row of the matrix.
|
// Row returns the i-th row of the matrix.
|
||||||
@ -50,6 +66,19 @@ func (matrix *Matrix[T]) Column(i int) []T {
|
|||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rows returns the specified rows of the matrix.
|
||||||
|
//
|
||||||
|
// The function takes one or more integer arguments representing the indices of the rows to be returned.
|
||||||
|
// It returns a new matrix containing the specified rows.
|
||||||
|
func (matrix *Matrix[T]) Rows(i ...int) Matrix[T] {
|
||||||
|
res := make([]Vector[T], len(i))
|
||||||
|
|
||||||
|
for j, idx := range i {
|
||||||
|
res[j] = (*matrix)[idx]
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
// Dim returns the dimensions of the Matrix.
|
// Dim returns the dimensions of the Matrix.
|
||||||
//
|
//
|
||||||
// It takes no parameters.
|
// It takes no parameters.
|
||||||
|
@ -1,11 +1,17 @@
|
|||||||
package obiutils
|
package obiutils
|
||||||
|
|
||||||
|
// InPlaceToLower converts all uppercase letters in the input byte slice to lowercase in place.
|
||||||
|
//
|
||||||
|
// It takes a single parameter:
|
||||||
|
// - data: a byte slice representing the input data
|
||||||
|
//
|
||||||
|
// It returns the modified byte slice.
|
||||||
func InPlaceToLower(data []byte) []byte {
|
func InPlaceToLower(data []byte) []byte {
|
||||||
for i,l := range data {
|
for i, l := range data {
|
||||||
if l >= 'A' && l <='Z' {
|
if l >= 'A' && l <= 'Z' {
|
||||||
data[i]|=32
|
data[i] |= 32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
54
pkg/obiutils/cast_interface.go
Normal file
54
pkg/obiutils/cast_interface.go
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
package obiutils
|
||||||
|
|
||||||
|
// CastableToInt reports whether the dynamic type of i is one of the numeric
// types accepted for a conversion to an integer.
//
// The accepted types are int, int8, int16, int32, int64, uint8, uint16,
// uint32, uint64, float32 and float64. Any other type, including the plain
// uint type and a nil interface, yields false.
func CastableToInt(i interface{}) bool {
	switch i.(type) {
	case int, int8, int16, int32, int64:
		return true
	case uint8, uint16, uint32, uint64:
		return true
	case float32, float64:
		return true
	}

	return false
}
|
||||||
|
|
||||||
|
// InterfaceToBool converts an interface value to a boolean.
|
||||||
|
//
|
||||||
|
// It takes an interface{} as a parameter and returns a boolean value and an error.
|
||||||
|
func InterfaceToBool(i interface{}) (val bool, err error) {
|
||||||
|
|
||||||
|
err = nil
|
||||||
|
val = false
|
||||||
|
|
||||||
|
switch t := i.(type) {
|
||||||
|
case int:
|
||||||
|
val = t != 0
|
||||||
|
case int8:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case int16:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case int32:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case int64:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case float32:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case float64:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case uint8:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case uint16:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case uint32:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
case uint64:
|
||||||
|
val = t != 0 // standardizes across systems
|
||||||
|
default:
|
||||||
|
err = &NotABoolean{"value attribute cannot be casted to a boolean"}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
@ -196,6 +196,69 @@ func InterfaceToFloat64Map(i interface{}) (val map[string]float64, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InterfaceToFloat64Slice converts an interface{} to a []float64 slice.
|
||||||
|
//
|
||||||
|
// It takes an interface{} parameter and returns a slice of float64 values and an error.
|
||||||
|
func InterfaceToFloat64Slice(i interface{}) ([]float64, error) {
|
||||||
|
switch i := i.(type) {
|
||||||
|
case []float64:
|
||||||
|
return i, nil
|
||||||
|
case []interface{}:
|
||||||
|
val := make([]float64, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
if x, err := InterfaceToFloat64(v); err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
val[k] = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return val, nil
|
||||||
|
case []int:
|
||||||
|
val := make([]float64, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k] = float64(v)
|
||||||
|
}
|
||||||
|
return val, nil
|
||||||
|
default:
|
||||||
|
return nil, &NotAMapFloat64{"value attribute cannot be casted to a []float64"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// InterfaceToIntSlice converts an interface{} to a []int slice.
|
||||||
|
//
|
||||||
|
// It takes an interface{} parameter and returns a slice of int values and an error.
|
||||||
|
func InterfaceToIntSlice(i interface{}) ([]int, error) {
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case []int:
|
||||||
|
return i, nil
|
||||||
|
case []interface{}:
|
||||||
|
val := make([]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
if x, err := InterfaceToInt(v); err != nil {
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
val[k] = x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return val, nil
|
||||||
|
case []float64:
|
||||||
|
val := make([]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k] = int(v + 0.5)
|
||||||
|
}
|
||||||
|
return val, nil
|
||||||
|
case Vector[float64]:
|
||||||
|
val := make([]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k] = int(v + 0.5)
|
||||||
|
}
|
||||||
|
return val, nil
|
||||||
|
default:
|
||||||
|
return nil, &NotAMapInt{"value attribute cannot be casted to a []int"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// NotABoolean defines a new type of Error : "NotABoolean"
|
// NotABoolean defines a new type of Error : "NotABoolean"
|
||||||
type NotABoolean struct {
|
type NotABoolean struct {
|
||||||
message string
|
message string
|
||||||
@ -208,56 +271,6 @@ func (m *NotABoolean) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
// It converts an interface{} to a bool, and returns an error if the interface{} cannot be converted
|
|
||||||
// to a bool
|
|
||||||
func InterfaceToBool(i interface{}) (val bool, err error) {
|
|
||||||
|
|
||||||
err = nil
|
|
||||||
val = false
|
|
||||||
|
|
||||||
switch t := i.(type) {
|
|
||||||
case int:
|
|
||||||
val = t != 0
|
|
||||||
case int8:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case int16:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case int32:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case int64:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case float32:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case float64:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case uint8:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case uint16:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case uint32:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
case uint64:
|
|
||||||
val = t != 0 // standardizes across systems
|
|
||||||
default:
|
|
||||||
err = &NotABoolean{"value attribute cannot be casted to a boolean"}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the interface{} can be cast to an int, return true.
|
|
||||||
func CastableToInt(i interface{}) bool {
|
|
||||||
|
|
||||||
switch i.(type) {
|
|
||||||
case int,
|
|
||||||
int8, int16, int32, int64,
|
|
||||||
float32, float64,
|
|
||||||
uint8, uint16, uint32, uint64:
|
|
||||||
return true
|
|
||||||
default:
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// > It copies the contents of the `src` map into the `dest` map, but if the value is a map, slice, or
|
// > It copies the contents of the `src` map into the `dest` map, but if the value is a map, slice, or
|
||||||
// array, it makes a deep copy of it
|
// array, it makes a deep copy of it
|
||||||
func MustFillMap(dest, src map[string]interface{}) {
|
func MustFillMap(dest, src map[string]interface{}) {
|
||||||
@ -270,8 +283,15 @@ func MustFillMap(dest, src map[string]interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadLines reads the lines from a file specified by the given path.
|
||||||
|
//
|
||||||
// Read a whole file into the memory and store it as array of lines
|
// Read a whole file into the memory and store it as array of lines
|
||||||
// It reads a file line by line, and returns a slice of strings, one for each line
|
// It reads a file line by line, and returns a slice of strings, one for each line
|
||||||
|
//
|
||||||
|
// It takes a single parameter:
|
||||||
|
// - path: a string representing the path of the file to read.
|
||||||
|
//
|
||||||
|
// It returns a slice of strings containing the lines read from the file, and an error if any occurred.
|
||||||
func ReadLines(path string) (lines []string, err error) {
|
func ReadLines(path string) (lines []string, err error) {
|
||||||
var (
|
var (
|
||||||
file *os.File
|
file *os.File
|
||||||
@ -301,6 +321,14 @@ func ReadLines(path string) (lines []string, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AtomicCounter creates and returns a function that generates a unique integer value each time it is called.
|
||||||
|
//
|
||||||
|
// The function takes an optional initial value as a parameter. If an initial value is provided, the generated
|
||||||
|
// integers will start from that value. If no initial value is provided, the generated integers will start from 0.
|
||||||
|
//
|
||||||
|
// The function is thread safe.
|
||||||
|
//
|
||||||
|
// The function returns a closure that can be called to retrieve the next integer in the sequence.
|
||||||
func AtomicCounter(initial ...int) func() int {
|
func AtomicCounter(initial ...int) func() int {
|
||||||
counterMutex := sync.Mutex{}
|
counterMutex := sync.Mutex{}
|
||||||
counter := 0
|
counter := 0
|
||||||
@ -320,12 +348,16 @@ func AtomicCounter(initial ...int) func() int {
|
|||||||
return nextCounter
|
return nextCounter
|
||||||
}
|
}
|
||||||
|
|
||||||
// Marshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
|
// JsonMarshal marshals an interface into JSON format.
|
||||||
|
//
|
||||||
|
// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
|
||||||
// friendly because it replaces the valid UTF-8 and JSON characters "&". "<",
|
// friendly because it replaces the valid UTF-8 and JSON characters "&". "<",
|
||||||
// ">" with the "slash u" unicode escaped forms (e.g. \u0026). It preemptively
|
// ">" with the "slash u" unicode escaped forms (e.g. \u0026). It preemptively
|
||||||
// escapes for HTML friendliness. Where text may include any of these
|
// escapes for HTML friendliness. Where text may include any of these
|
||||||
// characters, json.Marshal should not be used. Playground of Go breaking a
|
// characters, json.Marshal should not be used. Playground of Go breaking a
|
||||||
// title: https://play.golang.org/p/o2hiX0c62oN
|
// title: https://play.golang.org/p/o2hiX0c62oN
|
||||||
|
//
|
||||||
|
// It takes an interface as a parameter and returns a byte slice and an error.
|
||||||
func JsonMarshal(i interface{}) ([]byte, error) {
|
func JsonMarshal(i interface{}) ([]byte, error) {
|
||||||
buffer := &bytes.Buffer{}
|
buffer := &bytes.Buffer{}
|
||||||
encoder := json.NewEncoder(buffer)
|
encoder := json.NewEncoder(buffer)
|
||||||
@ -334,22 +366,45 @@ func JsonMarshal(i interface{}) ([]byte, error) {
|
|||||||
return bytes.TrimRight(buffer.Bytes(), "\n"), err
|
return bytes.TrimRight(buffer.Bytes(), "\n"), err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsAMap checks if the given value is a map.
|
||||||
|
//
|
||||||
|
// value: the value to be checked.
|
||||||
|
// returns: a boolean indicating if the value is a map.
|
||||||
func IsAMap(value interface{}) bool {
|
func IsAMap(value interface{}) bool {
|
||||||
return reflect.TypeOf(value).Kind() == reflect.Map
|
return reflect.TypeOf(value).Kind() == reflect.Map
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsAnArray checks if the given value is an array.
|
||||||
|
//
|
||||||
|
// value: The value to be checked.
|
||||||
|
// Returns: true if the value is an array, false otherwise.
|
||||||
func IsAnArray(value interface{}) bool {
|
func IsAnArray(value interface{}) bool {
|
||||||
return reflect.TypeOf(value).Kind() == reflect.Array
|
return reflect.TypeOf(value).Kind() == reflect.Array
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsASlice determines if the given value is a slice.
|
||||||
|
//
|
||||||
|
// value: the value to check.
|
||||||
|
// bool: true if the value is a slice, false otherwise.
|
||||||
func IsASlice(value interface{}) bool {
|
func IsASlice(value interface{}) bool {
|
||||||
return reflect.TypeOf(value).Kind() == reflect.Slice
|
return reflect.TypeOf(value).Kind() == reflect.Slice
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasLength checks if the given value has a length.
|
||||||
|
//
|
||||||
|
// value: The value to be checked.
|
||||||
|
// bool: Returns true if the value has a length, false otherwise.
|
||||||
func HasLength(value interface{}) bool {
|
func HasLength(value interface{}) bool {
|
||||||
_, ok := value.(interface{ Len() int })
|
_, ok := value.(interface{ Len() int })
|
||||||
return IsAMap(value) || IsAnArray(value) || IsASlice(value) || ok
|
return IsAMap(value) || IsAnArray(value) || IsASlice(value) || ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Len returns the length of the given value.
|
||||||
|
//
|
||||||
|
// It accepts a single parameter:
|
||||||
|
// - value: an interface{} that represents the value whose length is to be determined.
|
||||||
|
//
|
||||||
|
// It returns an int, which represents the length of the value.
|
||||||
func Len(value interface{}) int {
|
func Len(value interface{}) int {
|
||||||
l := 1
|
l := 1
|
||||||
|
|
||||||
|
@ -65,6 +65,10 @@ func (r Ranker[_]) Len() int { return len(r.r) }
|
|||||||
func (r Ranker[T]) Less(i, j int) bool { return r.x.Less(r.r[i], r.r[j]) }
|
func (r Ranker[T]) Less(i, j int) bool { return r.x.Less(r.r[i], r.r[j]) }
|
||||||
func (r Ranker[_]) Swap(i, j int) { r.r[i], r.r[j] = r.r[j], r.r[i] }
|
func (r Ranker[_]) Swap(i, j int) { r.r[i], r.r[j] = r.r[j], r.r[i] }
|
||||||
|
|
||||||
|
// Order sorts the given data using the provided sort.Interface and returns the sorted indices.
|
||||||
|
//
|
||||||
|
// data: The data to be sorted.
|
||||||
|
// Returns: A slice of integers representing the sorted indices.
|
||||||
func Order[T sort.Interface](data T) []int {
|
func Order[T sort.Interface](data T) []int {
|
||||||
ldata := data.Len()
|
ldata := data.Len()
|
||||||
if ldata == 0 {
|
if ldata == 0 {
|
||||||
|
@ -1,5 +1,13 @@
|
|||||||
package obiutils
|
package obiutils
|
||||||
|
|
||||||
|
// Contains checks if the given element is present in the given array.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - arr: The array to search in.
|
||||||
|
// - x: The element to search for.
|
||||||
|
//
|
||||||
|
// Return type:
|
||||||
|
// - bool: Returns true if the element is found, false otherwise.
|
||||||
func Contains[T comparable](arr []T, x T) bool {
|
func Contains[T comparable](arr []T, x T) bool {
|
||||||
for _, v := range arr {
|
for _, v := range arr {
|
||||||
if v == x {
|
if v == x {
|
||||||
@ -9,6 +17,14 @@ func Contains[T comparable](arr []T, x T) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LookFor searches for the first occurrence of a given element in an array and returns its index.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - arr: the array to be searched
|
||||||
|
// - x: the element to search for
|
||||||
|
//
|
||||||
|
// Return:
|
||||||
|
// - int: the index of the first occurrence of the element in the array, or -1 if not found
|
||||||
func LookFor[T comparable](arr []T, x T) int {
|
func LookFor[T comparable](arr []T, x T) int {
|
||||||
for i, v := range arr {
|
for i, v := range arr {
|
||||||
if v == x {
|
if v == x {
|
||||||
@ -18,19 +34,36 @@ func LookFor[T comparable](arr []T, x T) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoveIndex removes the element at the specified index from a slice.
//
// The removal is done in place: the elements following index are shifted one
// position to the left inside the backing array of s, so the slice passed by
// the caller is modified. No new backing array is allocated.
//
// Parameters:
//   - s: The slice from which the element will be removed.
//   - index: The index of the element to be removed. It must satisfy
//     0 <= index < len(s), otherwise the slicing operations panic.
//
// Returns:
//
//	A slice of length len(s)-1 sharing its backing array with s.
func RemoveIndex[T any](s []T, index int) []T {
	// Elements are never compared, so any element type is accepted.
	return append(s[:index], s[index+1:]...)
}
|
||||||
|
|
||||||
|
// Reverse reverses the order of the elements of a slice.
//
// Parameters:
//   - s: the slice to reverse.
//   - inplace: when true, the elements of s are swapped directly in s; when
//     false, s is left untouched and a freshly allocated copy of the same
//     length is reversed instead.
//
// Returns:
//   - the reversed slice: s itself when inplace is true, the copy otherwise.
func Reverse[S ~[]E, E any](s S, inplace bool) S {
	target := s
	if !inplace {
		// Work on a private copy so the caller's slice keeps its order.
		target = make(S, len(s))
		copy(target, s)
	}

	// Swap symmetric pairs, walking from both ends towards the middle.
	last := len(target) - 1
	for lo := 0; lo < len(target)/2; lo++ {
		hi := last - lo
		target[lo], target[hi] = target[hi], target[lo]
	}

	return target
}
|
||||||
|
Reference in New Issue
Block a user