A first prototype for the space of sequences

Former-commit-id: 07dc6ef044b5b6a6fb45dc2acb01dffe71a96195
This commit is contained in:
2023-08-27 14:58:55 +02:00
parent cbd42d5b30
commit 9bf006af93
17 changed files with 969 additions and 117 deletions

View File

@@ -10,6 +10,9 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obistats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obirefidx"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
"github.com/schollz/progressbar/v3"
log "github.com/sirupsen/logrus"
@@ -103,14 +106,14 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
library := iterator.Load()
library_size := len(library)
n_landmark := NCenter()
n_landmark := CLINCenter()
landmark_idx := obistats.SampleIntWithoutReplacement(n_landmark, library_size)
log.Infof("Library contains %d sequence", len(library))
var seqworld obiutils.Matrix[float64]
for loop := 0; loop < 5; loop++ {
for loop := 0; loop < 2; loop++ {
sort.IntSlice(landmark_idx).Sort()
log.Debugf("Selected indices : %v", landmark_idx)
@@ -154,14 +157,52 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
}
classes := obistats.AssignToClass(&seqworld, &initialCenters)
for i, seq := range library {
seq.SetAttribute("landmark_coord", seqworld[i])
ic, _ := obiutils.InterfaceToIntSlice(seqworld[i])
seq.SetCoordinate(ic)
seq.SetAttribute("landmark_class", classes[i])
if i, ok := seq_landmark[i]; ok {
seq.SetAttribute("landmark_id", i)
}
}
if obifind.CLIHasSelectedTaxonomy() {
taxo, err := obifind.CLILoadSelectedTaxonomy()
if err != nil {
log.Fatal(err)
}
taxa := make(obitax.TaxonSet, len(library))
for i, seq := range library {
taxa[i], err = taxo.Taxon(seq.Taxid())
if err != nil {
log.Fatal(err)
}
}
pbopt := make([]progressbar.Option, 0, 5)
pbopt = append(pbopt,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionSetWidth(15),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionSetDescription("[Sequence Indexing]"),
)
bar := progressbar.NewOptions(len(library), pbopt...)
for i, seq := range library {
idx := obirefidx.GeomIndexSesquence(i, library, &taxa, taxo)
seq.SetOBITagGeomRefIndex(idx)
if i%10 == 0 {
bar.Add(10)
}
}
}
return obiiter.IBatchOver(library, obioptions.CLIBatchSize())
}

View File

@@ -2,28 +2,37 @@ package obilandmark
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind"
"github.com/DavidGamba/go-getoptions"
)
// _nCenter stores the value of the "center" (alias "n") command-line option.
// It defaults to 200.
var _nCenter = 200
// ObilandmarkOptionSet sets the options for Obilandmark.
// LandmarkOptionSet registers the command-line options specific to obilandmark.
//
// It declares the integer option "center" (alias "n"); the parsed value is
// stored in _nCenter, whose current value is used as the default.
//
// options: the getoptions.GetOpt instance on which the option is registered.
// Return type: none.
func ObilandmarkOptionSet(options *getoptions.GetOpt) {
func LandmarkOptionSet(options *getoptions.GetOpt) {
options.IntVar(&_nCenter, "center", _nCenter,
options.Alias("n"),
options.Description("Maximum numbers of differences between two variant sequences (default: %d)."))
options.Description("Number of landmark sequences to be selected."))
}
// OptionSet registers every command-line option of the command on options.
//
// It delegates, in order, to the obiconvert input and output option sets, the
// obifind taxonomy-loading option set, and the landmark-specific option set.
// It returns nothing.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.InputOptionSet(options)
obiconvert.OutputOptionSet(options)
ObilandmarkOptionSet(options)
// NOTE(review): the meaning of the two false flags is defined by
// obifind.LoadTaxonomyOptionSet and is not visible here — confirm.
obifind.LoadTaxonomyOptionSet(options, false, false)
LandmarkOptionSet(options)
}
func NCenter() int {
// CLINCenter returns the number of centers requested by the user.
//
// The value is read from _nCenter, which is bound to the "center" (alias "n")
// command-line option and defaults to 200.
func CLINCenter() int {
return _nCenter
}