mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-10 09:40:27 +00:00
Add some code refactoring from the blackboard branch
This commit is contained in:
@@ -57,7 +57,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
|
||||
return samples
|
||||
}
|
||||
|
||||
func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
func annotateOBIClean(source string, dataset obiseq.BioSequenceSlice,
|
||||
sample map[string]*([]*seqPCR),
|
||||
tag, NAValue string) obiiter.IBioSequence {
|
||||
batchsize := 1000
|
||||
@@ -91,7 +91,7 @@ func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
return data, nil
|
||||
}
|
||||
|
||||
iter := obiiter.IBatchOver(dataset, batchsize)
|
||||
iter := obiiter.IBatchOver(source, dataset, batchsize)
|
||||
riter := iter.MakeISliceWorker(annot, false)
|
||||
|
||||
return riter
|
||||
@@ -288,7 +288,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
|
||||
|
||||
func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
db := itertator.Load()
|
||||
source, db := itertator.Load()
|
||||
|
||||
log.Infof("Sequence dataset of %d sequeences loaded\n", len(db))
|
||||
|
||||
@@ -365,7 +365,7 @@ func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
||||
}
|
||||
|
||||
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
|
||||
iter := annotateOBIClean(source, db, samples, SampleAttribute(), "NA")
|
||||
|
||||
if OnlyHead() {
|
||||
iter = iter.FilterOn(IsHead, 1000)
|
||||
|
||||
@@ -274,11 +274,11 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
// obioptions.CLIParallelWorkers(),
|
||||
// )
|
||||
|
||||
references := annotated.Load()
|
||||
source, references := annotated.Load()
|
||||
|
||||
mannwithney := MakeSequenceFamilyGenusWorker(references)
|
||||
|
||||
partof := obiiter.IBatchOver(references,
|
||||
partof := obiiter.IBatchOver(source, references,
|
||||
obioptions.CLIBatchSize())
|
||||
|
||||
// genera_iterator, err := obichunk.ISequenceChunk(
|
||||
|
||||
@@ -46,7 +46,12 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
|
||||
if err == nil {
|
||||
defer fasta.Close()
|
||||
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(0, seqs), obiformats.FormatFastSeqJsonHeader, false))
|
||||
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(
|
||||
fmt.Sprintf("%s_consensus", consensus_id),
|
||||
0,
|
||||
seqs,
|
||||
),
|
||||
obiformats.FormatFastSeqJsonHeader, false).Bytes())
|
||||
fasta.Close()
|
||||
}
|
||||
|
||||
@@ -333,7 +338,7 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
dirname := CLIGraphFilesDirectory()
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
|
||||
db := itertator.Load()
|
||||
source, db := itertator.Load()
|
||||
|
||||
log.Infof("Sequence dataset of %d sequeences loaded\n", len(db))
|
||||
|
||||
@@ -394,7 +399,7 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
CLISampleAttribute(),
|
||||
CLIKmerSize())
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(sample_order, denoised))
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(source, sample_order, denoised))
|
||||
|
||||
sample_order++
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
)
|
||||
|
||||
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
var err error
|
||||
list_of_files := orderedset.NewOrderedSet()
|
||||
for _, fn := range filenames {
|
||||
@@ -39,7 +39,7 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
|
||||
if info.IsDir() {
|
||||
if path != fn {
|
||||
subdir, e := _ExpandListOfFiles(true, path)
|
||||
subdir, e := ExpandListOfFiles(true, path)
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
@@ -113,19 +113,26 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
log.Printf("Reading sequences from stdin in %s\n", CLIInputFormat())
|
||||
opts = append(opts, obiformats.OptionsSource("stdin"))
|
||||
|
||||
var err error
|
||||
|
||||
switch CLIInputFormat() {
|
||||
case "ecopcr":
|
||||
iterator = obiformats.ReadEcoPCR(os.Stdin, opts...)
|
||||
iterator, err = obiformats.ReadEcoPCR(os.Stdin, opts...)
|
||||
case "embl":
|
||||
iterator = obiformats.ReadEMBL(os.Stdin, opts...)
|
||||
iterator, err = obiformats.ReadEMBL(os.Stdin, opts...)
|
||||
case "genbank":
|
||||
iterator = obiformats.ReadGenbank(os.Stdin, opts...)
|
||||
iterator, err = obiformats.ReadGenbank(os.Stdin, opts...)
|
||||
default:
|
||||
iterator = obiformats.ReadFastSeqFromStdin(opts...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
list_of_files, err := _ExpandListOfFiles(false, filenames...)
|
||||
list_of_files, err := ExpandListOfFiles(false, filenames...)
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ func CLIJoinSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
log.Fatalf("Cannot read the data file to merge with: %s %v", CLIJoinWith(), err)
|
||||
}
|
||||
|
||||
data := data_iter.Load()
|
||||
_, data := data_iter.Load()
|
||||
|
||||
keys := CLIBy()
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ func MapOnLandmarkSequences(library obiseq.BioSequenceSlice, landmark_idx []int,
|
||||
// which landmark it corresponds.
|
||||
func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
library := iterator.Load()
|
||||
source, library := iterator.Load()
|
||||
|
||||
library_size := len(library)
|
||||
n_landmark := CLINCenter()
|
||||
@@ -191,6 +191,6 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque
|
||||
}
|
||||
}
|
||||
|
||||
return obiiter.IBatchOver(library, obioptions.CLIBatchSize())
|
||||
return obiiter.IBatchOver(source, library, obioptions.CLIBatchSize())
|
||||
|
||||
}
|
||||
|
||||
@@ -255,6 +255,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena, &shifts)
|
||||
}
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(
|
||||
batch.Source(),
|
||||
batch.Order(),
|
||||
cons,
|
||||
))
|
||||
|
||||
@@ -130,7 +130,7 @@ func MakeIndexingSliceWorker(indexslot, idslot string,
|
||||
func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
log.Infoln("Family level reference database indexing...")
|
||||
log.Infoln("Loading database...")
|
||||
references := iterator.Load()
|
||||
source, references := iterator.Load()
|
||||
nref := len(references)
|
||||
log.Infof("Done. Database contains %d sequences", nref)
|
||||
|
||||
@@ -154,7 +154,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
log.Info("done")
|
||||
|
||||
partof := obiiter.IBatchOver(references,
|
||||
partof := obiiter.IBatchOver(source, references,
|
||||
obioptions.CLIBatchSize()).MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
||||
false,
|
||||
obioptions.CLIParallelWorkers(),
|
||||
@@ -243,7 +243,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
waiting.Wait()
|
||||
|
||||
results := obiiter.IBatchOver(references,
|
||||
results := obiiter.IBatchOver(source, references,
|
||||
obioptions.CLIBatchSize()).Speed("Writing db", nref)
|
||||
|
||||
return results
|
||||
|
||||
@@ -125,7 +125,7 @@ func IndexSequence(seqidx int,
|
||||
func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
log.Infoln("Loading database...")
|
||||
references := iterator.Load()
|
||||
source, references := iterator.Load()
|
||||
log.Infof("Done. Database contains %d sequences", len(references))
|
||||
|
||||
taxo, error := obifind.CLILoadSelectedTaxonomy()
|
||||
@@ -204,7 +204,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
sl = append(sl, iref)
|
||||
bar.Add(1)
|
||||
}
|
||||
indexed.Push(obiiter.MakeBioSequenceBatch(l[0]/10, sl))
|
||||
indexed.Push(obiiter.MakeBioSequenceBatch(source, l[0]/10, sl))
|
||||
}
|
||||
|
||||
indexed.Done()
|
||||
|
||||
@@ -57,7 +57,9 @@ func CLIRefDB() obiseq.BioSequenceSlice {
|
||||
log.Panicf("Cannot open the reference library file : %s\n", _RefDB)
|
||||
}
|
||||
|
||||
return refdb.Load()
|
||||
_, db := refdb.Load()
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
func CLIGeometricMode() bool {
|
||||
@@ -70,7 +72,7 @@ func CLIShouldISaveRefDB() bool {
|
||||
|
||||
func CLISaveRefetenceDB(db obiseq.BioSequenceSlice) {
|
||||
if CLIShouldISaveRefDB() {
|
||||
idb := obiiter.IBatchOver(db, 1000)
|
||||
idb := obiiter.IBatchOver("", db, 1000)
|
||||
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
|
||||
@@ -57,7 +57,9 @@ func CLIRefDB() obiseq.BioSequenceSlice {
|
||||
log.Panicf("Cannot open the reference library file : %s\n", _RefDB)
|
||||
}
|
||||
|
||||
return refdb.Load()
|
||||
_, db := refdb.Load()
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
func CLIGeometricMode() bool {
|
||||
@@ -70,7 +72,7 @@ func CLIShouldISaveRefDB() bool {
|
||||
|
||||
func CLISaveRefetenceDB(db obiseq.BioSequenceSlice) {
|
||||
if CLIShouldISaveRefDB() {
|
||||
idb := obiiter.IBatchOver(db, 1000)
|
||||
idb := obiiter.IBatchOver("", db, 1000)
|
||||
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
|
||||
Reference in New Issue
Block a user