mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch CSV reader to use the new taxonomy system
This commit is contained in:
@ -5,7 +5,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"unsafe"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
@ -94,19 +94,28 @@ func _ParseCsvFile(source string,
|
||||
continue
|
||||
}
|
||||
|
||||
err := json.Unmarshal(unsafe.Slice(unsafe.StringData(field), len(field)), &val)
|
||||
ft := header[i]
|
||||
|
||||
if err != nil {
|
||||
val = field
|
||||
} else {
|
||||
if _, ok := val.(float64); ok {
|
||||
if obiutils.IsIntegral(val.(float64)) {
|
||||
val = int(val.(float64))
|
||||
switch {
|
||||
case ft == "taxid":
|
||||
sequence.SetTaxid(field)
|
||||
case strings.HasSuffix(ft, "_taxid"):
|
||||
sequence.SetTaxid(field, strings.TrimSuffix(ft, "_taxid"))
|
||||
default:
|
||||
err := json.Unmarshal(obiutils.UnsafeBytes(field), &val)
|
||||
|
||||
if err != nil {
|
||||
val = field
|
||||
} else {
|
||||
if _, ok := val.(float64); ok {
|
||||
if obiutils.IsIntegral(val.(float64)) {
|
||||
val = int(val.(float64))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sequence.SetAttribute(header[i], val)
|
||||
sequence.SetAttribute(ft, val)
|
||||
}
|
||||
}
|
||||
|
||||
slice = append(slice, sequence)
|
||||
|
@ -8,8 +8,8 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// commited
|
||||
|
||||
var _Commit = "39dd3e3"
|
||||
var _Version = "Release 4.3.0"
|
||||
var _Commit = "ccd3b06"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
//
|
||||
|
@ -1,460 +0,0 @@
|
||||
package obitag2
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/exp/maps"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
type Obitag2Match struct {
|
||||
Taxon *obitax.TaxNode
|
||||
Id string
|
||||
Weight int
|
||||
}
|
||||
type Obitag2RefDB struct {
|
||||
Taxonomy *obitax.Taxonomy
|
||||
|
||||
Full *obiseq.BioSequenceSlice
|
||||
FullCounts *[]*obikmer.Table4mer
|
||||
FullTaxa *obitax.TaxonSet
|
||||
|
||||
Clusters *obiseq.BioSequenceSlice
|
||||
ClusterCounts *[]*obikmer.Table4mer
|
||||
ClusterTaxa *obitax.TaxonSet
|
||||
|
||||
Families *map[int]*obiseq.BioSequenceSlice
|
||||
FamilyCounts *map[int]*[]*obikmer.Table4mer
|
||||
FamilyTaxa *map[int]obitax.TaxonSet
|
||||
|
||||
ExactTaxid *map[string]*Obitag2Match
|
||||
}
|
||||
|
||||
// MatchDistanceIndex returns the taxid, rank, and scientificName based on the given distance and distanceIdx.
|
||||
//
|
||||
// Parameters:
|
||||
// - distance: The distance to match against the keys in distanceIdx.
|
||||
// - distanceIdx: A map containing distances as keys and corresponding values in the format "taxid@rank@scientificName".
|
||||
//
|
||||
// Returns:
|
||||
// - taxid: The taxid associated with the matched distance.
|
||||
// - rank: The rank associated with the matched distance.
|
||||
// - scientificName: The scientific name associated with the matched distance.
|
||||
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
|
||||
keys := maps.Keys(distanceIdx)
|
||||
slices.Sort(keys)
|
||||
|
||||
i := sort.Search(len(keys), func(i int) bool {
|
||||
return distance <= keys[i]
|
||||
})
|
||||
|
||||
var taxid int
|
||||
var rank string
|
||||
var scientificName string
|
||||
|
||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||
taxid = 1
|
||||
rank = "no rank"
|
||||
scientificName = "root"
|
||||
} else {
|
||||
parts := strings.Split(distanceIdx[keys[i]], "@")
|
||||
taxid, _ = strconv.Atoi(parts[0])
|
||||
rank = parts[1]
|
||||
scientificName = parts[2]
|
||||
}
|
||||
|
||||
// log.Info("taxid:", taxid, " rank:", rank, " scientificName:", scientificName)
|
||||
|
||||
return taxid, rank, scientificName
|
||||
}
|
||||
|
||||
// FindClosests finds the closest bio sequence from a given sequence and a slice of reference sequences.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: the bio sequence to find the closest matches for.
|
||||
// - references: a slice of reference sequences to compare against.
|
||||
// - refcounts: a slice of reference sequence counts.
|
||||
// - runExact: a boolean flag indicating whether to run an exact match.
|
||||
//
|
||||
// Returns:
|
||||
// - bests: a slice of the closest bio sequences.
|
||||
// - maxe: the maximum score.
|
||||
// - bestId: the best ID.
|
||||
// - bestmatch: the best match.
|
||||
// - bestidxs: a slice of the best indexes.
|
||||
func FindClosests(sequence *obiseq.BioSequence,
|
||||
references obiseq.BioSequenceSlice,
|
||||
refcounts []*obikmer.Table4mer,
|
||||
runExact bool) (obiseq.BioSequenceSlice, int, float64, string, []int) {
|
||||
|
||||
var matrix []uint64
|
||||
|
||||
seqwords := obikmer.Count4Mer(sequence, nil, nil)
|
||||
cw := make([]int, len(refcounts))
|
||||
|
||||
for i, ref := range refcounts {
|
||||
cw[i] = obikmer.Common4Mer(seqwords, ref)
|
||||
}
|
||||
|
||||
o := obiutils.Reverse(obiutils.IntOrder(cw), true)
|
||||
|
||||
bests := obiseq.MakeBioSequenceSlice()
|
||||
// bests = append(bests, references[o[0]])
|
||||
bestidxs := make([]int, 0)
|
||||
// bestidxs = append(bestidxs, o[0])
|
||||
bestId := 0.0
|
||||
bestmatch := references[o[0]].Id()
|
||||
|
||||
maxe := -1
|
||||
wordmin := 0
|
||||
|
||||
// sss := make([]int, 0, 10000)
|
||||
// log.Warnf("%v\n%v\n%d", seqwords, refcounts[o[0]], obikmer.Common4Mer(seqwords, refcounts[o[0]]))
|
||||
|
||||
for i, order := range o {
|
||||
ref := references[order]
|
||||
score := int(1e9)
|
||||
|
||||
if cw[order] < wordmin || i > 1000 {
|
||||
break
|
||||
}
|
||||
lcs, alilength := -1, -1
|
||||
switch maxe {
|
||||
case 0:
|
||||
if obiutils.UnsafeStringFromBytes(sequence.Sequence()) == obiutils.UnsafeStringFromBytes(references[order].Sequence()) {
|
||||
score = 0
|
||||
alilength = sequence.Len()
|
||||
lcs = alilength
|
||||
}
|
||||
case 1:
|
||||
d, _, _, _ := obialign.D1Or0(sequence, references[order])
|
||||
if d >= 0 {
|
||||
score = d
|
||||
alilength = max(sequence.Len(), ref.Len())
|
||||
lcs = alilength - score
|
||||
}
|
||||
default:
|
||||
lcs, alilength = obialign.FastLCSScore(sequence, references[order], maxe, &matrix)
|
||||
if lcs >= 0 {
|
||||
score = alilength - lcs
|
||||
}
|
||||
}
|
||||
|
||||
// log.Warnf("LCS : %d ALILENGTH : %d score : %d cw : %d", lcs, alilength, score, cw[order])
|
||||
if lcs >= 0 {
|
||||
|
||||
// sss = append(sss, cw[order], alilength-lcs)
|
||||
|
||||
//
|
||||
// We have found a better candidate than never
|
||||
//
|
||||
if maxe == -1 || score < maxe {
|
||||
bests = bests[:0] // Empty the best lists
|
||||
bestidxs = bestidxs[:0] //
|
||||
|
||||
maxe = score // Memorize the best scores
|
||||
wordmin = max(0, max(sequence.Len(), ref.Len())-3-4*maxe)
|
||||
bestId = float64(lcs) / float64(alilength)
|
||||
bestmatch = ref.Id()
|
||||
// log.Warnln(sequence.Id(), ref.Id(), cw[order], maxe, bestId, wordmin)
|
||||
}
|
||||
|
||||
if score == maxe {
|
||||
bests = append(bests, ref)
|
||||
bestidxs = append(bestidxs, order)
|
||||
id := float64(lcs) / float64(alilength)
|
||||
if id > bestId {
|
||||
bestId = id
|
||||
bestmatch = ref.Id()
|
||||
}
|
||||
// log.Println(ref.Id(), maxe, bestId,bestidxs)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
// sequence.SetAttribute("maxe", sss)
|
||||
log.Debugln("Closest Match", sequence.Id(), maxe, bestId, references[bestidxs[0]].Id(), bestidxs, len(bests))
|
||||
return bests, maxe, bestId, bestmatch, bestidxs
|
||||
}
|
||||
|
||||
func (db *Obitag2RefDB) BestConsensus(bests obiseq.BioSequenceSlice, differences int, slot string) *obitax.TaxNode {
|
||||
taxon := (*obitax.TaxNode)(nil)
|
||||
|
||||
for _, best := range bests {
|
||||
idx := best.OBITagRefIndex(slot)
|
||||
if idx == nil {
|
||||
log.Fatalf("Reference database must be indexed first")
|
||||
}
|
||||
|
||||
d := differences
|
||||
identification, ok := idx[d]
|
||||
found := false
|
||||
var parts []string
|
||||
|
||||
/*
|
||||
Here is an horrible hack for xprize challence.
|
||||
With Euka01 the part[0] was equal to "" for at
|
||||
least a sequence consensus. Which is not normal.
|
||||
|
||||
TO BE CHECKED AND CORRECTED
|
||||
|
||||
The problem seems related to idx that doesn't have
|
||||
a 0 distance
|
||||
*/
|
||||
for !found && d >= 0 {
|
||||
for !ok && d >= 0 {
|
||||
identification, ok = idx[d]
|
||||
d--
|
||||
}
|
||||
|
||||
parts = strings.Split(identification, "@")
|
||||
|
||||
found = parts[0] != ""
|
||||
if !found {
|
||||
log.Debugln("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||
for !ok && d <= 1000 {
|
||||
identification, ok = idx[d]
|
||||
d++
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
match_taxid, err := strconv.Atoi(parts[0])
|
||||
|
||||
if err != nil {
|
||||
log.Panicln("Cannot extract taxid from :", identification)
|
||||
}
|
||||
|
||||
match_taxon, err := db.Taxonomy.Taxon(match_taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Panicln("Cannot find taxon corresponding to taxid :", match_taxid)
|
||||
}
|
||||
|
||||
if taxon != nil {
|
||||
taxon, _ = taxon.LCA(match_taxon)
|
||||
} else {
|
||||
taxon = match_taxon
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return taxon
|
||||
}
|
||||
|
||||
// Identify makes the taxonomic identification of a BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: A pointer to a BioSequence to identify.
|
||||
// - references: A BioSequenceSlice.
|
||||
// - refcounts: A slice of pointers to Table4mer.
|
||||
// - taxa: A TaxonSet.
|
||||
// - taxo: A pointer to a Taxonomy.
|
||||
// - runExact: A boolean value indicating whether to run exact matching.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to a BioSequence.
|
||||
func Identify(sequence *obiseq.BioSequence,
|
||||
db *Obitag2RefDB,
|
||||
runExact bool) *obiseq.BioSequence {
|
||||
|
||||
identity := 1.0
|
||||
differences := 0
|
||||
method := "exact match"
|
||||
weight := 0
|
||||
|
||||
var bestmatch string
|
||||
var taxon *obitax.TaxNode
|
||||
|
||||
exacttaxon, ok := (*db.ExactTaxid)[obiutils.UnsafeStringFromBytes(sequence.Sequence())]
|
||||
if ok {
|
||||
taxon = exacttaxon.Taxon
|
||||
bestmatch = exacttaxon.Id
|
||||
weight = exacttaxon.Weight
|
||||
} else {
|
||||
var bests obiseq.BioSequenceSlice
|
||||
bests, differences, identity, bestmatch, _ = FindClosests(sequence, *db.Clusters, *db.ClusterCounts, runExact)
|
||||
weight = bests.Len() /* Should be the some of the count */
|
||||
ftaxon := (*obitax.TaxNode)(nil)
|
||||
|
||||
if identity >= 0.5 && differences >= 0 {
|
||||
ftaxon = db.BestConsensus(bests, differences, "obitag_ref_index")
|
||||
|
||||
fam := ftaxon.TaxonAtRank("family")
|
||||
|
||||
if fam != nil {
|
||||
ftaxid := fam.Taxid()
|
||||
bests, differences, identity, bestmatch, _ = FindClosests(
|
||||
sequence,
|
||||
*(*db.Families)[ftaxid],
|
||||
*(*db.FamilyCounts)[ftaxid],
|
||||
runExact,
|
||||
)
|
||||
sequence.SetAttribute("obitag_proposed_family", fam.ScientificName())
|
||||
ftaxon = db.BestConsensus(bests, differences, "reffamidx_in")
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
// Cannot match any taxon
|
||||
ftaxon, _ = db.Taxonomy.Taxon(1)
|
||||
sequence.SetTaxid(1)
|
||||
}
|
||||
|
||||
taxon = ftaxon
|
||||
method = "lcsfamlily"
|
||||
}
|
||||
|
||||
sequence.SetTaxid(taxon.Taxid())
|
||||
|
||||
sequence.SetAttribute("scientific_name", taxon.ScientificName())
|
||||
sequence.SetAttribute("obitag_rank", taxon.Rank())
|
||||
sequence.SetAttribute("obitag_bestid", identity)
|
||||
sequence.SetAttribute("obitag_bestmatch", bestmatch)
|
||||
sequence.SetAttribute("obitag_match_count", weight)
|
||||
sequence.SetAttribute("obitag_similarity_method", method)
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
||||
func IdentifySeqWorker(db *Obitag2RefDB, runExact bool) obiseq.SeqWorker {
|
||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
return obiseq.BioSequenceSlice{Identify(sequence, db, runExact)}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||
references obiseq.BioSequenceSlice,
|
||||
taxo *obitax.Taxonomy,
|
||||
) obiiter.IBioSequence {
|
||||
|
||||
refcounts := make(
|
||||
[]*obikmer.Table4mer,
|
||||
len(references))
|
||||
|
||||
taxa := make(obitax.TaxonSet,
|
||||
len(references))
|
||||
|
||||
familydb := make(map[int]*obiseq.BioSequenceSlice)
|
||||
familycounts := make(map[int]*[]*obikmer.Table4mer)
|
||||
familytaxa := make(map[int]obitax.TaxonSet)
|
||||
clusterdb := obiseq.MakeBioSequenceSlice()
|
||||
clustercounts := make([]*obikmer.Table4mer, 0)
|
||||
clustertaxa := make(obitax.TaxonSet, 0)
|
||||
exactmatch := make(map[string]*obiseq.BioSequenceSlice)
|
||||
|
||||
buffer := make([]byte, 0, 1000)
|
||||
|
||||
j := 0
|
||||
for i, seq := range references {
|
||||
refcounts[i] = obikmer.Count4Mer(seq, &buffer, nil)
|
||||
taxa[i], _ = taxo.Taxon(seq.Taxid())
|
||||
|
||||
if is_centrer, _ := seq.GetBoolAttribute("reffamidx_clusterhead"); is_centrer {
|
||||
clusterdb = append(clusterdb, seq)
|
||||
clustercounts = append(clustercounts, refcounts[i])
|
||||
clustertaxa[j] = taxa[i]
|
||||
j++
|
||||
}
|
||||
|
||||
family, _ := seq.GetIntAttribute("family_taxid")
|
||||
|
||||
fs, ok := familydb[family]
|
||||
if !ok {
|
||||
fs = obiseq.NewBioSequenceSlice(0)
|
||||
familydb[family] = fs
|
||||
}
|
||||
|
||||
*fs = append(*fs, seq)
|
||||
|
||||
fc, ok := familycounts[family]
|
||||
|
||||
if !ok {
|
||||
fci := make([]*obikmer.Table4mer, 0)
|
||||
fc = &fci
|
||||
familycounts[family] = fc
|
||||
}
|
||||
|
||||
*fc = append(*fc, refcounts[i])
|
||||
|
||||
ft, ok := familytaxa[family]
|
||||
|
||||
if !ok {
|
||||
ft = make(obitax.TaxonSet, 0)
|
||||
familytaxa[family] = ft
|
||||
}
|
||||
|
||||
ft[len(ft)] = taxa[i]
|
||||
|
||||
seqstr := obiutils.UnsafeStringFromBytes(seq.Sequence())
|
||||
em, ok := exactmatch[seqstr]
|
||||
|
||||
if !ok {
|
||||
em = obiseq.NewBioSequenceSlice()
|
||||
exactmatch[seqstr] = em
|
||||
}
|
||||
|
||||
*em = append(*em, seq)
|
||||
|
||||
}
|
||||
|
||||
exacttaxid := make(map[string]*Obitag2Match, len(exactmatch))
|
||||
for seqstr, seqs := range exactmatch {
|
||||
var err error
|
||||
t, _ := taxo.Taxon((*seqs)[0].Taxid())
|
||||
w := (*seqs)[0].Count()
|
||||
lseqs := seqs.Len()
|
||||
|
||||
for i := 1; i < lseqs; i++ {
|
||||
t2, _ := taxo.Taxon((*seqs)[i].Taxid())
|
||||
t, err = t.LCA(t2)
|
||||
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
|
||||
w += (*seqs)[i].Count()
|
||||
}
|
||||
|
||||
exacttaxid[seqstr] = &Obitag2Match{
|
||||
Taxon: t,
|
||||
Id: (*seqs)[0].Id(),
|
||||
Weight: w,
|
||||
}
|
||||
}
|
||||
|
||||
db := &Obitag2RefDB{
|
||||
Taxonomy: taxo,
|
||||
|
||||
Full: &references,
|
||||
FullCounts: &refcounts,
|
||||
FullTaxa: &taxa,
|
||||
|
||||
Clusters: &clusterdb,
|
||||
ClusterCounts: &clustercounts,
|
||||
ClusterTaxa: &clustertaxa,
|
||||
|
||||
Families: &familydb,
|
||||
FamilyCounts: &familycounts,
|
||||
FamilyTaxa: &familytaxa,
|
||||
|
||||
ExactTaxid: &exacttaxid,
|
||||
}
|
||||
|
||||
worker := IdentifySeqWorker(db, CLIRunExact())
|
||||
|
||||
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
package obitag2
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var _RefDB = ""
|
||||
var _SaveRefDB = ""
|
||||
var _RunExact = false
|
||||
var _GeomSim = false
|
||||
|
||||
func TagOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_RefDB, "reference-db", _RefDB,
|
||||
options.Alias("R"),
|
||||
options.Required(),
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("The name of the file containing the reference DB"))
|
||||
|
||||
options.StringVar(&_SaveRefDB, "save-db", _SaveRefDB,
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("The name of a file where to save the reference DB with its indices"))
|
||||
|
||||
options.BoolVar(&_GeomSim, "geometric", _GeomSim,
|
||||
options.Alias("G"),
|
||||
options.Description("Activate the experimental geometric similarity heuristic"))
|
||||
|
||||
// options.BoolVar(&_RunExact, "exact", _RunExact,
|
||||
// options.Alias("E"),
|
||||
// options.Description("Desactivate the heuristic limitating the sequence comparisons"))
|
||||
|
||||
}
|
||||
|
||||
// OptionSet adds to the basic option set every options declared for
|
||||
// the obiuniq command
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
obifind.LoadTaxonomyOptionSet(options, true, false)
|
||||
TagOptionSet(options)
|
||||
}
|
||||
|
||||
func CLIRefDBName() string {
|
||||
return _RefDB
|
||||
}
|
||||
|
||||
func CLIRefDB() obiseq.BioSequenceSlice {
|
||||
refdb, err := obiformats.ReadSequencesFromFile(_RefDB)
|
||||
|
||||
if err != nil {
|
||||
log.Panicf("Cannot open the reference library file : %s\n", _RefDB)
|
||||
}
|
||||
|
||||
_, db := refdb.Load()
|
||||
|
||||
return db
|
||||
}
|
||||
|
||||
func CLIGeometricMode() bool {
|
||||
return _GeomSim
|
||||
}
|
||||
|
||||
func CLIShouldISaveRefDB() bool {
|
||||
return _SaveRefDB != ""
|
||||
}
|
||||
|
||||
func CLISaveRefetenceDB(db obiseq.BioSequenceSlice) {
|
||||
if CLIShouldISaveRefDB() {
|
||||
idb := obiiter.IBatchOver("", db, 1000)
|
||||
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
switch obiconvert.CLIOutputFastHeaderFormat() {
|
||||
case "json":
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||
case "obi":
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqOBIHeader))
|
||||
default:
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||
}
|
||||
|
||||
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||
if nworkers < 2 {
|
||||
nworkers = 2
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||
|
||||
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||
|
||||
var err error
|
||||
|
||||
switch obiconvert.CLIOutputFormat() {
|
||||
case "fastq":
|
||||
newIter, err = obiformats.WriteFastqToFile(idb, _SaveRefDB, opts...)
|
||||
case "fasta":
|
||||
newIter, err = obiformats.WriteFastaToFile(idb, _SaveRefDB, opts...)
|
||||
default:
|
||||
newIter, err = obiformats.WriteSequencesToFile(idb, _SaveRefDB, opts...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Write file error: %v", err)
|
||||
}
|
||||
|
||||
newIter.Recycle()
|
||||
obiiter.WaitForLastPipe()
|
||||
}
|
||||
}
|
||||
|
||||
func CLIRunExact() bool {
|
||||
return _RunExact
|
||||
}
|
Reference in New Issue
Block a user