mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-10 17:50:26 +00:00
Patch a bug when writing to stdout, and add a clearer error when opening data files
This commit is contained in:
@@ -188,3 +188,17 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
|
||||
return iterator, nil
|
||||
}
|
||||
|
||||
func OpenSequenceDataErrorMessage(args []string, err error) {
|
||||
if err != nil {
|
||||
switch len(args) {
|
||||
case 0:
|
||||
log.Errorf("Cannot open stdin (%v)", err)
|
||||
case 1:
|
||||
log.Errorf("Cannot open file %s: %v", args[0], err)
|
||||
default:
|
||||
log.Errorf("Cannot open one of the data files: %v", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
172
pkg/obitools/obikmersim/obikmersim.go
Normal file
172
pkg/obitools/obikmersim/obikmersim.go
Normal file
@@ -0,0 +1,172 @@
|
||||
package obikmersim
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obifp"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
)
|
||||
|
||||
// _Abs returns the absolute value of x.
//
// Negating the smallest representable int overflows back to itself, so that
// single value is returned unchanged (still negative).
func _Abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}
|
||||
|
||||
func MakeCountMatchWorker[T obifp.FPUint[T]](k *obikmer.KmerMap[T], minKmerCount int) obiseq.SeqWorker {
|
||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
matches := k.Query(sequence)
|
||||
matches.FilterMinCount(minKmerCount)
|
||||
n := matches.Len()
|
||||
|
||||
sequence.SetAttribute("obikmer_match_count", n)
|
||||
sequence.SetAttribute("obikmer_kmer_size", k.Kmersize)
|
||||
sequence.SetAttribute("obikmer_sparse_kmer", k.SparseAt >= 0)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
}
|
||||
|
||||
func MakeKmerAlignWorker[T obifp.FPUint[T]](
|
||||
k *obikmer.KmerMap[T],
|
||||
minKmerCount int,
|
||||
gap, scale float64, delta int, fastScoreRel bool,
|
||||
minIdentity float64, withStats bool) obiseq.SeqWorker {
|
||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
shifts := make(map[int]int)
|
||||
|
||||
matches := k.Query(sequence)
|
||||
matches.FilterMinCount(minKmerCount)
|
||||
|
||||
slice := obiseq.NewBioSequenceSlice(matches.Len())
|
||||
*slice = (*slice)[:0]
|
||||
|
||||
for _, seq := range matches.Sequences() {
|
||||
idmatched_id := seq.Id()
|
||||
|
||||
score, path, fastcount, over, fastscore, reverse := obialign.ReadAlign(
|
||||
sequence, seq,
|
||||
gap, scale, delta,
|
||||
fastScoreRel,
|
||||
arena, &shifts,
|
||||
)
|
||||
|
||||
if reverse {
|
||||
idmatched_id = idmatched_id + "-rev"
|
||||
seq = seq.ReverseComplement(false)
|
||||
}
|
||||
|
||||
cons, match := obialign.BuildQualityConsensus(sequence, seq, path, true, arena)
|
||||
|
||||
left := path[0]
|
||||
right := 0
|
||||
if path[len(path)-1] == 0 {
|
||||
right = path[len(path)-2]
|
||||
}
|
||||
lcons := cons.Len()
|
||||
aliLength := lcons - _Abs(left) - _Abs(right)
|
||||
identity := float64(match) / float64(aliLength)
|
||||
if aliLength == 0 {
|
||||
identity = 0
|
||||
}
|
||||
|
||||
rep := sequence.Copy()
|
||||
|
||||
rep.SetAttribute("obikmer_match_id", idmatched_id)
|
||||
rep.SetAttribute("obikmer_fast_count", fastcount)
|
||||
rep.SetAttribute("obikmer_fast_overlap", over)
|
||||
rep.SetAttribute("obikmer_fast_score", math.Round(fastscore*1000)/1000)
|
||||
|
||||
if reverse {
|
||||
rep.SetAttribute("obikmer_orientation", "reverse")
|
||||
} else {
|
||||
rep.SetAttribute("obikmer_orientation", "forward")
|
||||
}
|
||||
|
||||
if aliLength >= int(k.KmerSize()) && identity >= minIdentity {
|
||||
if withStats {
|
||||
if left < 0 {
|
||||
rep.SetAttribute("seq_a_single", -left)
|
||||
rep.SetAttribute("ali_dir", "left")
|
||||
} else {
|
||||
rep.SetAttribute("seq_b_single", left)
|
||||
rep.SetAttribute("ali_dir", "right")
|
||||
}
|
||||
|
||||
if right < 0 {
|
||||
right = -right
|
||||
rep.SetAttribute("seq_a_single", right)
|
||||
} else {
|
||||
rep.SetAttribute("seq_b_single", right)
|
||||
}
|
||||
rep.SetAttribute("obikmer_score", score)
|
||||
scoreNorm := float64(0)
|
||||
if aliLength > 0 {
|
||||
scoreNorm = math.Round(float64(match)/float64(aliLength)*1000) / 1000
|
||||
} else {
|
||||
scoreNorm = 0
|
||||
}
|
||||
|
||||
rep.SetAttribute("obikmer_score_norm", scoreNorm)
|
||||
rep.SetAttribute("obikmer_ali_length", aliLength)
|
||||
|
||||
rep.SetAttribute("seq_ab_match", match)
|
||||
|
||||
}
|
||||
|
||||
*slice = append(*slice, rep)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return *slice, nil
|
||||
}
|
||||
}
|
||||
|
||||
func CLILookForSharedKmers(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
source, references := CLIReference()
|
||||
|
||||
if iterator == obiiter.NilIBioSequence {
|
||||
iterator = obiiter.IBatchOver(source, references, obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
kmerMatch := obikmer.NewKmerMap[obifp.Uint64](references, uint(CLIKmerSize()), CLISparseMode())
|
||||
worker := MakeCountMatchWorker(kmerMatch, CLIMinSharedKmers())
|
||||
newIter = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers())
|
||||
|
||||
if CLISelf() {
|
||||
newIter = newIter.Speed("Counting similar reads", references.Len())
|
||||
} else {
|
||||
newIter = newIter.Speed("Counting similar reads")
|
||||
}
|
||||
|
||||
return newIter.FilterEmpty()
|
||||
}
|
||||
|
||||
func CLIAlignSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
source, references := CLIReference()
|
||||
|
||||
if iterator == obiiter.NilIBioSequence {
|
||||
iterator = obiiter.IBatchOver(source, references, obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
if CLISelf() {
|
||||
iterator = iterator.Speed("Aligning reads", references.Len())
|
||||
} else {
|
||||
iterator = iterator.Speed("Aligning reads")
|
||||
}
|
||||
kmerMatch := obikmer.NewKmerMap[obifp.Uint64](references, uint(CLIKmerSize()), CLISparseMode())
|
||||
worker := MakeKmerAlignWorker(kmerMatch, CLIMinSharedKmers(), CLIGap(), CLIScale(), CLIDelta(), CLIFastRelativeScore(), 0.8, true)
|
||||
newIter = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers())
|
||||
|
||||
return newIter.FilterEmpty()
|
||||
}
|
||||
140
pkg/obitools/obikmersim/options.go
Normal file
140
pkg/obitools/obikmersim/options.go
Normal file
@@ -0,0 +1,140 @@
|
||||
package obikmersim
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Package-level storage for the command-line options of the kmer similarity
// tools. Each variable is initialised with the option's default value.
var (
	// _KmerSize is the kmer size (option --kmer-size / -k).
	_KmerSize = 30
	// _Sparse enables the sparse kmer mode (option --sparse / -S).
	_Sparse = false
	// _References lists the reference files (option --reference / -r).
	_References = []string{}
	// _MinSharedKmers is the minimum number of kmers two sequences must
	// share (option --min-shared-kmers / -m).
	_MinSharedKmers = 1
	// _Self requests the comparison of the references with themselves
	// (option --self / -s).
	_Self = false

	// _Delta is the delta value for the match (option --delta / -d).
	_Delta = 5
	// _PenalityScale is the scale factor applied to the mismatch score and
	// the gap penalty (option --penality-scale / -X).
	_PenalityScale = 1.0
	// _GapPenality is the gap penalty factor (option --gap-penality / -G).
	_GapPenality = 2.0
	// _FastScoreAbs selects the fast absolute score mode
	// (option --fast-absolute / -a).
	_FastScoreAbs = false
)
|
||||
|
||||
// PCROptionSet defines every options related to a simulated PCR.
|
||||
//
|
||||
// The function adds to a CLI every options proposed to the user
|
||||
// to tune the parametters of the PCR simulation algorithm.
|
||||
//
|
||||
// # Parameters
|
||||
//
|
||||
// - option : is a pointer to a getoptions.GetOpt instance normaly
|
||||
// produced by the
|
||||
func KmerSimCountOptionSet(options *getoptions.GetOpt) {
|
||||
|
||||
options.IntVar(&_KmerSize, "kmer-size", _KmerSize,
|
||||
options.Alias("k"),
|
||||
options.Description("Kmer size to use."))
|
||||
|
||||
options.BoolVar(&_Sparse, "sparse", _Sparse,
|
||||
options.Alias("S"),
|
||||
options.Description("Set sparse kmer mode."))
|
||||
|
||||
options.StringSliceVar(&_References, "reference", 1, 1,
|
||||
options.Alias("r"),
|
||||
options.Description("Reference sequence."))
|
||||
|
||||
options.IntVar(&_MinSharedKmers, "min-shared-kmers", _MinSharedKmers,
|
||||
options.Alias("m"),
|
||||
options.Description("Minimum number of shared kmers between two sequences."))
|
||||
|
||||
options.BoolVar(&_Self, "self", _Self,
|
||||
options.Alias("s"),
|
||||
options.Description("Compare references with themselves."))
|
||||
|
||||
}
|
||||
|
||||
func KmerSimMatchOptionSet(options *getoptions.GetOpt) {
|
||||
options.IntVar(&_Delta, "delta", _Delta,
|
||||
options.Alias("d"),
|
||||
options.Description("Delta value for the match."))
|
||||
|
||||
options.Float64Var(&_PenalityScale, "penality-scale", _PenalityScale,
|
||||
options.Alias("X"),
|
||||
options.Description("Scale factor applied to the mismatch score and the gap penality (default 1)."))
|
||||
|
||||
options.Float64Var(&_GapPenality, "gap-penality", _GapPenality,
|
||||
options.Alias("G"),
|
||||
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
|
||||
|
||||
options.BoolVar(&_FastScoreAbs, "fast-absolute", _FastScoreAbs,
|
||||
options.Alias("a"),
|
||||
options.Description("Use fast absolute score mode."))
|
||||
}
|
||||
|
||||
func CountOptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
KmerSimCountOptionSet(options)
|
||||
}
|
||||
|
||||
func MatchOptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
KmerSimCountOptionSet(options)
|
||||
KmerSimMatchOptionSet(options)
|
||||
}
|
||||
|
||||
func CLIKmerSize() uint {
|
||||
return uint(_KmerSize)
|
||||
}
|
||||
|
||||
func CLISparseMode() bool {
|
||||
return _Sparse
|
||||
}
|
||||
|
||||
func CLIReference() (string, obiseq.BioSequenceSlice) {
|
||||
|
||||
refnames, err := obiconvert.ExpandListOfFiles(false, _References...)
|
||||
|
||||
if err != nil {
|
||||
return "", obiseq.BioSequenceSlice{}
|
||||
}
|
||||
|
||||
nreader := 1
|
||||
|
||||
if obiconvert.CLINoInputOrder() {
|
||||
nreader = obioptions.StrictReadWorker()
|
||||
}
|
||||
|
||||
source, references := obiformats.ReadSequencesBatchFromFiles(
|
||||
refnames,
|
||||
obiformats.ReadSequencesFromFile,
|
||||
nreader).Load()
|
||||
|
||||
return source, references
|
||||
}
|
||||
|
||||
func CLIMinSharedKmers() int {
|
||||
return _MinSharedKmers
|
||||
}
|
||||
|
||||
func CLISelf() bool {
|
||||
return _Self
|
||||
}
|
||||
|
||||
func CLIDelta() int {
|
||||
return _Delta
|
||||
}
|
||||
|
||||
func CLIScale() float64 {
|
||||
return _PenalityScale
|
||||
}
|
||||
|
||||
func CLIGapPenality() float64 {
|
||||
return _GapPenality
|
||||
}
|
||||
|
||||
func CLIGap() float64 {
|
||||
return _GapPenality
|
||||
}
|
||||
|
||||
func CLIFastRelativeScore() bool {
|
||||
return !_FastScoreAbs
|
||||
}
|
||||
Reference in New Issue
Block a user