From ccc827afd33f128d0d55060c7ab31a5e6ca1a99a Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 11 Nov 2025 21:41:09 +0100 Subject: [PATCH 1/3] finalise obilowmask --- pkg/obioptions/version.go | 2 +- pkg/obitools/obilowmask/entropy.qmd | 13 +++++++++++++ pkg/obitools/obilowmask/obilowmask.go | 22 ++++++++++++++++++---- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 0f52169..d24004d 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "07cdd6f" +var _Commit = "d35a32b" var _Version = "Release 4.4.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obilowmask/entropy.qmd b/pkg/obitools/obilowmask/entropy.qmd index 34be993..e851a57 100644 --- a/pkg/obitools/obilowmask/entropy.qmd +++ b/pkg/obitools/obilowmask/entropy.qmd @@ -313,7 +313,20 @@ cat(" ecn(k,4) =", ecn(okmer(k,4),4), "\n") ``` ```{r} + +sequence <- "ttcatcactcagcaatcctgaatgatGAGAGCTTTTTTTTTTTATATATATATATATGTATATGTATGAAATACACTtatgctccgtttgtttcgccgtaa" re <- rev(c(0.8108602271901116,0.8108602271901116,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.8041354757148719,0.7800272339058549,0.7800272339058549,0.7751610144606091,0.7751610144606091,0.7751610144606091,0.764858185548322,0.7325526601302021,0.7137620699527615,0.6789199521982864,0.6584536373623372,0.634002687184193,0.6075290415873623,0.5785545803330997,0.5785545803330997,0.5503220289212184,0.5315314387437778,0.4966893209893028,0.46077361820145696,0.42388221293245526,0.4009547969713408,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3561142883497758,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.3418776106000334,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.35141814451677883,0.390029016052137,0.42781461756157363,0.45192285937059073,0.47238917420654,0.47238917420654,0.47238917420654,0.5092805794755417,0.5451962822633876,0.5800384000178626,0.602395141014297,0.6046146614886381,0.6046146614886381,0.6119084258128231,0.6119084258128231,0.6214217106113492,0.6424704346756562,0.6482381543085467,0.6635191587399633,0.6635191587399633,0.6635191587399633,0.6828444721058894,0.6950205907027562,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.696103322070051,0.7208976112999935)) di <- c(0.7208976112999935,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6961033220700509,0.6950205907027562,0.6828444721058894,0.6635191587399633,0.6635191587399633,0.6635191587399633,0.6482381543085467,0.6424704346756562,0.6214217106113492,0.6119084258128231,0.6119084258128231,0.6046146614886382,0.6046146614886382,0.6023951410142971,0.5800384000178627,0.5451962822633876,0.5092805794755418,0.47238917420654003,0.47238917420654003,0.47238917420654003,0.4519228593705908,0.4278146175615737,0.39002901605213713,0.35141814451677894,0.35141814451677894,0.35141814451677894,0.35141814451677894,0.35141814451677883,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3418776106000333,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.3561142883497762,0.40095479697134073,0.42388221293245526,0.46077361820145696,0.4966893209893028,0.5315314387437778,0.5503220289212184,0.5785545803330997,0.5785545803330997,0.6075290415873625,0.6340026871841933,0.6584536373623374,0.6789199521982866,0.7137620699527616,0.7325526601302023,0.7648581855483221,0.7751610144606093,0.7751610144606093,0.7751610144606093,0.7800272339058549,0.7800272339058549,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8041354757148721,0.8108602271901116,0.8108602271901116) + +ebidir <- tibble(direct=di,reverse=re) %>% + mutate(position = 1:length(re), + nucleotide = str_sub(sequence,position,position)) + +ebidir %>% + ggplot(aes(x=position,y=direct)) + + geom_line() + + scale_x_continuous(breaks = ebidir$position, labels = ebidir$nucleotide) + + ylim(0,1)+ + geom_hline(yintercept=0.5, col = "red", linetype = "dashed") ``` \ No newline at end of file diff --git a/pkg/obitools/obilowmask/obilowmask.go b/pkg/obitools/obilowmask/obilowmask.go index 3554a20..a75bca1 100644 --- a/pkg/obitools/obilowmask/obilowmask.go +++ b/pkg/obitools/obilowmask/obilowmask.go @@ -141,6 +141,8 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking minimier.Add(v) } + // log.Warnf("taille du minimier %d @ %d", minimier.Len(), i) + // Retrieve and store current minimum var ok bool if data[i], ok = minimier.Min(); !ok { @@ -302,9 +304,11 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking // Store entropy for position corresponding to start of k-mer if s >= nwords && maskPositions[i-nwords+1] >= 0 { - if entropy == 0 { - log.Errorf("Zero entropy @ positon %d", i-nwords+1) + if entropy < 0 { + entropy = 0 + } + entropy = math.Round(entropy*10000) / 10000 entropies[i-nwords+1] = entropy } } @@ -327,10 +331,11 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking sequenceBytes := seqCopy.Sequence() // Mask identified positions - for i := 0; i < len(sequenceBytes); i++ { + for i := range sequenceBytes { if maskPositions[i] { // Operation &^ 32 converts to UPPERCASE (clears bit 5) - sequenceBytes[i] = sequenceBytes[i] &^ 32 + // sequenceBytes[i] = sequenceBytes[i] &^ 32 + sequenceBytes[i] = mask } } @@ -343,6 +348,15 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking // Calculates entropies at all scales and masks positions // whose minimum entropy is below the threshold. masking := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { + if sequence.Len() < kmer_size { + sequence.SetAttribute("obilowmask_error", "Sequence too short") + remove := make([]bool, sequence.Len()) + for i := range remove { + remove[i] = true + } + return applyMaskMode(sequence, remove, maskChar) + } + bseq := sequence.Sequence() // Identify ambiguities From e65b2a5efea93a5af9083cc91ccc27aabb91fd7f Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 21 Nov 2025 13:09:24 +0100 Subject: [PATCH 2/3] obimatrix bugs --- pkg/obioptions/version.go | 2 +- pkg/obitools/obimatrix/obimatrix.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index d24004d..fc42c56 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "d35a32b" +var _Commit = "27204d5" var _Version = "Release 4.4.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obimatrix/obimatrix.go b/pkg/obitools/obimatrix/obimatrix.go index 2ff4aee..0d4f389 100644 --- a/pkg/obitools/obimatrix/obimatrix.go +++ b/pkg/obitools/obimatrix/obimatrix.go @@ -54,13 +54,13 @@ func NewMatrixData(naValue string, attributes ...string) *MatrixData { // It returns a pointer to the transposed MatrixData. func (matrix *MatrixData) TransposeMatrixData() *MatrixData { m := MakeMatrixData(matrix.naValue, "id") - for k, v := range *&matrix.matrix { + for k, v := range matrix.matrix { for kk, vv := range v { if _, ok := m.matrix[kk]; !ok { m.matrix[kk] = make(map[string]interface{}) } m.matrix[kk][k] = vv - m.attributes[kk] = map[string]interface{}{"id": k} + m.attributes[kk] = map[string]interface{}{"id": kk} } } return &m From 57c65f9d503f2b42180661e773d051df61e6ee6d Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 21 Nov 2025 13:24:24 +0100 Subject: [PATCH 3/3] obimatrix bug