diff --git a/Release-notes.md b/Release-notes.md index da0c138..336e320 100644 --- a/Release-notes.md +++ b/Release-notes.md @@ -2,13 +2,23 @@ ## Latest changes +### Bug fixes + +- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when + on right alignment mode + ### New features - Most of the time obitools identify automatically sequence file format. But it fails sometimes. Two new option **--fasta** and **--fastq** are added to allow the processing of the rare fasta and fastq files not recognized. - -## August 2nd, 2024. Release 4.3.0 + +- In `obiscript`, adds new methods to the Lua sequence object: + - `md5_string()`: returning the MD5 check sum as an hexadecimal string, + - `subsequence(from,to)`: allows to extract a subsequence on a 0 based + coordinate system, upper bound expluded like in go. + - `reverse_complement`: returning a sequence object corresponding to the reverse complement + of the current sequence. ### Change of git repositiory diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index 8eb7609..e5c12cb 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -485,7 +485,8 @@ func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, func PEAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, fastAlign bool, delta int, fastScoreRel bool, - arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) { + arena PEAlignArena, shift_buff *map[int]int) (bool, int, []int, int, int, float64) { + var isLeftAlign bool var score, shift int var startA, startB int var partLen, over int @@ -536,6 +537,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, rawSeqB = seqB.Sequence()[0:partLen] qualSeqB = seqB.Qualities()[0:partLen] extra3 = seqB.Len() - partLen + isLeftAlign = true score = _FillMatrixPeLeftAlign( rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, @@ -557,7 +559,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, rawSeqA = seqA.Sequence()[:partLen] qualSeqA = seqA.Qualities()[:partLen] extra3 = partLen - seqA.Len() - + isLeftAlign = false score = _FillMatrixPeRightAlign( rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, @@ -581,6 +583,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, qualSeqB = seqB.Qualities()[0:partLen] extra3 = seqB.Len() - partLen score = 0 + isLeftAlign = true } else { startA = 0 startB = -shift @@ -589,6 +592,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, partLen = len(qualSeqB) extra3 = partLen - seqA.Len() qualSeqA = seqA.Qualities()[:partLen] + isLeftAlign = false } score = 0 for i, qualA := range qualSeqA { @@ -625,6 +629,8 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, len(rawSeqA), len(rawSeqB), &(arena.pointer.path)) + isLeftAlign = false + scoreL := _FillMatrixPeLeftAlign( rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, @@ -634,9 +640,10 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, path = _Backtracking(arena.pointer.pathMatrix, len(rawSeqA), len(rawSeqB), &(arena.pointer.path)) + isLeftAlign = true } } - return score, path, fastCount, over, fastScore + return isLeftAlign, score, path, fastCount, over, fastScore } diff --git a/pkg/obilua/obiseq.go b/pkg/obilua/obiseq.go index d99c634..f2d120f 100644 --- a/pkg/obilua/obiseq.go +++ b/pkg/obilua/obiseq.go @@ -47,18 +47,21 @@ func newObiSeq(luaState *lua.LState) int { } var bioSequenceMethods = map[string]lua.LGFunction{ - "id": bioSequenceGetSetId, - "sequence": bioSequenceGetSetSequence, - "qualities": bioSequenceGetSetQualities, - "definition": bioSequenceGetSetDefinition, - "count": bioSequenceGetSetCount, - "taxid": bioSequenceGetSetTaxid, - "attribute": bioSequenceGetSetAttribute, - "len": bioSequenceGetLength, - "has_sequence": bioSequenceHasSequence, - "has_qualities": bioSequenceHasQualities, - "source": bioSequenceGetSource, - "md5": bioSequenceGetMD5, + "id": bioSequenceGetSetId, + "sequence": bioSequenceGetSetSequence, + "qualities": bioSequenceGetSetQualities, + "definition": bioSequenceGetSetDefinition, + "count": bioSequenceGetSetCount, + "taxid": bioSequenceGetSetTaxid, + "attribute": bioSequenceGetSetAttribute, + "len": bioSequenceGetLength, + "has_sequence": bioSequenceHasSequence, + "has_qualities": bioSequenceHasQualities, + "source": bioSequenceGetSource, + "md5": bioSequenceGetMD5, + "md5_string": bioSequenceGetMD5String, + "subsequence": bioSequenceGetSubsequence, + "reverse_complement": bioSequenceGetRevcomp, } // checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence. @@ -224,3 +227,30 @@ func bioSequenceGetMD5(luaState *lua.LState) int { luaState.Push(rt) return 1 } + +func bioSequenceGetMD5String(luaState *lua.LState) int { + s := checkBioSequence(luaState) + md5 := s.MD5String() + luaState.Push(lua.LString(md5)) + return 1 +} + +func bioSequenceGetSubsequence(luaState *lua.LState) int { + s := checkBioSequence(luaState) + start := luaState.CheckInt(2) + end := luaState.CheckInt(3) + subseq, err := s.Subsequence(start, end, false) + if err != nil { + luaState.RaiseError("%s : Error on subseq: %v", s.Id(), err) + return 0 + } + luaState.Push(obiseq2Lua(luaState, subseq)) + return 1 +} + +func bioSequenceGetRevcomp(luaState *lua.LState) int { + s := checkBioSequence(luaState) + revcomp := s.ReverseComplement(false) + luaState.Push(obiseq2Lua(luaState, revcomp)) + return 1 +} diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index d6633a6..c904d8d 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "4b65bfc" +var _Commit = "7884a74" var _Version = "Release 4.3.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 3a5f4be..9118ea0 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -12,6 +12,7 @@ package obiseq import ( "crypto/md5" + "encoding/hex" "slices" "sync" "sync/atomic" @@ -375,6 +376,11 @@ func (s *BioSequence) MD5() [16]byte { return md5.Sum(s.sequence) } +func (s *BioSequence) MD5String() string { + md5_hash := s.MD5() + return hex.EncodeToString(md5_hash[:]) +} + // SetId sets the id of the BioSequence. // // Parameters: diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index 1c5a13b..77613e4 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -9,6 +9,7 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) func _Abs(x int) int { @@ -112,7 +113,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence, inplace bool, fastAlign, fastModeRel bool, arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence { - score, path, fastcount, over, fastscore := obialign.PEAlign( + isLeftAlign, score, path, fastcount, over, fastscore := obialign.PEAlign( seqA, seqB, gap, scale, fastAlign, delta, fastModeRel, @@ -143,19 +144,14 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence, if aliLength >= minOverlap && identity >= minIdentity { annot["mode"] = "alignment" if withStats { - if left < 0 { - annot["seq_a_single"] = -left + if isLeftAlign { annot["ali_dir"] = "left" + annot["seq_a_single"] = obiutils.Abs(left) + annot["seq_b_single"] = obiutils.Abs(right) } else { - annot["seq_b_single"] = left annot["ali_dir"] = "right" - } - - if right < 0 { - right = -right - annot["seq_a_single"] = right - } else { - annot["seq_b_single"] = right + annot["seq_a_single"] = obiutils.Abs(right) + annot["seq_b_single"] = obiutils.Abs(left) } } if inplace {