Changes to be committed:

modified:   Release-notes.md
	modified:   pkg/obialign/pairedendalign.go
	modified:   pkg/obilua/obiseq.go
	modified:   pkg/obioptions/version.go
	modified:   pkg/obiseq/biosequence.go
	modified:   pkg/obitools/obipairing/pairing.go
This commit is contained in:
Eric Coissac
2024-11-27 09:56:22 +01:00
parent 7884a74f9c
commit d29a56dcbf
6 changed files with 78 additions and 29 deletions

View File

@ -2,13 +2,23 @@
## Latest changes
### Bug fixes
- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
on right alignment mode
### New features
- Most of the time obitools identify automatically sequence file format. But
it fails sometimes. Two new option **--fasta** and **--fastq** are added to
allow the processing of the rare fasta and fastq files not recognized.
## August 2nd, 2024. Release 4.3.0
- In `obiscript`, adds new methods to the Lua sequence object:
- `md5_string()`: returning the MD5 check sum as an hexadecimal string,
- `subsequence(from,to)`: allows to extract a subsequence on a 0 based
coordinate system, upper bound expluded like in go.
- `reverse_complement`: returning a sequence object corresponding to the reverse complement
of the current sequence.
### Change of git repositiory

View File

@ -485,7 +485,8 @@ func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
func PEAlign(seqA, seqB *obiseq.BioSequence,
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) {
arena PEAlignArena, shift_buff *map[int]int) (bool, int, []int, int, int, float64) {
var isLeftAlign bool
var score, shift int
var startA, startB int
var partLen, over int
@ -536,6 +537,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
rawSeqB = seqB.Sequence()[0:partLen]
qualSeqB = seqB.Qualities()[0:partLen]
extra3 = seqB.Len() - partLen
isLeftAlign = true
score = _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
@ -557,7 +559,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
rawSeqA = seqA.Sequence()[:partLen]
qualSeqA = seqA.Qualities()[:partLen]
extra3 = partLen - seqA.Len()
isLeftAlign = false
score = _FillMatrixPeRightAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
@ -581,6 +583,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
qualSeqB = seqB.Qualities()[0:partLen]
extra3 = seqB.Len() - partLen
score = 0
isLeftAlign = true
} else {
startA = 0
startB = -shift
@ -589,6 +592,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
partLen = len(qualSeqB)
extra3 = partLen - seqA.Len()
qualSeqA = seqA.Qualities()[:partLen]
isLeftAlign = false
}
score = 0
for i, qualA := range qualSeqA {
@ -625,6 +629,8 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
len(rawSeqA), len(rawSeqB),
&(arena.pointer.path))
isLeftAlign = false
scoreL := _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
@ -634,9 +640,10 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
path = _Backtracking(arena.pointer.pathMatrix,
len(rawSeqA), len(rawSeqB),
&(arena.pointer.path))
isLeftAlign = true
}
}
return score, path, fastCount, over, fastScore
return isLeftAlign, score, path, fastCount, over, fastScore
}

View File

@ -47,18 +47,21 @@ func newObiSeq(luaState *lua.LState) int {
}
var bioSequenceMethods = map[string]lua.LGFunction{
"id": bioSequenceGetSetId,
"sequence": bioSequenceGetSetSequence,
"qualities": bioSequenceGetSetQualities,
"definition": bioSequenceGetSetDefinition,
"count": bioSequenceGetSetCount,
"taxid": bioSequenceGetSetTaxid,
"attribute": bioSequenceGetSetAttribute,
"len": bioSequenceGetLength,
"has_sequence": bioSequenceHasSequence,
"has_qualities": bioSequenceHasQualities,
"source": bioSequenceGetSource,
"md5": bioSequenceGetMD5,
"id": bioSequenceGetSetId,
"sequence": bioSequenceGetSetSequence,
"qualities": bioSequenceGetSetQualities,
"definition": bioSequenceGetSetDefinition,
"count": bioSequenceGetSetCount,
"taxid": bioSequenceGetSetTaxid,
"attribute": bioSequenceGetSetAttribute,
"len": bioSequenceGetLength,
"has_sequence": bioSequenceHasSequence,
"has_qualities": bioSequenceHasQualities,
"source": bioSequenceGetSource,
"md5": bioSequenceGetMD5,
"md5_string": bioSequenceGetMD5String,
"subsequence": bioSequenceGetSubsequence,
"reverse_complement": bioSequenceGetRevcomp,
}
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
@ -224,3 +227,30 @@ func bioSequenceGetMD5(luaState *lua.LState) int {
luaState.Push(rt)
return 1
}
func bioSequenceGetMD5String(luaState *lua.LState) int {
s := checkBioSequence(luaState)
md5 := s.MD5String()
luaState.Push(lua.LString(md5))
return 1
}
func bioSequenceGetSubsequence(luaState *lua.LState) int {
s := checkBioSequence(luaState)
start := luaState.CheckInt(2)
end := luaState.CheckInt(3)
subseq, err := s.Subsequence(start, end, false)
if err != nil {
luaState.RaiseError("%s : Error on subseq: %v", s.Id(), err)
return 0
}
luaState.Push(obiseq2Lua(luaState, subseq))
return 1
}
func bioSequenceGetRevcomp(luaState *lua.LState) int {
s := checkBioSequence(luaState)
revcomp := s.ReverseComplement(false)
luaState.Push(obiseq2Lua(luaState, revcomp))
return 1
}

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This induces that the version
// corresponds to the last commit, and not the one when the file will be
// commited
var _Commit = "4b65bfc"
var _Commit = "7884a74"
var _Version = "Release 4.3.0"
// Version returns the version of the obitools package.

View File

@ -12,6 +12,7 @@ package obiseq
import (
"crypto/md5"
"encoding/hex"
"slices"
"sync"
"sync/atomic"
@ -375,6 +376,11 @@ func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence)
}
func (s *BioSequence) MD5String() string {
md5_hash := s.MD5()
return hex.EncodeToString(md5_hash[:])
}
// SetId sets the id of the BioSequence.
//
// Parameters:

View File

@ -9,6 +9,7 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
func _Abs(x int) int {
@ -112,7 +113,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
inplace bool, fastAlign, fastModeRel bool,
arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence {
score, path, fastcount, over, fastscore := obialign.PEAlign(
isLeftAlign, score, path, fastcount, over, fastscore := obialign.PEAlign(
seqA, seqB,
gap, scale,
fastAlign, delta, fastModeRel,
@ -143,19 +144,14 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
if aliLength >= minOverlap && identity >= minIdentity {
annot["mode"] = "alignment"
if withStats {
if left < 0 {
annot["seq_a_single"] = -left
if isLeftAlign {
annot["ali_dir"] = "left"
annot["seq_a_single"] = obiutils.Abs(left)
annot["seq_b_single"] = obiutils.Abs(right)
} else {
annot["seq_b_single"] = left
annot["ali_dir"] = "right"
}
if right < 0 {
right = -right
annot["seq_a_single"] = right
} else {
annot["seq_b_single"] = right
annot["seq_a_single"] = obiutils.Abs(right)
annot["seq_b_single"] = obiutils.Abs(left)
}
}
if inplace {