mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Changes to be committed:
modified: Release-notes.md modified: pkg/obialign/pairedendalign.go modified: pkg/obilua/obiseq.go modified: pkg/obioptions/version.go modified: pkg/obiseq/biosequence.go modified: pkg/obitools/obipairing/pairing.go
This commit is contained in:
@ -2,13 +2,23 @@
|
|||||||
|
|
||||||
## Latest changes
|
## Latest changes
|
||||||
|
|
||||||
|
### Bug fixes
|
||||||
|
|
||||||
|
- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
|
||||||
|
on right alignment mode
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
- Most of the time obitools identify automatically sequence file format. But
|
- Most of the time obitools identify automatically sequence file format. But
|
||||||
it fails sometimes. Two new option **--fasta** and **--fastq** are added to
|
it fails sometimes. Two new option **--fasta** and **--fastq** are added to
|
||||||
allow the processing of the rare fasta and fastq files not recognized.
|
allow the processing of the rare fasta and fastq files not recognized.
|
||||||
|
|
||||||
## August 2nd, 2024. Release 4.3.0
|
- In `obiscript`, adds new methods to the Lua sequence object:
|
||||||
|
- `md5_string()`: returning the MD5 check sum as an hexadecimal string,
|
||||||
|
- `subsequence(from,to)`: allows to extract a subsequence on a 0 based
|
||||||
|
coordinate system, upper bound expluded like in go.
|
||||||
|
- `reverse_complement`: returning a sequence object corresponding to the reverse complement
|
||||||
|
of the current sequence.
|
||||||
|
|
||||||
### Change of git repositiory
|
### Change of git repositiory
|
||||||
|
|
||||||
|
@ -485,7 +485,8 @@ func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
|||||||
|
|
||||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||||
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||||
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) {
|
arena PEAlignArena, shift_buff *map[int]int) (bool, int, []int, int, int, float64) {
|
||||||
|
var isLeftAlign bool
|
||||||
var score, shift int
|
var score, shift int
|
||||||
var startA, startB int
|
var startA, startB int
|
||||||
var partLen, over int
|
var partLen, over int
|
||||||
@ -536,6 +537,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
rawSeqB = seqB.Sequence()[0:partLen]
|
rawSeqB = seqB.Sequence()[0:partLen]
|
||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
qualSeqB = seqB.Qualities()[0:partLen]
|
||||||
extra3 = seqB.Len() - partLen
|
extra3 = seqB.Len() - partLen
|
||||||
|
isLeftAlign = true
|
||||||
score = _FillMatrixPeLeftAlign(
|
score = _FillMatrixPeLeftAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@ -557,7 +559,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
rawSeqA = seqA.Sequence()[:partLen]
|
rawSeqA = seqA.Sequence()[:partLen]
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
qualSeqA = seqA.Qualities()[:partLen]
|
||||||
extra3 = partLen - seqA.Len()
|
extra3 = partLen - seqA.Len()
|
||||||
|
isLeftAlign = false
|
||||||
score = _FillMatrixPeRightAlign(
|
score = _FillMatrixPeRightAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@ -581,6 +583,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
qualSeqB = seqB.Qualities()[0:partLen]
|
||||||
extra3 = seqB.Len() - partLen
|
extra3 = seqB.Len() - partLen
|
||||||
score = 0
|
score = 0
|
||||||
|
isLeftAlign = true
|
||||||
} else {
|
} else {
|
||||||
startA = 0
|
startA = 0
|
||||||
startB = -shift
|
startB = -shift
|
||||||
@ -589,6 +592,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
partLen = len(qualSeqB)
|
partLen = len(qualSeqB)
|
||||||
extra3 = partLen - seqA.Len()
|
extra3 = partLen - seqA.Len()
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
qualSeqA = seqA.Qualities()[:partLen]
|
||||||
|
isLeftAlign = false
|
||||||
}
|
}
|
||||||
score = 0
|
score = 0
|
||||||
for i, qualA := range qualSeqA {
|
for i, qualA := range qualSeqA {
|
||||||
@ -625,6 +629,8 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
|
|
||||||
|
isLeftAlign = false
|
||||||
|
|
||||||
scoreL := _FillMatrixPeLeftAlign(
|
scoreL := _FillMatrixPeLeftAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@ -634,9 +640,10 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
path = _Backtracking(arena.pointer.pathMatrix,
|
path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
|
isLeftAlign = true
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return score, path, fastCount, over, fastScore
|
return isLeftAlign, score, path, fastCount, over, fastScore
|
||||||
}
|
}
|
||||||
|
@ -47,18 +47,21 @@ func newObiSeq(luaState *lua.LState) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var bioSequenceMethods = map[string]lua.LGFunction{
|
var bioSequenceMethods = map[string]lua.LGFunction{
|
||||||
"id": bioSequenceGetSetId,
|
"id": bioSequenceGetSetId,
|
||||||
"sequence": bioSequenceGetSetSequence,
|
"sequence": bioSequenceGetSetSequence,
|
||||||
"qualities": bioSequenceGetSetQualities,
|
"qualities": bioSequenceGetSetQualities,
|
||||||
"definition": bioSequenceGetSetDefinition,
|
"definition": bioSequenceGetSetDefinition,
|
||||||
"count": bioSequenceGetSetCount,
|
"count": bioSequenceGetSetCount,
|
||||||
"taxid": bioSequenceGetSetTaxid,
|
"taxid": bioSequenceGetSetTaxid,
|
||||||
"attribute": bioSequenceGetSetAttribute,
|
"attribute": bioSequenceGetSetAttribute,
|
||||||
"len": bioSequenceGetLength,
|
"len": bioSequenceGetLength,
|
||||||
"has_sequence": bioSequenceHasSequence,
|
"has_sequence": bioSequenceHasSequence,
|
||||||
"has_qualities": bioSequenceHasQualities,
|
"has_qualities": bioSequenceHasQualities,
|
||||||
"source": bioSequenceGetSource,
|
"source": bioSequenceGetSource,
|
||||||
"md5": bioSequenceGetMD5,
|
"md5": bioSequenceGetMD5,
|
||||||
|
"md5_string": bioSequenceGetMD5String,
|
||||||
|
"subsequence": bioSequenceGetSubsequence,
|
||||||
|
"reverse_complement": bioSequenceGetRevcomp,
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
||||||
@ -224,3 +227,30 @@ func bioSequenceGetMD5(luaState *lua.LState) int {
|
|||||||
luaState.Push(rt)
|
luaState.Push(rt)
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetMD5String(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
md5 := s.MD5String()
|
||||||
|
luaState.Push(lua.LString(md5))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetSubsequence(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
start := luaState.CheckInt(2)
|
||||||
|
end := luaState.CheckInt(3)
|
||||||
|
subseq, err := s.Subsequence(start, end, false)
|
||||||
|
if err != nil {
|
||||||
|
luaState.RaiseError("%s : Error on subseq: %v", s.Id(), err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
luaState.Push(obiseq2Lua(luaState, subseq))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetRevcomp(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
revcomp := s.ReverseComplement(false)
|
||||||
|
luaState.Push(obiseq2Lua(luaState, revcomp))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
// TODO: The version number is extracted from git. This induces that the version
|
// TODO: The version number is extracted from git. This induces that the version
|
||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
var _Commit = "4b65bfc"
|
var _Commit = "7884a74"
|
||||||
var _Version = "Release 4.3.0"
|
var _Version = "Release 4.3.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
@ -12,6 +12,7 @@ package obiseq
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
"encoding/hex"
|
||||||
"slices"
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
@ -375,6 +376,11 @@ func (s *BioSequence) MD5() [16]byte {
|
|||||||
return md5.Sum(s.sequence)
|
return md5.Sum(s.sequence)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) MD5String() string {
|
||||||
|
md5_hash := s.MD5()
|
||||||
|
return hex.EncodeToString(md5_hash[:])
|
||||||
|
}
|
||||||
|
|
||||||
// SetId sets the id of the BioSequence.
|
// SetId sets the id of the BioSequence.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
func _Abs(x int) int {
|
func _Abs(x int) int {
|
||||||
@ -112,7 +113,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
inplace bool, fastAlign, fastModeRel bool,
|
inplace bool, fastAlign, fastModeRel bool,
|
||||||
arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence {
|
arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence {
|
||||||
|
|
||||||
score, path, fastcount, over, fastscore := obialign.PEAlign(
|
isLeftAlign, score, path, fastcount, over, fastscore := obialign.PEAlign(
|
||||||
seqA, seqB,
|
seqA, seqB,
|
||||||
gap, scale,
|
gap, scale,
|
||||||
fastAlign, delta, fastModeRel,
|
fastAlign, delta, fastModeRel,
|
||||||
@ -143,19 +144,14 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
if aliLength >= minOverlap && identity >= minIdentity {
|
if aliLength >= minOverlap && identity >= minIdentity {
|
||||||
annot["mode"] = "alignment"
|
annot["mode"] = "alignment"
|
||||||
if withStats {
|
if withStats {
|
||||||
if left < 0 {
|
if isLeftAlign {
|
||||||
annot["seq_a_single"] = -left
|
|
||||||
annot["ali_dir"] = "left"
|
annot["ali_dir"] = "left"
|
||||||
|
annot["seq_a_single"] = obiutils.Abs(left)
|
||||||
|
annot["seq_b_single"] = obiutils.Abs(right)
|
||||||
} else {
|
} else {
|
||||||
annot["seq_b_single"] = left
|
|
||||||
annot["ali_dir"] = "right"
|
annot["ali_dir"] = "right"
|
||||||
}
|
annot["seq_a_single"] = obiutils.Abs(right)
|
||||||
|
annot["seq_b_single"] = obiutils.Abs(left)
|
||||||
if right < 0 {
|
|
||||||
right = -right
|
|
||||||
annot["seq_a_single"] = right
|
|
||||||
} else {
|
|
||||||
annot["seq_b_single"] = right
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if inplace {
|
if inplace {
|
||||||
|
Reference in New Issue
Block a user