mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Reduce memory allocation events
Former-commit-id: fbdb2afc857b02adc2593e2278d3bd838e99b0b2
This commit is contained in:
@ -35,6 +35,7 @@ func main() {
|
|||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
|
obioptions.SetStrictReadWorker(min(4, obioptions.CLIParallelWorkers()))
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -32,6 +32,9 @@ func main() {
|
|||||||
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)
|
||||||
|
|
||||||
optionParser(os.Args)
|
optionParser(os.Args)
|
||||||
|
|
||||||
|
obioptions.SetStrictReadWorker(2)
|
||||||
|
obioptions.SetStrictWriteWorker(2)
|
||||||
pairs, err := obipairing.CLIPairedSequence()
|
pairs, err := obipairing.CLIPairedSequence()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -33,7 +33,9 @@ func main() {
|
|||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
obioptions.SetWorkerPerCore(2)
|
obioptions.SetWorkerPerCore(2)
|
||||||
obioptions.SetReadWorkerPerCore(0.5)
|
obioptions.SetStrictReadWorker(1)
|
||||||
|
obioptions.SetStrictWriteWorker(1)
|
||||||
|
obioptions.SetBatchSize(10)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obitag.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obitag.OptionSet)
|
||||||
|
|
||||||
|
@ -124,25 +124,26 @@ func BuildAlignment(seqA, seqB *obiseq.BioSequence,
|
|||||||
// In that case arenas will be allocated by the function but, they will not
|
// In that case arenas will be allocated by the function but, they will not
|
||||||
// be reusable for other alignments and deallocated at the BuildQualityConsensus
|
// be reusable for other alignments and deallocated at the BuildQualityConsensus
|
||||||
// return.
|
// return.
|
||||||
func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool) (*obiseq.BioSequence, int) {
|
func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool,
|
||||||
|
arenaAlign PEAlignArena) (*obiseq.BioSequence, int) {
|
||||||
|
|
||||||
bufferSA := obiseq.GetSlice(seqA.Len())
|
bufferSA := arenaAlign.pointer.aligneSeqA
|
||||||
bufferSB := obiseq.GetSlice(seqB.Len())
|
bufferSB := arenaAlign.pointer.aligneSeqB
|
||||||
defer obiseq.RecycleSlice(&bufferSB)
|
// defer obiseq.RecycleSlice(&bufferSB)
|
||||||
|
|
||||||
bufferQA := obiseq.GetSlice(seqA.Len())
|
bufferQA := arenaAlign.pointer.aligneQualA
|
||||||
bufferQB := obiseq.GetSlice(seqB.Len())
|
bufferQB := arenaAlign.pointer.aligneQualB
|
||||||
defer obiseq.RecycleSlice(&bufferQB)
|
// defer obiseq.RecycleSlice(&bufferQB)
|
||||||
|
|
||||||
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
|
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
|
||||||
&bufferSA, &bufferSB)
|
bufferSA, bufferSB)
|
||||||
|
|
||||||
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
||||||
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
||||||
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
|
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
|
||||||
|
|
||||||
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
|
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
|
||||||
&bufferQA, &bufferQB)
|
bufferQA, bufferQB)
|
||||||
|
|
||||||
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
|
||||||
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
|
||||||
@ -157,10 +158,10 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
|
|
||||||
match := 0
|
match := 0
|
||||||
|
|
||||||
for i, qA = range bufferQA {
|
for i, qA = range *bufferQA {
|
||||||
nA := bufferSA[i]
|
nA := (*bufferSA)[i]
|
||||||
nB := bufferSB[i]
|
nB := (*bufferSB)[i]
|
||||||
qB = bufferQB[i]
|
qB = (*bufferQB)[i]
|
||||||
|
|
||||||
if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
|
if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
|
||||||
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
||||||
@ -171,13 +172,13 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
qm = qB
|
qm = qB
|
||||||
}
|
}
|
||||||
if qB > qA {
|
if qB > qA {
|
||||||
bufferSA[i] = bufferSB[i]
|
(*bufferSA)[i] = (*bufferSB)[i]
|
||||||
qM = qB
|
qM = qB
|
||||||
qm = qA
|
qm = qA
|
||||||
}
|
}
|
||||||
if qB == qA && nA != nB {
|
if qB == qA && nA != nB {
|
||||||
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
|
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
|
||||||
bufferSA[i] = _FourBitsBaseDecode[nuc]
|
(*bufferSA)[i] = _FourBitsBaseDecode[nuc]
|
||||||
}
|
}
|
||||||
|
|
||||||
q := qA + qB
|
q := qA + qB
|
||||||
@ -195,15 +196,15 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
q = 90
|
q = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
bufferQA[i] = q
|
(*bufferQA)[i] = q
|
||||||
}
|
}
|
||||||
|
|
||||||
consSeq := obiseq.NewBioSequence(
|
consSeq := obiseq.NewBioSequence(
|
||||||
seqA.Id(),
|
seqA.Id(),
|
||||||
bufferSA,
|
*bufferSA,
|
||||||
seqA.Definition(),
|
seqA.Definition(),
|
||||||
)
|
)
|
||||||
consSeq.SetQualities(bufferQA)
|
consSeq.SetQualities(*bufferQA)
|
||||||
|
|
||||||
if statOnMismatch && len(mismatches) > 0 {
|
if statOnMismatch && len(mismatches) > 0 {
|
||||||
consSeq.SetAttribute("pairing_mismatches", mismatches)
|
consSeq.SetAttribute("pairing_mismatches", mismatches)
|
||||||
|
@ -13,6 +13,10 @@ type _PeAlignArena struct {
|
|||||||
path []int
|
path []int
|
||||||
fastIndex [][]int
|
fastIndex [][]int
|
||||||
fastBuffer []byte
|
fastBuffer []byte
|
||||||
|
aligneSeqA *[]byte
|
||||||
|
aligneSeqB *[]byte
|
||||||
|
aligneQualA *[]byte
|
||||||
|
aligneQualB *[]byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// PEAlignArena defines memory arena usable by the
|
// PEAlignArena defines memory arena usable by the
|
||||||
@ -30,12 +34,21 @@ var NilPEAlignArena = PEAlignArena{nil}
|
|||||||
// MakePEAlignArena makes a new arena for the alignment of two paired sequences
|
// MakePEAlignArena makes a new arena for the alignment of two paired sequences
|
||||||
// of maximum length indicated by lseqA and lseqB.
|
// of maximum length indicated by lseqA and lseqB.
|
||||||
func MakePEAlignArena(lseqA, lseqB int) PEAlignArena {
|
func MakePEAlignArena(lseqA, lseqB int) PEAlignArena {
|
||||||
|
aligneSeqA := make([]byte, 0, lseqA+lseqB)
|
||||||
|
aligneSeqB := make([]byte, 0, lseqA+lseqB)
|
||||||
|
aligneQualA := make([]byte, 0, lseqA+lseqB)
|
||||||
|
aligneQualB := make([]byte, 0, lseqA+lseqB)
|
||||||
|
|
||||||
a := _PeAlignArena{
|
a := _PeAlignArena{
|
||||||
scoreMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
scoreMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
||||||
pathMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
pathMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
||||||
path: make([]int, 2*(lseqA+lseqB)),
|
path: make([]int, 2*(lseqA+lseqB)),
|
||||||
fastIndex: make([][]int, 256),
|
fastIndex: make([][]int, 256),
|
||||||
fastBuffer: make([]byte, 0, lseqA),
|
fastBuffer: make([]byte, 0, lseqA),
|
||||||
|
aligneSeqA: &aligneSeqA,
|
||||||
|
aligneSeqB: &aligneSeqB,
|
||||||
|
aligneQualA: &aligneQualA,
|
||||||
|
aligneQualB: &aligneQualB,
|
||||||
}
|
}
|
||||||
|
|
||||||
return PEAlignArena{&a}
|
return PEAlignArena{&a}
|
||||||
@ -352,7 +365,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
|||||||
|
|
||||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||||
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||||
arena PEAlignArena) (int, []int, int, int, float64) {
|
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) {
|
||||||
var score, shift int
|
var score, shift int
|
||||||
var startA, startB int
|
var startA, startB int
|
||||||
var partLen, over int
|
var partLen, over int
|
||||||
@ -374,7 +387,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
&arena.pointer.fastIndex,
|
&arena.pointer.fastIndex,
|
||||||
&arena.pointer.fastBuffer)
|
&arena.pointer.fastBuffer)
|
||||||
|
|
||||||
shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, seqA.Len(), seqB, fastScoreRel, nil)
|
shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, shift_buff, seqA.Len(), seqB, fastScoreRel, nil)
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
over = seqA.Len() - shift
|
over = seqA.Len() - shift
|
||||||
|
@ -169,7 +169,9 @@ func _ParseEmblFile(source string, input ChannelSeqFileChunk,
|
|||||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry)
|
buff := make([]byte, 1024*1024*1024*256)
|
||||||
|
|
||||||
|
entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry)
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nworkers := opt.ParallelWorkers()
|
nworkers := opt.ParallelWorkers()
|
||||||
|
@ -228,7 +228,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
|
|
||||||
nworker := opt.ParallelWorkers()
|
nworker := opt.ParallelWorkers()
|
||||||
|
|
||||||
chkchan := ReadSeqFileChunk(reader, _EndOfLastFastaEntry)
|
buff := make([]byte, 1024*1024*1024)
|
||||||
|
|
||||||
|
chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastaEntry)
|
||||||
chunck_order := obiutils.AtomicCounter()
|
chunck_order := obiutils.AtomicCounter()
|
||||||
|
|
||||||
for i := 0; i < nworker; i++ {
|
for i := 0; i < nworker; i++ {
|
||||||
|
@ -112,7 +112,7 @@ func _storeSequenceQuality(bytes *bytes.Buffer, out *obiseq.BioSequence, quality
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < len(q); i++ {
|
for i := 0; i < len(q); i++ {
|
||||||
q[i] = q[i] - quality_shift
|
q[i] -= quality_shift
|
||||||
}
|
}
|
||||||
out.SetQualities(q)
|
out.SetQualities(q)
|
||||||
}
|
}
|
||||||
@ -309,7 +309,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
nworker := opt.ParallelWorkers()
|
nworker := opt.ParallelWorkers()
|
||||||
chunkorder := obiutils.AtomicCounter()
|
chunkorder := obiutils.AtomicCounter()
|
||||||
|
|
||||||
chkchan := ReadSeqFileChunk(reader, _EndOfLastFastqEntry)
|
buff := make([]byte, 1024*1024*1024)
|
||||||
|
|
||||||
|
chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastqEntry)
|
||||||
|
|
||||||
for i := 0; i < nworker; i++ {
|
for i := 0; i < nworker; i++ {
|
||||||
out.Add(1)
|
out.Add(1)
|
||||||
|
@ -46,17 +46,8 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
|||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
if seq.Len() > 0 {
|
if seq.Len() > 0 {
|
||||||
fs := FormatFastq(seq, formater)
|
fs := FormatFastq(seq, formater)
|
||||||
lb := bs.Len()
|
bs.WriteString(fs)
|
||||||
n, _ := bs.WriteString(fs)
|
|
||||||
|
|
||||||
if n < len(fs) {
|
|
||||||
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences")
|
|
||||||
}
|
|
||||||
bs.WriteString("\n")
|
bs.WriteString("\n")
|
||||||
|
|
||||||
if bs.Len()-lb < len(fs)+1 {
|
|
||||||
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences correctly")
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if skipEmpty {
|
if skipEmpty {
|
||||||
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
|
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
|
||||||
@ -69,12 +60,6 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
|||||||
|
|
||||||
chunk := bs.Bytes()
|
chunk := bs.Bytes()
|
||||||
|
|
||||||
chunk = chunk[:bs.Len()]
|
|
||||||
|
|
||||||
if chunk[0] != '@' {
|
|
||||||
log.Panicln("FormatFastqBatch: FASTQ format error")
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunk
|
return chunk
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,7 +233,9 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
|||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
// entry_channel := make(chan _FileChunk)
|
// entry_channel := make(chan _FileChunk)
|
||||||
|
|
||||||
entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry)
|
buff := make([]byte, 1024*1024*1024*256)
|
||||||
|
|
||||||
|
entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry)
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nworkers := opt.ParallelWorkers()
|
nworkers := opt.ParallelWorkers()
|
||||||
|
@ -33,10 +33,10 @@ type LastSeqRecord func([]byte) int
|
|||||||
// Returns:
|
// Returns:
|
||||||
// None
|
// None
|
||||||
func ReadSeqFileChunk(reader io.Reader,
|
func ReadSeqFileChunk(reader io.Reader,
|
||||||
|
buff []byte,
|
||||||
splitter LastSeqRecord) ChannelSeqFileChunk {
|
splitter LastSeqRecord) ChannelSeqFileChunk {
|
||||||
var err error
|
var err error
|
||||||
var fullbuff []byte
|
var fullbuff []byte
|
||||||
var buff []byte
|
|
||||||
|
|
||||||
chunk_channel := make(ChannelSeqFileChunk)
|
chunk_channel := make(ChannelSeqFileChunk)
|
||||||
|
|
||||||
@ -46,8 +46,7 @@ func ReadSeqFileChunk(reader io.Reader,
|
|||||||
i := 0
|
i := 0
|
||||||
|
|
||||||
// Initialize the buffer to the size of a chunk of data
|
// Initialize the buffer to the size of a chunk of data
|
||||||
fullbuff = make([]byte, _FileChunkSize, _FileChunkSize*2)
|
fullbuff = buff
|
||||||
buff = fullbuff
|
|
||||||
|
|
||||||
// Read from the reader until the buffer is full or the end of the file is reached
|
// Read from the reader until the buffer is full or the end of the file is reached
|
||||||
l, err = io.ReadFull(reader, buff)
|
l, err = io.ReadFull(reader, buff)
|
||||||
|
@ -99,20 +99,20 @@ func Index4mer(seq *obiseq.BioSequence, index *[][]int, buffer *[]byte) [][]int
|
|||||||
// FastShiftFourMer runs a Fast algorithm (similar to the one used in FASTA) to compare two sequences.
|
// FastShiftFourMer runs a Fast algorithm (similar to the one used in FASTA) to compare two sequences.
|
||||||
// The returned values are two integer values. The shift between both the sequences and the count of
|
// The returned values are two integer values. The shift between both the sequences and the count of
|
||||||
// matching 4mer when this shift is applied between both the sequences.
|
// matching 4mer when this shift is applied between both the sequences.
|
||||||
func FastShiftFourMer(index [][]int, lindex int, seq *obiseq.BioSequence, relscore bool, buffer *[]byte) (int, int, float64) {
|
func FastShiftFourMer(index [][]int, shifts *map[int]int, lindex int, seq *obiseq.BioSequence, relscore bool, buffer *[]byte) (int, int, float64) {
|
||||||
|
|
||||||
iternal_buffer := Encode4mer(seq, buffer)
|
iternal_buffer := Encode4mer(seq, buffer)
|
||||||
|
|
||||||
shifts := make(map[int]int, 3*seq.Len())
|
// shifts := make(map[int]int, 3*seq.Len())
|
||||||
|
|
||||||
for pos, code := range iternal_buffer {
|
for pos, code := range iternal_buffer {
|
||||||
for _, refpos := range index[code] {
|
for _, refpos := range index[code] {
|
||||||
shift := refpos - pos
|
shift := refpos - pos
|
||||||
count, ok := shifts[shift]
|
count, ok := (*shifts)[shift]
|
||||||
if ok {
|
if ok {
|
||||||
shifts[shift] = count + 1
|
(*shifts)[shift] = count + 1
|
||||||
} else {
|
} else {
|
||||||
shifts[shift] = 1
|
(*shifts)[shift] = 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -121,7 +121,8 @@ func FastShiftFourMer(index [][]int, lindex int, seq *obiseq.BioSequence, relsco
|
|||||||
maxcount := 0
|
maxcount := 0
|
||||||
maxscore := -1.0
|
maxscore := -1.0
|
||||||
|
|
||||||
for shift, count := range shifts {
|
for shift, count := range *shifts {
|
||||||
|
delete((*shifts), shift)
|
||||||
score := float64(count)
|
score := float64(count)
|
||||||
if relscore {
|
if relscore {
|
||||||
over := -shift
|
over := -shift
|
||||||
|
@ -15,11 +15,13 @@ import (
|
|||||||
|
|
||||||
var _Debug = false
|
var _Debug = false
|
||||||
var _WorkerPerCore = 2.0
|
var _WorkerPerCore = 2.0
|
||||||
var _ReadWorkerPerCore = 1.0
|
var _ReadWorkerPerCore = 0.5
|
||||||
|
var _WriteWorkerPerCore = 0.25
|
||||||
var _StrictReadWorker = 0
|
var _StrictReadWorker = 0
|
||||||
|
var _StrictWriteWorker = 0
|
||||||
var _ParallelFilesRead = 0
|
var _ParallelFilesRead = 0
|
||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
var _MaxAllowedCPU = runtime.NumCPU()
|
||||||
var _BatchSize = 5000
|
var _BatchSize = 2000
|
||||||
var _Pprof = false
|
var _Pprof = false
|
||||||
var _Quality_Shift_Input = byte(33)
|
var _Quality_Shift_Input = byte(33)
|
||||||
var _Quality_Shift_Output = byte(33)
|
var _Quality_Shift_Output = byte(33)
|
||||||
@ -175,12 +177,37 @@ func CLIParallelWorkers() int {
|
|||||||
// Returns an integer representing the number of parallel workers.
|
// Returns an integer representing the number of parallel workers.
|
||||||
func CLIReadParallelWorkers() int {
|
func CLIReadParallelWorkers() int {
|
||||||
if StrictReadWorker() == 0 {
|
if StrictReadWorker() == 0 {
|
||||||
return int(float64(CLIMaxCPU()) * ReadWorkerPerCore())
|
n := int(float64(CLIMaxCPU()) * ReadWorkerPerCore())
|
||||||
|
if n == 0 {
|
||||||
|
n = 1
|
||||||
|
}
|
||||||
|
return n
|
||||||
} else {
|
} else {
|
||||||
return StrictReadWorker()
|
return StrictReadWorker()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CLIWriteParallelWorkers returns the number of parallel workers used for
|
||||||
|
// writing files.
|
||||||
|
//
|
||||||
|
// The number of parallel workers is determined by the command line option
|
||||||
|
// --max-cpu|-m and the environment variable OBIMAXCPU. This number is
|
||||||
|
// multiplied by the variable _WriteWorkerPerCore.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns an integer representing the number of parallel workers.
|
||||||
|
func CLIWriteParallelWorkers() int {
|
||||||
|
if StrictWriteWorker() == 0 {
|
||||||
|
n := int(float64(CLIMaxCPU()) * WriteWorkerPerCore())
|
||||||
|
if n == 0 {
|
||||||
|
n = 1
|
||||||
|
}
|
||||||
|
return n
|
||||||
|
} else {
|
||||||
|
return StrictWriteWorker()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// CLIMaxCPU returns the maximum number of CPU cores allowed.
|
// CLIMaxCPU returns the maximum number of CPU cores allowed.
|
||||||
//
|
//
|
||||||
// The maximum number of CPU cores is determined by the command line option
|
// The maximum number of CPU cores is determined by the command line option
|
||||||
@ -247,6 +274,15 @@ func ReadWorkerPerCore() float64 {
|
|||||||
return _ReadWorkerPerCore
|
return _ReadWorkerPerCore
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WriteWorkerPerCore returns the number of workers per CPU core for
|
||||||
|
// writing files.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns a float64 representing the number of worker per CPU core.
|
||||||
|
func WriteWorkerPerCore() float64 {
|
||||||
|
return _WriteWorkerPerCore
|
||||||
|
}
|
||||||
|
|
||||||
// SetBatchSize sets the size of the sequence batches.
|
// SetBatchSize sets the size of the sequence batches.
|
||||||
//
|
//
|
||||||
// n - an integer representing the size of the sequence batches.
|
// n - an integer representing the size of the sequence batches.
|
||||||
@ -318,13 +354,33 @@ func StrictReadWorker() int {
|
|||||||
return _StrictReadWorker
|
return _StrictReadWorker
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetStrictWriteWorker sets the number of workers for writing files.
|
||||||
|
//
|
||||||
|
// The number of worker dedicated to writing files is determined
|
||||||
|
// as the number of allowed CPU cores multiplied by number of write workers per core.
|
||||||
|
// Setting the number of write workers using this function allows to decouple the number
|
||||||
|
// of write workers from the number of CPU cores.
|
||||||
|
//
|
||||||
|
// n - an integer representing the number of workers to be set.
|
||||||
|
func SetStrictWriteWorker(n int) {
|
||||||
|
_StrictWriteWorker = n
|
||||||
|
}
|
||||||
|
|
||||||
|
// StrictWriteWorker returns the number of workers for writing files.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns an integer representing the number of workers.
|
||||||
|
func StrictWriteWorker() int {
|
||||||
|
return _StrictWriteWorker
|
||||||
|
}
|
||||||
|
|
||||||
// ParallelFilesRead returns the number of files to be read in parallel.
|
// ParallelFilesRead returns the number of files to be read in parallel.
|
||||||
//
|
//
|
||||||
// No parameters.
|
// No parameters.
|
||||||
// Returns an integer representing the number of files to be read.
|
// Returns an integer representing the number of files to be read.
|
||||||
func ParallelFilesRead() int {
|
func ParallelFilesRead() int {
|
||||||
if _ParallelFilesRead == 0 {
|
if _ParallelFilesRead == 0 {
|
||||||
return CLIParallelWorkers()
|
return CLIReadParallelWorkers()
|
||||||
} else {
|
} else {
|
||||||
return _ParallelFilesRead
|
return _ParallelFilesRead
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
// TODO: The version number is extracted from git. This induces that the version
|
// TODO: The version number is extracted from git. This induces that the version
|
||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// committed
|
// committed
|
||||||
var _Commit = "612868a"
|
var _Commit = "bcaa264"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
@ -12,6 +12,7 @@ package obiseq
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
@ -418,12 +419,15 @@ func (s *BioSequence) SetFeatures(feature []byte) {
|
|||||||
s.feature = feature
|
s.feature = feature
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setting the sequence of the BioSequence.
|
// SetSequence sets the sequence of the BioSequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - sequence: a byte slice representing the sequence to be set.
|
||||||
func (s *BioSequence) SetSequence(sequence []byte) {
|
func (s *BioSequence) SetSequence(sequence []byte) {
|
||||||
if s.sequence != nil {
|
if s.sequence != nil {
|
||||||
RecycleSlice(&s.sequence)
|
RecycleSlice(&s.sequence)
|
||||||
}
|
}
|
||||||
s.sequence = CopySlice(obiutils.InPlaceToLower(sequence))
|
s.sequence = obiutils.InPlaceToLower(CopySlice(sequence))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setting the qualities of the BioSequence.
|
// Setting the qualities of the BioSequence.
|
||||||
@ -507,3 +511,15 @@ func (s *BioSequence) Composition() map[byte]int {
|
|||||||
|
|
||||||
return counts
|
return counts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) Grow(length int) {
|
||||||
|
if s.sequence == nil {
|
||||||
|
s.sequence = GetSlice(length)
|
||||||
|
} else {
|
||||||
|
s.sequence = slices.Grow(s.sequence, length)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.qualities != nil {
|
||||||
|
s.qualities = slices.Grow(s.qualities, length)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -84,7 +84,7 @@ func CopySlice(src []byte) []byte {
|
|||||||
|
|
||||||
var BioSequenceAnnotationPool = sync.Pool{
|
var BioSequenceAnnotationPool = sync.Pool{
|
||||||
New: func() interface{} {
|
New: func() interface{} {
|
||||||
bs := make(Annotation, 5)
|
bs := make(Annotation, 1)
|
||||||
return &bs
|
return &bs
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -105,15 +105,17 @@ func RecycleAnnotation(a *Annotation) {
|
|||||||
//
|
//
|
||||||
// It returns an Annotation.
|
// It returns an Annotation.
|
||||||
func GetAnnotation(values ...Annotation) Annotation {
|
func GetAnnotation(values ...Annotation) Annotation {
|
||||||
a := Annotation(nil)
|
a := (*Annotation)(nil)
|
||||||
|
|
||||||
for a == nil {
|
for a == nil || (*a == nil) {
|
||||||
a = *(BioSequenceAnnotationPool.Get().(*Annotation))
|
a = BioSequenceAnnotationPool.Get().(*Annotation)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
annot := *a
|
||||||
|
|
||||||
if len(values) > 0 {
|
if len(values) > 0 {
|
||||||
obiutils.MustFillMap(a, values[0])
|
obiutils.MustFillMap(annot, values[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
return a
|
return annot
|
||||||
}
|
}
|
||||||
|
@ -53,10 +53,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.CLIParallelWorkers() / 4
|
nworkers := obioptions.CLIWriteParallelWorkers()
|
||||||
if nworkers < 2 {
|
|
||||||
nworkers = 2
|
|
||||||
}
|
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
@ -55,6 +55,8 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
|
|||||||
seqA = seqA.Copy()
|
seqA = seqA.Copy()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
seqA.Grow(seqB.Len() + 10)
|
||||||
|
|
||||||
seqA.WriteString("..........")
|
seqA.WriteString("..........")
|
||||||
seqA.Write(seqB.Sequence())
|
seqA.Write(seqB.Sequence())
|
||||||
|
|
||||||
@ -108,13 +110,16 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
|
|||||||
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||||
gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
||||||
inplace bool, fastAlign, fastModeRel bool,
|
inplace bool, fastAlign, fastModeRel bool,
|
||||||
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
|
arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence {
|
||||||
|
|
||||||
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB,
|
score, path, fastcount, over, fastscore := obialign.PEAlign(
|
||||||
|
seqA, seqB,
|
||||||
gap, scale,
|
gap, scale,
|
||||||
fastAlign, delta, fastModeRel,
|
fastAlign, delta, fastModeRel,
|
||||||
arenaAlign)
|
arenaAlign, shifh_buff,
|
||||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
|
)
|
||||||
|
|
||||||
|
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true, arenaAlign)
|
||||||
|
|
||||||
left := path[0]
|
left := path[0]
|
||||||
right := 0
|
right := 0
|
||||||
@ -238,6 +243,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
f := func(iterator obiiter.IBioSequence, wid int) {
|
f := func(iterator obiiter.IBioSequence, wid int) {
|
||||||
arena := obialign.MakePEAlignArena(150, 150)
|
arena := obialign.MakePEAlignArena(150, 150)
|
||||||
|
shifts := make(map[int]int)
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
@ -246,7 +252,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
B := A.PairedWith()
|
B := A.PairedWith()
|
||||||
cons[i] = AssemblePESequences(A, B.ReverseComplement(true),
|
cons[i] = AssemblePESequences(A, B.ReverseComplement(true),
|
||||||
gap, scale,
|
gap, scale,
|
||||||
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
|
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena, &shifts)
|
||||||
}
|
}
|
||||||
newIter.Push(obiiter.MakeBioSequenceBatch(
|
newIter.Push(obiiter.MakeBioSequenceBatch(
|
||||||
batch.Order(),
|
batch.Order(),
|
||||||
|
@ -37,6 +37,7 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
f := func(iterator obiiter.IBioSequence, wid int) {
|
f := func(iterator obiiter.IBioSequence, wid int) {
|
||||||
arena := obialign.MakePEAlignArena(150, 150)
|
arena := obialign.MakePEAlignArena(150, 150)
|
||||||
var err error
|
var err error
|
||||||
|
shifts := make(map[int]int)
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
@ -46,7 +47,7 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
A.Copy(), B.ReverseComplement(false),
|
A.Copy(), B.ReverseComplement(false),
|
||||||
gap, scale,
|
gap, scale,
|
||||||
delta, minOverlap, minIdentity, withStats, true,
|
delta, minOverlap, minIdentity, withStats, true,
|
||||||
fastAlign, fastScoreRel, arena,
|
fastAlign, fastScoreRel, arena, &shifts,
|
||||||
)
|
)
|
||||||
|
|
||||||
consensus, err = ngsfilter.ExtractBarcode(consensus, true)
|
consensus, err = ngsfilter.ExtractBarcode(consensus, true)
|
||||||
|
Reference in New Issue
Block a user