mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds possibility to provide the ngsfilter configuration as a CSV file
Former-commit-id: f0fd2cb1a7b149ae2a330edc5087b21be2c4585b
This commit is contained in:
@ -11,7 +11,6 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
@ -34,7 +33,7 @@ func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
|
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
||||||
formater FormatHeader, skipEmpty bool) []byte {
|
formater FormatHeader, skipEmpty bool) []byte {
|
||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
@ -75,7 +74,6 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
chunkchan := make(chan FileChunck)
|
chunkchan := make(chan FileChunck)
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
quality := obioptions.OutputQualityShift()
|
|
||||||
|
|
||||||
newIter.Add(nwriters)
|
newIter.Add(nwriters)
|
||||||
|
|
||||||
@ -94,7 +92,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
chunk := FileChunck{
|
chunk := FileChunck{
|
||||||
FormatFastqBatch(batch, quality, header_format, opt.SkipEmptySequence()),
|
FormatFastqBatch(batch, header_format, opt.SkipEmptySequence()),
|
||||||
batch.Order(),
|
batch.Order(),
|
||||||
}
|
}
|
||||||
chunkchan <- chunk
|
chunkchan <- chunk
|
||||||
|
@ -2,12 +2,17 @@ package obiformats
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/csv"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"github.com/gabriel-vasile/mimetype"
|
||||||
)
|
)
|
||||||
|
|
||||||
func _readLines(reader io.Reader) []string {
|
func _readLines(reader io.Reader) []string {
|
||||||
@ -80,7 +85,48 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func OBIMimeNGSFilterTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
||||||
|
|
||||||
|
// Create a buffer to store the read data
|
||||||
|
buf := make([]byte, 1024*128)
|
||||||
|
n, err := io.ReadFull(stream, buf)
|
||||||
|
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect the MIME type using the mimetype library
|
||||||
|
mimeType := mimetype.Detect(buf[:n])
|
||||||
|
if mimeType == nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new reader based on the read data
|
||||||
|
newReader := io.Reader(bytes.NewReader(buf[:n]))
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
newReader = io.MultiReader(newReader, stream)
|
||||||
|
}
|
||||||
|
|
||||||
|
return mimeType, newReader, nil
|
||||||
|
}
|
||||||
|
|
||||||
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||||
|
mimetype, newReader, err := OBIMimeNGSFilterTypeGuesser(reader)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("NGSFilter configuration mimetype: %s", mimetype.String())
|
||||||
|
|
||||||
|
if mimetype.String() == "text/csv" {
|
||||||
|
return ReadCSVNGSFilter(newReader)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ReadOldNGSFilter(newReader)
|
||||||
|
}
|
||||||
|
func ReadOldNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||||
ngsfilter := obingslibrary.MakeNGSLibrary()
|
ngsfilter := obingslibrary.MakeNGSLibrary()
|
||||||
|
|
||||||
lines := _readLines(reader)
|
lines := _readLines(reader)
|
||||||
@ -126,3 +172,104 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
|||||||
|
|
||||||
return ngsfilter, nil
|
return ngsfilter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ReadCSVNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||||
|
ngsfilter := obingslibrary.MakeNGSLibrary()
|
||||||
|
file := csv.NewReader(reader)
|
||||||
|
|
||||||
|
file.Comma = ','
|
||||||
|
file.Comment = '#'
|
||||||
|
file.TrimLeadingSpace = true
|
||||||
|
file.ReuseRecord = true
|
||||||
|
|
||||||
|
records, err := file.ReadAll()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("Read ", len(records), " records")
|
||||||
|
log.Infof("First record: %s", records[0])
|
||||||
|
|
||||||
|
header := records[0]
|
||||||
|
data := records[1:]
|
||||||
|
|
||||||
|
// Find the index of the column named "sample"
|
||||||
|
experimentColIndex := -1
|
||||||
|
sampleColIndex := -1
|
||||||
|
sample_tagColIndex := -1
|
||||||
|
forward_primerColIndex := -1
|
||||||
|
reverse_primerColIndex := -1
|
||||||
|
|
||||||
|
extraColumns := make([]int, 0)
|
||||||
|
|
||||||
|
for i, colName := range header {
|
||||||
|
switch colName {
|
||||||
|
case "experiment":
|
||||||
|
experimentColIndex = i
|
||||||
|
case "sample":
|
||||||
|
sampleColIndex = i
|
||||||
|
case "sample_tag":
|
||||||
|
sample_tagColIndex = i
|
||||||
|
case "forward_primer":
|
||||||
|
forward_primerColIndex = i
|
||||||
|
case "reverse_primer":
|
||||||
|
reverse_primerColIndex = i
|
||||||
|
default:
|
||||||
|
extraColumns = append(extraColumns, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if experimentColIndex == -1 {
|
||||||
|
return nil, fmt.Errorf("column 'experiment' not found in the CSV file")
|
||||||
|
}
|
||||||
|
|
||||||
|
if sampleColIndex == -1 {
|
||||||
|
return nil, fmt.Errorf("column 'sample' not found in the CSV file")
|
||||||
|
}
|
||||||
|
|
||||||
|
if sample_tagColIndex == -1 {
|
||||||
|
return nil, fmt.Errorf("column 'sample_tag' not found in the CSV file")
|
||||||
|
}
|
||||||
|
|
||||||
|
if forward_primerColIndex == -1 {
|
||||||
|
return nil, fmt.Errorf("column 'forward_primer' not found in the CSV file")
|
||||||
|
}
|
||||||
|
|
||||||
|
if reverse_primerColIndex == -1 {
|
||||||
|
return nil, fmt.Errorf("column 'reverse_primer' not found in the CSV file")
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, fields := range data {
|
||||||
|
if len(fields) != len(header) {
|
||||||
|
return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header))
|
||||||
|
}
|
||||||
|
|
||||||
|
forward_primer := fields[forward_primerColIndex]
|
||||||
|
reverse_primer := fields[reverse_primerColIndex]
|
||||||
|
tags := _parseMainNGSFilterTags(fields[sample_tagColIndex])
|
||||||
|
|
||||||
|
marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer)
|
||||||
|
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
return ngsfilter,
|
||||||
|
fmt.Errorf("line %d : tag pair (%s,%s) used more than once with marker (%s,%s)",
|
||||||
|
i, tags.Forward, tags.Reverse, forward_primer, reverse_primer)
|
||||||
|
}
|
||||||
|
|
||||||
|
pcr.Experiment = fields[experimentColIndex]
|
||||||
|
pcr.Sample = fields[sampleColIndex]
|
||||||
|
pcr.Partial = false
|
||||||
|
|
||||||
|
if extraColumns != nil {
|
||||||
|
pcr.Annotations = make(obiseq.Annotation)
|
||||||
|
for _, colIndex := range extraColumns {
|
||||||
|
pcr.Annotations[header[colIndex]] = fields[colIndex]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return ngsfilter, nil
|
||||||
|
}
|
||||||
|
@ -21,8 +21,8 @@ var _ParallelFilesRead = 0
|
|||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
var _MaxAllowedCPU = runtime.NumCPU()
|
||||||
var _BatchSize = 5000
|
var _BatchSize = 5000
|
||||||
var _Pprof = false
|
var _Pprof = false
|
||||||
var _Quality_Shift_Input = 33
|
var _Quality_Shift_Input = byte(33)
|
||||||
var _Quality_Shift_Output = 33
|
var _Quality_Shift_Output = byte(33)
|
||||||
var _Version = "4.2.1"
|
var _Version = "4.2.1"
|
||||||
|
|
||||||
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
|
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
|
||||||
@ -266,7 +266,7 @@ func SetBatchSize(n int) {
|
|||||||
//
|
//
|
||||||
// No parameters.
|
// No parameters.
|
||||||
// Returns an integer representing the quality shift value for input.
|
// Returns an integer representing the quality shift value for input.
|
||||||
func InputQualityShift() int {
|
func InputQualityShift() byte {
|
||||||
return _Quality_Shift_Input
|
return _Quality_Shift_Input
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,22 +274,22 @@ func InputQualityShift() int {
|
|||||||
//
|
//
|
||||||
// No parameters.
|
// No parameters.
|
||||||
// Returns an integer representing the quality shift value for output.
|
// Returns an integer representing the quality shift value for output.
|
||||||
func OutputQualityShift() int {
|
func OutputQualityShift() byte {
|
||||||
return _Quality_Shift_Output
|
return _Quality_Shift_Output
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetInputQualityShift sets the quality shift value for decoding FASTQ.
|
// SetInputQualityShift sets the quality shift value for decoding FASTQ.
|
||||||
//
|
//
|
||||||
// n - an integer representing the quality shift value to be set.
|
// n - an integer representing the quality shift value to be set.
|
||||||
func SetInputQualityShift(n int) {
|
func SetInputQualityShift[T int | byte](n T) {
|
||||||
_Quality_Shift_Input = n
|
_Quality_Shift_Input = byte(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetOutputQualityShift sets the quality shift value used for FASTQ output.
|
// SetOutputQualityShift sets the quality shift value used for FASTQ output.
|
||||||
//
|
//
|
||||||
// n - an integer representing the quality shift value to be set.
|
// n - an integer representing the quality shift value to be set.
|
||||||
func SetOutputQualityShift(n int) {
|
func SetOutputQualityShift[T int | byte](n T) {
|
||||||
_Quality_Shift_Output = n
|
_Quality_Shift_Output = byte(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetMaxCPU sets the maximum number of CPU cores allowed.
|
// SetMaxCPU sets the maximum number of CPU cores allowed.
|
||||||
|
@ -8,6 +8,8 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _NGSFilterFile = ""
|
var _NGSFilterFile = ""
|
||||||
@ -74,6 +76,7 @@ func CLINGSFIlter() (obingslibrary.NGSLibrary, error) {
|
|||||||
return nil, fmt.Errorf("open file error: %v", err)
|
return nil, fmt.Errorf("open file error: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Infof("Reading NGSFilter file: %s", _NGSFilterFile)
|
||||||
ngsfiler, err := obiformats.ReadNGSFilter(file)
|
ngsfiler, err := obiformats.ReadNGSFilter(file)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Reference in New Issue
Block a user