diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index b99474d..0dd570e 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -11,7 +11,6 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -34,7 +33,7 @@ func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string { ) } -func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int, +func FormatFastqBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte { var bs bytes.Buffer for _, seq := range batch.Slice() { @@ -75,7 +74,6 @@ func WriteFastq(iterator obiiter.IBioSequence, chunkchan := make(chan FileChunck) header_format := opt.FormatFastSeqHeader() - quality := obioptions.OutputQualityShift() newIter.Add(nwriters) @@ -94,7 +92,7 @@ func WriteFastq(iterator obiiter.IBioSequence, for iterator.Next() { batch := iterator.Get() chunk := FileChunck{ - FormatFastqBatch(batch, quality, header_format, opt.SkipEmptySequence()), + FormatFastqBatch(batch, header_format, opt.SkipEmptySequence()), batch.Order(), } chunkchan <- chunk diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index e0f224f..dd1627b 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -2,12 +2,17 @@ package obiformats import ( "bufio" + "bytes" + "encoding/csv" "fmt" "io" "strings" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "github.com/gabriel-vasile/mimetype" ) func _readLines(reader io.Reader) []string { @@ -80,7 +85,48 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T true } +func OBIMimeNGSFilterTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) { + + // Create a buffer to store the read data + buf := make([]byte, 1024*128) + n, err := io.ReadFull(stream, buf) + + if err != nil && err != io.ErrUnexpectedEOF { + return nil, nil, err + } + + // Detect the MIME type using the mimetype library + mimeType := mimetype.Detect(buf[:n]) + if mimeType == nil { + return nil, nil, err + } + + // Create a new reader based on the read data + newReader := io.Reader(bytes.NewReader(buf[:n])) + + if err == nil { + newReader = io.MultiReader(newReader, stream) + } + + return mimeType, newReader, nil +} + func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { + mimetype, newReader, err := OBIMimeNGSFilterTypeGuesser(reader) + + if err != nil { + return nil, err + } + + log.Infof("NGSFilter configuration mimetype: %s", mimetype.String()) + + if mimetype.String() == "text/csv" { + return ReadCSVNGSFilter(newReader) + } + + return ReadOldNGSFilter(newReader) +} +func ReadOldNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { ngsfilter := obingslibrary.MakeNGSLibrary() lines := _readLines(reader) @@ -126,3 +172,104 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { return ngsfilter, nil } + +func ReadCSVNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { + ngsfilter := obingslibrary.MakeNGSLibrary() + file := csv.NewReader(reader) + + file.Comma = ',' + file.Comment = '#' + file.TrimLeadingSpace = true + file.ReuseRecord = true + + records, err := file.ReadAll() + + if err != nil { + return nil, err + } + + log.Info("Read ", len(records), " records") + log.Infof("First record: %s", records[0]) + + header := records[0] + data := records[1:] + + // Find the index of the column named "sample" + experimentColIndex := -1 + sampleColIndex := -1 + sample_tagColIndex := -1 + forward_primerColIndex := -1 + reverse_primerColIndex := -1 + + extraColumns := make([]int, 0) + + for i, colName := range header { + switch colName { + case "experiment": + experimentColIndex = i + case "sample": + sampleColIndex = i + case "sample_tag": + sample_tagColIndex = i + case "forward_primer": + forward_primerColIndex = i + case "reverse_primer": + reverse_primerColIndex = i + default: + extraColumns = append(extraColumns, i) + } + } + + if experimentColIndex == -1 { + return nil, fmt.Errorf("column 'experiment' not found in the CSV file") + } + + if sampleColIndex == -1 { + return nil, fmt.Errorf("column 'sample' not found in the CSV file") + } + + if sample_tagColIndex == -1 { + return nil, fmt.Errorf("column 'sample_tag' not found in the CSV file") + } + + if forward_primerColIndex == -1 { + return nil, fmt.Errorf("column 'forward_primer' not found in the CSV file") + } + + if reverse_primerColIndex == -1 { + return nil, fmt.Errorf("column 'reverse_primer' not found in the CSV file") + } + + for i, fields := range data { + if len(fields) != len(header) { + return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header)) + } + + forward_primer := fields[forward_primerColIndex] + reverse_primer := fields[reverse_primerColIndex] + tags := _parseMainNGSFilterTags(fields[sample_tagColIndex]) + + marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer) + pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse) + + if ok { + return ngsfilter, + fmt.Errorf("line %d : tag pair (%s,%s) used more than once with marker (%s,%s)", + i, tags.Forward, tags.Reverse, forward_primer, reverse_primer) + } + + pcr.Experiment = fields[experimentColIndex] + pcr.Sample = fields[sampleColIndex] + pcr.Partial = false + + if extraColumns != nil { + pcr.Annotations = make(obiseq.Annotation) + for _, colIndex := range extraColumns { + pcr.Annotations[header[colIndex]] = fields[colIndex] + } + } + + } + + return ngsfilter, nil +} diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 9d7e3c2..013ecd1 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -21,8 +21,8 @@ var _ParallelFilesRead = 0 var _MaxAllowedCPU = runtime.NumCPU() var _BatchSize = 5000 var _Pprof = false -var _Quality_Shift_Input = 33 -var _Quality_Shift_Output = 33 +var _Quality_Shift_Input = byte(33) +var _Quality_Shift_Output = byte(33) var _Version = "4.2.1" type ArgumentParser func([]string) (*getoptions.GetOpt, []string) @@ -266,7 +266,7 @@ func SetBatchSize(n int) { // // No parameters. // Returns an integer representing the quality shift value for input. -func InputQualityShift() int { +func InputQualityShift() byte { return _Quality_Shift_Input } @@ -274,22 +274,22 @@ func InputQualityShift() int { // // No parameters. // Returns an integer representing the quality shift value for output. -func OutputQualityShift() int { +func OutputQualityShift() byte { return _Quality_Shift_Output } // SetInputQualityShift sets the quality shift value for decoding FASTQ. // // n - an integer representing the quality shift value to be set. -func SetInputQualityShift(n int) { - _Quality_Shift_Input = n +func SetInputQualityShift[T int | byte](n T) { + _Quality_Shift_Input = byte(n) } // SetOutputQualityShift sets the quality shift value used for FASTQ output. // // n - an integer representing the quality shift value to be set. -func SetOutputQualityShift(n int) { - _Quality_Shift_Output = n +func SetOutputQualityShift[T int | byte](n T) { + _Quality_Shift_Output = byte(n) } // SetMaxCPU sets the maximum number of CPU cores allowed. diff --git a/pkg/obitools/obimultiplex/options.go b/pkg/obitools/obimultiplex/options.go index a054764..28723f1 100644 --- a/pkg/obitools/obimultiplex/options.go +++ b/pkg/obitools/obimultiplex/options.go @@ -8,6 +8,8 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "github.com/DavidGamba/go-getoptions" + + log "github.com/sirupsen/logrus" ) var _NGSFilterFile = "" @@ -74,6 +76,7 @@ func CLINGSFIlter() (obingslibrary.NGSLibrary, error) { return nil, fmt.Errorf("open file error: %v", err) } + log.Infof("Reading NGSFilter file: %s", _NGSFilterFile) ngsfiler, err := obiformats.ReadNGSFilter(file) if err != nil {