From 3424d3057f7a63ad131f6bbddbb3c1d818dc3bb6 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 14 May 2025 14:53:25 +0200 Subject: [PATCH] Changes to be committed: modified: pkg/obiformats/ngsfilter_read.go modified: pkg/obioptions/version.go modified: pkg/obiutils/mimetypes.go --- pkg/obiformats/ngsfilter_read.go | 11 +++++++---- pkg/obioptions/version.go | 2 +- pkg/obiutils/mimetypes.go | 29 +++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index fb409ce..df73a71 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -89,7 +89,7 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T } func NGSFilterCsvDetector(raw []byte, limit uint32) bool { - r := csv.NewReader(bytes.NewReader(obiutils.DropLastLine(raw, limit))) + r := csv.NewReader(bytes.NewReader(obiutils.DropLastLine(raw))) r.Comma = ',' r.ReuseRecord = true r.LazyQuotes = true @@ -110,7 +110,6 @@ func NGSFilterCsvDetector(raw []byte, limit uint32) bool { if err != nil { return false } - if nfields == 0 { nfields = len(rec) } else if nfields != len(rec) { @@ -133,16 +132,20 @@ func OBIMimeNGSFilterTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, e return nil, nil, err } + buf = buf[:n] + + obiutils.HasBOM(buf) + mimetype.Lookup("text/plain").Extend(NGSFilterCsvDetector, "text/ngsfilter-csv", ".csv") // Detect the MIME type using the mimetype library - mimeType := mimetype.Detect(buf[:n]) + mimeType := mimetype.Detect(buf) if mimeType == nil { return nil, nil, err } // Create a new reader based on the read data - newReader := io.Reader(bytes.NewReader(buf[:n])) + newReader := io.Reader(bytes.NewReader(buf)) if err == nil { newReader = io.MultiReader(newReader, stream) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 33cb754..92b90d4 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "f1b9ac4" +var _Commit = "f9324dd" var _Version = "Release 4.4.0" // Version returns the version of the obitools package. diff --git a/pkg/obiutils/mimetypes.go b/pkg/obiutils/mimetypes.go index 3192764..4d53c49 100644 --- a/pkg/obiutils/mimetypes.go +++ b/pkg/obiutils/mimetypes.go @@ -8,12 +8,33 @@ import ( "regexp" "github.com/gabriel-vasile/mimetype" + log "github.com/sirupsen/logrus" ) -func DropLastLine(b []byte, readLimit uint32) []byte { - if readLimit == 0 || uint32(len(b)) < readLimit { - return b +func HasBOM(data []byte) bool { + switch { + case bytes.HasPrefix(data, []byte{0xEF, 0xBB, 0xBF}): + log.Infoln("BOM detected: UTF-8 (EF BB BF)") + return true + case bytes.HasPrefix(data, []byte{0xFE, 0xFF}): + log.Infoln("BOM detected: UTF-16 Big Endian (FE FF)") + return true + case bytes.HasPrefix(data, []byte{0xFF, 0xFE}): + log.Infoln("BOM detected: UTF-16 Little Endian (FF FE)") + return true + case bytes.HasPrefix(data, []byte{0x00, 0x00, 0xFE, 0xFF}): + log.Infoln("BOM detected: UTF-32 Big Endian (00 00 FE FF)") + return true + case bytes.HasPrefix(data, []byte{0xFF, 0xFE, 0x00, 0x00}): + log.Infoln("BOM detected: UTF-32 Little Endian (FF FE 00 00)") + return true + default: + log.Infoln("No BOM detected") + return false } +} + +func DropLastLine(b []byte) []byte { for i := len(b) - 1; i > 0; i-- { if b[i] == '\n' { return b[:i] @@ -27,7 +48,7 @@ var __obimimetype_registred__ = false func RegisterOBIMimeType() { if !__obimimetype_registred__ { csv := func(in []byte, limit uint32) bool { - in = DropLastLine(in, limit) + in = DropLastLine(in) br := bytes.NewReader(in) r := csv.NewReader(br)