diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 4150f57..fb22ccd 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -87,7 +87,7 @@ func EndOfLastFlatFileEntry(buff []byte) int { return -1 } -func EmblChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { +func EmblChunkParser(withFeatureTable, UtoT bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { parser := func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) { scanner := bufio.NewScanner(input) sequences := make(obiseq.BioSequenceSlice, 0, 100) @@ -128,6 +128,9 @@ func EmblChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.BioS parts := strings.SplitN(line[5:], " ", 7) np := len(parts) - 1 for i := 0; i < np; i++ { + if UtoT { + parts[i] = strings.ReplaceAll(parts[i], "u", "t") + } seqBytes.WriteString(parts[i]) } case line == "//": @@ -161,10 +164,10 @@ func EmblChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.BioS func _ParseEmblFile( input ChannelFileChunk, out obiiter.IBioSequence, - withFeatureTable bool, + withFeatureTable, UtoT bool, ) { - parser := EmblChunkParser(withFeatureTable) + parser := EmblChunkParser(withFeatureTable, UtoT) for chunks := range input { order := chunks.Order @@ -206,6 +209,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, er entry_channel, newIter, opt.WithFeatureTable(), + opt.UtoT(), ) } diff --git a/pkg/obiformats/fastaseq_read.go b/pkg/obiformats/fastaseq_read.go index a2fc3be..8c8441c 100644 --- a/pkg/obiformats/fastaseq_read.go +++ b/pkg/obiformats/fastaseq_read.go @@ -39,7 +39,7 @@ func EndOfLastFastaEntry(buffer []byte) int { return last } -func FastaChunkParser() func(string, io.Reader) (obiseq.BioSequenceSlice, error) { +func FastaChunkParser(UtoT bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { parser := func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) { var identifier string @@ -131,7 +131,9 @@ func FastaChunkParser() func(string, io.Reader) (obiseq.BioSequenceSlice, error) if C >= 'A' && C <= 'Z' { C = C + 'a' - 'A' } - + if UtoT && C == 'u' { + C = 't' + } if (C >= 'a' && C <= 'z') || C == '-' || C == '.' || C == '[' || C == ']' { seqBytes.WriteByte(C) } else { @@ -170,6 +172,9 @@ func FastaChunkParser() func(string, io.Reader) (obiseq.BioSequenceSlice, error) if C >= 'A' && C <= 'Z' { C = C + 'a' - 'A' } + if UtoT && C == 'u' { + C = 't' + } // Removing white space from the sequence if (C >= 'a' && C <= 'z') || C == '-' || C == '.' || C == '[' || C == ']' { seqBytes.WriteByte(C) @@ -207,9 +212,10 @@ func FastaChunkParser() func(string, io.Reader) (obiseq.BioSequenceSlice, error) func _ParseFastaFile( input ChannelFileChunk, out obiiter.IBioSequence, + UtoT bool, ) { - parser := FastaChunkParser() + parser := FastaChunkParser(UtoT) for chunks := range input { sequences, err := parser(chunks.Source, chunks.Raw) @@ -243,7 +249,7 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e for i := 0; i < nworker; i++ { out.Add(1) - go _ParseFastaFile(chkchan, out) + go _ParseFastaFile(chkchan, out, opt.UtoT()) } go func() { diff --git a/pkg/obiformats/fastqseq_read.go b/pkg/obiformats/fastqseq_read.go index f7f79fb..c092e32 100644 --- a/pkg/obiformats/fastqseq_read.go +++ b/pkg/obiformats/fastqseq_read.go @@ -131,7 +131,7 @@ func _storeSequenceQuality(bytes *bytes.Buffer, out *obiseq.BioSequence, quality out.SetQualities(q) } -func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { +func FastqChunkParser(quality_shift byte, with_quality bool, UtoT bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { parser := func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) { var identifier string @@ -209,6 +209,9 @@ func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Rea if C >= 'A' && C <= 'Z' { C = C + 'a' - 'A' } + if C == 'u' && UtoT { + C = 't' + } seqBytes.Reset() seqBytes.WriteByte(C) state = 6 @@ -228,6 +231,9 @@ func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Rea if C >= 'A' && C <= 'Z' { C = C + 'a' - 'A' } + if C == 'u' && UtoT { + C = 't' + } if (C >= 'a' && C <= 'z') || C == '-' || C == '.' || C == '[' || C == ']' { seqBytes.WriteByte(C) } else { @@ -301,10 +307,10 @@ func _ParseFastqFile( input ChannelFileChunk, out obiiter.IBioSequence, quality_shift byte, - with_quality bool, + with_quality, UtoT bool, ) { - parser := FastqChunkParser(quality_shift, with_quality) + parser := FastqChunkParser(quality_shift, with_quality, UtoT) for chunks := range input { sequences, err := parser(chunks.Source, chunks.Raw) @@ -342,6 +348,7 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e out, obidefault.ReadQualitiesShift(), opt.ReadQualities(), + opt.UtoT(), ) } diff --git a/pkg/obiformats/genbank_read.go b/pkg/obiformats/genbank_read.go index 6c23ef6..6c308dd 100644 --- a/pkg/obiformats/genbank_read.go +++ b/pkg/obiformats/genbank_read.go @@ -29,7 +29,7 @@ const ( var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp") -func GenbankChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { +func GenbankChunkParser(withFeatureTable, UtoT bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) { return func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) { state := inHeader scanner := bufio.NewReader(input) @@ -165,6 +165,9 @@ func GenbankChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.B parts := strings.SplitN(line[10:], " ", 6) lparts := len(parts) for i := 0; i < lparts; i++ { + if UtoT { + parts[i] = strings.ReplaceAll(parts[i], "u", "t") + } seqBytes.WriteString(parts[i]) } processed = true @@ -200,9 +203,9 @@ func GenbankChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.B func _ParseGenbankFile(input ChannelFileChunk, out obiiter.IBioSequence, - withFeatureTable bool) { + withFeatureTable, UtoT bool) { - parser := GenbankChunkParser(withFeatureTable) + parser := GenbankChunkParser(withFeatureTable, UtoT) for chunks := range input { sequences, err := parser(chunks.Source, chunks.Raw) @@ -242,6 +245,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, entry_channel, newIter, opt.WithFeatureTable(), + opt.UtoT(), ) }