Refactor code and change the algorithm used to read from multiple files

This commit is contained in:
2022-08-23 15:07:06 +02:00
parent bdf317819b
commit 989e678f6b
6 changed files with 157 additions and 49 deletions

View File

@@ -73,7 +73,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, erro
opts := make([]obiformats.WithOption, 0, 10)
switch InputFastHeaderFormat() {
switch CLIInputFastHeaderFormat() {
case "json":
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseFastSeqJsonHeader))
case "obi":
@@ -91,15 +91,17 @@ func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, erro
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))
if len(filenames) == 0 {
switch InputFormat() {
switch CLIInputFormat() {
case "ecopcr":
iterator = obiformats.ReadEcoPCRBatch(os.Stdin, opts...)
case "embl":
iterator = obiformats.ReadEMBLBatch(os.Stdin, opts...)
case "genbank":
iterator = obiformats.ReadGenbankBatch(os.Stdin, opts...)
default:
iterator = obiformats.ReadFastSeqBatchFromStdin(opts...)
}
@@ -110,39 +112,56 @@ func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, erro
return obiiter.NilIBioSequenceBatch, err
}
switch InputFormat() {
switch CLIInputFormat() {
case "ecopcr":
reader = obiformats.ReadEcoPCRBatchFromFile
case "embl":
reader = obiformats.ReadEMBLBatchFromFile
case "genbank":
reader = obiformats.ReadGenbankBatchFromFile
default:
reader = obiformats.ReadSequencesBatchFromFile
}
iterator, err = reader(list_of_files[0], opts...)
if len(list_of_files) > 1 {
nreader := 1
if err != nil {
return obiiter.NilIBioSequenceBatch, err
}
if CLINoInputOrder() {
nreader = obioptions.CLIParallelWorkers()
}
iterator = obiformats.ReadSequencesBatchFromFiles(
filenames,
reader,
nreader,
opts...,
)
} else {
iterator, err = reader(list_of_files[0], opts...)
list_of_files = list_of_files[1:]
others := make([]obiiter.IBioSequenceBatch, 0, len(list_of_files))
for _, fn := range list_of_files {
r, err := reader(fn, opts...)
if err != nil {
return obiiter.NilIBioSequenceBatch, err
}
others = append(others, r)
}
if len(others) > 0 {
if NoInputOrder() {
iterator = iterator.Pool(others...)
} else {
iterator = iterator.Concat(others...)
}
}
// list_of_files = list_of_files[1:]
// others := make([]obiiter.IBioSequenceBatch, 0, len(list_of_files))
// for _, fn := range list_of_files {
// r, err := reader(fn, opts...)
// if err != nil {
// return obiiter.NilIBioSequenceBatch, err
// }
// others = append(others, r)
// }
// if len(others) > 0 {
// if CLINoInputOrder() {
// iterator = iterator.Pool(others...)
// } else {
// iterator = iterator.Concat(others...)
// }
// }
}

View File

@@ -12,7 +12,7 @@ func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error
opts := make([]obiformats.WithOption, 0, 10)
switch OutputFastHeaderFormat() {
switch CLIOutputFastHeaderFormat() {
case "json":
log.Println("On output use JSON headers")
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
@@ -33,12 +33,12 @@ func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
var err error
if len(filenames) == 0 {
switch OutputFormat() {
switch CLIOutputFormat() {
case "fastq":
err = obiformats.WriteFastqToStdout(iterator, opts...)
case "fasta":
@@ -47,7 +47,7 @@ func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error
err = obiformats.WriteSequencesToStdout(iterator, opts...)
}
} else {
switch OutputFormat() {
switch CLIOutputFormat() {
case "fastq":
err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
case "fasta":
@@ -72,7 +72,7 @@ func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
opts := make([]obiformats.WithOption, 0, 10)
switch OutputFastHeaderFormat() {
switch CLIOutputFastHeaderFormat() {
case "json":
log.Println("On output use JSON headers")
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
@@ -93,12 +93,12 @@ func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
var err error
if len(filenames) == 0 {
switch OutputFormat() {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqBatchToStdout(iterator, opts...)
case "fasta":
@@ -107,7 +107,7 @@ func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
newIter, err = obiformats.WriteSequencesBatchToStdout(iterator, opts...)
}
} else {
switch OutputFormat() {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqBatchToFile(iterator, filenames[0], opts...)
case "fasta":