Refactor sequence file reading

Former-commit-id: 3dcb96e68da648d72bb585da047e3496427d7851
This commit is contained in:
Eric Coissac
2024-05-01 00:50:23 +02:00
parent 33d4d63acd
commit 5b98393a68
7 changed files with 491 additions and 555 deletions

View File

@@ -30,7 +30,8 @@ const (
var _seqlenght_rx = regexp.MustCompile(" +([0-9]+) bp")
func _ParseGenbankFile(source string,
input <-chan _FileChunk, out obiiter.IBioSequence,
input ChannelSeqFileChunk,
out obiiter.IBioSequence,
chunck_order func() int,
withFeatureTable bool,
batch_size int,
@@ -230,27 +231,31 @@ func _ParseGenbankFile(source string,
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk)
// entry_channel := make(chan _FileChunk)
entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry)
newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers()
chunck_order := obiutils.AtomicCounter()
newIter.Add(nworkers)
// for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ {
newIter.Add(1)
go _ParseGenbankFile(opt.Source(),
entry_channel, newIter, chunck_order,
opt.WithFeatureTable(),
opt.BatchSize(),
opt.TotalSeqSize())
}
// go _ReadFlatFileChunk(reader, entry_channel)
go func() {
newIter.WaitAndClose()
}()
// for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ {
go _ParseGenbankFile(opt.Source(), entry_channel, newIter, chunck_order,
opt.WithFeatureTable(), opt.BatchSize(), opt.TotalSeqSize())
}
go _ReadFlatFileChunk(reader, entry_channel)
if opt.pointer.full_file_batch {
if opt.FullFileBatch() {
newIter = newIter.CompleteFileIterator()
}