mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
change the model for representing paired reads and extend its usage to other commands
This commit is contained in:
@@ -27,6 +27,9 @@ var __output_solexa_quality__ = false
|
||||
var __no_progress_bar__ = false
|
||||
var __compressed__ = false
|
||||
|
||||
var __output_file_name__ = "-"
|
||||
var __paired_file_name__ = ""
|
||||
|
||||
func InputOptionSet(options *getoptions.GetOpt) {
|
||||
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
|
||||
// options.Description("The N first sequence records of the file are discarded from the analysis and not reported to the output file."))
|
||||
@@ -73,15 +76,29 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||
options.Description("Disable the progress bar printing"))
|
||||
|
||||
options.BoolVar(&__compressed__, "--compress", false,
|
||||
options.BoolVar(&__compressed__, "compress", false,
|
||||
options.Alias("Z"),
|
||||
options.Description("Output is compressed"))
|
||||
|
||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||
options.Alias("o"),
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("Filename used for saving the output"),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&__paired_file_name__, "paired-with", __paired_file_name__,
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("Filename containing the paired reads"),
|
||||
)
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
InputOptionSet(options)
|
||||
OutputOptionSet(options)
|
||||
PairedFilesOptionSet(options)
|
||||
}
|
||||
|
||||
// Returns true if the number of reads described in the
|
||||
@@ -170,3 +187,14 @@ func CLIOutputQualityShift() int {
|
||||
func CLIProgressBar() bool {
|
||||
return !__no_progress_bar__
|
||||
}
|
||||
|
||||
func CLIOutPutFileName() string {
|
||||
return __output_file_name__
|
||||
}
|
||||
|
||||
func CLIHasPairedFile() bool {
|
||||
return __paired_file_name__ != ""
|
||||
}
|
||||
func CLIPairedFileName() string {
|
||||
return __paired_file_name__
|
||||
}
|
||||
@@ -67,7 +67,7 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
return list_of_files, nil
|
||||
}
|
||||
|
||||
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
var iterator obiiter.IBioSequence
|
||||
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
|
||||
|
||||
@@ -142,6 +142,17 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
if CLIPairedFileName() != "" {
|
||||
ip, err := reader(CLIPairedFileName(), opts...)
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
iterator = iterator.PairTo(ip)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// list_of_files = list_of_files[1:]
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package obiconvert
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||
@@ -8,6 +11,27 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
)
|
||||
|
||||
// BuildPairedFileNames derives the forward and reverse file names used to
// write paired reads from a single output file name.
//
// The base name is split at its first dot: "_R1" (forward) and "_R2"
// (reverse) are appended to the part before the dot, and everything after
// the dot is kept as the suffix, so "reads.fastq.gz" yields
// "reads_R1.fastq.gz" and "reads_R2.fastq.gz". The directory part of
// filename, if any, is preserved.
//
// A base name without any dot (e.g. "reads") yields "reads_R1" and
// "reads_R2"; a base name ending with a single trailing dot loses that dot.
func BuildPairedFileNames(filename string) (string, string) {
	dir, name := filepath.Split(filename)
	parts := strings.SplitN(name, ".", 2)

	forward := parts[0] + "_R1"
	reverse := parts[0] + "_R2"

	// SplitN returns a single element when the name contains no dot, so the
	// length must be checked before reading the suffix part.
	if len(parts) > 1 && parts[1] != "" {
		suffix := "." + parts[1]
		forward += suffix
		reverse += suffix
	}

	if dir != "" {
		forward = filepath.Join(dir, forward)
		reverse = filepath.Join(dir, reverse)
	}

	return forward, reverse
}
|
||||
func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
||||
|
||||
@@ -45,7 +69,32 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
|
||||
var err error
|
||||
|
||||
if len(filenames) == 0 {
|
||||
// No file names are specified or it is "-" : the output is done on stdout
|
||||
|
||||
if CLIOutPutFileName() != "-" || (len(filenames) > 0 && filenames[0] != "-") {
|
||||
var fn string
|
||||
|
||||
if len(filenames) == 0 {
|
||||
fn = CLIOutPutFileName()
|
||||
} else {
|
||||
fn = filenames[0]
|
||||
}
|
||||
|
||||
if iterator.IsPaired() {
|
||||
var reverse string
|
||||
fn, reverse = BuildPairedFileNames(fn)
|
||||
opts = append(opts, obiformats.WritePairedReadsTo(reverse))
|
||||
}
|
||||
|
||||
switch CLIOutputFormat() {
|
||||
case "fastq":
|
||||
newIter, err = obiformats.WriteFastqToFile(iterator, fn, opts...)
|
||||
case "fasta":
|
||||
newIter, err = obiformats.WriteFastaToFile(iterator, fn, opts...)
|
||||
default:
|
||||
newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...)
|
||||
}
|
||||
} else {
|
||||
switch CLIOutputFormat() {
|
||||
case "fastq":
|
||||
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
|
||||
@@ -54,15 +103,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
default:
|
||||
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
|
||||
}
|
||||
} else {
|
||||
switch CLIOutputFormat() {
|
||||
case "fastq":
|
||||
newIter, err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
|
||||
case "fasta":
|
||||
newIter, err = obiformats.WriteFastaToFile(iterator, filenames[0], opts...)
|
||||
default:
|
||||
newIter, err = obiformats.WriteSequencesToFile(iterator, filenames[0], opts...)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -8,11 +8,15 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
predicate := CLISequenceSelectionPredicate()
|
||||
|
||||
if obiconvert.CLIHasPairedFile() {
|
||||
predicate = predicate.PairedPredicat(CLIPairedReadMode())
|
||||
}
|
||||
|
||||
if predicate != nil {
|
||||
if CLISaveDiscardedSequences() {
|
||||
var discarded obiiter.IBioSequence
|
||||
|
||||
@@ -40,6 +40,8 @@ var _AttributePatterns = make(map[string]string, 0)
|
||||
var _InvertMatch = false
|
||||
var _SaveRejected = ""
|
||||
|
||||
var _PairedMode = "forward"
|
||||
|
||||
func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
|
||||
|
||||
options.StringVar(&_Taxdump, "taxdump", _Taxdump,
|
||||
@@ -135,6 +137,11 @@ func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
|
||||
"Several -a options can be used on the same command line and in this last case, the selected "+
|
||||
"sequence records will match all constraints."))
|
||||
|
||||
options.StringVar(&_PairedMode, "paired-mode", _PairedMode,
|
||||
options.ArgName("forward|reverse|and|or|andnot|xor"),
|
||||
options.Description("If paired reads are passed to obibrep, that option determines how the conditions "+
|
||||
"are applied to both reads."),
|
||||
)
|
||||
}
|
||||
|
||||
// OptionSet adds to the basic option set every options declared for
|
||||
@@ -412,3 +419,24 @@ func CLISaveDiscardedSequences() bool {
|
||||
func CLIDiscardedFileName() string {
|
||||
return _SaveRejected
|
||||
}
|
||||
|
||||
func CLIPairedReadMode() obiseq.SeqPredicateMode {
|
||||
switch _PairedMode {
|
||||
case "forward":
|
||||
return obiseq.ForwardOnly
|
||||
case "reverse":
|
||||
return obiseq.ReverseOnly
|
||||
case "and":
|
||||
return obiseq.And
|
||||
case "or":
|
||||
return obiseq.Or
|
||||
case "andnot":
|
||||
return obiseq.AndNot
|
||||
case "xor":
|
||||
return obiseq.Xor
|
||||
default:
|
||||
log.Fatalf("Paired reads mode must be forward, reverse, and, or, andnot, or xor (%s)", _PairedMode)
|
||||
}
|
||||
|
||||
return obiseq.ForwardOnly
|
||||
}
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var _ForwardFiles = make([]string, 0, 10)
|
||||
var _ReverseFiles = make([]string, 0, 10)
|
||||
var _ForwardFile = ""
|
||||
var _ReverseFile = ""
|
||||
var _Delta = 5
|
||||
var _MinOverlap = 20
|
||||
var _GapPenality = float64(2.0)
|
||||
@@ -15,15 +15,15 @@ var _WithoutStats = false
|
||||
var _MinIdentity = 0.9
|
||||
|
||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringSliceVar(&_ForwardFiles, "forward-reads",
|
||||
1, 1000,
|
||||
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||
options.Alias("F"),
|
||||
options.Required("You must provide at least one forward file"),
|
||||
options.ArgName("FILENAME_F"),
|
||||
options.Required("You must provide at a forward file"),
|
||||
options.Description("The file names containing the forward reads"))
|
||||
options.StringSliceVar(&_ReverseFiles, "reverse-reads",
|
||||
1, 1000,
|
||||
options.StringVar(&_ReverseFile, "reverse-reads", "",
|
||||
options.Alias("R"),
|
||||
options.Required("You must provide at least one reverse file"),
|
||||
options.ArgName("FILENAME_R"),
|
||||
options.Required("You must provide a reverse file"),
|
||||
options.Description("The file names containing the reverse reads"))
|
||||
options.IntVar(&_Delta, "delta", _Delta,
|
||||
options.Alias("D"),
|
||||
@@ -42,42 +42,43 @@ func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
obiconvert.OutputOptionSet(options)
|
||||
obiconvert.InputOptionSet(options)
|
||||
PairingOptionSet(options)
|
||||
}
|
||||
|
||||
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
|
||||
forward, err := obiconvert.ReadBioSequences(_ForwardFiles...)
|
||||
func CLIPairedSequence() (obiiter.IBioSequence, error) {
|
||||
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
|
||||
if err != nil {
|
||||
return obiiter.NilIPairedBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
reverse, err := obiconvert.ReadBioSequences(_ReverseFiles...)
|
||||
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
|
||||
if err != nil {
|
||||
return obiiter.NilIPairedBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
paired := forward.PairWith(reverse)
|
||||
paired := forward.PairTo(reverse)
|
||||
|
||||
return paired, nil
|
||||
}
|
||||
|
||||
func Delta() int {
|
||||
func CLIDelta() int {
|
||||
return _Delta
|
||||
}
|
||||
|
||||
func MinOverlap() int {
|
||||
func CLIMinOverlap() int {
|
||||
return _MinOverlap
|
||||
}
|
||||
|
||||
func MinIdentity() float64 {
|
||||
func CLIMinIdentity() float64 {
|
||||
return _MinIdentity
|
||||
}
|
||||
|
||||
func GapPenality() float64 {
|
||||
func CLIGapPenality() float64 {
|
||||
return _GapPenality
|
||||
}
|
||||
|
||||
func WithStats() bool {
|
||||
func CLIWithStats() bool {
|
||||
return !_WithoutStats
|
||||
}
|
||||
|
||||
@@ -3,12 +3,12 @@ package obipairing
|
||||
import (
|
||||
"math"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
@@ -203,12 +203,16 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||
//
|
||||
// The function returns an iterator over batches of obiseq.Biosequence object.
|
||||
// each pair of processed sequences produces one sequence in the result iterator.
|
||||
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
||||
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
gap float64, delta, minOverlap int,
|
||||
minIdentity float64,
|
||||
withStats bool, sizes ...int) obiiter.IBioSequence {
|
||||
|
||||
nworkers := runtime.NumCPU() * 3 / 2
|
||||
if !iterator.IsPaired() {
|
||||
log.Fatalln("Sequence data must be paired")
|
||||
}
|
||||
|
||||
nworkers := obioptions.CLIMaxCPU() * 3 / 2
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
@@ -236,15 +240,15 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
||||
|
||||
f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
|
||||
f := func(iterator obiiter.IBioSequence, wid int) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
|
||||
processed := 0
|
||||
for i, A := range batch.Forward() {
|
||||
B := batch.Reverse()[i]
|
||||
for i, A := range batch.Slice() {
|
||||
B := A.PairedWith()
|
||||
cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, minIdentity, withStats, true, arena)
|
||||
if i%59 == 0 {
|
||||
bar.Add(59)
|
||||
|
||||
Reference in New Issue
Block a user