mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
First commit
This commit is contained in:
138
pkg/obitools/obiconvert/options.go
Normal file
138
pkg/obitools/obiconvert/options.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package obiconvert
|
||||
|
||||
import (
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Package-level storage for the obiconvert command-line options. The values
// below are the defaults in effect until the option parser overwrites them.
var (
	// Record selection.
	__skipped_entries__   = 0
	__read_only_entries__ = -1

	// Input header syntax.
	__input_fastjson_format__ = false
	__input_fastobi_format__  = false

	// Alternative input file formats.
	__input_ecopcr_format__ = false
	__input_embl_format__   = false

	// Input quality encoding.
	__input_solexa_quality__ = false

	// Output format, header syntax and quality encoding.
	__output_in_fasta__        = false
	__output_in_fastq__        = false
	__output_fastjson_format__ = false
	__output_fastobi_format__  = false
	__output_solexa_quality__  = false
)
|
||||
|
||||
func InputOptionSet(options *getoptions.GetOpt) {
|
||||
options.IntVar(&__skipped_entries__, "skip", 0,
|
||||
options.Description("The N first sequence records of the file are discarded from the analysis and not reported to the output file."))
|
||||
|
||||
options.IntVar(&__read_only_entries__, "only", -1,
|
||||
options.Description("Only the N next sequence records of the file are analyzed. The following sequences in the file are neither analyzed, neither reported to the output file. This option can be used conjointly with the –skip option."))
|
||||
|
||||
options.BoolVar(&__input_fastjson_format__, "input-json-header", false,
|
||||
options.Description("FASTA/FASTQ title line annotations follow json format."))
|
||||
options.BoolVar(&__input_fastobi_format__, "input-OBI-header", false,
|
||||
options.Description("FASTA/FASTQ title line annotations follow OBI format."))
|
||||
|
||||
options.BoolVar(&__input_ecopcr_format__, "ecopcr", false,
|
||||
options.Description("Read data following the ecoPCR output format."))
|
||||
|
||||
options.BoolVar(&__input_embl_format__, "embl", false,
|
||||
options.Description("Read data following the EMBL flatfile format."))
|
||||
|
||||
options.BoolVar(&__input_solexa_quality__, "solexa", false,
|
||||
options.Description("Decodes quality string according to the Solexa specification."))
|
||||
|
||||
}
|
||||
|
||||
func OutputOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
||||
options.Description("Read data following the ecoPCR output format."))
|
||||
|
||||
options.BoolVar(&__output_in_fastq__, "fastq-output", false,
|
||||
options.Description("Read data following the EMBL flatfile format."))
|
||||
|
||||
options.BoolVar(&__output_fastjson_format__, "output-json-header", false,
|
||||
options.Description("output FASTA/FASTQ title line annotations follow json format."))
|
||||
options.BoolVar(&__output_fastobi_format__, "output-OBI-header", false,
|
||||
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
||||
|
||||
options.BoolVar(&__output_solexa_quality__, "solexa-output", false,
|
||||
options.Description("Encodes quality string according to the Solexa specification."))
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
InputOptionSet(options)
|
||||
OutputOptionSet(options)
|
||||
}
|
||||
|
||||
// Returns true if the number of reads described in the
|
||||
// file has to be printed.
|
||||
func InputFormat() string {
|
||||
switch {
|
||||
case __input_ecopcr_format__:
|
||||
return "ecopcr"
|
||||
case __input_embl_format__:
|
||||
return "embl"
|
||||
default:
|
||||
return "guessed"
|
||||
}
|
||||
}
|
||||
|
||||
func OutputFormat() string {
|
||||
switch {
|
||||
case __output_in_fastq__:
|
||||
return "fastq"
|
||||
case __output_in_fasta__:
|
||||
return "fasta"
|
||||
default:
|
||||
return "guessed"
|
||||
}
|
||||
}
|
||||
|
||||
func InputFastHeaderFormat() string {
|
||||
switch {
|
||||
case __input_fastjson_format__:
|
||||
return "json"
|
||||
case __input_fastobi_format__:
|
||||
return "obi"
|
||||
default:
|
||||
return "guessed"
|
||||
}
|
||||
}
|
||||
|
||||
func OutputFastHeaderFormat() string {
|
||||
switch {
|
||||
case __output_fastjson_format__:
|
||||
return "json"
|
||||
case __output_fastobi_format__:
|
||||
return "obi"
|
||||
default:
|
||||
return "json"
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the count of sequences to skip at the beginning of the
|
||||
// processing.
|
||||
func SequencesToSkip() int {
|
||||
return __skipped_entries__
|
||||
}
|
||||
|
||||
func AnalyzeOnly() int {
|
||||
return __read_only_entries__
|
||||
}
|
||||
|
||||
func InputQualityShift() int {
|
||||
if __input_solexa_quality__ {
|
||||
return 64
|
||||
} else {
|
||||
return 33
|
||||
}
|
||||
}
|
||||
|
||||
func OutputQualityShift() int {
|
||||
if __output_solexa_quality__ {
|
||||
return 64
|
||||
} else {
|
||||
return 33
|
||||
}
|
||||
}
|
||||
149
pkg/obitools/obiconvert/sequence_reader.go
Normal file
149
pkg/obitools/obiconvert/sequence_reader.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package obiconvert
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiformats"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
// __expand_list_of_files__ expands every entry of filenames into the list of
// data files it designates.
//
// A regular file given directly is kept as is when check_ext is false;
// otherwise it is kept only if its name ends with one of the recognised
// suffixes (fasta, fastq, dat, ecopcr, each optionally followed by ".gz").
// A directory is traversed recursively and only the files carrying a
// recognised suffix are kept. Symbolic links are resolved: a link to a file
// contributes the resolved path, a link to a directory is expanded through a
// recursive call because filepath.Walk does not follow links.
//
// The first error met while walking is returned together with a nil list.
//
// NOTE(review): a symbolic link pointing back to one of its own ancestors
// makes the recursion endless; this is not detected here.
func __expand_list_of_files__(check_ext bool, filenames ...string) ([]string, error) {
	suffixes := []string{
		"fasta", "fasta.gz",
		"fastq", "fastq.gz",
		"dat", "dat.gz",
		"ecopcr", "ecopcr.gz",
	}

	list_of_files := make([]string, 0, 100)

	for _, fn := range filenames {
		// The filtering mode is tracked per root entry, so that walking a
		// directory does not silently enable extension filtering for the
		// explicit file names that follow it in filenames.
		filter := check_ext

		err := filepath.Walk(fn,
			func(path string, info os.FileInfo, err error) error {
				// Walk reports Lstat/ReadDir failures through err; info may
				// then be nil and must not be dereferenced.
				if err != nil {
					return err
				}

				if info.Mode()&os.ModeSymlink != 0 {
					target, err := filepath.EvalSymlinks(path)
					if err != nil {
						return err
					}

					target_info, err := os.Stat(target)
					if err != nil {
						return err
					}

					if target_info.IsDir() {
						// Walk never descends through a link, so the linked
						// directory is expanded explicitly.
						linked, err := __expand_list_of_files__(true, target)
						if err != nil {
							return err
						}
						list_of_files = append(list_of_files, linked...)
						return nil
					}

					path = target
				} else if info.IsDir() {
					// Walk descends into real directories by itself:
					// expanding them here as well would list their files
					// twice. Files found below a directory are always
					// filtered on their suffix.
					filter = true
					return nil
				}

				keep := !filter
				for i := 0; !keep && i < len(suffixes); i++ {
					keep = strings.HasSuffix(path, suffixes[i])
				}

				if keep {
					log.Printf("Appending %s file\n", path)
					list_of_files = append(list_of_files, path)
				}

				return nil
			})

		if err != nil {
			return nil, err
		}
	}

	return list_of_files, nil
}
|
||||
|
||||
func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error) {
|
||||
var iterator obiseq.IBioSequenceBatch
|
||||
var reader func(string, ...obiformats.WithOption) (obiseq.IBioSequenceBatch, error)
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
switch InputFastHeaderFormat() {
|
||||
case "json":
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseFastSeqJsonHeader))
|
||||
case "obi":
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseFastSeqOBIHeader))
|
||||
default:
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
|
||||
|
||||
if len(filenames) == 0 {
|
||||
|
||||
switch InputFormat() {
|
||||
case "ecopcr":
|
||||
iterator = obiformats.ReadEcoPCRBatch(os.Stdin, opts...)
|
||||
case "embl":
|
||||
iterator = obiformats.ReadEMBLBatch(os.Stdin, opts...)
|
||||
default:
|
||||
iterator = obiformats.ReadFastSeqBatchFromStdin(opts...)
|
||||
}
|
||||
} else {
|
||||
|
||||
list_of_files, err := __expand_list_of_files__(false, filenames...)
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
switch InputFormat() {
|
||||
case "ecopcr":
|
||||
reader = obiformats.ReadEcoPCRBatchFromFile
|
||||
case "embl":
|
||||
reader = obiformats.ReadEMBLBatchFromFile
|
||||
default:
|
||||
reader = obiformats.ReadSequencesBatchFromFile
|
||||
}
|
||||
|
||||
iterator, err = reader(list_of_files[0], opts...)
|
||||
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
list_of_files = list_of_files[1:]
|
||||
others := make([]obiseq.IBioSequenceBatch, 0, len(list_of_files))
|
||||
|
||||
for _, fn := range list_of_files {
|
||||
r, err := reader(fn, opts...)
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
}
|
||||
others = append(others, r)
|
||||
}
|
||||
|
||||
if len(others) > 0 {
|
||||
iterator = iterator.Concat(others...)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// if SequencesToSkip() > 0 {
|
||||
// iterator = iterator.Skip(SequencesToSkip())
|
||||
// }
|
||||
|
||||
// if AnalyzeOnly() > 0 {
|
||||
// iterator = iterator.Head(AnalyzeOnly())
|
||||
// }
|
||||
|
||||
return iterator, nil
|
||||
}
|
||||
|
||||
func ReadBioSequences(filenames ...string) (obiseq.IBioSequence, error) {
|
||||
ib, err := ReadBioSequencesBatch(filenames...)
|
||||
return ib.SortBatches().IBioSequence(), err
|
||||
|
||||
}
|
||||
56
pkg/obitools/obiconvert/sequence_writer.go
Normal file
56
pkg/obitools/obiconvert/sequence_writer.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package obiconvert
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiformats"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error {
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
switch OutputFastHeaderFormat() {
|
||||
case "json":
|
||||
log.Println("On output use JSON headers")
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||
case "obi":
|
||||
log.Println("On output use OBI headers")
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqOBIHeader))
|
||||
default:
|
||||
log.Println("On output use JSON headers")
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
||||
|
||||
var err error
|
||||
|
||||
if len(filenames) == 0 {
|
||||
switch OutputFormat() {
|
||||
case "fastq":
|
||||
err = obiformats.WriteFastqToStdout(iterator, opts...)
|
||||
case "fasta":
|
||||
err = obiformats.WriteFastaToStdout(iterator, opts...)
|
||||
default:
|
||||
err = obiformats.WriteSequencesToStdout(iterator, opts...)
|
||||
}
|
||||
} else {
|
||||
switch OutputFormat() {
|
||||
case "fastq":
|
||||
err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
|
||||
case "fasta":
|
||||
err = obiformats.WriteFastaToFile(iterator, filenames[0], opts...)
|
||||
default:
|
||||
err = obiformats.WriteSequencesToFile(iterator, filenames[0], opts...)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Write file error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
48
pkg/obitools/obicount/options.go
Normal file
48
pkg/obitools/obicount/options.go
Normal file
@@ -0,0 +1,48 @@
|
||||
// obicount function utility package.
|
||||
//
|
||||
// The obitools/obicount package contains every
|
||||
// function specifically required by the obicount utility.
|
||||
package obicount
|
||||
|
||||
import (
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Switches selecting which counts obicount prints. All of them start false.
var (
	__read_count__,
	__variant_count__,
	__symbol_count__ bool
)
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__variant_count__, "variants", false,
|
||||
options.Alias("v"),
|
||||
options.Description("Prints variant counts."))
|
||||
|
||||
options.BoolVar(&__read_count__, "reads", false,
|
||||
options.Alias("r"),
|
||||
options.Description("Prints read counts."))
|
||||
|
||||
options.BoolVar(&__symbol_count__, "symbols", false,
|
||||
options.Alias("s"),
|
||||
options.Description("Prints symbol counts."))
|
||||
}
|
||||
|
||||
// Returns true if the number of reads described in the
|
||||
// file has to be printed.
|
||||
func IsPrintingReadCount() bool {
|
||||
return __read_count__ ||
|
||||
!(__read_count__ || __variant_count__ || __symbol_count__)
|
||||
}
|
||||
|
||||
// Returns true if the number of sequence variants described in the
|
||||
// file has to be printed.
|
||||
func IsPrintingVariantCount() bool {
|
||||
return __variant_count__ ||
|
||||
!(__read_count__ || __variant_count__ || __symbol_count__)
|
||||
}
|
||||
|
||||
// Returns true if the number of symbols (sum of the sequence lengths)
|
||||
// described in the file has to be printed.
|
||||
func IsPrintingSymbolCount() bool {
|
||||
return __symbol_count__ ||
|
||||
!(__read_count__ || __variant_count__ || __symbol_count__)
|
||||
}
|
||||
87
pkg/obitools/obifind/iterator.go
Normal file
87
pkg/obitools/obifind/iterator.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package obifind
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitax"
|
||||
)
|
||||
|
||||
func IFilterRankRestriction() func(*obitax.ITaxonSet) *obitax.ITaxonSet {
|
||||
f := func(s *obitax.ITaxonSet) *obitax.ITaxonSet {
|
||||
return s
|
||||
}
|
||||
|
||||
if __restrict_rank__ != "" {
|
||||
f = func(s *obitax.ITaxonSet) *obitax.ITaxonSet {
|
||||
return s.IFilterOnTaxRank(__restrict_rank__)
|
||||
}
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func ITaxonNameMatcher() (func(string) *obitax.ITaxonSet, error) {
|
||||
taxonomy, err := LoadSelectedTaxonomy()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fun := func(name string) *obitax.ITaxonSet {
|
||||
return taxonomy.IFilterOnName(name, __fixed_pattern__)
|
||||
}
|
||||
|
||||
return fun, nil
|
||||
}
|
||||
|
||||
func ITaxonRestrictions() (func(*obitax.ITaxonSet) *obitax.ITaxonSet, error) {
|
||||
|
||||
clades, err := TaxonomicalRestrictions()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rankfilter := IFilterRankRestriction()
|
||||
|
||||
fun := func(iterator *obitax.ITaxonSet) *obitax.ITaxonSet {
|
||||
return rankfilter(iterator).IFilterBelongingSubclades(clades)
|
||||
}
|
||||
|
||||
return fun, nil
|
||||
}
|
||||
|
||||
func TaxonAsString(taxon *obitax.TaxNode, pattern string) string {
|
||||
text := taxon.ScientificName()
|
||||
|
||||
if __with_path__ {
|
||||
var bf bytes.Buffer
|
||||
path, err := taxon.Path()
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("%+v", err)
|
||||
}
|
||||
|
||||
bf.WriteString(path.Get(path.Length() - 1).ScientificName())
|
||||
|
||||
for i := path.Length() - 2; i >= 0; i-- {
|
||||
fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName())
|
||||
}
|
||||
|
||||
text = bf.String()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%-20s | %10d | %10d | %-20s | %s",
|
||||
pattern,
|
||||
taxon.Taxid(),
|
||||
taxon.Parent().Taxid(),
|
||||
taxon.Rank(),
|
||||
text)
|
||||
}
|
||||
|
||||
func TaxonWriter(itaxa *obitax.ITaxonSet, pattern string) {
|
||||
for itaxa.Next() {
|
||||
fmt.Println(TaxonAsString(itaxa.Get(), pattern))
|
||||
}
|
||||
}
|
||||
114
pkg/obitools/obifind/options.go
Normal file
114
pkg/obitools/obifind/options.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package obifind
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiformats/ncbitaxdump"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitax"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var __taxdump__ = ""
|
||||
var __alternative_name__ = false
|
||||
var __rank_list__ = false
|
||||
var __selected_taxonomy__ = (*obitax.Taxonomy)(nil)
|
||||
var __taxonomical_restriction__ = make([]int, 0)
|
||||
|
||||
var __fixed_pattern__ = false
|
||||
var __with_path__ = false
|
||||
var __taxid_path__ = -1
|
||||
var __taxid_sons__ = -1
|
||||
var __restrict_rank__ = ""
|
||||
|
||||
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) {
|
||||
if required {
|
||||
options.StringVar(&__taxdump__, "taxdump", "",
|
||||
options.Alias("t"),
|
||||
options.Required(),
|
||||
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
|
||||
} else {
|
||||
options.StringVar(&__taxdump__, "taxdump", "",
|
||||
options.Alias("t"),
|
||||
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
|
||||
}
|
||||
if alternatiive {
|
||||
options.BoolVar(&__alternative_name__, "alternative-names", false,
|
||||
options.Alias("a"),
|
||||
options.Description("Enable the search on all alternative names and not only scientific names."))
|
||||
}
|
||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||
options.Alias("l"),
|
||||
options.Description("List every taxonomic rank available iin the taxonomy."))
|
||||
options.IntSliceVar(&__taxonomical_restriction__, "subclade-of", 1, 1,
|
||||
options.Alias("s"),
|
||||
options.Description("Restrict output to some subclades."))
|
||||
}
|
||||
|
||||
func SelectedNCBITaxDump() string {
|
||||
return __taxdump__
|
||||
}
|
||||
|
||||
func AreAlternativeNamesSelected() bool {
|
||||
return __alternative_name__
|
||||
}
|
||||
|
||||
func TaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
taxonomy, err := LoadSelectedTaxonomy()
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts := make(obitax.TaxonSet)
|
||||
for _, taxid := range __taxonomical_restriction__ {
|
||||
tx, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ts.Inserts(tx)
|
||||
}
|
||||
|
||||
return &ts, nil
|
||||
}
|
||||
|
||||
func LoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
|
||||
if SelectedNCBITaxDump() != "" {
|
||||
if __selected_taxonomy__ == nil {
|
||||
var err error
|
||||
__selected_taxonomy__, err = ncbitaxdump.LoadNCBITaxDump(SelectedNCBITaxDump(),
|
||||
!AreAlternativeNamesSelected())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return __selected_taxonomy__, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("No NCBII taxdump selected using option -t|--taxdump")
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
LoadTaxonomyOptionSet(options, true, true)
|
||||
options.BoolVar(&__fixed_pattern__, "fixed", false,
|
||||
options.Alias("F"),
|
||||
options.Description("Match taxon names using a fixed pattern, not a regular expression"))
|
||||
options.BoolVar(&__with_path__, "with-path", false,
|
||||
options.Alias("P"),
|
||||
options.Description("Adds a column containing the full path for each displayed taxon."))
|
||||
options.IntVar(&__taxid_path__, "parents", -1,
|
||||
options.Alias("p"),
|
||||
options.Description("Displays every parental tree's information for the provided taxid."))
|
||||
options.StringVar(&__restrict_rank__, "rank", "",
|
||||
options.Alias("r"),
|
||||
options.Description("Restrict to the given taxonomic rank."))
|
||||
}
|
||||
|
||||
func RequestsPathForTaxid() int {
|
||||
return __taxid_path__
|
||||
}
|
||||
|
||||
func RequestsSonsForTaxid() int {
|
||||
return __taxid_sons__
|
||||
}
|
||||
74
pkg/obitools/obipairing/options.go
Normal file
74
pkg/obitools/obipairing/options.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package obipairing
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Package-level storage for the obipairing command-line options, set to
// their default values.
var (
	// Input files.
	__forward_files__ = make([]string, 0, 10)
	__reverse_files__ = make([]string, 0, 10)

	// Alignment parameters.
	__delta__        = 5
	__min_overlap__  = 20
	__gap_penality__ = 2

	// Output annotation control.
	__without_stats__ = false
)
|
||||
|
||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringSliceVar(&__forward_files__, "forward-reads",
|
||||
1, 1000,
|
||||
options.Alias("F"),
|
||||
options.Description("The file names containing the forward reads"))
|
||||
options.StringSliceVar(&__reverse_files__, "reverse-reads",
|
||||
1, 1000,
|
||||
options.Alias("R"),
|
||||
options.Description("The file names containing the reverse reads"))
|
||||
options.IntVar(&__delta__, "delta", 5,
|
||||
options.Alias("D"),
|
||||
options.Description("Length added to the fast detected overlap for the precise alignement (default 5)."))
|
||||
options.IntVar(&__min_overlap__, "min-overlap", 20,
|
||||
options.Alias("O"),
|
||||
options.Description("Minimum ovelap between both the reads to consider the aligment (default 20)."))
|
||||
options.IntVar(&__gap_penality__, "gap-penality", 2,
|
||||
options.Alias("G"),
|
||||
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
|
||||
options.BoolVar(&__without_stats__, "without-stat", false,
|
||||
options.Alias("S"),
|
||||
options.Description("Remove alignment statistics from the produced consensus sequences."))
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
PairingOptionSet(options)
|
||||
}
|
||||
|
||||
func IBatchPairedSequence() (obiseq.IPairedBioSequenceBatch, error) {
|
||||
forward, err := obiconvert.ReadBioSequencesBatch(__forward_files__...)
|
||||
if err != nil {
|
||||
return obiseq.NilIPairedBioSequenceBatch, err
|
||||
}
|
||||
|
||||
reverse, err := obiconvert.ReadBioSequencesBatch(__reverse_files__...)
|
||||
if err != nil {
|
||||
return obiseq.NilIPairedBioSequenceBatch, err
|
||||
}
|
||||
|
||||
paired := forward.PairWith(reverse)
|
||||
|
||||
return paired, nil
|
||||
}
|
||||
|
||||
func Delta() int {
|
||||
return __delta__
|
||||
}
|
||||
|
||||
func MinOverlap() int {
|
||||
return __min_overlap__
|
||||
}
|
||||
|
||||
func GapPenality() int {
|
||||
return __gap_penality__
|
||||
}
|
||||
|
||||
func WithStats() bool {
|
||||
return !__without_stats__
|
||||
}
|
||||
176
pkg/obitools/obipairing/pairing.go
Normal file
176
pkg/obitools/obipairing/pairing.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package obipairing
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obialign"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
// __abs__ returns x when it is positive or zero, and -x otherwise.
func __abs__(x int) int {
	if x >= 0 {
		return x
	}

	return -x
}
|
||||
|
||||
func JoinPairedSequence(seqA, seqB obiseq.BioSequence) obiseq.BioSequence {
|
||||
js := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
||||
jq := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
||||
|
||||
copy(js, seqA.Sequence())
|
||||
copy(jq, seqA.Qualities())
|
||||
|
||||
js = append(js, '.', '.', '.', '.', '.', '.', '.', '.', '.', '.')
|
||||
jq = append(jq, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
js = append(js, seqB.Sequence()...)
|
||||
jq = append(jq, seqB.Qualities()...)
|
||||
|
||||
rep := obiseq.MakeBioSequence(seqA.Id(), js, seqA.Definition())
|
||||
rep.SetQualities(jq)
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
||||
gap, delta, overlap_min int, with_stats bool,
|
||||
arena_align obialign.PEAlignArena,
|
||||
arena_cons obialign.BuildAlignArena,
|
||||
arena_qual obialign.BuildAlignArena) obiseq.BioSequence {
|
||||
|
||||
score, path := obialign.PEAlign(seqA, seqB, gap, delta, arena_align)
|
||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path,
|
||||
arena_cons, arena_qual)
|
||||
|
||||
left := path[0]
|
||||
right := 0
|
||||
if path[len(path)-1] == 0 {
|
||||
right = path[len(path)-2]
|
||||
}
|
||||
lcons := cons.Length()
|
||||
ali_length := lcons - __abs__(left) - __abs__(right)
|
||||
|
||||
if ali_length >= overlap_min {
|
||||
if with_stats {
|
||||
annot := cons.Annotations()
|
||||
annot["mode"] = "alignment"
|
||||
annot["score"] = score
|
||||
|
||||
if left < 0 {
|
||||
annot["seq_a_single"] = -left
|
||||
annot["ali_dir"] = "left"
|
||||
} else {
|
||||
annot["seq_b_single"] = left
|
||||
annot["ali_dir"] = "right"
|
||||
}
|
||||
|
||||
if right < 0 {
|
||||
right = -right
|
||||
annot["seq_a_single"] = right
|
||||
} else {
|
||||
annot["seq_b_single"] = right
|
||||
}
|
||||
|
||||
score_norm := float64(0)
|
||||
if ali_length > 0 {
|
||||
score_norm = math.Round(float64(match)/float64(ali_length)*1000) / 1000
|
||||
}
|
||||
|
||||
annot["ali_length"] = ali_length
|
||||
annot["seq_ab_match"] = match
|
||||
annot["score_norm"] = score_norm
|
||||
|
||||
}
|
||||
} else {
|
||||
cons = JoinPairedSequence(seqA, seqB)
|
||||
|
||||
if with_stats {
|
||||
annot := cons.Annotations()
|
||||
annot["mode"] = "join"
|
||||
}
|
||||
}
|
||||
|
||||
return cons
|
||||
}
|
||||
|
||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||
gap, delta, overlap_min int, with_stats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||
|
||||
nworkers := 7
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
nworkers = sizes[0]
|
||||
}
|
||||
|
||||
if len(sizes) > 1 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
new_iter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
new_iter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
new_iter.Wait()
|
||||
for len(new_iter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(new_iter.Channel())
|
||||
log.Printf("End of the sequence Pairing")
|
||||
}()
|
||||
|
||||
bar := progressbar.NewOptions(
|
||||
-1,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
||||
|
||||
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
barena1 := obialign.MakeBuildAlignArena(150, 150)
|
||||
barena2 := obialign.MakeBuildAlignArena(150, 150)
|
||||
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start := time.Now()
|
||||
for iterator.Next() {
|
||||
// elapsed := time.Since(start)
|
||||
// log.Printf("\n==>%d got data to align after %s\n", wid, elapsed)
|
||||
batch := iterator.Get()
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Forward()))
|
||||
processed := 0
|
||||
for i, A := range batch.Forward() {
|
||||
B := batch.Reverse()[i]
|
||||
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, arena, barena1, barena2)
|
||||
if i%59 == 0 {
|
||||
bar.Add(59)
|
||||
processed += 59
|
||||
}
|
||||
}
|
||||
bar.Add(batch.Length() - processed)
|
||||
new_iter.Channel() <- obiseq.MakeBioSequenceBatch(
|
||||
batch.Order(),
|
||||
cons...,
|
||||
)
|
||||
// log.Printf("\n==> %d Wait data to align\n", wid)
|
||||
// start = time.Now()
|
||||
}
|
||||
new_iter.Done()
|
||||
}
|
||||
|
||||
log.Printf("Start of the sequence Pairing")
|
||||
|
||||
for i := 0; i < nworkers; i++ {
|
||||
go f(iterator.Split(), i)
|
||||
}
|
||||
|
||||
return new_iter
|
||||
|
||||
}
|
||||
86
pkg/obitools/obipcr/options.go
Normal file
86
pkg/obitools/obipcr/options.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package obipcr
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiapat"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Package-level storage for the obipcr command-line options, set to their
// default values.
var (
	__circular__ = false

	// Primer sequences; empty until provided on the command line.
	__forward_primer__ string
	__reverse_primer__ string

	__allowed_mismatch__ = 0
	__minimum_length__   = 0
	__maximum_length__   = -1
)
|
||||
|
||||
func PCROptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__circular__, "circular", false,
|
||||
options.Alias("c"),
|
||||
options.Description("Considers that sequences are [c]ircular."))
|
||||
|
||||
options.StringVar(&__forward_primer__, "forward", "",
|
||||
options.Required("You must provide a forward primer"),
|
||||
options.Description("The forward primer used for the electronic PCR."))
|
||||
|
||||
options.StringVar(&__reverse_primer__, "reverse", "",
|
||||
options.Required("You must provide a reverse primer"),
|
||||
options.Description("The reverse primer used for the electronic PCR."))
|
||||
|
||||
options.IntVar(&__allowed_mismatch__, "allowed-mismatches", 0,
|
||||
options.Alias("e"),
|
||||
options.Description("Maximum number of mismatches allowed for each primer."))
|
||||
|
||||
options.IntVar(&__minimum_length__, "min-length", 0,
|
||||
options.Alias("l"),
|
||||
options.Description("Minimum length of the barcode (primers excluded)."))
|
||||
options.IntVar(&__maximum_length__, "max-length", -1,
|
||||
options.Alias("L"),
|
||||
options.Description("Maximum length of the barcode (primers excluded)."))
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
PCROptionSet(options)
|
||||
}
|
||||
|
||||
func ForwardPrimer() string {
|
||||
pattern, err := obiapat.MakeApatPattern(__forward_primer__, __allowed_mismatch__)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%+v", err)
|
||||
}
|
||||
|
||||
pattern.Free()
|
||||
|
||||
return __forward_primer__
|
||||
}
|
||||
|
||||
func ReversePrimer() string {
|
||||
pattern, err := obiapat.MakeApatPattern(__reverse_primer__, __allowed_mismatch__)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%+v", err)
|
||||
}
|
||||
|
||||
pattern.Free()
|
||||
|
||||
return __reverse_primer__
|
||||
}
|
||||
|
||||
func AllowedMismatch() int {
|
||||
return __allowed_mismatch__
|
||||
}
|
||||
|
||||
func Circular() bool {
|
||||
return __circular__
|
||||
}
|
||||
|
||||
func MinLength() int {
|
||||
return __minimum_length__
|
||||
}
|
||||
|
||||
func MaxLength() int {
|
||||
return __maximum_length__
|
||||
}
|
||||
32
pkg/obitools/obipcr/pcr.go
Normal file
32
pkg/obitools/obipcr/pcr.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package obipcr
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiapat"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequence, error) {
|
||||
|
||||
forward := ForwardPrimer()
|
||||
reverse := ReversePrimer()
|
||||
opts := make([]obiapat.WithOption, 0, 10)
|
||||
|
||||
opts = append(opts, obiapat.OptionForwardError(AllowedMismatch()),
|
||||
obiapat.OptionReverseError(AllowedMismatch()))
|
||||
|
||||
if MinLength() > 0 {
|
||||
opts = append(opts, obiapat.OptionMinLength(MinLength()))
|
||||
}
|
||||
|
||||
if MaxLength() > 0 {
|
||||
opts = append(opts, obiapat.OptionMaxLength(MaxLength()))
|
||||
}
|
||||
|
||||
if Circular() {
|
||||
opts = append(opts, obiapat.OptionCircular(Circular()))
|
||||
}
|
||||
|
||||
worker := obiapat.PCRSliceWorker(forward, reverse, opts...)
|
||||
|
||||
return iterator.MakeISliceWorker(worker).IBioSequence(), nil
|
||||
}
|
||||
Reference in New Issue
Block a user