mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
adds a directory option to obidistribute
This commit is contained in:
@ -1,7 +1,10 @@
|
|||||||
package obiformats
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
@ -33,11 +36,35 @@ func WriterDispatcher(prototypename string,
|
|||||||
log.Fatalf("Cannot retreive the new chanel : %v", err)
|
log.Fatalf("Cannot retreive the new chanel : %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
name:=fmt.Sprintf(prototypename, dispatcher.Classifier().Value(newflux))
|
key := dispatcher.Classifier().Value(newflux)
|
||||||
|
directory := ""
|
||||||
|
if dispatcher.Classifier().Type == "DualAnnotationClassifier" {
|
||||||
|
var keys [2]string
|
||||||
|
err := json.Unmarshal([]byte(key), &keys)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error in parsing dispatch key %s", key)
|
||||||
|
}
|
||||||
|
key = keys[0]
|
||||||
|
directory = keys[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf(prototypename, key)
|
||||||
if opt.CompressedFile() && !strings.HasSuffix(name, ".gz") {
|
if opt.CompressedFile() && !strings.HasSuffix(name, ".gz") {
|
||||||
name = name + ".gz"
|
name = name + ".gz"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if directory != "" {
|
||||||
|
info, err := os.Stat(directory)
|
||||||
|
switch {
|
||||||
|
case !os.IsNotExist(err) && !info.IsDir():
|
||||||
|
log.Fatalln("Cannot Create the directory %s", directory)
|
||||||
|
case os.IsNotExist(err):
|
||||||
|
os.Mkdir(directory, 0755)
|
||||||
|
}
|
||||||
|
|
||||||
|
name = filepath.Join(directory, name)
|
||||||
|
}
|
||||||
|
|
||||||
out, err := formater(data,
|
out, err := formater(data,
|
||||||
name,
|
name,
|
||||||
options...)
|
options...)
|
||||||
|
@ -89,8 +89,6 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
waitWriter.Wait()
|
waitWriter.Wait()
|
||||||
obiiter.UnregisterPipe()
|
|
||||||
log.Debugln("End of the fasta file writing")
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiiter.IBioSequence) {
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
@ -143,6 +141,9 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
file.Close()
|
file.Close()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Debugln("End of the fasta file writing")
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
waitWriter.Done()
|
waitWriter.Done()
|
||||||
|
|
||||||
}()
|
}()
|
||||||
|
@ -80,8 +80,6 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
waitWriter.Wait()
|
waitWriter.Wait()
|
||||||
obiiter.UnregisterPipe()
|
|
||||||
log.Debugln("End of the fastq file writing")
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiiter.IBioSequence) {
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
@ -134,6 +132,8 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Debugln("End of the fastq file writing")
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
waitWriter.Done()
|
waitWriter.Done()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ func ReadSequencesFromFile(filename string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
filetype := GuessSeqFileType(string(tag))
|
filetype := GuessSeqFileType(string(tag))
|
||||||
log.Debug("File guessed format : %s (tag: %s)",
|
log.Debugf("File guessed format : %s (tag: %s)",
|
||||||
filetype, (strings.Split(string(tag), "\n"))[0])
|
filetype, (strings.Split(string(tag), "\n"))[0])
|
||||||
reader = breader
|
reader = breader
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -24,6 +25,7 @@ type BioSequenceClassifier struct {
|
|||||||
Value func(int) string
|
Value func(int) string
|
||||||
Reset func()
|
Reset func()
|
||||||
Clone func() *BioSequenceClassifier
|
Clone func() *BioSequenceClassifier
|
||||||
|
Type string
|
||||||
}
|
}
|
||||||
|
|
||||||
// It creates a classifier that returns the value of the annotation key as an integer. If the
|
// It creates a classifier that returns the value of the annotation key as an integer. If the
|
||||||
@ -88,7 +90,90 @@ func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
|
|||||||
return AnnotationClassifier(key, na)
|
return AnnotationClassifier(key, na)
|
||||||
}
|
}
|
||||||
|
|
||||||
c := BioSequenceClassifier{code, value, reset, clone}
|
c := BioSequenceClassifier{code, value, reset, clone,"AnnotationClassifier"}
|
||||||
|
return &c
|
||||||
|
}
|
||||||
|
|
||||||
|
// It creates a classifier that returns the value of the annotation key as an integer. If the
|
||||||
|
// annotation key is not present, it returns the integer value of the string na
|
||||||
|
func DualAnnotationClassifier(key1, key2 string, na string) *BioSequenceClassifier {
|
||||||
|
encode := make(map[string]int, 1000)
|
||||||
|
decode := make([]string, 0, 1000)
|
||||||
|
locke := sync.RWMutex{}
|
||||||
|
maxcode := 0
|
||||||
|
|
||||||
|
code := func(sequence *BioSequence) int {
|
||||||
|
var val1 = na
|
||||||
|
var val2 = ""
|
||||||
|
var ok bool
|
||||||
|
if sequence.HasAnnotation() {
|
||||||
|
value, ok := sequence.Annotations()[key1]
|
||||||
|
if ok {
|
||||||
|
switch value := value.(type) {
|
||||||
|
case string:
|
||||||
|
val1 = value
|
||||||
|
default:
|
||||||
|
val1 = fmt.Sprint(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if key2 != "" {
|
||||||
|
value, ok := sequence.Annotations()[key2]
|
||||||
|
if ok {
|
||||||
|
switch value := value.(type) {
|
||||||
|
case string:
|
||||||
|
val2 = value
|
||||||
|
default:
|
||||||
|
val2 = fmt.Sprint(value)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
val2=na
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
locke.Lock()
|
||||||
|
defer locke.Unlock()
|
||||||
|
|
||||||
|
jb, _ := goutils.JsonMarshal([2]string{val1, val2})
|
||||||
|
json := string(jb)
|
||||||
|
k, ok := encode[json]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
k = maxcode
|
||||||
|
maxcode++
|
||||||
|
encode[json] = k
|
||||||
|
decode = append(decode, json)
|
||||||
|
}
|
||||||
|
|
||||||
|
return k
|
||||||
|
}
|
||||||
|
|
||||||
|
value := func(k int) string {
|
||||||
|
|
||||||
|
locke.RLock()
|
||||||
|
defer locke.RUnlock()
|
||||||
|
if k >= maxcode {
|
||||||
|
log.Fatalf("value %d not register")
|
||||||
|
}
|
||||||
|
return decode[k]
|
||||||
|
}
|
||||||
|
|
||||||
|
reset := func() {
|
||||||
|
locke.Lock()
|
||||||
|
defer locke.Unlock()
|
||||||
|
|
||||||
|
for k := range encode {
|
||||||
|
delete(encode, k)
|
||||||
|
}
|
||||||
|
decode = decode[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
clone := func() *BioSequenceClassifier {
|
||||||
|
return DualAnnotationClassifier(key1, key2, na)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{code, value, reset, clone,"DualAnnotationClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -121,7 +206,7 @@ func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
|
|||||||
return PredicateClassifier(predicate)
|
return PredicateClassifier(predicate)
|
||||||
}
|
}
|
||||||
|
|
||||||
c := BioSequenceClassifier{code, value, reset, clone}
|
c := BioSequenceClassifier{code, value, reset, clone,"PredicateClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,7 +228,7 @@ func HashClassifier(size int) *BioSequenceClassifier {
|
|||||||
return HashClassifier(size)
|
return HashClassifier(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
c := BioSequenceClassifier{code, value, reset, clone}
|
c := BioSequenceClassifier{code, value, reset, clone,"HashClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,7 +283,7 @@ func SequenceClassifier() *BioSequenceClassifier {
|
|||||||
return SequenceClassifier()
|
return SequenceClassifier()
|
||||||
}
|
}
|
||||||
|
|
||||||
c := BioSequenceClassifier{code, value, reset, clone}
|
c := BioSequenceClassifier{code, value, reset, clone,"SequenceClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,6 +313,6 @@ func RotateClassifier(size int) *BioSequenceClassifier {
|
|||||||
return RotateClassifier(size)
|
return RotateClassifier(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
c := BioSequenceClassifier{code, value, reset, clone}
|
c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
@ -13,13 +13,13 @@ import (
|
|||||||
|
|
||||||
var _FilenamePattern = ""
|
var _FilenamePattern = ""
|
||||||
var _SequenceClassifierTag = ""
|
var _SequenceClassifierTag = ""
|
||||||
|
var _DirectoryTag = ""
|
||||||
var _BatchCount = 0
|
var _BatchCount = 0
|
||||||
var _HashSize = 0
|
var _HashSize = 0
|
||||||
var _NAValue = "NA"
|
var _NAValue = "NA"
|
||||||
var _append = false
|
var _append = false
|
||||||
var _compressed = false
|
var _compressed = false
|
||||||
|
|
||||||
|
|
||||||
func DistributeOptionSet(options *getoptions.GetOpt) {
|
func DistributeOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
|
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
|
||||||
options.Alias("p"),
|
options.Alias("p"),
|
||||||
@ -30,10 +30,17 @@ func DistributeOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag,
|
options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag,
|
||||||
options.Alias("c"),
|
options.Alias("c"),
|
||||||
options.Description("The name of a tag annotating thes sequences. "+
|
options.Description("The name of a tag annotating the sequences. "+
|
||||||
"The name must corresponds to a string, a integer or a boolean value. "+
|
"The name must corresponds to a string, a integer or a boolean value. "+
|
||||||
"That value will be used to dispatch sequences amoong the different files"))
|
"That value will be used to dispatch sequences amoong the different files"))
|
||||||
|
|
||||||
|
options.StringVar(&_DirectoryTag, "directory", _DirectoryTag,
|
||||||
|
options.Alias("d"),
|
||||||
|
options.Description("The name of a tag annotating the sequences. "+
|
||||||
|
"The name must corresponds to a string, a integer or a boolean value. "+
|
||||||
|
"That value will be used to dispatch sequences amoong the different directory " +
|
||||||
|
"in conjunction with the -c|--classifier options"))
|
||||||
|
|
||||||
options.StringVar(&_NAValue, "na-value", _NAValue,
|
options.StringVar(&_NAValue, "na-value", _NAValue,
|
||||||
options.Description("Value used when the classifier tag is not defined for a sequence."))
|
options.Description("Value used when the classifier tag is not defined for a sequence."))
|
||||||
|
|
||||||
@ -71,7 +78,7 @@ func CLICompressed() bool {
|
|||||||
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
|
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
|
||||||
switch {
|
switch {
|
||||||
case _SequenceClassifierTag != "":
|
case _SequenceClassifierTag != "":
|
||||||
return obiseq.AnnotationClassifier(_SequenceClassifierTag, _NAValue)
|
return obiseq.DualAnnotationClassifier(_SequenceClassifierTag,_DirectoryTag, _NAValue)
|
||||||
case _BatchCount > 0:
|
case _BatchCount > 0:
|
||||||
return obiseq.RotateClassifier(_BatchCount)
|
return obiseq.RotateClassifier(_BatchCount)
|
||||||
case _HashSize > 0:
|
case _HashSize > 0:
|
||||||
|
Reference in New Issue
Block a user