diff --git a/pkg/obiformats/dispatcher.go b/pkg/obiformats/dispatcher.go index a6a36d4..fa45b69 100644 --- a/pkg/obiformats/dispatcher.go +++ b/pkg/obiformats/dispatcher.go @@ -1,7 +1,10 @@ package obiformats import ( + "encoding/json" "fmt" + "os" + "path/filepath" "strings" "sync" @@ -33,11 +36,35 @@ func WriterDispatcher(prototypename string, log.Fatalf("Cannot retreive the new chanel : %v", err) } - name:=fmt.Sprintf(prototypename, dispatcher.Classifier().Value(newflux)) - if opt.CompressedFile() && ! strings.HasSuffix(name,".gz") { + key := dispatcher.Classifier().Value(newflux) + directory := "" + if dispatcher.Classifier().Type == "DualAnnotationClassifier" { + var keys [2]string + err := json.Unmarshal([]byte(key), &keys) + if err != nil { + log.Fatalf("Error in parsing dispatch key %s", key) + } + key = keys[0] + directory = keys[1] + } + + name := fmt.Sprintf(prototypename, key) + if opt.CompressedFile() && !strings.HasSuffix(name, ".gz") { name = name + ".gz" } - + + if directory != "" { + info, err := os.Stat(directory) + switch { + case !os.IsNotExist(err) && !info.IsDir(): + log.Fatalln("Cannot Create the directory %s", directory) + case os.IsNotExist(err): + os.Mkdir(directory, 0755) + } + + name = filepath.Join(directory, name) + } + out, err := formater(data, name, options...) diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index ac4b9bd..e79574b 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -89,8 +89,6 @@ func WriteFasta(iterator obiiter.IBioSequence, } close(chunkchan) waitWriter.Wait() - obiiter.UnregisterPipe() - log.Debugln("End of the fasta file writing") }() ff := func(iterator obiiter.IBioSequence) { @@ -143,6 +141,9 @@ func WriteFasta(iterator obiiter.IBioSequence, file.Close() } } + + log.Debugln("End of the fasta file writing") + obiiter.UnregisterPipe() waitWriter.Done() }() diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index 67b877f..7126f98 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -80,8 +80,6 @@ func WriteFastq(iterator obiiter.IBioSequence, } close(chunkchan) waitWriter.Wait() - obiiter.UnregisterPipe() - log.Debugln("End of the fastq file writing") }() ff := func(iterator obiiter.IBioSequence) { @@ -134,6 +132,8 @@ func WriteFastq(iterator obiiter.IBioSequence, } } + log.Debugln("End of the fastq file writing") + obiiter.UnregisterPipe() waitWriter.Done() }() diff --git a/pkg/obiformats/universal_read.go b/pkg/obiformats/universal_read.go index ddc4eda..70fa188 100644 --- a/pkg/obiformats/universal_read.go +++ b/pkg/obiformats/universal_read.go @@ -78,7 +78,7 @@ func ReadSequencesFromFile(filename string, } filetype := GuessSeqFileType(string(tag)) - log.Debug("File guessed format : %s (tag: %s)", + log.Debugf("File guessed format : %s (tag: %s)", filetype, (strings.Split(string(tag), "\n"))[0]) reader = breader diff --git a/pkg/obiseq/class.go b/pkg/obiseq/class.go index 8ccb95d..3608d2d 100644 --- a/pkg/obiseq/class.go +++ b/pkg/obiseq/class.go @@ -6,6 +6,7 @@ import ( "strconv" "sync" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" log "github.com/sirupsen/logrus" ) @@ -24,6 +25,7 @@ type BioSequenceClassifier struct { Value func(int) string Reset func() Clone func() *BioSequenceClassifier + Type string } // It creates a classifier that returns the value of the annotation key as an integer. If the @@ -88,7 +90,90 @@ func AnnotationClassifier(key string, na string) *BioSequenceClassifier { return AnnotationClassifier(key, na) } - c := BioSequenceClassifier{code, value, reset, clone} + c := BioSequenceClassifier{code, value, reset, clone,"AnnotationClassifier"} + return &c +} + +// It creates a classifier that returns the value of the annotation key as an integer. If the +// annotation key is not present, it returns the integer value of the string na +func DualAnnotationClassifier(key1, key2 string, na string) *BioSequenceClassifier { + encode := make(map[string]int, 1000) + decode := make([]string, 0, 1000) + locke := sync.RWMutex{} + maxcode := 0 + + code := func(sequence *BioSequence) int { + var val1 = na + var val2 = "" + var ok bool + if sequence.HasAnnotation() { + value, ok := sequence.Annotations()[key1] + if ok { + switch value := value.(type) { + case string: + val1 = value + default: + val1 = fmt.Sprint(value) + } + } + + if key2 != "" { + value, ok := sequence.Annotations()[key2] + if ok { + switch value := value.(type) { + case string: + val2 = value + default: + val2 = fmt.Sprint(value) + } + } else { + val2=na + } + } + } + + locke.Lock() + defer locke.Unlock() + + jb, _ := goutils.JsonMarshal([2]string{val1, val2}) + json := string(jb) + k, ok := encode[json] + + if !ok { + k = maxcode + maxcode++ + encode[json] = k + decode = append(decode, json) + } + + return k + } + + value := func(k int) string { + + locke.RLock() + defer locke.RUnlock() + if k >= maxcode { + log.Fatalf("value %d not register") + } + return decode[k] + } + + reset := func() { + locke.Lock() + defer locke.Unlock() + + for k := range encode { + delete(encode, k) + } + decode = decode[:0] + } + + clone := func() *BioSequenceClassifier { + return DualAnnotationClassifier(key1, key2, na) + } + + c := BioSequenceClassifier{code, value, reset, clone,"DualAnnotationClassifier"} return &c } @@ -121,7 +206,7 @@ func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier { return PredicateClassifier(predicate) } - c := BioSequenceClassifier{code, value, reset, clone} + c := BioSequenceClassifier{code, value, reset, clone,"PredicateClassifier"} return &c } @@ -143,7 +228,7 @@ func HashClassifier(size int) *BioSequenceClassifier { return HashClassifier(size) } - c := BioSequenceClassifier{code, value, reset, clone} + c := BioSequenceClassifier{code, value, reset, clone,"HashClassifier"} return &c } @@ -198,7 +283,7 @@ func SequenceClassifier() *BioSequenceClassifier { return SequenceClassifier() } - c := BioSequenceClassifier{code, value, reset, clone} + c := BioSequenceClassifier{code, value, reset, clone,"SequenceClassifier"} return &c } @@ -228,6 +313,6 @@ func RotateClassifier(size int) *BioSequenceClassifier { return RotateClassifier(size) } - c := BioSequenceClassifier{code, value, reset, clone} + c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"} return &c } diff --git a/pkg/obitools/obidistribute/options.go b/pkg/obitools/obidistribute/options.go index acd4be9..b1b4faa 100644 --- a/pkg/obitools/obidistribute/options.go +++ b/pkg/obitools/obidistribute/options.go @@ -13,13 +13,13 @@ import ( var _FilenamePattern = "" var _SequenceClassifierTag = "" +var _DirectoryTag = "" var _BatchCount = 0 var _HashSize = 0 var _NAValue = "NA" var _append = false var _compressed = false - func DistributeOptionSet(options *getoptions.GetOpt) { options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern, options.Alias("p"), @@ -30,10 +30,17 @@ func DistributeOptionSet(options *getoptions.GetOpt) { options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag, options.Alias("c"), - options.Description("The name of a tag annotating thes sequences. "+ + options.Description("The name of a tag annotating the sequences. "+ "The name must corresponds to a string, a integer or a boolean value. "+ "That value will be used to dispatch sequences amoong the different files")) + options.StringVar(&_DirectoryTag, "directory", _DirectoryTag, + options.Alias("d"), + options.Description("The name of a tag annotating the sequences. "+ + "The name must corresponds to a string, a integer or a boolean value. "+ + "That value will be used to dispatch sequences amoong the different directory " + + "in conjunction with the -c|--classifier options")) + options.StringVar(&_NAValue, "na-value", _NAValue, options.Description("Value used when the classifier tag is not defined for a sequence.")) @@ -71,7 +78,7 @@ func CLICompressed() bool { func CLISequenceClassifier() *obiseq.BioSequenceClassifier { switch { case _SequenceClassifierTag != "": - return obiseq.AnnotationClassifier(_SequenceClassifierTag, _NAValue) + return obiseq.DualAnnotationClassifier(_SequenceClassifierTag,_DirectoryTag, _NAValue) case _BatchCount > 0: return obiseq.RotateClassifier(_BatchCount) case _HashSize > 0: