adds a directory option to obidistribute

This commit is contained in:
2023-02-17 22:52:53 +01:00
parent 9554a32490
commit d04eb915b3
6 changed files with 136 additions and 16 deletions

View File

@ -1,7 +1,10 @@
package obiformats package obiformats
import ( import (
"encoding/json"
"fmt" "fmt"
"os"
"path/filepath"
"strings" "strings"
"sync" "sync"
@ -33,11 +36,35 @@ func WriterDispatcher(prototypename string,
log.Fatalf("Cannot retreive the new chanel : %v", err) log.Fatalf("Cannot retreive the new chanel : %v", err)
} }
name:=fmt.Sprintf(prototypename, dispatcher.Classifier().Value(newflux)) key := dispatcher.Classifier().Value(newflux)
directory := ""
if dispatcher.Classifier().Type == "DualAnnotationClassifier" {
var keys [2]string
err := json.Unmarshal([]byte(key), &keys)
if err != nil {
log.Fatalf("Error in parsing dispatch key %s", key)
}
key = keys[0]
directory = keys[1]
}
name := fmt.Sprintf(prototypename, key)
if opt.CompressedFile() && !strings.HasSuffix(name, ".gz") { if opt.CompressedFile() && !strings.HasSuffix(name, ".gz") {
name = name + ".gz" name = name + ".gz"
} }
if directory != "" {
info, err := os.Stat(directory)
switch {
case !os.IsNotExist(err) && !info.IsDir():
log.Fatalln("Cannot Create the directory %s", directory)
case os.IsNotExist(err):
os.Mkdir(directory, 0755)
}
name = filepath.Join(directory, name)
}
out, err := formater(data, out, err := formater(data,
name, name,
options...) options...)

View File

@ -89,8 +89,6 @@ func WriteFasta(iterator obiiter.IBioSequence,
} }
close(chunkchan) close(chunkchan)
waitWriter.Wait() waitWriter.Wait()
obiiter.UnregisterPipe()
log.Debugln("End of the fasta file writing")
}() }()
ff := func(iterator obiiter.IBioSequence) { ff := func(iterator obiiter.IBioSequence) {
@ -143,6 +141,9 @@ func WriteFasta(iterator obiiter.IBioSequence,
file.Close() file.Close()
} }
} }
log.Debugln("End of the fasta file writing")
obiiter.UnregisterPipe()
waitWriter.Done() waitWriter.Done()
}() }()

View File

@ -80,8 +80,6 @@ func WriteFastq(iterator obiiter.IBioSequence,
} }
close(chunkchan) close(chunkchan)
waitWriter.Wait() waitWriter.Wait()
obiiter.UnregisterPipe()
log.Debugln("End of the fastq file writing")
}() }()
ff := func(iterator obiiter.IBioSequence) { ff := func(iterator obiiter.IBioSequence) {
@ -134,6 +132,8 @@ func WriteFastq(iterator obiiter.IBioSequence,
} }
} }
log.Debugln("End of the fastq file writing")
obiiter.UnregisterPipe()
waitWriter.Done() waitWriter.Done()
}() }()

View File

@ -78,7 +78,7 @@ func ReadSequencesFromFile(filename string,
} }
filetype := GuessSeqFileType(string(tag)) filetype := GuessSeqFileType(string(tag))
log.Debug("File guessed format : %s (tag: %s)", log.Debugf("File guessed format : %s (tag: %s)",
filetype, (strings.Split(string(tag), "\n"))[0]) filetype, (strings.Split(string(tag), "\n"))[0])
reader = breader reader = breader

View File

@ -6,6 +6,7 @@ import (
"strconv" "strconv"
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@ -24,6 +25,7 @@ type BioSequenceClassifier struct {
Value func(int) string Value func(int) string
Reset func() Reset func()
Clone func() *BioSequenceClassifier Clone func() *BioSequenceClassifier
Type string
} }
// It creates a classifier that returns the value of the annotation key as an integer. If the // It creates a classifier that returns the value of the annotation key as an integer. If the
@ -88,7 +90,90 @@ func AnnotationClassifier(key string, na string) *BioSequenceClassifier {
return AnnotationClassifier(key, na) return AnnotationClassifier(key, na)
} }
c := BioSequenceClassifier{code, value, reset, clone} c := BioSequenceClassifier{code, value, reset, clone,"AnnotationClassifier"}
return &c
}
// It creates a classifier that returns the value of the annotation key as an integer. If the
// annotation key is not present, it returns the integer value of the string na
func DualAnnotationClassifier(key1, key2 string, na string) *BioSequenceClassifier {
encode := make(map[string]int, 1000)
decode := make([]string, 0, 1000)
locke := sync.RWMutex{}
maxcode := 0
code := func(sequence *BioSequence) int {
var val1 = na
var val2 = ""
var ok bool
if sequence.HasAnnotation() {
value, ok := sequence.Annotations()[key1]
if ok {
switch value := value.(type) {
case string:
val1 = value
default:
val1 = fmt.Sprint(value)
}
}
if key2 != "" {
value, ok := sequence.Annotations()[key2]
if ok {
switch value := value.(type) {
case string:
val2 = value
default:
val2 = fmt.Sprint(value)
}
} else {
val2=na
}
}
}
locke.Lock()
defer locke.Unlock()
jb, _ := goutils.JsonMarshal([2]string{val1, val2})
json := string(jb)
k, ok := encode[json]
if !ok {
k = maxcode
maxcode++
encode[json] = k
decode = append(decode, json)
}
return k
}
value := func(k int) string {
locke.RLock()
defer locke.RUnlock()
if k >= maxcode {
log.Fatalf("value %d not register")
}
return decode[k]
}
reset := func() {
locke.Lock()
defer locke.Unlock()
for k := range encode {
delete(encode, k)
}
decode = decode[:0]
}
clone := func() *BioSequenceClassifier {
return DualAnnotationClassifier(key1, key2, na)
}
c := BioSequenceClassifier{code, value, reset, clone,"DualAnnotationClassifier"}
return &c return &c
} }
@ -121,7 +206,7 @@ func PredicateClassifier(predicate SequencePredicate) *BioSequenceClassifier {
return PredicateClassifier(predicate) return PredicateClassifier(predicate)
} }
c := BioSequenceClassifier{code, value, reset, clone} c := BioSequenceClassifier{code, value, reset, clone,"PredicateClassifier"}
return &c return &c
} }
@ -143,7 +228,7 @@ func HashClassifier(size int) *BioSequenceClassifier {
return HashClassifier(size) return HashClassifier(size)
} }
c := BioSequenceClassifier{code, value, reset, clone} c := BioSequenceClassifier{code, value, reset, clone,"HashClassifier"}
return &c return &c
} }
@ -198,7 +283,7 @@ func SequenceClassifier() *BioSequenceClassifier {
return SequenceClassifier() return SequenceClassifier()
} }
c := BioSequenceClassifier{code, value, reset, clone} c := BioSequenceClassifier{code, value, reset, clone,"SequenceClassifier"}
return &c return &c
} }
@ -228,6 +313,6 @@ func RotateClassifier(size int) *BioSequenceClassifier {
return RotateClassifier(size) return RotateClassifier(size)
} }
c := BioSequenceClassifier{code, value, reset, clone} c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"}
return &c return &c
} }

View File

@ -13,13 +13,13 @@ import (
var _FilenamePattern = "" var _FilenamePattern = ""
var _SequenceClassifierTag = "" var _SequenceClassifierTag = ""
var _DirectoryTag = ""
var _BatchCount = 0 var _BatchCount = 0
var _HashSize = 0 var _HashSize = 0
var _NAValue = "NA" var _NAValue = "NA"
var _append = false var _append = false
var _compressed = false var _compressed = false
func DistributeOptionSet(options *getoptions.GetOpt) { func DistributeOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern, options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
options.Alias("p"), options.Alias("p"),
@ -30,10 +30,17 @@ func DistributeOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag, options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag,
options.Alias("c"), options.Alias("c"),
options.Description("The name of a tag annotating thes sequences. "+ options.Description("The name of a tag annotating the sequences. "+
"The name must corresponds to a string, a integer or a boolean value. "+ "The name must corresponds to a string, a integer or a boolean value. "+
"That value will be used to dispatch sequences amoong the different files")) "That value will be used to dispatch sequences amoong the different files"))
options.StringVar(&_DirectoryTag, "directory", _DirectoryTag,
options.Alias("d"),
options.Description("The name of a tag annotating the sequences. "+
"The name must corresponds to a string, a integer or a boolean value. "+
"That value will be used to dispatch sequences amoong the different directory " +
"in conjunction with the -c|--classifier options"))
options.StringVar(&_NAValue, "na-value", _NAValue, options.StringVar(&_NAValue, "na-value", _NAValue,
options.Description("Value used when the classifier tag is not defined for a sequence.")) options.Description("Value used when the classifier tag is not defined for a sequence."))
@ -71,7 +78,7 @@ func CLICompressed() bool {
func CLISequenceClassifier() *obiseq.BioSequenceClassifier { func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
switch { switch {
case _SequenceClassifierTag != "": case _SequenceClassifierTag != "":
return obiseq.AnnotationClassifier(_SequenceClassifierTag, _NAValue) return obiseq.DualAnnotationClassifier(_SequenceClassifierTag,_DirectoryTag, _NAValue)
case _BatchCount > 0: case _BatchCount > 0:
return obiseq.RotateClassifier(_BatchCount) return obiseq.RotateClassifier(_BatchCount)
case _HashSize > 0: case _HashSize > 0: