mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
438 lines
9.9 KiB
Go
438 lines
9.9 KiB
Go
// This is an integration of the xopen package originally written by Brent Pedersen
|
|
// (https://github.com/brentp/xopen).
|
|
//
|
|
// Here it can be considered a fork of the version maintained by [Wei Shen](http://shenwei.me):
|
|
//
|
|
// https://github.com/shenwei356/xopen
|
|
//
|
|
// Package xopen makes it easy to get buffered readers and writers.
|
|
// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
|
|
// Wopen opens a (possibly gzipped) file for buffered writing.
|
|
// Both will use gzip when appropriate and will use buffered IO.
|
|
package obiutils
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"os/user"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/dsnet/compress/bzip2"
|
|
"github.com/klauspost/compress/zstd"
|
|
gzip "github.com/klauspost/pgzip"
|
|
"github.com/ulikunitz/xz"
|
|
)
|
|
|
|
// Level is the default compression level of gzip.
|
|
// This value will be automatically adjusted to the default value of zstd or bzip2.
|
|
var Level = gzip.DefaultCompression
|
|
|
|
// ErrNoContent means nothing in the stream/file.
|
|
var ErrNoContent = errors.New("xopen: no content")
|
|
|
|
// ErrDirNotSupported means the path is a directory.
|
|
var ErrDirNotSupported = errors.New("xopen: input is a directory")
|
|
|
|
// IsGzip returns true buffered Reader has the gzip magic.
|
|
func IsGzip(b *bufio.Reader) (bool, error) {
|
|
return CheckBytes(b, []byte{0x1f, 0x8b})
|
|
}
|
|
|
|
// IsXz returns true buffered Reader has the xz magic.
|
|
func IsXz(b *bufio.Reader) (bool, error) {
|
|
return CheckBytes(b, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00})
|
|
}
|
|
|
|
// IsZst returns true buffered Reader has the zstd magic.
|
|
func IsZst(b *bufio.Reader) (bool, error) {
|
|
return CheckBytes(b, []byte{0x28, 0xB5, 0x2f, 0xfd})
|
|
}
|
|
|
|
// IsBzip2 returns true buffered Reader has the bzip2 magic.
|
|
func IsBzip2(b *bufio.Reader) (bool, error) {
|
|
return CheckBytes(b, []byte{0x42, 0x5a, 0x68})
|
|
}
|
|
|
|
// IsStdin reports whether standard input is something other than a character
// device (see http://stackoverflow.com/a/26567513). It returns false when
// os.Stdin cannot be stat'ed.
func IsStdin() bool {
	info, err := os.Stdin.Stat()
	return err == nil && info.Mode()&os.ModeCharDevice == 0
}
|
|
|
|
// ExpandUser expands a leading "~" or "~otheruser" in path to the matching
// home directory.
//
// Paths that are empty or do not start with '~' are returned unchanged.
// "~" and "~/rest" resolve against the current user, "~name" and
// "~name/rest" against the user called name. The user-name component is
// removed from the result, so "~bob/data" becomes "<home of bob>/data".
// The result is assembled with filepath.Join and is therefore cleaned.
//
// An error is returned when the user cannot be resolved.
func ExpandUser(path string) (string, error) {
	if len(path) == 0 || path[0] != '~' {
		return path, nil
	}

	// Split "~name/rest" into the (possibly empty) user name and the remainder.
	name, rest := path[1:], ""
	if i := strings.IndexByte(name, '/'); i >= 0 {
		name, rest = name[:i], name[i+1:]
	}

	var (
		u   *user.User
		err error
	)
	if name == "" {
		u, err = user.Current()
	} else {
		u, err = user.Lookup(name)
	}
	if err != nil {
		return "", err
	}
	return filepath.Join(u.HomeDir, rest), nil
}
|
|
|
|
// Exists checks if a local file exits
|
|
func Exists(path string) bool {
|
|
path, perr := ExpandUser(path)
|
|
if perr != nil {
|
|
return false
|
|
}
|
|
_, err := os.Stat(path)
|
|
return err == nil
|
|
}
|
|
|
|
// CheckBytes reports whether the next len(buf) bytes buffered in b equal buf.
//
// The bytes are inspected with Peek, so nothing is consumed from b. When Peek
// fails (for instance with io.EOF because the stream holds fewer than
// len(buf) bytes) the result is false together with that error.
func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) {
	head, err := b.Peek(len(buf))
	if err != nil {
		return false, err // typically EOF
	}
	for i, want := range buf {
		if head[i] != want {
			return false, nil
		}
	}
	return true, nil
}
|
|
|
|
// Reader is returned by Ropen and Buf. Callers read through the embedded
// *bufio.Reader and release everything with Close.
type Reader struct {
	*bufio.Reader               // buffered view the caller reads from
	rdr           io.Reader     // source the Reader was built on; closed by Close if it is an io.Closer
	gz            io.ReadCloser // optional decompressor layered over rdr; nil when none was recorded
}

// Close releases the decompressor (if any) and then the underlying source
// (if it implements io.Closer).
//
// Both are always attempted: a failure to close the decompressor no longer
// leaves the source open. The first error encountered is returned.
func (r *Reader) Close() error {
	var firstErr error
	if r.gz != nil {
		firstErr = r.gz.Close()
	}
	if c, ok := r.rdr.(io.Closer); ok {
		if err := c.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
|
|
|
|
// Writer is returned by Wopen
|
|
type Writer struct {
|
|
*bufio.Writer
|
|
wtr *os.File
|
|
gz *gzip.Writer
|
|
xw *xz.Writer
|
|
zw *zstd.Encoder
|
|
bz2 *bzip2.Writer
|
|
}
|
|
|
|
// Close the associated files.
|
|
func (w *Writer) Close() error {
|
|
var err error
|
|
err = w.Flush()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if w.gz != nil {
|
|
err = w.gz.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if w.xw != nil {
|
|
err = w.xw.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if w.zw != nil {
|
|
err = w.zw.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if w.bz2 != nil {
|
|
err = w.bz2.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return w.wtr.Close()
|
|
}
|
|
|
|
// Flush the writer.
|
|
func (w *Writer) Flush() error {
|
|
var err error
|
|
err = w.Writer.Flush()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if w.gz != nil {
|
|
err = w.gz.Flush()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if w.zw != nil {
|
|
err = w.zw.Flush()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// bufSize is the size, in bytes, of every bufio buffer allocated by this file (64 KiB).
var bufSize = 64 * 1024
|
|
|
|
// Buf returns a buffered reader from an io.Reader
|
|
// If f == "-", then it will attempt to read from os.Stdin.
|
|
// If the file is gzipped, it will be read as such.
|
|
func Buf(r io.Reader) (*Reader, error) {
|
|
b := bufio.NewReaderSize(r, bufSize)
|
|
var rd io.Reader
|
|
var rdr io.ReadCloser
|
|
|
|
if is, err := IsGzip(b); err != nil {
|
|
// check BOM
|
|
t, _, err := b.ReadRune() // no content
|
|
if err != nil {
|
|
return nil, ErrNoContent
|
|
}
|
|
if t != '\uFEFF' {
|
|
b.UnreadRune()
|
|
}
|
|
return &Reader{b, r, rdr}, nil // non-gzip file with content less than 2 bytes
|
|
} else if is {
|
|
rdr, err = gzip.NewReader(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = bufio.NewReaderSize(rdr, bufSize)
|
|
} else if is, err := IsZst(b); err != nil {
|
|
// check BOM
|
|
t, _, err := b.ReadRune() // no content
|
|
if err != nil {
|
|
return nil, ErrNoContent
|
|
}
|
|
if t != '\uFEFF' {
|
|
b.UnreadRune()
|
|
}
|
|
return &Reader{b, r, rdr}, nil // non-gzip/zst file with content less than 4 bytes
|
|
} else if is {
|
|
rd, err = zstd.NewReader(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = bufio.NewReaderSize(rd, bufSize)
|
|
} else if is, err := IsXz(b); err != nil {
|
|
// check BOM
|
|
t, _, err := b.ReadRune() // no content
|
|
if err != nil {
|
|
return nil, ErrNoContent
|
|
}
|
|
if t != '\uFEFF' {
|
|
b.UnreadRune()
|
|
}
|
|
return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
|
|
} else if is {
|
|
rd, err = xz.NewReader(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = bufio.NewReaderSize(rd, bufSize)
|
|
} else if is, err := IsBzip2(b); err != nil {
|
|
// check BOM
|
|
t, _, err := b.ReadRune() // no content
|
|
if err != nil {
|
|
return nil, ErrNoContent
|
|
}
|
|
if t != '\uFEFF' {
|
|
b.UnreadRune()
|
|
}
|
|
return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
|
|
} else if is {
|
|
rd, err = bzip2.NewReader(b, &bzip2.ReaderConfig{})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b = bufio.NewReaderSize(rd, bufSize)
|
|
}
|
|
|
|
// other files with content >= 6 bytes
|
|
|
|
// check BOM
|
|
t, _, err := b.ReadRune()
|
|
if err != nil {
|
|
return nil, ErrNoContent
|
|
}
|
|
if t != '\uFEFF' {
|
|
b.UnreadRune()
|
|
}
|
|
return &Reader{b, r, rdr}, nil
|
|
}
|
|
|
|
// XReader returns a reader from a url string or a file.
|
|
func XReader(f string) (io.Reader, error) {
|
|
if strings.HasPrefix(f, "http://") || strings.HasPrefix(f, "https://") {
|
|
var rsp *http.Response
|
|
rsp, err := http.Get(f)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if rsp.StatusCode != 200 {
|
|
return nil, fmt.Errorf("http error downloading %s. status: %s", f, rsp.Status)
|
|
}
|
|
rdr := rsp.Body
|
|
return rdr, nil
|
|
}
|
|
f, err := ExpandUser(f)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fi, err := os.Stat(f)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if fi.IsDir() {
|
|
return nil, ErrDirNotSupported
|
|
}
|
|
|
|
return os.Open(f)
|
|
}
|
|
|
|
// Ropen opens a buffered reader.
|
|
func Ropen(f string) (*Reader, error) {
|
|
var err error
|
|
var rdr io.Reader
|
|
if f == "-" {
|
|
if !IsStdin() {
|
|
return nil, errors.New("stdin not detected")
|
|
}
|
|
b, err := Buf(os.Stdin)
|
|
return b, err
|
|
} else if f[0] == '|' {
|
|
// TODO: use csv to handle quoted file names.
|
|
cmdStrs := strings.Split(f[1:], " ")
|
|
var cmd *exec.Cmd
|
|
if len(cmdStrs) == 2 {
|
|
cmd = exec.Command(cmdStrs[0], cmdStrs[1:]...)
|
|
} else {
|
|
cmd = exec.Command(cmdStrs[0])
|
|
}
|
|
rdr, err = cmd.StdoutPipe()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
err = cmd.Start()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
} else {
|
|
rdr, err = XReader(f)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b, err := Buf(rdr)
|
|
return b, err
|
|
}
|
|
|
|
// Wopen opens a buffered reader.
|
|
// If f == "-", then stdout will be used.
|
|
// If f endswith ".gz", then the output will be gzipped.
|
|
// If f endswith ".xz", then the output will be zx-compressed.
|
|
// If f endswith ".zst", then the output will be zstd-compressed.
|
|
// If f endswith ".bz2", then the output will be bzip2-compressed.
|
|
func Wopen(f string) (*Writer, error) {
|
|
return WopenFile(f, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
|
|
}
|
|
|
|
// WopenFile opens a buffered reader.
|
|
// If f == "-", then stdout will be used.
|
|
// If f endswith ".gz", then the output will be gzipped.
|
|
// If f endswith ".xz", then the output will be zx-compressed.
|
|
// If f endswith ".bz2", then the output will be bzip2-compressed.
|
|
func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
|
|
var wtr *os.File
|
|
if f == "-" {
|
|
wtr = os.Stdout
|
|
} else {
|
|
dir := filepath.Dir(f)
|
|
fi, err := os.Stat(dir)
|
|
if err == nil && !fi.IsDir() {
|
|
return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
|
|
}
|
|
if os.IsNotExist(err) {
|
|
os.MkdirAll(dir, 0755)
|
|
}
|
|
wtr, err = os.OpenFile(f, flag, perm)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
f2 := strings.ToLower(f)
|
|
if strings.HasSuffix(f2, ".gz") {
|
|
gz, err := gzip.NewWriterLevel(wtr, Level)
|
|
if err != nil {
|
|
err = fmt.Errorf("xopen: %s", err)
|
|
}
|
|
return &Writer{bufio.NewWriterSize(gz, bufSize), wtr, gz, nil, nil, nil}, err
|
|
}
|
|
if strings.HasSuffix(f2, ".xz") {
|
|
xw, err := xz.NewWriter(wtr)
|
|
return &Writer{bufio.NewWriterSize(xw, bufSize), wtr, nil, xw, nil, nil}, err
|
|
}
|
|
if strings.HasSuffix(f2, ".zst") {
|
|
level := Level
|
|
if level == gzip.DefaultCompression {
|
|
level = 2
|
|
}
|
|
zw, err := zstd.NewWriter(wtr, zstd.WithEncoderLevel(zstd.EncoderLevel(level)))
|
|
if err != nil {
|
|
err = fmt.Errorf("xopen: zstd: %s", err)
|
|
}
|
|
return &Writer{bufio.NewWriterSize(zw, bufSize), wtr, nil, nil, zw, nil}, err
|
|
}
|
|
if strings.HasSuffix(f2, ".bz2") {
|
|
level := Level
|
|
if level == gzip.DefaultCompression {
|
|
level = 6
|
|
}
|
|
bz2, err := bzip2.NewWriter(wtr, &bzip2.WriterConfig{Level: level})
|
|
if err != nil {
|
|
err = fmt.Errorf("xopen: %s", err)
|
|
}
|
|
return &Writer{bufio.NewWriterSize(bz2, bufSize), wtr, nil, nil, nil, bz2}, err
|
|
}
|
|
return &Writer{bufio.NewWriterSize(wtr, bufSize), wtr, nil, nil, nil, nil}, nil
|
|
}
|