Adds the ability to read a gzip-compressed tar file for the taxonomy dump

2026-02-03 06:40:33 +00:00 · 2025-01-24 11:47:59 +01:00
parent ffd67252c3
commit 3137c1f841
17 changed files with 305 additions and 64 deletions
--- a/pkg/obiformats/csv_read.go
+++ b/pkg/obiformats/csv_read.go
@@ -157,9 +157,9 @@ func ReadCSV(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, err
 func ReadCSVFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {

 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
-	file, err := Ropen(filename)
+	file, err := obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
@@ -173,9 +173,9 @@ func ReadCSVFromFile(filename string, options ...WithOption) (obiiter.IBioSequen

 func ReadCSVFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
 	options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
-	input, err := Buf(os.Stdin)
+	input, err := obiutils.Buf(os.Stdin)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("stdin is empty")
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/embl_read.go
+++ b/pkg/obiformats/embl_read.go
@@ -227,9 +227,9 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque

 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

-	reader, err = Ropen(filename)
+	reader, err = obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/fastaseq_read.go
+++ b/pkg/obiformats/fastaseq_read.go
@@ -271,9 +271,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
 func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

-	file, err := Ropen(filename)
+	file, err := obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
@@ -287,9 +287,9 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ

 func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
 	options = append(options, OptionsSource("stdin"))
-	input, err := Buf(os.Stdin)
+	input, err := obiutils.Buf(os.Stdin)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("stdin is empty")
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/fastqseq_read.go
+++ b/pkg/obiformats/fastqseq_read.go
@@ -370,9 +370,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
 func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

-	file, err := Ropen(filename)
+	file, err := obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
@@ -386,9 +386,9 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ

 func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
 	options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
-	input, err := Buf(os.Stdin)
+	input, err := obiutils.Buf(os.Stdin)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("stdin is empty")
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/genbank_read.go
+++ b/pkg/obiformats/genbank_read.go
@@ -266,9 +266,9 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe

 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

-	reader, err = Ropen(filename)
+	reader, err = obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/ncbitaxdump/read.go
+++ b/pkg/obiformats/ncbitaxdump/read.go
@@ -1,204 +0,0 @@
-package ncbitaxdump
-
-import (
-	"bufio"
-	"encoding/csv"
-	"fmt"
-	"io"
-	"os"
-	"path"
-	"strings"
-
-	log "github.com/sirupsen/logrus"
-
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
-)
-
-// loadNodeTable reads a node table from the provided reader and populates the given taxonomy.
-// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
-// The node table is expected to be in CSV format with a custom delimiter ('|') and comments
-// starting with '#'. Each record in the table represents a taxon with its taxid, parent taxid,
-// and rank.
-//
-// Parameters:
-//   - reader: An io.Reader from which the node table is read.
-//   - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon data will be added.
-//
-// The function reads each record from the input, trims whitespace from the taxid, parent, and rank,
-// and adds the taxon to the taxonomy. If an error occurs while adding a taxon, the function logs
-// a fatal error and terminates the program.
-func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
-	file := csv.NewReader(reader)
-	file.Comma = '|'
-	file.Comment = '#'
-	file.TrimLeadingSpace = true
-	file.ReuseRecord = true
-
-	n := 0
-
-	for record, err := file.Read(); err == nil; record, err = file.Read() {
-		n++
-		taxid := strings.TrimSpace(record[0])
-		parent := strings.TrimSpace(record[1])
-		rank := strings.TrimSpace(record[2])
-
-		_, err := taxonomy.AddTaxon(taxid, parent, rank, taxid == "1", false)
-
-		if err != nil {
-			log.Fatalf("Error adding taxon %s: %v\n", taxid, err)
-		}
-	}
-}
-
-// loadNameTable reads a name table from the provided reader and populates the given taxonomy.
-// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
-// The name table is expected to be in a custom format with fields separated by the '|' character.
-// Each record in the table represents a taxon with its taxid, name, and class name.
-//
-// Parameters:
-//   - reader: An io.Reader from which the name table is read.
-//   - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon names will be set.
-//   - onlysn: A boolean flag indicating whether to only process records with the class name "scientific name".
-//
-// Returns:
-//
-//	The number of taxon names successfully loaded into the taxonomy. If a line is too long, -1 is returned.
-//	The function processes each line, trims whitespace from the taxid, name, and class name, and sets
-//	the name in the taxonomy if the conditions are met.
-func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
-	// file := csv.NewReader(reader)
-	// file.Comma = '|'
-	// file.Comment = '#'
-	// file.TrimLeadingSpace = true
-	// file.ReuseRecord = true
-	// file.LazyQuotes = true
-	file := bufio.NewReader(reader)
-
-	n := 0
-	l := 0
-
-	for line, prefix, err := file.ReadLine(); err == nil; line, prefix, err = file.ReadLine() {
-		l++
-		if prefix {
-			return -1
-		}
-
-		record := strings.Split(string(line), "|")
-		taxid := strings.TrimSpace(record[0])
-
-		name := strings.TrimSpace(record[1])
-		classname := strings.TrimSpace(record[3])
-
-		if !onlysn || classname == "scientific name" {
-			n++
-			taxonomy.Taxon(taxid).SetName(name, classname)
-		}
-	}
-
-	return n
-}
-
-// loadMergedTable reads a merged table from the provided reader and populates the given taxonomy.
-// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
-// The merged table is expected to be in CSV format with a custom delimiter ('|') and comments
-// starting with '#'. Each record in the table represents a mapping between an old taxid and a new taxid.
-//
-// Parameters:
-//   - reader: An io.Reader from which the merged table is read.
-//   - taxonomy: A pointer to an obitax.Taxonomy instance where the alias mappings will be added.
-//
-// Returns:
-//
-//	The number of alias mappings successfully loaded into the taxonomy. The function processes
-//	each record, trims whitespace from the old and new taxid, and adds the alias to the taxonomy.
-func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
-	file := csv.NewReader(reader)
-	file.Comma = '|'
-	file.Comment = '#'
-	file.TrimLeadingSpace = true
-	file.ReuseRecord = true
-
-	n := 0
-
-	for record, err := file.Read(); err == nil; record, err = file.Read() {
-		n++
-		oldtaxid := strings.TrimSpace(record[0])
-		newtaxid := strings.TrimSpace(record[1])
-
-		taxonomy.AddAlias(newtaxid, oldtaxid, false)
-	}
-
-	return n
-}
-
-// LoadNCBITaxDump loads the NCBI taxonomy data from the specified directory.
-// It reads the taxonomy nodes, taxon names, and merged taxa from the corresponding files
-// and constructs a Taxonomy object.
-//
-// Parameters:
-//   - directory: A string representing the path to the directory containing the NCBI taxonomy dump files.
-//   - onlysn: A boolean indicating whether to load only scientific names (true) or all names (false).
-//
-// Returns:
-//   - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error
-//     if any of the files cannot be opened or read.
-func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
-
-	taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
-
-	//
-	// Load the Taxonomy nodes
-	//
-
-	log.Printf("Loading Taxonomy nodes\n")
-
-	nodefile, err := os.Open(path.Join(directory, "nodes.dmp"))
-	if err != nil {
-		return nil, fmt.Errorf("cannot open nodes file from '%s'",
-			directory)
-	}
-	defer nodefile.Close()
-
-	buffered := bufio.NewReader(nodefile)
-	loadNodeTable(buffered, taxonomy)
-	log.Printf("%d Taxonomy nodes read\n", taxonomy.Len())
-
-	//
-	// Load the Taxonomy nodes
-	//
-
-	log.Printf("Loading Taxon names\n")
-
-	namefile, nerr := os.Open(path.Join(directory, "names.dmp"))
-	if nerr != nil {
-		return nil, fmt.Errorf("cannot open names file from '%s'",
-			directory)
-	}
-	defer namefile.Close()
-
-	n := loadNameTable(namefile, taxonomy, onlysn)
-	log.Printf("%d taxon names read\n", n)
-
-	//
-	// Load the merged taxa
-	//
-
-	log.Printf("Loading Merged taxa\n")
-
-	aliasfile, aerr := os.Open(path.Join(directory, "merged.dmp"))
-	if aerr != nil {
-		return nil, fmt.Errorf("cannot open merged file from '%s'",
-			directory)
-	}
-	defer aliasfile.Close()
-
-	buffered = bufio.NewReader(aliasfile)
-	n = loadMergedTable(buffered, taxonomy)
-	log.Printf("%d merged taxa read\n", n)
-
-	root := taxonomy.Taxon("1")
-	taxonomy.SetRoot(root)
-
-	return taxonomy, nil
-}
--- a/pkg/obiformats/universal_read.go
+++ b/pkg/obiformats/universal_read.go
@@ -172,15 +172,15 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
 // - error: An error if any occurred during the reading process.
 func ReadSequencesFromFile(filename string,
 	options ...WithOption) (obiiter.IBioSequence, error) {
-	var file *Reader
+	var file *obiutils.Reader
 	var reader io.Reader
 	var err error

 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

-	file, err = Ropen(filename)
+	file, err = obiutils.Ropen(filename)

-	if err == ErrNoContent {
+	if err == obiutils.ErrNoContent {
 		log.Infof("file %s is empty", filename)
 		return ReadEmptyFile(options...)
 	}
--- a/pkg/obiformats/xopen.go
+++ b/pkg/obiformats/xopen.go
@@ -1,437 +0,0 @@
-// This is an integration of the xopen package originally written by Brent Pedersen
-// (https://github.com/brentp/xopen).
-//
-// Here it can be considered as a fork of [Wei Shen](http://shenwei.me) the version :
-//
-//	https://github.com/shenwei356/xopen
-//
-// Package xopen makes it easy to get buffered readers and writers.
-// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
-// Wopen opens a (possibly gzipped) file for buffered writing.
-// Both will use gzip when appropriate and will user buffered IO.
-package obiformats
-
-import (
-	"bufio"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-	"os/exec"
-	"os/user"
-	"path/filepath"
-	"strings"
-
-	"github.com/dsnet/compress/bzip2"
-	"github.com/klauspost/compress/zstd"
-	gzip "github.com/klauspost/pgzip"
-	"github.com/ulikunitz/xz"
-)
-
-// Level is the default compression level of gzip.
-// This value will be automatically adjusted to the default value of zstd or bzip2.
-var Level = gzip.DefaultCompression
-
-// ErrNoContent means nothing in the stream/file.
-var ErrNoContent = errors.New("xopen: no content")
-
-// ErrDirNotSupported means the path is a directory.
-var ErrDirNotSupported = errors.New("xopen: input is a directory")
-
-// IsGzip returns true buffered Reader has the gzip magic.
-func IsGzip(b *bufio.Reader) (bool, error) {
-	return CheckBytes(b, []byte{0x1f, 0x8b})
-}
-
-// IsXz returns true buffered Reader has the xz magic.
-func IsXz(b *bufio.Reader) (bool, error) {
-	return CheckBytes(b, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00})
-}
-
-// IsZst returns true buffered Reader has the zstd magic.
-func IsZst(b *bufio.Reader) (bool, error) {
-	return CheckBytes(b, []byte{0x28, 0xB5, 0x2f, 0xfd})
-}
-
-// IsBzip2 returns true buffered Reader has the bzip2 magic.
-func IsBzip2(b *bufio.Reader) (bool, error) {
-	return CheckBytes(b, []byte{0x42, 0x5a, 0x68})
-}
-
-// IsStdin checks if we are getting data from stdin.
-func IsStdin() bool {
-	// http://stackoverflow.com/a/26567513
-	stat, err := os.Stdin.Stat()
-	if err != nil {
-		return false
-	}
-	return (stat.Mode() & os.ModeCharDevice) == 0
-}
-
-// ExpandUser expands ~/path and ~otheruser/path appropriately
-func ExpandUser(path string) (string, error) {
-	if len(path) == 0 || path[0] != '~' {
-		return path, nil
-	}
-	var u *user.User
-	var err error
-	if len(path) == 1 || path[1] == '/' {
-		u, err = user.Current()
-	} else {
-		name := strings.Split(path[1:], "/")[0]
-		u, err = user.Lookup(name)
-	}
-	if err != nil {
-		return "", err
-	}
-	home := u.HomeDir
-	path = home + "/" + path[1:]
-	return path, nil
-}
-
-// Exists checks if a local file exits
-func Exists(path string) bool {
-	path, perr := ExpandUser(path)
-	if perr != nil {
-		return false
-	}
-	_, err := os.Stat(path)
-	return err == nil
-}
-
-// CheckBytes peeks at a buffered stream and checks if the first read bytes match.
-func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) {
-
-	m, err := b.Peek(len(buf))
-	if err != nil {
-		// return false, ErrNoContent
-		return false, err // EOF
-	}
-	for i := range buf {
-		if m[i] != buf[i] {
-			return false, nil
-		}
-	}
-	return true, nil
-}
-
-// Reader is returned by Ropen
-type Reader struct {
-	*bufio.Reader
-	rdr io.Reader
-	gz  io.ReadCloser
-}
-
-// Close the associated files.
-func (r *Reader) Close() error {
-	var err error
-	if r.gz != nil {
-		err = r.gz.Close()
-		if err != nil {
-			return err
-		}
-	}
-	if c, ok := r.rdr.(io.ReadCloser); ok {
-		err = c.Close()
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// Writer is returned by Wopen
-type Writer struct {
-	*bufio.Writer
-	wtr *os.File
-	gz  *gzip.Writer
-	xw  *xz.Writer
-	zw  *zstd.Encoder
-	bz2 *bzip2.Writer
-}
-
-// Close the associated files.
-func (w *Writer) Close() error {
-	var err error
-	err = w.Flush()
-	if err != nil {
-		return err
-	}
-
-	if w.gz != nil {
-		err = w.gz.Close()
-		if err != nil {
-			return err
-		}
-	}
-	if w.xw != nil {
-		err = w.xw.Close()
-		if err != nil {
-			return err
-		}
-	}
-	if w.zw != nil {
-		err = w.zw.Close()
-		if err != nil {
-			return err
-		}
-	}
-	if w.bz2 != nil {
-		err = w.bz2.Close()
-		if err != nil {
-			return err
-		}
-	}
-	return w.wtr.Close()
-}
-
-// Flush the writer.
-func (w *Writer) Flush() error {
-	var err error
-	err = w.Writer.Flush()
-	if err != nil {
-		return err
-	}
-
-	if w.gz != nil {
-		err = w.gz.Flush()
-		if err != nil {
-			return err
-		}
-	}
-	if w.zw != nil {
-		err = w.zw.Flush()
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-var bufSize = 65536
-
-// Buf returns a buffered reader from an io.Reader
-// If f == "-", then it will attempt to read from os.Stdin.
-// If the file is gzipped, it will be read as such.
-func Buf(r io.Reader) (*Reader, error) {
-	b := bufio.NewReaderSize(r, bufSize)
-	var rd io.Reader
-	var rdr io.ReadCloser
-
-	if is, err := IsGzip(b); err != nil {
-		// check BOM
-		t, _, err := b.ReadRune() // no content
-		if err != nil {
-			return nil, ErrNoContent
-		}
-		if t != '\uFEFF' {
-			b.UnreadRune()
-		}
-		return &Reader{b, r, rdr}, nil // non-gzip file with content less than 2 bytes
-	} else if is {
-		rdr, err = gzip.NewReader(b)
-		if err != nil {
-			return nil, err
-		}
-		b = bufio.NewReaderSize(rdr, bufSize)
-	} else if is, err := IsZst(b); err != nil {
-		// check BOM
-		t, _, err := b.ReadRune() // no content
-		if err != nil {
-			return nil, ErrNoContent
-		}
-		if t != '\uFEFF' {
-			b.UnreadRune()
-		}
-		return &Reader{b, r, rdr}, nil // non-gzip/zst file with content less than 4 bytes
-	} else if is {
-		rd, err = zstd.NewReader(b)
-		if err != nil {
-			return nil, err
-		}
-		b = bufio.NewReaderSize(rd, bufSize)
-	} else if is, err := IsXz(b); err != nil {
-		// check BOM
-		t, _, err := b.ReadRune() // no content
-		if err != nil {
-			return nil, ErrNoContent
-		}
-		if t != '\uFEFF' {
-			b.UnreadRune()
-		}
-		return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
-	} else if is {
-		rd, err = xz.NewReader(b)
-		if err != nil {
-			return nil, err
-		}
-		b = bufio.NewReaderSize(rd, bufSize)
-	} else if is, err := IsBzip2(b); err != nil {
-		// check BOM
-		t, _, err := b.ReadRune() // no content
-		if err != nil {
-			return nil, ErrNoContent
-		}
-		if t != '\uFEFF' {
-			b.UnreadRune()
-		}
-		return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
-	} else if is {
-		rd, err = bzip2.NewReader(b, &bzip2.ReaderConfig{})
-		if err != nil {
-			return nil, err
-		}
-		b = bufio.NewReaderSize(rd, bufSize)
-	}
-
-	// other files with content >= 6 bytes
-
-	// check BOM
-	t, _, err := b.ReadRune()
-	if err != nil {
-		return nil, ErrNoContent
-	}
-	if t != '\uFEFF' {
-		b.UnreadRune()
-	}
-	return &Reader{b, r, rdr}, nil
-}
-
-// XReader returns a reader from a url string or a file.
-func XReader(f string) (io.Reader, error) {
-	if strings.HasPrefix(f, "http://") || strings.HasPrefix(f, "https://") {
-		var rsp *http.Response
-		rsp, err := http.Get(f)
-		if err != nil {
-			return nil, err
-		}
-		if rsp.StatusCode != 200 {
-			return nil, fmt.Errorf("http error downloading %s. status: %s", f, rsp.Status)
-		}
-		rdr := rsp.Body
-		return rdr, nil
-	}
-	f, err := ExpandUser(f)
-	if err != nil {
-		return nil, err
-	}
-
-	fi, err := os.Stat(f)
-	if err != nil {
-		return nil, err
-	}
-	if fi.IsDir() {
-		return nil, ErrDirNotSupported
-	}
-
-	return os.Open(f)
-}
-
-// Ropen opens a buffered reader.
-func Ropen(f string) (*Reader, error) {
-	var err error
-	var rdr io.Reader
-	if f == "-" {
-		if !IsStdin() {
-			return nil, errors.New("stdin not detected")
-		}
-		b, err := Buf(os.Stdin)
-		return b, err
-	} else if f[0] == '|' {
-		// TODO: use csv to handle quoted file names.
-		cmdStrs := strings.Split(f[1:], " ")
-		var cmd *exec.Cmd
-		if len(cmdStrs) == 2 {
-			cmd = exec.Command(cmdStrs[0], cmdStrs[1:]...)
-		} else {
-			cmd = exec.Command(cmdStrs[0])
-		}
-		rdr, err = cmd.StdoutPipe()
-		if err != nil {
-			return nil, err
-		}
-		err = cmd.Start()
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		rdr, err = XReader(f)
-	}
-	if err != nil {
-		return nil, err
-	}
-	b, err := Buf(rdr)
-	return b, err
-}
-
-// Wopen opens a buffered reader.
-// If f == "-", then stdout will be used.
-// If f endswith ".gz", then the output will be gzipped.
-// If f endswith ".xz", then the output will be zx-compressed.
-// If f endswith ".zst", then the output will be zstd-compressed.
-// If f endswith ".bz2", then the output will be bzip2-compressed.
-func Wopen(f string) (*Writer, error) {
-	return WopenFile(f, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
-}
-
-// WopenFile opens a buffered reader.
-// If f == "-", then stdout will be used.
-// If f endswith ".gz", then the output will be gzipped.
-// If f endswith ".xz", then the output will be zx-compressed.
-// If f endswith ".bz2", then the output will be bzip2-compressed.
-func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
-	var wtr *os.File
-	if f == "-" {
-		wtr = os.Stdout
-	} else {
-		dir := filepath.Dir(f)
-		fi, err := os.Stat(dir)
-		if err == nil && !fi.IsDir() {
-			return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
-		}
-		if os.IsNotExist(err) {
-			os.MkdirAll(dir, 0755)
-		}
-		wtr, err = os.OpenFile(f, flag, perm)
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	f2 := strings.ToLower(f)
-	if strings.HasSuffix(f2, ".gz") {
-		gz, err := gzip.NewWriterLevel(wtr, Level)
-		if err != nil {
-			err = fmt.Errorf("xopen: %s", err)
-		}
-		return &Writer{bufio.NewWriterSize(gz, bufSize), wtr, gz, nil, nil, nil}, err
-	}
-	if strings.HasSuffix(f2, ".xz") {
-		xw, err := xz.NewWriter(wtr)
-		return &Writer{bufio.NewWriterSize(xw, bufSize), wtr, nil, xw, nil, nil}, err
-	}
-	if strings.HasSuffix(f2, ".zst") {
-		level := Level
-		if level == gzip.DefaultCompression {
-			level = 2
-		}
-		zw, err := zstd.NewWriter(wtr, zstd.WithEncoderLevel(zstd.EncoderLevel(level)))
-		if err != nil {
-			err = fmt.Errorf("xopen: zstd: %s", err)
-		}
-		return &Writer{bufio.NewWriterSize(zw, bufSize), wtr, nil, nil, zw, nil}, err
-	}
-	if strings.HasSuffix(f2, ".bz2") {
-		level := Level
-		if level == gzip.DefaultCompression {
-			level = 6
-		}
-		bz2, err := bzip2.NewWriter(wtr, &bzip2.WriterConfig{Level: level})
-		if err != nil {
-			err = fmt.Errorf("xopen: %s", err)
-		}
-		return &Writer{bufio.NewWriterSize(bz2, bufSize), wtr, nil, nil, nil, bz2}, err
-	}
-	return &Writer{bufio.NewWriterSize(wtr, bufSize), wtr, nil, nil, nil, nil}, nil
-}
--- a/pkg/obiformats/xopen_test.go
+++ b/pkg/obiformats/xopen_test.go
@@ -1,148 +0,0 @@
-package obiformats
-
-import (
-	"bufio"
-	"bytes"
-	"compress/gzip"
-	"fmt"
-	"io"
-	"os"
-	"strings"
-	"testing"
-
-	. "gopkg.in/check.v1"
-)
-
-func Test(t *testing.T) { TestingT(t) }
-
-type XopenTest struct{}
-
-var _ = Suite(&XopenTest{})
-
-func gzFromString(s string) string {
-	var c bytes.Buffer
-	gz := gzip.NewWriter(&c)
-	gz.Write([]byte(s))
-	return c.String()
-}
-
-var gzTests = []struct {
-	isGz bool
-	data string
-}{
-	{false, "asdf"},
-	{true, gzFromString("asdf")},
-}
-
-func (s *XopenTest) TestIsGzip(c *C) {
-	for _, t := range gzTests {
-		isGz, err := IsGzip(bufio.NewReader(strings.NewReader(t.data)))
-		c.Assert(err, IsNil)
-		c.Assert(t.isGz, Equals, isGz)
-	}
-}
-
-func (s *XopenTest) TestIsStdin(c *C) {
-	r := IsStdin()
-	c.Assert(r, Equals, false)
-}
-
-func (s *XopenTest) TestRopen(c *C) {
-	rdr, err := Ropen("-")
-	c.Assert(err, ErrorMatches, "stdin not detected")
-	c.Assert(rdr, IsNil)
-}
-
-func (s *XopenTest) TestWopen(c *C) {
-	for _, f := range []string{"t.gz", "t"} {
-		testString := "ASDF1234"
-		wtr, err := Wopen(f)
-		c.Assert(err, IsNil)
-		_, err = os.Stat(f)
-		c.Assert(err, IsNil)
-		c.Assert(wtr.wtr, NotNil)
-		fmt.Fprint(wtr, testString)
-		wtr.Close()
-
-		rdr, err := Ropen(f)
-		c.Assert(err, IsNil)
-
-		str, err := rdr.ReadString(99)
-		c.Assert(str, Equals, testString)
-		c.Assert(err, Equals, io.EOF)
-		str, _ = rdr.ReadString(99)
-		c.Assert(str, Equals, "")
-
-		rdr.Close()
-		os.Remove(f)
-	}
-}
-
-var httpTests = []struct {
-	url         string
-	expectError bool
-}{
-	{"https://raw.githubusercontent.com/brentp/xopen/master/README.md", false},
-	{"http://raw.githubusercontent.com/brentp/xopen/master/README.md", false},
-	{"http://raw.githubusercontent.com/brentp/xopen/master/BAD.md", true},
-}
-
-func (s *XopenTest) TestReadHttp(c *C) {
-	for _, t := range httpTests {
-		rdr, err := Ropen(t.url)
-		if !t.expectError {
-			c.Assert(err, IsNil)
-			v, err := rdr.ReadString(byte('\n'))
-			c.Assert(err, IsNil)
-			c.Assert(len(v), Not(Equals), 0)
-		} else {
-			c.Assert(err, ErrorMatches, ".* 404 Not Found")
-		}
-	}
-}
-
-// func (s *XopenTest) TestReadProcess(c *C) {
-// 	for _, cmd := range []string{"|ls -lh", "|ls", "|ls -lh xopen_test.go"} {
-// 		rdr, err := Ropen(cmd)
-// 		c.Assert(err, IsNil)
-// 		b := make([]byte, 1000)
-// 		_, err = rdr.Read(b)
-// 		if err != io.EOF {
-// 			c.Assert(err, IsNil)
-// 		}
-// 		lines := strings.Split(string(b), "\n")
-// 		has := false
-// 		for _, line := range lines {
-// 			if strings.Contains(line, "xopen_test.go") {
-// 				has = true
-// 			}
-// 		}
-// 		c.Assert(has, Equals, true)
-// 	}
-// }
-
-func (s *XopenTest) TestOpenStdout(c *C) {
-	w, err := Wopen("-")
-	c.Assert(err, IsNil)
-	c.Assert(w.wtr, Equals, os.Stdout)
-}
-
-func (s *XopenTest) TestOpenBadFile(c *C) {
-	r, err := Ropen("XXXXXXXXXXXXXXXXXXXXXXX")
-	c.Assert(r, IsNil)
-	c.Assert(err, ErrorMatches, ".*no such file.*")
-}
-
-func (s *XopenTest) TestExists(c *C) {
-	c.Assert(Exists("xopen.go"), Equals, true)
-	c.Assert(Exists("____xx"), Equals, false)
-}
-
-func (s *XopenTest) TestUser(c *C) {
-	c.Assert(Exists("~"), Equals, true)
-}
-
-func (s *XopenTest) TestExpand(c *C) {
-	_, err := ExpandUser("~baduser66")
-	c.Assert(err, Not(IsNil))
-}