mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-26 05:50:52 +00:00
Compare commits
143 Commits
V4.3
...
Release_4.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa819618c2 | ||
|
|
9c162459b0 | ||
|
|
25b494e562 | ||
|
|
0b5cadd104 | ||
|
|
a2106e4e82 | ||
|
|
a8a00ba0f7 | ||
|
|
1595a74ada | ||
|
|
68d723ecba | ||
|
|
250d616129 | ||
|
|
fbf816d219 | ||
|
|
7f0133a196 | ||
|
|
f798f22434 | ||
|
|
248bc9f672 | ||
|
|
7a7db703f1 | ||
|
|
da195ac5cb | ||
|
|
20a0a09f5f | ||
|
|
7d8c578c57 | ||
|
|
d7f615108f | ||
|
|
71574f240b | ||
|
|
c98501a898 | ||
|
|
23f145a4c2 | ||
|
|
fe6d74efbf | ||
|
|
cff8135468 | ||
|
|
02ab683fa0 | ||
|
|
de88e7eecd | ||
|
|
e3c41fc11b | ||
|
|
aa2e94dd6f | ||
|
|
a43e6258be | ||
|
|
12ca62b06a | ||
|
|
09ac15a76b | ||
|
|
16f72e6305 | ||
|
|
6c6c369ee2 | ||
|
|
c5dd477675 | ||
|
|
afcb43b352 | ||
|
|
b26b76cbf8 | ||
|
|
aa468ec462 | ||
|
|
00dcd78e84 | ||
|
|
60f27c1dc8 | ||
|
|
28162ac36f | ||
|
|
1a1adb83ac | ||
|
|
05de9ca58e | ||
|
|
500144051a | ||
|
|
740f66b4c7 | ||
|
|
b49aba9c09 | ||
|
|
52244cdb64 | ||
|
|
0678181023 | ||
|
|
f55dd553c7 | ||
|
|
4a383ac6c9 | ||
|
|
371e702423 | ||
|
|
ac0d3f3fe4 | ||
|
|
547135c747 | ||
|
|
f4a919732e | ||
|
|
e681666aaa | ||
|
|
adf2486295 | ||
|
|
272f5c9c35 | ||
|
|
c1b9503ca6 | ||
|
|
86e60aedd0 | ||
|
|
961abcea7b | ||
|
|
57c65f9d50 | ||
|
|
e65b2a5efe | ||
|
|
3e5f3f76b0 | ||
|
|
ccc827afd3 | ||
|
|
cef29005a5 | ||
|
|
4603d7973e | ||
|
|
8bc47c13d3 | ||
|
|
07cdd6f758 | ||
|
|
432da366e2 | ||
|
|
2d7dc7d09d | ||
|
|
5e12ed5400 | ||
|
|
7500ee1d15 | ||
|
|
5a1d66bf06 | ||
|
|
0844dcc607 | ||
|
|
7f4ebe757e | ||
|
|
5150947e23 | ||
|
|
d17a9520b9 | ||
|
|
29bf4ce871 | ||
|
|
d7ed9d343e | ||
|
|
82b6bb1ab6 | ||
|
|
6d204f6281 | ||
|
|
7a6d552450 | ||
|
|
412b54822c | ||
|
|
730d448fc3 | ||
|
|
04f3af3e60 | ||
|
|
997b6e8c01 | ||
|
|
f239e8da92 | ||
|
|
ed28d3fb5b | ||
|
|
43b285587e | ||
|
|
8d53d253d4 | ||
|
|
8c26fc9884 | ||
|
|
235a7e202a | ||
|
|
27fa984a63 | ||
|
|
add9d89ccc | ||
|
|
9965370d85 | ||
|
|
8a2bb1fe82 | ||
|
|
efc3f3af29 | ||
|
|
1c6ab1c559 | ||
|
|
38dcd98d4a | ||
|
|
7b23985693 | ||
|
|
d31e677304 | ||
|
|
6cb7a5a352 | ||
|
|
3424d3057f | ||
|
|
f9324dd8f4 | ||
|
|
f1b9ac4a13 | ||
|
|
e065e2963b | ||
|
|
13ff892ac9 | ||
|
|
c0ecaf90ab | ||
|
|
a57cfda675 | ||
|
|
c2f38e737b | ||
|
|
0aec5ba4df | ||
|
|
67e5b6ef24 | ||
|
|
3b1aa2869e | ||
|
|
7542e33010 | ||
|
|
03b5ce9397 | ||
|
|
2d52322876 | ||
|
|
fd80249b85 | ||
|
|
5a3705b6bb | ||
|
|
2ab6f67d58 | ||
|
|
8b379d30da | ||
|
|
8448783499 | ||
|
|
d1c31c54de | ||
|
|
7a9dc1ab3b | ||
|
|
3a1cf4fe97 | ||
|
|
83926c91e1 | ||
|
|
937a483aa6 | ||
|
|
dada70e6b1 | ||
|
|
62e5a93492 | ||
|
|
f21f51ae62 | ||
|
|
3b5d4ba455 | ||
|
|
50d11ce374 | ||
|
|
52d5f6fe11 | ||
|
|
78caabd2fd | ||
|
|
65bd29b955 | ||
|
|
b18c9b7ac6 | ||
|
|
78df7db18d | ||
|
|
fc08c12ab0 | ||
|
|
0339e4dffa | ||
|
|
706b44c37f | ||
|
|
fbe7d15dc3 | ||
|
|
b5cf586f17 | ||
|
|
286e27d6ba | ||
|
|
996ec69bd9 | ||
|
|
5f9182d25b | ||
|
|
9913fa8354 |
172
.github/workflows/release.yml
vendored
Normal file
172
.github/workflows/release.yml
vendored
Normal file
@@ -0,0 +1,172 @@
|
||||
name: Create Release on Tag
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "Release_*"
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
# First run tests
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "1.23"
|
||||
- name: Checkout obitools4 project
|
||||
uses: actions/checkout@v4
|
||||
- name: Run tests
|
||||
run: make githubtests
|
||||
|
||||
# Build binaries for each platform
|
||||
build:
|
||||
needs: test
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
output_name: linux_amd64
|
||||
- os: ubuntu-24.04-arm
|
||||
goos: linux
|
||||
goarch: arm64
|
||||
output_name: linux_arm64
|
||||
- os: macos-15-intel
|
||||
goos: darwin
|
||||
goarch: amd64
|
||||
output_name: darwin_amd64
|
||||
- os: macos-latest
|
||||
goos: darwin
|
||||
goarch: arm64
|
||||
output_name: darwin_arm64
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: "1.23"
|
||||
|
||||
- name: Extract version from tag
|
||||
id: get_version
|
||||
run: |
|
||||
TAG=${GITHUB_REF#refs/tags/Release_}
|
||||
echo "version=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Install build tools (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: |
|
||||
# Ensure Xcode Command Line Tools are installed
|
||||
xcode-select --install 2>/dev/null || true
|
||||
xcode-select -p
|
||||
|
||||
- name: Build binaries
|
||||
env:
|
||||
GOOS: ${{ matrix.goos }}
|
||||
GOARCH: ${{ matrix.goarch }}
|
||||
VERSION: ${{ steps.get_version.outputs.version }}
|
||||
run: |
|
||||
make obitools
|
||||
mkdir -p artifacts
|
||||
# Create a single tar.gz with all binaries for this platform
|
||||
tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binaries-${{ matrix.output_name }}
|
||||
path: artifacts/*
|
||||
|
||||
# Create the release
|
||||
create-release:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Extract version from tag
|
||||
id: get_version
|
||||
run: |
|
||||
TAG=${GITHUB_REF#refs/tags/Release_}
|
||||
echo "version=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Download all artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: release-artifacts
|
||||
|
||||
- name: Prepare release directory
|
||||
run: |
|
||||
mkdir -p release
|
||||
find release-artifacts -type f -name "*.tar.gz" -exec cp {} release/ \;
|
||||
ls -lh release/
|
||||
|
||||
- name: Generate Release Notes
|
||||
env:
|
||||
VERSION: ${{ steps.get_version.outputs.version }}
|
||||
run: |
|
||||
PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
|
||||
|
||||
echo "# OBITools4 Release ${VERSION}" > release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
|
||||
if [ -n "$PREV_TAG" ]; then
|
||||
echo "## Changes since ${PREV_TAG}" >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
git log ${PREV_TAG}..HEAD --pretty=format:"- %s" >> release_notes.md
|
||||
else
|
||||
echo "## Changes" >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
git log --pretty=format:"- %s" -n 20 >> release_notes.md
|
||||
fi
|
||||
|
||||
echo "" >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "## Installation" >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "Download the appropriate archive for your system and extract it:" >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "### Linux (AMD64)" >> release_notes.md
|
||||
echo '```bash' >> release_notes.md
|
||||
echo "tar -xzf obitools4_${VERSION}_linux_amd64.tar.gz" >> release_notes.md
|
||||
echo '```' >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "### Linux (ARM64)" >> release_notes.md
|
||||
echo '```bash' >> release_notes.md
|
||||
echo "tar -xzf obitools4_${VERSION}_linux_arm64.tar.gz" >> release_notes.md
|
||||
echo '```' >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "### macOS (Intel)" >> release_notes.md
|
||||
echo '```bash' >> release_notes.md
|
||||
echo "tar -xzf obitools4_${VERSION}_darwin_amd64.tar.gz" >> release_notes.md
|
||||
echo '```' >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "### macOS (Apple Silicon)" >> release_notes.md
|
||||
echo '```bash' >> release_notes.md
|
||||
echo "tar -xzf obitools4_${VERSION}_darwin_arm64.tar.gz" >> release_notes.md
|
||||
echo '```' >> release_notes.md
|
||||
echo "" >> release_notes.md
|
||||
echo "All OBITools4 binaries are included in each archive." >> release_notes.md
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
name: Release ${{ steps.get_version.outputs.version }}
|
||||
body_path: release_notes.md
|
||||
files: release/*
|
||||
draft: false
|
||||
prerelease: false
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -16,12 +16,18 @@
|
||||
**/*.tgz
|
||||
**/*.yaml
|
||||
**/*.csv
|
||||
xx
|
||||
|
||||
.rhistory
|
||||
/.vscode
|
||||
/build
|
||||
/bugs
|
||||
|
||||
/ncbitaxo
|
||||
|
||||
!/obitests/**
|
||||
!/sample/**
|
||||
LLM/**
|
||||
*_files
|
||||
|
||||
entropy.html
|
||||
96
Makefile
96
Makefile
@@ -2,8 +2,9 @@
|
||||
#export GOBIN=$(GOPATH)/bin
|
||||
#export PATH=$(GOBIN):$(shell echo $${PATH})
|
||||
|
||||
GOFLAGS=
|
||||
GOCMD=go
|
||||
GOBUILD=$(GOCMD) build # -compiler gccgo -gccgoflags -O3
|
||||
GOBUILD=$(GOCMD) build $(GOFLAGS)
|
||||
GOGENERATE=$(GOCMD) generate
|
||||
GOCLEAN=$(GOCMD) clean
|
||||
GOTEST=$(GOCMD) test
|
||||
@@ -16,6 +17,12 @@ PACKAGES_SRC:= $(wildcard pkg/*/*.go pkg/*/*/*.go)
|
||||
PACKAGE_DIRS:=$(sort $(patsubst %/,%,$(dir $(PACKAGES_SRC))))
|
||||
PACKAGES:=$(notdir $(PACKAGE_DIRS))
|
||||
|
||||
GITHOOK_SRC_DIR=git-hooks
|
||||
GITHOOKS_SRC:=$(wildcard $(GITHOOK_SRC_DIR)/*)
|
||||
|
||||
GITHOOK_DIR=.git/hooks
|
||||
GITHOOKS:=$(patsubst $(GITHOOK_SRC_DIR)/%,$(GITHOOK_DIR)/%,$(GITHOOKS_SRC))
|
||||
|
||||
OBITOOLS_SRC:= $(wildcard cmd/obitools/*/*.go)
|
||||
OBITOOLS_DIRS:=$(sort $(patsubst %/,%,$(dir $(OBITOOLS_SRC))))
|
||||
OBITOOLS:=$(notdir $(OBITOOLS_DIRS))
|
||||
@@ -53,35 +60,32 @@ endif
|
||||
|
||||
OUTPUT:=$(shell mktemp)
|
||||
|
||||
all: obitools
|
||||
all: install-githook obitools
|
||||
|
||||
packages: $(patsubst %,pkg-%,$(PACKAGES))
|
||||
obitools: $(patsubst %,$(OBITOOLS_PREFIX)%,$(OBITOOLS))
|
||||
|
||||
install-githook: $(GITHOOKS)
|
||||
|
||||
$(GITHOOK_DIR)/%: $(GITHOOK_SRC_DIR)/%
|
||||
@echo installing $$(basename $@)...
|
||||
@mkdir -p $(GITHOOK_DIR)
|
||||
@cp $< $@
|
||||
@chmod +x $@
|
||||
|
||||
|
||||
update-deps:
|
||||
go get -u ./...
|
||||
|
||||
test:
|
||||
test: .FORCE
|
||||
$(GOTEST) ./...
|
||||
|
||||
obitests:
|
||||
@for t in $$(find obitests -name test.sh -print) ; do \
|
||||
bash $${t} ;\
|
||||
bash $${t} || exit 1;\
|
||||
done
|
||||
|
||||
githubtests: obitools obitests
|
||||
|
||||
man:
|
||||
make -C doc man
|
||||
obibook:
|
||||
make -C doc obibook
|
||||
doc: man obibook
|
||||
|
||||
macos-pkg:
|
||||
@bash pkgs/macos/macos-installer-builder-master/macOS-x64/build-macos-x64.sh \
|
||||
OBITools \
|
||||
0.0.1
|
||||
|
||||
$(BUILD_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
@@ -90,19 +94,61 @@ $(foreach P,$(PACKAGE_DIRS),$(eval $(call MAKE_PKG_RULE,$(P))))
|
||||
|
||||
$(foreach P,$(OBITOOLS_DIRS),$(eval $(call MAKE_OBITOOLS_RULE,$(P))))
|
||||
|
||||
pkg/obioptions/version.go: .FORCE
|
||||
ifneq ($(strip $(COMMIT_ID)),)
|
||||
@cat $@ \
|
||||
| sed -E 's/^var _Commit = "[^"]*"/var _Commit = "'$(COMMIT_ID)'"/' \
|
||||
| sed -E 's/^var _Version = "[^"]*"/var _Version = "'"$(LAST_TAG)"'"/' \
|
||||
pkg/obioptions/version.go: version.txt .FORCE
|
||||
@version=$$(cat version.txt); \
|
||||
cat $@ \
|
||||
| sed -E 's/^var _Version = "[^"]*"/var _Version = "Release '$$version'"/' \
|
||||
> $(OUTPUT)
|
||||
|
||||
@diff $@ $(OUTPUT) 2>&1 > /dev/null \
|
||||
|| echo "Update version.go : $@ to $(LAST_TAG) ($(COMMIT_ID))" \
|
||||
&& mv $(OUTPUT) $@
|
||||
|| (echo "Update version.go to $$(cat version.txt)" && mv $(OUTPUT) $@)
|
||||
|
||||
@rm -f $(OUTPUT)
|
||||
endif
|
||||
|
||||
.PHONY: all packages obitools man obibook doc update-deps obitests githubtests .FORCE
|
||||
bump-version:
|
||||
@echo "Incrementing version..."
|
||||
@current=$$(cat version.txt); \
|
||||
echo " Current version: $$current"; \
|
||||
major=$$(echo $$current | cut -d. -f1); \
|
||||
minor=$$(echo $$current | cut -d. -f2); \
|
||||
patch=$$(echo $$current | cut -d. -f3); \
|
||||
new_patch=$$((patch + 1)); \
|
||||
new_version="$$major.$$minor.$$new_patch"; \
|
||||
echo " New version: $$new_version"; \
|
||||
echo "$$new_version" > version.txt
|
||||
@echo "✓ Version updated in version.txt"
|
||||
@$(MAKE) pkg/obioptions/version.go
|
||||
|
||||
jjnew:
|
||||
@echo "$(YELLOW)→ Creating a new commit...$(NC)"
|
||||
@echo "$(BLUE)→ Documenting current commit...$(NC)"
|
||||
@jj auto-describe
|
||||
@echo "$(BLUE)→ Done.$(NC)"
|
||||
@jj new
|
||||
@echo "$(GREEN)✓ New commit created$(NC)"
|
||||
|
||||
jjpush:
|
||||
@echo "$(YELLOW)→ Pushing commit to repository...$(NC)"
|
||||
@echo "$(BLUE)→ Documenting current commit...$(NC)"
|
||||
@jj auto-describe
|
||||
@echo "$(BLUE)→ Creating new commit for version bump...$(NC)"
|
||||
@jj new
|
||||
@$(MAKE) bump-version
|
||||
@echo "$(BLUE)→ Documenting version bump commit...$(NC)"
|
||||
@jj auto-describe
|
||||
@version=$$(cat version.txt); \
|
||||
tag_name="Release_$$version"; \
|
||||
echo "$(BLUE)→ Pushing commits and creating tag $$tag_name...$(NC)"; \
|
||||
jj git push --change @; \
|
||||
git tag -a "$$tag_name" -m "Release $$version" 2>/dev/null || echo "Tag $$tag_name already exists"; \
|
||||
git push origin "$$tag_name" 2>/dev/null || echo "Tag already pushed"
|
||||
@echo "$(GREEN)✓ Commits and tag pushed to repository$(NC)"
|
||||
|
||||
jjfetch:
|
||||
@echo "$(YELLOW)→ Pulling latest commits...$(NC)"
|
||||
@jj git fetch
|
||||
@jj new master@origin
|
||||
@echo "$(GREEN)✓ Latest commits pulled$(NC)"
|
||||
|
||||
.PHONY: all obitools update-deps obitests githubtests jjnew jjpush jjfetch bump-version .FORCE
|
||||
.FORCE:
|
||||
34
README.md
34
README.md
@@ -16,12 +16,17 @@ The easiest way to run it is to copy and paste the following command into your t
|
||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | bash
|
||||
```
|
||||
|
||||
By default, the script installs the *OBITools* commands and other associated files into the `/usr/local` directory.
|
||||
The names of the commands in the new *OBITools4* are mostly identical to those in *OBITools2*.
|
||||
Therefore, installing the new *OBITools* may hide or delete the old ones. If you want both versions to be
|
||||
available on your system, the installation script offers two options:
|
||||
By default, the script installs the latest version of *OBITools* commands and other associated files into the `/usr/local` directory.
|
||||
|
||||
### Installation Options
|
||||
|
||||
The installation script offers several options:
|
||||
|
||||
> -l, --list List all available versions and exit.
|
||||
>
|
||||
> -v, --version Install a specific version (e.g., `-v 4.4.3`).
|
||||
> By default, the latest version is installed.
|
||||
>
|
||||
> -i, --install-dir Directory where obitools are installed
|
||||
> (as example use `/usr/local` not `/usr/local/bin`).
|
||||
>
|
||||
@@ -30,14 +35,31 @@ available on your system, the installation script offers two options:
|
||||
> same time on your system (as example `-p g` will produce
|
||||
> `gobigrep` command instead of `obigrep`).
|
||||
|
||||
You can use these options by following the installation command:
|
||||
### Examples
|
||||
|
||||
List all available versions:
|
||||
```{bash}
|
||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | bash -s -- --list
|
||||
```
|
||||
|
||||
Install a specific version:
|
||||
```{bash}
|
||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | bash -s -- --version 4.4.3
|
||||
```
|
||||
|
||||
Install in a custom directory with command prefix:
|
||||
```{bash}
|
||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | \
|
||||
bash -s -- --install-dir test_install --obitools-prefix k
|
||||
```
|
||||
|
||||
In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`.
|
||||
In this last example, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`.
|
||||
|
||||
### Note on Version Compatibility
|
||||
|
||||
The names of the commands in the new *OBITools4* are mostly identical to those in *OBITools2*.
|
||||
Therefore, installing the new *OBITools* may hide or delete the old ones. If you want both versions to be
|
||||
available on your system, use the `--install-dir` and `--obitools-prefix` options as shown above.
|
||||
|
||||
## Continuing the analysis...
|
||||
|
||||
|
||||
@@ -1,10 +1,66 @@
|
||||
# OBITools release notes
|
||||
|
||||
## March 2nd, 2025. Release 4.3.0
|
||||
## New changes
|
||||
|
||||
### Bug fixes
|
||||
|
||||
- In `obipairing`, the misspelled `obiparing_*` tags (the `i` was missing) are
|
||||
now correctly named `obipairing_*`.
|
||||
|
||||
- In `obigrep` the **-C** option that excludes sequences too abundant was not
|
||||
functional.
|
||||
|
||||
- In `obitaxonomy` the **-l** option that lists all the taxonomic ranks defined by
|
||||
a taxonomy was not functional
|
||||
|
||||
- The file type guesser was not using enough data to be able to correctly detect
|
||||
file format when sequences were too long in fastq and fasta or when lines were
|
||||
too long in CSV files. This is now corrected.
|
||||
|
||||
- Options **--fasta** or **--fastq** usable to specify input format were ignored.
|
||||
They are now correctly considered
|
||||
|
||||
- The `obiannotate` command was crashing when a selection option was used but
|
||||
no editing option.
|
||||
|
||||
- The `--fail-on-taxonomy` option led to an error on merged taxa even when the
|
||||
`--update-taxid` option was used.
|
||||
|
||||
- The `--compressed` option was not correctly named. It was renamed to `--compress`
|
||||
|
||||
### Enhancement
|
||||
|
||||
- Some sequences in the Genbank and EMBL databases are several gigabases long. The
|
||||
sequence parser had to reallocate and recopy memory many times to read them,
|
||||
resulting in a complexity of O(N^2) for reading such large sequences.
|
||||
The new file chunk reader has a linear algorithm that speeds up the reading
|
||||
of very long sequences.
|
||||
|
||||
- A new option **--csv** is added to every obitools to indicate that the input
|
||||
format is CSV
|
||||
|
||||
- The new version of obitools now prints the taxids in a fancy way
|
||||
including the scientific name and the taxonomic rank (`"taxon:9606 [Homo
|
||||
sapiens]@species"`). But if you need the old-fashioned raw taxid, a new option
|
||||
**--raw-taxid** has been added to get obitools printing the taxids without any
|
||||
decorations (`"9606"`).
|
||||
|
||||
|
||||
## March 1st, 2025. Release 4.4.0
|
||||
|
||||
A new documentation website is available at https://obitools4.metabarcoding.org.
|
||||
Its development is still in progress.
|
||||
|
||||
The biggest step forward in this new version is taxonomy management. The new
|
||||
version is now able to handle taxonomic identifiers that are not just integer
|
||||
values. This is a first step towards an easy way to handle other taxonomy
|
||||
databases soon, such as the GBIF or Catalog of Life taxonomies. This version
|
||||
is able to handle files containing taxonomic information created by previous
|
||||
versions of OBITools, but files created by this new version may have some
|
||||
problems to be analyzed by previous versions, at least for the taxonomic
|
||||
information.
|
||||
|
||||
|
||||
### Breaking changes
|
||||
|
||||
- In `obimultiplex`, the short version of the **--tag-list** option used to
|
||||
@@ -75,8 +131,24 @@ Its development is still in progress.
|
||||
|
||||
### Enhancement
|
||||
|
||||
- All obitools now have a **--taxonomy** option. If specified, the taxonomy is
|
||||
loaded first and taxids annotating the sequences are validated against that
|
||||
taxonomy. A warning is issued for any invalid taxid and for any taxid that
|
||||
is transferred to a new taxid. The **--update-taxid** option allows these
|
||||
old taxids to be replaced with their new equivalent in the result of the
|
||||
obitools command.
|
||||
|
||||
- The scoring system used by the `obipairing` command has been changed to be
|
||||
more coherent. In the new version, the scores associated to a match and a
|
||||
mismatch involving a nucleotide with a quality score of 0 are equal. Which
|
||||
is normal as a zero quality score means a perfect indecision on the read
|
||||
nucleotide, therefore there is no reason to penalize a match differently
|
||||
from a mismatch (see
|
||||
https://obitools4.metabarcoding.org/docs/commands/alignments/obipairing/exact-alignment/).
|
||||
|
||||
- In every *OBITools* command, the progress bar is automatically deactivated
|
||||
when the standard error output is redirected.
|
||||
|
||||
- Because Genbank and ENA:EMBL contain very large sequences, while
|
||||
OBITools4 is optimized for short sequences, `obipcr` faces some problems
|
||||
@@ -85,8 +157,10 @@ Its development is still in progress.
|
||||
features, currently only available for FASTA and FASTQ file readers, have
|
||||
been implemented to limit the memory impact of `obipcr` without changing the
|
||||
computational efficiency too much.
|
||||
|
||||
- The logging system, and therefore its output format, has been homogenized.
|
||||
|
||||
## August 2nd, 2024. Release 4.3.0
|
||||
|
||||
### Change of git repository
|
||||
|
||||
|
||||
213
blackboard/Prospective/kmer_index_design.md
Normal file
213
blackboard/Prospective/kmer_index_design.md
Normal file
@@ -0,0 +1,213 @@
|
||||
# Index de k-mers pour génomes de grande taille
|
||||
|
||||
## Contexte et objectifs
|
||||
|
||||
### Cas d'usage
|
||||
|
||||
- Indexation de k-mers longs (k=31) pour des génomes de grande taille (< 10 Go par génome)
|
||||
- Nombre de génomes : plusieurs dizaines à quelques centaines
|
||||
- Indexation en parallèle
|
||||
- Stockage sur disque
|
||||
- Possibilité d'ajouter des génomes, mais pas de modifier un génome existant
|
||||
|
||||
### Requêtes cibles
|
||||
|
||||
- **Présence/absence** d'un k-mer dans un génome
|
||||
- **Intersection** entre génomes
|
||||
- **Distances** : Jaccard (présence/absence) et potentiellement Bray-Curtis (comptage)
|
||||
|
||||
### Ressources disponibles
|
||||
|
||||
- 128 Go de RAM
|
||||
- Stockage disque
|
||||
|
||||
---
|
||||
|
||||
## Estimation des volumes
|
||||
|
||||
### Par génome
|
||||
|
||||
- **10 Go de séquence** → ~10¹⁰ k-mers bruts (chevauchants)
|
||||
- **Après déduplication** : typiquement 10-50% de k-mers uniques → **~1-5 × 10⁹ k-mers distincts**
|
||||
|
||||
### Espace théorique
|
||||
|
||||
- **k=31** → 62 bits → ~4.6 × 10¹⁸ k-mers possibles
|
||||
- Table d'indexation directe impossible
|
||||
|
||||
---
|
||||
|
||||
## Métriques de distance
|
||||
|
||||
### Présence/absence (binaire)
|
||||
|
||||
- **Jaccard** : |A ∩ B| / |A ∪ B|
|
||||
- **Sørensen-Dice** : 2|A ∩ B| / (|A| + |B|)
|
||||
|
||||
### Comptage (abondance)
|
||||
|
||||
- **Bray-Curtis** : 1 - (2 × Σ min(aᵢ, bᵢ)) / (Σ aᵢ + Σ bᵢ)
|
||||
|
||||
Note : Pour Bray-Curtis, le stockage des comptages est nécessaire, ce qui augmente significativement la taille de l'index.
|
||||
|
||||
---
|
||||
|
||||
## Options d'indexation
|
||||
|
||||
### Option 1 : Bloom Filter par génome
|
||||
|
||||
**Principe** : Structure probabiliste pour test d'appartenance.
|
||||
|
||||
**Avantages :**
|
||||
- Très compact : ~10 bits/élément pour FPR ~1%
|
||||
- Construction rapide, streaming
|
||||
- Facile à sérialiser/désérialiser
|
||||
- Intersection et Jaccard estimables via formules analytiques
|
||||
|
||||
**Inconvénients :**
|
||||
- Faux positifs (pas de faux négatifs)
|
||||
- Distances approximatives
|
||||
|
||||
**Taille estimée** : 1-6 Go par génome (selon FPR cible)
|
||||
|
||||
#### Dimensionnement des Bloom filters
|
||||
|
||||
```
|
||||
\mathrm{FPR} \;=\; \left(1 - e^{-k n / m}\right)^{k}
|
||||
```
|
||||
|
||||
|
||||
| Bits/élément | FPR optimal | k (hash functions) |
|
||||
|--------------|-------------|---------------------|
|
||||
| 8 | ~2% | 5-6 |
|
||||
| 10 | ~1% | 7 |
|
||||
| 12 | ~0.3% | 8 |
|
||||
| 16 | ~0.05% | 11 |
|
||||
|
||||
Formule du taux de faux positifs :
|
||||
```
|
||||
FPR ≈ (1 - e^(-kn/m))^k
|
||||
```
|
||||
Où n = nombre d'éléments, m = nombre de bits, k = nombre de hash functions.
|
||||
|
||||
### Option 2 : Ensemble trié de k-mers
|
||||
|
||||
**Principe** : Stocker les k-mers (uint64) triés, avec compression possible.
|
||||
|
||||
**Avantages :**
|
||||
- Exact (pas de faux positifs)
|
||||
- Intersection/union par merge sort O(n+m)
|
||||
- Compression efficace (delta encoding sur k-mers triés)
|
||||
|
||||
**Inconvénients :**
|
||||
- Plus volumineux : 8 octets/k-mer
|
||||
- Construction plus lente (tri nécessaire)
|
||||
|
||||
**Taille estimée** : 8-40 Go par génome (non compressé)
|
||||
|
||||
### Option 3 : MPHF (Minimal Perfect Hash Function)
|
||||
|
||||
**Principe** : Fonction de hash parfaite minimale pour les k-mers présents.
|
||||
|
||||
**Avantages :**
|
||||
- Très compact : ~3-4 bits/élément
|
||||
- Lookup O(1)
|
||||
- Exact pour les k-mers présents
|
||||
|
||||
**Inconvénients :**
|
||||
- Construction coûteuse (plusieurs passes)
|
||||
- Statique (pas d'ajout de k-mers après construction)
|
||||
- Ne distingue pas "absent" vs "jamais vu" sans structure auxiliaire
|
||||
|
||||
### Option 4 : Hybride MPHF + Bloom filter
|
||||
|
||||
- MPHF pour mapping compact des k-mers présents
|
||||
- Bloom filter pour pré-filtrage des absents
|
||||
|
||||
---
|
||||
|
||||
## Optimisation : Indexation de (k-2)-mers pour requêtes k-mers
|
||||
|
||||
### Principe
|
||||
|
||||
Au lieu d'indexer directement les 31-mers dans un Bloom filter, on indexe les 29-mers. Pour tester la présence d'un 31-mer, on vérifie que les **trois 29-mers** qu'il contient sont présents :
|
||||
|
||||
- positions 0-28
|
||||
- positions 1-29
|
||||
- positions 2-30
|
||||
|
||||
### Analyse probabiliste
|
||||
|
||||
Si le Bloom filter a un FPR de p pour un 29-mer individuel, le FPR effectif pour un 31-mer devient **p³** (les trois requêtes doivent toutes être des faux positifs).
|
||||
|
||||
| FPR 29-mer | FPR 31-mer effectif |
|
||||
|------------|---------------------|
|
||||
| 10% | 0.1% |
|
||||
| 5% | 0.0125% |
|
||||
| 1% | 0.0001% |
|
||||
|
||||
### Avantages
|
||||
|
||||
1. **Moins d'éléments à stocker** : il y a moins de 29-mers distincts que de 31-mers distincts dans un génome (deux 31-mers différents peuvent partager un même 29-mer)
|
||||
|
||||
2. **FPR drastiquement réduit** : FPR³ avec seulement 3 requêtes
|
||||
|
||||
3. **Index plus compact** : on peut utiliser moins de bits par élément (FPR plus élevé acceptable sur le 29-mer) tout en obtenant un FPR très bas sur le 31-mer
|
||||
|
||||
### Trade-off
|
||||
|
||||
Un Bloom filter à **5-6 bits/élément** pour les 29-mers donnerait un FPR effectif < 0.01% pour les 31-mers, soit environ **2× plus compact** que l'approche directe à qualité égale.
|
||||
|
||||
**Coût** : 3× plus de requêtes par lookup (mais les requêtes Bloom sont très rapides).
|
||||
|
||||
---
|
||||
|
||||
## Accélération des calculs de distance : MinHash
|
||||
|
||||
### Principe
|
||||
|
||||
Pré-calculer une "signature" compacte (sketch) de chaque génome permettant d'estimer rapidement Jaccard sans charger les index complets.
|
||||
|
||||
### Avantages
|
||||
|
||||
- Matrice de distances entre 100+ génomes en quelques secondes
|
||||
- Signature de taille fixe (ex: 1000-10000 hash values) quel que soit le génome
|
||||
- Stockage minimal
|
||||
|
||||
### Utilisation
|
||||
|
||||
1. Construction : une passe sur les k-mers de chaque génome
|
||||
2. Distance : comparaison des sketches en O(taille du sketch)
|
||||
|
||||
---
|
||||
|
||||
## Architecture recommandée
|
||||
|
||||
### Pour présence/absence + Jaccard
|
||||
|
||||
1. **Index principal** : Bloom filter de (k-2)-mers avec l'optimisation décrite
|
||||
- Compact (~3-5 Go par génome)
|
||||
- FPR très bas pour les k-mers grâce aux requêtes triples
|
||||
|
||||
2. **Sketches MinHash** : pour calcul rapide des distances entre génomes
|
||||
- Quelques Ko par génome
|
||||
- Permet exploration rapide de la matrice de distances
|
||||
|
||||
### Pour comptage + Bray-Curtis
|
||||
|
||||
1. **Index principal** : k-mers triés + comptages
|
||||
- uint64 (k-mer) + uint8/uint16 (count)
|
||||
- Compression delta possible
|
||||
- Plus volumineux mais exact
|
||||
|
||||
2. **Sketches** : variantes de MinHash pour données pondérées (ex: HyperMinHash)
|
||||
|
||||
---
|
||||
|
||||
## Prochaines étapes
|
||||
|
||||
1. Implémenter un Bloom filter optimisé pour k-mers
|
||||
2. Implémenter l'optimisation (k-2)-mer → k-mer
|
||||
3. Implémenter MinHash pour les sketches
|
||||
4. Définir le format de sérialisation sur disque
|
||||
5. Benchmarker sur des génomes réels
|
||||
@@ -30,7 +30,11 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obiannotate.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiannotate",
|
||||
"edits the sequence annotations",
|
||||
obiannotate.OptionSet,
|
||||
)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
@@ -38,6 +42,11 @@ func main() {
|
||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||
|
||||
annotator := obiannotate.CLIAnnotationPipeline()
|
||||
|
||||
if obiannotate.CLIHasSetNumberFlag() {
|
||||
sequences = sequences.NumberSequences(1, !obiconvert.CLINoInputOrder())
|
||||
}
|
||||
|
||||
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
||||
|
||||
obiutils.WaitForLastPipe()
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obiclean.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiclean",
|
||||
"",
|
||||
obiclean.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -14,7 +14,10 @@ import (
|
||||
func main() {
|
||||
obidefault.SetBatchSize(10)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obicleandb",
|
||||
"clean-up reference databases",
|
||||
obicleandb.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obiconvert.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obicomplement",
|
||||
"reverse complement of sequences",
|
||||
obiconvert.OptionSet(true))
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obiconsensus.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiconsensus",
|
||||
"ONT reads denoising",
|
||||
obiconsensus.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -14,7 +14,10 @@ func main() {
|
||||
obidefault.SetStrictReadWorker(2)
|
||||
obidefault.SetStrictWriteWorker(2)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obiconvert.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiconvert",
|
||||
"convertion of sequence files to various formats",
|
||||
obiconvert.OptionSet(true))
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@ func main() {
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obicount",
|
||||
"counts the sequences present in a file of sequences",
|
||||
obiconvert.InputOptionSet,
|
||||
obicount.OptionSet,
|
||||
)
|
||||
|
||||
@@ -10,7 +10,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obicsv.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obicsv",
|
||||
"converts sequence files to CSV format",
|
||||
obicsv.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -15,7 +15,10 @@ func main() {
|
||||
obidefault.SetStrictReadWorker(2)
|
||||
obidefault.SetStrictWriteWorker(2)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obidemerge",
|
||||
"",
|
||||
obidemerge.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obidistribute.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obidistribute",
|
||||
"divided an input set of sequences into subsets",
|
||||
obidistribute.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -30,7 +30,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obigrep.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obigrep",
|
||||
"select a subset of sequences on various criteria",
|
||||
obigrep.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -15,7 +15,10 @@ func main() {
|
||||
obidefault.SetStrictReadWorker(2)
|
||||
obidefault.SetStrictWriteWorker(2)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obijoin",
|
||||
"merge annotations contained in a file to another file",
|
||||
obijoin.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -31,7 +31,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obikmersim.MatchOptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obikmermatch",
|
||||
"",
|
||||
obikmersim.MatchOptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -32,7 +32,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obikmersim.CountOptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obikmersimcount",
|
||||
"",
|
||||
obikmersim.CountOptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obilandmark.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obilandmark",
|
||||
"",
|
||||
obilandmark.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
47
cmd/obitools/obilowmask/main.go
Normal file
47
cmd/obitools/obilowmask/main.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obilowmask"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
defer obiseq.LogBioSeqStatus()
|
||||
|
||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||
// f, err := os.Create("cpu.pprof")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// pprof.StartCPUProfile(f)
|
||||
// defer pprof.StopCPUProfile()
|
||||
|
||||
// go tool trace cpu.trace
|
||||
// ftrace, err := os.Create("cpu.trace")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obimicrosat",
|
||||
"looks for microsatellites sequences in a sequence file",
|
||||
obilowmask.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||
|
||||
selected := obilowmask.CLISequenceEntropyMasker(sequences)
|
||||
obiconvert.CLIWriteBioSequences(selected, true)
|
||||
obiutils.WaitForLastPipe()
|
||||
|
||||
}
|
||||
@@ -31,6 +31,8 @@ func main() {
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obimatrix",
|
||||
"",
|
||||
obimatrix.OptionSet,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +30,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obimicrosat.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obimicrosat",
|
||||
"looks for microsatellites sequences in a sequence file",
|
||||
obimicrosat.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -28,7 +28,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obimultiplex.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obimultiplex",
|
||||
"demultiplex amplicons",
|
||||
obimultiplex.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -30,7 +30,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obipairing",
|
||||
"align forward with reverse reads with paired reads",
|
||||
obipairing.OptionSet)
|
||||
|
||||
optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -29,7 +29,10 @@ func main() {
|
||||
obidefault.SetParallelFilesRead(obidefault.ParallelWorkers() / 4)
|
||||
obidefault.SetBatchSize(10)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obipcr",
|
||||
"simulates a PCR on a sequence files",
|
||||
obipcr.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obirefidx.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obireffamidx",
|
||||
"",
|
||||
obirefidx.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,7 +11,10 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obirefidx.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obirefidx",
|
||||
"",
|
||||
obirefidx.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -31,7 +31,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obiscript.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiscript",
|
||||
"executes a lua script on the input sequences",
|
||||
obiscript.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -31,7 +31,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obisplit.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obisplit",
|
||||
"",
|
||||
obisplit.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -33,7 +33,10 @@ func main() {
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obisummary.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obisummary",
|
||||
"resume main information from a sequence file",
|
||||
obisummary.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
@@ -39,7 +40,10 @@ func main() {
|
||||
obidefault.SetStrictWriteWorker(1)
|
||||
obidefault.SetBatchSize(10)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obitag.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obitag",
|
||||
"realizes taxonomic assignment",
|
||||
obitag.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
@@ -55,7 +59,7 @@ func main() {
|
||||
}
|
||||
|
||||
if taxo == nil {
|
||||
taxo, err = references.ExtractTaxonomy(nil)
|
||||
taxo, err = references.ExtractTaxonomy(nil, obitaxonomy.CLINewickWithLeaves())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
||||
@@ -70,10 +74,12 @@ func main() {
|
||||
|
||||
var identified obiiter.IBioSequence
|
||||
|
||||
fsrb := fs.Rebatch(obidefault.BatchSize())
|
||||
|
||||
if obitag.CLIGeometricMode() {
|
||||
identified = obitag.CLIGeomAssignTaxonomy(fs, references, taxo)
|
||||
identified = obitag.CLIGeomAssignTaxonomy(fsrb, references, taxo)
|
||||
} else {
|
||||
identified = obitag.CLIAssignTaxonomy(fs, references, taxo)
|
||||
identified = obitag.CLIAssignTaxonomy(fsrb, references, taxo)
|
||||
}
|
||||
|
||||
obiconvert.CLIWriteBioSequences(identified, true)
|
||||
|
||||
@@ -33,7 +33,10 @@ func main() {
|
||||
|
||||
obidefault.SetWorkerPerCore(1)
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obitagpcr.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obitagpcr",
|
||||
"split a paired raw read data set per sample",
|
||||
obitagpcr.OptionSet)
|
||||
|
||||
optionParser(os.Args)
|
||||
pairs, err := obipairing.CLIPairedSequence()
|
||||
|
||||
@@ -4,9 +4,11 @@ import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
|
||||
@@ -14,30 +16,59 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obitaxonomy.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obitaxonomy",
|
||||
"manipulates and queries taxonomy",
|
||||
obitaxonomy.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
var iterator *obitax.ITaxon
|
||||
|
||||
switch {
|
||||
case obitaxonomy.CLIDownloadNCBI():
|
||||
if obitaxonomy.CLIDownloadNCBI() {
|
||||
err := obitaxonomy.CLIDownloadNCBITaxdump()
|
||||
if err != nil {
|
||||
log.Errorf("Cannot download NCBI taxonomy: %s", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if !obidefault.HasSelectedTaxonomy() {
|
||||
log.Fatal("you must indicate a taxonomy using the -t or --taxonomy option")
|
||||
}
|
||||
|
||||
switch {
|
||||
case obitaxonomy.CLIAskForRankList():
|
||||
newIter := obiitercsv.NewICSVRecord()
|
||||
newIter.Add(1)
|
||||
newIter.AppendField("rank")
|
||||
go func() {
|
||||
ranks := obitax.DefaultTaxonomy().RankList()
|
||||
data := make([]obiitercsv.CSVRecord, len(ranks))
|
||||
|
||||
for i, rank := range ranks {
|
||||
record := make(obiitercsv.CSVRecord)
|
||||
record["rank"] = rank
|
||||
data[i] = record
|
||||
}
|
||||
newIter.Push(obiitercsv.MakeCSVRecordBatch(obitax.DefaultTaxonomy().Name(), 0, data))
|
||||
newIter.Close()
|
||||
newIter.Done()
|
||||
}()
|
||||
obicsv.CLICSVWriter(newIter, true)
|
||||
obiutils.WaitForLastPipe()
|
||||
os.Exit(0)
|
||||
|
||||
case obitaxonomy.CLIExtractTaxonomy():
|
||||
iter, err := obiconvert.CLIReadBioSequences(args...)
|
||||
iter = iter.NumberSequences(1, true)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||
}
|
||||
|
||||
taxonomy, err := iter.ExtractTaxonomy()
|
||||
taxonomy, err := iter.ExtractTaxonomy(obitaxonomy.CLINewickWithLeaves())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||
@@ -99,7 +130,12 @@ func main() {
|
||||
}
|
||||
|
||||
iterator = obitaxonomy.CLITaxonRestrictions(iterator)
|
||||
|
||||
if obitaxonomy.CLIAsNewick() {
|
||||
obitaxonomy.CLINewickWriter(iterator, true)
|
||||
} else {
|
||||
obitaxonomy.CLICSVTaxaWriter(iterator, true)
|
||||
}
|
||||
|
||||
obiutils.WaitForLastPipe()
|
||||
|
||||
|
||||
@@ -33,7 +33,10 @@ func main() {
|
||||
|
||||
obidefault.SetBatchSize(10)
|
||||
obidefault.SetReadQualities(false)
|
||||
optionParser := obioptions.GenerateOptionParser(obiuniq.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
"obiuniq",
|
||||
"dereplicate sequence data sets",
|
||||
obiuniq.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
|
||||
@@ -3,13 +3,13 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
obitax.DetectTaxonomyFormat(os.Args[1])
|
||||
obiformats.DetectTaxonomyFormat(os.Args[1])
|
||||
println(obiutils.RemoveAllExt("toto/tutu/test.txt"))
|
||||
println(obiutils.Basename("toto/tutu/test.txt"))
|
||||
|
||||
|
||||
23
git-hooks/pre-push
Executable file
23
git-hooks/pre-push
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
remote="$1"
|
||||
#url="$2"
|
||||
|
||||
log() {
|
||||
echo -e "[Pre-Push tests @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
current_branch=$(git symbolic-ref --short head)
|
||||
|
||||
cmd="make githubtests"
|
||||
|
||||
if [[ $current_branch = "master" ]]; then
|
||||
log "you are on $current_branch, running build test"
|
||||
if ! eval "$cmd"; then
|
||||
log "Pre-push tests failed $cmd"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
log "Tests are OK, ready to push on $remote"
|
||||
exit 0
|
||||
21
go.mod
21
go.mod
@@ -1,11 +1,12 @@
|
||||
module git.metabarcoding.org/obitools/obitools4/obitools4
|
||||
|
||||
go 1.23.1
|
||||
go 1.23.4
|
||||
|
||||
toolchain go1.24.2
|
||||
|
||||
require (
|
||||
github.com/DavidGamba/go-getoptions v0.28.0
|
||||
github.com/PaesslerAG/gval v1.2.2
|
||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9
|
||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
||||
github.com/buger/jsonparser v1.1.1
|
||||
github.com/chen3feng/stl4go v0.1.1
|
||||
@@ -19,19 +20,23 @@ require (
|
||||
github.com/stretchr/testify v1.8.4
|
||||
github.com/tevino/abool/v2 v2.1.0
|
||||
github.com/yuin/gopher-lua v1.1.1
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
|
||||
gonum.org/v1/gonum v0.14.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
scientificgo.org/special v0.0.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/RoaringBitmap/roaring v1.9.4 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.12.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
||||
github.com/kr/pretty v0.3.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mschoch/smat v0.2.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rogpeppe/go-internal v1.6.1 // indirect
|
||||
github.com/rogpeppe/go-internal v1.12.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -44,8 +49,8 @@ require (
|
||||
github.com/rivo/uniseg v0.4.4 // indirect
|
||||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/ulikunitz/xz v0.5.11
|
||||
golang.org/x/net v0.17.0 // indirect
|
||||
golang.org/x/sys v0.17.0 // indirect
|
||||
golang.org/x/term v0.13.0 // indirect
|
||||
golang.org/x/net v0.35.0 // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
golang.org/x/term v0.29.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
|
||||
)
|
||||
|
||||
39
go.sum
39
go.sum
@@ -4,10 +4,12 @@ github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E
|
||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 h1:Zc1/GNsUpgZR9qm1EmRSKrnOHA7CCd0bIzGdq0cREN0=
|
||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9/go.mod h1:PZyV4WA3NpqtezSY0h6E6NARAmdDm0qwrydveOyR5Gc=
|
||||
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
|
||||
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
|
||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
||||
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
|
||||
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
||||
@@ -36,10 +38,9 @@ github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
||||
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
@@ -50,15 +51,21 @@ github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZ
|
||||
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
|
||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
|
||||
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
|
||||
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
|
||||
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
|
||||
github.com/rrethy/ahocorasick v1.0.0 h1:YKkCB+E5PXc0xmLfMrWbfNht8vG9Re97IHSWZk/Lk8E=
|
||||
github.com/rrethy/ahocorasick v1.0.0/go.mod h1:nq8oScE7Vy1rOppoQxpQiiDmPHuKCuk9rXrNcxUV3R0=
|
||||
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
|
||||
@@ -79,25 +86,23 @@ github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
|
||||
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
|
||||
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
|
||||
golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
|
||||
golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
||||
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
||||
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
||||
golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU=
|
||||
golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s=
|
||||
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
||||
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
11
go.work.sum
11
go.work.sum
@@ -2,7 +2,6 @@ git.sr.ht/~sbinet/gg v0.3.1 h1:LNhjNn8DerC8f9DHLz6lS0YYul/b602DUxDgGkd/Aik=
|
||||
git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc=
|
||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
|
||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
|
||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/logex v1.2.0 h1:+eqR0HfOetur4tgnC8ftU5imRnhi4te+BadWS95c5AM=
|
||||
@@ -20,15 +19,20 @@ github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9 h1:NxXI5pTAtpEaU49b
|
||||
github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9/go.mod h1:gWuR/CrFDDeVRFQwHPvsv9soJVB/iqymhuZQuJ3a9OM=
|
||||
github.com/go-pdf/fpdf v0.6.0 h1:MlgtGIfsdMEEQJr2le6b/HNr1ZlQwxyWr77r2aj2U/8=
|
||||
github.com/go-pdf/fpdf v0.6.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M=
|
||||
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
|
||||
github.com/goccmack/gocc v0.0.0-20230228185258-2292f9e40198 h1:FSii2UQeSLngl3jFoR4tUKZLprO7qUlh/TKKticc0BM=
|
||||
github.com/goccmack/gocc v0.0.0-20230228185258-2292f9e40198/go.mod h1:DTh/Y2+NbnOVVoypCCQrovMPDKUGp4yZpSbWg5D0XIM=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||
github.com/google/go-cmdtest v0.4.1-0.20220921163831-55ab3332a786/go.mod h1:apVn/GCasLZUVpAJ6oWAuyP7Ne7CEsQbTnc0plM3m+o=
|
||||
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
|
||||
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/google/safehtml v0.1.0/go.mod h1:L4KWwDsUJdECRAEpZoBn3O64bQaywRscowZjJAzjHnU=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2 h1:rcanfLhLDA8nozr/K289V1zcntHr3V+SHlXwzz1ZI2g=
|
||||
github.com/jba/templatecheck v0.7.1/go.mod h1:n1Etw+Rrw1mDDD8dDRsEKTwMZsJ98EkktgNJC6wLUGo=
|
||||
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
|
||||
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
|
||||
github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
|
||||
@@ -39,17 +43,22 @@ github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
||||
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
||||
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
||||
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
||||
golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4=
|
||||
golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0=
|
||||
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
|
||||
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
|
||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
|
||||
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
|
||||
golang.org/x/tools v0.15.0/go.mod h1:hpksKq4dtpQWS1uQ61JkdqWM3LscIS6Slf+VVkm+wQk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc=
|
||||
gonum.org/v1/plot v0.10.1 h1:dnifSs43YJuNMDzB7v8wV64O4ABBHReuAVAoBxqBqS4=
|
||||
gonum.org/v1/plot v0.10.1/go.mod h1:VZW5OlhkL1mysU9vaqNHnsy86inf6Ot+jB3r+BczCEo=
|
||||
|
||||
@@ -1,27 +1,56 @@
|
||||
#!/bin/bash
|
||||
|
||||
INSTALL_DIR="/usr/local"
|
||||
OBITOOLS_PREFIX=""
|
||||
# default values
|
||||
# Default values
|
||||
URL="https://go.dev/dl/"
|
||||
OBIURL4="https://github.com/metabarcoding/obitools4/archive/refs/heads/master.zip"
|
||||
GITHUB_REPO="https://github.com/metabarcoding/obitools4"
|
||||
INSTALL_DIR="/usr/local"
|
||||
OBITOOLS_PREFIX=""
|
||||
VERSION=""
|
||||
LIST_VERSIONS=false
|
||||
|
||||
# help message
|
||||
# Help message
|
||||
function display_help {
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -i, --install-dir Directory where obitools are installed "
|
||||
echo " (as example use /usr/local not /usr/local/bin)."
|
||||
echo " (e.g., use /usr/local not /usr/local/bin)."
|
||||
echo " -p, --obitools-prefix Prefix added to the obitools command names if you"
|
||||
echo " want to have several versions of obitools at the"
|
||||
echo " same time on your system (as example -p g will produce "
|
||||
echo " same time on your system (e.g., -p g will produce "
|
||||
echo " gobigrep command instead of obigrep)."
|
||||
echo " -v, --version Install a specific version (e.g., 4.4.8)."
|
||||
echo " If not specified, installs the latest version."
|
||||
echo " -l, --list List all available versions and exit."
|
||||
echo " -h, --help Display this help message."
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 # Install latest version"
|
||||
echo " $0 -l # List available versions"
|
||||
echo " $0 -v 4.4.8 # Install specific version"
|
||||
echo " $0 -i /opt/local # Install to custom directory"
|
||||
}
|
||||
|
||||
# List available versions from GitHub releases
|
||||
# List the released OBITools4 versions, newest first, one per line.
#
# The GitHub releases API is queried and only tags of the form
# "Release_<version>" are kept: `sed -n ... p` prints a line only when the
# substitution succeeded, so a release carrying any other tag name is
# dropped instead of being echoed verbatim as a raw JSON line.
#
# Output: a progress message on stderr; on stdout, one empty line followed
# by the bare version numbers sorted in descending version order.
function list_versions {
    echo "Fetching available versions..." 1>&2
    echo ""
    curl -s "https://api.github.com/repos/metabarcoding/obitools4/releases" \
        | sed -nE 's/.*"tag_name": "Release_([0-9.]+)".*/\1/p' \
        | sort -V -r
}
|
||||
|
||||
# Get latest version from GitHub releases
|
||||
# Print the highest released OBITools4 version number on stdout.
#
# Only tags of the form "Release_<version>" are considered: `sed -n ... p`
# emits a line solely when the pattern matched, so a release with another
# tag name can no longer slip through as a raw JSON line and be picked up
# by `head -1` as the "latest version".
#
# Prints nothing when no matching release is found; the caller treats an
# empty result as an error.
function get_latest_version {
    curl -s "https://api.github.com/repos/metabarcoding/obitools4/releases" \
        | sed -nE 's/.*"tag_name": "Release_([0-9.]+)".*/\1/p' \
        | sort -V -r \
        | head -1
}
|
||||
|
||||
# Parse command line arguments
|
||||
while [ "$#" -gt 0 ]; do
|
||||
case "$1" in
|
||||
-i|--install-dir)
|
||||
@@ -32,33 +61,67 @@ while [ "$#" -gt 0 ]; do
|
||||
OBITOOLS_PREFIX="$2"
|
||||
shift 2
|
||||
;;
|
||||
-v|--version)
|
||||
VERSION="$2"
|
||||
shift 2
|
||||
;;
|
||||
-l|--list)
|
||||
LIST_VERSIONS=true
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
display_help 1>&2
|
||||
display_help
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Error: Unsupported option $1" 1>&2
|
||||
display_help 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# the directory from where the script is run
|
||||
# List versions and exit if requested
|
||||
if [ "$LIST_VERSIONS" = true ]; then
|
||||
echo "Available OBITools4 versions:"
|
||||
echo "=============================="
|
||||
list_versions
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Determine version to install
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "Fetching latest version..." 1>&2
|
||||
VERSION=$(get_latest_version)
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "Error: Could not determine latest version" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
echo "Latest version: $VERSION" 1>&2
|
||||
else
|
||||
echo "Installing version: $VERSION" 1>&2
|
||||
fi
|
||||
|
||||
# Construct source URL for the specified version
|
||||
OBIURL4="${GITHUB_REPO}/archive/refs/tags/Release_${VERSION}.zip"
|
||||
|
||||
# The directory from where the script is run
|
||||
DIR="$(pwd)"
|
||||
|
||||
# the temp directory used, within $DIR
|
||||
# omit the -p parameter to create a temporal directory in the default location
|
||||
# WORK_DIR=$(mktemp -d -p "$DIR" "obitools4.XXXXXX" 2> /dev/null || \
|
||||
# mktemp -d -t "$DIR" "obitools4.XXXXXX")
|
||||
|
||||
# Create temporary directory
|
||||
WORK_DIR=$(mktemp -d "obitools4.XXXXXX")
|
||||
|
||||
# check if tmp dir was created
|
||||
# Check if tmp dir was created
|
||||
if [[ ! "$WORK_DIR" || ! -d "$WORK_DIR" ]]; then
|
||||
echo "Could not create temp dir" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "${WORK_DIR}/cache" \
|
||||
|| (echo "Cannot create ${WORK_DIR}/cache directory" 1>&2
|
||||
exit 1)
|
||||
|
||||
# Create installation directory
|
||||
mkdir -p "${INSTALL_DIR}/bin" 2> /dev/null \
|
||||
|| (echo "Please enter your password for installing obitools in ${INSTALL_DIR}" 1>&2
|
||||
sudo mkdir -p "${INSTALL_DIR}/bin")
|
||||
@@ -68,14 +131,20 @@ if [[ ! -d "${INSTALL_DIR}/bin" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
INSTALL_DIR="$(cd $INSTALL_DIR && pwd)"
|
||||
INSTALL_DIR="$(cd ${INSTALL_DIR} && pwd)"
|
||||
|
||||
echo WORK_DIR=$WORK_DIR 1>&2
|
||||
echo INSTALL_DIR=$INSTALL_DIR 1>&2
|
||||
echo OBITOOLS_PREFIX=$OBITOOLS_PREFIX 1>&2
|
||||
echo "================================" 1>&2
|
||||
echo "OBITools4 Installation" 1>&2
|
||||
echo "================================" 1>&2
|
||||
echo "VERSION=$VERSION" 1>&2
|
||||
echo "WORK_DIR=$WORK_DIR" 1>&2
|
||||
echo "INSTALL_DIR=$INSTALL_DIR" 1>&2
|
||||
echo "OBITOOLS_PREFIX=$OBITOOLS_PREFIX" 1>&2
|
||||
echo "================================" 1>&2
|
||||
|
||||
pushd "$WORK_DIR"|| exit
|
||||
pushd "$WORK_DIR" > /dev/null || exit
|
||||
|
||||
# Detect OS and architecture
|
||||
OS=$(uname -a | awk '{print $1}')
|
||||
ARCH=$(uname -m)
|
||||
|
||||
@@ -87,7 +156,9 @@ if [[ "$ARCH" == "aarch64" ]] ; then
|
||||
ARCH="arm64"
|
||||
fi
|
||||
|
||||
GOFILE=$(curl "$URL" \
|
||||
# Download and install Go
|
||||
echo "Downloading Go..." 1>&2
|
||||
GOFILE=$(curl -s "$URL" \
|
||||
| grep 'class="download"' \
|
||||
| grep "\.tar\.gz" \
|
||||
| sed -E 's@^.*/dl/(go[1-9].+\.tar\.gz)".*$@\1@' \
|
||||
@@ -95,35 +166,71 @@ GOFILE=$(curl "$URL" \
|
||||
| grep -i "$ARCH" \
|
||||
| head -1)
|
||||
|
||||
GOURL=$(curl "${URL}${GOFILE}" \
|
||||
GOURL=$(curl -s "${URL}${GOFILE}" \
|
||||
| sed -E 's@^.*href="(.*\.tar\.gz)".*$@\1@')
|
||||
|
||||
echo "Install GO from : $GOURL" 1>&2
|
||||
echo "Installing Go from: $GOURL" 1>&2
|
||||
|
||||
curl "$GOURL" \
|
||||
| tar zxf -
|
||||
curl -s "$GOURL" | tar zxf -
|
||||
|
||||
PATH="$(pwd)/go/bin:$PATH"
|
||||
export PATH
|
||||
GOPATH="$(pwd)/go"
|
||||
export GOPATH
|
||||
export GOCACHE="$(pwd)/cache"
|
||||
|
||||
curl -L "$OBIURL4" > master.zip
|
||||
unzip master.zip
|
||||
echo "GOCACHE=$GOCACHE" 1>&2
|
||||
mkdir -p "$GOCACHE"
|
||||
|
||||
echo "Install OBITOOLS from : $OBIURL4"
|
||||
# Download OBITools4 source
|
||||
echo "Downloading OBITools4 v${VERSION}..." 1>&2
|
||||
echo "Source URL: $OBIURL4" 1>&2
|
||||
|
||||
cd obitools4-master || exit
|
||||
|
||||
if [[ -z "$OBITOOLS_PREFIX" ]] ; then
|
||||
make
|
||||
else
|
||||
make OBITOOLS_PREFIX="${OBITOOLS_PREFIX}"
|
||||
if ! curl -sL "$OBIURL4" > obitools4.zip; then
|
||||
echo "Error: Could not download OBITools4 version ${VERSION}" 1>&2
|
||||
echo "Please check that this version exists with: $0 --list" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
unzip -q obitools4.zip
|
||||
|
||||
# Find the extracted directory
|
||||
OBITOOLS_DIR=$(ls -d obitools4-* 2>/dev/null | head -1)
|
||||
|
||||
if [ -z "$OBITOOLS_DIR" ] || [ ! -d "$OBITOOLS_DIR" ]; then
|
||||
echo "Error: Could not find extracted OBITools4 directory" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Building OBITools4..." 1>&2
|
||||
cd "$OBITOOLS_DIR" || exit
|
||||
mkdir -p vendor
|
||||
|
||||
# Build with or without prefix
|
||||
if [[ -z "$OBITOOLS_PREFIX" ]] ; then
|
||||
make GOFLAGS="-buildvcs=false"
|
||||
else
|
||||
make GOFLAGS="-buildvcs=false" OBITOOLS_PREFIX="${OBITOOLS_PREFIX}"
|
||||
fi
|
||||
|
||||
# Install binaries
|
||||
echo "Installing binaries to ${INSTALL_DIR}/bin..." 1>&2
|
||||
(cp build/* "${INSTALL_DIR}/bin" 2> /dev/null) \
|
||||
|| (echo "Please enter your password for installing obitools in ${INSTALL_DIR}"
|
||||
|| (echo "Please enter your password for installing obitools in ${INSTALL_DIR}" 1>&2
|
||||
sudo cp build/* "${INSTALL_DIR}/bin")
|
||||
|
||||
popd || exit
|
||||
popd > /dev/null || exit
|
||||
|
||||
# Cleanup
|
||||
echo "Cleaning up..." 1>&2
|
||||
chmod -R +w "$WORK_DIR"
|
||||
rm -rf "$WORK_DIR"
|
||||
|
||||
echo "" 1>&2
|
||||
echo "================================" 1>&2
|
||||
echo "OBITools4 v${VERSION} installed successfully!" 1>&2
|
||||
echo "Binaries location: ${INSTALL_DIR}/bin" 1>&2
|
||||
if [[ -n "$OBITOOLS_PREFIX" ]] ; then
|
||||
echo "Command prefix: ${OBITOOLS_PREFIX}" 1>&2
|
||||
fi
|
||||
echo "================================" 1>&2
|
||||
|
||||
292
kmer_roaring_index/FREQUENCY_FILTER_FINAL.md
Normal file
292
kmer_roaring_index/FREQUENCY_FILTER_FINAL.md
Normal file
@@ -0,0 +1,292 @@
|
||||
# Filtre de Fréquence avec v Niveaux de Roaring Bitmaps
|
||||
|
||||
## Algorithme
|
||||
|
||||
```text
|
||||
Pour chaque k-mer rencontré dans les données:
|
||||
c = 0
|
||||
tant que (c < v ET k-mer ∈ index[c]):
|
||||
c++
|
||||
|
||||
si c < v:
|
||||
index[c].insert(k-mer)
|
||||
```
|
||||
|
||||
**Résultat** : `index[v-1]` contient les k-mers vus **≥ v fois**
|
||||
|
||||
---
|
||||
|
||||
## Exemple d'exécution (v=3)
|
||||
|
||||
```
|
||||
Données:
|
||||
Read1: kmer X
|
||||
Read2: kmer X
|
||||
Read3: kmer X (X vu 3 fois)
|
||||
Read4: kmer Y
|
||||
Read5: kmer Y (Y vu 2 fois)
|
||||
Read6: kmer Z (Z vu 1 fois)
|
||||
|
||||
Exécution:
|
||||
|
||||
Read1 (X):
|
||||
c=0: X ∉ index[0] → index[0].add(X)
|
||||
État: index[0]={X}, index[1]={}, index[2]={}
|
||||
|
||||
Read2 (X):
|
||||
c=0: X ∈ index[0] → c=1
|
||||
c=1: X ∉ index[1] → index[1].add(X)
|
||||
État: index[0]={X}, index[1]={X}, index[2]={}
|
||||
|
||||
Read3 (X):
|
||||
c=0: X ∈ index[0] → c=1
|
||||
c=1: X ∈ index[1] → c=2
|
||||
c=2: X ∉ index[2] → index[2].add(X)
|
||||
État: index[0]={X}, index[1]={X}, index[2]={X}
|
||||
|
||||
Read4 (Y):
|
||||
c=0: Y ∉ index[0] → index[0].add(Y)
|
||||
État: index[0]={X,Y}, index[1]={X}, index[2]={X}
|
||||
|
||||
Read5 (Y):
|
||||
c=0: Y ∈ index[0] → c=1
|
||||
c=1: Y ∉ index[1] → index[1].add(Y)
|
||||
État: index[0]={X,Y}, index[1]={X,Y}, index[2]={X}
|
||||
|
||||
Read6 (Z):
|
||||
c=0: Z ∉ index[0] → index[0].add(Z)
|
||||
État: index[0]={X,Y,Z}, index[1]={X,Y}, index[2]={X}
|
||||
|
||||
Résultat final:
|
||||
index[0] (freq≥1): {X, Y, Z}
|
||||
index[1] (freq≥2): {X, Y}
|
||||
index[2] (freq≥3): {X} ← K-mers filtrés ✓
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Utilisation
|
||||
|
||||
```go
|
||||
// Créer le filtre
|
||||
filter := obikmer.NewFrequencyFilter(31, 3) // k=31, minFreq=3
|
||||
|
||||
// Ajouter les séquences
|
||||
for _, read := range reads {
|
||||
filter.AddSequence(read)
|
||||
}
|
||||
|
||||
// Récupérer les k-mers filtrés (freq ≥ 3)
|
||||
filtered := filter.GetFilteredSet("filtered")
|
||||
fmt.Printf("K-mers de qualité: %d\n", filtered.Cardinality())
|
||||
|
||||
// Statistiques
|
||||
stats := filter.Stats()
|
||||
fmt.Println(stats.String())
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance
|
||||
|
||||
### Complexité
|
||||
|
||||
**Par k-mer** :
|
||||
- Lookups : Moyenne ~v/2, pire cas v
|
||||
- Insertions : 1 Add
|
||||
- **Pas de Remove** ✅
|
||||
|
||||
**Total pour n k-mers** :
|
||||
- Temps : O(n × v/2)
|
||||
- Mémoire : O(unique_kmers × v × 2 bytes)
|
||||
|
||||
### Early exit pour distribution skewed
|
||||
|
||||
Avec distribution typique (séquençage) :
|
||||
```
|
||||
80% singletons → 1 lookup (early exit)
|
||||
15% freq 2-3 → 2-3 lookups
|
||||
5% freq ≥4 → jusqu'à v lookups
|
||||
|
||||
Moyenne réelle : ~2 lookups/kmer (au lieu de v/2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Mémoire
|
||||
|
||||
### Pour 10^8 k-mers uniques
|
||||
|
||||
| v (minFreq) | Nombre bitmaps | Mémoire | vs map simple |
|
||||
|-------------|----------------|---------|---------------|
|
||||
| v=2 | 2 | ~400 MB | 6x moins |
|
||||
| v=3 | 3 | ~600 MB | 4x moins |
|
||||
| v=5 | 5 | ~1 GB | 2.4x moins |
|
||||
| v=10 | 10 | ~2 GB | 1.2x moins |
|
||||
| v=20 | 20 | ~4 GB | 1.7x plus |
|
||||
|
||||
**Note** : Avec distribution skewed (beaucoup de singletons), la mémoire réelle est bien plus faible car les niveaux hauts ont peu d'éléments.
|
||||
|
||||
### Exemple réaliste (séquençage)
|
||||
|
||||
Pour 10^8 k-mers totaux, v=3 :
|
||||
```
|
||||
Distribution:
|
||||
80% singletons → 80M dans index[0]
|
||||
15% freq 2-3 → 15M dans index[1]
|
||||
5% freq ≥3 → 5M dans index[2]
|
||||
|
||||
Mémoire:
|
||||
index[0]: 80M × 2 bytes = 160 MB
|
||||
index[1]: 15M × 2 bytes = 30 MB
|
||||
index[2]: 5M × 2 bytes = 10 MB
|
||||
Total: ~200 MB ✅
|
||||
|
||||
vs map simple: 80M × 24 bytes = ~2 GB
|
||||
Réduction: 10x
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparaison des approches
|
||||
|
||||
| Approche | Mémoire (10^8 kmers) | Passes | Lookups/kmer | Quand utiliser |
|
||||
|----------|----------------------|--------|--------------|----------------|
|
||||
| **v-Bitmaps** | **200-600 MB** | **1** | **~2 (avg)** | **Standard** ✅ |
|
||||
| Map simple | 2.4 GB | 1 | 1 | Si RAM illimitée |
|
||||
| Multi-pass | 400 MB | v | v | Si I/O pas cher |
|
||||
|
||||
---
|
||||
|
||||
## Avantages de v-Bitmaps
|
||||
|
||||
✅ **Une seule passe** sur les données
|
||||
✅ **Mémoire optimale** avec Roaring bitmaps
|
||||
✅ **Pas de Remove** (seulement Contains + Add)
|
||||
✅ **Early exit** efficace sur singletons
|
||||
✅ **Scalable** jusqu'à v~10-20
|
||||
✅ **Simple** à implémenter et comprendre
|
||||
|
||||
---
|
||||
|
||||
## Cas d'usage typiques
|
||||
|
||||
### 1. Éliminer erreurs de séquençage
|
||||
|
||||
```go
|
||||
filter := obikmer.NewFrequencyFilter(31, 3)
|
||||
|
||||
// Traiter FASTQ
|
||||
for read := range StreamFastq("sample.fastq") {
|
||||
filter.AddSequence(read)
|
||||
}
|
||||
|
||||
// K-mers de qualité (pas d'erreurs)
|
||||
cleaned := filter.GetFilteredSet("cleaned")
|
||||
```
|
||||
|
||||
**Résultat** : Élimine 70-80% des k-mers (erreurs)
|
||||
|
||||
### 2. Assemblage de génome
|
||||
|
||||
```go
|
||||
filter := obikmer.NewFrequencyFilter(31, 2)
|
||||
|
||||
// Filtrer avant l'assemblage
|
||||
for read := range reads {
|
||||
filter.AddSequence(read)
|
||||
}
|
||||
|
||||
solidKmers := filter.GetFilteredSet("solid")
|
||||
// Utiliser solidKmers pour le graphe de Bruijn
|
||||
```
|
||||
|
||||
### 3. Comparaison de génomes
|
||||
|
||||
```go
|
||||
collection := obikmer.NewKmerSetCollection(31)
|
||||
|
||||
for _, genome := range genomes {
|
||||
filter := obikmer.NewFrequencyFilter(31, 3)
|
||||
filter.AddSequences(genome.Reads)
|
||||
|
||||
cleaned := filter.GetFilteredSet(genome.ID)
|
||||
collection.Add(cleaned)
|
||||
}
|
||||
|
||||
// Analyses comparatives sur k-mers de qualité
|
||||
matrix := collection.ParallelPairwiseJaccard(8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Limites
|
||||
|
||||
**Pour v > 20** :
|
||||
- Trop de lookups (v lookups/kmer)
|
||||
- Mémoire importante (v × 200MB pour 10^8 kmers)
|
||||
|
||||
**Solutions alternatives pour v > 20** :
|
||||
- Utiliser map simple (~24 bytes/kmer) si RAM disponible
|
||||
- Algorithme différent (sketch, probabiliste)
|
||||
|
||||
---
|
||||
|
||||
## Optimisations possibles
|
||||
|
||||
### 1. Parallélisation
|
||||
|
||||
```go
|
||||
// Traiter plusieurs fichiers en parallèle
|
||||
filters := make([]*FrequencyFilter, numFiles)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i, file := range files {
|
||||
wg.Add(1)
|
||||
go func(idx int, f string) {
|
||||
defer wg.Done()
|
||||
filters[idx] = ProcessFile(f, k, minFreq)
|
||||
}(i, file)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Merger les résultats
|
||||
merged := MergeFilters(filters)
|
||||
```
|
||||
|
||||
### 2. Streaming avec seuil adaptatif
|
||||
|
||||
```go
|
||||
// Commencer avec v=5, réduire progressivement
|
||||
filter := obikmer.NewFrequencyFilter(31, 5)
|
||||
|
||||
// ... traitement ...
|
||||
|
||||
// Si trop de mémoire, réduire à v=3
|
||||
if filter.MemoryUsage() > threshold {
|
||||
filter = ConvertToLowerThreshold(filter, 3)
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Récapitulatif final
|
||||
|
||||
**Pour filtrer les k-mers par fréquence ≥ v :**
|
||||
|
||||
1. **Créer** : `filter := NewFrequencyFilter(k, v)`
|
||||
2. **Traiter** : `filter.AddSequence(read)` pour chaque read
|
||||
3. **Résultat** : `filtered := filter.GetFilteredSet(id)`
|
||||
|
||||
**Mémoire** : ~2v MB par million de k-mers uniques
|
||||
**Temps** : Une seule passe, ~2 lookups/kmer en moyenne
|
||||
**Optimal pour** : v ≤ 20, distribution skewed (séquençage)
|
||||
|
||||
---
|
||||
|
||||
## Code fourni
|
||||
|
||||
1. **frequency_filter.go** - Implémentation complète
|
||||
2. **examples_frequency_filter_final.go** - Exemples d'utilisation
|
||||
|
||||
**Tout est prêt à utiliser !** 🚀
|
||||
320
kmer_roaring_index/examples_frequency_filter_final.go
Normal file
320
kmer_roaring_index/examples_frequency_filter_final.go
Normal file
@@ -0,0 +1,320 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"obikmer"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// ==========================================
|
||||
// EXEMPLE 1 : Utilisation basique
|
||||
// ==========================================
|
||||
fmt.Println("=== EXEMPLE 1 : Utilisation basique ===\n")
|
||||
|
||||
k := 31
|
||||
minFreq := 3 // Garder les k-mers vus ≥3 fois
|
||||
|
||||
// Créer le filtre
|
||||
filter := obikmer.NewFrequencyFilter(k, minFreq)
|
||||
|
||||
// Simuler des séquences avec différentes fréquences
|
||||
sequences := [][]byte{
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"), // Kmer X
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"), // Kmer X (freq=2)
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"), // Kmer X (freq=3) ✓
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"), // Kmer Y
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"), // Kmer Y (freq=2) ✗
|
||||
[]byte("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"), // Kmer Z (freq=1) ✗
|
||||
}
|
||||
|
||||
fmt.Printf("Traitement de %d séquences...\n", len(sequences))
|
||||
for _, seq := range sequences {
|
||||
filter.AddSequence(seq)
|
||||
}
|
||||
|
||||
// Récupérer les k-mers filtrés
|
||||
filtered := filter.GetFilteredSet("filtered")
|
||||
fmt.Printf("\nK-mers avec freq ≥ %d: %d\n", minFreq, filtered.Cardinality())
|
||||
|
||||
// Statistiques
|
||||
stats := filter.Stats()
|
||||
fmt.Println("\n" + stats.String())
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 2 : Vérifier les niveaux
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 2 : Inspection des niveaux ===\n")
|
||||
|
||||
// Vérifier chaque niveau
|
||||
for level := 0; level < minFreq; level++ {
|
||||
levelSet := filter.GetKmersAtLevel(level)
|
||||
fmt.Printf("Niveau %d (freq≥%d): %d k-mers\n",
|
||||
level+1, level+1, levelSet.Cardinality())
|
||||
}
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 3 : Données réalistes
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 3 : Simulation données séquençage ===\n")
|
||||
|
||||
filter2 := obikmer.NewFrequencyFilter(31, 3)
|
||||
|
||||
// Simuler un dataset réaliste :
|
||||
// - 1000 reads
|
||||
// - 80% contiennent des erreurs (singletons)
|
||||
// - 15% vrais k-mers à basse fréquence
|
||||
// - 5% vrais k-mers à haute fréquence
|
||||
|
||||
// Vraie séquence répétée
|
||||
trueSeq := []byte("ACGTACGTACGTACGTACGTACGTACGTACG")
|
||||
for i := 0; i < 50; i++ {
|
||||
filter2.AddSequence(trueSeq)
|
||||
}
|
||||
|
||||
// Séquence à fréquence moyenne
|
||||
mediumSeq := []byte("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")
|
||||
for i := 0; i < 5; i++ {
|
||||
filter2.AddSequence(mediumSeq)
|
||||
}
|
||||
|
||||
// Erreurs de séquençage (singletons)
|
||||
for i := 0; i < 100; i++ {
|
||||
errorSeq := []byte(fmt.Sprintf("TTTTTTTTTTTTTTTTTTTTTTTTTTTT%03d", i))
|
||||
filter2.AddSequence(errorSeq)
|
||||
}
|
||||
|
||||
stats2 := filter2.Stats()
|
||||
fmt.Println(stats2.String())
|
||||
|
||||
fmt.Println("Distribution attendue:")
|
||||
fmt.Println(" - Beaucoup de singletons (erreurs)")
|
||||
fmt.Println(" - Peu de k-mers à haute fréquence (signal)")
|
||||
fmt.Println(" → Filtrage efficace !")
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 4 : Tester différents seuils
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 4 : Comparaison de seuils ===\n")
|
||||
|
||||
testSeqs := [][]byte{
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"), // freq=5
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"),
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"),
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"), // freq=3
|
||||
[]byte("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"), // freq=1
|
||||
}
|
||||
|
||||
for _, minFreq := range []int{2, 3, 5} {
|
||||
f := obikmer.NewFrequencyFilter(31, minFreq)
|
||||
f.AddSequences(testSeqs)
|
||||
|
||||
fmt.Printf("minFreq=%d: %d k-mers retenus (%.2f MB)\n",
|
||||
minFreq,
|
||||
f.Cardinality(),
|
||||
float64(f.MemoryUsage())/1024/1024)
|
||||
}
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 5 : Comparaison mémoire
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 5 : Comparaison mémoire ===\n")
|
||||
|
||||
filter3 := obikmer.NewFrequencyFilter(31, 3)
|
||||
|
||||
// Simuler 10000 séquences
|
||||
for i := 0; i < 10000; i++ {
|
||||
seq := make([]byte, 100)
|
||||
for j := range seq {
|
||||
seq[j] = "ACGT"[(i+j)%4]
|
||||
}
|
||||
filter3.AddSequence(seq)
|
||||
}
|
||||
|
||||
fmt.Println(filter3.CompareWithSimpleMap())
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 6 : Workflow complet
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 6 : Workflow complet ===\n")
|
||||
|
||||
fmt.Println("1. Créer le filtre")
|
||||
finalFilter := obikmer.NewFrequencyFilter(31, 3)
|
||||
|
||||
fmt.Println("2. Traiter les données (simulation)")
|
||||
// En pratique : lire depuis FASTQ
|
||||
// for read := range ReadFastq("data.fastq") {
|
||||
// finalFilter.AddSequence(read)
|
||||
// }
|
||||
|
||||
// Simulation
|
||||
for i := 0; i < 1000; i++ {
|
||||
seq := []byte("ACGTACGTACGTACGTACGTACGTACGTACG")
|
||||
finalFilter.AddSequence(seq)
|
||||
}
|
||||
|
||||
fmt.Println("3. Récupérer les k-mers filtrés")
|
||||
result := finalFilter.GetFilteredSet("final")
|
||||
|
||||
fmt.Println("4. Utiliser le résultat")
|
||||
fmt.Printf(" K-mers de qualité: %d\n", result.Cardinality())
|
||||
fmt.Printf(" Mémoire utilisée: %.2f MB\n", float64(finalFilter.MemoryUsage())/1024/1024)
|
||||
|
||||
fmt.Println("5. Sauvegarder (optionnel)")
|
||||
// result.Save("filtered_kmers.bin")
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 7 : Vérification individuelle
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 7 : Vérification de k-mers spécifiques ===\n")
|
||||
|
||||
checkFilter := obikmer.NewFrequencyFilter(31, 3)
|
||||
|
||||
testSeq := []byte("ACGTACGTACGTACGTACGTACGTACGTACG")
|
||||
for i := 0; i < 5; i++ {
|
||||
checkFilter.AddSequence(testSeq)
|
||||
}
|
||||
|
||||
var kmers []uint64
|
||||
kmers = obikmer.EncodeKmers(testSeq, 31, &kmers)
|
||||
|
||||
if len(kmers) > 0 {
|
||||
testKmer := kmers[0]
|
||||
|
||||
fmt.Printf("K-mer test: 0x%016X\n", testKmer)
|
||||
fmt.Printf(" Présent dans filtre: %v\n", checkFilter.Contains(testKmer))
|
||||
fmt.Printf(" Fréquence approx: %d\n", checkFilter.GetFrequency(testKmer))
|
||||
}
|
||||
|
||||
// ==========================================
|
||||
// EXEMPLE 8 : Intégration avec collection
|
||||
// ==========================================
|
||||
fmt.Println("\n=== EXEMPLE 8 : Intégration avec KmerSetCollection ===\n")
|
||||
|
||||
// Créer une collection de génomes filtrés
|
||||
collection := obikmer.NewKmerSetCollection(31)
|
||||
|
||||
genomes := map[string][][]byte{
|
||||
"Genome1": {
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"), // Erreur
|
||||
},
|
||||
"Genome2": {
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("ACGTACGTACGTACGTACGTACGTACGTACG"),
|
||||
[]byte("GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"), // Erreur
|
||||
},
|
||||
}
|
||||
|
||||
for id, sequences := range genomes {
|
||||
// Filtrer chaque génome
|
||||
genomeFilter := obikmer.NewFrequencyFilter(31, 3)
|
||||
genomeFilter.AddSequences(sequences)
|
||||
|
||||
// Ajouter à la collection
|
||||
filteredSet := genomeFilter.GetFilteredSet(id)
|
||||
collection.Add(filteredSet)
|
||||
|
||||
fmt.Printf("%s: %d k-mers de qualité\n", id, filteredSet.Cardinality())
|
||||
}
|
||||
|
||||
// Analyser la collection
|
||||
fmt.Println("\nAnalyse comparative:")
|
||||
collectionStats := collection.ComputeStats()
|
||||
fmt.Printf(" Core genome: %d k-mers\n", collectionStats.CoreSize)
|
||||
fmt.Printf(" Pan genome: %d k-mers\n", collectionStats.PanGenomeSize)
|
||||
|
||||
// ==========================================
|
||||
// RÉSUMÉ
|
||||
// ==========================================
|
||||
fmt.Println("\n=== RÉSUMÉ ===\n")
|
||||
fmt.Println("Le FrequencyFilter permet de:")
|
||||
fmt.Println(" ✓ Filtrer les k-mers par fréquence minimale")
|
||||
fmt.Println(" ✓ Utiliser une mémoire optimale avec Roaring bitmaps")
|
||||
fmt.Println(" ✓ Une seule passe sur les données")
|
||||
fmt.Println(" ✓ Éliminer efficacement les erreurs de séquençage")
|
||||
fmt.Println("")
|
||||
fmt.Println("Workflow typique:")
|
||||
fmt.Println(" 1. filter := NewFrequencyFilter(k, minFreq)")
|
||||
fmt.Println(" 2. for each sequence: filter.AddSequence(seq)")
|
||||
fmt.Println(" 3. filtered := filter.GetFilteredSet(id)")
|
||||
fmt.Println(" 4. Utiliser filtered dans vos analyses")
|
||||
}
|
||||
|
||||
// ==================================
|
||||
// FONCTION HELPER POUR BENCHMARKS
|
||||
// ==================================
|
||||
|
||||
func BenchmarkFrequencyFilter() {
|
||||
k := 31
|
||||
minFreq := 3
|
||||
|
||||
// Test avec différentes tailles
|
||||
sizes := []int{1000, 10000, 100000}
|
||||
|
||||
fmt.Println("\n=== BENCHMARK ===\n")
|
||||
|
||||
for _, size := range sizes {
|
||||
filter := obikmer.NewFrequencyFilter(k, minFreq)
|
||||
|
||||
// Générer des séquences
|
||||
for i := 0; i < size; i++ {
|
||||
seq := make([]byte, 100)
|
||||
for j := range seq {
|
||||
seq[j] = "ACGT"[(i+j)%4]
|
||||
}
|
||||
filter.AddSequence(seq)
|
||||
}
|
||||
|
||||
fmt.Printf("Size=%d reads:\n", size)
|
||||
fmt.Printf(" Filtered k-mers: %d\n", filter.Cardinality())
|
||||
fmt.Printf(" Memory: %.2f MB\n", float64(filter.MemoryUsage())/1024/1024)
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
|
||||
// ==================================
|
||||
// FONCTION POUR DONNÉES RÉELLES
|
||||
// ==================================
|
||||
|
||||
func ProcessRealData() {
|
||||
// Exemple pour traiter de vraies données FASTQ
|
||||
|
||||
k := 31
|
||||
minFreq := 3
|
||||
|
||||
filter := obikmer.NewFrequencyFilter(k, minFreq)
|
||||
|
||||
// Pseudo-code pour lire un FASTQ
|
||||
/*
|
||||
fastqFile := "sample.fastq"
|
||||
reader := NewFastqReader(fastqFile)
|
||||
|
||||
for reader.HasNext() {
|
||||
read := reader.Next()
|
||||
filter.AddSequence(read.Sequence)
|
||||
}
|
||||
|
||||
// Récupérer le résultat
|
||||
filtered := filter.GetFilteredSet("sample_filtered")
|
||||
filtered.Save("sample_filtered_kmers.bin")
|
||||
|
||||
// Stats
|
||||
stats := filter.Stats()
|
||||
fmt.Println(stats.String())
|
||||
*/
|
||||
|
||||
fmt.Println("Workflow pour données réelles:")
|
||||
fmt.Println(" 1. Créer le filtre avec minFreq approprié (2-5 typique)")
|
||||
fmt.Println(" 2. Stream les reads depuis FASTQ")
|
||||
fmt.Println(" 3. Récupérer les k-mers filtrés")
|
||||
fmt.Println(" 4. Utiliser pour assemblage/comparaison/etc.")
|
||||
|
||||
_ = filter // unused
|
||||
}
|
||||
109
obitests/obitools/obiannotate/test.sh
Executable file
109
obitests/obitools/obiannotate/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test series
|
||||
#
|
||||
|
||||
TEST_NAME=obiannotate
|
||||
CMD=obiannotate
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
# Absolute directory holding this test script (symlinks resolved by readlink -f).
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
# Everything from the first "obitest" in TEST_DIR to its end is removed and
# "build" is appended, giving the directory expected to hold the binaries.
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
# Put that build directory first in PATH so $CMD resolves to it.
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
# Display name of the command for log messages: first four characters of
# CMD upper-cased, remaining characters lower-cased.
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
# Scratch directory for files produced by the tests; removed by cleanup().
TMPDIR="$(mktemp -d)"
|
||||
# Counters reported by cleanup(): tests run, passed and failed.
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
# Print the test summary on stderr, delete the temporary directory and
# terminate the script: exit status 1 when at least one test failed,
# 0 otherwise. Reads TEST_NAME, ntest, success, failed and TMPDIR.
cleanup() {
    local status=0

    # The whole report goes to stderr in one redirected group.
    {
        echo "========================================"
        echo "## Results of the $TEST_NAME tests:"
        echo
        echo "- $ntest tests run"
        echo "- $success successfully completed"
        echo "- $failed failed tests"
        echo
        echo "Cleaning up the temporary directory..."
        echo
        echo "========================================"
    } 1>&2

    # Remove the temporary directory.
    rm -rf "$TMPDIR"

    if [ $failed -gt 0 ]; then
        log "$TEST_NAME tests failed"
        status=1
    fi

    # Two empty log lines close the report in both outcomes.
    log
    log

    exit "$status"
}
|
||||
|
||||
# Write a message to stderr, prefixed with the test name and the current
# date. All arguments are joined into one message; backslash escapes are
# interpreted (echo -e).
log() {
    local stamp
    stamp="$(date)"
    echo -e "[$TEST_NAME @ ${stamp}] $*" 1>&2
}
|
||||
|
||||
# Report the test context before running anything.
log "Testing $TEST_NAME..."
log "Test directory is $TEST_DIR"
log "obitools directory is $OBITOOLS_DIR"
log "Temporary directory is $TMPDIR"
# List the base names of everything below the test directory (the first
# entry, the directory itself, is skipped). $TEST_DIR is quoted so that a
# path containing spaces is passed to find as a single argument.
log "files: $(find "$TEST_DIR" | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
# Test: the command must print its help (-h) and exit with status 0.
# Both output streams are captured in the temporary directory.
ntest=$((ntest + 1))
if ! $CMD -h > "${TMPDIR}/help.txt" 2>&1
then
    log "$MCMD: printing help failed"
    failed=$((failed + 1))
else
    log "$MCMD: printing help OK"
    success=$((success + 1))
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obiclean/test.sh
Executable file
109
obitests/obitools/obiclean/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test series
|
||||
#
|
||||
|
||||
TEST_NAME=obiclean
|
||||
CMD=obiclean
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
# Absolute directory holding this test script (symlinks resolved by readlink -f).
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
# Everything from the first "obitest" in TEST_DIR to its end is removed and
# "build" is appended, giving the directory expected to hold the binaries.
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
# Put that build directory first in PATH so $CMD resolves to it.
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
# Display name of the command for log messages: first four characters of
# CMD upper-cased, remaining characters lower-cased.
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
# Scratch directory for files produced by the tests; removed by cleanup().
TMPDIR="$(mktemp -d)"
|
||||
# Counters reported by cleanup(): tests run, passed and failed.
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obicleandb/test.sh
Executable file
109
obitests/obitools/obicleandb/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obicleandb
|
||||
CMD=obicleandb
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obicomplement/test.sh
Executable file
109
obitests/obitools/obicomplement/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obicomplement
|
||||
CMD=obicomplement
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obiconsensus/test.sh
Executable file
109
obitests/obitools/obiconsensus/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obiconsensus
|
||||
CMD=obiconsensus
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
BIN
obitests/obitools/obiconvert/gbpln1088.4Mb.fasta.gz
Normal file
BIN
obitests/obitools/obiconvert/gbpln1088.4Mb.fasta.gz
Normal file
Binary file not shown.
144
obitests/obitools/obiconvert/test.sh
Executable file
144
obitests/obitools/obiconvert/test.sh
Executable file
@@ -0,0 +1,144 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obiconvert
|
||||
CMD=obiconvert
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
|
||||
if [ -z "$TEST_DIR" ] ; then
|
||||
TEST_DIR="."
|
||||
fi
|
||||
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
((ntest++))
|
||||
if obiconvert -Z "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
> "${TMPDIR}/xxx.fasta.gz" && \
|
||||
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
"${TMPDIR}/xxx.fasta.gz"
|
||||
then
|
||||
log "$MCMD: converting large fasta file to fasta OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: converting large fasta file to fasta failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obiconvert -Z --fastq-output \
|
||||
"${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
> "${TMPDIR}/xxx.fastq.gz" && \
|
||||
obiconvert -Z --fasta-output \
|
||||
"${TMPDIR}/xxx.fastq.gz" \
|
||||
> "${TMPDIR}/yyy.fasta.gz" && \
|
||||
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
"${TMPDIR}/yyy.fasta.gz"
|
||||
then
|
||||
log "$MCMD: converting large file between fasta and fastq OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: converting large file between fasta and fastq failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
@@ -5,6 +5,7 @@
|
||||
#
|
||||
|
||||
TEST_NAME=obicount
|
||||
CMD=obicount
|
||||
|
||||
######
|
||||
#
|
||||
@@ -15,6 +16,7 @@ TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
@@ -38,9 +40,14 @@ cleanup() {
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
@@ -79,6 +86,18 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obicount "${TEST_DIR}/wolf_F.fasta.gz" \
|
||||
> "${TMPDIR}/wolf_F.fasta_count.csv"
|
||||
|
||||
109
obitests/obitools/obicsv/test.sh
Executable file
109
obitests/obitools/obicsv/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obicsv
|
||||
CMD=obicsv
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obidemerge/test.sh
Executable file
109
obitests/obitools/obidemerge/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obidemerge
|
||||
CMD=obidemerge
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obidistribute/test.sh
Executable file
109
obitests/obitools/obidistribute/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obidistribute
|
||||
CMD=obidistribute
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obigrep/test.sh
Executable file
109
obitests/obitools/obigrep/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obigrep
|
||||
CMD=obigrep
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obijoin/test.sh
Executable file
109
obitests/obitools/obijoin/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obijoin
|
||||
CMD=obijoin
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obikmermatch/test.sh
Executable file
109
obitests/obitools/obikmermatch/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obikmermatch
|
||||
CMD=obikmermatch
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obikmersimcount/test.sh
Executable file
109
obitests/obitools/obikmersimcount/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obikmersimcount
|
||||
CMD=obikmersimcount
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obilandmark/test.sh
Executable file
109
obitests/obitools/obilandmark/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obilandmark
|
||||
CMD=obilandmark
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obimatrix/test.sh
Executable file
109
obitests/obitools/obimatrix/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obimatrix
|
||||
CMD=obimatrix
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obimicrosat/test.sh
Executable file
109
obitests/obitools/obimicrosat/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obimicrosat
|
||||
CMD=obimicrosat
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obimultiplex/test.sh
Executable file
109
obitests/obitools/obimultiplex/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obimultiplex
|
||||
CMD=obimultiplex
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
@@ -4,7 +4,8 @@
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obiparing
|
||||
TEST_NAME=obipairing
|
||||
CMD=obipairing
|
||||
|
||||
######
|
||||
#
|
||||
@@ -15,6 +16,7 @@ TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
@@ -38,9 +40,13 @@ cleanup() {
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
exit 0
|
||||
}
|
||||
|
||||
@@ -79,6 +85,16 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
####
|
||||
######################################################################
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obipairing -F "${TEST_DIR}/wolf_F.fastq.gz" \
|
||||
-R "${TEST_DIR}/wolf_R.fastq.gz" \
|
||||
@@ -94,8 +110,8 @@ fi
|
||||
|
||||
((ntest++))
|
||||
if obicsv -Z -s -i \
|
||||
-k ali_dir -k ali_length -k paring_fast_count \
|
||||
-k paring_fast_overlap -k paring_fast_score \
|
||||
-k ali_dir -k ali_length -k pairing_fast_count \
|
||||
-k pairing_fast_overlap -k pairing_fast_score \
|
||||
-k score -k score_norm -k seq_a_single \
|
||||
-k seq_b_single -k seq_ab_match \
|
||||
"${TMPDIR}/wolf_paired_alignment.fastq.gz" \
|
||||
BIN
obitests/obitools/obipairing/wolf_paired_alignment.csv.gz
Normal file
BIN
obitests/obitools/obipairing/wolf_paired_alignment.csv.gz
Normal file
Binary file not shown.
Binary file not shown.
109
obitests/obitools/obipcr/test.sh
Executable file
109
obitests/obitools/obipcr/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obipcr
|
||||
CMD=obipcr
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obirefidx/test.sh
Executable file
109
obitests/obitools/obirefidx/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Here give the name of the test serie
|
||||
#
|
||||
|
||||
TEST_NAME=obirefidx
|
||||
CMD=obirefidx
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The datafiles are stored in the same directory than the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obiscript/test.sh
Executable file
109
obitests/obitools/obiscript/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obiscript
|
||||
CMD=obiscript
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obisplit/test.sh
Executable file
109
obitests/obitools/obisplit/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obisplit
|
||||
CMD=obisplit
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
9
obitests/obitools/obisummary/some_uniq_seq.fasta
Normal file
9
obitests/obitools/obisummary/some_uniq_seq.fasta
Normal file
@@ -0,0 +1,9 @@
|
||||
>Seq_1 {"count":2,"merged_sample":{"15a_F730814":1,"29a_F260619":1}}
|
||||
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
|
||||
agctyaaaactcaaaggacttggcggtgctttataccctt
|
||||
>Seq_2 {"count":22,"merged_sample":{"15a_F730814":12,"29a_F260619":10}}
|
||||
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
|
||||
atcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>Seq_3 {"count":22,"merged_sample":{"15a_F730814":15,"29a_F260619":7}}
|
||||
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcgat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
35
obitests/obitools/obisummary/some_uniq_seq.json
Normal file
35
obitests/obitools/obisummary/some_uniq_seq.json
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"annotations": {
|
||||
"keys": {
|
||||
"map": {
|
||||
"merged_sample": 3
|
||||
},
|
||||
"scalar": {
|
||||
"count": 3
|
||||
}
|
||||
},
|
||||
"map_attributes": 1,
|
||||
"scalar_attributes": 1,
|
||||
"vector_attributes": 0
|
||||
},
|
||||
"count": {
|
||||
"reads": 46,
|
||||
"total_length": 300,
|
||||
"variants": 3
|
||||
},
|
||||
"samples": {
|
||||
"sample_count": 2,
|
||||
"sample_stats": {
|
||||
"15a_F730814": {
|
||||
"reads": 28,
|
||||
"singletons": 1,
|
||||
"variants": 3
|
||||
},
|
||||
"29a_F260619": {
|
||||
"reads": 18,
|
||||
"singletons": 1,
|
||||
"variants": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
25
obitests/obitools/obisummary/some_uniq_seq.yaml
Normal file
25
obitests/obitools/obisummary/some_uniq_seq.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
annotations:
|
||||
keys:
|
||||
map:
|
||||
merged_sample: 3
|
||||
scalar:
|
||||
count: 3
|
||||
map_attributes: 1
|
||||
scalar_attributes: 1
|
||||
vector_attributes: 0
|
||||
count:
|
||||
reads: 46
|
||||
total_length: 300
|
||||
variants: 3
|
||||
samples:
|
||||
sample_count: 2
|
||||
sample_stats:
|
||||
15a_F730814:
|
||||
reads: 28
|
||||
singletons: 1
|
||||
variants: 3
|
||||
29a_F260619:
|
||||
reads: 18
|
||||
singletons: 1
|
||||
variants: 3
|
||||
|
||||
152
obitests/obitools/obisummary/test.sh
Executable file
152
obitests/obitools/obisummary/test.sh
Executable file
@@ -0,0 +1,152 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obisummary
|
||||
CMD=obisummary
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obisummary "${TEST_DIR}/some_uniq_seq.fasta" \
|
||||
> "${TMPDIR}/some_uniq_seq.json"
|
||||
then
|
||||
log "$MCMD: formating json execution OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: formating json execution failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if diff "${TEST_DIR}/some_uniq_seq.json" \
|
||||
"${TMPDIR}/some_uniq_seq.json" > /dev/null
|
||||
then
|
||||
log "$MCMD: formating json OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: formating json failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obisummary --yaml "${TEST_DIR}/some_uniq_seq.fasta" \
|
||||
> "${TMPDIR}/some_uniq_seq.yaml"
|
||||
then
|
||||
log "$MCMD: formating yaml execution OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: formating yaml execution failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if diff "${TEST_DIR}/some_uniq_seq.yaml" \
|
||||
"${TMPDIR}/some_uniq_seq.yaml" > /dev/null
|
||||
then
|
||||
log "$MCMD: formating yaml OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: formating yaml failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obitag/test.sh
Executable file
109
obitests/obitools/obitag/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obitag
|
||||
CMD=obitag
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obitagpcr/test.sh
Executable file
109
obitests/obitools/obitagpcr/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
# Test series name and command under test. Both must name the tool this
# script belongs to (obitests/obitools/obitagpcr), not obipcr.
TEST_NAME=obitagpcr
CMD=obitagpcr
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
109
obitests/obitools/obitaxonomy/test.sh
Executable file
109
obitests/obitools/obitaxonomy/test.sh
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obitaxonomy
|
||||
CMD=obitaxonomy
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
205
obitests/obitools/obiuniq/test.sh
Executable file
205
obitests/obitools/obiuniq/test.sh
Executable file
@@ -0,0 +1,205 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Give the name of the test series here
|
||||
#
|
||||
|
||||
TEST_NAME=obiuniq
|
||||
CMD=obiuniq
|
||||
|
||||
######
|
||||
#
|
||||
# Some variable and function definitions: please don't change them
|
||||
#
|
||||
######
|
||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||
|
||||
MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
|
||||
|
||||
TMPDIR="$(mktemp -d)"
|
||||
ntest=0
|
||||
success=0
|
||||
failed=0
|
||||
|
||||
cleanup() {
|
||||
echo "========================================" 1>&2
|
||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||
|
||||
echo 1>&2
|
||||
echo "- $ntest tests run" 1>&2
|
||||
echo "- $success successfully completed" 1>&2
|
||||
echo "- $failed failed tests" 1>&2
|
||||
echo 1>&2
|
||||
echo "Cleaning up the temporary directory..." 1>&2
|
||||
echo 1>&2
|
||||
echo "========================================" 1>&2
|
||||
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log
|
||||
log
|
||||
|
||||
exit 0
|
||||
}
|
||||
|
||||
log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
####
|
||||
#### Below are the tests
|
||||
####
|
||||
#### Before each test :
|
||||
#### - increment the variable ntest
|
||||
####
|
||||
#### Run the command as the condition of an if / then /else
|
||||
#### - The command must return 0 on success
|
||||
#### - The command must return an exit code different from 0 on failure
|
||||
#### - The data files are stored in the same directory as the test script
|
||||
#### - The test script directory is stored in the TEST_DIR variable
|
||||
#### - If result files have to be produced they must be stored
|
||||
#### in the temporary directory (TMPDIR variable)
|
||||
####
|
||||
#### then clause is executed on success of the command
|
||||
#### - Write a success message using the log function
|
||||
#### - increment the variable success
|
||||
####
|
||||
#### else clause is executed on failure of the command
|
||||
#### - Write a failure message using the log function
|
||||
#### - increment the variable failed
|
||||
####
|
||||
######################################################################
|
||||
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obiuniq "${TEST_DIR}/touniq.fasta" \
|
||||
> "${TMPDIR}/touniq_u.fasta"
|
||||
then
|
||||
log "OBIUniq simple: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq simple: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
obicsv -s --auto ${TEST_DIR}/touniq_u.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u_ref.csv"
|
||||
|
||||
obicsv -s --auto ${TMPDIR}/touniq_u.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u.csv"
|
||||
|
||||
((ntest++))
|
||||
if diff "${TMPDIR}/touniq_u_ref.csv" \
|
||||
"${TMPDIR}/touniq_u.csv" > /dev/null
|
||||
then
|
||||
log "OBIUniq simple: result OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq simple: result failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obiuniq -c a "${TEST_DIR}/touniq.fasta" \
|
||||
> "${TMPDIR}/touniq_u_a.fasta"
|
||||
then
|
||||
log "OBIUniq one category: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq one category: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
obicsv -s --auto ${TEST_DIR}/touniq_u_a.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u_a_ref.csv"
|
||||
|
||||
obicsv -s --auto ${TMPDIR}/touniq_u_a.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u_a.csv"
|
||||
|
||||
|
||||
((ntest++))
|
||||
if diff "${TMPDIR}/touniq_u_a_ref.csv" \
|
||||
"${TMPDIR}/touniq_u_a.csv" > /dev/null
|
||||
then
|
||||
log "OBIUniq one category: result OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq one category: result failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if obiuniq -c a -c b "${TEST_DIR}/touniq.fasta" \
|
||||
> "${TMPDIR}/touniq_u_a_b.fasta"
|
||||
then
|
||||
log "OBIUniq two categories: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq two categories: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
obicsv -s --auto ${TEST_DIR}/touniq_u_a_b.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u_a_b_ref.csv"
|
||||
|
||||
obicsv -s --auto ${TMPDIR}/touniq_u_a_b.fasta \
|
||||
| tail -n +2 \
|
||||
| sort \
|
||||
> "${TMPDIR}/touniq_u_a_b.csv"
|
||||
|
||||
((ntest++))
|
||||
if diff "${TMPDIR}/touniq_u_a_b_ref.csv" \
|
||||
"${TMPDIR}/touniq_u_a_b.csv" > /dev/null
|
||||
then
|
||||
log "OBIUniq two categories: result OK"
|
||||
((success++))
|
||||
else
|
||||
log "OBIUniq two categories: result failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
# the cleanup function is called
|
||||
#
|
||||
#########################################
|
||||
|
||||
cleanup
|
||||
16
obitests/obitools/obiuniq/touniq.fasta
Normal file
16
obitests/obitools/obiuniq/touniq.fasta
Normal file
@@ -0,0 +1,16 @@
|
||||
>seq1 {"a":2, "b":4,"c":5}
|
||||
aaacccgggttt
|
||||
>seq2 {"a":3, "b":4,"c":5}
|
||||
aaacccgggttt
|
||||
>seq3 {"a":3, "b":5,"c":5}
|
||||
aaacccgggttt
|
||||
>seq4 {"a":3, "b":5,"c":6}
|
||||
aaacccgggttt
|
||||
>seq5 {"a":2, "b":4,"c":5}
|
||||
aaacccgggtttca
|
||||
>seq6 {"a":3, "b":4,"c":5}
|
||||
aaacccgggtttca
|
||||
>seq7 {"a":3, "b":5,"c":5}
|
||||
aaacccgggtttca
|
||||
>seq8 {"a":3, "b":5,"c":6}
|
||||
aaacccgggtttca
|
||||
4
obitests/obitools/obiuniq/touniq_u.fasta
Normal file
4
obitests/obitools/obiuniq/touniq_u.fasta
Normal file
@@ -0,0 +1,4 @@
|
||||
>seq5 {"count":4}
|
||||
aaacccgggtttca
|
||||
>seq1 {"count":4}
|
||||
aaacccgggttt
|
||||
8
obitests/obitools/obiuniq/touniq_u_a.fasta
Normal file
8
obitests/obitools/obiuniq/touniq_u_a.fasta
Normal file
@@ -0,0 +1,8 @@
|
||||
>seq5 {"a":2,"b":4,"c":5,"count":1}
|
||||
aaacccgggtttca
|
||||
>seq6 {"a":3,"count":3}
|
||||
aaacccgggtttca
|
||||
>seq1 {"a":2,"b":4,"c":5,"count":1}
|
||||
aaacccgggttt
|
||||
>seq2 {"a":3,"count":3}
|
||||
aaacccgggttt
|
||||
12
obitests/obitools/obiuniq/touniq_u_a_b.fasta
Normal file
12
obitests/obitools/obiuniq/touniq_u_a_b.fasta
Normal file
@@ -0,0 +1,12 @@
|
||||
>seq5 {"a":2,"b":4,"c":5,"count":1}
|
||||
aaacccgggtttca
|
||||
>seq6 {"a":3,"b":4,"c":5,"count":1}
|
||||
aaacccgggtttca
|
||||
>seq7 {"a":3,"b":5,"count":2}
|
||||
aaacccgggtttca
|
||||
>seq1 {"a":2,"b":4,"c":5,"count":1}
|
||||
aaacccgggttt
|
||||
>seq2 {"a":3,"b":4,"c":5,"count":1}
|
||||
aaacccgggttt
|
||||
>seq3 {"a":3,"b":5,"count":2}
|
||||
aaacccgggttt
|
||||
@@ -169,7 +169,7 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
||||
|
||||
right = len(*bufferQA) - right
|
||||
|
||||
// log.Warnf("BuildQualityConsensus: left = %d right = %d\n", left, right)
|
||||
// obilog.Warnf("BuildQualityConsensus: left = %d right = %d\n", left, right)
|
||||
|
||||
for i, qA = range *bufferQA {
|
||||
nA := (*bufferSA)[i]
|
||||
|
||||
@@ -117,7 +117,7 @@ func _MatchScoreRatio(QF, QR byte) (float64, float64) {
|
||||
term1 := _Logaddexp(qF, qR)
|
||||
term2 := _Logdiffexp(term1, qF+qR)
|
||||
|
||||
// log.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2)
|
||||
// obilog.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2)
|
||||
|
||||
match_logp := _Log1mexp(term2 + l3 - l4)
|
||||
match_score := match_logp - _Log1mexp(match_logp)
|
||||
|
||||
@@ -4,33 +4,6 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
)
|
||||
|
||||
// _iupac maps a lowercase letter (indexed by letter-'a') to a 4-bit mask of
// the nucleotides its IUPAC code may stand for: a=1, c=2, g=4, t/u=8.
// Letters that are not IUPAC nucleotide codes have a mask of 0.
var _iupac = [26]byte{
	//  a   b  c   d  e  f
	1, 14, 2, 13, 0, 0,
	//  g   h  i  j   k  l
	4, 11, 0, 0, 12, 0,
	//  m   n  o  p  q  r
	3, 15, 0, 0, 0, 5,
	//  s  t  u  v  w  x
	// v means "a, c or g" (anything but t): 1|2|4 = 7. It previously shared
	// the mask of d (13 = a|g|t), which made v match t and miss c.
	6, 8, 8, 7, 9, 0,
	//  y  z
	10, 0,
}

// _samenuc reports whether the two nucleotide codes a and b are compatible.
//
// ASCII letters are compared case-insensitively through their IUPAC masks:
// two letters match when the sets of nucleotides they may represent overlap.
// As a consequence, letters with no IUPAC meaning (mask 0) never match, not
// even themselves. When either byte is not a letter, plain byte equality is
// used.
func _samenuc(a, b byte) bool {
	// Fold uppercase ASCII letters to lowercase.
	if (a >= 'A') && (a <= 'Z') {
		a |= 32
	}
	if (b >= 'A') && (b <= 'Z') {
		b |= 32
	}

	if (a >= 'a') && (a <= 'z') && (b >= 'a') && (b <= 'z') {
		return (_iupac[a-'a'] & _iupac[b-'a']) > 0
	}
	return a == b
}
|
||||
|
||||
// FastLCSEGFScoreByte calculates the score of the Longest Common Subsequence (LCS) between two byte slices.
|
||||
//
|
||||
// The score is calculated using the following scoring matrix:
|
||||
@@ -165,7 +138,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
default:
|
||||
// We are in the middle of the matrix
|
||||
Sdiag = _incpath(previous[x])
|
||||
if _samenuc(bA[j-1], bB[i-1]) {
|
||||
if obiseq.SameIUPACNuc(bA[j-1], bB[i-1]) {
|
||||
Sdiag = _incscore(Sdiag)
|
||||
}
|
||||
|
||||
@@ -265,7 +238,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
Sleft = _notavail
|
||||
default:
|
||||
Sdiag = _incpath(previous[x])
|
||||
if _samenuc(bA[j-1], bB[i-1]) {
|
||||
if obiseq.SameIUPACNuc(bA[j-1], bB[i-1]) {
|
||||
Sdiag = _incscore(Sdiag)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package obialign
|
||||
|
||||
import log "github.com/sirupsen/logrus"
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// buffIndex converts a pair of coordinates (i, j) into a linear index in a matrix
|
||||
// of size width x width. The coordinates are (-1)-indexed, and the linear index
|
||||
@@ -69,7 +72,7 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
||||
// Mismatch score = -1
|
||||
// Match score = 0
|
||||
match := -1
|
||||
if _samenuc(pattern[j], sequence[i]) {
|
||||
if obiseq.SameIUPACNuc(pattern[j], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
@@ -103,7 +106,7 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
||||
// Mismatch score = -1
|
||||
// Match score = 0
|
||||
match := -1
|
||||
if _samenuc(pattern[jmax], sequence[i]) {
|
||||
if obiseq.SameIUPACNuc(pattern[jmax], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
@@ -152,7 +155,7 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
||||
|
||||
}
|
||||
|
||||
// log.Warnf("from : %d to: %d error: %d match: %v",
|
||||
// obilog.Warnf("from : %d to: %d error: %d match: %v",
|
||||
// i, end+1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)],
|
||||
// string(sequence[i:(end+1)]))
|
||||
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
||||
|
||||
@@ -53,10 +53,10 @@ func ReadAlign(seqA, seqB *obiseq.BioSequence,
|
||||
over = min(seqA.Len(), seqB.Len())
|
||||
}
|
||||
|
||||
// log.Warnf("fw/fw: %v shift=%d fastCount=%d/over=%d fastScore=%f",
|
||||
// obilog.Warnf("fw/fw: %v shift=%d fastCount=%d/over=%d fastScore=%f",
|
||||
// directAlignment, shift, fastCount, over, fastScore)
|
||||
|
||||
// log.Warnf(("seqA: %s\nseqB: %s\n"), seqA.String(), seqB.String())
|
||||
// obilog.Warnf(("seqA: %s\nseqB: %s\n"), seqA.String(), seqB.String())
|
||||
|
||||
// At least one mismatch exists in the overlapping region
|
||||
if fastCount+3 < over {
|
||||
|
||||
14
pkg/obiapat/obiapat.c
Normal file → Executable file
14
pkg/obiapat/obiapat.c
Normal file → Executable file
@@ -149,9 +149,9 @@ char *LowerSequence(char *seq)
|
||||
char *cseq;
|
||||
|
||||
for (cseq = seq ; *cseq ; cseq++)
|
||||
if (IS_UPPER(*cseq))
|
||||
if (IS_UPPER(*cseq)) {
|
||||
*cseq = TO_LOWER(*cseq);
|
||||
|
||||
}
|
||||
return seq;
|
||||
}
|
||||
|
||||
@@ -299,14 +299,14 @@ int32_t delete_apatseq(SeqPtr pseq,
|
||||
return 1;
|
||||
}
|
||||
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel,
|
||||
Pattern *buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel,
|
||||
int *errno, char **errmsg)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
Pattern *pattern;
|
||||
int32_t patlen;
|
||||
int32_t patlen2;
|
||||
|
||||
patlen = strlen(pat);
|
||||
patlen = (int32_t)strlen(pat);
|
||||
patlen2 = lenPattern(pat);
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern) + // Space for struct Pattern
|
||||
@@ -341,10 +341,10 @@ PatternPtr buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel,
|
||||
|
||||
}
|
||||
|
||||
PatternPtr complementPattern(PatternPtr pat, int *errno,
|
||||
Pattern *complementPattern(Pattern *pat, int *errno,
|
||||
char **errmsg)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
Pattern *pattern;
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern) +
|
||||
sizeof(char) * strlen(pat->cpat) + 1 +
|
||||
|
||||
10
pkg/obiapat/obiapat.h
Normal file → Executable file
10
pkg/obiapat/obiapat.h
Normal file → Executable file
@@ -116,13 +116,13 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
|
||||
|
||||
|
||||
int32_t delete_apatseq(SeqPtr pseq,
|
||||
int32_t delete_apatseq(Seq *pseq,
|
||||
int *errno, char **errmsg);
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel, int *errno, char **errmsg);
|
||||
PatternPtr complementPattern(PatternPtr pat, int *errno, char **errmsg);
|
||||
Pattern *buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel, int *errno, char **errmsg);
|
||||
Pattern *complementPattern(Pattern *pat, int *errno, char **errmsg);
|
||||
|
||||
SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||
SeqPtr out,
|
||||
Seq *new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||
Seq *out,
|
||||
int *errno, char **errmsg);
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq);
|
||||
|
||||
13
pkg/obiapat/pattern.go
Normal file → Executable file
13
pkg/obiapat/pattern.go
Normal file → Executable file
@@ -26,7 +26,7 @@ var _AllocatedApaPattern = 0
|
||||
// ApatPattern stores a regular pattern usable by the
|
||||
// Apat algorithm functions and methods
|
||||
type _ApatPattern struct {
|
||||
pointer *C.Pattern
|
||||
pointer C.PatternPtr
|
||||
pattern string
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ var NilApatSequence = ApatSequence{nil}
|
||||
//
|
||||
// Returns an ApatPattern object and an error if the pattern is invalid.
|
||||
func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPattern, error) {
|
||||
|
||||
cpattern := C.CString(pattern)
|
||||
defer C.free(unsafe.Pointer(cpattern))
|
||||
cerrormax := C.int32_t(errormax)
|
||||
@@ -159,7 +160,7 @@ func (pattern ApatPattern) Free() {
|
||||
// Print method prints the ApatPattern to the standard output.
|
||||
// This is mainly a debug method.
|
||||
func (pattern ApatPattern) Print() {
|
||||
C.PrintDebugPattern(C.PatternPtr(pattern.pointer.pointer))
|
||||
C.PrintDebugPattern((*C.Pattern)(pattern.pointer.pointer))
|
||||
}
|
||||
|
||||
// MakeApatSequence casts an obiseq.BioSequence to an ApatSequence.
|
||||
@@ -410,8 +411,8 @@ func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length
|
||||
|
||||
best := [3]int{0, 0, 10000}
|
||||
for _, m := range res {
|
||||
// log.Warnf("Current : Begin : %d End : %d Err : %d", m[0], m[1], m[2])
|
||||
// log.Warnf("Best : Begin : %d End : %d Err : %d", best[0], best[1], best[2])
|
||||
// obilog.Warnf("Current : Begin : %d End : %d Err : %d", m[0], m[1], m[2])
|
||||
// obilog.Warnf("Best : Begin : %d End : %d Err : %d", best[0], best[1], best[2])
|
||||
if (m[0] - m[2]) < best[1]+best[2] {
|
||||
// match are overlapping
|
||||
// log.Warnln("overlap")
|
||||
@@ -467,7 +468,7 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
||||
// Recompute the start and end position of the match
|
||||
// when the pattern allows for indels
|
||||
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
||||
// log.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String())
|
||||
// obilog.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String())
|
||||
start := m[0] - m[2]*2
|
||||
start = max(start, 0)
|
||||
end := start + int(pattern.pointer.pointer.patlen) + 4*m[2]
|
||||
@@ -489,7 +490,7 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
||||
m[0] = start + pb
|
||||
m[1] = start + pe
|
||||
|
||||
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
||||
// obilog.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
||||
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
|
||||
}
|
||||
|
||||
|
||||
22
pkg/obichunk/chunk.go
Normal file
22
pkg/obichunk/chunk.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package obichunk
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ISequenceChunk(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
onMemory bool,
|
||||
dereplicate bool,
|
||||
na string,
|
||||
statsOn obiseq.StatsOnDescriptions,
|
||||
uniqueClassifier *obiseq.BioSequenceClassifier,
|
||||
) (obiiter.IBioSequence, error) {
|
||||
|
||||
if onMemory {
|
||||
return ISequenceChunkOnMemory(iterator, classifier)
|
||||
} else {
|
||||
return ISequenceChunkOnDisk(iterator, classifier, dereplicate, na, statsOn, uniqueClassifier)
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
// tempDir creates a temporary directory with a prefix "obiseq_chunks_"
|
||||
@@ -73,7 +74,13 @@ func find(root, ext string) []string {
|
||||
// is removed. The function logs the number of batches created and the processing
|
||||
// status of each batch.
|
||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
dereplicate bool,
|
||||
na string,
|
||||
statsOn obiseq.StatsOnDescriptions,
|
||||
uniqueClassifier *obiseq.BioSequenceClassifier,
|
||||
) (obiiter.IBioSequence, error) {
|
||||
obiutils.RegisterAPipe()
|
||||
dir, err := tempDir()
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
@@ -86,7 +93,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||
go func() {
|
||||
defer func() {
|
||||
os.RemoveAll(dir)
|
||||
log.Debugln("Clear the cache directory")
|
||||
obiutils.UnregisterPipe()
|
||||
}()
|
||||
|
||||
newIter.Wait()
|
||||
@@ -111,11 +118,45 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if dereplicate {
|
||||
u := make(map[string]*obiseq.BioSequence)
|
||||
var source string
|
||||
uniqueClassifier.Reset()
|
||||
|
||||
for iseq.Next() {
|
||||
batch := iseq.Get()
|
||||
source = batch.Source()
|
||||
|
||||
for _, seq := range batch.Slice() {
|
||||
// Use composite key: sequence + categories
|
||||
code := uniqueClassifier.Code(seq)
|
||||
key := uniqueClassifier.Value(code)
|
||||
prev, ok := u[key]
|
||||
if ok {
|
||||
prev.Merge(seq, na, true, statsOn)
|
||||
} else {
|
||||
u[key] = seq
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chunk := obiseq.MakeBioSequenceSlice(len(u))
|
||||
i := 0
|
||||
|
||||
for _, seq := range u {
|
||||
chunk[i] = seq
|
||||
i++
|
||||
}
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(source, order, chunk))
|
||||
|
||||
} else {
|
||||
source, chunk := iseq.Load()
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(source, order, chunk))
|
||||
log.Infof("Start processing of batch %d/%d : %d sequences",
|
||||
order, nbatch, len(chunk))
|
||||
order+1, nbatch, len(chunk))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,20 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ISequenceChunk(iterator obiiter.IBioSequence,
|
||||
// ISequenceChunkOnMemory processes a sequence iterator by distributing the sequences
|
||||
// into chunks in memory. It uses a classifier to determine how to distribute
|
||||
// the sequences and returns a new iterator for the processed sequences.
|
||||
//
|
||||
// Parameters:
|
||||
// - iterator: An iterator of biosequences to be processed.
|
||||
// - classifier: A pointer to a BioSequenceClassifier used to classify the sequences
|
||||
// during distribution.
|
||||
//
|
||||
// Returns:
|
||||
// An iterator of biosequences representing the processed chunks.
|
||||
//
|
||||
// The function operates asynchronously.
|
||||
func ISequenceChunkOnMemory(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
||||
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
@@ -25,18 +25,35 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
||||
|
||||
log.Infoln("Starting data splitting")
|
||||
|
||||
cat := opts.Categories()
|
||||
na := opts.NAValue()
|
||||
|
||||
// Classifier for bucketing: Hash only to control number of chunks
|
||||
bucketClassifier := obiseq.HashClassifier(opts.BatchCount())
|
||||
|
||||
// Classifier for uniqueness: Sequence + categories
|
||||
var uniqueClassifier *obiseq.BioSequenceClassifier
|
||||
if len(cat) > 0 {
|
||||
cls := make([]*obiseq.BioSequenceClassifier, len(cat)+1)
|
||||
cls[0] = obiseq.SequenceClassifier()
|
||||
for i, c := range cat {
|
||||
cls[i+1] = obiseq.AnnotationClassifier(c, na)
|
||||
}
|
||||
uniqueClassifier = obiseq.CompositeClassifier(cls...)
|
||||
} else {
|
||||
uniqueClassifier = obiseq.SequenceClassifier()
|
||||
}
|
||||
|
||||
if opts.SortOnDisk() {
|
||||
nworkers = 1
|
||||
iterator, err = ISequenceChunkOnDisk(iterator,
|
||||
obiseq.HashClassifier(opts.BatchCount()))
|
||||
iterator, err = ISequenceChunkOnDisk(iterator, bucketClassifier, true, na, opts.StatsOn(), uniqueClassifier)
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
} else {
|
||||
iterator, err = ISequenceChunk(iterator,
|
||||
obiseq.HashClassifier(opts.BatchCount()))
|
||||
iterator, err = ISequenceChunkOnMemory(iterator, bucketClassifier)
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
@@ -63,63 +80,25 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
||||
return neworder
|
||||
}
|
||||
|
||||
var ff func(obiiter.IBioSequence,
|
||||
*obiseq.BioSequenceClassifier,
|
||||
int)
|
||||
|
||||
cat := opts.Categories()
|
||||
na := opts.NAValue()
|
||||
|
||||
ff = func(input obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
icat int) {
|
||||
icat--
|
||||
ff := func(input obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier) {
|
||||
input, err = ISequenceSubChunk(input,
|
||||
classifier,
|
||||
1)
|
||||
|
||||
var next obiiter.IBioSequence
|
||||
if icat >= 0 {
|
||||
next = obiiter.MakeIBioSequence()
|
||||
|
||||
iUnique.Add(1)
|
||||
|
||||
go ff(next,
|
||||
obiseq.AnnotationClassifier(cat[icat], na),
|
||||
icat)
|
||||
}
|
||||
|
||||
o := 0
|
||||
for input.Next() {
|
||||
batch := input.Get()
|
||||
|
||||
if icat < 0 || len(batch.Slice()) == 1 {
|
||||
// No more sub classification of sequence or only a single sequence
|
||||
if !(opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1) {
|
||||
iUnique.Push(batch.Reorder(nextOrder()))
|
||||
}
|
||||
} else {
|
||||
// A new step of classification must be performed
|
||||
next.Push(batch.Reorder(o))
|
||||
o++
|
||||
}
|
||||
}
|
||||
|
||||
if icat >= 0 {
|
||||
next.Close()
|
||||
}
|
||||
|
||||
iUnique.Done()
|
||||
}
|
||||
|
||||
for i := 0; i < nworkers-1; i++ {
|
||||
go ff(iterator.Split(),
|
||||
obiseq.SequenceClassifier(),
|
||||
len(cat))
|
||||
go ff(iterator.Split(), uniqueClassifier.Clone())
|
||||
}
|
||||
go ff(iterator,
|
||||
obiseq.SequenceClassifier(),
|
||||
len(cat))
|
||||
go ff(iterator, uniqueClassifier)
|
||||
|
||||
iMerged := iUnique.IMergeSequenceBatch(opts.NAValue(),
|
||||
opts.StatsOn(),
|
||||
|
||||
@@ -2,21 +2,75 @@ package obicorazick
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"sync"
|
||||
"os"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"github.com/rrethy/ahocorasick"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||
|
||||
matcher := ahocorasick.CompileStrings(patterns)
|
||||
sizebatch:=10000000
|
||||
nmatcher := len(patterns) / sizebatch + 1
|
||||
log.Infof("Building AhoCorasick %d matcher for %d patterns in slot %s",
|
||||
nmatcher, len(patterns), slot)
|
||||
|
||||
if nmatcher == 0 {
|
||||
log.Errorln("No patterns provided")
|
||||
}
|
||||
|
||||
matchers := make([]*ahocorasick.Matcher, nmatcher)
|
||||
ieme := make(chan int)
|
||||
mutex := &sync.WaitGroup{}
|
||||
npar := min(obidefault.ParallelWorkers(), nmatcher)
|
||||
mutex.Add(npar)
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("Building AhoCorasick matcher..."),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(nmatcher, pbopt...)
|
||||
bar.Add(0)
|
||||
|
||||
builder := func() {
|
||||
for i := range ieme {
|
||||
matchers[i] = ahocorasick.CompileStrings(patterns[i*sizebatch:min((i+1)*sizebatch,len(patterns))])
|
||||
bar.Add(1)
|
||||
}
|
||||
mutex.Done()
|
||||
}
|
||||
|
||||
for i := 0; i < npar; i++ {
|
||||
go builder()
|
||||
}
|
||||
|
||||
for i := 0; i < nmatcher; i++ {
|
||||
ieme <- i
|
||||
}
|
||||
|
||||
close(ieme)
|
||||
mutex.Wait()
|
||||
|
||||
fslot := slot + "_Fwd"
|
||||
rslot := slot + "_Rev"
|
||||
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
||||
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
||||
matchesF := 0
|
||||
matchesR := 0
|
||||
b := s.Sequence()
|
||||
bc := s.ReverseComplement(false).Sequence()
|
||||
|
||||
for _, matcher := range matchers {
|
||||
matchesF += len(matcher.FindAllByteSlice(b))
|
||||
matchesR += len(matcher.FindAllByteSlice(bc))
|
||||
}
|
||||
|
||||
log.Debugln("Macthes = ", matchesF, matchesR)
|
||||
matches := matchesF + matchesR
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
package obidefault
|
||||
|
||||
var __compressed__ = false
|
||||
var __compress__ = false
|
||||
|
||||
func CompressOutput() bool {
|
||||
return __compressed__
|
||||
return __compress__
|
||||
}
|
||||
|
||||
func SetCompressOutput(b bool) {
|
||||
__compressed__ = b
|
||||
__compress__ = b
|
||||
}
|
||||
|
||||
func CompressedPtr() *bool {
|
||||
return &__compressed__
|
||||
func CompressOutputPtr() *bool {
|
||||
return &__compress__
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user