Mirror of https://github.com/metabarcoding/obitools4.git
Synced 2026-05-02 04:50:40 +00:00

Compare commits

49 Commits
| SHA1 |
|---|
| 6c4a6c697c |
| 60b3753673 |
| 14e2840a2d |
| 42910c7db9 |
| 8b4cf677c6 |
| 02765f154f |
| 449544bd63 |
| 434d2e5930 |
| 7cb02ded69 |
| 6d469bd711 |
| 3d8e4a3a4e |
| 07d04a6967 |
| 03f251c365 |
| 5714fa6cd3 |
| f101625771 |
| 4359b52eaf |
| da0c8b6f28 |
| 841e5c9e2a |
| e298daeef9 |
| d9e6f67a6e |
| f036c7fa96 |
| e33665e716 |
| c955a614ca |
| f19065261e |
| 3e349e92e1 |
| a4ce24a418 |
| 960ad1531d |
| 137f49d1d1 |
| 083a92e13d |
| 67683435e8 |
| f32b29db4f |
| 10f49fe64b |
| d257917748 |
| fec078c04c |
| a92393dd51 |
| 7e76698490 |
| 64b0b32f61 |
| c8e6a218cb |
| 8c7017a99d |
| c7816973a6 |
| 670edc1958 |
| f92f285417 |
| a786b58ed3 |
| a2b26712b2 |
| 1599abc9ad |
| af213ab446 |
| a60184c115 |
| 585b024bf0 |
| afc9ffda85 |
CI test workflow: bump actions/setup-go from v2 to v5 and actions/checkout from v4 to v5.

```diff
@@ -10,10 +10,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Setup Go
-        uses: actions/setup-go@v2
+        uses: actions/setup-go@v5
         with:
           go-version: '1.23'
       - name: Checkout obitools4 project
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Run tests
         run: make githubtests
```
A second workflow test job: bump Go from 1.23 to 1.26 and actions/checkout from v4 to v5.

```diff
@@ -16,9 +16,9 @@ jobs:
       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.23"
+          go-version: "1.26"
       - name: Checkout obitools4 project
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
       - name: Run tests
         run: make githubtests

```
Release workflow: bump actions/checkout to v5 and Go to 1.26, drop the musl-tools cross-build path, and build the static Linux binaries inside a golang:1.26-alpine container instead.

```diff
@@ -49,12 +49,12 @@ jobs:

     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

       - name: Setup Go
         uses: actions/setup-go@v5
         with:
-          go-version: "1.23"
+          go-version: "1.26"

       - name: Extract version from tag
         id: get_version
@@ -62,12 +62,6 @@ jobs:
           TAG=${GITHUB_REF#refs/tags/Release_}
           echo "version=$TAG" >> $GITHUB_OUTPUT

-      - name: Install build tools (Linux)
-        if: runner.os == 'Linux'
-        run: |
-          sudo apt-get update -q
-          sudo apt-get install -y musl-tools zlib1g-dev
-
       - name: Install build tools (macOS)
         if: runner.os == 'macOS'
         run: |
@@ -75,21 +69,30 @@ jobs:
           xcode-select --install 2>/dev/null || true
           xcode-select -p

-      - name: Build binaries
+      - name: Build binaries (Linux)
+        if: runner.os == 'Linux'
+        env:
+          VERSION: ${{ steps.get_version.outputs.version }}
+        run: |
+          docker run --rm \
+            -v "$(pwd):/src" \
+            -w /src \
+            -e VERSION="${VERSION}" \
+            golang:1.26-alpine \
+            sh -c "apk add --no-cache gcc musl-dev zlib-dev zlib-static make && \
+                   make LDFLAGS='-linkmode=external -extldflags=-static' obitools"
+          mkdir -p artifacts
+          tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .
+
+      - name: Build binaries (macOS)
+        if: runner.os == 'macOS'
         env:
           GOOS: ${{ matrix.goos }}
           GOARCH: ${{ matrix.goarch }}
           VERSION: ${{ steps.get_version.outputs.version }}
-          CC: ${{ matrix.goos == 'linux' && 'musl-gcc' || '' }}
-          CGO_CFLAGS: ${{ matrix.goos == 'linux' && '-I/usr/include' || '' }}
         run: |
-          if [ "$GOOS" = "linux" ]; then
-            make LDFLAGS='-linkmode=external -extldflags=-static' obitools
-          else
           make obitools
-          fi
           mkdir -p artifacts
-          # Create a single tar.gz with all binaries for this platform
           tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .

       - name: Upload artifacts
@@ -104,7 +107,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0

```
Ignore file (+1/-1): add autodoc.

```diff
@@ -23,7 +23,7 @@
 /.vscode
 /build
 /bugs
-
+autodoc
 /ncbitaxo

 !/obitests/**
```
obipairing command: exit with an error when the forward (-F) or reverse (-R) file is missing.

```diff
@@ -37,6 +37,11 @@ func main() {

    optionParser(os.Args)

+   if !obipairing.CLIHasPairedFiles() {
+       log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
+       os.Exit(1)
+   }
+
    obidefault.SetStrictReadWorker(2)
    obidefault.SetStrictWriteWorker(2)
    pairs, err := obipairing.CLIPairedSequence()
```
obitagpcr command: import fmt and obimultiplex, print the ngsfilter config template on request, and add the same paired-files check.

```diff
@@ -1,6 +1,7 @@
 package main

 import (
+   "fmt"
    "os"

    log "github.com/sirupsen/logrus"
@@ -8,6 +9,7 @@ import (
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
+   "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@@ -39,6 +41,17 @@ func main() {
        obitagpcr.OptionSet)

    optionParser(os.Args)

+   if obimultiplex.CLIAskConfigTemplate() {
+       fmt.Print(obimultiplex.CLIConfigTemplate())
+       os.Exit(0)
+   }
+
+   if !obipairing.CLIHasPairedFiles() {
+       log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
+       os.Exit(1)
+   }
+
    pairs, err := obipairing.CLIPairedSequence()

    if err != nil {
```
New JSON file (10 lines):

```diff
@@ -0,0 +1,10 @@
+{
+    "people": [
+        "Software",
+        "Agreement",
+        "Module"
+    ],
+    "projects": [
+        "Code"
+    ]
+}
```
go.mod: bump github.com/buger/jsonparser from v1.1.1 to v1.1.2.

```diff
@@ -6,7 +6,7 @@ require (
    github.com/DavidGamba/go-getoptions v0.33.0
    github.com/PaesslerAG/gval v1.2.4
    github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
-   github.com/buger/jsonparser v1.1.1
+   github.com/buger/jsonparser v1.1.2
    github.com/chen3feng/stl4go v0.1.1
    github.com/dlclark/regexp2 v1.11.5
    github.com/goccy/go-json v0.10.6
```
go.sum: matching checksum updates.

```diff
@@ -6,8 +6,8 @@ github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi
 github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
 github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
 github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
-github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
-github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
+github.com/buger/jsonparser v1.1.2 h1:frqHqw7otoVbk5M8LlE/L7HTnIq2v9RX6EJ48i9AxJk=
+github.com/buger/jsonparser v1.1.2/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
 github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
 github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
```
Three vendored binary files also changed (contents not shown).
New FASTA test fixture (24 lines): obitag output with full-format taxid annotations.

```diff
@@ -0,0 +1,24 @@
+>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
+gcctgaaactcaaaggacttggcggtgctttacatccct
+>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
+gcttaaaactcaaaggacttggcggtgctttatatccct
+>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
+ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
+gattaaacctcaaaggacttggcagtgctttatacccct
+>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
+gcttaaaactcaaaggacttggcggtgctttatatccct
```
New taxonomy.csv test fixture (48 lines):

```diff
@@ -0,0 +1,48 @@
+taxid,parent,taxonomic_rank,scientific_name
+taxon:1 [root]@no rank,taxon:1 [root]@no rank,no rank,root
+taxon:131567 [cellular organisms]@cellular root,taxon:1 [root]@no rank,cellular root,cellular organisms
+taxon:2759 [Eukaryota]@domain,taxon:131567 [cellular organisms]@cellular root,domain,Eukaryota
+taxon:33154 [Opisthokonta]@clade,taxon:2759 [Eukaryota]@domain,clade,Opisthokonta
+taxon:33208 [Metazoa]@kingdom,taxon:33154 [Opisthokonta]@clade,kingdom,Metazoa
+taxon:6072 [Eumetazoa]@clade,taxon:33208 [Metazoa]@kingdom,clade,Eumetazoa
+taxon:33213 [Bilateria]@clade,taxon:6072 [Eumetazoa]@clade,clade,Bilateria
+taxon:33511 [Deuterostomia]@clade,taxon:33213 [Bilateria]@clade,clade,Deuterostomia
+taxon:7711 [Chordata]@phylum,taxon:33511 [Deuterostomia]@clade,phylum,Chordata
+taxon:89593 [Craniata]@subphylum,taxon:7711 [Chordata]@phylum,subphylum,Craniata
+taxon:7742 [Vertebrata]@clade,taxon:89593 [Craniata]@subphylum,clade,Vertebrata
+taxon:7776 [Gnathostomata]@clade,taxon:7742 [Vertebrata]@clade,clade,Gnathostomata
+taxon:117570 [Teleostomi]@clade,taxon:7776 [Gnathostomata]@clade,clade,Teleostomi
+taxon:117571 [Euteleostomi]@clade,taxon:117570 [Teleostomi]@clade,clade,Euteleostomi
+taxon:8287 [Sarcopterygii]@superclass,taxon:117571 [Euteleostomi]@clade,superclass,Sarcopterygii
+taxon:1338369 [Dipnotetrapodomorpha]@clade,taxon:8287 [Sarcopterygii]@superclass,clade,Dipnotetrapodomorpha
+taxon:32523 [Tetrapoda]@clade,taxon:1338369 [Dipnotetrapodomorpha]@clade,clade,Tetrapoda
+taxon:32524 [Amniota]@clade,taxon:32523 [Tetrapoda]@clade,clade,Amniota
+taxon:40674 [Mammalia]@class,taxon:32524 [Amniota]@clade,class,Mammalia
+taxon:32525 [Theria]@clade,taxon:40674 [Mammalia]@class,clade,Theria
+taxon:9347 [Eutheria]@clade,taxon:32525 [Theria]@clade,clade,Eutheria
+taxon:1437010 [Boreoeutheria]@clade,taxon:9347 [Eutheria]@clade,clade,Boreoeutheria
+taxon:314146 [Euarchontoglires]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Euarchontoglires
+taxon:314145 [Laurasiatheria]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Laurasiatheria
+taxon:33554 [Carnivora]@order,taxon:314145 [Laurasiatheria]@superorder,order,Carnivora
+taxon:91561 [Artiodactyla]@order,taxon:314145 [Laurasiatheria]@superorder,order,Artiodactyla
+taxon:314147 [Glires]@clade,taxon:314146 [Euarchontoglires]@superorder,clade,Glires
+taxon:9845 [Ruminantia]@suborder,taxon:91561 [Artiodactyla]@order,suborder,Ruminantia
+taxon:35500 [Pecora]@infraorder,taxon:9845 [Ruminantia]@suborder,infraorder,Pecora
+taxon:9989 [Rodentia]@order,taxon:314147 [Glires]@clade,order,Rodentia
+taxon:379584 [Caniformia]@suborder,taxon:33554 [Carnivora]@order,suborder,Caniformia
+taxon:9608 [Canidae]@family,taxon:379584 [Caniformia]@suborder,family,Canidae
+taxon:9850 [Cervidae]@family,taxon:35500 [Pecora]@infraorder,family,Cervidae
+taxon:9881 [Odocoileinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Odocoileinae
+taxon:33553 [Sciuromorpha]@suborder,taxon:9989 [Rodentia]@order,suborder,Sciuromorpha
+taxon:55153 [Sciuridae]@family,taxon:33553 [Sciuromorpha]@suborder,family,Sciuridae
+taxon:34878 [Cervinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Cervinae
+taxon:9611 [Canis]@genus,taxon:9608 [Canidae]@family,genus,Canis
+taxon:9857 [Capreolus]@genus,taxon:9881 [Odocoileinae]@subfamily,genus,Capreolus
+taxon:9612 [Canis lupus]@species,taxon:9611 [Canis]@genus,species,Canis lupus
+taxon:337726 [Xerinae]@subfamily,taxon:55153 [Sciuridae]@family,subfamily,Xerinae
+taxon:9859 [Cervus]@genus,taxon:34878 [Cervinae]@subfamily,genus,Cervus
+taxon:337730 [Marmotini]@tribe,taxon:337726 [Xerinae]@subfamily,tribe,Marmotini
+taxon:9992 [Marmota]@genus,taxon:337730 [Marmotini]@tribe,genus,Marmota
+taxon:9860 [Cervus elaphus]@species,taxon:9859 [Cervus]@genus,species,Cervus elaphus
+taxon:9615 [Canis lupus familiaris]@subspecies,taxon:9612 [Canis lupus]@species,subspecies,Canis lupus familiaris
+taxon:9858 [Capreolus capreolus]@species,taxon:9857 [Capreolus]@genus,species,Capreolus capreolus
```
obiconvert test script (+130 lines): new checks for --raw-taxid and --taxonomy handling, including an idempotency check.

```diff
@@ -134,6 +134,130 @@ else
    ((failed++))
 fi

+
+# ------------------------------------------------------------------
+# --raw-taxid tests (no taxonomy loaded)
+# ------------------------------------------------------------------
+
+# Running test
+((ntest++))
+if obiconvert --raw-taxid "${TEST_DIR}/out_ecotag.fasta" \
+              > "${TMPDIR}/raw_taxid.fasta" 2>/dev/null
+then
+    log "$MCMD --raw-taxid: running OK"
+    ((success++))
+else
+    log "$MCMD --raw-taxid: running failed"
+    ((failed++))
+fi
+
+# Taxids must be bare numbers — no full-format "taxon:ID [Name]@rank" strings
+((ntest++))
+if grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
+then
+    log "$MCMD --raw-taxid: taxid format check failed (full-format taxid found)"
+    ((failed++))
+else
+    log "$MCMD --raw-taxid: taxid format OK (all taxids are bare numbers)"
+    ((success++))
+fi
+
+# --raw-taxid is idempotent: piping through a second obiconvert --raw-taxid must
+# produce bit-for-bit identical output.
+((ntest++))
+if obiconvert --raw-taxid "${TMPDIR}/raw_taxid.fasta" \
+              > "${TMPDIR}/raw_taxid2.fasta" 2>/dev/null
+then
+    log "$MCMD --raw-taxid piped: running OK"
+    ((success++))
+else
+    log "$MCMD --raw-taxid piped: running failed"
+    ((failed++))
+fi
+
+((ntest++))
+if diff "${TMPDIR}/raw_taxid.fasta" \
+        "${TMPDIR}/raw_taxid2.fasta" > /dev/null
+then
+    log "$MCMD --raw-taxid piped: idempotency OK"
+    ((success++))
+else
+    log "$MCMD --raw-taxid piped: idempotency failed (outputs differ)"
+    ((failed++))
+fi
+
+
+# ------------------------------------------------------------------
+# --taxonomy tests (full-format taxid, no --raw-taxid)
+# ------------------------------------------------------------------
+
+# Running test
+((ntest++))
+if obiconvert --taxonomy "${TEST_DIR}/taxonomy.csv" \
+              "${TEST_DIR}/out_ecotag.fasta" \
+              > "${TMPDIR}/taxo.fasta" 2>/dev/null
+then
+    log "$MCMD --taxonomy: running OK"
+    ((success++))
+else
+    log "$MCMD --taxonomy: running failed"
+    ((failed++))
+fi
+
+# Taxids must be in full "taxon:ID [Name]@rank" format
+((ntest++))
+if grep '"taxid"' "${TMPDIR}/taxo.fasta" | grep -q '"taxid":"taxon:[0-9]'
+then
+    log "$MCMD --taxonomy: taxid format OK (full-format taxids present)"
+    ((success++))
+else
+    log "$MCMD --taxonomy: taxid format check failed (no full-format taxid found)"
+    ((failed++))
+fi
+
+
+# ------------------------------------------------------------------
+# --raw-taxid --taxonomy tests
+# ------------------------------------------------------------------
+
+# Running test
+((ntest++))
+if obiconvert --raw-taxid --taxonomy "${TEST_DIR}/taxonomy.csv" \
+              "${TEST_DIR}/out_ecotag.fasta" \
+              > "${TMPDIR}/raw_taxid_taxo.fasta" 2>/dev/null
+then
+    log "$MCMD --raw-taxid --taxonomy: running OK"
+    ((success++))
+else
+    log "$MCMD --raw-taxid --taxonomy: running failed"
+    ((failed++))
+fi
+
+# Taxids must be bare numbers even when taxonomy is loaded
+((ntest++))
+if grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
+then
+    log "$MCMD --raw-taxid --taxonomy: taxid format check failed (full-format taxid found)"
+    ((failed++))
+else
+    log "$MCMD --raw-taxid --taxonomy: taxid format OK (all taxids are bare numbers)"
+    ((success++))
+fi
+
+# --raw-taxid with or without taxonomy must yield identical taxid values
+((ntest++))
+if diff <(grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
+        <(grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
+        > /dev/null
+then
+    log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values match OK"
+    ((success++))
+else
+    log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values differ (unexpected)"
+    ((failed++))
+fi
+
+
 #########################################
 #
 # At the end of the tests
```
A second FASTA fixture is added with the same 24 lines as the one above:

```diff
@@ -0,0 +1,24 @@
+>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
+gcctgaaactcaaaggacttggcggtgctttacatccct
+>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
+ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
+ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
+gcttaaaactcaaaggacttggcggtgctttatatccct
+>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
+ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
+gattaaacctcaaaggacttggcagtgctttatacccct
+>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
+ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
+ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
+agcttaaaactcaaaggacttggcggtgctttataccctt
+>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
+ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
+gcttaaaactcaaaggacttggcggtgctttatatccct
```
ngsfilter CSV reader: trim surrounding whitespace from primer, tag, experiment, and sample fields.

```diff
@@ -631,9 +631,9 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
            return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header))
        }

-       forward_primer := fields[forward_primerColIndex]
-       reverse_primer := fields[reverse_primerColIndex]
-       tags := _parseMainNGSFilterTags(fields[sample_tagColIndex])
+       forward_primer := strings.TrimSpace(fields[forward_primerColIndex])
+       reverse_primer := strings.TrimSpace(fields[reverse_primerColIndex])
+       tags := _parseMainNGSFilterTags(strings.TrimSpace(fields[sample_tagColIndex]))

        marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer)
        pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
@@ -644,8 +644,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
                i, tags.Forward, tags.Reverse, forward_primer, reverse_primer)
        }

-       pcr.Experiment = fields[experimentColIndex]
-       pcr.Sample = fields[sampleColIndex]
+       pcr.Experiment = strings.TrimSpace(fields[experimentColIndex])
+       pcr.Sample = strings.TrimSpace(fields[sampleColIndex])

        if extraColumns != nil {
            pcr.Annotations = make(obiseq.Annotation)
```
Distribute (+18/-24): drop the variadic fixed batch size and instead flush a per-key buffer when it reaches BatchSizeMax() sequences or BatchMem() bytes.

```diff
@@ -57,34 +57,21 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
 }

 // Distribute organizes the biosequences from the iterator into batches
-// based on the provided classifier and batch sizes. It returns an
-// IDistribute instance that manages the distribution of the sequences.
+// based on the provided classifier. It returns an IDistribute instance
+// that manages the distribution of the sequences.
 //
-// Parameters:
-//   - class: A pointer to a BioSequenceClassifier used to classify
-//     the biosequences during distribution.
-//   - sizes: Optional integer values specifying the batch size. If
-//     no sizes are provided, a default batch size of 5000 is used.
-//
-// Returns:
-//   An IDistribute instance that contains the outputs of the
-//   classified biosequences, a channel for new data notifications,
-//   and the classifier used for distribution. The method operates
-//   asynchronously, processing the sequences in separate goroutines.
-//   It ensures that the outputs are closed and cleaned up once
-//   processing is complete.
-func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
-   batchsize := obidefault.BatchSize()
+// Batches are flushed when either BatchSizeMax() sequences or BatchMem()
+// bytes are accumulated per key, mirroring the RebatchBySize strategy.
+func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDistribute {
+   maxCount := obidefault.BatchSizeMax()
+   maxBytes := obidefault.BatchMem()

    outputs := make(map[int]IBioSequence, 100)
    slices := make(map[int]*obiseq.BioSequenceSlice, 100)
+   bufBytes := make(map[int]int, 100)
    orders := make(map[int]int, 100)
    news := make(chan int)

-   if len(sizes) > 0 {
-       batchsize = sizes[0]
-   }
-
    jobDone := sync.WaitGroup{}
    lock := sync.Mutex{}

@@ -115,6 +102,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) ID
                slice = &s
                slices[key] = slice
                orders[key] = 0
+               bufBytes[key] = 0

                lock.Lock()
                outputs[key] = MakeIBioSequence()
@@ -123,14 +111,20 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) ID
                news <- key
            }

-           *slice = append(*slice, s)
-
-           if len(*slice) == batchsize {
+           sz := s.MemorySize()
+           countFull := maxCount > 0 && len(*slice) >= maxCount
+           memFull := maxBytes > 0 && bufBytes[key]+sz > maxBytes && len(*slice) > 0
+           if countFull || memFull {
                outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
                orders[key]++
                s := obiseq.MakeBioSequenceSlice()
                slices[key] = &s
+               slice = &s
+               bufBytes[key] = 0
            }
+
+           *slice = append(*slice, s)
+           bufBytes[key] += sz
        }
    }
```
Encode4mer: also return nil when the sequence has exactly 3 nucleotides (length == 0 yields no 4-mers).

```diff
@@ -47,7 +47,7 @@ func Encode4mer(seq *obiseq.BioSequence, buffer *[]byte) []byte {
    length := slength - 3
    rawseq := seq.Sequence()

-   if length < 0 {
+   if length <= 0 {
        return nil
    }

```
Lua processor (+90/-7): include the underlying error in Lua failure messages, add a batch-level LuaSliceWorker alongside the per-sequence LuaWorker, and guard against nil result slices.

```diff
@@ -91,7 +91,7 @@ func LuaWorker(proto *lua.FunctionProto) obiseq.SeqWorker {
    err := interpreter.PCall(0, lua.MultRet, nil)

    if err != nil {
-       log.Fatalf("Error in executing the lua script")
+       log.Fatalf("Error in executing the lua script: %v", err)
    }

    result := interpreter.GetGlobal("worker")
@@ -141,6 +141,69 @@ func LuaWorker(proto *lua.FunctionProto) obiseq.SeqWorker {
    return nil
 }

+// LuaSliceWorker creates a SeqSliceWorker that calls the Lua function
+// named "slice_worker". Unlike LuaWorker, the entire batch (BioSequenceSlice)
+// is passed to the Lua function at once, enabling batch-level processing
+// (e.g. a single HTTP request per batch instead of one per sequence).
+//
+// The Lua function signature:
+//
+//	function slice_worker(slice) -- receives a BioSequenceSlice
+//	    -- process the batch
+//	    return slice             -- returns a BioSequenceSlice (or nil)
+//	end
+func LuaSliceWorker(proto *lua.FunctionProto) obiseq.SeqSliceWorker {
+   interpreter := NewInterpreter()
+   lfunc := interpreter.NewFunctionFromProto(proto)
+   interpreter.Push(lfunc)
+   err := interpreter.PCall(0, lua.MultRet, nil)
+
+   if err != nil {
+       log.Fatalf("Error in executing the lua script: %v", err)
+   }
+
+   result := interpreter.GetGlobal("slice_worker")
+
+   if lua_worker, ok := result.(*lua.LFunction); ok {
+       f := func(slice obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
+           if err := interpreter.CallByParam(lua.P{
+               Fn:      lua_worker,
+               NRet:    1,
+               Protect: true,
+           }, obiseqslice2Lua(interpreter, &slice)); err != nil {
+               log.Fatal(err)
+           }
+
+           lreponse := interpreter.Get(-1)
+           defer interpreter.Pop(1)
+
+           if reponse, ok := lreponse.(*lua.LUserData); ok {
+               s := reponse.Value
+               switch val := s.(type) {
+               case *obiseq.BioSequenceSlice:
+                   return *val, nil
+               case *obiseq.BioSequence:
+                   return obiseq.BioSequenceSlice{val}, nil
+               default:
+                   r := reflect.TypeOf(val)
+                   return nil, fmt.Errorf("slice_worker function doesn't return the correct type %s", r)
+               }
+           }
+
+           if _, ok = lreponse.(*lua.LNilType); ok {
+               return nil, nil
+           }
+
+           return nil, fmt.Errorf("slice_worker function doesn't return the correct type %T", lreponse)
+       }
+
+       return f
+   }
+
+   log.Fatalf("The slice_worker object is not a function")
+   return nil
+}
+
 // LuaProcessor processes a Lua script on a sequence iterator and returns a new iterator.
 //
 // Parameters:
@@ -173,7 +236,7 @@ func LuaProcessor(iterator obiiter.IBioSequence, name, program string, breakOnEr
    err = interpreter.PCall(0, lua.MultRet, nil)

    if err != nil {
-       log.Fatalf("Error in executing the lua script")
+       log.Fatalf("Error in executing the lua script: %v", err)
    }

    result := interpreter.GetGlobal("begin")
@@ -198,7 +261,7 @@ func LuaProcessor(iterator obiiter.IBioSequence, name, program string, breakOnEr
    err = interpreter.PCall(0, lua.MultRet, nil)

    if err != nil {
-       log.Fatalf("Error in executing the lua script")
+       log.Fatalf("Error in executing the lua script: %v", err)
    }

    result := interpreter.GetGlobal("finish")
@@ -216,11 +279,27 @@ func LuaProcessor(iterator obiiter.IBioSequence, name, program string, breakOnEr

    }()

-   ff := func(iterator obiiter.IBioSequence) {
-       w := LuaWorker(proto)
-       sw := obiseq.SeqToSliceWorker(w, false)
+   // Detect whether the script defines slice_worker (batch-level) or worker (per-sequence).
+   hasSliceWorker := func() bool {
+       interpreter := NewInterpreter()
+       lfunc := interpreter.NewFunctionFromProto(proto)
+       interpreter.Push(lfunc)
+       if err := interpreter.PCall(0, lua.MultRet, nil); err != nil {
+           return false
+       }
+       result := interpreter.GetGlobal("slice_worker")
+       interpreter.Close()
+       _, ok := result.(*lua.LFunction)
+       return ok
+   }()

-       // iterator = iterator.SortBatches()
+   ff := func(iterator obiiter.IBioSequence) {
+       var sw obiseq.SeqSliceWorker
+       if hasSliceWorker {
+           sw = LuaSliceWorker(proto)
+       } else {
+           sw = obiseq.SeqToSliceWorker(LuaWorker(proto), false)
+       }

        for iterator.Next() {
            seqs := iterator.Get()
@@ -235,6 +314,10 @@ func LuaProcessor(iterator obiiter.IBioSequence, name, program string, breakOnEr
            }
        }

+       if ns == nil {
+           ns = obiseq.BioSequenceSlice{}
+       }
+
        newIter.Push(obiiter.MakeBioSequenceBatch(seqs.Source(), seqs.Order(), ns))
    }
```
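The Lua-side contract above is the whole interface a script has to satisfy: define a global function named slice_worker instead of worker and LuaProcessor switches into batch mode. A minimal Lua sketch (the methods available on the slice userdata are not shown in this diff, so the body is left schematic):

```lua
-- Minimal slice_worker sketch. The function receives a whole
-- BioSequenceSlice and must return a BioSequenceSlice, or nil
-- to drop the batch.
function slice_worker(slice)
    -- batch-level work goes here, e.g. one HTTP round trip per batch
    return slice
end
```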
Lua conversion helpers: replace the flat scalar cases with a recursive lvalueFromInterface so nested values produced by json.Unmarshal convert correctly.

```diff
@@ -17,15 +17,7 @@ import (
 // No return values. This function operates directly on the Lua state stack.
 func pushInterfaceToLua(L *lua.LState, val interface{}) {
    switch v := val.(type) {
-   case string:
-       L.Push(lua.LString(v))
-   case bool:
-       L.Push(lua.LBool(v))
-   case int:
-       L.Push(lua.LNumber(v))
-   case float64:
-       L.Push(lua.LNumber(v))
-   // Add other cases as needed for different types
+   // Typed slices and maps from internal OBITools code — not produced by json.Unmarshal
    case map[string]int:
        pushMapStringIntToLua(L, v)
    case map[string]string:
@@ -34,8 +26,6 @@ func pushInterfaceToLua(L *lua.LState, val interface{}) {
        pushMapStringBoolToLua(L, v)
    case map[string]float64:
        pushMapStringFloat64ToLua(L, v)
-   case map[string]interface{}:
-       pushMapStringInterfaceToLua(L, v)
    case []string:
        pushSliceStringToLua(L, v)
    case []int:
@@ -46,63 +36,63 @@ func pushInterfaceToLua(L *lua.LState, val interface{}) {
        pushSliceNumericToLua(L, v)
    case []bool:
        pushSliceBoolToLua(L, v)
-   case []interface{}:
-       pushSliceInterfaceToLua(L, v)
-   case nil:
-       L.Push(lua.LNil)
    case *sync.Mutex:
        pushMutexToLua(L, v)
    default:
-       log.Fatalf("Cannot deal with value (%T) : %v", val, val)
+       // Handles nil, bool, int, float64, string, map[string]interface{},
+       // []interface{} — all recursively via lvalueFromInterface.
+       L.Push(lvalueFromInterface(L, v))
    }
 }

 func pushMapStringInterfaceToLua(L *lua.LState, m map[string]interface{}) {
-   // Create a new Lua table
    luaTable := L.NewTable()
-   // Iterate over the Go map and set the key-value pairs in the Lua table
    for key, value := range m {
-       switch v := value.(type) {
-       case int:
-           luaTable.RawSetString(key, lua.LNumber(v))
-       case float64:
-           luaTable.RawSetString(key, lua.LNumber(v))
-       case bool:
-           luaTable.RawSetString(key, lua.LBool(v))
-       case string:
-           luaTable.RawSetString(key, lua.LString(v))
-       default:
-           log.Fatalf("Doesn't deal with map containing value %v of type %T", v, v)
-       }
+       L.SetField(luaTable, key, lvalueFromInterface(L, value))
    }
-
-   // Push the Lua table onto the stack
    L.Push(luaTable)
 }

 func pushSliceInterfaceToLua(L *lua.LState, s []interface{}) {
-   // Create a new Lua table
    luaTable := L.NewTable()
-   // Iterate over the Go map and set the key-value pairs in the Lua table
    for _, value := range s {
-       switch v := value.(type) {
-       case int:
-           luaTable.Append(lua.LNumber(v))
-       case float64:
-           luaTable.Append(lua.LNumber(v))
-       case bool:
-           luaTable.Append(lua.LBool(v))
-       case string:
-           luaTable.Append(lua.LString(v))
-       default:
-           log.Fatalf("Doesn't deal with slice containing value %v of type %T", v, v)
-       }
+       luaTable.Append(lvalueFromInterface(L, value))
    }
-
-   // Push the Lua table onto the stack
    L.Push(luaTable)
 }

+// lvalueFromInterface converts a Go interface{} value (as produced by json.Unmarshal)
+// to the corresponding lua.LValue, handling nested maps and slices recursively.
+func lvalueFromInterface(L *lua.LState, value interface{}) lua.LValue {
+   switch v := value.(type) {
+   case nil:
+       return lua.LNil
+   case bool:
+       return lua.LBool(v)
+   case int:
+       return lua.LNumber(v)
+   case float64:
+       return lua.LNumber(v)
+   case string:
+       return lua.LString(v)
+   case map[string]interface{}:
+       t := L.NewTable()
+       for key, val := range v {
+           L.SetField(t, key, lvalueFromInterface(L, val))
+       }
+       return t
+   case []interface{}:
+       t := L.NewTable()
+       for _, val := range v {
+           t.Append(lvalueFromInterface(L, val))
+       }
+       return t
+   default:
+       log.Fatalf("lvalueFromInterface: unsupported type %T: %v", v, v)
+       return lua.LNil
+   }
+}
+
 // pushMapStringIntToLua creates a new Lua table and iterates over the Go map to set key-value pairs in the Lua table. It then pushes the Lua table onto the stack.
 //
 // L *lua.LState - the Lua state
```
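Together with the json module added later in this changeset, the recursive conversion means deeply nested JSON becomes ordinary nested Lua tables. A small illustrative sketch (the payload shape mirrors the kmindex-style response used in the tests below):

```lua
-- Nested JSON decodes into nested Lua tables via lvalueFromInterface.
local data = json.decode('{"Human":{"query_001":{"hit":1.0}}}')
print(data.Human.query_001.hit) -- prints the score
```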
Table2Interface: recurse into nested Lua tables when converting back to Go.

```diff
@@ -28,6 +28,8 @@ func Table2Interface(interpreter *lua.LState, table *lua.LTable) interface{} {
                val[i-1] = float64(v.(lua.LNumber))
            case lua.LTString:
                val[i-1] = string(v.(lua.LString))
+           case lua.LTTable:
+               val[i-1] = Table2Interface(interpreter, v.(*lua.LTable))
            }
        }
        return val
@@ -45,6 +47,8 @@ func Table2Interface(interpreter *lua.LState, table *lua.LTable) interface{} {
                val[string(ks)] = float64(v.(lua.LNumber))
            case lua.LTString:
                val[string(ks)] = string(v.(lua.LString))
+           case lua.LTTable:
+               val[string(ks)] = Table2Interface(interpreter, v.(*lua.LTable))
            }
        }
    })
```
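This is the mirror image of the previous change: json.encode (added later in this changeset) funnels Lua tables through Table2Interface, so the new lua.LTTable cases let nested tables become nested JSON objects and arrays. A small sketch:

```lua
-- Nested Lua tables now survive the Lua → Go conversion.
local s = json.encode({sample = {counts = {10, 20, 30}}})
-- expected to produce {"sample":{"counts":[10,20,30]}}
```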
New file (+128 lines): an http module exposed to Lua scripts (http.post, http.set_concurrency).

```diff
@@ -0,0 +1,128 @@
+package obilua
+
+import (
+   "context"
+   "io"
+   "net/http"
+   "strings"
+   "sync"
+   "time"
+
+   "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
+   lua "github.com/yuin/gopher-lua"
+)
+
+const httpClientTimeout = 300 * time.Second
+
+var (
+   _httpClient     *http.Client
+   _httpClientOnce sync.Once
+
+   // _httpSemaphore limits the number of concurrent HTTP requests.
+   // Initialised lazily alongside the client.
+   _httpSemaphore chan struct{}
+)
+
+func getHTTPClient() *http.Client {
+   _httpClientOnce.Do(func() {
+       conns := 2 * obidefault.ParallelWorkers()
+       _httpClient = &http.Client{
+           Transport: &http.Transport{
+               MaxIdleConnsPerHost: conns,
+               MaxConnsPerHost:     conns,
+               IdleConnTimeout:     90 * time.Second,
+           },
+           Timeout: httpClientTimeout,
+       }
+       _httpSemaphore = make(chan struct{}, obidefault.ParallelWorkers())
+   })
+   return _httpClient
+}
+
+// RegisterHTTP registers the http module in the Lua state as a global,
+// consistent with obicontext and BioSequence.
+//
+// Exposes:
+//
+//	http.post(url, body [, timeout_ms]) → response string (on success)
+//	http.post(url, body [, timeout_ms]) → nil, err string (on error)
+//	http.set_concurrency(n) → set max simultaneous requests
+func RegisterHTTP(luaState *lua.LState) {
+   table := luaState.NewTable()
+   luaState.SetField(table, "post", luaState.NewFunction(luaHTTPPost))
+   luaState.SetField(table, "set_concurrency", luaState.NewFunction(luaHTTPSetConcurrency))
+   luaState.SetGlobal("http", table)
+}
+
+// luaHTTPPost implements http.post(url, body [, timeout_ms]) for Lua.
+//
+// The optional third argument overrides the default timeout (in milliseconds).
+// Concurrent requests are throttled through _httpSemaphore so that a
+// single-threaded backend server is not overwhelmed by K parallel workers.
+//
+// Lua signature:
+//
+//	local response = http.post(url, body)
+//	local response = http.post(url, body, 5000) -- 5 s timeout
+//	local response, err = http.post(url, body)
+func luaHTTPPost(L *lua.LState) int {
+   url := L.CheckString(1)
+   body := L.CheckString(2)
+
+   client := getHTTPClient()
+
+   timeout := httpClientTimeout
+   if L.GetTop() >= 3 {
+       ms := L.CheckInt(3)
+       timeout = time.Duration(ms) * time.Millisecond
+   }
+
+   // Acquire semaphore slot — blocks until a slot is free.
+   _httpSemaphore <- struct{}{}
+   defer func() { <-_httpSemaphore }()
+
+   ctx, cancel := context.WithTimeout(context.Background(), timeout)
+   defer cancel()
+
+   req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, strings.NewReader(body))
+   if err != nil {
+       L.Push(lua.LNil)
+       L.Push(lua.LString(err.Error()))
+       return 2
+   }
+   req.Header.Set("Content-Type", "application/json")
+
+   resp, err := client.Do(req)
+   if err != nil {
+       L.Push(lua.LNil)
+       L.Push(lua.LString(err.Error()))
+       return 2
+   }
+   defer resp.Body.Close()
+
+   respBytes, err := io.ReadAll(resp.Body)
+   if err != nil {
+       L.Push(lua.LNil)
+       L.Push(lua.LString(err.Error()))
+       return 2
+   }
+
+   L.Push(lua.LString(respBytes))
+   return 1
+}
+
+// luaHTTPSetConcurrency replaces the semaphore with a new one of size n.
+// Must be called before the first http.post (e.g. in begin()).
+//
+// Lua signature:
+//
+//	http.set_concurrency(1) -- serialise all HTTP requests
+func luaHTTPSetConcurrency(L *lua.LState) int {
+   n := L.CheckInt(1)
+   if n < 1 {
+       n = 1
+   }
+   getHTTPClient() // ensure singleton is initialised
+   _httpSemaphore = make(chan struct{}, n)
+   return 0
+}
```
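From the Lua side, the module surface is exactly the two functions documented above. A usage sketch (the URL and body are placeholders; errors follow the nil-plus-message convention the module documents):

```lua
function begin()
    -- Throttle to one in-flight request; must run before the first post.
    http.set_concurrency(1)
end

function slice_worker(slice)
    -- "http://localhost:8080/query" is a placeholder endpoint.
    local response, err = http.post("http://localhost:8080/query",
                                    '{"ping":true}', 5000)
    if response == nil then
        error("http.post failed: " .. err)
    end
    return slice
end
```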
New file (+71 lines): a json module exposed to Lua scripts (json.encode, json.decode).

```diff
@@ -0,0 +1,71 @@
+package obilua
+
+import (
+   "encoding/json"
+
+   lua "github.com/yuin/gopher-lua"
+)
+
+// RegisterJSON registers the json module in the Lua state as a global,
+// consistent with obicontext, BioSequence, and http.
+//
+// Exposes:
+//
+//	json.encode(data) → string (on success)
+//	json.encode(data) → nil, err (on error)
+//	json.decode(string) → value (on success)
+//	json.decode(string) → nil, err (on error)
+func RegisterJSON(luaState *lua.LState) {
+   table := luaState.NewTable()
+   luaState.SetField(table, "encode", luaState.NewFunction(luaJSONEncode))
+   luaState.SetField(table, "decode", luaState.NewFunction(luaJSONDecode))
+   luaState.SetGlobal("json", table)
+}
+
+// luaJSONEncode implements json.encode(data) for Lua.
+func luaJSONEncode(L *lua.LState) int {
+   val := L.CheckAny(1)
+
+   var goVal interface{}
+   switch v := val.(type) {
+   case *lua.LTable:
+       goVal = Table2Interface(L, v)
+   case lua.LString:
+       goVal = string(v)
+   case lua.LNumber:
+       goVal = float64(v)
+   case lua.LBool:
+       goVal = bool(v)
+   case *lua.LNilType:
+       goVal = nil
+   default:
+       L.Push(lua.LNil)
+       L.Push(lua.LString("json.encode: unsupported type"))
+       return 2
+   }
+
+   b, err := json.Marshal(goVal)
+   if err != nil {
+       L.Push(lua.LNil)
+       L.Push(lua.LString(err.Error()))
+       return 2
+   }
+
+   L.Push(lua.LString(b))
+   return 1
+}
+
+// luaJSONDecode implements json.decode(string) for Lua.
+func luaJSONDecode(L *lua.LState) int {
+   s := L.CheckString(1)
+
+   var goVal interface{}
+   if err := json.Unmarshal([]byte(s), &goVal); err != nil {
+       L.Push(lua.LNil)
+       L.Push(lua.LString(err.Error()))
+       return 2
+   }
+
+   pushInterfaceToLua(L, goVal)
+   return 1
+}
```
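Combined with the http module, this is enough for a slice_worker to exchange JSON with an external service. A sketch (the endpoint and payload fields are placeholders):

```lua
local body, encerr = json.encode({queries = {"acgt"}, id = "batch_1"})
if body == nil then
    error("json.encode failed: " .. encerr)
end

local response = http.post("http://localhost:8080/kmindex/query", body)
local data = json.decode(response)
-- data is now a nested Lua table, e.g. data.Human.query_001
```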
@@ -0,0 +1,184 @@
package obilua

import (
	"testing"

	lua "github.com/yuin/gopher-lua"
)

// runLua executes a Lua snippet inside a fresh interpreter and returns the
// LState so the caller can inspect the stack.
func runLua(t *testing.T, script string) *lua.LState {
	t.Helper()
	L := NewInterpreter()
	if err := L.DoString(script); err != nil {
		t.Fatalf("Lua error: %v", err)
	}
	return L
}

// TestJSONEncodeScalar verifies that simple scalars are encoded correctly.
func TestJSONEncodeScalar(t *testing.T) {
	cases := []struct {
		script   string
		expected string
	}{
		{`result = json.encode("hello")`, `"hello"`},
		{`result = json.encode(42)`, `42`},
		{`result = json.encode(true)`, `true`},
	}

	for _, tc := range cases {
		L := runLua(t, tc.script)
		got := string(L.GetGlobal("result").(lua.LString))
		if got != tc.expected {
			t.Errorf("encode(%s): got %q, want %q", tc.script, got, tc.expected)
		}
		L.Close()
	}
}

// TestJSONEncodeTable verifies that a Lua map-like table encodes to JSON.
func TestJSONEncodeTable(t *testing.T) {
	L := runLua(t, `result = json.encode({a = 1, b = "x"})`)
	got := string(L.GetGlobal("result").(lua.LString))
	// The exact output depends on how the Lua table is mapped to a Go value,
	// so only check that something was produced; the round-trip tests below
	// validate the content via decode.
	L.Close()
	if got == "" {
		t.Fatal("encode returned empty string")
	}
}

// TestJSONDecodeScalar verifies that JSON scalars decode to the right Lua types.
func TestJSONDecodeScalar(t *testing.T) {
	L := runLua(t, `
		s = json.decode('"hello"')
		n = json.decode('3.14')
		b = json.decode('true')
	`)
	if s, ok := L.GetGlobal("s").(lua.LString); !ok || string(s) != "hello" {
		t.Errorf("decode string: got %v", L.GetGlobal("s"))
	}
	if n, ok := L.GetGlobal("n").(lua.LNumber); !ok || float64(n) != 3.14 {
		t.Errorf("decode number: got %v", L.GetGlobal("n"))
	}
	if b, ok := L.GetGlobal("b").(lua.LBool); !ok || !bool(b) {
		t.Errorf("decode bool: got %v", L.GetGlobal("b"))
	}
	L.Close()
}

// TestJSONRoundTripFlat verifies a flat table survives encode → decode.
func TestJSONRoundTripFlat(t *testing.T) {
	L := runLua(t, `
		original = {name = "Homo_sapiens", score = 1.0, valid = true}
		encoded = json.encode(original)
		decoded = json.decode(encoded)
	`)
	decoded, ok := L.GetGlobal("decoded").(*lua.LTable)
	if !ok {
		t.Fatal("decoded is not a table")
	}
	if v := decoded.RawGetString("name"); string(v.(lua.LString)) != "Homo_sapiens" {
		t.Errorf("name: got %v", v)
	}
	if v := decoded.RawGetString("score"); float64(v.(lua.LNumber)) != 1.0 {
		t.Errorf("score: got %v", v)
	}
	if v := decoded.RawGetString("valid"); !bool(v.(lua.LBool)) {
		t.Errorf("valid: got %v", v)
	}
	L.Close()
}

// TestJSONRoundTripNested verifies a 3-level nested structure (kmindex response)
// survives encode → decode with correct values at every level.
func TestJSONRoundTripNested(t *testing.T) {
	L := NewInterpreter()

	// Inject the JSON string as a Lua global to avoid quoting issues.
	L.SetGlobal("kmindex_json", lua.LString(
		`{"Human":{"query_001":{"Homo_sapiens--GCF_000001405_40":1.0}}}`,
	))

	if err := L.DoString(`
		data = json.decode(kmindex_json)
		reencoded = json.encode(data)
		data2 = json.decode(reencoded)
	`); err != nil {
		t.Fatalf("Lua error: %v", err)
	}

	// Navigate data["Human"]["query_001"]["Homo_sapiens--GCF_000001405_40"]
	data, ok := L.GetGlobal("data").(*lua.LTable)
	if !ok {
		t.Fatal("data is not a table")
	}
	human, ok := data.RawGetString("Human").(*lua.LTable)
	if !ok {
		t.Fatal("data.Human is not a table")
	}
	query, ok := human.RawGetString("query_001").(*lua.LTable)
	if !ok {
		t.Fatal("data.Human.query_001 is not a table")
	}
	score, ok := query.RawGetString("Homo_sapiens--GCF_000001405_40").(lua.LNumber)
	if !ok || float64(score) != 1.0 {
		t.Errorf("score: got %v, want 1.0", query.RawGetString("Homo_sapiens--GCF_000001405_40"))
	}

	// Same check on the re-encoded+decoded version
	data2, ok := L.GetGlobal("data2").(*lua.LTable)
	if !ok {
		t.Fatal("data2 is not a table")
	}
	score2 := data2.RawGetString("Human").(*lua.LTable).
		RawGetString("query_001").(*lua.LTable).
		RawGetString("Homo_sapiens--GCF_000001405_40").(lua.LNumber)
	if float64(score2) != 1.0 {
		t.Errorf("data2 score: got %v, want 1.0", score2)
	}
	L.Close()
}

// TestJSONDecodeArray verifies that a JSON array decodes to a Lua array table.
func TestJSONDecodeArray(t *testing.T) {
	L := runLua(t, `arr = json.decode('[1, 2, 3]')`)
	arr, ok := L.GetGlobal("arr").(*lua.LTable)
	if !ok {
		t.Fatal("arr is not a table")
	}
	for i, expected := range []float64{1, 2, 3} {
		v, ok := arr.RawGetInt(i + 1).(lua.LNumber)
		if !ok || float64(v) != expected {
			t.Errorf("arr[%d]: got %v, want %v", i+1, arr.RawGetInt(i+1), expected)
		}
	}
	L.Close()
}

// TestJSONEncodeError checks that json.encode(nil) does not raise a Lua error:
// nil encodes to the JSON value null.
func TestJSONEncodeError(t *testing.T) {
	L := runLua(t, `
		local result, err = json.encode(nil)
	`)
	// nil encodes to JSON "null", so runLua must not have failed above.
	L.Close()
}

// TestJSONDecodeError verifies that malformed JSON returns nil + error string.
func TestJSONDecodeError(t *testing.T) {
	L := runLua(t, `
		local result, err = json.decode("not valid json")
		decode_ok = (result == nil)
		decode_has_err = (err ~= nil)
	`)
	if L.GetGlobal("decode_ok") != lua.LTrue {
		t.Error("expected nil result on decode error")
	}
	if L.GetGlobal("decode_has_err") != lua.LTrue {
		t.Error("expected error string on decode error")
	}
	L.Close()
}
@@ -5,4 +5,6 @@ import lua "github.com/yuin/gopher-lua"
 func RegisterObilib(luaState *lua.LState) {
 	RegisterObiSeq(luaState)
 	RegisterObiTaxonomy(luaState)
+	RegisterHTTP(luaState)
+	RegisterJSON(luaState)
 }
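For orientation, a short sketch of what the two new registrations give Lua scripts, driven from Go. The import path is assumed from the package layout seen elsewhere in this diff; `NewInterpreter` is the same constructor the tests above use:

```go
package main

import (
	"fmt"

	// Assumed import path for the obilua package documented in this diff.
	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilua"
	lua "github.com/yuin/gopher-lua"
)

func main() {
	// With this patch, the interpreter also exposes the http and json modules.
	L := obilua.NewInterpreter()
	defer L.Close()

	if err := L.DoString(`encoded = json.encode({answer = 42})`); err != nil {
		panic(err)
	}
	fmt.Println(L.GetGlobal("encoded").(lua.LString))
}
```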
@@ -31,7 +31,8 @@ func obiseqslice2Lua(interpreter *lua.LState,
 }
 
 func newObiSeqSlice(luaState *lua.LState) int {
-	seqslice := obiseq.NewBioSequenceSlice()
+	capacity := luaState.OptInt(1, 0)
+	seqslice := obiseq.NewBioSequenceSlice(capacity)
 	luaState.Push(obiseqslice2Lua(luaState, seqslice))
 	return 1
 }
@@ -3,7 +3,7 @@ package obioptions
 // Version is automatically updated by the Makefile from version.txt
 // The patch number (third digit) is incremented on each push to the repository
 
-var _Version = "Release 4.4.24"
+var _Version = "Release 4.4.42"
 
 // Version returns the version of the obitools package.
 //
@@ -499,6 +499,9 @@ func (s *BioSequence) SetQualities(qualities Quality) {
 	if s.qualities != nil {
 		RecycleSlice(&s.qualities)
 	}
+	if len(qualities) > 0 && len(qualities) != len(s.sequence) {
+		log.Panicf("[BioSequence.SetQualities] Sequence %s has a length of %d and qualities a length of %d", s.id, len(s.sequence), len(qualities))
+	}
 	s.qualities = CopySlice(qualities)
 }
 
@@ -508,6 +511,9 @@ func (s *BioSequence) TakeQualities(qualities Quality) {
 	if s.qualities != nil {
 		RecycleSlice(&s.qualities)
 	}
+	if len(qualities) > 0 && len(qualities) != len(s.sequence) {
+		log.Panicf("[BioSequence.TakeQualities] Sequence %s has a length of %d and qualities a length of %d", s.id, len(s.sequence), len(qualities))
+	}
 	s.qualities = qualities
 }
 
@@ -118,6 +118,9 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
 */
 func ReverseComplementWorker(inplace bool) SeqWorker {
 	f := func(input *BioSequence) (BioSequenceSlice, error) {
+		if input.IsPaired() {
+			input.PairedWith().ReverseComplement(inplace)
+		}
 		return BioSequenceSlice{input.ReverseComplement(inplace)}, nil
 	}
 
+20 -2
@@ -48,7 +48,16 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
 	newSeq.sequence = CopySlice(sequence.Sequence()[from:to])
 
 	if sequence.HasQualities() {
-		newSeq.qualities = CopySlice(sequence.Qualities()[from:to])
+		qual := sequence.Qualities()
+		if len(qual) != sequence.Len() {
+			log.Panicf(
+				"[BioSequence.Subsequence] Sequence %s has a length of %d and qualities a length of %d",
+				sequence.Id(),
+				sequence.Len(),
+				len(qual),
+			)
+		}
+		newSeq.qualities = CopySlice(qual[from:to])
 	}
 
 	newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
@@ -58,7 +67,16 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
 		newSeq.Write(sequence.Sequence()[0:to])
 
 		if sequence.HasQualities() {
-			newSeq.WriteQualities(sequence.Qualities()[0:to])
+			qual := sequence.Qualities()
+			if len(qual) != sequence.Len() {
+				log.Panicf(
+					"[BioSequence.Subsequence] Sequence %s has a length of %d and qualities a length of %d",
+					sequence.Id(),
+					sequence.Len(),
+					len(qual),
+				)
+			}
+			newSeq.WriteQualities(qual[0:to])
 		}
 
 	}
@@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
 			}
 		}
+	} else if obidefault.UseRawTaxids() {
+		// Without a loaded taxonomy, extract the bare ID from full-format strings
+		// like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere.
+		if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil {
+			taxid = rawID
+		}
 	}
 }
 
@@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
 	lpath := path.Len() - 1
 
 	for i := lpath; i >= 0; i-- {
-		spath[lpath-i] = path.Get(i).String(taxonomy.Code())
+		spath[lpath-i] = path.Get(i).FullString(taxonomy.Code())
 	}
 
 	sequence.SetAttribute("taxonomic_path", spath)
@@ -104,11 +104,11 @@ func SeqToSliceWorker(worker SeqWorker,
 	for _, s := range input {
 		r, err := worker(s)
 		if err == nil {
-			for _, rs := range r {
-				if i == len(output) {
-					output = slices.Grow(output, cap(output))
-					output = output[:cap(output)]
-				}
+			if i+len(r) > cap(output) {
+				output = slices.Grow(output[:i], len(r))
+				output = output[:cap(output)]
+			}
+			for _, rs := range r {
 				output[i] = rs
 				i++
 			}
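For readers unfamiliar with `slices.Grow` (standard library, Go 1.21+), a minimal standalone sketch of the reservation pattern the fixed worker uses; the variable names here are illustrative, not part of the patch:

```go
package main

import (
	"fmt"
	"slices"
)

func main() {
	output := make([]int, 2, 2) // pretend i == 2 results are already stored
	i, results := 2, []int{10, 20, 30}

	// Reserve room for len(results) more elements once per batch, instead
	// of growing inside the inner copy loop.
	if i+len(results) > cap(output) {
		output = slices.Grow(output[:i], len(results))
		output = output[:cap(output)]
	}
	for _, r := range results {
		output[i] = r
		i++
	}
	fmt.Println(output[:i]) // [0 0 10 20 30]
}
```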
+19 -13
@@ -29,6 +29,24 @@ type TaxNode struct {
 	alternatenames *map[*string]*string
 }
 
+// FullString returns the full string representation of the TaxNode in the form
+// "taxonomyCode:id [scientificName]@rank", regardless of the UseRawTaxids setting.
+// This is used internally when a parseable format is required (e.g. taxonomic_path).
+func (node *TaxNode) FullString(taxonomyCode string) string {
+	if node.HasScientificName() {
+		return fmt.Sprintf("%s:%v [%s]@%s",
+			taxonomyCode,
+			*node.id,
+			node.ScientificName(),
+			node.Rank(),
+		)
+	}
+
+	return fmt.Sprintf("%s:%v",
+		taxonomyCode,
+		*node.id)
+}
+
 // String returns a string representation of the TaxNode, including the taxonomy code,
 // the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]".
 //
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
 		return *node.id
 	}
 
-	if node.HasScientificName() {
-		return fmt.Sprintf("%s:%v [%s]@%s",
-			taxonomyCode,
-			*node.id,
-			node.ScientificName(),
-			node.Rank(),
-		)
-	}
-
-	return fmt.Sprintf("%s:%v",
-		taxonomyCode,
-		*node.id)
+	return node.FullString(taxonomyCode)
 }
 
 // Id returns the unique identifier of the TaxNode.
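To make the documented format concrete, a tiny standalone sketch of the string `FullString` builds (the taxon values are hypothetical):

```go
package main

import "fmt"

func main() {
	// Hypothetical taxon; FullString always emits this parseable "full" form,
	// even when raw taxids are requested.
	code, id, name, rank := "taxon", 9606, "Homo sapiens", "species"
	fmt.Printf("%s:%v [%s]@%s\n", code, id, name, rank)
	// Output: taxon:9606 [Homo sapiens]@species
}
```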
@@ -33,6 +33,7 @@ func CLIWriteSequenceCSV(iterator obiiter.IBioSequence,
 		CSVSequence(CLIPrintSequence()),
 		CSVQuality(CLIPrintQuality()),
 		CSVAutoColumn(CLIAutoColumns()),
+		CSVNAValue(CLINAValue()),
 	)
 
 	csvIter := NewCSVSequenceIterator(iterator, opts...)
@@ -1,6 +1,7 @@
 package obicsv
 
 import (
+	"fmt"
 	"log"
 	"slices"
 
@@ -67,8 +68,19 @@ func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) obiiterc
 
 	if taxon != nil {
 		taxid = taxon.String()
+	} else if ta, ok := sequence.GetAttribute("taxid"); ok {
+		switch tv := ta.(type) {
+		case string:
+			taxid = tv
+		case int:
+			taxid = fmt.Sprintf("%d", tv)
+		case float64:
+			taxid = fmt.Sprintf("%d", int(tv))
+		default:
+			taxid = opt.CSVNAValue()
+		}
 	} else {
-		taxid = sequence.Taxid()
+		taxid = opt.CSVNAValue()
 	}
 
 	record["taxid"] = taxid
@@ -46,8 +46,7 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
 		formater = obiformats.WriteSequencesToFile
 	}
 
-	dispatcher := sequences.Distribute(CLISequenceClassifier(),
-		obidefault.BatchSize())
+	dispatcher := sequences.Distribute(CLISequenceClassifier())
 
 	obiformats.WriterDispatcher(CLIFileNamePattern(),
 		dispatcher, formater, opts...,
@@ -21,12 +21,10 @@ func PairingOptionSet(options *getoptions.GetOpt) {
 	options.StringVar(&_ForwardFile, "forward-reads", "",
 		options.Alias("F"),
 		options.ArgName("FILENAME_F"),
-		options.Required("You must provide at a forward file"),
 		options.Description("The file names containing the forward reads"))
 	options.StringVar(&_ReverseFile, "reverse-reads", "",
 		options.Alias("R"),
 		options.ArgName("FILENAME_R"),
-		options.Required("You must provide a reverse file"),
 		options.Description("The file names containing the reverse reads"))
 	options.IntVar(&_Delta, "delta", _Delta,
 		options.Alias("D"),
@@ -72,6 +70,10 @@ func CLIPairedSequence() (obiiter.IBioSequence, error) {
 	return paired, nil
 }
 
+func CLIHasPairedFiles() bool {
+	return _ForwardFile != "" && _ReverseFile != ""
+}
+
 func CLIDelta() int {
 	return _Delta
 }
@@ -99,6 +99,17 @@ func (data1 *DataSummary) Add(data2 *DataSummary) *DataSummary {
 	rep.sample_singletons = sumUpdateIntMap(data1.sample_singletons, data2.sample_singletons)
 	rep.sample_obiclean_bad = sumUpdateIntMap(data1.sample_obiclean_bad, data2.sample_obiclean_bad)
 
+	for k, m1 := range data1.map_summaries {
+		rep.map_summaries[k] = m1
+	}
+	for k, m2 := range data2.map_summaries {
+		if m1, ok := rep.map_summaries[k]; ok {
+			rep.map_summaries[k] = sumUpdateIntMap(m1, m2)
+		} else {
+			rep.map_summaries[k] = m2
+		}
+	}
+
 	return rep
 }
 
@@ -163,8 +174,9 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte
 	summaries := make([]*DataSummary, nproc)
 
 	for n := 0; n < nproc; n++ {
+		summaries[n] = NewDataSummary()
 		for _, v := range summarise {
-			summaries[n].map_summaries[v] = make(map[string]int, 0)
+			summaries[n].map_summaries[v] = make(map[string]int)
 		}
 	}
 
@@ -174,6 +186,11 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte
 		batch := iseq.Get()
 		for _, seq := range batch.Slice() {
 			summary.Update(seq)
+			for _, attr := range summarise {
+				if m, ok := seq.GetIntMap(attr); ok {
+					summary.map_summaries[attr] = sumUpdateIntMap(summary.map_summaries[attr], m)
+				}
+			}
 		}
 	}
 	waiter.Done()
@@ -181,11 +198,9 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte
 
 	waiter.Add(nproc)
 
-	summaries[0] = NewDataSummary()
 	go ff(iterator, summaries[0])
 
 	for i := 1; i < nproc; i++ {
-		summaries[i] = NewDataSummary()
 		go ff(iterator.Split(), summaries[i])
 	}
 
@@ -246,5 +261,14 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte
 			}
 		}
 	}
 
+	if len(rep.map_summaries) > 0 {
+		mapDict := make(map[string]interface{}, len(rep.map_summaries))
+		for attr, counts := range rep.map_summaries {
+			mapDict[attr] = counts
+		}
+		dict["map_summaries"] = mapDict
+	}
+
 	return dict
 }
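The merging above relies on `sumUpdateIntMap` adding per-key counts; a minimal sketch of that idea (hypothetical helper, not the obitools4 source itself):

```go
package main

import "fmt"

// sumUpdateIntMap-style merge: add every count of m2 into m1 and return m1.
func mergeCounts(m1, m2 map[string]int) map[string]int {
	if m1 == nil {
		m1 = make(map[string]int, len(m2))
	}
	for k, v := range m2 {
		m1[k] += v
	}
	return m1
}

func main() {
	a := map[string]int{"sample1": 3}
	b := map[string]int{"sample1": 2, "sample2": 5}
	fmt.Println(mergeCounts(a, b)) // map[sample1:5 sample2:5]
}
```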
@@ -114,10 +114,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
 		aanot["obimultiplex_direction"] = direction
 
 		aanot["obimultiplex_forward_match"] = forward_match
-		aanot["obimultiplex_forward_mismatches"] = forward_mismatches
+		aanot["obimultiplex_forward_error"] = forward_mismatches
 
 		aanot["obimultiplex_reverse_match"] = reverse_match
-		aanot["obimultiplex_reverse_mismatches"] = reverse_mismatches
+		aanot["obimultiplex_reverse_error"] = reverse_mismatches
 
 		aanot["sample"] = sample
 		aanot["experiment"] = experiment
@@ -125,10 +125,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
 		banot["obimultiplex_direction"] = direction
 
 		banot["obimultiplex_forward_match"] = forward_match
-		banot["obimultiplex_forward_mismatches"] = forward_mismatches
+		banot["obimultiplex_forward_error"] = forward_mismatches
 
 		banot["obimultiplex_reverse_match"] = reverse_match
-		banot["obimultiplex_reverse_mismatches"] = reverse_mismatches
+		banot["obimultiplex_reverse_error"] = reverse_mismatches
 
 		banot["sample"] = sample
 		banot["experiment"] = experiment
@@ -0,0 +1,302 @@
# Objective

Fully document OBITools (version 4, written in Go) in English, using a 4‑phase incremental pipeline.

You **MUST** use the available MCP servers:

- `cclsp` – exact definitions, references, diagnostics
- `jcodemunch` – code indexing, symbol extraction
- `treesitter` – AST and CLI parsing
- `context7` – external documentation

All tool calls must follow the exact API described in the MCP server documentation. If a required tool is unavailable, you **MUST** log the error and stop execution.

### Tool call format (CRITICAL)

Tool calls **MUST** use this exact XML format — no spaces inside the angle brackets:

```
<function=tool_name>
{"param": "value"}
</function>
```

**FORBIDDEN** — these variants will cause parse errors and must NEVER be used:

- `< function=tool_name >` (spaces around the tag name)
- `< function = tool_name>` (spaces around `=`)
- `<function = tool_name>` (space before `=`)

The opening tag is `<function=tool_name>` with **zero spaces** inside `<` and `>`.

---

# Global rules

**You are not allowed to read the same file twice in a row.**

## Language

- All generated documentation **MUST** be in English.
- If an existing documentation file is in French:
  1. Translate it to English
  2. Save the original as `.fr.md` **before** overwriting
  3. Write the new English version

---

## Execution mode (STRICT)

You are operating in **STRICT TOOL MODE**:

- If a file must be written, you **MUST** use the `Shell` tool.
- You **MUST NOT** read entire directory listings into memory.
- You **MUST** work with **one item at a time** using a simple text file as a task queue.

### Reading files before writing

- **Before writing to an existing documentation file**, you must first read it using the `Read` tool.
- **When documenting a single Go source file**, you only need to read that one file (plus up to 4-5 helper files if needed for context).
- Do NOT read the entire codebase - only what is necessary to document the current file.

---

### Rules

- Always write the **full** file (no partial updates).
- Paths are relative to the project root; directories are created implicitly.
- Content must be valid UTF‑8; use `\n` line endings.
- Do **not** wrap content in backticks.

---

## Progress tracking: task queue files

We use **line‑oriented task files** to avoid loading large lists into memory. Each phase has its own task file:

- `docs/todo/phase1.txt` – list of Go files (one per line) to document.
- `docs/todo/phase1bis.txt` – the same list, regenerated once phase 1 is done.
- `docs/todo/phase2.txt` – list of packages.
- `docs/todo/phase3.txt` – list of tools.

**How it works:**

1. At the start of a phase, if the task file does not exist, it is created by scanning the codebase once (Phase 0 or Phase X init).
2. **Each run of the LLM processes only the first line of the task file.**
3. After processing the item (success or permanent failure), the line is removed from the task file.
   - On success, the line is deleted (no extra sentinel file needed).
   - On transient failure (retry < 3), we keep the line but increment a retry counter stored in a separate file.
   - On permanent failure (retry ≥ 3), we remove the line, create a failure marker under `docs/failed/`, and log the error.
4. The LLM then exits (or continues if the task file is still non‑empty, but it must never load more than one line).

This way, the LLM’s context never holds more than a single task at a time. A minimal sketch of this loop is given after the retry mechanism below.

### Retry mechanism

For each item (e.g., `internal/align/align.go`), we maintain a retry counter in:

- `docs/retry/phase1/internal/align/align.go.count`

If the file does not exist, retries = 0.
Each time processing fails, we increment the counter (write the new number).
If after increment the counter < 3, we keep the line in the task file.
If the counter reaches 3, we **remove the line from the task file**, create `docs/failed/phase1/internal/align/align.go.failed` (just a marker), and log the error.
---
|
||||||
|
|
||||||
|
## Documentation quality requirements (CRITICAL)
|
||||||
|
|
||||||
|
Documentation MUST NOT be superficial. For each documented element (file, function, struct, package):
|
||||||
|
|
||||||
|
### You MUST explain:
|
||||||
|
|
||||||
|
- what it does
|
||||||
|
- why it exists (context, problem solved)
|
||||||
|
- how it is used
|
||||||
|
- assumptions and preconditions
|
||||||
|
- possible edge cases
|
||||||
|
|
||||||
|
### Forbidden patterns
|
||||||
|
|
||||||
|
- Vague phrases like “This function handles…”, “Utility for…”, “Helper function…”.
|
||||||
|
- Generic descriptions that could apply to any project.
|
||||||
|
|
||||||
|
### Required content per element type
|
||||||
|
|
||||||
|
- Functions:
|
||||||
|
- Purpose
|
||||||
|
- Parameter meaning
|
||||||
|
- Return values
|
||||||
|
- Notable behaviour (panic conditions, side effects, concurrency)
|
||||||
|
- Structs:
|
||||||
|
- Role in the system
|
||||||
|
- Meaning of key fields
|
||||||
|
- Files:
|
||||||
|
- Role within the package
|
||||||
|
- Interactions with other files
|
||||||
|
|
||||||
|
### Anti‑generic rule
|
||||||
|
|
||||||
|
If the description could apply to any project, it is INVALID. You MUST include domain‑specific context (bioinformatics, sequence processing, etc.) and concrete behaviour.
|
||||||
|
|
||||||
|
### Quality validation
|
||||||
|
|
||||||
|
Before marking an item as done (i.e., creating the .done sentinel), you MUST perform a self‑validation:
|
||||||
|
|
||||||
|
- Check that all required sections are present.
|
||||||
|
- Verify that no forbidden patterns remain.
|
||||||
|
|
||||||
|
If validation fails, increment the retry counter and keep the item pending.
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# Directory structure
|
||||||
|
|
||||||
|
```
|
||||||
|
docs/
|
||||||
|
todo/ # task queues
|
||||||
|
phase1.txt
|
||||||
|
phase1bis.txt
|
||||||
|
phase2.txt
|
||||||
|
phase3.txt
|
||||||
|
retry/ # retry counters
|
||||||
|
phase1/ # mirrors file structure
|
||||||
|
internal/align/align.go.count
|
||||||
|
phase1bis/
|
||||||
|
phase2/
|
||||||
|
phase3/
|
||||||
|
failed/ # permanent failure markers
|
||||||
|
phase1/
|
||||||
|
internal/align/align.go.failed
|
||||||
|
phase1bis/
|
||||||
|
phase2/
|
||||||
|
phase3/
|
||||||
|
phase1/ # actual documentation
|
||||||
|
<relative_path>/<file>.go.md
|
||||||
|
phase2/
|
||||||
|
<package>.md
|
||||||
|
phase3/
|
||||||
|
<tool>.md
|
||||||
|
error.log
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# Phase 0: Initialization
|
||||||
|
|
||||||
|
1. Ensure required directories exist: `docs/todo`, `docs/retry`, `docs/failed`, `docs/phase1`, `docs/phase2`, `docs/phase3`.
|
||||||
|
2. **If `docs/todo/phase1.txt` does not exist**:
|
||||||
|
- Use `find pkg -name "*.go" ! -name "*_test.go" ! -path "*/cmd/*"` to list all Go files (excluding tests and main.go).
|
||||||
|
- Write the list (one relative path per line, e.g., `internal/align/align.go`) to `docs/todo/phase1.txt`.
|
||||||
|
3. Do the same for phase2 and phase3 later when those phases start.
|
||||||
|
4. **No other state is stored.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# Phase 1: File documentation
|
||||||
|
|
||||||
|
**Processing rule:**
|
||||||
|
- Read the **first line** of `docs/todo/phase1.txt` (using `head -n 1`).
|
||||||
|
- If the file is empty, Phase 1 is complete → proceed to Phase 1bis initialization.
|
||||||
|
- Otherwise, process that single file.
|
||||||
|
|
||||||
|
**Processing a file:**
|
||||||
|
|
||||||
|
1. Let `relpath` be the line content (e.g., `internal/align/align.go`).
|
||||||
|
2. Check if a permanent failure marker exists at `docs/failed/phase1/${relpath}.failed`. If yes, remove the line from the task file and skip (line will be deleted).
|
||||||
|
3. If the documentation file `docs/phase1/${relpath}.go.md` exists go directly to its validation (step 6).
|
||||||
|
4. Otherwise, generate documentation for that file (using MCP tools as before).
|
||||||
|
5. Write the documentation to `docs/phase1/${relpath}.go.md`.
|
||||||
|
6. Validate quality.
|
||||||
|
7. If validation succeeds:
|
||||||
|
- Remove the line from the task file.
|
||||||
|
- Remove any retry counter file for this item.
|
||||||
|
- (No sentinel needed; the removal from todo indicates completion.)
|
||||||
|
8. If validation fails:
|
||||||
|
- Increment retry counter:
|
||||||
|
- If `docs/retry/phase1/${relpath}.count` does not exist, set to 1.
|
||||||
|
- Else read it, add 1, write back.
|
||||||
|
- If new counter >= 3:
|
||||||
|
- Remove line from task file.
|
||||||
|
- Create `docs/failed/phase1/${relpath}.failed`.
|
||||||
|
- Log error.
|
||||||
|
- If new counter < 3:
|
||||||
|
- Keep the line in the task file (do nothing, it stays as first line for next run).
|
||||||
|
9. **Exit** (or stop if this was a single run). The next invocation will read the first line again (same if retry, or next if removed).
|
||||||
|
|
||||||
|
**Important:**
|
||||||
|
- Do **not** read more than one line.
|
||||||
|
- Do **not** attempt to process multiple items in one run.
|
||||||
|
- The LLM should finish after handling one item.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
# Phase 1bis: Review and harmonization
|
||||||
|
|
||||||
|
When Phase 1 is complete (i.e., `docs/todo/phase1.txt` empty), we initialize `docs/todo/phase1bis.txt` with the same list of files (the ones that succeeded).
|
||||||
|
But note: we need to know which files were successfully documented. Since we removed lines from `phase1.txt` on success, we need a record. The simplest is to reuse the same list but we can generate it by listing the existing `.go.md` files in `docs/phase1/` (since every successful file has a `.go.md`).
|
||||||
|
Thus, Phase 1bis initialization:
|
||||||
|
|
||||||
|
- If `docs/todo/phase1bis.txt` does not exist, create it by listing all `.go.md` files under `docs/phase1/`, stripping the `docs/phase1/` prefix and the `.go.md` suffix, and writing the relative path (same format as phase1).
|
||||||
|
|
||||||
|
Then processing is identical to Phase 1, but using `docs/todo/phase1bis.txt` and output is overwriting the same `.go.md` files (with improvements). Retry counters go in `docs/retry/phase1bis/`.
|
||||||
|
|
||||||
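A hedged sketch of that listing step as standalone Go (illustrative; any equivalent one-liner through the `Shell` tool works just as well):

```go
package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	var items []string
	root := "docs/phase1"
	// Every successfully documented file left a .go.md behind; recover the
	// original relative path (e.g. internal/align/align.go) by stripping the
	// prefix and the trailing .md.
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() || !strings.HasSuffix(path, ".go.md") {
			return nil
		}
		rel, _ := filepath.Rel(root, path)
		items = append(items, strings.TrimSuffix(rel, ".md"))
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	out := strings.Join(items, "\n") + "\n"
	if err := os.WriteFile("docs/todo/phase1bis.txt", []byte(out), 0o644); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```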
---

# Phase 2: Package documentation

When Phase 1bis is complete (`docs/todo/phase1bis.txt` empty), initialize `docs/todo/phase2.txt`:

- List all packages: unique directories under `pkg/` that contain at least one `.go` file and are not tools.
- Write each package identifier (e.g., `align`, `internal/align`) as a line.

Processing: read the first line, generate `docs/phase2/<package>.md`, validate, remove the line on success; retry logic in `docs/retry/phase2/`.

---

# Phase 3: Tool documentation

When Phase 2 is complete, initialize `docs/todo/phase3.txt`:

- List all directories under `cmd/` that contain a `main.go`. Write each tool name as a line.

Processing: read the first line, generate `docs/phase3/<tool>.md`, validate, remove the line on success; retry logic in `docs/retry/phase3/`.

---

# Finalization

When all task files are empty and no phases are pending, generate `docs/README.md` by:

- Listing all package docs (files in `docs/phase2/`) and linking them.
- Listing all tool docs (files in `docs/phase3/`) and linking them.

Write it using `Shell`.

---

# Execution flow summary

1. **Phase 0**: Create directories and the initial `todo/phase1.txt` if missing. Exit.
2. **Phase 1**:
   - If `todo/phase1.txt` exists and is non‑empty → process the first line.
   - Else → move to Phase 1bis initialization.
3. **Phase 1bis**:
   - If `todo/phase1bis.txt` does not exist → create it from the successful phase 1 docs.
   - If non‑empty → process the first line.
   - Else → move to Phase 2 initialization.
4. **Phase 2**: similar.
5. **Phase 3**: similar.
6. **Finalization**: generate the README.

The LLM should be invoked repeatedly (e.g., by a scheduler) until all phases are done. Each invocation processes exactly one item.

---

# Important reminders

- Always call `Shell` to write files; never output content in plain text.
- Validate quality before removing a line from the task file.
- Log all failures to `docs/error.log` in JSON Lines format.
- If any MCP tool fails, treat it as a failure and increment the retry counter.
- Never read more than one line from a task file in a single run.
@@ -0,0 +1,222 @@
//go:build ignore

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
)

type Reference struct {
	File     string `json:"file"`
	Line     int    `json:"line"`
	Column   int    `json:"column"`
	Key      string `json:"key"`
	Function string `json:"function"`
	Context  string `json:"context"`
}

type Result struct {
	Method     string      `json:"method"`
	Signature  string      `json:"signature"`
	Definition string      `json:"definition"`
	References []Reference `json:"references"`
	Total      int         `json:"total"`
}

var basePath = "/Users/coissac/Sync/travail/__MOI__/GO/obitools4"

func main() {
	cmd := exec.Command("rg", "-n", `\.SetAttribute\(`, basePath+"/pkg", "--type", "go")
	output, err := cmd.Output()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error running rg: %v\n", err)
		os.Exit(1)
	}

	lines := strings.Split(string(output), "\n")
	lineRe := regexp.MustCompile(`^(.+?):(\d+):\s*(.+)$`)
	keyRe := regexp.MustCompile(`SetAttribute\("([^"]+)"`)
	templateKeyRe := regexp.MustCompile(`SetAttribute\("([^"]+)[^"]*"\s*,`)

	var refs []Reference
	seen := make(map[string]bool)

	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		matches := lineRe.FindStringSubmatch(line)
		if matches == nil {
			continue
		}

		file := matches[1]
		lineNum, _ := strconv.Atoi(matches[2])
		context := strings.TrimSpace(matches[3])

		// Skip definition
		if strings.Contains(file, "obiseq/attributes.go") && lineNum == 132 {
			continue
		}

		// Extract key
		var key string
		if keyMatches := keyRe.FindStringSubmatch(context); keyMatches != nil {
			key = keyMatches[1]
		} else if tmplMatches := templateKeyRe.FindStringSubmatch(context); tmplMatches != nil {
			key = tmplMatches[1]
		} else {
			continue
		}

		// Get function name using treesitter
		funcName := getFunctionNameTreesitter(file, lineNum)

		uniqueKey := fmt.Sprintf("%s:%d", file, lineNum)
		if seen[uniqueKey] {
			continue
		}
		seen[uniqueKey] = true

		refs = append(refs, Reference{
			File:     filepath.Base(file),
			Line:     lineNum,
			Column:   0,
			Key:      key,
			Function: funcName,
			Context:  context,
		})
	}

	sort.Slice(refs, func(i, j int) bool {
		if refs[i].File != refs[j].File {
			return refs[i].File < refs[j].File
		}
		return refs[i].Line < refs[j].Line
	})

	result := Result{
		Method:     "SetAttribute",
		Signature:  "func (s *BioSequence) SetAttribute(key string, value interface{})",
		Definition: basePath + "/pkg/obiseq/attributes.go:132",
		References: refs,
		Total:      len(refs),
	}

	outputJSON, err := json.MarshalIndent(result, "", "  ")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error marshaling JSON: %v\n", err)
		os.Exit(1)
	}

	fmt.Println(string(outputJSON))
}

// getFunctionNameTreesitter uses the treesitter_cursor_walk tool to get the containing function
func getFunctionNameTreesitter(file string, targetLine int) string {
	// Convert to 0-based for treesitter
	row := targetLine - 1

	// Use treesitter cursor walk to get ancestors
	cmd := exec.Command("bash", "-c",
		fmt.Sprintf(`kilo treesitter_cursor_walk --file_path %q --row %d --column 0 --max_depth 10 2>/dev/null`, file, row))

	output, err := cmd.Output()
	if err != nil {
		return findContainingFunction(file, targetLine)
	}

	// Parse the JSON output to find function_declaration or method_declaration
	var result map[string]interface{}
	if err := json.Unmarshal(output, &result); err != nil {
		return findContainingFunction(file, targetLine)
	}

	// Check ancestors for function declaration
	if ancestors, ok := result["ancestors"].([]interface{}); ok {
		for _, a := range ancestors {
			if anc, ok := a.(map[string]interface{}); ok {
				nodeType, _ := anc["type"].(string)
				if nodeType == "function_declaration" || nodeType == "method_declaration" {
					// Try to get the function name from children
					if children, ok := anc["children"].([]interface{}); ok {
						for _, c := range children {
							if child, ok := c.(map[string]interface{}); ok {
								childType, _ := child["type"].(string)
								if childType == "identifier" {
									if text, ok := child["text"].(string); ok {
										return text
									}
								}
								if childType == "field_identifier" {
									if text, ok := child["text"].(string); ok {
										return text
									}
								}
							}
						}
					}
				}
				if nodeType == "func_literal" {
					return "closure"
				}
			}
		}
	}

	return findContainingFunction(file, targetLine)
}

func findContainingFunction(file string, targetLine int) string {
	data, err := os.ReadFile(file)
	if err != nil {
		return ""
	}
	lines := strings.Split(string(data), "\n")

	for i := targetLine - 1; i >= 0 && i >= targetLine-200; i-- {
		if i >= len(lines) {
			continue
		}
		line := strings.TrimSpace(lines[i])

		if line == "}" && i > 0 {
			for j := i - 1; j >= 0 && j >= i-50; j-- {
				if j >= len(lines) {
					continue
				}
				funcLine := strings.TrimSpace(lines[j])
				if strings.HasPrefix(funcLine, "func ") {
					if match := regexp.MustCompile(`func\s+\([^)]+\)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`).FindStringSubmatch(funcLine); match != nil {
						return match[1]
					}
					if match := regexp.MustCompile(`func\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`).FindStringSubmatch(funcLine); match != nil {
						return match[1]
					}
				}
			}
			continue
		}

		if strings.HasPrefix(line, "func ") {
			if match := regexp.MustCompile(`func\s+\([^)]+\)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`).FindStringSubmatch(line); match != nil {
				return match[1]
			}
			if match := regexp.MustCompile(`func\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`).FindStringSubmatch(line); match != nil {
				return match[1]
			}
		}
	}

	return ""
}
Executable +36
@@ -0,0 +1,36 @@
#!/bin/bash

basePath="/Users/coissac/Sync/travail/__MOI__/GO/obitools4"
OUTPUT_FILE="${1:-/dev/stdout}"

# Get all SetAttribute calls
rg -n '\.SetAttribute\(' "$basePath/pkg" --type go | while read -r line; do
    file="${line%%:*}"
    line_num="${line%:*}"
    line_num="${line_num##*:}"
    context="${line##*: }"

    # Extract key (only literal strings)
    key=$(echo "$context" | sed -n 's/.*SetAttribute("\([^"]*\)".*/\1/p')
    [ -z "$key" ] && continue

    # Get function name using treesitter
    func=$(kilo treesitter_cursor_walk \
        --file_path "$file" \
        --row "$((line_num - 1))" \
        --column 0 \
        --max_depth 10 2>/dev/null |
        jq -r '.ancestors[] | select(.type == "function_declaration" or .type == "method_declaration") | .children[] | select(.type == "identifier" or .type == "field_identifier") | .text' 2>/dev/null)

    # Fallback to func_literal for closures
    if [ -z "$func" ]; then
        func=$(kilo treesitter_cursor_walk \
            --file_path "$file" \
            --row "$((line_num - 1))" \
            --column 0 \
            --max_depth 10 2>/dev/null |
            jq -r '.ancestors[] | select(.type == "func_literal") | "closure"' 2>/dev/null)
    fi

    echo "$(basename "$file")|$line_num|$key|${func:-unknown}|$context"
done | sort -t'|' -k1,1 -k2,2n
@@ -0,0 +1,308 @@
{
  "obiannotate": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "(git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter.IBioSequence).NumberSequences$1": [
      "seq_number"
    ]
  },
  "obiclean": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obicleandb": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetCount": [
      "count"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obicomplement": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obiconsensus": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetCount": [
      "count"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus.BuildConsensus": [
      "obiconsensus_kmer_max_occur",
      "obiconsensus_filtered_graph_size",
      "obiconsensus_full_graph_size",
      "obiconsensus_consensus",
      "obiconsensus_weight",
      "obiconsensus_seq_length",
      "obiconsensus_kmer_size"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus.MinionClusterDenoise": [
      "obiconsensus_consensus"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus.MinionDenoise$1": [
      "obiconsensus_consensus",
      "obiconsensus_weight"
    ]
  },
  "obiconvert": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obicount": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obicsv": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obidemerge": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obidistribute": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obigrep": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obijoin": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obikmermatch": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obikmersimcount": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obilandmark": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetCoordinate": [
      "landmark_coord"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetOBITagGeomRefIndex": [
      "obitag_geomref_index"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obilandmark.CLISelectLandmarkSequences": [
      "landmark_id"
    ]
  },
  "obimatrix": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obimicrosat": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obimultiplex": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obipairing": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence)._revcmpMutation": [
      "pairing_mismatches"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign.BuildQualityConsensus": [
      "pairing_mismatches"
    ]
  },
  "obipcr": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obireffamidx": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetOBITagRefIndex": [
      "obitag_ref_index"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx.IndexFamilyDB": [
      "reffamidx_id"
    ]
  },
  "obirefidx": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetOBITagRefIndex": [
      "obitag_ref_index"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obiscript": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obisplit": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obisummary": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obitag": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetPath": [
      "taxonomic_path"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  },
  "obitagpcr": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary.NGSLibrary).ExtractMultiBarcode": [
      "obimultiplex_error",
      "obimultiplex_amplicon_rank"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence)._revcmpMutation": [
      "pairing_mismatches"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence)._subseqMutation": [
      "pairing_mismatches"
    ],
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign.BuildQualityConsensus": [
      "pairing_mismatches"
    ]
  },
  "obitaxonomy": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetPath": [
      "taxonomic_path"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ],
    "(git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter.IBioSequence).NumberSequences$1": [
      "seq_number"
    ]
  },
  "obiuniq": {
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetCount": [
      "count"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetDefinition": [
      "definition"
    ],
    "(*git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq.BioSequence).SetTaxid": [
      "taxid"
    ]
  }
}
+1 -1
@@ -1 +1 @@
-4.4.24
+4.4.42
@@ -0,0 +1,19 @@
```markdown
# DNA Scoring and Matching Utilities in `obialign`

This module provides low-level utilities for computing nucleotide alignment scores using probabilistic and bit-encoded representations.

- **Bit Encoding**: Nucleotides are encoded in 4-bit groups (e.g., `A=0b0001`, `C=0b0010`, etc.), enabling efficient bitwise comparison.
- **`_MatchRatio(a, b)`**: Computes a normalized match ratio between two encoded bytes based on shared bits:
  `ratio = common_bits / (bits_in_a × bits_in_b)`.
- **`_FourBitsCount`**: Precomputed lookup table for Hamming weight (popcount) of 4-bit values.
- **Log-space Arithmetic**: Helper functions (`_Logaddexp`, `_Logdiffexp`, `_Log1mexp`) ensure numerical stability in probabilistic computations.
- **Phred-scaled Quality Integration**:
  `_MatchScoreRatio(QF, QR)` derives log-odds match/mismatch scores from Phred quality values (`QF`, `QR`), modeling sequencing error probabilities.
- **Precomputed Matrices**:
  - `_NucPartMatch[i][j]`: Match ratios for all nucleotide pairs (from 4-bit codes).
  - `_NucScorePartMatchMatch/Mismatch[i][j]`: Integer-scaled match/mismatch scores (×10) for quality pairs `(i, j)` in `[0..99]`.
- **Thread-Safe Initialization**: `_InitDNAScoreMatrix()` ensures one-time, synchronized initialization of all scoring tables via a mutex.

Designed for high-performance alignment kernels where speed and numerical robustness are critical.
```
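To illustrate the match-ratio formula above, a self-contained sketch (re-derived from the description, not the `obialign` source itself):

```go
package main

import (
	"fmt"
	"math/bits"
)

// matchRatio mirrors the documented formula: shared bits of two 4-bit
// IUPAC codes, normalized by the product of each code's popcount.
func matchRatio(a, b byte) float64 {
	na := bits.OnesCount8(a & 0xF)
	nb := bits.OnesCount8(b & 0xF)
	if na == 0 || nb == 0 {
		return 0
	}
	common := bits.OnesCount8(a & b & 0xF)
	return float64(common) / float64(na*nb)
}

func main() {
	const (
		A = 0b0001
		C = 0b0010
		M = A | C // IUPAC M: A or C
	)
	fmt.Println(matchRatio(A, A)) // 1
	fmt.Println(matchRatio(A, C)) // 0
	fmt.Println(matchRatio(A, M)) // 0.5
}
```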