Push zkptpswyxnvt #43

Merged
coissac merged 3 commits from push-zkptpswyxnvt into main 2026-06-22 16:30:00 +00:00
7 changed files with 541 additions and 2 deletions
Showing only changes of commit 469e53b6f5 - Show all commits
+2
View File
@@ -17,5 +17,7 @@ benchmark/global_index_presence
benchmark/global_index_count benchmark/global_index_count
benchmark/stats benchmark/stats
benchmark/reference_index benchmark/reference_index
benchmark/reference_dist
benchmark/obikmer_dist
benchmark/specific_index_count benchmark/specific_index_count
benchmark/specific_index_presence benchmark/specific_index_presence
+88 -2
View File
@@ -10,6 +10,23 @@ GENOMES := $(wildcard genomes/*.fna.gz)
-include deps.mk -include deps.mk
REF_NPZS := $(SPECIMENS:%=reference_index/%.npz) REF_NPZS := $(SPECIMENS:%=reference_index/%.npz)
REF_DIST_CSVS := $(addprefix reference_dist/, \
shared_kmers.csv hamming_dist.csv jaccard_dist.csv \
bray_curtis_dist.csv relfreq_bray_curtis_dist.csv \
euclidean_dist.csv relfreq_euclidean_dist.csv \
hellinger_dist.csv hellinger_euclidean_dist.csv)
OBIKMER_PRESENCE_DIST := $(addprefix obikmer_dist/presence/, \
jaccard_dist.csv jaccard_shared.csv jaccard_nj.nwk \
hamming_dist.csv hamming_nj.nwk)
OBIKMER_COUNT_DIST := $(addprefix obikmer_dist/count/, \
jaccard_dist.csv jaccard_shared.csv jaccard_nj.nwk \
bray_curtis_dist.csv bray_curtis_nj.nwk \
relfreq_bray_curtis_dist.csv relfreq_bray_curtis_nj.nwk \
euclidean_dist.csv euclidean_nj.nwk \
relfreq_euclidean_dist.csv relfreq_euclidean_nj.nwk \
hellinger_dist.csv hellinger_nj.nwk \
hellinger_euclidean_dist.csv hellinger_euclidean_nj.nwk)
DIST_COMPARISON := stats/dist_comparison/summary.csv
PRESENCE_DONE := $(SPECIMENS:%=specimen_index_presence/%/index.done) PRESENCE_DONE := $(SPECIMENS:%=specimen_index_presence/%/index.done)
PRESENCE_STATS := $(SPECIMENS:%=stats/indexing_presence/%.stats) PRESENCE_STATS := $(SPECIMENS:%=stats/indexing_presence/%.stats)
COUNT_DONE := $(SPECIMENS:%=specimen_index_count/%/index.done) COUNT_DONE := $(SPECIMENS:%=specimen_index_count/%/index.done)
@@ -24,7 +41,9 @@ SIMULATED_READS := $(foreach s,$(SPECIMENS),simulated_data/$(subst --,/,$s)/read
.NOTPARALLEL: .NOTPARALLEL:
.PHONY: all simulate reference \ .PHONY: all simulate reference reference_dist \
obikmer_dist obikmer_dist_presence obikmer_dist_count \
dist_comparison \
index_presence index_count \ index_presence index_count \
aggregate_index_presence aggregate_index_count \ aggregate_index_presence aggregate_index_count \
merge_presence merge_count \ merge_presence merge_count \
@@ -39,7 +58,8 @@ verify_merge_count: stats/verify_merge_count/current.csv
all: aggregate_verify_presence aggregate_verify_count \ all: aggregate_verify_presence aggregate_verify_count \
verify_merge_presence verify_merge_count \ verify_merge_presence verify_merge_count \
aggregate_filter_presence aggregate_filter_count aggregate_filter_presence aggregate_filter_count \
dist_comparison
# ── dependency file ─────────────────────────────────────────────────────────── # ── dependency file ───────────────────────────────────────────────────────────
@@ -62,6 +82,72 @@ reference_index/%.npz:
reference: $(REF_NPZS) reference: $(REF_NPZS)
# ── reference distance matrices ───────────────────────────────────────────────
$(REF_DIST_CSVS) &: $(REF_NPZS) build_reference_dist.py
$(VENV_PY) build_reference_dist.py
reference_dist: $(REF_DIST_CSVS)
# ── obikmer distance (presence index) ────────────────────────────────────────
$(OBIKMER_PRESENCE_DIST) &: global_index_presence/index.done $(BINARY)
mkdir -p obikmer_dist/presence
$(BINARY) distance \
--output obikmer_dist/presence/jaccard \
--metric jaccard --shared-kmers --nj \
global_index_presence
$(BINARY) distance \
--output obikmer_dist/presence/hamming \
--metric hamming --nj \
global_index_presence
obikmer_dist_presence: $(OBIKMER_PRESENCE_DIST)
# ── obikmer distance (count index) ───────────────────────────────────────────
$(OBIKMER_COUNT_DIST) &: global_index_count/index.done $(BINARY)
mkdir -p obikmer_dist/count
$(BINARY) distance \
--output obikmer_dist/count/jaccard \
--metric jaccard --shared-kmers --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/bray_curtis \
--metric bray-curtis --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/relfreq_bray_curtis \
--metric relfreq-bray-curtis --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/euclidean \
--metric euclidean --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/relfreq_euclidean \
--metric relfreq-euclidean --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/hellinger \
--metric hellinger --nj \
global_index_count
$(BINARY) distance \
--output obikmer_dist/count/hellinger_euclidean \
--metric hellinger-euclidean --nj \
global_index_count
obikmer_dist_count: $(OBIKMER_COUNT_DIST)
obikmer_dist: obikmer_dist_presence obikmer_dist_count
# ── distance comparison ───────────────────────────────────────────────────────
$(DIST_COMPARISON): $(REF_DIST_CSVS) $(OBIKMER_PRESENCE_DIST) $(OBIKMER_COUNT_DIST) compare_all_dist.py
$(VENV_PY) compare_all_dist.py --out $(DIST_COMPARISON)
dist_comparison: $(DIST_COMPARISON)
# ── per-specimen indexing ───────────────────────────────────────────────────── # ── per-specimen indexing ─────────────────────────────────────────────────────
# Prerequisites (reads → index.done + .stats) are in deps.mk. # Prerequisites (reads → index.done + .stats) are in deps.mk.
+226
View File
@@ -0,0 +1,226 @@
#!/usr/bin/env python3
"""Compute reference pairwise distance matrices from per-specimen .npz kmer indexes.
Reads all .npz files in reference_index/ (each containing sorted uint64 `kmers`
and uint32 `counts`), computes all distance metrics supported by `obikmer distance`,
and writes one CSV per metric to reference_dist/.
Output CSV format matches `obikmer distance --output`:
- first row: "genome", then specimen names
- subsequent rows: specimen name, then float or int values
Metrics written
jaccard_dist.csv Jaccard distance (presence/absence)
shared_kmers.csv Shared-kmer count matrix (intersection size)
bray_curtis_dist.csv Bray-Curtis dissimilarity (raw counts)
relfreq_bray_curtis_dist.csv Bray-Curtis on relative frequencies
euclidean_dist.csv Euclidean distance (raw counts)
relfreq_euclidean_dist.csv Euclidean distance on relative frequencies
hellinger_dist.csv Hellinger distance
hellinger_euclidean_dist.csv Euclidean distance in Hellinger space
"""
import argparse
import sys
from pathlib import Path
import numpy as np
# ── pairwise helpers ──────────────────────────────────────────────────────────
def shared_indices(a_kmers: np.ndarray, b_kmers: np.ndarray):
"""Return index arrays (idx_a, idx_b) for kmers present in both sets.
Both arrays must be sorted uint64. Uses searchsorted: O(|B| log |A|).
"""
pos = np.searchsorted(a_kmers, b_kmers)
pos = np.clip(pos, 0, len(a_kmers) - 1)
mask = a_kmers[pos] == b_kmers
idx_b = np.where(mask)[0]
idx_a = pos[idx_b]
return idx_a, idx_b
def pairwise_stats(specimens: list[dict]) -> dict[str, np.ndarray]:
"""Compute all pairwise distance matrices at once.
Returns a dict metric_name → ndarray (n×n float64 or int64).
Each specimen dict has keys: name, kmers, counts.
"""
n = len(specimens)
# Pre-compute per-specimen scalars
kmer_counts = np.array([len(s['kmers']) for s in specimens], dtype=np.uint64)
count_sums = np.array([s['counts'].sum() for s in specimens], dtype=np.uint64)
# Per-specimen sum-of-squares (for Euclidean decomposition)
sq_sums = np.array([(s['counts'].astype(np.float64) ** 2).sum() for s in specimens])
# Allocate output matrices
shared_mat = np.zeros((n, n), dtype=np.uint64)
hamming_mat = np.zeros((n, n), dtype=np.float64)
jaccard_mat = np.zeros((n, n), dtype=np.float64)
bray_mat = np.zeros((n, n), dtype=np.float64)
relfreq_bray = np.zeros((n, n), dtype=np.float64)
euclidean_mat = np.zeros((n, n), dtype=np.float64)
relfreq_eucl = np.zeros((n, n), dtype=np.float64)
hellinger_mat = np.zeros((n, n), dtype=np.float64)
hell_eucl_mat = np.zeros((n, n), dtype=np.float64)
for i in range(n):
a_km = specimens[i]['kmers']
a_ct = specimens[i]['counts'].astype(np.float64)
sa = float(count_sums[i])
na = int(kmer_counts[i])
for j in range(i + 1, n):
b_km = specimens[j]['kmers']
b_ct = specimens[j]['counts'].astype(np.float64)
sb = float(count_sums[j])
nb = int(kmer_counts[j])
idx_a, idx_b = shared_indices(a_km, b_km)
inter = len(idx_a)
ca_sh = a_ct[idx_a]
cb_sh = b_ct[idx_b]
# ── Presence metrics ──────────────────────────────────────────────
union = na + nb - inter
jac = (1.0 - inter / union) if union else 0.0
hamming = float(na + nb - 2 * inter) # |A Δ B|
# ── Count metrics ─────────────────────────────────────────────────
# Bray-Curtis: 1 - 2*Σmin(a,b) / (Σa + Σb)
sum_min = np.minimum(ca_sh, cb_sh).sum()
denom_bc = sa + sb
bc = (1.0 - 2.0 * sum_min / denom_bc) if denom_bc else 0.0
# RelfreqBray: 1 - Σmin(a/sa, b/sb) [only shared contribute]
if sa and sb:
rfb = 1.0 - np.minimum(ca_sh / sa, cb_sh / sb).sum()
else:
rfb = 0.0
# Euclidean: √(Σa² + Σb² - 2·Σ(a·b)_shared)
cross = (ca_sh * cb_sh).sum()
eucl_partial = sq_sums[i] + sq_sums[j] - 2.0 * cross
eucl = np.sqrt(max(eucl_partial, 0.0))
# RelfreqEuclidean: √(Σ(a/sa - b/sb)²)
# = √(Σa²/sa² + Σb²/sb² - 2·Σ(a·b)_shared/(sa·sb))
if sa and sb:
rf_cross = (ca_sh / sa * (cb_sh / sb)).sum()
rfe_partial = (sq_sums[i] / sa**2
+ sq_sums[j] / sb**2
- 2.0 * rf_cross)
rfe = np.sqrt(max(rfe_partial, 0.0))
else:
rfe = 0.0
# Hellinger partial: Σ(√(a/sa) - √(b/sb))² over global universe
# = 2 - 2·Σ√(a·b)_shared / √(sa·sb)
if sa and sb:
bc_coeff = np.sqrt(ca_sh * cb_sh).sum() / np.sqrt(sa * sb)
hell_partial = max(2.0 - 2.0 * bc_coeff, 0.0)
else:
hell_partial = 0.0
sq2 = np.sqrt(2.0)
hell = np.sqrt(hell_partial) / sq2
hell_euc = np.sqrt(hell_partial)
# ── Fill symmetric matrices ───────────────────────────────────────
for mat, val in [
(shared_mat, inter),
(hamming_mat, hamming),
(jaccard_mat, jac),
(bray_mat, bc),
(relfreq_bray, rfb),
(euclidean_mat, eucl),
(relfreq_eucl, rfe),
(hellinger_mat, hell),
(hell_eucl_mat, hell_euc),
]:
mat[i, j] = val
mat[j, i] = val
return {
'shared_kmers': shared_mat,
'hamming_dist': hamming_mat,
'jaccard_dist': jaccard_mat,
'bray_curtis_dist': bray_mat,
'relfreq_bray_curtis_dist': relfreq_bray,
'euclidean_dist': euclidean_mat,
'relfreq_euclidean_dist': relfreq_eucl,
'hellinger_dist': hellinger_mat,
'hellinger_euclidean_dist': hell_eucl_mat,
}
# ── I/O ───────────────────────────────────────────────────────────────────────
def write_csv(path: Path, labels: list[str], mat: np.ndarray, fmt: str) -> None:
with path.open('w') as fh:
fh.write('genome,' + ','.join(labels) + '\n')
for i, label in enumerate(labels):
row = ','.join(format(mat[i, j], fmt) for j in range(len(labels)))
fh.write(f'{label},{row}\n')
print(f'{path}', file=sys.stderr)
# ── main ─────────────────────────────────────────────────────────────────────
def main() -> None:
ap = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
ap.add_argument('--ref-dir', default='reference_index',
help='Directory with per-specimen .npz files (default: reference_index)')
ap.add_argument('--out-dir', default='reference_dist',
help='Output directory for CSV files (default: reference_dist)')
args = ap.parse_args()
ref_dir = Path(args.ref_dir)
out_dir = Path(args.out_dir)
out_dir.mkdir(exist_ok=True)
npz_files = sorted(ref_dir.glob('*.npz'))
if not npz_files:
print(f'ERROR: no .npz files found in {ref_dir}', file=sys.stderr)
sys.exit(1)
print(f'Loading {len(npz_files)} specimen(s) from {ref_dir}/', file=sys.stderr)
specimens = []
for f in npz_files:
data = np.load(f)
specimens.append({
'name': f.stem,
'kmers': data['kmers'],
'counts': data['counts'],
})
print(f' {f.stem}: {len(data["kmers"]):,} kmers', file=sys.stderr)
labels = [s['name'] for s in specimens]
n = len(labels)
print(f'\nComputing pairwise distances for {n} specimens…', file=sys.stderr)
matrices = pairwise_stats(specimens)
print(f'\nWriting CSVs to {out_dir}/', file=sys.stderr)
write_csv(out_dir / 'shared_kmers.csv', labels, matrices['shared_kmers'], 'd')
write_csv(out_dir / 'hamming_dist.csv', labels, matrices['hamming_dist'], '.6f')
write_csv(out_dir / 'jaccard_dist.csv', labels, matrices['jaccard_dist'], '.6f')
write_csv(out_dir / 'bray_curtis_dist.csv', labels, matrices['bray_curtis_dist'], '.6f')
write_csv(out_dir / 'relfreq_bray_curtis_dist.csv', labels, matrices['relfreq_bray_curtis_dist'], '.6f')
write_csv(out_dir / 'euclidean_dist.csv', labels, matrices['euclidean_dist'], '.6f')
write_csv(out_dir / 'relfreq_euclidean_dist.csv', labels, matrices['relfreq_euclidean_dist'], '.6f')
write_csv(out_dir / 'hellinger_dist.csv', labels, matrices['hellinger_dist'], '.6f')
write_csv(out_dir / 'hellinger_euclidean_dist.csv', labels, matrices['hellinger_euclidean_dist'], '.6f')
print('\nDone.', file=sys.stderr)
if __name__ == '__main__':
main()
+182
View File
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""Compare all reference distance matrices against obikmer distance outputs.
Reads from:
reference_dist/ — ground-truth matrices computed by build_reference_dist.py
obikmer_dist/ — matrices produced by `obikmer distance`
Handles label reordering: both matrices are sorted by genome label before
element-wise comparison, so column/row order differences are irrelevant.
Output: stats/dist_comparison/summary.csv
comparison,max_abs,mean_abs,rmse,n_pairs,status
"""
import csv
import sys
from pathlib import Path
import numpy as np
# ── CSV loading ───────────────────────────────────────────────────────────────
def load_matrix(path: Path) -> tuple[list[str], np.ndarray]:
"""Load a distance-matrix CSV; return (sorted_labels, matrix_float64)."""
with path.open() as fh:
reader = csv.reader(fh)
header = next(reader)[1:] # skip 'genome' column
raw: dict[str, list[float]] = {}
for row in reader:
raw[row[0]] = [float(x) for x in row[1:]]
label_to_col = {h: i for i, h in enumerate(header)}
labels = sorted(raw.keys())
n = len(labels)
mat = np.zeros((n, n), dtype=np.float64)
for i, ri in enumerate(labels):
for j, cj in enumerate(labels):
mat[i, j] = raw[ri][label_to_col[cj]]
return labels, mat
# ── comparison ────────────────────────────────────────────────────────────────
def compare(label: str,
ref_path: Path,
obi_path: Path,
tol: float = 1e-4) -> dict:
if not ref_path.exists():
return {'comparison': label, 'status': 'REF_MISSING',
'max_abs': '', 'mean_abs': '', 'rmse': '', 'n_pairs': ''}
if not obi_path.exists():
return {'comparison': label, 'status': 'OBI_MISSING',
'max_abs': '', 'mean_abs': '', 'rmse': '', 'n_pairs': ''}
ref_labels, ref_mat = load_matrix(ref_path)
obi_labels, obi_mat = load_matrix(obi_path)
if ref_labels != obi_labels:
only_ref = sorted(set(ref_labels) - set(obi_labels))
only_obi = sorted(set(obi_labels) - set(ref_labels))
print(f' [{label}] label mismatch — '
f'only_ref={only_ref} only_obi={only_obi}', file=sys.stderr)
return {'comparison': label, 'status': 'LABEL_MISMATCH',
'max_abs': '', 'mean_abs': '', 'rmse': '', 'n_pairs': ''}
n = len(ref_labels)
# Off-diagonal mask
mask = ~np.eye(n, dtype=bool)
diff = np.abs(ref_mat[mask] - obi_mat[mask])
n_pairs = diff.size
max_abs = float(diff.max())
mean_abs = float(diff.mean())
rmse = float(np.sqrt((diff ** 2).mean()))
status = 'PASS' if max_abs <= tol else 'FAIL'
print(f' [{label}] n={n_pairs} '
f'max={max_abs:.3e} mean={mean_abs:.3e} rmse={rmse:.3e} {status}',
file=sys.stderr)
return {
'comparison': label,
'max_abs': f'{max_abs:.6e}',
'mean_abs': f'{mean_abs:.6e}',
'rmse': f'{rmse:.6e}',
'n_pairs': str(n_pairs),
'status': status,
}
# ── comparison table ──────────────────────────────────────────────────────────
# (label, ref_csv, obikmer_csv)
# The reference jaccard/shared is presence-based, which should match both
# presence/jaccard and count/jaccard (threshold=1).
COMPARISONS = [
# ── presence index ────────────────────────────────────────────────────────
('presence/jaccard_dist',
'reference_dist/jaccard_dist.csv',
'obikmer_dist/presence/jaccard_dist.csv'),
('presence/jaccard_shared',
'reference_dist/shared_kmers.csv',
'obikmer_dist/presence/jaccard_shared.csv'),
('presence/hamming_dist',
'reference_dist/hamming_dist.csv',
'obikmer_dist/presence/hamming_dist.csv'),
# ── count index (jaccard cross-check) ─────────────────────────────────────
('count/jaccard_dist',
'reference_dist/jaccard_dist.csv',
'obikmer_dist/count/jaccard_dist.csv'),
('count/jaccard_shared',
'reference_dist/shared_kmers.csv',
'obikmer_dist/count/jaccard_shared.csv'),
# ── count index (count-based metrics) ────────────────────────────────────
('count/bray_curtis_dist',
'reference_dist/bray_curtis_dist.csv',
'obikmer_dist/count/bray_curtis_dist.csv'),
('count/relfreq_bray_curtis_dist',
'reference_dist/relfreq_bray_curtis_dist.csv',
'obikmer_dist/count/relfreq_bray_curtis_dist.csv'),
('count/euclidean_dist',
'reference_dist/euclidean_dist.csv',
'obikmer_dist/count/euclidean_dist.csv'),
('count/relfreq_euclidean_dist',
'reference_dist/relfreq_euclidean_dist.csv',
'obikmer_dist/count/relfreq_euclidean_dist.csv'),
('count/hellinger_dist',
'reference_dist/hellinger_dist.csv',
'obikmer_dist/count/hellinger_dist.csv'),
('count/hellinger_euclidean_dist',
'reference_dist/hellinger_euclidean_dist.csv',
'obikmer_dist/count/hellinger_euclidean_dist.csv'),
]
# ── main ─────────────────────────────────────────────────────────────────────
def main() -> None:
import argparse
ap = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
ap.add_argument('--tol', type=float, default=1e-4,
help='Max abs diff threshold for PASS/FAIL (default 1e-4)')
ap.add_argument('--out', default='stats/dist_comparison/summary.csv',
help='Output summary CSV path')
args = ap.parse_args()
out_path = Path(args.out)
out_path.parent.mkdir(parents=True, exist_ok=True)
print(f'Comparing {len(COMPARISONS)} matrix pairs…', file=sys.stderr)
rows = []
for label, ref, obi in COMPARISONS:
rows.append(compare(label, Path(ref), Path(obi), tol=args.tol))
fields = ['comparison', 'max_abs', 'mean_abs', 'rmse', 'n_pairs', 'status']
with out_path.open('w', newline='') as fh:
w = csv.DictWriter(fh, fieldnames=fields)
w.writeheader()
w.writerows(rows)
print(f'\n{out_path}', file=sys.stderr)
n_fail = sum(1 for r in rows if r.get('status') == 'FAIL')
n_pass = sum(1 for r in rows if r.get('status') == 'PASS')
print(f'Summary: {n_pass} PASS {n_fail} FAIL '
f'{len(rows) - n_pass - n_fail} SKIP', file=sys.stderr)
if n_fail:
sys.exit(1)
if __name__ == '__main__':
main()
+21
View File
@@ -0,0 +1,21 @@
genome,Candidozyma_auris--GCF_003013715.1_ASM301371v2,Acidobacterium_capsulatum--ATCC_51196,Bacillus_subtilis--168,Escherichia_coli--CFT073,Escherichia_coli--EDL933,Escherichia_coli--K-12_MG1655,Escherichia_coli--K-12_W3110,Klebsiella_pneumoniae--ATCC_13883,Klebsiella_pneumoniae--HS11286,Klebsiella_pneumoniae--MGH_78578,Opitutus_terrae--PB90-1,Proteus_mirabilis--HI4320,Saccharolobus_islandicus--M.16.4,Salmonella_enterica--AKU_12601,Salmonella_enterica--CT18,Salmonella_enterica--LT2,Salmonella_enterica--P125109,Shouchella_clausii--KSM-K16,Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1,Yersinia_ruckeri--YRB
Candidozyma_auris--GCF_003013715.1_ASM301371v2,0.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Acidobacterium_capsulatum--ATCC_51196,1.000000,0.000000,0.999981,0.999990,0.999989,0.999987,0.999987,0.999990,0.999988,0.999988,0.999994,0.999989,1.000000,0.999988,0.999987,0.999987,0.999988,0.999989,0.999991,0.999987
Bacillus_subtilis--168,1.000000,0.999981,0.000000,0.999990,0.999989,0.999989,0.999989,0.999989,0.999988,0.999986,0.999995,0.999985,0.999999,0.999988,0.999987,0.999989,0.999988,0.999778,0.999993,0.999987
Escherichia_coli--CFT073,1.000000,0.999990,0.999990,0.000000,0.825741,0.807495,0.807218,0.991156,0.996855,0.997849,0.999996,0.999633,1.000000,0.993885,0.996736,0.994148,0.993821,0.999991,0.999984,0.999291
Escherichia_coli--EDL933,1.000000,0.999989,0.999989,0.825741,0.000000,0.735107,0.734775,0.996126,0.998058,0.997908,0.999997,0.999640,1.000000,0.993993,0.997126,0.994390,0.994059,0.999991,0.999986,0.999292
Escherichia_coli--K-12_MG1655,1.000000,0.999987,0.999989,0.807495,0.735107,0.000000,0.382567,0.996190,0.997747,0.997455,0.999996,0.999604,1.000000,0.993444,0.996645,0.993773,0.993431,0.999989,0.999984,0.999174
Escherichia_coli--K-12_W3110,1.000000,0.999987,0.999989,0.807218,0.734775,0.382567,0.000000,0.996220,0.997761,0.997467,0.999995,0.999604,1.000000,0.993445,0.996669,0.993769,0.993443,0.999990,0.999985,0.999165
Klebsiella_pneumoniae--ATCC_13883,1.000000,0.999990,0.999989,0.991156,0.996126,0.996190,0.996220,0.000000,0.845220,0.840545,0.999997,0.999648,1.000000,0.996177,0.998128,0.996268,0.996052,0.999990,0.999987,0.999325
Klebsiella_pneumoniae--HS11286,1.000000,0.999988,0.999988,0.996855,0.998058,0.997747,0.997761,0.845220,0.000000,0.906475,0.999996,0.999683,1.000000,0.997724,0.995697,0.997776,0.997769,0.999989,0.999979,0.999463
Klebsiella_pneumoniae--MGH_78578,1.000000,0.999988,0.999986,0.997849,0.997908,0.997455,0.997467,0.840545,0.906475,0.000000,0.999996,0.999704,1.000000,0.997928,0.995054,0.997844,0.997868,0.999990,0.999980,0.999479
Opitutus_terrae--PB90-1,1.000000,0.999994,0.999995,0.999996,0.999997,0.999996,0.999995,0.999997,0.999996,0.999996,0.000000,0.999997,0.999998,0.999996,0.999996,0.999996,0.999995,0.999997,0.999993,0.999996
Proteus_mirabilis--HI4320,1.000000,0.999989,0.999985,0.999633,0.999640,0.999604,0.999604,0.999648,0.999683,0.999704,0.999997,0.000000,1.000000,0.999604,0.999699,0.999622,0.999613,0.999987,0.999983,0.999505
Saccharolobus_islandicus--M.16.4,1.000000,1.000000,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.999998,1.000000,0.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
Salmonella_enterica--AKU_12601,1.000000,0.999988,0.999988,0.993885,0.993993,0.993444,0.993445,0.996177,0.997724,0.997928,0.999996,0.999604,1.000000,0.000000,0.869238,0.682277,0.663383,0.999990,0.999985,0.999260
Salmonella_enterica--CT18,1.000000,0.999987,0.999987,0.996736,0.997126,0.996645,0.996669,0.998128,0.995697,0.995054,0.999996,0.999699,1.000000,0.869238,0.000000,0.890872,0.886148,0.999988,0.999976,0.999524
Salmonella_enterica--LT2,1.000000,0.999987,0.999989,0.994148,0.994390,0.993773,0.993769,0.996268,0.997776,0.997844,0.999996,0.999622,1.000000,0.682277,0.890872,0.000000,0.622606,0.999989,0.999985,0.999296
Salmonella_enterica--P125109,1.000000,0.999988,0.999988,0.993821,0.994059,0.993431,0.993443,0.996052,0.997769,0.997868,0.999995,0.999613,1.000000,0.663383,0.886148,0.622606,0.000000,0.999988,0.999983,0.999270
Shouchella_clausii--KSM-K16,1.000000,0.999989,0.999778,0.999991,0.999991,0.999989,0.999990,0.999990,0.999989,0.999990,0.999997,0.999987,1.000000,0.999990,0.999988,0.999989,0.999988,0.000000,0.999991,0.999988
Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1,1.000000,0.999991,0.999993,0.999984,0.999986,0.999984,0.999985,0.999987,0.999979,0.999980,0.999993,0.999983,1.000000,0.999985,0.999976,0.999985,0.999983,0.999991,0.000000,0.999983
Yersinia_ruckeri--YRB,1.000000,0.999987,0.999987,0.999291,0.999292,0.999174,0.999165,0.999325,0.999463,0.999479,0.999996,0.999505,1.000000,0.999260,0.999524,0.999296,0.999270,0.999988,0.999983,0.000000
1 genome Candidozyma_auris--GCF_003013715.1_ASM301371v2 Acidobacterium_capsulatum--ATCC_51196 Bacillus_subtilis--168 Escherichia_coli--CFT073 Escherichia_coli--EDL933 Escherichia_coli--K-12_MG1655 Escherichia_coli--K-12_W3110 Klebsiella_pneumoniae--ATCC_13883 Klebsiella_pneumoniae--HS11286 Klebsiella_pneumoniae--MGH_78578 Opitutus_terrae--PB90-1 Proteus_mirabilis--HI4320 Saccharolobus_islandicus--M.16.4 Salmonella_enterica--AKU_12601 Salmonella_enterica--CT18 Salmonella_enterica--LT2 Salmonella_enterica--P125109 Shouchella_clausii--KSM-K16 Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1 Yersinia_ruckeri--YRB
2 Candidozyma_auris--GCF_003013715.1_ASM301371v2 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
3 Acidobacterium_capsulatum--ATCC_51196 1.000000 0.000000 0.999981 0.999990 0.999989 0.999987 0.999987 0.999990 0.999988 0.999988 0.999994 0.999989 1.000000 0.999988 0.999987 0.999987 0.999988 0.999989 0.999991 0.999987
4 Bacillus_subtilis--168 1.000000 0.999981 0.000000 0.999990 0.999989 0.999989 0.999989 0.999989 0.999988 0.999986 0.999995 0.999985 0.999999 0.999988 0.999987 0.999989 0.999988 0.999778 0.999993 0.999987
5 Escherichia_coli--CFT073 1.000000 0.999990 0.999990 0.000000 0.825741 0.807495 0.807218 0.991156 0.996855 0.997849 0.999996 0.999633 1.000000 0.993885 0.996736 0.994148 0.993821 0.999991 0.999984 0.999291
6 Escherichia_coli--EDL933 1.000000 0.999989 0.999989 0.825741 0.000000 0.735107 0.734775 0.996126 0.998058 0.997908 0.999997 0.999640 1.000000 0.993993 0.997126 0.994390 0.994059 0.999991 0.999986 0.999292
7 Escherichia_coli--K-12_MG1655 1.000000 0.999987 0.999989 0.807495 0.735107 0.000000 0.382567 0.996190 0.997747 0.997455 0.999996 0.999604 1.000000 0.993444 0.996645 0.993773 0.993431 0.999989 0.999984 0.999174
8 Escherichia_coli--K-12_W3110 1.000000 0.999987 0.999989 0.807218 0.734775 0.382567 0.000000 0.996220 0.997761 0.997467 0.999995 0.999604 1.000000 0.993445 0.996669 0.993769 0.993443 0.999990 0.999985 0.999165
9 Klebsiella_pneumoniae--ATCC_13883 1.000000 0.999990 0.999989 0.991156 0.996126 0.996190 0.996220 0.000000 0.845220 0.840545 0.999997 0.999648 1.000000 0.996177 0.998128 0.996268 0.996052 0.999990 0.999987 0.999325
10 Klebsiella_pneumoniae--HS11286 1.000000 0.999988 0.999988 0.996855 0.998058 0.997747 0.997761 0.845220 0.000000 0.906475 0.999996 0.999683 1.000000 0.997724 0.995697 0.997776 0.997769 0.999989 0.999979 0.999463
11 Klebsiella_pneumoniae--MGH_78578 1.000000 0.999988 0.999986 0.997849 0.997908 0.997455 0.997467 0.840545 0.906475 0.000000 0.999996 0.999704 1.000000 0.997928 0.995054 0.997844 0.997868 0.999990 0.999980 0.999479
12 Opitutus_terrae--PB90-1 1.000000 0.999994 0.999995 0.999996 0.999997 0.999996 0.999995 0.999997 0.999996 0.999996 0.000000 0.999997 0.999998 0.999996 0.999996 0.999996 0.999995 0.999997 0.999993 0.999996
13 Proteus_mirabilis--HI4320 1.000000 0.999989 0.999985 0.999633 0.999640 0.999604 0.999604 0.999648 0.999683 0.999704 0.999997 0.000000 1.000000 0.999604 0.999699 0.999622 0.999613 0.999987 0.999983 0.999505
14 Saccharolobus_islandicus--M.16.4 1.000000 1.000000 0.999999 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.999998 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
15 Salmonella_enterica--AKU_12601 1.000000 0.999988 0.999988 0.993885 0.993993 0.993444 0.993445 0.996177 0.997724 0.997928 0.999996 0.999604 1.000000 0.000000 0.869238 0.682277 0.663383 0.999990 0.999985 0.999260
16 Salmonella_enterica--CT18 1.000000 0.999987 0.999987 0.996736 0.997126 0.996645 0.996669 0.998128 0.995697 0.995054 0.999996 0.999699 1.000000 0.869238 0.000000 0.890872 0.886148 0.999988 0.999976 0.999524
17 Salmonella_enterica--LT2 1.000000 0.999987 0.999989 0.994148 0.994390 0.993773 0.993769 0.996268 0.997776 0.997844 0.999996 0.999622 1.000000 0.682277 0.890872 0.000000 0.622606 0.999989 0.999985 0.999296
18 Salmonella_enterica--P125109 1.000000 0.999988 0.999988 0.993821 0.994059 0.993431 0.993443 0.996052 0.997769 0.997868 0.999995 0.999613 1.000000 0.663383 0.886148 0.622606 0.000000 0.999988 0.999983 0.999270
19 Shouchella_clausii--KSM-K16 1.000000 0.999989 0.999778 0.999991 0.999991 0.999989 0.999990 0.999990 0.999989 0.999990 0.999997 0.999987 1.000000 0.999990 0.999988 0.999989 0.999988 0.000000 0.999991 0.999988
20 Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1 1.000000 0.999991 0.999993 0.999984 0.999986 0.999984 0.999985 0.999987 0.999979 0.999980 0.999993 0.999983 1.000000 0.999985 0.999976 0.999985 0.999983 0.999991 0.000000 0.999983
21 Yersinia_ruckeri--YRB 1.000000 0.999987 0.999987 0.999291 0.999292 0.999174 0.999165 0.999325 0.999463 0.999479 0.999996 0.999505 1.000000 0.999260 0.999524 0.999296 0.999270 0.999988 0.999983 0.000000
+1
View File
@@ -0,0 +1 @@
(((((((((((Candidozyma_auris--GCF_003013715.1_ASM301371v2:0.5000001881725941,Saccharolobus_islandicus--M.16.4:0.4999993211600824):0.0000023411501775538747,Opitutus_terrae--PB90-1:0.499997075187947):0.0000029791191795691675,(Acidobacterium_capsulatum--ATCC_51196:0.49999227771334689,(Bacillus_subtilis--168:0.49988797935621456,Shouchella_clausii--KSM-K16:0.49988984146059159):0.0001037210285571577):0.0000023959836053522034):0.0000034093646568700288,Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1:0.4999920159222422):0.000199555100890203,Proteus_mirabilis--HI4320:0.49979129185300427):0.00010103619067070024,Yersinia_ruckeri--YRB:0.4996806650749249):0.0013719139155004,(Klebsiella_pneumoniae--HS11286:0.43798845051648258,(Klebsiella_pneumoniae--ATCC_13883:0.41780293826821265,Klebsiella_pneumoniae--MGH_78578:0.42274184870836559):0.017586732339732737):0.0604124197073832):0.0006482538063555254,(Salmonella_enterica--CT18:0.43952894448143017,(Salmonella_enterica--AKU_12601:0.3357977326267918,(Salmonella_enterica--LT2:0.31203395843666389,Salmonella_enterica--P125109:0.31057217324861216):0.025729515856701136):0.10292985918524672):0.05825411485542886):0.08937928015651564,Escherichia_coli--CFT073:0.40806501650701029):0.0410131211869626,Escherichia_coli--EDL933:0.3681464750911808):0.1755112579711463,Escherichia_coli--K-12_MG1655:0.19129818036662728,Escherichia_coli--K-12_W3110:0.19126872019906239);
+21
View File
@@ -0,0 +1,21 @@
genome,Candidozyma_auris--GCF_003013715.1_ASM301371v2,Acidobacterium_capsulatum--ATCC_51196,Bacillus_subtilis--168,Escherichia_coli--CFT073,Escherichia_coli--EDL933,Escherichia_coli--K-12_MG1655,Escherichia_coli--K-12_W3110,Klebsiella_pneumoniae--ATCC_13883,Klebsiella_pneumoniae--HS11286,Klebsiella_pneumoniae--MGH_78578,Opitutus_terrae--PB90-1,Proteus_mirabilis--HI4320,Saccharolobus_islandicus--M.16.4,Salmonella_enterica--AKU_12601,Salmonella_enterica--CT18,Salmonella_enterica--LT2,Salmonella_enterica--P125109,Shouchella_clausii--KSM-K16,Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1,Yersinia_ruckeri--YRB
Candidozyma_auris--GCF_003013715.1_ASM301371v2,0,0,0,0,0,0,0,0,0,0,0,0,8,0,1,0,0,0,0,3
Acidobacterium_capsulatum--ATCC_51196,0,0,203,119,128,141,140,116,109,111,78,112,0,136,109,147,134,117,55,129
Bacillus_subtilis--168,0,203,0,124,132,128,123,133,109,130,66,158,6,131,112,124,135,2393,46,124
Escherichia_coli--CFT073,0,119,124,0,1966777,1998059,1999094,117743,32029,22312,63,4225,0,74946,31918,73311,76585,113,128,7854
Escherichia_coli--EDL933,0,128,132,1966777,0,2627885,2628700,52488,20134,22064,48,4202,0,74655,28602,71244,74665,112,108,7963
Escherichia_coli--K-12_MG1655,0,141,128,1998059,2627885,0,4452541,48302,21382,24602,47,4277,0,75729,30449,73622,76778,119,111,8566
Escherichia_coli--K-12_W3110,0,140,123,1999094,2628700,4452541,0,47894,21226,24470,68,4278,0,75658,30207,73614,76583,112,108,8660
Klebsiella_pneumoniae--ATCC_13883,0,116,133,117743,52488,48302,47894,0,1416091,1477759,42,4172,0,48296,18988,48144,50416,120,106,7712
Klebsiella_pneumoniae--HS11286,0,109,109,32029,20134,21382,21226,1416091,0,644063,42,2738,0,21498,29758,21606,21376,99,102,4417
Klebsiella_pneumoniae--MGH_78578,0,111,130,22312,22064,24602,24470,1477759,644063,0,42,2614,0,19948,35067,21330,20813,97,102,4374
Opitutus_terrae--PB90-1,0,78,66,63,48,47,68,42,42,42,0,43,18,57,42,53,66,39,58,43
Proteus_mirabilis--HI4320,0,112,158,4225,4202,4277,4278,4172,2738,2614,43,0,0,4254,2481,4166,4215,131,103,4704
Saccharolobus_islandicus--M.16.4,8,0,6,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0
Salmonella_enterica--AKU_12601,0,136,131,74946,74655,75729,75658,48296,21498,19948,57,4254,0,0,1047731,2857146,2951421,117,108,7643
Salmonella_enterica--CT18,1,109,112,31918,28602,30449,30207,18988,29758,35067,42,2481,0,1047731,0,917948,940297,106,106,3716
Salmonella_enterica--LT2,0,147,124,73311,71244,73622,73614,48144,21606,21330,53,4166,0,2857146,917948,0,3284800,122,108,7460
Salmonella_enterica--P125109,0,134,135,76585,74665,76778,76583,50416,21376,20813,66,4215,0,2951421,940297,3284800,0,134,124,7645
Shouchella_clausii--KSM-K16,0,117,2393,113,112,119,112,120,99,97,39,131,0,117,106,122,134,0,58,124
Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1,0,55,46,128,108,111,108,106,102,102,58,103,0,108,106,108,124,58,0,96
Yersinia_ruckeri--YRB,3,129,124,7854,7963,8566,8660,7712,4417,4374,43,4704,0,7643,3716,7460,7645,124,96,0
1 genome Candidozyma_auris--GCF_003013715.1_ASM301371v2 Acidobacterium_capsulatum--ATCC_51196 Bacillus_subtilis--168 Escherichia_coli--CFT073 Escherichia_coli--EDL933 Escherichia_coli--K-12_MG1655 Escherichia_coli--K-12_W3110 Klebsiella_pneumoniae--ATCC_13883 Klebsiella_pneumoniae--HS11286 Klebsiella_pneumoniae--MGH_78578 Opitutus_terrae--PB90-1 Proteus_mirabilis--HI4320 Saccharolobus_islandicus--M.16.4 Salmonella_enterica--AKU_12601 Salmonella_enterica--CT18 Salmonella_enterica--LT2 Salmonella_enterica--P125109 Shouchella_clausii--KSM-K16 Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1 Yersinia_ruckeri--YRB
2 Candidozyma_auris--GCF_003013715.1_ASM301371v2 0 0 0 0 0 0 0 0 0 0 0 0 8 0 1 0 0 0 0 3
3 Acidobacterium_capsulatum--ATCC_51196 0 0 203 119 128 141 140 116 109 111 78 112 0 136 109 147 134 117 55 129
4 Bacillus_subtilis--168 0 203 0 124 132 128 123 133 109 130 66 158 6 131 112 124 135 2393 46 124
5 Escherichia_coli--CFT073 0 119 124 0 1966777 1998059 1999094 117743 32029 22312 63 4225 0 74946 31918 73311 76585 113 128 7854
6 Escherichia_coli--EDL933 0 128 132 1966777 0 2627885 2628700 52488 20134 22064 48 4202 0 74655 28602 71244 74665 112 108 7963
7 Escherichia_coli--K-12_MG1655 0 141 128 1998059 2627885 0 4452541 48302 21382 24602 47 4277 0 75729 30449 73622 76778 119 111 8566
8 Escherichia_coli--K-12_W3110 0 140 123 1999094 2628700 4452541 0 47894 21226 24470 68 4278 0 75658 30207 73614 76583 112 108 8660
9 Klebsiella_pneumoniae--ATCC_13883 0 116 133 117743 52488 48302 47894 0 1416091 1477759 42 4172 0 48296 18988 48144 50416 120 106 7712
10 Klebsiella_pneumoniae--HS11286 0 109 109 32029 20134 21382 21226 1416091 0 644063 42 2738 0 21498 29758 21606 21376 99 102 4417
11 Klebsiella_pneumoniae--MGH_78578 0 111 130 22312 22064 24602 24470 1477759 644063 0 42 2614 0 19948 35067 21330 20813 97 102 4374
12 Opitutus_terrae--PB90-1 0 78 66 63 48 47 68 42 42 42 0 43 18 57 42 53 66 39 58 43
13 Proteus_mirabilis--HI4320 0 112 158 4225 4202 4277 4278 4172 2738 2614 43 0 0 4254 2481 4166 4215 131 103 4704
14 Saccharolobus_islandicus--M.16.4 8 0 6 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0
15 Salmonella_enterica--AKU_12601 0 136 131 74946 74655 75729 75658 48296 21498 19948 57 4254 0 0 1047731 2857146 2951421 117 108 7643
16 Salmonella_enterica--CT18 1 109 112 31918 28602 30449 30207 18988 29758 35067 42 2481 0 1047731 0 917948 940297 106 106 3716
17 Salmonella_enterica--LT2 0 147 124 73311 71244 73622 73614 48144 21606 21330 53 4166 0 2857146 917948 0 3284800 122 108 7460
18 Salmonella_enterica--P125109 0 134 135 76585 74665 76778 76583 50416 21376 20813 66 4215 0 2951421 940297 3284800 0 134 124 7645
19 Shouchella_clausii--KSM-K16 0 117 2393 113 112 119 112 120 99 97 39 131 0 117 106 122 134 0 58 124
20 Wolbachia_endosymbiont--GCF_000306885.1_ASM30688v1 0 55 46 128 108 111 108 106 102 102 58 103 0 108 106 108 124 58 0 96
21 Yersinia_ruckeri--YRB 3 129 124 7854 7963 8566 8660 7712 4417 4374 43 4704 0 7643 3716 7460 7645 124 96 0