Patch compilation of binaries
Former-commit-id: 688670c339643a282bdeabafafff3b451be83cb6 Former-commit-id: 60d3e42d2af73515fea50d9c97dc4eacda9c8abb
This commit is contained in:
@ -103,13 +103,16 @@ TAR = tar
|
|||||||
# PRTDIR : port dependent files location (libraries and binaries)
|
# PRTDIR : port dependent files location (libraries and binaries)
|
||||||
# BINDIR : port binaries
|
# BINDIR : port binaries
|
||||||
# LIBDIR : port libraries
|
# LIBDIR : port libraries
|
||||||
|
# INCDIR : port includes
|
||||||
#
|
#
|
||||||
|
|
||||||
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
|
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
|
||||||
|
|
||||||
BINDIR = $(PRTDIR)/bin
|
BINDIR = $(abspath $(PRTDIR))/bin
|
||||||
|
|
||||||
LIBDIR = $(PRTDIR)/lib
|
LIBDIR = $(abspath $(PRTDIR))/lib
|
||||||
|
|
||||||
|
INCDIR = $(abspath $(PRTDIR))/include
|
||||||
|
|
||||||
# ------------------------------------
|
# ------------------------------------
|
||||||
# default gmake variable in implicit rules
|
# default gmake variable in implicit rules
|
||||||
|
@ -18,9 +18,15 @@
|
|||||||
# General compilation flags
|
# General compilation flags
|
||||||
# ------------------------------------
|
# ------------------------------------
|
||||||
|
|
||||||
|
CC = /usr/bin/gcc
|
||||||
|
CXX = /usr/bin/g++
|
||||||
|
CXXPP = /usr/bin/cpp
|
||||||
|
CPP = /usr/bin/cpp
|
||||||
|
|
||||||
#
|
#
|
||||||
# MACHDEF : define machine and OS specific flags
|
# MACHDEF : define machine and OS specific flags
|
||||||
#
|
#
|
||||||
|
|
||||||
|
MACHINE = MACOSX
|
||||||
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX
|
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
# MACHDEF : define machine and OS specific flags
|
# MACHDEF : define machine and OS specific flags
|
||||||
#
|
#
|
||||||
|
|
||||||
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN
|
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN -DLINUX
|
||||||
|
|
||||||
#
|
#
|
||||||
# MATH_LIBS : machine specific math libraries
|
# MATH_LIBS : machine specific math libraries
|
||||||
|
@ -15,6 +15,9 @@ PKGDIR ?= build.$(PORTNAME)
|
|||||||
|
|
||||||
PRTPATH = $(abspath $(PRTDIR))
|
PRTPATH = $(abspath $(PRTDIR))
|
||||||
|
|
||||||
|
PRTPATH_BIN = $(PRTPATH)/bin
|
||||||
|
PKG_CONFIG = $(PRTPATH)/bin/pkg-config
|
||||||
|
|
||||||
#
|
#
|
||||||
# Rules
|
# Rules
|
||||||
#
|
#
|
||||||
@ -28,7 +31,17 @@ pkg.expand::
|
|||||||
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
|
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
|
||||||
|
|
||||||
pkg.make:: pkg.expand
|
pkg.make:: pkg.expand
|
||||||
test -f $(PKGDIR)/Makefile || (cd $(PKGDIR) && ./configure --prefix=$(PRTPATH))
|
echo $(PKG_CONFIG)
|
||||||
|
test -f $(PKGDIR)/Makefile || (export PATH="$(PRTPATH_BIN):$$PATH" && \
|
||||||
|
export PKG_CONFIG=$(PKG_CONFIG) && \
|
||||||
|
export CC="$(CC)" && \
|
||||||
|
export CXX="$(CXX)" && \
|
||||||
|
export CPP="$(CPP)" && \
|
||||||
|
export CXXPP="$(CXXPP)" && \
|
||||||
|
export CFLAGS="$(CFLAGS)" && \
|
||||||
|
export LDFLAGS="$(LDFLAGS)" && \
|
||||||
|
cd $(PKGDIR) && \
|
||||||
|
./configure --prefix=$(PRTPATH) $(CONFIGURE_OPTIONS))
|
||||||
$(MAKE) -C $(PKGDIR)
|
$(MAKE) -C $(PKGDIR)
|
||||||
|
|
||||||
pkg.install:: pkg.make
|
pkg.install:: pkg.make
|
||||||
|
12
src/Makefile
12
src/Makefile
@ -17,12 +17,18 @@
|
|||||||
#
|
#
|
||||||
include ../config/auto.conf
|
include ../config/auto.conf
|
||||||
|
|
||||||
DIRS = exonerate \
|
DIRS = aragorn \
|
||||||
|
clustalo \
|
||||||
|
exonerate \
|
||||||
|
hmmer3 \
|
||||||
kimono \
|
kimono \
|
||||||
|
muscle \
|
||||||
|
ncbiblast \
|
||||||
prokov \
|
prokov \
|
||||||
|
repseek \
|
||||||
sequtils \
|
sequtils \
|
||||||
aragorn \
|
sumaclust \
|
||||||
ncbiblast
|
sumatra
|
||||||
|
|
||||||
include ../config/targets/propagate.targ
|
include ../config/targets/propagate.targ
|
||||||
|
|
||||||
|
29
src/clustalo/Makefile
Executable file
29
src/clustalo/Makefile
Executable file
@ -0,0 +1,29 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for lxpack
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Apr 97 : Created
|
||||||
|
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
include ../../config/auto.conf
|
||||||
|
|
||||||
|
DIRS = argtable \
|
||||||
|
clustalo
|
||||||
|
|
||||||
|
include ../../config/targets/propagate.targ
|
||||||
|
|
||||||
|
include ../../config/targets/help.targ
|
||||||
|
|
||||||
|
all::
|
||||||
|
$(MAKE) ACTION=$@ _action
|
||||||
|
|
24
src/clustalo/argtable/Makefile
Normal file
24
src/clustalo/argtable/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package argtable2
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = argtable2-13
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/clustalo/argtable/argtable2-13.tgz
Normal file
BIN
src/clustalo/argtable/argtable2-13.tgz
Normal file
Binary file not shown.
24
src/clustalo/clustalo/Makefile
Normal file
24
src/clustalo/clustalo/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package clustal-omega
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = clustal-omega-1.2.1
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/clustalo/clustalo/clustal-omega-1.2.1.tgz
Normal file
BIN
src/clustalo/clustalo/clustal-omega-1.2.1.tgz
Normal file
Binary file not shown.
BIN
src/exonerate/.DS_Store
vendored
BIN
src/exonerate/.DS_Store
vendored
Binary file not shown.
20
src/exonerate/Makefile
Normal file → Executable file
20
src/exonerate/Makefile
Normal file → Executable file
@ -2,10 +2,12 @@
|
|||||||
# $Id: $
|
# $Id: $
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# @file: Makefile
|
# @file: Makefile
|
||||||
# @desc: makefile for package exonerate
|
# @desc: makefile for lxpack
|
||||||
#
|
#
|
||||||
# @history:
|
# @history:
|
||||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
# @history:
|
||||||
|
# @+ <Gloup> : Apr 97 : Created
|
||||||
|
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||||
#
|
#
|
||||||
# @note: should be processed with gnu compatible make
|
# @note: should be processed with gnu compatible make
|
||||||
# @note: helixware_compatible
|
# @note: helixware_compatible
|
||||||
@ -13,12 +15,18 @@
|
|||||||
# @end:
|
# @end:
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
#
|
#
|
||||||
|
|
||||||
include ../../config/auto.conf
|
include ../../config/auto.conf
|
||||||
|
|
||||||
PKG = exonerate-2.2.0
|
DIRS = pkg-config \
|
||||||
|
libffi \
|
||||||
|
gettext \
|
||||||
|
glib2 \
|
||||||
|
exonerate
|
||||||
|
|
||||||
include $(CFGDIR)targets/package.targ
|
include ../../config/targets/propagate.targ
|
||||||
|
|
||||||
include $(CFGDIR)targets/help.targ
|
include ../../config/targets/help.targ
|
||||||
|
|
||||||
|
all::
|
||||||
|
$(MAKE) ACTION=$@ _action
|
||||||
|
|
||||||
|
Binary file not shown.
BIN
src/exonerate/exonerate/.DS_Store
vendored
Normal file
BIN
src/exonerate/exonerate/.DS_Store
vendored
Normal file
Binary file not shown.
24
src/exonerate/exonerate/Makefile
Normal file
24
src/exonerate/exonerate/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package exonerate
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = exonerate-2.2.0_EC
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/exonerate/exonerate/exonerate-2.2.0_EC.tgz
Normal file
BIN
src/exonerate/exonerate/exonerate-2.2.0_EC.tgz
Normal file
Binary file not shown.
24
src/exonerate/gettext/Makefile
Normal file
24
src/exonerate/gettext/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package gettext
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = gettext-0.19
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/exonerate/gettext/gettext-0.19.tgz
Normal file
BIN
src/exonerate/gettext/gettext-0.19.tgz
Normal file
Binary file not shown.
25
src/exonerate/glib2/Makefile
Normal file
25
src/exonerate/glib2/Makefile
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package glib
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = glib-2.44.1
|
||||||
|
CONFIGURE_OPTIONS = --disable-dtrace
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/exonerate/glib2/glib-2.44.1.tgz
Normal file
BIN
src/exonerate/glib2/glib-2.44.1.tgz
Normal file
Binary file not shown.
BIN
src/exonerate/libffi/.DS_Store
vendored
Normal file
BIN
src/exonerate/libffi/.DS_Store
vendored
Normal file
Binary file not shown.
24
src/exonerate/libffi/Makefile
Normal file
24
src/exonerate/libffi/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package libffi
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = libffi-3.2.1
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/exonerate/libffi/libffi-3.2.1.tgz
Normal file
BIN
src/exonerate/libffi/libffi-3.2.1.tgz
Normal file
Binary file not shown.
25
src/exonerate/pkg-config/Makefile
Normal file
25
src/exonerate/pkg-config/Makefile
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package pkg-config
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = pkg-config-0.29
|
||||||
|
CONFIGURE_OPTIONS= --with-internal-glib
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/exonerate/pkg-config/pkg-config-0.29.tgz
Normal file
BIN
src/exonerate/pkg-config/pkg-config-0.29.tgz
Normal file
Binary file not shown.
28
src/hmmer3/Makefile
Executable file
28
src/hmmer3/Makefile
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for lxpack
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Apr 97 : Created
|
||||||
|
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
include ../../config/auto.conf
|
||||||
|
|
||||||
|
DIRS = hmmer3
|
||||||
|
|
||||||
|
include ../../config/targets/propagate.targ
|
||||||
|
|
||||||
|
include ../../config/targets/help.targ
|
||||||
|
|
||||||
|
all::
|
||||||
|
$(MAKE) ACTION=$@ _action
|
||||||
|
|
Binary file not shown.
24
src/hmmer3/hmmer3/Makefile
Normal file
24
src/hmmer3/hmmer3/Makefile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# ---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for package hmmer
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
PKG = hmmer-3.1b1
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/package.targ
|
||||||
|
|
||||||
|
include $(CFGDIR)targets/help.targ
|
||||||
|
|
BIN
src/hmmer3/hmmer3/hmmer-3.1b1.tgz
Normal file
BIN
src/hmmer3/hmmer3/hmmer-3.1b1.tgz
Normal file
Binary file not shown.
@ -1,4 +1,4 @@
|
|||||||
# ---------------------------------------------------------------
|
#---------------------------------------------------------------
|
||||||
# $Id: $
|
# $Id: $
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# @file: Makefile
|
# @file: Makefile
|
||||||
@ -25,9 +25,6 @@ include ../../config/targets/help.targ
|
|||||||
|
|
||||||
all::
|
all::
|
||||||
$(MAKE) ACTION=$@ _action
|
$(MAKE) ACTION=$@ _action
|
||||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
|
||||||
test -d $(BINDIR) || mkdir $(BINDIR)
|
|
||||||
\cp -f lxpack/ports/$(PORTNAME)/bin/* $(BINDIR)
|
|
||||||
|
|
||||||
clean::
|
clean::
|
||||||
$(MAKE) -C lxpack portclean
|
$(MAKE) -C lxpack portclean
|
||||||
|
@ -15,14 +15,14 @@
|
|||||||
# @end:
|
# @end:
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
#
|
#
|
||||||
include ./config/auto.conf
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
DIRS = src \
|
DIRS = src \
|
||||||
tests
|
tests
|
||||||
|
|
||||||
include ./config/targets/propagate.targ
|
include ../../../config/targets/propagate.targ
|
||||||
|
|
||||||
include ./config/targets/help.targ
|
include ../../../config/targets/help.targ
|
||||||
|
|
||||||
portclean::
|
portclean::
|
||||||
$(MAKE) ACTION=$@ _action
|
$(MAKE) ACTION=$@ _action
|
||||||
|
@ -1,51 +0,0 @@
|
|||||||
|
|
||||||
$Id: README.txt 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
|
|
||||||
This directory contains Makefile machine specific configuration files
|
|
||||||
(and default targets to help you write Makefiles)
|
|
||||||
|
|
||||||
These headers should be used with GNU make or compatible
|
|
||||||
|
|
||||||
#
|
|
||||||
# portname
|
|
||||||
#
|
|
||||||
|
|
||||||
To check your port, issue :
|
|
||||||
|
|
||||||
./guess_port
|
|
||||||
|
|
||||||
if output is 'unknown <mach>:<sys>:<rel>' then you should :
|
|
||||||
- add a port entry in guess_port for <mach>:<sys>:<rel>
|
|
||||||
- create a ports/<port>.conf configuration file
|
|
||||||
(the best is to start from another port file,
|
|
||||||
choose whatever looks closest)
|
|
||||||
|
|
||||||
#
|
|
||||||
# configuration flags
|
|
||||||
#
|
|
||||||
|
|
||||||
auto.conf : the main configuration file :
|
|
||||||
- determine the machine port thru 'guess_port' shell
|
|
||||||
- include 'default.conf' file
|
|
||||||
- include the machine specific 'ports/<port>.conf' file
|
|
||||||
|
|
||||||
default.conf : default configuration (included by 'auto.conf')
|
|
||||||
|
|
||||||
ports/<port>.conf : machine specific configuration (included by 'auto.conf')
|
|
||||||
|
|
||||||
#
|
|
||||||
# utility targets
|
|
||||||
#
|
|
||||||
|
|
||||||
targets/help.targ : target for standard help
|
|
||||||
|
|
||||||
targets/propagate.targ : target for propagating targets to subdirectories
|
|
||||||
|
|
||||||
targets/package.targ : default targets for standard package with 'configure'
|
|
||||||
|
|
||||||
targets/empty.targ : default empty targets (defined as double colon rules)
|
|
||||||
|
|
||||||
targets/lxbin.targ : default make targets for standard lx binary (without libraries)
|
|
||||||
|
|
||||||
targets/debug.targ : target to print debug information (for dev.)
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: auto.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# auto.conf
|
|
||||||
# auto configuration file using guess_port
|
|
||||||
#
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# default shell for gnu-make
|
|
||||||
#
|
|
||||||
|
|
||||||
SHELL = /bin/sh
|
|
||||||
|
|
||||||
#
|
|
||||||
# CFGDIR : location of config files = this file directory location
|
|
||||||
#
|
|
||||||
# CFGPRT : port name (as returned by guess_port)
|
|
||||||
#
|
|
||||||
|
|
||||||
# because builtin 'lastword' is missing in gnu-make 3.80
|
|
||||||
|
|
||||||
lastword = $(word $(words $1), $1)
|
|
||||||
|
|
||||||
CFGDIR := $(dir $(call lastword, $(MAKEFILE_LIST)))
|
|
||||||
|
|
||||||
CFGPRT := $(shell $(CFGDIR)guess_port)
|
|
||||||
|
|
||||||
# check if port is correctly defined
|
|
||||||
|
|
||||||
ifneq (1, $(words $(CFGPRT)))
|
|
||||||
entry := $(call lastword, $(CFGPRT))
|
|
||||||
$(error port is undefined - add entry for "$(entry)" in configuration file -)
|
|
||||||
endif
|
|
||||||
|
|
||||||
#
|
|
||||||
# PORTNAME : port name to use : default is CFGPRT but may be further modified
|
|
||||||
# by machine specific configuration
|
|
||||||
|
|
||||||
PORTNAME = $(CFGPRT)
|
|
||||||
|
|
||||||
#
|
|
||||||
# default configuration
|
|
||||||
# may be overridden by machine dependent definitions below
|
|
||||||
#
|
|
||||||
|
|
||||||
include $(CFGDIR)default.conf
|
|
||||||
|
|
||||||
#
|
|
||||||
# machine dependent definitions
|
|
||||||
#
|
|
||||||
|
|
||||||
include $(CFGDIR)ports/$(CFGPRT).conf
|
|
@ -1,124 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: default.conf 2007 2013-12-03 14:21:39Z viari $
|
|
||||||
#
|
|
||||||
# default.conf
|
|
||||||
# default configuration flags
|
|
||||||
# maybe further redefined by machine specific configuration
|
|
||||||
#
|
|
||||||
# this file is included by auto.conf
|
|
||||||
#
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF =
|
|
||||||
|
|
||||||
#
|
|
||||||
# CC : (ansi C) compiler command to use
|
|
||||||
# you may add some machine specific flags (like -arch ...)
|
|
||||||
# in the <machine>.conf configuration file
|
|
||||||
#
|
|
||||||
|
|
||||||
CC = gcc
|
|
||||||
|
|
||||||
#
|
|
||||||
# default compiler optimizer flag
|
|
||||||
#
|
|
||||||
|
|
||||||
OPTIM = -O
|
|
||||||
|
|
||||||
#
|
|
||||||
# CC_LIBS : additional machine specific $(CC) libraries
|
|
||||||
# like '-lC' on some machines
|
|
||||||
#
|
|
||||||
|
|
||||||
CC_LIBS =
|
|
||||||
|
|
||||||
#
|
|
||||||
# MALLOC_LIBS : machine specific malloc libraries
|
|
||||||
# like '-lmalloc' on SGI
|
|
||||||
#
|
|
||||||
|
|
||||||
MALLOC_LIBS =
|
|
||||||
|
|
||||||
#
|
|
||||||
# MATH_LIBS : machine specific math libraries
|
|
||||||
# like '-lm' on Solaris
|
|
||||||
#
|
|
||||||
|
|
||||||
MATH_LIBS =
|
|
||||||
|
|
||||||
#
|
|
||||||
# LINT : looks like LINT command does not exist anymore
|
|
||||||
# here is a rough replacement
|
|
||||||
#
|
|
||||||
|
|
||||||
LINT = gcc -S -Wall -Wno-format-y2k -W -Wstrict-prototypes \
|
|
||||||
-Wmissing-prototypes -Wpointer-arith -Wreturn-type \
|
|
||||||
-Wcast-qual -Wwrite-strings -Wswitch -Wshadow \
|
|
||||||
-Wcast-align -Wbad-function-cast -Wchar-subscripts \
|
|
||||||
-Winline -Wnested-externs -Wredundant-decls
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General system commands
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# AR : AR archive command
|
|
||||||
# ARFLAGS : $(AR) archiving flags
|
|
||||||
# ARXFLAGS : $(AR) extraction flags
|
|
||||||
#
|
|
||||||
|
|
||||||
AR = ar
|
|
||||||
ARFLAGS = rcv
|
|
||||||
ARXFLAGS = xv
|
|
||||||
|
|
||||||
#
|
|
||||||
# RANLIB : ranlib command
|
|
||||||
#
|
|
||||||
|
|
||||||
RANLIB = ranlib
|
|
||||||
|
|
||||||
#
|
|
||||||
# DIFF : diff command
|
|
||||||
#
|
|
||||||
|
|
||||||
DIFF = diff
|
|
||||||
|
|
||||||
#
|
|
||||||
# TAR : tar command
|
|
||||||
#
|
|
||||||
|
|
||||||
TAR = tar
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# Default locations
|
|
||||||
# ------------------------------------
|
|
||||||
#
|
|
||||||
# PRTDIR : port dependent files location (libraries and binaries)
|
|
||||||
# BINDIR : port binaries
|
|
||||||
# LIBDIR : port libraries
|
|
||||||
#
|
|
||||||
|
|
||||||
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
|
|
||||||
|
|
||||||
BINDIR = $(PRTDIR)/bin
|
|
||||||
|
|
||||||
LIBDIR = $(PRTDIR)/lib
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# default gmake variable in implicit rules
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
CFLAGS = $(OPTIM) $(MACHDEF) -I$(INCDIR)
|
|
||||||
|
|
||||||
LDFLAGS = -L$(LIBDIR) -L.
|
|
||||||
|
|
||||||
LDLIBS = $(LIBS) $(MALLOC_LIBS) $(MATH_LIBS) $(CC_LIBS)
|
|
||||||
|
|
||||||
LINTFLAGS = $(MACHDEF) -I$(INCDIR)
|
|
@ -1,33 +0,0 @@
|
|||||||
#! /bin/sh
|
|
||||||
#
|
|
||||||
# $Id: guess_port 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# @file: guess_port
|
|
||||||
# @desc: attempt to guess the portname
|
|
||||||
# @usage: guess_port
|
|
||||||
#
|
|
||||||
# @history:
|
|
||||||
# @+ <Gloup> Nov. 2000 first draft adapted from GNU config.guess
|
|
||||||
# @+ <Gloup> Feb. 2010 moved to sh
|
|
||||||
#
|
|
||||||
|
|
||||||
mach=`uname -m`
|
|
||||||
syst=`uname -s`
|
|
||||||
rels=`uname -r`
|
|
||||||
|
|
||||||
case ${mach}:${syst}:${rels} in
|
|
||||||
|
|
||||||
alpha:OSF1:* ) echo alpha-osf1;;
|
|
||||||
sun4*:SunOS:5.* ) echo sparc-solaris;;
|
|
||||||
i86pc:SunOS:5.* ) echo i386-solaris;;
|
|
||||||
sun4*:SunOS:* ) echo sparc-sunos;;
|
|
||||||
Power*:Darwin:* ) echo ppc-darwin;;
|
|
||||||
i*86:Linux:* ) echo i386-linux;;
|
|
||||||
x*86*:Linux:* ) echo i386-linux;;
|
|
||||||
i*86:Darwin:* ) echo i386-darwin;;
|
|
||||||
IP*:IRIX*:* ) echo mips-irix;;
|
|
||||||
i*86:MINGW32*:* ) echo x86-mingw32;;
|
|
||||||
|
|
||||||
*) echo unknown ${mach}:${syst}:${rels}; exit 1;;
|
|
||||||
esac
|
|
||||||
exit 0
|
|
@ -1,26 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: i386-darwin.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# i386-darwin.conf
|
|
||||||
# configuration file for MacOS-X/Intel-Based/Darwin 1.2 with gcc compiler
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
# system (uname -srp) : Darwin 8.7.1 i386
|
|
||||||
# compiler (cc --version) : i686-apple-darwin8-gcc-4.0.1
|
|
||||||
#
|
|
||||||
# check tags
|
|
||||||
# @uname:uname -srp:Darwin 8.7.1 i386
|
|
||||||
# @cc:cc --version:i686-apple-darwin8-gcc-4.0.1
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: i386-linux.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# i386-linux.conf
|
|
||||||
# configuration file for linux ix86 with GNU gcc compiler
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
# system (uname -srp) : Linux 2.2.14-5.0 unknown
|
|
||||||
# compiler (gcc --version) : egcs-2.91.66
|
|
||||||
#
|
|
||||||
# check tags
|
|
||||||
# @uname:uname -srp:Linux 2.2.14-5.0 unknown
|
|
||||||
# @cc:cc --version:egcs-2.91.66
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN
|
|
||||||
|
|
||||||
#
|
|
||||||
# MATH_LIBS : machine specific math libraries
|
|
||||||
#
|
|
||||||
|
|
||||||
MATH_LIBS = -lm
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: ppc-darwin.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# ppc-darwin.conf
|
|
||||||
# configuration file for MacOS-X/Darwin 1.2 with native cc compiler
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
# system (uname -srp) : Darwin 1.2 powerpc
|
|
||||||
# compiler (cc --version) : 2.7.2.1
|
|
||||||
#
|
|
||||||
# check tags
|
|
||||||
# @uname:uname -srp:Darwin 1.2 powerpc
|
|
||||||
# @cc:cc --version:2.7.2.1
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF = -DLX_TARGET_MACPPC -DBIG_ENDIAN
|
|
||||||
|
|
||||||
#
|
|
||||||
# CC : name of (ansi C) compiler to use
|
|
||||||
#
|
|
||||||
|
|
||||||
CC = cc -arch ppc
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: sparc-solaris.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# sparc-solaris.conf
|
|
||||||
# configuration file for sparc solaris with GNU gcc compiler
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
# system (uname -srp) : SunOS 5.8 sparc
|
|
||||||
# compiler (gcc --version) : 2.95.2
|
|
||||||
#
|
|
||||||
# check tags
|
|
||||||
# @uname:uname -srp:SunOS 5.8 sparc
|
|
||||||
# @cc:cc --version:2.95.2
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF = -DLX_TARGET_SOLARIS -DBIG_ENDIAN
|
|
||||||
|
|
||||||
#
|
|
||||||
# MATH_LIBS : machine specific math libraries
|
|
||||||
#
|
|
||||||
|
|
||||||
MATH_LIBS = -lm
|
|
@ -1,54 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: x86-mingw32.conf 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# x86-mingw32
|
|
||||||
# configuration file for MinGW with GNU gcc compiler.
|
|
||||||
#
|
|
||||||
# this file is included in Makefile
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# rename PORTNAME safely since MinGW produces pure win32 executables
|
|
||||||
# without dll's
|
|
||||||
#
|
|
||||||
|
|
||||||
PORTNAME = x86-win32
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General compilation flags
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# CC_LIBS : additional machine specific $(CC) libraries
|
|
||||||
#
|
|
||||||
# libiberty is needed for some system extensions (like mkstemps)
|
|
||||||
#
|
|
||||||
|
|
||||||
CC_LIBS = -liberty
|
|
||||||
|
|
||||||
#
|
|
||||||
# MACHDEF : define machine and OS specific flags
|
|
||||||
#
|
|
||||||
# -DDLMALLOC : use dlmalloc instead of malloc (which does not have mallinfo)
|
|
||||||
# -posix is a new replacement for several MinGW32 flags, including:
|
|
||||||
# -D__USE_MINGW_ANSI_STDIO : mingw gcc flag to recognize the C99 "%zu" format
|
|
||||||
#
|
|
||||||
|
|
||||||
MACHDEF = -posix -DLX_TARGET_WIN32 -DWIN_MINGW -DDLMALLOC -DLITTLE_ENDIAN
|
|
||||||
|
|
||||||
#
|
|
||||||
# MATH_LIBS : machine specific math libraries
|
|
||||||
#
|
|
||||||
|
|
||||||
MATH_LIBS = -lm
|
|
||||||
|
|
||||||
# ------------------------------------
|
|
||||||
# General system commands
|
|
||||||
# ------------------------------------
|
|
||||||
|
|
||||||
#
|
|
||||||
# DIFF : diff command / should ignore cr on windows
|
|
||||||
#
|
|
||||||
|
|
||||||
DIFF = diff --strip-trailing-cr
|
|
@ -1,25 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# debug.targ
|
|
||||||
#
|
|
||||||
# target to print debug information (dev. only)
|
|
||||||
#
|
|
||||||
# it defines the following targets:
|
|
||||||
#
|
|
||||||
# debug :
|
|
||||||
# print debug
|
|
||||||
#
|
|
||||||
# it requires auto.conf
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: debug
|
|
||||||
|
|
||||||
debug::
|
|
||||||
@echo "+ PORTNAME: $(PORTNAME)"
|
|
||||||
@echo "+ CFGPRT: $(CFGPRT)"
|
|
||||||
@echo "+ CFGDIR: $(CFGDIR)"
|
|
||||||
@echo "+ PRTDIR: $(PRTDIR)"
|
|
||||||
@echo "+ MACHDEF: $(MACHDEF)"
|
|
||||||
|
|
||||||
|
|
@ -1,24 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: $
|
|
||||||
#
|
|
||||||
# empty.targ
|
|
||||||
#
|
|
||||||
# default empty targets (defined as double colon rules)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Rules
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: all test clean portclean help
|
|
||||||
|
|
||||||
all::
|
|
||||||
|
|
||||||
test::
|
|
||||||
|
|
||||||
clean::
|
|
||||||
|
|
||||||
portclean:: clean
|
|
||||||
|
|
||||||
test::
|
|
@ -1,23 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# help.targ
|
|
||||||
#
|
|
||||||
# default target to print help
|
|
||||||
#
|
|
||||||
# it defines the following targets:
|
|
||||||
#
|
|
||||||
# help :
|
|
||||||
# print help
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: help
|
|
||||||
|
|
||||||
help::
|
|
||||||
@ echo "basic usage: make [<action>+]"
|
|
||||||
@ echo "valid <action> :"
|
|
||||||
@ echo " all : compile everything for current port [default target]"
|
|
||||||
@ echo " clean : local cleanup"
|
|
||||||
@ echo " portclean : cleanup distribution for current port"
|
|
||||||
@ echo " test : run tests on current port"
|
|
||||||
@ echo " help : print this help"
|
|
@ -1,51 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: $
|
|
||||||
#
|
|
||||||
# lxbin.targ
|
|
||||||
#
|
|
||||||
# default make targets for standard lx binary
|
|
||||||
#
|
|
||||||
# you should define the 'PROGS' and 'OSRC' variables
|
|
||||||
# and optionally 'LIBS' if binaries have to be linked with libraries
|
|
||||||
#
|
|
||||||
# note: if main source code for binary PROG is PROG.c, there is nothing to do,
|
|
||||||
# else (e.g. if it involves several sources files) you should also add local
|
|
||||||
# file dependencies. e.g under the form:
|
|
||||||
#
|
|
||||||
# mymain: $(OBJ) mymain_base.c mymain_help.c
|
|
||||||
# $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS)
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# 'auto.conf' should have been included
|
|
||||||
#
|
|
||||||
|
|
||||||
OBJ = $(OSRC:.c=.o)
|
|
||||||
|
|
||||||
INCDIR = ../include
|
|
||||||
|
|
||||||
#
|
|
||||||
# Rules
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: all prelib install test clean portclean
|
|
||||||
|
|
||||||
all:: prelib $(PROGS) install
|
|
||||||
@echo "+++++++++++ binaries $(PROGS) done"
|
|
||||||
|
|
||||||
prelib::
|
|
||||||
test -d $(PRTDIR) || mkdir $(PRTDIR) # because some linker may complain
|
|
||||||
test -d $(LIBDIR) || mkdir $(LIBDIR) # if -L$(LIBDIR) does not exist
|
|
||||||
|
|
||||||
install::
|
|
||||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
|
||||||
test -d $(BINDIR) || mkdir $(BINDIR)
|
|
||||||
-for f in $(PROGS) ; do \cp -f $$f $(BINDIR) ; done
|
|
||||||
|
|
||||||
test::
|
|
||||||
|
|
||||||
clean::
|
|
||||||
-\rm -f *.o cvstatic* *% *.bak so_loc*
|
|
||||||
-\rm -f $(PROGS)
|
|
||||||
|
|
||||||
portclean:: clean
|
|
||||||
-(! test -d $(BINDIR)) || (cd $(BINDIR) && \rm -f $(PROGS))
|
|
@ -1,43 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: $
|
|
||||||
#
|
|
||||||
# lxlib.targ
|
|
||||||
#
|
|
||||||
# default make targets for standard lx library
|
|
||||||
#
|
|
||||||
# you should define the 'LOCLIB' and 'OSRC' variables
|
|
||||||
#
|
|
||||||
# 'auto.conf' should have been included
|
|
||||||
#
|
|
||||||
|
|
||||||
OBJ = $(OSRC:.c=.o)
|
|
||||||
|
|
||||||
INCDIR = ../include
|
|
||||||
|
|
||||||
#
|
|
||||||
# Rules
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: all lib install test clean portclean
|
|
||||||
|
|
||||||
all:: lib install
|
|
||||||
@echo "+++++++++++ library $(LOCLIB) done"
|
|
||||||
|
|
||||||
lib:: $(OBJ)
|
|
||||||
$(AR) $(ARFLAGS) $(LOCLIB) $(OBJ)
|
|
||||||
$(RANLIB) $(LOCLIB)
|
|
||||||
|
|
||||||
install::
|
|
||||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
|
||||||
test -d $(LIBDIR) || mkdir $(LIBDIR)
|
|
||||||
\cp -f $(LOCLIB) $(LIBDIR)
|
|
||||||
$(RANLIB) $(LIBDIR)/$(LOCLIB)
|
|
||||||
|
|
||||||
test::
|
|
||||||
|
|
||||||
clean::
|
|
||||||
-\rm -f *.o cvstatic* *% *.bak so_loc*
|
|
||||||
-\rm -f $(LOCLIB)
|
|
||||||
|
|
||||||
portclean:: clean
|
|
||||||
-(! test -d $(LIBDIR)) || (cd $(LIBDIR) && \rm -f $(LOCLIB))
|
|
@ -1,48 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: package.targ 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# package.targ
|
|
||||||
#
|
|
||||||
# default make targets for standard package with configure
|
|
||||||
#
|
|
||||||
# you should define the 'PKG' variable
|
|
||||||
# (and optionally 'PKGTAR', 'PKGDIR')
|
|
||||||
#
|
|
||||||
|
|
||||||
PKGTAR ?= $(PKG).tgz
|
|
||||||
|
|
||||||
PKGDIR ?= build.$(PORTNAME)
|
|
||||||
|
|
||||||
PRTPATH = $(abspath $(PRTDIR))
|
|
||||||
|
|
||||||
#
|
|
||||||
# Rules
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: all clean test portclean pkg pkg.expand pkg.make pkg.install
|
|
||||||
|
|
||||||
all:: pkg
|
|
||||||
|
|
||||||
pkg.expand::
|
|
||||||
test -d $(PKGDIR) || mkdir $(PKGDIR)
|
|
||||||
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
|
|
||||||
|
|
||||||
pkg.make:: pkg.expand
|
|
||||||
test -f $(PKGDIR)/Makefile || (cd $(PKGDIR) && ./configure --prefix=$(PRTPATH))
|
|
||||||
$(MAKE) -C $(PKGDIR)
|
|
||||||
|
|
||||||
pkg.install:: pkg.make
|
|
||||||
$(MAKE) -C $(PKGDIR) install
|
|
||||||
|
|
||||||
pkg:: pkg.install
|
|
||||||
@echo "+++++++++++ package $(PKG) done"
|
|
||||||
|
|
||||||
test::
|
|
||||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) test
|
|
||||||
|
|
||||||
clean::
|
|
||||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) clean
|
|
||||||
|
|
||||||
portclean::
|
|
||||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) distclean
|
|
||||||
(! test -d $(PKGDIR)) || \rm -r $(PKGDIR)
|
|
@ -1,30 +0,0 @@
|
|||||||
#
|
|
||||||
# $Id: propagate.targ 1825 2013-02-26 09:39:47Z viari $
|
|
||||||
#
|
|
||||||
# propagate.targ
|
|
||||||
#
|
|
||||||
# default make targets for library containers
|
|
||||||
#
|
|
||||||
# you should define the 'DIRS' variable
|
|
||||||
#
|
|
||||||
# It will propagate 'MAKE <target>' to all
|
|
||||||
# directories listed in DIRS
|
|
||||||
#
|
|
||||||
|
|
||||||
#
|
|
||||||
# Rules
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: all _action $(DIRS)
|
|
||||||
|
|
||||||
.DEFAULT:
|
|
||||||
$(MAKE) ACTION=$@ _action
|
|
||||||
|
|
||||||
all::
|
|
||||||
$(MAKE) ACTION=all _action
|
|
||||||
|
|
||||||
_action: $(DIRS)
|
|
||||||
@echo "$(ACTION) done"
|
|
||||||
|
|
||||||
$(DIRS):
|
|
||||||
$(MAKE) -C $@ $(ACTION)
|
|
@ -13,7 +13,7 @@
|
|||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
#
|
#
|
||||||
|
|
||||||
include ../config/auto.conf
|
include ../../../../config/auto.conf
|
||||||
|
|
||||||
PROGS = kimono kimfit
|
PROGS = kimono kimfit
|
||||||
|
|
||||||
@ -29,8 +29,8 @@ OSRC = fasta_io.c \
|
|||||||
kim_genetic.c \
|
kim_genetic.c \
|
||||||
kim_codonskew.c
|
kim_codonskew.c
|
||||||
|
|
||||||
include ../config/targets/lxbin.targ
|
include ../../../../config/targets/lxbin.targ
|
||||||
include ../config/targets/help.targ
|
include ../../../../config/targets/help.targ
|
||||||
|
|
||||||
#
|
#
|
||||||
# file dependencies
|
# file dependencies
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
#
|
#
|
||||||
|
|
||||||
include ../config/targets/empty.targ
|
include ../../../../config/targets/empty.targ
|
||||||
|
|
||||||
clean::
|
clean::
|
||||||
-\rm -f *.tst
|
-\rm -f *.tst
|
||||||
|
30
src/muscle/Makefile
Executable file
30
src/muscle/Makefile
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for muscle
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Apr 97 : Created
|
||||||
|
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
include ../../config/auto.conf
|
||||||
|
|
||||||
|
DIRS = muscle3.8.31
|
||||||
|
|
||||||
|
include ../../config/targets/propagate.targ
|
||||||
|
|
||||||
|
include ../../config/targets/help.targ
|
||||||
|
|
||||||
|
# Forward 'all' to every directory listed in DIRS (see propagate.targ).
all::
	$(MAKE) ACTION=$@ _action

# Forward 'clean' to every directory listed in DIRS.
# The previous recipe ran '$(MAKE) -C lxpack portclean', a leftover from the
# lxpack Makefile this file was copied from; DIRS here does not contain
# 'lxpack', so that recipe could only fail.
clean::
	$(MAKE) ACTION=$@ _action
|
30
src/muscle/muscle3.8.31/Makefile
Executable file
30
src/muscle/muscle3.8.31/Makefile
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
#---------------------------------------------------------------
|
||||||
|
# $Id: $
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# @file: Makefile
|
||||||
|
# @desc: makefile for muscle3.8.31
|
||||||
|
#
|
||||||
|
# @history:
|
||||||
|
# @history:
|
||||||
|
# @+ <Gloup> : Apr 97 : Created
|
||||||
|
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||||
|
#
|
||||||
|
# @note: should be processed with gnu compatible make
|
||||||
|
# @note: helixware_compatible
|
||||||
|
#
|
||||||
|
# @end:
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
#
|
||||||
|
include ../../../config/auto.conf
|
||||||
|
|
||||||
|
DIRS = src
|
||||||
|
|
||||||
|
include ../../../config/targets/propagate.targ
|
||||||
|
|
||||||
|
include ../../../config/targets/help.targ
|
||||||
|
|
||||||
|
# Forward 'all' to every directory listed in DIRS (see propagate.targ).
all::
	$(MAKE) ACTION=$@ _action

# Forward 'clean' to every directory listed in DIRS.
# The previous recipe ran '$(MAKE) -C lxpack portclean', a leftover from the
# lxpack Makefile this file was copied from; DIRS here does not contain
# 'lxpack', so that recipe could only fail.
clean::
	$(MAKE) ACTION=$@ _action
|
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
include ../../../../config/auto.conf

# 'muscle' is deliberately not phony: it names the binary the recipe produces.
.PHONY: all install clean

all: muscle install

# Build the binary with the bundled 'mk' script, handing it the C++
# compiler selected by the port configuration.
muscle:
	chmod +x ./mk
	(export CXX=$(CXX) && ./mk)

# Copy the binary into the port's bin directory.
# Depends on 'muscle' so that 'make install' (or a parallel 'make all')
# never copies before the build; the directory is created first because
# 'cp muscle $(BINDIR)' would otherwise create a plain file named after it.
install: muscle
	test -d $(BINDIR) || mkdir -p $(BINDIR)
	cp muscle $(BINDIR)

# Local cleanup: remove object files and the binary from this directory.
# NOTE(review): 'mk' may leave other artefacts behind - extend if needed.
clean:
	-\rm -f *.o muscle
|
||||||
|
|
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
MUSCLE v3.0 source code README
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
http://www.drive5.com/muscle
|
||||||
|
|
||||||
|
This version of MUSCLE was built and tested on two platforms:
|
||||||
|
Windows XP and Red Hat Linux 8.0.
|
||||||
|
|
||||||
|
On Windows, I used Microsoft Visual C++ .Net, which I find
|
||||||
|
to be the best C++ compile / edit / test environment I've
|
||||||
|
tried on any platform. The Microsoft project file is
|
||||||
|
muscle.vcproj.
|
||||||
|
|
||||||
|
The Linux make file is Makefile. This is a very simple-minded
|
||||||
|
make file (because I am a Linux development novice), so should
|
||||||
|
be easy to understand. By default, it uses shared libraries,
|
||||||
|
but I found this to give problems when copying between
|
||||||
|
different Linux versions. The fix was to use the linker
|
||||||
|
flag -lm static (commented out), which gives a much bigger
|
||||||
|
but more portable binary. The posted binary was linked with
|
||||||
|
static libraries.
|
||||||
|
|
||||||
|
The source code was not written to be maintained by anyone
|
||||||
|
but me, so the usual apologies and caveats apply.
|
||||||
|
|
||||||
|
Bob Edgar,
|
||||||
|
January 2004
|
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
@ -0,0 +1,802 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
#include "profile.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
static void LogPP(const ProfPos &PP)
|
||||||
|
{
|
||||||
|
Log("ResidueGroup %u\n", PP.m_uResidueGroup);
|
||||||
|
Log("AllGaps %d\n", PP.m_bAllGaps);
|
||||||
|
Log("Occ %.3g\n", PP.m_fOcc);
|
||||||
|
Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
|
||||||
|
Log("Freqs ");
|
||||||
|
for (unsigned i = 0; i < 20; ++i)
|
||||||
|
if (PP.m_fcCounts[i] > 0)
|
||||||
|
Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = PA[i];
|
||||||
|
const ProfPos &PPB = PB[i];
|
||||||
|
#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||||
|
#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||||
|
eq(bAllGaps)
|
||||||
|
eq(uResidueGroup)
|
||||||
|
|
||||||
|
be(LL)
|
||||||
|
be(LG)
|
||||||
|
be(GL)
|
||||||
|
be(GG)
|
||||||
|
be(fOcc)
|
||||||
|
be(scoreGapOpen)
|
||||||
|
be(scoreGapClose)
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < 20; ++j)
|
||||||
|
{
|
||||||
|
#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
|
||||||
|
#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
|
||||||
|
bej(fcCounts[j]);
|
||||||
|
// eqj(uSortOrder[j]) // may differ due to ties, don't check?
|
||||||
|
bej(AAScores[j])
|
||||||
|
#undef eqj
|
||||||
|
#undef bej
|
||||||
|
}
|
||||||
|
#undef eq
|
||||||
|
#undef be
|
||||||
|
}
|
||||||
|
|
||||||
|
void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||||
|
unsigned uLengthB)
|
||||||
|
{
|
||||||
|
if (uLengthA != uLengthB)
|
||||||
|
Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
|
||||||
|
for (unsigned i = 0; i < uLengthB; ++i)
|
||||||
|
AssertProfPosEq(PA, PB, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if DEBUG
// Debug-only consistency check of the LL/LG/GL/GG values of a profile.
// For every position it asserts that
//   - LL + LG + GL + GG equals 1 (BTEq), and
//   - LL + GL of a position equals LL + LG of the position that follows it
//     (checked once looking backwards and once looking forwards).
// In non-DEBUG builds ValidateProf expands to nothing.
static void ValidateProf(const ProfPos *Prof, unsigned uLength)
{
    for (unsigned uPos = 0; uPos < uLength; ++uPos)
    {
        const ProfPos &Cur = Prof[uPos];

        const FCOUNT fcTotal = Cur.m_LL + Cur.m_LG + Cur.m_GL + Cur.m_GG;
        assert(BTEq(fcTotal, 1.0));

        if (uPos > 0)
        {
            const ProfPos &Before = Prof[uPos - 1];
            const FCOUNT fcOutOfBefore = Before.m_LL + Before.m_GL;
            const FCOUNT fcIntoCur = Cur.m_LL + Cur.m_LG;
            assert(BTEq(fcOutOfBefore, fcIntoCur));
        }

        // uLength >= 1 inside the loop, so this is the same test as
        // "uPos < uLength - 1".
        if (uPos + 1 < uLength)
        {
            const ProfPos &After = Prof[uPos + 1];
            const FCOUNT fcOutOfCur = Cur.m_LL + Cur.m_GL;
            const FCOUNT fcIntoAfter = After.m_LL + After.m_LG;
            assert(BTEq(fcOutOfCur, fcIntoAfter));
        }
    }
}
#else
#define ValidateProf(Prof, Length) /* empty */
#endif
|
||||||
|
|
||||||
|
// Recomputes the derived fields of profile position uPos from its
// frequencies: sort order, residue group, occupancy, gap open/close scores
// and the per-letter scores m_AAScores.
//   Prof    : profile (array of uLength positions), updated in place
//   uLength : number of positions in Prof
//   uPos    : index of the position to update
static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
{
    ProfPos &Col = Prof[uPos];

    SortCounts(Col.m_fcCounts, Col.m_uSortOrder);
    Col.m_uResidueGroup = ResidueGroupFromFCounts(Col.m_fcCounts);

    // "Occupancy"
    Col.m_fOcc = Col.m_LL + Col.m_GL;

    // Gap-open frequency at this position (i):
    // letter in i-1 and gap in i, i.e. LG in i.
    const FCOUNT fcOpen = Col.m_LG;

    // Gap-close frequency at this position:
    // gap in i and letter in i+1, i.e. GL in i+1.
    // The last position has no successor; GG + LG of the position itself
    // is used there.
    FCOUNT fcClose;
    const bool bHasNext = (uPos + 1 < uLength);
    if (bHasNext)
        fcClose = Prof[uPos + 1].m_GL;
    else
        fcClose = Col.m_GG + Col.m_LG;

    // Both scores are derived from g_scoreGapOpen, halved and scaled by
    // (1 - frequency).
    Col.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
    Col.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
#if DOUBLE_AFFINE
    Col.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
    Col.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
#endif

    // m_AAScores[a] = sum over letters b of count(b) * matrix[a][b].
    for (unsigned uLetterA = 0; uLetterA < g_AlphaSize; ++uLetterA)
    {
        SCORE scoreSum = 0;
        for (unsigned uLetterB = 0; uLetterB < g_AlphaSize; ++uLetterB)
            scoreSum += Col.m_fcCounts[uLetterB]*(*g_ptrScoreMatrix)[uLetterA][uLetterB];
        Col.m_AAScores[uLetterA] = scoreSum;
    }
}
|
||||||
|
|
||||||
|
// Recomputes the derived scores of every position of a profile by calling
// ScoresFromFreqsPos() for positions 0 .. uLength-1 in order.
void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
{
    unsigned uPos = 0;
    while (uPos < uLength)
    {
        ScoresFromFreqsPos(Prof, uLength, uPos);
        ++uPos;
    }
}
|
||||||
|
|
||||||
|
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||||
|
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||||
|
unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||||
|
|
||||||
|
++uColIndexCombined;
|
||||||
|
++uColIndexA;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||||
|
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||||
|
unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
++uColIndexCombined;
|
||||||
|
++uColIndexB;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||||
|
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||||
|
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
const unsigned uLengthA = msaA.GetColCount();
|
||||||
|
const unsigned uLengthB = msaB.GetColCount();
|
||||||
|
|
||||||
|
unsigned uNewColCount = uColCountA;
|
||||||
|
if (uColCountB > uNewColCount)
|
||||||
|
uNewColCount = uColCountB;
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uColCountA; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||||
|
c = UnalignChar(c);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uColCountB; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||||
|
c = UnalignChar(c);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||||
|
}
|
||||||
|
|
||||||
|
uColIndexCombined += uNewColCount;
|
||||||
|
uColIndexA += uColCountA;
|
||||||
|
uColIndexB += uColCountB;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||||
|
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||||
|
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
++uColIndexA;
|
||||||
|
++uColIndexB;
|
||||||
|
++uColIndexCombined;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||||
|
MSA &msaCombined)
|
||||||
|
{
|
||||||
|
msaCombined.Clear();
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("FastAlignProfiles\n");
|
||||||
|
Log("Template A:\n");
|
||||||
|
msaA.LogMe();
|
||||||
|
Log("Template B:\n");
|
||||||
|
msaB.LogMe();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const unsigned uColCountA = msaA.GetColCount();
|
||||||
|
const unsigned uColCountB = msaB.GetColCount();
|
||||||
|
|
||||||
|
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||||
|
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||||
|
|
||||||
|
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||||
|
|
||||||
|
// Copy sequence names into combined MSA
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||||
|
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||||
|
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uColIndexA = 0;
|
||||||
|
unsigned uColIndexB = 0;
|
||||||
|
unsigned uColIndexCombined = 0;
|
||||||
|
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||||
|
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
#if TRACE
|
||||||
|
Log("\nEdge %u %c%u.%u\n",
|
||||||
|
uEdgeIndex,
|
||||||
|
Edge.cType,
|
||||||
|
Edge.uPrefixLengthA,
|
||||||
|
Edge.uPrefixLengthB);
|
||||||
|
#endif
|
||||||
|
const char cType = Edge.cType;
|
||||||
|
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||||
|
unsigned uColCountA = 0;
|
||||||
|
if (uPrefixLengthA > 0)
|
||||||
|
{
|
||||||
|
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||||
|
const unsigned uTplColIndexA = uNodeIndexA;
|
||||||
|
if (uTplColIndexA > uColIndexA)
|
||||||
|
uColCountA = uTplColIndexA - uColIndexA;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||||
|
unsigned uColCountB = 0;
|
||||||
|
if (uPrefixLengthB > 0)
|
||||||
|
{
|
||||||
|
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||||
|
const unsigned uTplColIndexB = uNodeIndexB;
|
||||||
|
if (uTplColIndexB > uColIndexB)
|
||||||
|
uColCountB = uTplColIndexB - uColIndexB;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||||
|
assert(uColCountA == 0);
|
||||||
|
assert(uColCountB == 0);
|
||||||
|
AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||||
|
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
|
||||||
|
switch (cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
const unsigned uColA = uPrefixLengthA - 1;
|
||||||
|
const unsigned uColB = uPrefixLengthB - 1;
|
||||||
|
assert(uColIndexA == uColA);
|
||||||
|
assert(uColIndexB == uColB);
|
||||||
|
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||||
|
msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'D':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
const unsigned uColA = uPrefixLengthA - 1;
|
||||||
|
assert(uColIndexA == uColA);
|
||||||
|
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'I':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
const unsigned uColB = uPrefixLengthB - 1;
|
||||||
|
assert(uColIndexB == uColB);
|
||||||
|
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||||
|
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||||
|
|
||||||
|
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||||
|
assert(uInsertColCountA == 0);
|
||||||
|
assert(uInsertColCountB == 0);
|
||||||
|
AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||||
|
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
|
||||||
|
assert(msaCombined.GetColCount() == uEdgeCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Profile position that stands in for "the column before the first one":
// the SetGaps* functions below use it whenever a prefix length is 0.
// LL and occupancy are 1, everything else is 0.
// The initializers are positional, so their order must follow the member
// order of ProfPos; the trailing comments name the member each value is
// meant for.
static const ProfPos PPStart =
{
    false,                                                              // m_bAllGaps
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  // m_uSortOrder[21]
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },     // m_fcCounts[20]
    1.0,                                                                // m_LL
    0.0,                                                                // m_LG
    0.0,                                                                // m_GL
    0.0,                                                                // m_GG
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },     // 20 scores; presumably m_AAScores (comment used to read m_ALScores) - verify against ProfPos
    0,                                                                  // m_uResidueGroup
    1.0,                                                                // m_fOcc
    0.0,                                                                // m_fcStartOcc
    0.0,                                                                // m_fcEndOcc
    0.0,                                                                // m_scoreGapOpen
    0.0,                                                                // m_scoreGapClose
};
|
||||||
|
|
||||||
|
// MM
|
||||||
|
// Ai<41>1 Ai Out
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
//
|
||||||
|
// Bj<42>1 Bj
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
static void SetGapsMM(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
|
||||||
|
PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
|
||||||
|
PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
|
||||||
|
PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
|
||||||
|
}
|
||||||
|
|
||||||
|
// MD
|
||||||
|
// Ai<41>1 Ai Out
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
//
|
||||||
|
// Bj (-)
|
||||||
|
// X - ?L LG
|
||||||
|
// - - ?G GG
|
||||||
|
static void SetGapsMD(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wA*PPA.m_LL;
|
||||||
|
PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
|
||||||
|
PPO.m_GL = wA*PPA.m_GL;
|
||||||
|
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DD
|
||||||
|
// Ai<41>1 Ai Out
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
//
|
||||||
|
// (-) (-)
|
||||||
|
// - - ?? GG
|
||||||
|
static void SetGapsDD(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wA*PPA.m_LL;
|
||||||
|
PPO.m_LG = wA*PPA.m_LG;
|
||||||
|
PPO.m_GL = wA*PPA.m_GL;
|
||||||
|
PPO.m_GG = wA*PPA.m_GG + wB;
|
||||||
|
}
|
||||||
|
|
||||||
|
// MI
|
||||||
|
// Ai (-) Out
|
||||||
|
// X - ?L LG
|
||||||
|
// - - ?G GG
|
||||||
|
|
||||||
|
// Bj<42>1 Bj
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
static void SetGapsMI(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wB*PPB.m_LL;
|
||||||
|
PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
|
||||||
|
PPO.m_GL = wB*PPB.m_GL;
|
||||||
|
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DM
|
||||||
|
// Ai<41>1 Ai Out
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
//
|
||||||
|
// (-) Bj
|
||||||
|
// - X ?L GL
|
||||||
|
// - - ?G GG
|
||||||
|
static void SetGapsDM(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wA*PPA.m_LL;
|
||||||
|
PPO.m_LG = wA*PPA.m_LG;
|
||||||
|
PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
|
||||||
|
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// IM
|
||||||
|
// (-) Ai Out
|
||||||
|
// - X ?L GL
|
||||||
|
// - - ?G GG
|
||||||
|
|
||||||
|
// Bj<42>1 Bj
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
static void SetGapsIM(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wB*PPB.m_LL;
|
||||||
|
PPO.m_LG = wB*PPB.m_LG;
|
||||||
|
PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
|
||||||
|
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID
|
||||||
|
// (-) Ai Out
|
||||||
|
// - X ?L GL
|
||||||
|
// - - ?G GG
|
||||||
|
|
||||||
|
// Bj (-)
|
||||||
|
// X - ?L LG
|
||||||
|
// - - ?G GG
|
||||||
|
static void SetGapsID(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = 0;
|
||||||
|
PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||||
|
PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||||
|
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DI
|
||||||
|
// Ai (-) Out
|
||||||
|
// X - ?L LG
|
||||||
|
// - - ?G GG
|
||||||
|
|
||||||
|
// (-) Bj
|
||||||
|
// - X ?L GL
|
||||||
|
// - - ?G GG
|
||||||
|
static void SetGapsDI(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = 0;
|
||||||
|
PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||||
|
PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||||
|
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||||
|
}
|
||||||
|
|
||||||
|
// II
|
||||||
|
// (-) (-) Out
|
||||||
|
// - - ?? GG
|
||||||
|
|
||||||
|
// Bj<42>1 Bj
|
||||||
|
// X X LL LL
|
||||||
|
// X - LG LG
|
||||||
|
// - X GL GL
|
||||||
|
// - - GG GG
|
||||||
|
static void SetGapsII(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
PPO.m_LL = wB*PPB.m_LL;
|
||||||
|
PPO.m_LG = wB*PPB.m_LG;
|
||||||
|
PPO.m_GL = wB*PPB.m_GL;
|
||||||
|
PPO.m_GG = wB*PPB.m_GG + wA;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetFreqs(
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos *POut, unsigned uColIndexOut)
|
||||||
|
{
|
||||||
|
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||||
|
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||||
|
ProfPos &PPO = POut[uColIndexOut];
|
||||||
|
|
||||||
|
if (g_bNormalizeCounts)
|
||||||
|
{
|
||||||
|
const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
|
||||||
|
const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
|
||||||
|
FCOUNT fTotal = 0;
|
||||||
|
for (unsigned i = 0; i < 20; ++i)
|
||||||
|
{
|
||||||
|
const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
|
||||||
|
PPO.m_fcCounts[i] = f;
|
||||||
|
fTotal += f;
|
||||||
|
}
|
||||||
|
if (fTotal > 0)
|
||||||
|
for (unsigned i = 0; i < 20; ++i)
|
||||||
|
PPO.m_fcCounts[i] /= fTotal;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < 20; ++i)
|
||||||
|
PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlignTwoProfsGivenPath(const PWPath &Path,
|
||||||
|
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||||
|
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||||
|
ProfPos **ptrPOut, unsigned *ptruLengthOut)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
|
||||||
|
Path.LogMe();
|
||||||
|
#endif
|
||||||
|
assert(BTEq(wA + wB, 1.0));
|
||||||
|
|
||||||
|
unsigned uColIndexA = 0;
|
||||||
|
unsigned uColIndexB = 0;
|
||||||
|
unsigned uColIndexOut = 0;
|
||||||
|
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||||
|
ProfPos *POut = new ProfPos[uEdgeCount];
|
||||||
|
char cPrevType = 'M';
|
||||||
|
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
const char cType = Edge.cType;
|
||||||
|
|
||||||
|
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||||
|
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
|
||||||
|
uEdgeIndex,
|
||||||
|
Edge.cType,
|
||||||
|
Edge.uPrefixLengthA,
|
||||||
|
Edge.uPrefixLengthB,
|
||||||
|
uColIndexA,
|
||||||
|
uColIndexB);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
POut[uColIndexOut].m_bAllGaps = false;
|
||||||
|
switch (cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
SetFreqs(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
switch (cPrevType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
SetGapsMM(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'D':
|
||||||
|
SetGapsDM(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'I':
|
||||||
|
SetGapsIM(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
Quit("Bad cPrevType");
|
||||||
|
}
|
||||||
|
++uColIndexA;
|
||||||
|
++uColIndexB;
|
||||||
|
++uColIndexOut;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'D':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
SetFreqs(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, 0,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
switch (cPrevType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
SetGapsMD(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'D':
|
||||||
|
SetGapsDD(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'I':
|
||||||
|
SetGapsID(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
Quit("Bad cPrevType");
|
||||||
|
}
|
||||||
|
++uColIndexA;
|
||||||
|
++uColIndexOut;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'I':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
SetFreqs(
|
||||||
|
PA, uPrefixLengthA, 0,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
switch (cPrevType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
SetGapsMI(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'D':
|
||||||
|
SetGapsDI(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
case 'I':
|
||||||
|
SetGapsII(
|
||||||
|
PA, uPrefixLengthA, wA,
|
||||||
|
PB, uPrefixLengthB, wB,
|
||||||
|
POut, uColIndexOut);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
Quit("Bad cPrevType");
|
||||||
|
}
|
||||||
|
++uColIndexB;
|
||||||
|
++uColIndexOut;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
cPrevType = cType;
|
||||||
|
}
|
||||||
|
assert(uColIndexOut == uEdgeCount);
|
||||||
|
|
||||||
|
ProfScoresFromFreqs(POut, uEdgeCount);
|
||||||
|
ValidateProf(POut, uEdgeCount);
|
||||||
|
|
||||||
|
*ptrPOut = POut;
|
||||||
|
*ptruLengthOut = uEdgeCount;
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("AlignTwoProfsGivenPath:\n");
|
||||||
|
ListProfile(POut, uEdgeCount, 0);
|
||||||
|
#endif
|
||||||
|
}
|
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
#include "profile.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||||
|
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||||
|
unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||||
|
|
||||||
|
++uColIndexCombined;
|
||||||
|
++uColIndexA;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||||
|
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||||
|
unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
++uColIndexCombined;
|
||||||
|
++uColIndexB;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||||
|
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||||
|
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
const unsigned uLengthA = msaA.GetColCount();
|
||||||
|
const unsigned uLengthB = msaB.GetColCount();
|
||||||
|
|
||||||
|
unsigned uNewColCount = uColCountA;
|
||||||
|
if (uColCountB > uNewColCount)
|
||||||
|
uNewColCount = uColCountB;
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uColCountA; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||||
|
c = UnalignChar(c);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uColCountB; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||||
|
c = UnalignChar(c);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||||
|
{
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||||
|
}
|
||||||
|
|
||||||
|
uColIndexCombined += uNewColCount;
|
||||||
|
uColIndexA += uColCountA;
|
||||||
|
uColIndexB += uColCountB;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||||
|
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||||
|
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||||
|
uColIndexA, uColIndexB, uColIndexCombined);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||||
|
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||||
|
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
++uColIndexA;
|
||||||
|
++uColIndexB;
|
||||||
|
++uColIndexCombined;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||||
|
MSA &msaCombined)
|
||||||
|
{
|
||||||
|
msaCombined.Clear();
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("AlignTwoMSAsGivenPathSW\n");
|
||||||
|
Log("Template A:\n");
|
||||||
|
msaA.LogMe();
|
||||||
|
Log("Template B:\n");
|
||||||
|
msaB.LogMe();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const unsigned uColCountA = msaA.GetColCount();
|
||||||
|
const unsigned uColCountB = msaB.GetColCount();
|
||||||
|
|
||||||
|
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||||
|
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||||
|
|
||||||
|
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||||
|
|
||||||
|
// Copy sequence names into combined MSA
|
||||||
|
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||||
|
{
|
||||||
|
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||||
|
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||||
|
{
|
||||||
|
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||||
|
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uColIndexA = 0;
|
||||||
|
unsigned uColIndexB = 0;
|
||||||
|
unsigned uColIndexCombined = 0;
|
||||||
|
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||||
|
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
#if TRACE
|
||||||
|
Log("\nEdge %u %c%u.%u\n",
|
||||||
|
uEdgeIndex,
|
||||||
|
Edge.cType,
|
||||||
|
Edge.uPrefixLengthA,
|
||||||
|
Edge.uPrefixLengthB);
|
||||||
|
#endif
|
||||||
|
const char cType = Edge.cType;
|
||||||
|
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||||
|
unsigned uColCountA = 0;
|
||||||
|
if (uPrefixLengthA > 0)
|
||||||
|
{
|
||||||
|
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||||
|
const unsigned uTplColIndexA = uNodeIndexA;
|
||||||
|
if (uTplColIndexA > uColIndexA)
|
||||||
|
uColCountA = uTplColIndexA - uColIndexA;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||||
|
unsigned uColCountB = 0;
|
||||||
|
if (uPrefixLengthB > 0)
|
||||||
|
{
|
||||||
|
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||||
|
const unsigned uTplColIndexB = uNodeIndexB;
|
||||||
|
if (uTplColIndexB > uColIndexB)
|
||||||
|
uColCountB = uTplColIndexB - uColIndexB;
|
||||||
|
}
|
||||||
|
|
||||||
|
AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||||
|
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
|
||||||
|
switch (cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
const unsigned uColA = uPrefixLengthA - 1;
|
||||||
|
const unsigned uColB = uPrefixLengthB - 1;
|
||||||
|
assert(uColIndexA == uColA);
|
||||||
|
assert(uColIndexB == uColB);
|
||||||
|
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||||
|
msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'D':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthA > 0);
|
||||||
|
const unsigned uColA = uPrefixLengthA - 1;
|
||||||
|
assert(uColIndexA == uColA);
|
||||||
|
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'I':
|
||||||
|
{
|
||||||
|
assert(uPrefixLengthB > 0);
|
||||||
|
const unsigned uColB = uPrefixLengthB - 1;
|
||||||
|
assert(uColIndexB == uColB);
|
||||||
|
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||||
|
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||||
|
|
||||||
|
AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||||
|
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||||
|
}
|
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "profile.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
#include "textfile.h"
|
||||||
|
#include "timing.h"
|
||||||
|
|
||||||
|
// Align two multiple alignments: build a profile for each, align the
// profiles with GlobalAlign, and merge the inputs along the resulting
// path into msaOut. The path is returned in Path, and the function
// returns the score reported by GlobalAlign.
//
// bLockLeft sets the gap-open score of the first column of each profile
// to MINUS_INFINITY; bLockRight does the same for the gap-close score of
// the last column. A profile with zero columns has no first or last
// column and is left untouched.
//
// The two profiles are released with delete[] before returning.
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
  bool bLockLeft, bool bLockRight)
{
	const unsigned uLengthA = msa1.GetColCount();
	const unsigned uLengthB = msa2.GetColCount();

	ProfPos *PA = ProfileFromMSA(msa1);
	ProfPos *PB = ProfileFromMSA(msa2);

	if (bLockLeft)
	{
		if (uLengthA > 0)
			PA[0].m_scoreGapOpen = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[0].m_scoreGapOpen = MINUS_INFINITY;
	}

	if (bLockRight)
	{
		// Guard against uLength-1 wrapping around for an empty profile.
		if (uLengthA > 0)
			PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
	}

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);

	delete[] PA;
	delete[] PB;

	return Score;
}
|
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "profile.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
|
||||||
|
SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
|
||||||
|
unsigned uLengthB, PWPath &Path);
|
||||||
|
|
||||||
|
// Align two profiles with GlobalAlign and build the merged profile along
// the resulting path.
//
// The path is returned in Path. The merged profile and its length are
// returned through ptrPout and ptruLengthOut by AlignTwoProfsGivenPath.
// wA and wB are normalised to wA/(wA+wB) and wB/(wA+wB) before merging.
// The function returns the score reported by GlobalAlign.
SCORE AlignTwoProfs(
  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
{
	assert(uLengthA < 100000);
	assert(uLengthB < 100000);

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	// Each profile is passed with its own length (A used to be passed
	// with the length of B).
	AlignTwoProfsGivenPath(Path, PA, uLengthA, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
	  ptrPout, ptruLengthOut);

#if	HYDRO
	if (ALPHA_Amino == g_Alpha)
		Hydro(*ptrPout, *ptruLengthOut);
#endif
	return Score;
}
|
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include "msa.h"
|
||||||
|
#include "textfile.h"
|
||||||
|
|
||||||
|
const unsigned uCharsPerLine = 60;
|
||||||
|
const int MIN_NAME = 10;
|
||||||
|
const int MAX_NAME = 32;
|
||||||
|
|
||||||
|
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
|
||||||
|
|
||||||
|
// Number of characters of a sequence label that precede its first blank,
// or the whole length when the label contains no blank.
static int GetAlnNameLength(const char *ptrName)
{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
}

// Write the alignment to File in CLUSTAL (.aln) layout.
//
// A header line is written first: the CLUSTAL W banner when g_bClwStrict
// is set, otherwise a MUSCLE banner followed by an empty line. The
// alignment then follows in blocks of uCharsPerLine columns. Each block
// starts with an empty line, has one row per sequence (label cut at the
// first blank, padded to a common width between MIN_NAME and MAX_NAME,
// then the upper-cased residues) and ends with a conservation line
// produced by GetAlnConsensusChar.
//
// An alignment with zero columns produces the header only.
void MSA::ToAlnFile(TextFile &File) const
{
	if (g_bClwStrict)
		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
	else
	{
		File.PutString("MUSCLE ("
		  SHORT_VERSION ")"
		  " multiple sequence alignment\n");
		File.PutString("\n");
	}

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	// Common label width: the longest label, clamped to [MIN_NAME, MAX_NAME].
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
	{
		const int iLength = GetAlnNameLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
	}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

	// Round up; unlike (uColCount - 1)/uCharsPerLine + 1 this does not
	// wrap around when there are no columns.
	const unsigned uLineCount = (uColCount + uCharsPerLine - 1)/uCharsPerLine;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
	{
		File.PutString("\n");
		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;
		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = GetAlnNameLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;

			// Blank-pad, copy the label, terminate at the common width.
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, (size_t) iLength);
			Name[iLongestNameLength] = 0;

			File.PutFormat("%s ", Name);
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
			{
				const char c = GetChar(uSeqIndex, uColIndex);
				// toupper requires a value representable as unsigned char.
				File.PutFormat("%c", toupper((unsigned char) c));
			}
			File.PutString("\n");
		}

		// Conservation line, indented by an all-blank label.
		memset(Name, ' ', MAX_NAME);
		Name[iLongestNameLength] = 0;
		File.PutFormat("%s ", Name);
		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
		  ++uColIndex)
		{
			const char c = GetAlnConsensusChar(*this, uColIndex);
			File.PutChar(c);
		}
		File.PutString("\n");
	}
}
|
||||||
|
|
||||||
|
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
|
||||||
|
{
|
||||||
|
const unsigned uSeqCount = a.GetSeqCount();
|
||||||
|
unsigned BitMap = 0;
|
||||||
|
unsigned Count = 0;
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
{
|
||||||
|
unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
|
||||||
|
assert(uLetter < 32);
|
||||||
|
unsigned Bit = (1 << uLetter);
|
||||||
|
if (!(BitMap & Bit))
|
||||||
|
++Count;
|
||||||
|
BitMap |= Bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// '*' indicates positions which have a single, fully conserved residue
|
||||||
|
if (1 == Count)
|
||||||
|
return '*';
|
||||||
|
|
||||||
|
if (ALPHA_Amino != g_Alpha)
|
||||||
|
return ' ';
|
||||||
|
|
||||||
|
#define B(a) (1 << AX_##a)
|
||||||
|
#define S2(a, b) S(B(a) | B(b))
|
||||||
|
#define S3(a, b, c) S(B(a) | B(b) | B(c))
|
||||||
|
#define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d))
|
||||||
|
#define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';
|
||||||
|
|
||||||
|
#define W3(a, b, c) W(B(a) | B(b) | B(c))
|
||||||
|
#define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d))
|
||||||
|
#define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e))
|
||||||
|
#define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
|
||||||
|
#define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';
|
||||||
|
|
||||||
|
// ':' indicates that one of the following 'strong'
|
||||||
|
// groups is fully conserved
|
||||||
|
// STA
|
||||||
|
// NEQK
|
||||||
|
// NHQK
|
||||||
|
// NDEQ
|
||||||
|
// QHRK
|
||||||
|
// MILV
|
||||||
|
// MILF
|
||||||
|
// HY
|
||||||
|
// FYW
|
||||||
|
//
|
||||||
|
S3(S, T, A)
|
||||||
|
S4(N, E, Q, K)
|
||||||
|
S4(N, H, Q, K)
|
||||||
|
S4(N, D, E, Q)
|
||||||
|
S4(M, I, L, V)
|
||||||
|
S4(M, I, L, F)
|
||||||
|
S2(H, Y)
|
||||||
|
S3(F, Y, W)
|
||||||
|
|
||||||
|
// '.' indicates that one of the following 'weaker'
|
||||||
|
// groups is fully conserved
|
||||||
|
// CSA
|
||||||
|
// ATV
|
||||||
|
// SAG
|
||||||
|
// STNK
|
||||||
|
// STPA
|
||||||
|
// SGND
|
||||||
|
// SNDEQK
|
||||||
|
// NDEQHK
|
||||||
|
// NEQHRK
|
||||||
|
// FVLIM
|
||||||
|
// HFY
|
||||||
|
W3(C, S, A)
|
||||||
|
W3(A, T, V)
|
||||||
|
W3(S, A, G)
|
||||||
|
W4(S, T, N, K)
|
||||||
|
W4(S, T, P, A)
|
||||||
|
W4(S, G, N, D)
|
||||||
|
W6(S, N, D, E, Q, K)
|
||||||
|
W6(N, W, Q, H, R, K)
|
||||||
|
W5(F, V, L, I, M)
|
||||||
|
W3(H, F, Y)
|
||||||
|
|
||||||
|
return ' ';
|
||||||
|
}
|
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
|
/***
|
||||||
|
From Bioperl docs:
|
||||||
|
Extended DNA / RNA alphabet
|
||||||
|
------------------------------------------
|
||||||
|
Symbol Meaning Nucleic Acid
|
||||||
|
------------------------------------------
|
||||||
|
A A Adenine
|
||||||
|
C C Cytosine
|
||||||
|
G G Guanine
|
||||||
|
T T Thymine
|
||||||
|
U U Uracil
|
||||||
|
M A or C
|
||||||
|
R A or G
|
||||||
|
W A or T
|
||||||
|
S C or G
|
||||||
|
Y C or T
|
||||||
|
K G or T
|
||||||
|
V A or C or G
|
||||||
|
H A or C or T
|
||||||
|
D A or G or T
|
||||||
|
B C or G or T
|
||||||
|
X G or A or T or C
|
||||||
|
N G or A or T or C
|
||||||
|
|
||||||
|
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
|
||||||
|
Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
|
||||||
|
***/
|
||||||
|
|
||||||
|
// Character -> letter value for residue characters (filled by the Res
// macro). InitArrays fills it with 0xff bytes before an alphabet is set.
unsigned g_CharToLetter[MAX_CHAR];
|
||||||
|
// Character -> letter value including wildcard and gap characters
// (filled by the Res and Wild macros and by SetGapChar).
unsigned g_CharToLetterEx[MAX_CHAR];
|
||||||
|
|
||||||
|
// Letter value -> upper-case character for residues; '?' when unset.
char g_LetterToChar[MAX_ALPHA];
|
||||||
|
// Letter value -> character for residues, wildcards and the gap letter;
// '?' when unset.
char g_LetterExToChar[MAX_ALPHA_EX];
|
||||||
|
|
||||||
|
// Character -> lower-case form (gap characters map to themselves);
// '?' when unset.
char g_UnalignChar[MAX_CHAR];
|
||||||
|
// Character -> upper-case form (gap characters map to themselves);
// '?' when unset.
char g_AlignChar[MAX_CHAR];
|
||||||
|
|
||||||
|
// True for characters registered with the Wild macro.
bool g_IsWildcardChar[MAX_CHAR];
|
||||||
|
// True for characters registered with the Res or Wild macro.
bool g_IsResidueChar[MAX_CHAR];
|
||||||
|
|
||||||
|
// Alphabet selected by the last call to SetAlpha.
ALPHA g_Alpha = ALPHA_Undefined;
|
||||||
|
// Value returned by GetAlphaSize for g_Alpha; set by SetAlpha.
unsigned g_AlphaSize = 0;
|
||||||
|
|
||||||
|
// Res(c, Letter): register c as a residue character with value Letter.
// Both cases of c map to Letter in g_CharToLetter and g_CharToLetterEx,
// Letter maps back to the upper-case character, both cases are flagged
// in g_IsResidueChar, g_AlignChar yields the upper-case form and
// g_UnalignChar the lower-case form. Expands to a brace-enclosed block,
// so no trailing semicolon is needed.
#define Res(c, Letter)												\
	{																\
	const unsigned char ResLower = (unsigned char) tolower(c);		\
	const unsigned char ResUpper = (unsigned char) toupper(c);		\
	g_CharToLetter[ResUpper] = Letter;								\
	g_CharToLetterEx[ResUpper] = Letter;							\
	g_CharToLetter[ResLower] = Letter;								\
	g_CharToLetterEx[ResLower] = Letter;							\
	g_LetterToChar[Letter] = ResUpper;								\
	g_LetterExToChar[Letter] = ResUpper;							\
	g_IsResidueChar[ResUpper] = true;								\
	g_AlignChar[ResUpper] = ResUpper;								\
	g_UnalignChar[ResUpper] = ResLower;								\
	g_IsResidueChar[ResLower] = true;								\
	g_AlignChar[ResLower] = ResUpper;								\
	g_UnalignChar[ResLower] = ResLower;								\
	}
|
||||||
|
|
||||||
|
// Wild(c, Letter): register c as a wildcard character with value Letter.
// Same as Res except that g_CharToLetter and g_LetterToChar are left
// untouched and both cases of c are flagged in g_IsWildcardChar.
// Expands to a brace-enclosed block, so no trailing semicolon is needed.
#define Wild(c, Letter)												\
	{																\
	const unsigned char WildLower = (unsigned char) tolower(c);		\
	const unsigned char WildUpper = (unsigned char) toupper(c);		\
	g_CharToLetterEx[WildUpper] = Letter;							\
	g_CharToLetterEx[WildLower] = Letter;							\
	g_LetterExToChar[Letter] = WildUpper;							\
	g_IsResidueChar[WildUpper] = true;								\
	g_IsWildcardChar[WildUpper] = true;								\
	g_AlignChar[WildUpper] = WildUpper;								\
	g_UnalignChar[WildUpper] = WildLower;							\
	g_IsResidueChar[WildLower] = true;								\
	g_IsWildcardChar[WildLower] = true;								\
	g_AlignChar[WildLower] = WildUpper;								\
	g_UnalignChar[WildLower] = WildLower;							\
	}
|
||||||
|
|
||||||
|
// Number of residue letters in an alphabet: 20 for amino acid, 4 for
// DNA and RNA. Any other value is reported through Quit; 0 is returned
// if Quit returns.
static unsigned GetAlphaSize(ALPHA Alpha)
{
	if (ALPHA_Amino == Alpha)
		return 20;

	if (ALPHA_RNA == Alpha || ALPHA_DNA == Alpha)
		return 4;

	Quit("Invalid Alpha=%d", Alpha);
	return 0;
}
|
||||||
|
|
||||||
|
static void InitArrays()
|
||||||
|
{
|
||||||
|
memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
|
||||||
|
memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
|
||||||
|
|
||||||
|
memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
|
||||||
|
memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
|
||||||
|
|
||||||
|
memset(g_AlignChar, '?', sizeof(g_UnalignChar));
|
||||||
|
memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
|
||||||
|
|
||||||
|
memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetGapChar(char c)
|
||||||
|
{
|
||||||
|
unsigned char u = (unsigned char) c;
|
||||||
|
|
||||||
|
g_CharToLetterEx[u] = AX_GAP;
|
||||||
|
g_LetterExToChar[AX_GAP] = u;
|
||||||
|
g_AlignChar[u] = u;
|
||||||
|
g_UnalignChar[u] = u;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetAlphaDNA()
|
||||||
|
{
|
||||||
|
Res('A', NX_A)
|
||||||
|
Res('C', NX_C)
|
||||||
|
Res('G', NX_G)
|
||||||
|
Res('T', NX_T)
|
||||||
|
Wild('M', NX_M)
|
||||||
|
Wild('R', NX_R)
|
||||||
|
Wild('W', NX_W)
|
||||||
|
Wild('S', NX_S)
|
||||||
|
Wild('Y', NX_Y)
|
||||||
|
Wild('K', NX_K)
|
||||||
|
Wild('V', NX_V)
|
||||||
|
Wild('H', NX_H)
|
||||||
|
Wild('D', NX_D)
|
||||||
|
Wild('B', NX_B)
|
||||||
|
Wild('X', NX_X)
|
||||||
|
Wild('N', NX_N)
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetAlphaRNA()
|
||||||
|
{
|
||||||
|
Res('A', NX_A)
|
||||||
|
Res('C', NX_C)
|
||||||
|
Res('G', NX_G)
|
||||||
|
Res('U', NX_U)
|
||||||
|
Res('T', NX_T)
|
||||||
|
Wild('M', NX_M)
|
||||||
|
Wild('R', NX_R)
|
||||||
|
Wild('W', NX_W)
|
||||||
|
Wild('S', NX_S)
|
||||||
|
Wild('Y', NX_Y)
|
||||||
|
Wild('K', NX_K)
|
||||||
|
Wild('V', NX_V)
|
||||||
|
Wild('H', NX_H)
|
||||||
|
Wild('D', NX_D)
|
||||||
|
Wild('B', NX_B)
|
||||||
|
Wild('X', NX_X)
|
||||||
|
Wild('N', NX_N)
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetAlphaAmino()
|
||||||
|
{
|
||||||
|
Res('A', AX_A)
|
||||||
|
Res('C', AX_C)
|
||||||
|
Res('D', AX_D)
|
||||||
|
Res('E', AX_E)
|
||||||
|
Res('F', AX_F)
|
||||||
|
Res('G', AX_G)
|
||||||
|
Res('H', AX_H)
|
||||||
|
Res('I', AX_I)
|
||||||
|
Res('K', AX_K)
|
||||||
|
Res('L', AX_L)
|
||||||
|
Res('M', AX_M)
|
||||||
|
Res('N', AX_N)
|
||||||
|
Res('P', AX_P)
|
||||||
|
Res('Q', AX_Q)
|
||||||
|
Res('R', AX_R)
|
||||||
|
Res('S', AX_S)
|
||||||
|
Res('T', AX_T)
|
||||||
|
Res('V', AX_V)
|
||||||
|
Res('W', AX_W)
|
||||||
|
Res('Y', AX_Y)
|
||||||
|
|
||||||
|
Wild('B', AX_B)
|
||||||
|
Wild('X', AX_X)
|
||||||
|
Wild('Z', AX_Z)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Select the active alphabet.
// Clears all lookup tables, registers '.' and '-' as gap characters (in
// that order), installs the tables for Alpha, then records the alphabet
// and its size in g_Alpha / g_AlphaSize.  Any other value of Alpha is
// reported through Quit().
void SetAlpha(ALPHA Alpha)
	{
	InitArrays();

	SetGapChar('.');
	SetGapChar('-');

	if (ALPHA_Amino == Alpha)
		SetAlphaAmino();
	else if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
		{
		// NOTE(review): for ALPHA_DNA both the DNA and the RNA set-up run.
		// This reproduces the historical switch, whose DNA case had no
		// 'break' and fell through into the RNA case.  Confirm whether DNA
		// is really meant to pick up the RNA registrations (e.g. 'U')
		// before separating the two.
		if (ALPHA_DNA == Alpha)
			SetAlphaDNA();
		SetAlphaRNA();
		}
	else
		Quit("Invalid Alpha=%d", Alpha);

	g_AlphaSize = GetAlphaSize(Alpha);
	g_Alpha = Alpha;

	if (g_bVerbose)
		Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
	}
|
||||||
|
|
||||||
|
char GetWildcardChar()
|
||||||
|
{
|
||||||
|
switch (g_Alpha)
|
||||||
|
{
|
||||||
|
case ALPHA_Amino:
|
||||||
|
return 'X';
|
||||||
|
|
||||||
|
case ALPHA_DNA:
|
||||||
|
case ALPHA_RNA:
|
||||||
|
return 'N';
|
||||||
|
|
||||||
|
default:
|
||||||
|
Quit("Invalid Alpha=%d", g_Alpha);
|
||||||
|
}
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
|
||||||
|
// True if c is one of the nucleotide letters A, C, G, T, U, R, Y or N in
// either case.
bool IsNucleo(char c)
	{
	// strchr() treats the terminating NUL as part of the string, so
	// without this guard '\0' would be reported as a nucleotide.
	if ('\0' == c)
		return false;
	return strchr("ACGTURYNacgturyn", c) != 0;
	}
|
||||||
|
|
||||||
|
// True if c is one of the DNA letters A, G, C, T or N in either case.
bool IsDNA(char c)
	{
	// strchr() treats the terminating NUL as part of the string, so
	// without this guard '\0' would be reported as a DNA letter.
	if ('\0' == c)
		return false;
	return strchr("AGCTNagctn", c) != 0;
	}
|
||||||
|
|
||||||
|
// True if c is one of the RNA letters A, G, C, U or N in either case.
bool IsRNA(char c)
	{
	// strchr() treats the terminating NUL as part of the string, so
	// without this guard '\0' would be reported as an RNA letter.
	if ('\0' == c)
		return false;
	return strchr("AGCUNagcun", c) != 0;
	}
|
||||||
|
|
||||||
|
// Invalid-letter bookkeeping.
// InvalidLetters[ch] is non-zero once character ch has been reported;
// InvalidLetterCount counts how many reports were made in total.
static char InvalidLetters[256];
static int InvalidLetterCount = 0;

// Forget every invalid letter recorded so far.
// The counter is reset together with the flags.  Previously only the
// flags were cleared, so ReportInvalidLetters() would still emit a warning
// (with an empty letter list) after a clear.
void ClearInvalidLetterWarning()
	{
	memset(InvalidLetters, 0, sizeof(InvalidLetters));
	InvalidLetterCount = 0;
	}

// Record that character c was rejected.
// w is accepted for interface compatibility but is not used here.
void InvalidLetterWarning(char c, char w)
	{
	// Index through unsigned char so characters >= 0x80 stay in range.
	InvalidLetters[(unsigned char) c] = 1;
	++InvalidLetterCount;
	}
|
||||||
|
|
||||||
|
void ReportInvalidLetters()
|
||||||
|
{
|
||||||
|
if (0 == InvalidLetterCount)
|
||||||
|
return;
|
||||||
|
|
||||||
|
char Str[257];
|
||||||
|
memset(Str, 0, 257);
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
for (int i = 0; i < 256; ++i)
|
||||||
|
{
|
||||||
|
if (InvalidLetters[i])
|
||||||
|
Str[n++] = (char) i;
|
||||||
|
}
|
||||||
|
Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
|
||||||
|
ALPHAToStr(g_Alpha), Str);
|
||||||
|
}
|
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
#ifndef alpha_h
|
||||||
|
#define alpha_h
|
||||||
|
|
||||||
|
// ---- String checks and invalid-letter reporting -----------------------
bool StrHasAmino(const char *Str);
bool StrHasGap(const char *Str);

void ClearInvalidLetterWarning();
void InvalidLetterWarning(char c, char w);
void ReportInvalidLetters();

// ---- Lookup tables -----------------------------------------------------
// The arrays are defined elsewhere; their sizes are not visible here.
// Every macro that takes a character casts it to unsigned char before
// indexing, so characters with the high bit set index correctly.

// Character -> letter code, plain and extended alphabets.
extern unsigned g_CharToLetter[];
extern unsigned g_CharToLetterEx[];
#define CharToLetter(c)		(g_CharToLetter[(unsigned char) (c)])
#define CharToLetterEx(c)	(g_CharToLetterEx[(unsigned char) (c)])

// Letter code -> character, plain and extended alphabets.
extern char g_LetterToChar[];
extern char g_LetterExToChar[];
#define LetterToChar(u)		(g_LetterToChar[u])
#define LetterExToChar(u)	(g_LetterExToChar[u])

// Character -> character conversion tables.
extern char g_UnalignChar[];
extern char g_AlignChar[];
#define AlignChar(c)		(g_AlignChar[(unsigned char) (c)])
#define UnalignChar(c)		(g_UnalignChar[(unsigned char) (c)])

// Per-character classification flags.
extern bool g_IsWildcardChar[];
extern bool g_IsResidueChar[];
#define IsResidueChar(c)	(g_IsResidueChar[(unsigned char) (c)])
#define IsWildcardChar(c)	(g_IsWildcardChar[(unsigned char) (c)])

// Gap test is table-free.  Note that c is evaluated twice.
#define IsGapChar(c)		('-' == (c) || '.' == (c))
|
||||||
|
|
||||||
|
// AX = Amino alphabet with eXtensions (B, Z and X).
// Enumerators take consecutive values starting at 0, so AX_GAP is the
// largest value and AX_COUNT the number of codes including the gap.
enum AX
	{
	// The twenty plain letters, values 0..19.
	AX_A, AX_C, AX_D, AX_E, AX_F,
	AX_G, AX_H, AX_I, AX_K, AX_L,
	AX_M, AX_N, AX_P, AX_Q, AX_R,
	AX_S, AX_T, AX_V, AX_W, AX_Y,

	AX_X,	// Any

	AX_B,	// D or N
	AX_Z,	// E or Q

	AX_GAP,
	};
const unsigned AX_COUNT = AX_GAP + 1;
|
||||||
|
|
||||||
|
// NX = Nucleotide alphabet with extensions.
// The trailing comment on each ambiguity code lists the bases it stands
// for.  NX_U is an alias of NX_T, so it does not consume a value of its
// own.
enum NX
	{
	NX_A, NX_C, NX_G, NX_T,
	NX_U = NX_T,

	// Two bases
	NX_M,	// AC
	NX_R,	// AG
	NX_W,	// AT
	NX_S,	// CG
	NX_Y,	// CT
	NX_K,	// GT

	// Three bases
	NX_V,	// ACG
	NX_H,	// ACT
	NX_D,	// AGT
	NX_B,	// CGT

	// Any base
	NX_X,	// GATC
	NX_N,	// GATC

	NX_GAP
	};
const unsigned NX_COUNT = NX_GAP + 1;
|
||||||
|
|
||||||
|
const unsigned MAX_ALPHA = 20;
|
||||||
|
const unsigned MAX_ALPHA_EX = AX_COUNT;
|
||||||
|
const unsigned MAX_CHAR = 256;
|
||||||
|
|
||||||
|
extern ALPHA g_Alpha;
|
||||||
|
extern unsigned g_AlphaSize;
|
||||||
|
|
||||||
|
void SetAlpha(ALPHA Alpha);
|
||||||
|
char GetWildcardChar();
|
||||||
|
bool IsNucleo(char c);
|
||||||
|
bool IsDNA(char c);
|
||||||
|
bool IsRNA(char c);
|
||||||
|
|
||||||
|
#endif // alpha_h
|
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "objscore.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
|
||||||
|
SCORE SmoothScore[], double dCeil)
|
||||||
|
{
|
||||||
|
#define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))
|
||||||
|
|
||||||
|
if (1 != uWindowLength%2)
|
||||||
|
Quit("WindowSmooth=%u must be odd", uWindowLength);
|
||||||
|
|
||||||
|
if (uCount <= uWindowLength)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < uCount; ++i)
|
||||||
|
SmoothScore[i] = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned w2 = uWindowLength/2;
|
||||||
|
for (unsigned i = 0; i < w2; ++i)
|
||||||
|
{
|
||||||
|
SmoothScore[i] = 0;
|
||||||
|
SmoothScore[uCount - i - 1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
SCORE scoreWindowTotal = 0;
|
||||||
|
for (unsigned i = 0; i < uWindowLength; ++i)
|
||||||
|
{
|
||||||
|
scoreWindowTotal += Ceil(Score[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = w2; ; ++i)
|
||||||
|
{
|
||||||
|
SmoothScore[i] = scoreWindowTotal/uWindowLength;
|
||||||
|
if (i == uCount - w2 - 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
scoreWindowTotal -= Ceil(Score[i - w2]);
|
||||||
|
scoreWindowTotal += Ceil(Score[i + w2 + 1]);
|
||||||
|
}
|
||||||
|
#undef Ceil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find columns that score above the given threshold.
|
||||||
|
// A range of scores is defined between the average
|
||||||
|
// and the maximum. The threshold is a fraction 0.0 .. 1.0
|
||||||
|
// within that range, where 0.0 is the average score
|
||||||
|
// and 1.0 is the maximum score.
|
||||||
|
// "Grade" is by analogy with grading on a curve.
|
||||||
|
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
|
||||||
|
double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
|
||||||
|
{
|
||||||
|
SCORE scoreTotal = 0;
|
||||||
|
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||||
|
scoreTotal += Score[uIndex];
|
||||||
|
const SCORE scoreAvg = scoreTotal / uCount;
|
||||||
|
|
||||||
|
SCORE scoreMax = MINUS_INFINITY;
|
||||||
|
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||||
|
if (Score[uIndex] > scoreMax)
|
||||||
|
scoreMax = Score[uIndex];
|
||||||
|
|
||||||
|
unsigned uBestColCount = 0;
|
||||||
|
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||||
|
{
|
||||||
|
const SCORE s = Score[uIndex];
|
||||||
|
const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
|
||||||
|
if (dHeight >= dThreshold)
|
||||||
|
{
|
||||||
|
BestCols[uBestColCount] = uIndex;
|
||||||
|
++uBestColCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*ptruBestColCount = uBestColCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Best col only if all following criteria satisfied:
|
||||||
|
// (1) Score >= min
|
||||||
|
// (2) Smoothed score >= min
|
||||||
|
// (3) No gaps.
|
||||||
|
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
|
||||||
|
const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
|
||||||
|
unsigned BestCols[], unsigned *ptruBestColCount)
|
||||||
|
{
|
||||||
|
const unsigned uColCount = msa.GetColCount();
|
||||||
|
|
||||||
|
unsigned uBestColCount = 0;
|
||||||
|
for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
|
||||||
|
{
|
||||||
|
if (Score[uIndex] < dMinScore)
|
||||||
|
continue;
|
||||||
|
if (SmoothScore[uIndex] < dMinSmoothScore)
|
||||||
|
continue;
|
||||||
|
if (msa.ColumnHasGap(uIndex))
|
||||||
|
continue;
|
||||||
|
BestCols[uBestColCount] = uIndex;
|
||||||
|
++uBestColCount;
|
||||||
|
}
|
||||||
|
*ptruBestColCount = uBestColCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
|
||||||
|
unsigned BestCols[], unsigned uBestColCount)
|
||||||
|
{
|
||||||
|
const unsigned uColCount = msa.GetColCount();
|
||||||
|
const unsigned uSeqCount = msa.GetSeqCount();
|
||||||
|
|
||||||
|
Log("Col ");
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
Log("%u", uSeqIndex%10);
|
||||||
|
Log(" ");
|
||||||
|
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||||
|
{
|
||||||
|
Log("%3u ", uColIndex);
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
Log("%c", msa.GetChar(uSeqIndex, uColIndex));
|
||||||
|
|
||||||
|
Log(" %10.3f", Score[uColIndex]);
|
||||||
|
Log(" %10.3f", SmoothScore[uColIndex]);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < uBestColCount; ++i)
|
||||||
|
if (BestCols[i] == uColIndex)
|
||||||
|
Log(" <-- Best");
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If two best columns are found within a window, choose
|
||||||
|
// the highest-scoring. If more than two, choose the one
|
||||||
|
// closest to the center of the window.
|
||||||
|
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
|
||||||
|
unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
|
||||||
|
unsigned *ptruAnchorColCount)
|
||||||
|
{
|
||||||
|
unsigned uAnchorColCount = 0;
|
||||||
|
for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
|
||||||
|
{
|
||||||
|
unsigned uBestColIndex = BestCols[n];
|
||||||
|
unsigned uCountWithinWindow = 0;
|
||||||
|
for (unsigned i = n + 1; i < uBestColCount; ++i)
|
||||||
|
{
|
||||||
|
unsigned uBestColIndex2 = BestCols[i];
|
||||||
|
if (uBestColIndex2 - uBestColIndex >= uWindowLength)
|
||||||
|
break;
|
||||||
|
++uCountWithinWindow;
|
||||||
|
}
|
||||||
|
unsigned uAnchorCol = uBestColIndex;
|
||||||
|
if (1 == uCountWithinWindow)
|
||||||
|
{
|
||||||
|
unsigned uBestColIndex2 = BestCols[n+1];
|
||||||
|
if (Scores[uBestColIndex] > Scores[uBestColIndex2])
|
||||||
|
uAnchorCol = uBestColIndex;
|
||||||
|
else
|
||||||
|
uAnchorCol = uBestColIndex2;
|
||||||
|
}
|
||||||
|
else if (uCountWithinWindow > 1)
|
||||||
|
{
|
||||||
|
unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
|
||||||
|
int iClosestDist = uWindowLength;
|
||||||
|
unsigned uClosestCol = uBestColIndex;
|
||||||
|
for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)
|
||||||
|
{
|
||||||
|
unsigned uColIndex = BestCols[i];
|
||||||
|
int iDist = uColIndex - uBestColIndex;
|
||||||
|
if (iDist < 0)
|
||||||
|
iDist = -iDist;
|
||||||
|
if (iDist < iClosestDist)
|
||||||
|
{
|
||||||
|
uClosestCol = uColIndex;
|
||||||
|
iClosestDist = iDist;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uAnchorCol = uClosestCol;
|
||||||
|
}
|
||||||
|
AnchorCols[uAnchorColCount] = uAnchorCol;
|
||||||
|
++uAnchorColCount;
|
||||||
|
n += uCountWithinWindow + 1;
|
||||||
|
}
|
||||||
|
*ptruAnchorColCount = uAnchorColCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
|
||||||
|
unsigned *ptruAnchorColCount)
|
||||||
|
{
|
||||||
|
const unsigned uColCount = msa.GetColCount();
|
||||||
|
if (uColCount < 16)
|
||||||
|
{
|
||||||
|
*ptruAnchorColCount = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SCORE *MatchScore = new SCORE[uColCount];
|
||||||
|
SCORE *SmoothScore = new SCORE[uColCount];
|
||||||
|
unsigned *BestCols = new unsigned[uColCount];
|
||||||
|
|
||||||
|
GetLetterScores(msa, MatchScore);
|
||||||
|
WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
|
||||||
|
g_dSmoothScoreCeil);
|
||||||
|
|
||||||
|
unsigned uBestColCount;
|
||||||
|
FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
|
||||||
|
BestCols, &uBestColCount);
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
|
||||||
|
ptruAnchorColCount);
|
||||||
|
|
||||||
|
delete[] MatchScore;
|
||||||
|
delete[] SmoothScore;
|
||||||
|
delete[] BestCols;
|
||||||
|
}
|
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
// Map the edge types 'E' and 'J' onto 'D' and 'I' respectively; every
// other character is returned unchanged.
static char XlatEdgeType(char c)
	{
	switch (c)
		{
	case 'E':
		return 'D';
	case 'J':
		return 'I';
	default:
		return c;
		}
	}
|
||||||
|
|
||||||
|
static const char *BitsToStr(char Bits)
|
||||||
|
{
|
||||||
|
static char Str[] = "xM xD xI";
|
||||||
|
|
||||||
|
switch (Bits & BIT_xM)
|
||||||
|
{
|
||||||
|
case BIT_MM:
|
||||||
|
Str[0] = 'M';
|
||||||
|
break;
|
||||||
|
case BIT_DM:
|
||||||
|
Str[0] = 'D';
|
||||||
|
break;
|
||||||
|
case BIT_IM:
|
||||||
|
Str[0] = 'I';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (Bits & BIT_xD)
|
||||||
|
{
|
||||||
|
case BIT_MD:
|
||||||
|
Str[3] = 'M';
|
||||||
|
break;
|
||||||
|
case BIT_DD:
|
||||||
|
Str[3] = 'D';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (Bits & BIT_xI)
|
||||||
|
{
|
||||||
|
case BIT_MI:
|
||||||
|
Str[6] = 'M';
|
||||||
|
break;
|
||||||
|
case BIT_II:
|
||||||
|
Str[6] = 'I';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Str;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline char XChar(char Bits, char cType)
|
||||||
|
{
|
||||||
|
switch (cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
switch (Bits & BIT_xM)
|
||||||
|
{
|
||||||
|
case BIT_MM:
|
||||||
|
return 'M';
|
||||||
|
case BIT_DM:
|
||||||
|
return 'D';
|
||||||
|
case BIT_IM:
|
||||||
|
return 'I';
|
||||||
|
#if DOUBLE_AFFINE
|
||||||
|
case BIT_EM:
|
||||||
|
return 'E';
|
||||||
|
case BIT_JM:
|
||||||
|
return 'J';
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
Quit("Huh!?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
case 'D':
|
||||||
|
{
|
||||||
|
switch (Bits & BIT_xD)
|
||||||
|
{
|
||||||
|
case BIT_MD:
|
||||||
|
return 'M';
|
||||||
|
case BIT_DD:
|
||||||
|
return 'D';
|
||||||
|
}
|
||||||
|
Quit("Huh!?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
case 'I':
|
||||||
|
{
|
||||||
|
switch (Bits & BIT_xI)
|
||||||
|
{
|
||||||
|
case BIT_MI:
|
||||||
|
return 'M';
|
||||||
|
case BIT_II:
|
||||||
|
return 'I';
|
||||||
|
}
|
||||||
|
Quit("Huh!?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
#if DOUBLE_AFFINE
|
||||||
|
case 'E':
|
||||||
|
{
|
||||||
|
switch (Bits & BIT_xE)
|
||||||
|
{
|
||||||
|
case BIT_ME:
|
||||||
|
return 'M';
|
||||||
|
case BIT_EE:
|
||||||
|
return 'E';
|
||||||
|
}
|
||||||
|
Quit("Huh!?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
case 'J':
|
||||||
|
{
|
||||||
|
switch (Bits & BIT_xJ)
|
||||||
|
{
|
||||||
|
case BIT_MJ:
|
||||||
|
return 'M';
|
||||||
|
case BIT_JJ:
|
||||||
|
return 'J';
|
||||||
|
}
|
||||||
|
Quit("Huh!?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
Quit("Huh?");
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
|
||||||
|
char LastEdge, PWPath &Path)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("BitTraceBack\n");
|
||||||
|
#endif
|
||||||
|
Path.Clear();
|
||||||
|
|
||||||
|
PWEdge Edge;
|
||||||
|
Edge.uPrefixLengthA = uLengthA;
|
||||||
|
Edge.uPrefixLengthB = uLengthB;
|
||||||
|
char Bits = TraceBack[uLengthA][uLengthB];
|
||||||
|
Edge.cType = LastEdge;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
|
||||||
|
#endif
|
||||||
|
char cSave = Edge.cType;
|
||||||
|
Edge.cType = XlatEdgeType(cSave);
|
||||||
|
Path.PrependEdge(Edge);
|
||||||
|
Edge.cType = cSave;
|
||||||
|
|
||||||
|
unsigned PLA = Edge.uPrefixLengthA;
|
||||||
|
unsigned PLB = Edge.uPrefixLengthB;
|
||||||
|
char Bits = TraceBack[PLA][PLB];
|
||||||
|
char NextEdgeType = XChar(Bits, Edge.cType);
|
||||||
|
#if TRACE
|
||||||
|
Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
|
||||||
|
#endif
|
||||||
|
switch (Edge.cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
{
|
||||||
|
if (Edge.uPrefixLengthA == 0)
|
||||||
|
Quit("BitTraceBack MA=0");
|
||||||
|
if (Edge.uPrefixLengthB == 0)
|
||||||
|
Quit("BitTraceBack MA=0");
|
||||||
|
--(Edge.uPrefixLengthA);
|
||||||
|
--(Edge.uPrefixLengthB);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'D':
|
||||||
|
case 'E':
|
||||||
|
{
|
||||||
|
if (Edge.uPrefixLengthA == 0)
|
||||||
|
Quit("BitTraceBack DA=0");
|
||||||
|
--(Edge.uPrefixLengthA);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'I':
|
||||||
|
case 'J':
|
||||||
|
{
|
||||||
|
if (Edge.uPrefixLengthB == 0)
|
||||||
|
Quit("BitTraceBack IB=0");
|
||||||
|
--(Edge.uPrefixLengthB);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
Quit("BitTraceBack: Invalid edge %c", Edge);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
|
||||||
|
break;
|
||||||
|
|
||||||
|
Edge.cType = NextEdgeType;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Path.LogMe();
|
||||||
|
#endif
|
||||||
|
}
|
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
|
||||||
|
int BLOSUM62[20][20] =
|
||||||
|
{
|
||||||
|
// A C D E F G H I K L M N P Q R S T V W Y
|
||||||
|
{ 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2}, // A
|
||||||
|
{ 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2}, // C
|
||||||
|
{-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3}, // D
|
||||||
|
{-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2}, // E
|
||||||
|
{-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3}, // F
|
||||||
|
{ 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3}, // G
|
||||||
|
{-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2}, // H
|
||||||
|
{-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1}, // I
|
||||||
|
{-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2}, // K
|
||||||
|
{-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1}, // L
|
||||||
|
{-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1}, // M
|
||||||
|
{-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2}, // N
|
||||||
|
{-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3}, // P
|
||||||
|
{-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1}, // Q
|
||||||
|
{-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2}, // R
|
||||||
|
{ 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2}, // S
|
||||||
|
{ 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2}, // T
|
||||||
|
{ 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1}, // V
|
||||||
|
{-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2}, // W
|
||||||
|
{-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7}, // Y
|
||||||
|
};
|
||||||
|
|
||||||
|
double BLOSUM62_Expected = -0.5209;
|
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
|
||||||
|
#define GAPVAL 0.3
|
||||||
|
#define GAPGAPVAL 5.0
|
||||||
|
|
||||||
|
// Blosum62 log-average factor matrix
|
||||||
|
static float Blosum62LA[20][20] =
|
||||||
|
{
|
||||||
|
#define v(x) ((float) x)
|
||||||
|
#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
|
||||||
|
{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
|
||||||
|
v(R), v(S), v(T), v(V), v(W), v(Y) },
|
||||||
|
|
||||||
|
// Blosum62 log average matrix
|
||||||
|
// A C D E F
|
||||||
|
// G H I K L
|
||||||
|
// M N P Q R
|
||||||
|
// S T V W Y
|
||||||
|
S_ROW( 0, 'A', 3.9029401, 0.8679881, 0.5446049, 0.7412640, 0.4648942,
|
||||||
|
1.0568696, 0.5693654, 0.6324813, 0.7753898, 0.6019460,
|
||||||
|
0.7231498, 0.5883077, 0.7541214, 0.7568035, 0.6126988,
|
||||||
|
1.4721037, 0.9844022, 0.9364584, 0.4165484, 0.5426125)
|
||||||
|
|
||||||
|
S_ROW( 1, 'C', 0.8679881, 19.5765802, 0.3014542, 0.2859347, 0.4389910,
|
||||||
|
0.4203886, 0.3550472, 0.6534589, 0.3491296, 0.6422760,
|
||||||
|
0.6113537, 0.3978026, 0.3795628, 0.3657796, 0.3089379,
|
||||||
|
0.7384148, 0.7405530, 0.7558448, 0.4499807, 0.4342013)
|
||||||
|
|
||||||
|
S_ROW( 2, 'D', 0.5446049, 0.3014542, 7.3979253, 1.6878109, 0.2989696,
|
||||||
|
0.6343015, 0.6785593, 0.3390155, 0.7840905, 0.2866128,
|
||||||
|
0.3464547, 1.5538520, 0.5987177, 0.8970811, 0.5732000,
|
||||||
|
0.9135051, 0.6947898, 0.3365004, 0.2321050, 0.3456829)
|
||||||
|
|
||||||
|
S_ROW( 3, 'E', 0.7412640, 0.2859347, 1.6878109, 5.4695276, 0.3307441,
|
||||||
|
0.4812675, 0.9600400, 0.3305223, 1.3082782, 0.3728734,
|
||||||
|
0.5003421, 0.9112983, 0.6792027, 1.9017376, 0.9607983,
|
||||||
|
0.9503570, 0.7414260, 0.4289431, 0.3743021, 0.4964664)
|
||||||
|
|
||||||
|
S_ROW( 4, 'F', 0.4648942, 0.4389910, 0.2989696, 0.3307441, 8.1287983,
|
||||||
|
0.3406407, 0.6519893, 0.9457698, 0.3440433, 1.1545978,
|
||||||
|
1.0043715, 0.3542882, 0.2874440, 0.3339729, 0.3807263,
|
||||||
|
0.4399736, 0.4816930, 0.7450894, 1.3743775, 2.7693817)
|
||||||
|
|
||||||
|
S_ROW( 5, 'G', 1.0568696, 0.4203886, 0.6343015, 0.4812675, 0.3406407,
|
||||||
|
6.8763075, 0.4929663, 0.2750096, 0.5888716, 0.2845039,
|
||||||
|
0.3954865, 0.8637114, 0.4773858, 0.5386498, 0.4499840,
|
||||||
|
0.9035965, 0.5792712, 0.3369551, 0.4216898, 0.3487141)
|
||||||
|
|
||||||
|
S_ROW( 6, 'H', 0.5693654, 0.3550472, 0.6785593, 0.9600400, 0.6519893,
|
||||||
|
0.4929663, 13.5060070, 0.3262878, 0.7788884, 0.3806759,
|
||||||
|
0.5841316, 1.2220028, 0.4728797, 1.1679835, 0.9170473,
|
||||||
|
0.7367319, 0.5575021, 0.3394474, 0.4440859, 1.7979036)
|
||||||
|
|
||||||
|
S_ROW( 7, 'I', 0.6324813, 0.6534589, 0.3390155, 0.3305223, 0.9457698,
|
||||||
|
0.2750096, 0.3262878, 3.9979299, 0.3963730, 1.6944349,
|
||||||
|
1.4777449, 0.3279345, 0.3846629, 0.3829375, 0.3547509,
|
||||||
|
0.4431634, 0.7798163, 2.4175121, 0.4088732, 0.6303898)
|
||||||
|
|
||||||
|
S_ROW( 8, 'K', 0.7753898, 0.3491296, 0.7840905, 1.3082782, 0.3440433,
|
||||||
|
0.5888716, 0.7788884, 0.3963730, 4.7643359, 0.4282702,
|
||||||
|
0.6253033, 0.9398419, 0.7037741, 1.5543233, 2.0768092,
|
||||||
|
0.9319192, 0.7929060, 0.4565429, 0.3589319, 0.5321784)
|
||||||
|
|
||||||
|
S_ROW( 9, 'L', 0.6019460, 0.6422760, 0.2866128, 0.3728734, 1.1545978,
|
||||||
|
0.2845039, 0.3806759, 1.6944349, 0.4282702, 3.7966214,
|
||||||
|
1.9942957, 0.3100430, 0.3711219, 0.4773261, 0.4739194,
|
||||||
|
0.4288939, 0.6603292, 1.3142355, 0.5680359, 0.6920589)
|
||||||
|
|
||||||
|
S_ROW(10, 'M', 0.7231498, 0.6113537, 0.3464547, 0.5003421, 1.0043715,
|
||||||
|
0.3954865, 0.5841316, 1.4777449, 0.6253033, 1.9942957,
|
||||||
|
6.4814549, 0.4745299, 0.4238960, 0.8642486, 0.6226249,
|
||||||
|
0.5985578, 0.7938018, 1.2689365, 0.6103022, 0.7083636)
|
||||||
|
|
||||||
|
S_ROW(11, 'N', 0.5883077, 0.3978026, 1.5538520, 0.9112983, 0.3542882,
|
||||||
|
0.8637114, 1.2220028, 0.3279345, 0.9398419, 0.3100430,
|
||||||
|
0.4745299, 7.0940964, 0.4999337, 1.0005835, 0.8586298,
|
||||||
|
1.2315289, 0.9841525, 0.3690340, 0.2777841, 0.4860309)
|
||||||
|
|
||||||
|
S_ROW(12, 'P', 0.7541214, 0.3795628, 0.5987177, 0.6792027, 0.2874440,
|
||||||
|
0.4773858, 0.4728797, 0.3846629, 0.7037741, 0.3711219,
|
||||||
|
0.4238960, 0.4999337, 12.8375452, 0.6412803, 0.4815348,
|
||||||
|
0.7555033, 0.6888962, 0.4430825, 0.2818321, 0.3635216)
|
||||||
|
|
||||||
|
S_ROW(13, 'Q', 0.7568035, 0.3657796, 0.8970811, 1.9017376, 0.3339729,
|
||||||
|
0.5386498, 1.1679835, 0.3829375, 1.5543233, 0.4773261,
|
||||||
|
0.8642486, 1.0005835, 0.6412803, 6.2444210, 1.4057958,
|
||||||
|
0.9655559, 0.7913219, 0.4667781, 0.5093584, 0.6110951)
|
||||||
|
|
||||||
|
S_ROW(14, 'R', 0.6126988, 0.3089379, 0.5732000, 0.9607983, 0.3807263,
|
||||||
|
0.4499840, 0.9170473, 0.3547509, 2.0768092, 0.4739194,
|
||||||
|
0.6226249, 0.8586298, 0.4815348, 1.4057958, 6.6655769,
|
||||||
|
0.7671661, 0.6777544, 0.4200721, 0.3951049, 0.5559652)
|
||||||
|
|
||||||
|
S_ROW(15, 'S', 1.4721037, 0.7384148, 0.9135051, 0.9503570, 0.4399736,
|
||||||
|
0.9035965, 0.7367319, 0.4431634, 0.9319192, 0.4288939,
|
||||||
|
0.5985578, 1.2315289, 0.7555033, 0.9655559, 0.7671661,
|
||||||
|
3.8428476, 1.6139205, 0.5652240, 0.3853031, 0.5575206)
|
||||||
|
|
||||||
|
S_ROW(16, 'T', 0.9844022, 0.7405530, 0.6947898, 0.7414260, 0.4816930,
|
||||||
|
0.5792712, 0.5575021, 0.7798163, 0.7929060, 0.6603292,
|
||||||
|
0.7938018, 0.9841525, 0.6888962, 0.7913219, 0.6777544,
|
||||||
|
1.6139205, 4.8321048, 0.9809432, 0.4309317, 0.5731577)
|
||||||
|
|
||||||
|
S_ROW(17, 'V', 0.9364584, 0.7558448, 0.3365004, 0.4289431, 0.7450894,
|
||||||
|
0.3369551, 0.3394474, 2.4175121, 0.4565429, 1.3142355,
|
||||||
|
1.2689365, 0.3690340, 0.4430825, 0.4667781, 0.4200721,
|
||||||
|
0.5652240, 0.9809432, 3.6921553, 0.3744576, 0.6580390)
|
||||||
|
|
||||||
|
S_ROW(18, 'W', 0.4165484, 0.4499807, 0.2321050, 0.3743021, 1.3743775,
|
||||||
|
0.4216898, 0.4440859, 0.4088732, 0.3589319, 0.5680359,
|
||||||
|
0.6103022, 0.2777841, 0.2818321, 0.5093584, 0.3951049,
|
||||||
|
0.3853031, 0.4309317, 0.3744576, 38.1077830, 2.1098056)
|
||||||
|
|
||||||
|
S_ROW(19, 'Y', 0.5426125, 0.4342013, 0.3456829, 0.4964664, 2.7693817,
|
||||||
|
0.3487141, 1.7979036, 0.6303898, 0.5321784, 0.6920589,
|
||||||
|
0.7083636, 0.4860309, 0.3635216, 0.6110951, 0.5559652,
|
||||||
|
0.5575206, 0.5731577, 0.6580390, 2.1098056, 9.8322054)
|
||||||
|
};
|
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
@ -0,0 +1,666 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "clust.h"
|
||||||
|
#include "clustset.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
// Construct an empty clustering: no nodes, no leaves, no distances and
// no input set attached.
Clust::Clust()
	{
	// Every array the destructor releases starts out null so that
	// destroying an object on which Create() was never called is safe.
	// m_ClusterIndexToNodeIndex used to be left uninitialised although
	// the destructor delete[]s it.
	m_Nodes = 0;
	m_dDist = 0;
	m_ClusterIndexToNodeIndex = 0;

	m_ptrClusterList = 0;
	m_ptrSet = 0;

	m_uNodeCount = 0;
	m_uLeafCount = 0;
	m_uClusterCount = 0;

	m_JoinStyle = JOIN_Undefined;
	}
|
||||||
|
|
||||||
|
// Release the arrays owned by the clustering.
// NOTE(review): Create() allocates m_uLeafIndexes for every leaf node.
// Whether ClustNode's destructor frees it is not visible here -- confirm
// that it does not leak.
Clust::~Clust()
	{
	delete[] m_Nodes;					// node table
	delete[] m_dDist;					// distance storage
	delete[] m_ClusterIndexToNodeIndex;	// cluster -> node index map
	}
|
||||||
|
|
||||||
|
// Build the cluster tree for the leaves of Set with the given method.
// Steps:
//   1. translate Method into a join style and a linkage style;
//   2. allocate 2*leaves - 1 nodes and put every leaf on the cluster list;
//   3. fill the distance table for all leaf pairs via Set.ComputeDist();
//   4. call CreateCluster() once per internal node.
// Quit() is called for an unknown method or if there are fewer than two
// leaves.
void Clust::Create(ClustSet &Set, CLUSTER Method)
	{
	m_ptrSet = &Set;
	SetLeafCount(Set.GetLeafCount());

	switch (Method)
		{
	case CLUSTER_UPGMA:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Avg;
		break;

	case CLUSTER_UPGMAMax:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Max;
		break;

	case CLUSTER_UPGMAMin:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Min;
		break;

	case CLUSTER_UPGMB:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Biased;
		break;

	case CLUSTER_NeighborJoining:
		m_JoinStyle = JOIN_NeighborJoining;
		m_CentroidStyle = LINKAGE_NeighborJoining;
		break;

	default:
		Quit("Clust::Create, invalid method %d", Method);
		}

	if (m_uLeafCount <= 1)
		Quit("Clust::Create: no leaves");

	m_uNodeCount = 2*m_uLeafCount - 1;
	m_Nodes = new ClustNode[m_uNodeCount];
	m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];

	// Nodes [0, leaf count) are the leaves: each starts as a cluster of
	// size one holding its own index.  The remaining nodes are internal
	// and start out empty.
	m_ptrClusterList = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		ClustNode &Node = m_Nodes[uNodeIndex];
		Node.m_uIndex = uNodeIndex;

		const bool bLeaf = (uNodeIndex < m_uLeafCount);
		if (!bLeaf)
			{
			Node.m_uSize = 0;
			continue;
			}

		Node.m_uSize = 1;
		Node.m_uLeafIndexes = new unsigned[1];
		Node.m_uLeafIndexes[0] = uNodeIndex;
		AddToClusterList(uNodeIndex);
		}

	// Initial distances between all pairs of leaves (j < i).
	SetProgressDesc("Build dist matrix");
	const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
	unsigned uPairIndex = 0;
	for (unsigned i = 0; i < m_uLeafCount; ++i)
		{
		for (unsigned j = 0; j < i; ++j)
			{
			const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
			SetDist(i, j, dDist);

			// Report progress only every 10000 pairs.
			if (uPairIndex%10000 == 0)
				Progress(uPairIndex, uPairCount);
			++uPairIndex;
			}
		}
	ProgressStepsDone();

	// One CreateCluster() call per internal node of the tree.
	SetProgressDesc("Build guide tree");
	m_uClusterCount = m_uLeafCount;
	const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
	for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		const unsigned uStep = uNodeIndex + 1 - m_uLeafCount;
		Progress(uStep, uInternalNodeCount);
		CreateCluster();
		}
	ProgressStepsDone();
	}
|
||||||
|
|
||||||
|
void Clust::CreateCluster()
|
||||||
|
{
|
||||||
|
unsigned uLeftNodeIndex;
|
||||||
|
unsigned uRightNodeIndex;
|
||||||
|
float dLeftLength;
|
||||||
|
float dRightLength;
|
||||||
|
ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);
|
||||||
|
|
||||||
|
const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;
|
||||||
|
|
||||||
|
JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
|
||||||
|
uNewNodeIndex);
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
|
||||||
|
uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Compute distances to other clusters
|
||||||
|
--m_uClusterCount;
|
||||||
|
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||||
|
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||||
|
{
|
||||||
|
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (uNewNodeIndex == uNodeIndex)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
|
||||||
|
SetDist(uNewNodeIndex, uNodeIndex, dDist);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||||
|
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||||
|
{
|
||||||
|
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (uNewNodeIndex == uNodeIndex)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
#if REDLACK
|
||||||
|
const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
|
||||||
|
InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||||
|
float *ptrdLeftLength, float *ptrdRightLength)
|
||||||
|
{
|
||||||
|
switch (m_JoinStyle)
|
||||||
|
{
|
||||||
|
case JOIN_NearestNeighbor:
|
||||||
|
ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||||
|
ptrdRightLength);
|
||||||
|
return;
|
||||||
|
case JOIN_NeighborJoining:
|
||||||
|
ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||||
|
ptrdRightLength);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
|
||||||
|
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||||
|
{
|
||||||
|
const unsigned uClusterCount = GetClusterCount();
|
||||||
|
|
||||||
|
unsigned uMinLeftNodeIndex;
|
||||||
|
unsigned uMinRightNodeIndex;
|
||||||
|
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||||
|
|
||||||
|
float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||||
|
|
||||||
|
const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
|
||||||
|
const float dRightHeight = GetHeight(uMinRightNodeIndex);
|
||||||
|
|
||||||
|
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||||
|
*ptruRightIndex = uMinRightNodeIndex;
|
||||||
|
*ptrdLeftLength = dMinDist/2 - dLeftHeight;
|
||||||
|
*ptrdRightLength = dMinDist/2 - dRightHeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
|
||||||
|
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||||
|
{
|
||||||
|
const unsigned uClusterCount = GetClusterCount();
|
||||||
|
|
||||||
|
//unsigned uMinLeftNodeIndex = uInsane;
|
||||||
|
//unsigned uMinRightNodeIndex = uInsane;
|
||||||
|
//float dMinD = PLUS_INFINITY;
|
||||||
|
//for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||||
|
// {
|
||||||
|
// const float ri = Calc_r(i);
|
||||||
|
// for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
|
||||||
|
// {
|
||||||
|
// const float rj = Calc_r(j);
|
||||||
|
// const float dij = GetDist(i, j);
|
||||||
|
// const float Dij = dij - (ri + rj);
|
||||||
|
// if (Dij < dMinD)
|
||||||
|
// {
|
||||||
|
// dMinD = Dij;
|
||||||
|
// uMinLeftNodeIndex = i;
|
||||||
|
// uMinRightNodeIndex = j;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
unsigned uMinLeftNodeIndex;
|
||||||
|
unsigned uMinRightNodeIndex;
|
||||||
|
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||||
|
|
||||||
|
const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||||
|
const float rL = Calc_r(uMinLeftNodeIndex);
|
||||||
|
const float rR = Calc_r(uMinRightNodeIndex);
|
||||||
|
|
||||||
|
const float dLeftLength = (dDistLR + rL - rR)/2;
|
||||||
|
const float dRightLength = (dDistLR - rL + rR)/2;
|
||||||
|
|
||||||
|
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||||
|
*ptruRightIndex = uMinRightNodeIndex;
|
||||||
|
*ptrdLeftLength = dLeftLength;
|
||||||
|
*ptrdRightLength = dRightLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
|
||||||
|
float dRightLength, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
ClustNode &Parent = m_Nodes[uNodeIndex];
|
||||||
|
ClustNode &Left = m_Nodes[uLeftIndex];
|
||||||
|
ClustNode &Right = m_Nodes[uRightIndex];
|
||||||
|
|
||||||
|
Left.m_dLength = dLeftLength;
|
||||||
|
Right.m_dLength = dRightLength;
|
||||||
|
|
||||||
|
Parent.m_ptrLeft = &Left;
|
||||||
|
Parent.m_ptrRight = &Right;
|
||||||
|
|
||||||
|
Left.m_ptrParent = &Parent;
|
||||||
|
Right.m_ptrParent = &Parent;
|
||||||
|
|
||||||
|
const unsigned uLeftSize = Left.m_uSize;
|
||||||
|
const unsigned uRightSize = Right.m_uSize;
|
||||||
|
const unsigned uParentSize = uLeftSize + uRightSize;
|
||||||
|
Parent.m_uSize = uParentSize;
|
||||||
|
|
||||||
|
assert(0 == Parent.m_uLeafIndexes);
|
||||||
|
Parent.m_uLeafIndexes = new unsigned[uParentSize];
|
||||||
|
|
||||||
|
const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
|
||||||
|
const unsigned uRightBytes = uRightSize*sizeof(unsigned);
|
||||||
|
memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
|
||||||
|
memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);
|
||||||
|
|
||||||
|
DeleteFromClusterList(uLeftIndex);
|
||||||
|
DeleteFromClusterList(uRightIndex);
|
||||||
|
AddToClusterList(uNodeIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::Calc_r(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
const unsigned uClusterCount = GetClusterCount();
|
||||||
|
if (2 == uClusterCount)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
float dSum = 0;
|
||||||
|
for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||||
|
{
|
||||||
|
if (i == uNodeIndex)
|
||||||
|
continue;
|
||||||
|
dSum += GetDist(uNodeIndex, i);
|
||||||
|
}
|
||||||
|
return dSum/(uClusterCount - 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
switch (m_CentroidStyle)
|
||||||
|
{
|
||||||
|
case LINKAGE_Avg:
|
||||||
|
return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
|
||||||
|
|
||||||
|
case LINKAGE_Min:
|
||||||
|
return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
|
||||||
|
|
||||||
|
case LINKAGE_Max:
|
||||||
|
return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
|
||||||
|
|
||||||
|
case LINKAGE_Biased:
|
||||||
|
return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
|
||||||
|
|
||||||
|
case LINKAGE_NeighborJoining:
|
||||||
|
return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
|
||||||
|
}
|
||||||
|
Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
|
||||||
|
return (float) g_dNAN;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||||
|
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||||
|
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||||
|
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||||
|
return (dDistL < dDistR ? dDistL : dDistR);
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||||
|
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||||
|
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||||
|
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||||
|
return (dDistL > dDistR ? dDistL : dDistR);
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||||
|
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||||
|
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||||
|
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||||
|
return (dDistL + dDistR)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||||
|
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||||
|
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||||
|
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||||
|
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||||
|
const float dDist = (dDistL + dDistR - dDistLR)/2;
|
||||||
|
return dDist;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
|
||||||
|
float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||||
|
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||||
|
|
||||||
|
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||||
|
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||||
|
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||||
|
const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
|
||||||
|
const float dSumDistLR = dDistL + dDistR;
|
||||||
|
const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
|
||||||
|
return dDist;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetClusterCount() const
|
||||||
|
{
|
||||||
|
return m_uClusterCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::LogMe() const
|
||||||
|
{
|
||||||
|
Log("Clust %u leaves, %u nodes, %u clusters.\n",
|
||||||
|
m_uLeafCount, m_uNodeCount, m_uClusterCount);
|
||||||
|
|
||||||
|
Log("Distance matrix\n");
|
||||||
|
const unsigned uNodeCount = GetNodeCount();
|
||||||
|
Log(" ");
|
||||||
|
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||||
|
Log(" %7u", i);
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
Log(" ");
|
||||||
|
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||||
|
Log(" ------");
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||||
|
{
|
||||||
|
Log("%4u: ", i);
|
||||||
|
for (unsigned j = 0; j < i; ++j)
|
||||||
|
Log(" %7.2g", GetDist(i, j));
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
Log("\n");
|
||||||
|
Log("Node Size Prnt Left Rght Length Name\n");
|
||||||
|
Log("---- ---- ---- ---- ---- ------ ----\n");
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
const ClustNode &Node = m_Nodes[uNodeIndex];
|
||||||
|
Log("%4u %4u", uNodeIndex, Node.m_uSize);
|
||||||
|
if (0 != Node.m_ptrParent)
|
||||||
|
Log(" %4u", Node.m_ptrParent->m_uIndex);
|
||||||
|
else
|
||||||
|
Log(" ");
|
||||||
|
|
||||||
|
if (0 != Node.m_ptrLeft)
|
||||||
|
Log(" %4u", Node.m_ptrLeft->m_uIndex);
|
||||||
|
else
|
||||||
|
Log(" ");
|
||||||
|
|
||||||
|
if (0 != Node.m_ptrRight)
|
||||||
|
Log(" %4u", Node.m_ptrRight->m_uIndex);
|
||||||
|
else
|
||||||
|
Log(" ");
|
||||||
|
|
||||||
|
if (uNodeIndex != m_uNodeCount - 1)
|
||||||
|
Log(" %7.3g", Node.m_dLength);
|
||||||
|
if (IsLeaf(uNodeIndex))
|
||||||
|
{
|
||||||
|
const char *ptrName = GetNodeName(uNodeIndex);
|
||||||
|
if (0 != ptrName)
|
||||||
|
Log(" %s", ptrName);
|
||||||
|
}
|
||||||
|
if (GetRootNodeIndex() == uNodeIndex)
|
||||||
|
Log(" [ROOT]");
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bounds-checked access to a node. An index at or beyond m_uNodeCount is
// reported through Quit().
const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
{
    const bool bInRange = (uNodeIndex < m_uNodeCount);
    if (!bInRange)
        Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
    return m_Nodes[uNodeIndex];
}
|
||||||
|
|
||||||
|
bool Clust::IsLeaf(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
return uNodeIndex < m_uLeafCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
const ClustNode &Node = GetNode(uNodeIndex);
|
||||||
|
return Node.m_uSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
const ClustNode &Node = GetNode(uNodeIndex);
|
||||||
|
if (0 == Node.m_ptrLeft)
|
||||||
|
Quit("Clust::GetLeftIndex: leaf");
|
||||||
|
return Node.m_ptrLeft->m_uIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
const ClustNode &Node = GetNode(uNodeIndex);
|
||||||
|
if (0 == Node.m_ptrRight)
|
||||||
|
Quit("Clust::GetRightIndex: leaf");
|
||||||
|
return Node.m_ptrRight->m_uIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::GetLength(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
const ClustNode &Node = GetNode(uNodeIndex);
|
||||||
|
return Node.m_dLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::SetLeafCount(unsigned uLeafCount)
|
||||||
|
{
|
||||||
|
if (uLeafCount <= 1)
|
||||||
|
Quit("Clust::SetLeafCount(%u)", uLeafCount);
|
||||||
|
|
||||||
|
m_uLeafCount = uLeafCount;
|
||||||
|
const unsigned uNodeCount = GetNodeCount();
|
||||||
|
|
||||||
|
// Triangular matrix size excluding diagonal (all zeros in our case).
|
||||||
|
m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
|
||||||
|
m_dDist = new float[m_uTriangularMatrixSize];
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetLeafCount() const
|
||||||
|
{
|
||||||
|
return m_uLeafCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||||
|
{
|
||||||
|
const unsigned uNodeCount = GetNodeCount();
|
||||||
|
if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
|
||||||
|
Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
|
||||||
|
unsigned v;
|
||||||
|
if (uIndex1 >= uIndex2)
|
||||||
|
v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
|
||||||
|
else
|
||||||
|
v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
|
||||||
|
assert(v < m_uTriangularMatrixSize);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||||
|
{
|
||||||
|
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||||
|
return m_dDist[v];
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||||
|
{
|
||||||
|
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||||
|
m_dDist[v] = dDist;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::GetHeight(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
if (IsLeaf(uNodeIndex))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
|
||||||
|
const unsigned uRightIndex = GetRightIndex(uNodeIndex);
|
||||||
|
const float dLeftLength = GetLength(uLeftIndex);
|
||||||
|
const float dRightLength = GetLength(uRightIndex);
|
||||||
|
const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
|
||||||
|
const float dRightHeight = dRightLength + GetHeight(uRightIndex);
|
||||||
|
return (dLeftHeight + dRightHeight)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *Clust::GetNodeName(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
if (!IsLeaf(uNodeIndex))
|
||||||
|
Quit("Clust::GetNodeName, is not leaf");
|
||||||
|
return m_ptrSet->GetLeafName(uNodeIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetNodeId(unsigned uNodeIndex) const
|
||||||
|
{
|
||||||
|
if (uNodeIndex >= GetLeafCount())
|
||||||
|
return 0;
|
||||||
|
return m_ptrSet->GetLeafId(uNodeIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
|
||||||
|
{
|
||||||
|
const ClustNode &Node = GetNode(uNodeIndex);
|
||||||
|
const unsigned uLeafCount = Node.m_uSize;
|
||||||
|
if (uLeafIndex >= uLeafCount)
|
||||||
|
Quit("Clust::GetLeaf, invalid index");
|
||||||
|
const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex];
|
||||||
|
if (uIndex >= m_uNodeCount)
|
||||||
|
Quit("Clust::GetLeaf, index out of range");
|
||||||
|
return uIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetFirstCluster() const
|
||||||
|
{
|
||||||
|
if (0 == m_ptrClusterList)
|
||||||
|
return uInsane;
|
||||||
|
return m_ptrClusterList->m_uIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Clust::GetNextCluster(unsigned uIndex) const
|
||||||
|
{
|
||||||
|
ClustNode *ptrNode = &m_Nodes[uIndex];
|
||||||
|
if (0 == ptrNode->m_ptrNextCluster)
|
||||||
|
return uInsane;
|
||||||
|
return ptrNode->m_ptrNextCluster->m_uIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::DeleteFromClusterList(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
assert(uNodeIndex < m_uNodeCount);
|
||||||
|
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||||
|
ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
|
||||||
|
ClustNode *ptrNext = ptrNode->m_ptrNextCluster;
|
||||||
|
|
||||||
|
if (0 != ptrNext)
|
||||||
|
ptrNext->m_ptrPrevCluster = ptrPrev;
|
||||||
|
if (0 == ptrPrev)
|
||||||
|
{
|
||||||
|
assert(m_ptrClusterList == ptrNode);
|
||||||
|
m_ptrClusterList = ptrNext;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
ptrPrev->m_ptrNextCluster = ptrNext;
|
||||||
|
|
||||||
|
ptrNode->m_ptrNextCluster = 0;
|
||||||
|
ptrNode->m_ptrPrevCluster = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clust::AddToClusterList(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
assert(uNodeIndex < m_uNodeCount);
|
||||||
|
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||||
|
|
||||||
|
if (0 != m_ptrClusterList)
|
||||||
|
m_ptrClusterList->m_ptrPrevCluster = ptrNode;
|
||||||
|
|
||||||
|
ptrNode->m_ptrNextCluster = m_ptrClusterList;
|
||||||
|
ptrNode->m_ptrPrevCluster = 0;
|
||||||
|
|
||||||
|
m_ptrClusterList = ptrNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
|
||||||
|
{
|
||||||
|
switch (m_JoinStyle)
|
||||||
|
{
|
||||||
|
case JOIN_NearestNeighbor:
|
||||||
|
return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
|
||||||
|
|
||||||
|
case JOIN_NeighborJoining:
|
||||||
|
return ComputeMetricNeighborJoining(uIndex1, uIndex2);
|
||||||
|
}
|
||||||
|
Quit("Clust::ComputeMetric");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
|
||||||
|
{
|
||||||
|
float ri = Calc_r(i);
|
||||||
|
float rj = Calc_r(j);
|
||||||
|
float dij = GetDist(i, j);
|
||||||
|
float dMetric = dij - (ri + rj);
|
||||||
|
return (float) dMetric;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
|
||||||
|
{
|
||||||
|
return (float) GetDist(i, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||||
|
{
|
||||||
|
unsigned uMinLeftNodeIndex = uInsane;
|
||||||
|
unsigned uMinRightNodeIndex = uInsane;
|
||||||
|
float dMinMetric = PLUS_INFINITY;
|
||||||
|
for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane;
|
||||||
|
uLeftNodeIndex = GetNextCluster(uLeftNodeIndex))
|
||||||
|
{
|
||||||
|
for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex);
|
||||||
|
uRightNodeIndex != uInsane;
|
||||||
|
uRightNodeIndex = GetNextCluster(uRightNodeIndex))
|
||||||
|
{
|
||||||
|
float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex);
|
||||||
|
if (dMetric < dMinMetric)
|
||||||
|
{
|
||||||
|
dMinMetric = dMetric;
|
||||||
|
uMinLeftNodeIndex = uLeftNodeIndex;
|
||||||
|
uMinRightNodeIndex = uRightNodeIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*ptruIndex1 = uMinLeftNodeIndex;
|
||||||
|
*ptruIndex2 = uMinRightNodeIndex;
|
||||||
|
return dMinMetric;
|
||||||
|
}
|
||||||
|
|
||||||
|
float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||||
|
{
|
||||||
|
return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
|
||||||
|
}
|
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
#ifndef Clust_h
|
||||||
|
#define Clust_h
|
||||||
|
|
||||||
|
class Clust;
|
||||||
|
class ClustNode;
|
||||||
|
class ClustSet;
|
||||||
|
class Phylip;
|
||||||
|
class SortedNode;
|
||||||
|
|
||||||
|
// Reserved index value 0xfff0. NOTE(review): presumably the "nil" marker for
// the RB* (red-black tree) members declared on Clust -- confirm.
const unsigned RB_NIL = 0xfff0u;
|
||||||
|
|
||||||
|
class ClustNode
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ClustNode()
|
||||||
|
{
|
||||||
|
m_uIndex = uInsane;
|
||||||
|
m_uSize = uInsane;
|
||||||
|
m_dLength = (float) dInsane;
|
||||||
|
m_ptrLeft = 0;
|
||||||
|
m_ptrRight = 0;
|
||||||
|
m_ptrParent = 0;
|
||||||
|
m_ptrNextCluster = 0;
|
||||||
|
m_ptrPrevCluster = 0;
|
||||||
|
m_uLeafIndexes = 0;
|
||||||
|
}
|
||||||
|
~ClustNode()
|
||||||
|
{
|
||||||
|
delete[] m_uLeafIndexes;
|
||||||
|
}
|
||||||
|
unsigned m_uIndex;
|
||||||
|
unsigned m_uSize;
|
||||||
|
float m_dLength;
|
||||||
|
ClustNode *m_ptrLeft;
|
||||||
|
ClustNode *m_ptrRight;
|
||||||
|
ClustNode *m_ptrParent;
|
||||||
|
ClustNode *m_ptrNextCluster;
|
||||||
|
ClustNode *m_ptrPrevCluster;
|
||||||
|
unsigned *m_uLeafIndexes;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Clust
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Clust();
|
||||||
|
virtual ~Clust();
|
||||||
|
|
||||||
|
void Create(ClustSet &Set, CLUSTER Method);
|
||||||
|
|
||||||
|
unsigned GetLeafCount() const;
|
||||||
|
|
||||||
|
unsigned GetClusterCount() const;
|
||||||
|
unsigned GetClusterSize(unsigned uNodeIndex) const;
|
||||||
|
unsigned GetLeaf(unsigned uClusterIndex, unsigned uLeafIndex) const;
|
||||||
|
|
||||||
|
unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
|
||||||
|
const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
|
||||||
|
unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }
|
||||||
|
|
||||||
|
const ClustNode &GetNode(unsigned uNodeIndex) const;
|
||||||
|
bool IsLeaf(unsigned uNodeIndex) const;
|
||||||
|
unsigned GetLeftIndex(unsigned uNodeIndex) const;
|
||||||
|
unsigned GetRightIndex(unsigned uNodeIndex) const;
|
||||||
|
float GetLength(unsigned uNodeIndex) const;
|
||||||
|
float GetHeight(unsigned uNodeIndex) const;
|
||||||
|
const char *GetNodeName(unsigned uNodeIndex) const;
|
||||||
|
unsigned GetNodeId(unsigned uNodeIndex) const;
|
||||||
|
|
||||||
|
JOIN GetJoinStyle() const { return m_JoinStyle; }
|
||||||
|
LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }
|
||||||
|
|
||||||
|
void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
|
||||||
|
float GetDist(unsigned uIndex1, unsigned uIndex2) const;
|
||||||
|
|
||||||
|
void ToPhylip(Phylip &tree);
|
||||||
|
|
||||||
|
void LogMe() const;
|
||||||
|
|
||||||
|
//private:
|
||||||
|
void SetLeafCount(unsigned uLeafCount);
|
||||||
|
|
||||||
|
void CreateCluster();
|
||||||
|
void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex,
|
||||||
|
float dLeftLength, float dRightLength, unsigned uNewNodeIndex);
|
||||||
|
|
||||||
|
void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||||
|
float *ptrdLeftLength, float *ptrdRightLength);
|
||||||
|
void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||||
|
float *ptrdLeftLength, float *ptrdRightLength);
|
||||||
|
void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||||
|
float *ptrdLeftLength, float *ptrdRightLength);
|
||||||
|
|
||||||
|
float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||||
|
float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||||
|
float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||||
|
float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||||
|
float ComputeDistNeighborJoining(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||||
|
float ComputeDistMAFFT(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||||
|
|
||||||
|
float Calc_r(unsigned uNodeIndex) const;
|
||||||
|
|
||||||
|
unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
|
||||||
|
|
||||||
|
unsigned GetFirstCluster() const;
|
||||||
|
unsigned GetNextCluster(unsigned uNodeIndex) const;
|
||||||
|
|
||||||
|
float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
|
||||||
|
float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
|
||||||
|
float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;
|
||||||
|
|
||||||
|
void InitMetric(unsigned uMaxNodeIndex);
|
||||||
|
void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
|
||||||
|
float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||||
|
float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||||
|
void DeleteMetric(unsigned uIndex);
|
||||||
|
void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
|
||||||
|
void ListMetric() const;
|
||||||
|
|
||||||
|
void DeleteFromClusterList(unsigned uNodeIndex);
|
||||||
|
void AddToClusterList(unsigned uNodeIndex);
|
||||||
|
|
||||||
|
void RBDelete(unsigned RBNode);
|
||||||
|
unsigned RBInsert(unsigned i, unsigned j, float fMetric);
|
||||||
|
|
||||||
|
unsigned RBNext(unsigned RBNode) const;
|
||||||
|
unsigned RBPrev(unsigned RBNode) const;
|
||||||
|
unsigned RBMin(unsigned RBNode) const;
|
||||||
|
unsigned RBMax(unsigned RBNode) const;
|
||||||
|
|
||||||
|
void ValidateRB(const char szMsg[] = 0) const;
|
||||||
|
void ValidateRBNode(unsigned Node, const char szMsg[]) const;
|
||||||
|
|
||||||
|
//private:
|
||||||
|
JOIN m_JoinStyle;
|
||||||
|
LINKAGE m_CentroidStyle;
|
||||||
|
ClustNode *m_Nodes;
|
||||||
|
unsigned *m_ClusterIndexToNodeIndex;
|
||||||
|
unsigned *m_NodeIndexToClusterIndex;
|
||||||
|
unsigned m_uLeafCount;
|
||||||
|
unsigned m_uNodeCount;
|
||||||
|
unsigned m_uClusterCount;
|
||||||
|
unsigned m_uTriangularMatrixSize;
|
||||||
|
float *m_dDist;
|
||||||
|
ClustSet *m_ptrSet;
|
||||||
|
ClustNode *m_ptrClusterList;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // Clust_h
|
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "cluster.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
|
||||||
|
// Returns d1 when it is strictly smaller than d2, otherwise d2.
static inline float Min(float d1, float d2)
{
    if (d1 < d2)
        return d1;
    return d2;
}
|
||||||
|
|
||||||
|
// Returns d1 when it is strictly larger than d2, otherwise d2.
static inline float Max(float d1, float d2)
{
    if (d1 > d2)
        return d1;
    return d2;
}
|
||||||
|
|
||||||
|
// Arithmetic mean of two values. The sum is formed in float, halved in
// double, and the result is narrowed back to float.
static inline float Mean(float d1, float d2)
{
    const float dSum = d1 + d2;
    return static_cast<float>(dSum/2.0);
}
|
||||||
|
|
||||||
|
#if _DEBUG
// Debug-only consistency check of the disjoint list.
// Walks the list from m_ptrDisjoints and verifies that every prev/next link
// is mirrored by its neighbour, that only the head has no predecessor, and
// that the walk does not exceed m_uNodeCount steps (which would indicate a
// loop). Finally checks that the number of listed nodes equals the number of
// nodes among the first uNodeCount that have no parent. Any violation is
// logged and reported through Quit().
void ClusterTree::Validate(unsigned uNodeCount)
{
    unsigned uListed = 0;
    for (ClusterNode *ptrCur = m_ptrDisjoints; ptrCur;
      ptrCur = ptrCur->GetNextDisjoint())
    {
        ClusterNode *ptrBefore = ptrCur->GetPrevDisjoint();
        ClusterNode *ptrAfter = ptrCur->GetNextDisjoint();

        if (0 == ptrBefore)
        {
            if (ptrCur != m_ptrDisjoints)
            {
                Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
                  ptrCur->GetIndex(),
                  m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
                ptrCur->LogMe();
                Quit("ClusterTree::Validate()");
            }
        }
        else if (ptrBefore->GetNextDisjoint() != ptrCur)
        {
            Log("Prev->This mismatch, prev=\n");
            ptrBefore->LogMe();
            Log("This=\n");
            ptrCur->LogMe();
            Quit("ClusterTree::Validate()");
        }

        if (0 != ptrAfter && ptrAfter->GetPrevDisjoint() != ptrCur)
        {
            Log("Next->This mismatch, next=\n");
            ptrAfter->LogMe();
            Log("This=\n");
            ptrCur->LogMe();
            Quit("ClusterTree::Validate()");
        }

        ++uListed;
        if (uListed > m_uNodeCount)
            Quit("Loop in disjoint list");
    }

    unsigned uOrphans = 0;
    for (unsigned n = 0; n < uNodeCount; ++n)
    {
        if (0 == m_Nodes[n].GetParent())
            ++uOrphans;
    }

    if (uListed != uOrphans)
        Quit("Disjoints = %u Parentless = %u\n", uListed, uOrphans);
}
#else // !_DEBUG
#define Validate(uNodeCount) // empty
#endif
|
||||||
|
|
||||||
|
void ClusterNode::LogMe() const
|
||||||
|
{
|
||||||
|
unsigned uClusterSize = GetClusterSize();
|
||||||
|
Log("[%02u] w=%5.3f CW=%5.3f LBW=%5.3f RBW=%5.3f LWT=%5.3f RWT=%5.3f L=%02d R=%02d P=%02d NxDj=%02d PvDj=%02d Sz=%02d {",
|
||||||
|
m_uIndex,
|
||||||
|
m_dWeight,
|
||||||
|
GetClusterWeight(),
|
||||||
|
GetLeftBranchWeight(),
|
||||||
|
GetRightBranchWeight(),
|
||||||
|
GetLeftWeight(),
|
||||||
|
GetRightWeight(),
|
||||||
|
m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,
|
||||||
|
m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,
|
||||||
|
m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,
|
||||||
|
m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,
|
||||||
|
m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,
|
||||||
|
uClusterSize);
|
||||||
|
for (unsigned i = 0; i < uClusterSize; ++i)
|
||||||
|
Log(" %u", GetClusterLeaf(i)->GetIndex());
|
||||||
|
Log(" }\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// How many leaves in the sub-tree under this node?
|
||||||
|
unsigned ClusterNode::GetClusterSize() const
|
||||||
|
{
|
||||||
|
unsigned uLeafCount = 0;
|
||||||
|
|
||||||
|
if (0 == m_ptrLeft && 0 == m_ptrRight)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (0 != m_ptrLeft)
|
||||||
|
uLeafCount += m_ptrLeft->GetClusterSize();
|
||||||
|
if (0 != m_ptrRight)
|
||||||
|
uLeafCount += m_ptrRight->GetClusterSize();
|
||||||
|
assert(uLeafCount > 0);
|
||||||
|
return uLeafCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
double ClusterNode::GetClusterWeight() const
|
||||||
|
{
|
||||||
|
double dWeight = 0.0;
|
||||||
|
if (0 != m_ptrLeft)
|
||||||
|
dWeight += m_ptrLeft->GetClusterWeight();
|
||||||
|
if (0 != m_ptrRight)
|
||||||
|
dWeight += m_ptrRight->GetClusterWeight();
|
||||||
|
return dWeight + GetWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
double ClusterNode::GetLeftBranchWeight() const
|
||||||
|
{
|
||||||
|
const ClusterNode *ptrLeft = GetLeft();
|
||||||
|
if (0 == ptrLeft)
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
return GetWeight() - ptrLeft->GetWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
double ClusterNode::GetRightBranchWeight() const
|
||||||
|
{
|
||||||
|
const ClusterNode *ptrRight = GetRight();
|
||||||
|
if (0 == ptrRight)
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
return GetWeight() - ptrRight->GetWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
double ClusterNode::GetRightWeight() const
|
||||||
|
{
|
||||||
|
const ClusterNode *ptrRight = GetRight();
|
||||||
|
if (0 == ptrRight)
|
||||||
|
return 0.0;
|
||||||
|
return ptrRight->GetClusterWeight() + GetWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
double ClusterNode::GetLeftWeight() const
|
||||||
|
{
|
||||||
|
const ClusterNode *ptrLeft = GetLeft();
|
||||||
|
if (0 == ptrLeft)
|
||||||
|
return 0.0;
|
||||||
|
return ptrLeft->GetClusterWeight() + GetWeight();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return n'th leaf in the sub-tree under this node.
|
||||||
|
const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
|
||||||
|
{
|
||||||
|
if (0 != m_ptrLeft)
|
||||||
|
{
|
||||||
|
if (0 == m_ptrRight)
|
||||||
|
return this;
|
||||||
|
|
||||||
|
unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();
|
||||||
|
|
||||||
|
if (uLeafIndex < uLeftLeafCount)
|
||||||
|
return m_ptrLeft->GetClusterLeaf(uLeafIndex);
|
||||||
|
|
||||||
|
assert(uLeafIndex >= uLeftLeafCount);
|
||||||
|
return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);
|
||||||
|
}
|
||||||
|
if (0 == m_ptrRight)
|
||||||
|
return this;
|
||||||
|
return m_ptrRight->GetClusterLeaf(uLeafIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unlink ptrNode from the doubly-linked list of disjoint clusters whose
// head is m_ptrDisjoints.
void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
{
    ClusterNode *const ptrBefore = ptrNode->GetPrevDisjoint();
    ClusterNode *const ptrAfter = ptrNode->GetNextDisjoint();

    // No predecessor means ptrNode was the head of the list.
    if (ptrBefore == 0)
        m_ptrDisjoints = ptrAfter;
    else
        ptrBefore->SetNextDisjoint(ptrAfter);

    if (ptrAfter != 0)
        ptrAfter->SetPrevDisjoint(ptrBefore);

#if _DEBUG
    // Clearing the removed node's own links is not needed by the algorithm;
    // it keeps the state readable and supports Validate().
    ptrNode->SetNextDisjoint(0);
    ptrNode->SetPrevDisjoint(0);
#endif
}
|
||||||
|
|
||||||
|
// Insert ptrNode at the head of the doubly-linked list of disjoint clusters.
void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
{
    ClusterNode *const ptrOldHead = m_ptrDisjoints;

    ptrNode->SetPrevDisjoint(0);
    ptrNode->SetNextDisjoint(ptrOldHead);

    if (ptrOldHead)
        ptrOldHead->SetPrevDisjoint(ptrNode);

    m_ptrDisjoints = ptrNode;
}
|
||||||
|
|
||||||
|
// Construct an empty tree: no nodes, no leaves, empty disjoint list.
// m_uLeafCount is initialized too, so no member is ever read while
// indeterminate (the previous version left it unset until Create()).
ClusterTree::ClusterTree() :
    m_ptrDisjoints(0),
    m_Nodes(0),
    m_uNodeCount(0),
    m_uLeafCount(0)
{
}
|
||||||
|
|
||||||
|
// Release the node array allocated by Create().
ClusterTree::~ClusterTree()
{
    // delete[] on a null pointer is a no-op, so a tree that was never
    // created needs no special case.
    delete [] m_Nodes;
}
|
||||||
|
|
||||||
|
void ClusterTree::LogMe() const
|
||||||
|
{
|
||||||
|
Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
|
||||||
|
for (unsigned i = 0; i < m_uNodeCount; ++i)
|
||||||
|
{
|
||||||
|
m_Nodes[i].LogMe();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the root of the tree, which is the last node in the array (the
// final join made by Create()). Returns 0 for an empty tree; previously
// m_uNodeCount - 1 wrapped around and indexed a null array in that case.
ClusterNode *ClusterTree::GetRoot() const
{
    if (0 == m_Nodes || 0 == m_uNodeCount)
        return 0;
    return &m_Nodes[m_uNodeCount - 1];
}
|
||||||
|
|
||||||
|
// This is the UPGMA algorithm as described in Durbin et al. p166.
|
||||||
|
void ClusterTree::Create(const DistFunc &Dist)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
m_uLeafCount = Dist.GetCount();
|
||||||
|
m_uNodeCount = 2*m_uLeafCount - 1;
|
||||||
|
|
||||||
|
delete[] m_Nodes;
|
||||||
|
m_Nodes = new ClusterNode[m_uNodeCount];
|
||||||
|
|
||||||
|
for (i = 0; i < m_uNodeCount; ++i)
|
||||||
|
m_Nodes[i].SetIndex(i);
|
||||||
|
|
||||||
|
for (i = 0; i < m_uLeafCount - 1; ++i)
|
||||||
|
m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);
|
||||||
|
|
||||||
|
for (i = 1; i < m_uLeafCount; ++i)
|
||||||
|
m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
|
||||||
|
|
||||||
|
m_ptrDisjoints = &m_Nodes[0];
|
||||||
|
|
||||||
|
// Log("Initial state\n");
|
||||||
|
// LogMe();
|
||||||
|
// Log("\n");
|
||||||
|
|
||||||
|
DistFunc ClusterDist;
|
||||||
|
ClusterDist.SetCount(m_uNodeCount);
|
||||||
|
double dMaxDist = 0.0;
|
||||||
|
for (i = 0; i < m_uLeafCount; ++i)
|
||||||
|
for (unsigned j = 0; j < m_uLeafCount; ++j)
|
||||||
|
{
|
||||||
|
float dDist = Dist.GetDist(i, j);
|
||||||
|
ClusterDist.SetDist(i, j, dDist);
|
||||||
|
}
|
||||||
|
|
||||||
|
Validate(m_uLeafCount);
|
||||||
|
|
||||||
|
// Iteration. N-1 joins needed to create a binary tree from N leaves.
|
||||||
|
for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
|
||||||
|
++uJoinIndex)
|
||||||
|
{
|
||||||
|
// Find closest pair of clusters
|
||||||
|
unsigned uIndexClosest1;
|
||||||
|
unsigned uIndexClosest2;
|
||||||
|
bool bFound = false;
|
||||||
|
double dDistClosest = 9e99;
|
||||||
|
for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
|
||||||
|
ptrNode1 = ptrNode1->GetNextDisjoint())
|
||||||
|
{
|
||||||
|
for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
|
||||||
|
ptrNode2 = ptrNode2->GetNextDisjoint())
|
||||||
|
{
|
||||||
|
unsigned i1 = ptrNode1->GetIndex();
|
||||||
|
unsigned i2 = ptrNode2->GetIndex();
|
||||||
|
double dDist = ClusterDist.GetDist(i1, i2);
|
||||||
|
if (dDist < dDistClosest)
|
||||||
|
{
|
||||||
|
bFound = true;
|
||||||
|
dDistClosest = dDist;
|
||||||
|
uIndexClosest1 = i1;
|
||||||
|
uIndexClosest2 = i2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(bFound);
|
||||||
|
|
||||||
|
ClusterNode &Join = m_Nodes[uJoinIndex];
|
||||||
|
ClusterNode &Child1 = m_Nodes[uIndexClosest1];
|
||||||
|
ClusterNode &Child2 = m_Nodes[uIndexClosest2];
|
||||||
|
|
||||||
|
Join.SetLeft(&Child1);
|
||||||
|
Join.SetRight(&Child2);
|
||||||
|
Join.SetWeight(dDistClosest);
|
||||||
|
|
||||||
|
Child1.SetParent(&Join);
|
||||||
|
Child2.SetParent(&Join);
|
||||||
|
|
||||||
|
DeleteFromDisjoints(&Child1);
|
||||||
|
DeleteFromDisjoints(&Child2);
|
||||||
|
AddToDisjoints(&Join);
|
||||||
|
|
||||||
|
// Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);
|
||||||
|
// LogMe();
|
||||||
|
|
||||||
|
// Calculate distance of every remaining disjoint cluster to the
|
||||||
|
// new cluster created by the join
|
||||||
|
for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
|
||||||
|
ptrNode = ptrNode->GetNextDisjoint())
|
||||||
|
{
|
||||||
|
unsigned uNodeIndex = ptrNode->GetIndex();
|
||||||
|
float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
|
||||||
|
float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
|
||||||
|
float dDist = Min(dDist1, dDist2);
|
||||||
|
ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
|
||||||
|
}
|
||||||
|
Validate(uJoinIndex+1);
|
||||||
|
}
|
||||||
|
GetRoot()->GetClusterWeight();
|
||||||
|
// LogMe();
|
||||||
|
}
|
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
class DistFunc;
|
||||||
|
|
||||||
|
// One node of a ClusterTree. A node carries two weights, an index, links to
// its left/right children and parent, and prev/next links for the tree's
// list of disjoint clusters. All links are plain pointers that this class
// never deletes. Mutators other than SetWeight2 are protected; ClusterTree
// is a friend and uses them while building the tree.
class ClusterNode
{
    friend class ClusterTree;

public:
    // Every weight and index starts at zero and every link at null.
    ClusterNode() :
        m_dWeight(0.0),
        m_dWeight2(0.0),
        m_uIndex(0),
        m_ptrLeft(0),
        m_ptrRight(0),
        m_ptrParent(0),
        m_ptrNextDisjoint(0),
        m_ptrPrevDisjoint(0)
    {
    }
    ~ClusterNode() {}

public:
    unsigned GetIndex() const { return m_uIndex; }
    ClusterNode *GetLeft() const { return m_ptrLeft; }
    ClusterNode *GetRight() const { return m_ptrRight; }
    ClusterNode *GetParent() const { return m_ptrParent; }
    double GetWeight() const { return m_dWeight; }

    // Queries over the sub-tree rooted at this node (defined out of line).
    const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
    unsigned GetClusterSize() const;
    double GetClusterWeight() const;
    double GetLeftBranchWeight() const;
    double GetRightBranchWeight() const;
    double GetLeftWeight() const;
    double GetRightWeight() const;

    void LogMe() const;

    // Second weight slot, readable and writable by any caller.
    double GetWeight2() const { return m_dWeight2; }
    void SetWeight2(double dWeight2) { m_dWeight2 = dWeight2; }

protected:
    void SetIndex(unsigned uIndex) { m_uIndex = uIndex; }
    void SetWeight(double dWeight) { m_dWeight = dWeight; }
    void SetLeft(ClusterNode *ptrLeft) { m_ptrLeft = ptrLeft; }
    void SetRight(ClusterNode *ptrRight) { m_ptrRight = ptrRight; }
    void SetParent(ClusterNode *ptrParent) { m_ptrParent = ptrParent; }
    void SetNextDisjoint(ClusterNode *ptrNode) { m_ptrNextDisjoint = ptrNode; }
    void SetPrevDisjoint(ClusterNode *ptrNode) { m_ptrPrevDisjoint = ptrNode; }

    // const-qualified like every other getter, so they can also be called
    // through a const ClusterNode.
    ClusterNode *GetNextDisjoint() const { return m_ptrNextDisjoint; }
    ClusterNode *GetPrevDisjoint() const { return m_ptrPrevDisjoint; }

private:
    double m_dWeight;
    double m_dWeight2;
    unsigned m_uIndex;
    ClusterNode *m_ptrLeft;
    ClusterNode *m_ptrRight;
    ClusterNode *m_ptrParent;
    ClusterNode *m_ptrNextDisjoint;
    ClusterNode *m_ptrPrevDisjoint;
};
|
||||||
|
|
||||||
|
class ClusterTree
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ClusterTree();
|
||||||
|
virtual ~ClusterTree();
|
||||||
|
|
||||||
|
void Create(const DistFunc &DF);
|
||||||
|
|
||||||
|
ClusterNode *GetRoot() const;
|
||||||
|
void LogMe() const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
|
||||||
|
ClusterNode *ptrJoin);
|
||||||
|
void AddToDisjoints(ClusterNode *ptrNode);
|
||||||
|
void DeleteFromDisjoints(ClusterNode *ptrNode);
|
||||||
|
void Validate(unsigned uNodeCount);
|
||||||
|
|
||||||
|
private:
|
||||||
|
ClusterNode *m_ptrDisjoints;
|
||||||
|
ClusterNode *m_Nodes;
|
||||||
|
unsigned m_uNodeCount;
|
||||||
|
unsigned m_uLeafCount;
|
||||||
|
};
|
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#ifndef ClustSet_h
|
||||||
|
#define ClustSet_h
|
||||||
|
|
||||||
|
enum JOIN;
|
||||||
|
enum LINKAGE;
|
||||||
|
class Clust;

// Abstract interface describing a set of leaves to be clustered: how many
// there are, their names and ids, the distance between two nodes, and how
// two nodes are joined.
class ClustSet
{
public:
    // Virtual destructor so that an implementation can be destroyed safely
    // through a ClustSet pointer.
    virtual ~ClustSet() {}

    // Number of leaves in the set.
    virtual unsigned GetLeafCount() = 0;

    // Distance between two nodes of the clustering C.
    virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
      unsigned uNodeIndex2) = 0;

    // Join two nodes of C into uJoinedNodeIndex; the two branch lengths are
    // passed back through ptrdLeftLength and ptrdRightLength.
    virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
      unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
      double *ptrdLeftLength, double *ptrdRightLength) = 0;

    // Name and id of a leaf.
    virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
    virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
};
|
||||||
|
|
||||||
|
#endif // ClustSet_h
|
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#ifndef ClustSetDF_h
|
||||||
|
#define ClustSetDF_h
|
||||||
|
|
||||||
|
class MSA;
|
||||||
|
class Clust;
|
||||||
|
|
||||||
|
#include "clustset.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
#include "msa.h"
|
||||||
|
|
||||||
|
class ClustSetDF : public ClustSet
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ClustSetDF(const DistFunc &DF) :
|
||||||
|
m_ptrDF(&DF)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual unsigned GetLeafCount()
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetCount();
|
||||||
|
}
|
||||||
|
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetName(uNodeIndex);
|
||||||
|
}
|
||||||
|
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetId(uNodeIndex);
|
||||||
|
}
|
||||||
|
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||||
|
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||||
|
double *ptrdLeftLength, double *ptrdRightLength)
|
||||||
|
{
|
||||||
|
Quit("ClustSetDF::JoinNodes, should never be called");
|
||||||
|
}
|
||||||
|
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||||
|
unsigned uNodeIndex2)
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DistFunc *m_ptrDF;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // ClustSetDF_h
|
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#ifndef ClustSetMSA_h
|
||||||
|
#define ClustSetMSA_h
|
||||||
|
|
||||||
|
class MSA;
|
||||||
|
class Clust;
|
||||||
|
|
||||||
|
#include "clustset.h"
|
||||||
|
#include "msadist.h"
|
||||||
|
|
||||||
|
// Distance matrix based set.
|
||||||
|
// Computes distances between leaves, never between
|
||||||
|
// joined clusters (leaves this to distance matrix method).
|
||||||
|
class ClustSetMSA : public ClustSet
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ClustSetMSA(const MSA &msa, MSADist &MD) :
|
||||||
|
m_ptrMSA(&msa),
|
||||||
|
m_ptrMSADist(&MD)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual unsigned GetLeafCount()
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqCount();
|
||||||
|
}
|
||||||
|
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqName(uNodeIndex);
|
||||||
|
}
|
||||||
|
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqId(uNodeIndex);
|
||||||
|
}
|
||||||
|
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||||
|
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||||
|
double *ptrdLeftLength, double *ptrdRightLength)
|
||||||
|
{
|
||||||
|
Quit("ClustSetMSA::JoinNodes, should never be called");
|
||||||
|
}
|
||||||
|
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||||
|
unsigned uNodeIndex2)
|
||||||
|
{
|
||||||
|
return m_ptrMSADist->ComputeDist(*m_ptrMSA, uNodeIndex1, uNodeIndex2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
const MSA &GetMSA();
|
||||||
|
|
||||||
|
private:
|
||||||
|
const MSA *m_ptrMSA;
|
||||||
|
MSADist *m_ptrMSADist;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // ClustSetMSA_h
|
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#include "msa.h"
|
||||||
|
|
||||||
|
/***
|
||||||
|
Compute weights by the CLUSTALW method.
|
||||||
|
Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
|
||||||
|
see also CLUSTALW paper.
|
||||||
|
|
||||||
|
Weights are computed from the edge lengths of a rooted tree.
|
||||||
|
|
||||||
|
Define the strength of an edge to be its length divided by the number
|
||||||
|
of leaves under that edge. The weight of a sequence is then the sum
|
||||||
|
of edge strengths on the path from the root to the leaf.
|
||||||
|
|
||||||
|
Example.
|
||||||
|
|
||||||
|
0.2
|
||||||
|
-----A 0.1
|
||||||
|
-x ------- B 0.7
|
||||||
|
--------y ----------- C
|
||||||
|
0.3 ----------z
|
||||||
|
0.4 -------------- D
|
||||||
|
0.8
|
||||||
|
|
||||||
|
Edge Length Leaves Strength
|
||||||
|
---- ----- ------ --------
|
||||||
|
xy 0.3 3 0.1
|
||||||
|
xA 0.2 1 0.2
|
||||||
|
yz 0.4 2 0.2
|
||||||
|
yB 0.1 1 0.1
|
||||||
|
zC 0.7 1 0.7
|
||||||
|
zD 0.8 1 0.8
|
||||||
|
|
||||||
|
Leaf Path Strengths Weight
|
||||||
|
---- ---- --------- ------
|
||||||
|
A xA 0.2 0.2
|
||||||
|
B xy-yB 0.1 + 0.1 0.2
|
||||||
|
C xy-yz-zC 0.1 + 0.2 + 0.7 1.0
|
||||||
|
D xy-yz-zD 0.1 + 0.2 + 0.8 1.1
|
||||||
|
|
||||||
|
***/
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
|
||||||
|
unsigned LeavesUnderNode[])
|
||||||
|
{
|
||||||
|
if (tree.IsLeaf(uNodeIndex))
|
||||||
|
{
|
||||||
|
LeavesUnderNode[uNodeIndex] = 1;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned uLeft = tree.GetLeft(uNodeIndex);
|
||||||
|
const unsigned uRight = tree.GetRight(uNodeIndex);
|
||||||
|
const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
|
||||||
|
const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
|
||||||
|
const unsigned uCount = uRightCount + uLeftCount;
|
||||||
|
LeavesUnderNode[uNodeIndex] = uCount;
|
||||||
|
return uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("CalcClustalWWeights\n");
|
||||||
|
tree.LogMe();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const unsigned uLeafCount = tree.GetLeafCount();
|
||||||
|
if (0 == uLeafCount)
|
||||||
|
return;
|
||||||
|
else if (1 == uLeafCount)
|
||||||
|
{
|
||||||
|
Weights[0] = (WEIGHT) 1.0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (2 == uLeafCount)
|
||||||
|
{
|
||||||
|
Weights[0] = (WEIGHT) 0.5;
|
||||||
|
Weights[1] = (WEIGHT) 0.5;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!tree.IsRooted())
|
||||||
|
Quit("CalcClustalWWeights requires rooted tree");
|
||||||
|
|
||||||
|
const unsigned uNodeCount = tree.GetNodeCount();
|
||||||
|
unsigned *LeavesUnderNode = new unsigned[uNodeCount];
|
||||||
|
memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
|
||||||
|
|
||||||
|
const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
|
||||||
|
unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
|
||||||
|
if (uLeavesUnderRoot != uLeafCount)
|
||||||
|
Quit("WeightsFromTreee: Internal error, root count %u %u",
|
||||||
|
uLeavesUnderRoot, uLeafCount);
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("Node Leaves Length Strength\n");
|
||||||
|
Log("---- ------ -------- --------\n");
|
||||||
|
// 1234 123456 12345678 12345678
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double *Strengths = new double[uNodeCount];
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
if (tree.IsRoot(uNodeIndex))
|
||||||
|
{
|
||||||
|
Strengths[uNodeIndex] = 0.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const unsigned uParent = tree.GetParent(uNodeIndex);
|
||||||
|
const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
|
||||||
|
const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
|
||||||
|
const double dStrength = dLength / (double) uLeaves;
|
||||||
|
Strengths[uNodeIndex] = dStrength;
|
||||||
|
#if TRACE
|
||||||
|
Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("\n");
|
||||||
|
Log(" Seq Path..Weight\n");
|
||||||
|
Log("-------------------- ------------\n");
|
||||||
|
#endif
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
{
|
||||||
|
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||||
|
#if TRACE
|
||||||
|
Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
|
||||||
|
#endif
|
||||||
|
if (!tree.IsLeaf(uLeafNodeIndex))
|
||||||
|
Quit("CalcClustalWWeights: leaf");
|
||||||
|
|
||||||
|
double dWeight = 0;
|
||||||
|
unsigned uNode = uLeafNodeIndex;
|
||||||
|
while (!tree.IsRoot(uNode))
|
||||||
|
{
|
||||||
|
dWeight += Strengths[uNode];
|
||||||
|
uNode = tree.GetParent(uNode);
|
||||||
|
#if TRACE
|
||||||
|
Log("->%u(%g)", uNode, Strengths[uNode]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
if (dWeight < 0.0001)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("zero->one");
|
||||||
|
#endif
|
||||||
|
dWeight = 1.0;
|
||||||
|
}
|
||||||
|
Weights[n] = (WEIGHT) dWeight;
|
||||||
|
#if TRACE
|
||||||
|
Log(" = %g\n", dWeight);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] Strengths;
|
||||||
|
delete[] LeavesUnderNode;
|
||||||
|
|
||||||
|
Normalize(Weights, uLeafCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MSA::SetClustalWWeights(const Tree &tree)
|
||||||
|
{
|
||||||
|
const unsigned uSeqCount = GetSeqCount();
|
||||||
|
const unsigned uLeafCount = tree.GetLeafCount();
|
||||||
|
|
||||||
|
WEIGHT *Weights = new WEIGHT[uSeqCount];
|
||||||
|
|
||||||
|
CalcClustalWWeights(tree, Weights);
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
{
|
||||||
|
const WEIGHT w = Weights[n];
|
||||||
|
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||||
|
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||||
|
const unsigned uSeqIndex = GetSeqIndex(uId);
|
||||||
|
#if DEBUG
|
||||||
|
if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
|
||||||
|
Quit("MSA::SetClustalWWeights: names don't match");
|
||||||
|
#endif
|
||||||
|
SetSeqWeight(uSeqIndex, w);
|
||||||
|
}
|
||||||
|
NormalizeWeights((WEIGHT) 1.0);
|
||||||
|
|
||||||
|
delete[] Weights;
|
||||||
|
}
|
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
|
||||||
|
// BLOSUM62 substitution scores for the 23 letters listed in the column
// header below; rows use the same order. The table is only ever read in
// this file, so it is const.
static const int Blosum62[23][23] =
{
//    A   B   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   X   Y   Z
    { +4, -2, +0, -2, -1, -2, +0, -2, -1, -1, -1, -1, -2, -1, -1, -1, +1, +0, +0, -3, -1, -2, -1 }, // A
    { -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2 }, // B
    { +0, -3, +9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -1, -2, -4 }, // C
    { -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2 }, // D
    { -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5 }, // E

    { -2, -3, -2, -3, -3, +6, -3, -1, +0, -3, +0, +0, -3, -4, -3, -3, -2, -2, -1, +1, -1, +3, -3 }, // F
    { +0, -1, -3, -1, -2, -3, +6, -2, -4, -2, -4, -3, +0, -2, -2, -2, +0, -2, -3, -2, -1, -3, -2 }, // G
    { -2, -1, -3, -1, +0, -1, -2, +8, -3, -1, -3, -2, +1, -2, +0, +0, -1, -2, -3, -2, -1, +2, +0 }, // H
    { -1, -3, -1, -3, -3, +0, -4, -3, +4, -3, +2, +1, -3, -3, -3, -3, -2, -1, +3, -3, -1, -1, -3 }, // I
    { -1, -1, -3, -1, +1, -3, -2, -1, -3, +5, -2, -1, +0, -1, +1, +2, +0, -1, -2, -3, -1, -2, +1 }, // K

    { -1, -4, -1, -4, -3, +0, -4, -3, +2, -2, +4, +2, -3, -3, -2, -2, -2, -1, +1, -2, -1, -1, -3 }, // L
    { -1, -3, -1, -3, -2, +0, -3, -2, +1, -1, +2, +5, -2, -2, +0, -1, -1, -1, +1, -1, -1, -1, -2 }, // M
    { -2, +1, -3, +1, +0, -3, +0, +1, -3, +0, -3, -2, +6, -2, +0, +0, +1, +0, -3, -4, -1, -2, +0 }, // N
    { -1, -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, +7, -1, -2, -1, -1, -2, -4, -1, -3, -1 }, // P
    { -1, +0, -3, +0, +2, -3, -2, +0, -3, +1, -2, +0, +0, -1, +5, +1, +0, -1, -2, -2, -1, -1, +2 }, // Q

    { -1, -2, -3, -2, +0, -3, -2, +0, -3, +2, -2, -1, +0, -2, +1, +5, -1, -1, -3, -3, -1, -2, +0 }, // R
    { +1, +0, -1, +0, +0, -2, +0, -1, -2, +0, -2, -1, +1, -1, +0, -1, +4, +1, -2, -3, -1, -2, +0 }, // S
    { +0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, +0, -1, -1, -1, +1, +5, +0, -2, -1, -2, -1 }, // T
    { +0, -3, -1, -3, -2, -1, -3, -3, +3, -2, +1, +1, -3, -2, -2, -3, -2, +0, +4, -3, -1, -1, -2 }, // V
    { -3, -4, -2, -4, -3, +1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3,+11, -1, +2, -3 }, // W

    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // X
    { -2, -3, -2, -3, -2, +3, -3, +2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, +2, -1, +7, -2 }, // Y
    { -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5 }, // Z
};
|
||||||
|
|
||||||
|
// Maps 'A'..'Z' (by offset from 'A') to a row/column index of the 23-letter
// Blosum62 table; -1 marks letters that have no entry (J, O).
static const int toi_tab[26] =
{
    0,  // A
    1,  // B
    2,  // C
    3,  // D
    4,  // E
    5,  // F
    6,  // G
    7,  // H
    8,  // I
    -1, // J
    9,  // K
    10, // L
    11, // M
    12, // N
    -1, // O
    13, // P
    14, // Q
    15, // R
    16, // S
    17, // T
    17, // U
    18, // V
    19, // W
    20, // X
    21, // Y
    22, // Z
};

// Convert a residue character (either case) to its Blosum62 index.
// Returns -1 for letters without an entry and for anything that is not a
// letter; the previous version indexed toi_tab with c - 'A' unchecked and so
// read outside the table for such input. The cast to unsigned char keeps
// toupper() defined when plain char is signed and c is negative.
static int toi(char c)
{
    const int iUpper = toupper(static_cast<unsigned char>(c));
    if (iUpper < 'A' || iUpper > 'Z')
        return -1;
    return toi_tab[iUpper - 'A'];
}
|
||||||
|
|
||||||
|
static int BlosumScore(char c1, char c2)
|
||||||
|
{
|
||||||
|
int i1 = toi(c1);
|
||||||
|
int i2 = toi(c2);
|
||||||
|
return Blosum62[i1][i2];
|
||||||
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
Consider a column with 5 As and 3 Bs.
|
||||||
|
There are:
|
||||||
|
5x4 pairs of As.
|
||||||
|
3x2 pairs of Bs.
|
||||||
|
5x3x2 AB pairs
|
||||||
|
8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters
|
||||||
|
***/
|
||||||
|
static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
|
||||||
|
{
|
||||||
|
int iCounts[23];
|
||||||
|
memset(iCounts, 0, sizeof(iCounts));
|
||||||
|
const unsigned uSeqCount = a.GetSeqCount();
|
||||||
|
unsigned uCharCount = 0;
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
{
|
||||||
|
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||||
|
if (IsGapChar(c))
|
||||||
|
continue;
|
||||||
|
int iChar = toi(c);
|
||||||
|
++iCounts[iChar];
|
||||||
|
++uCharCount;
|
||||||
|
}
|
||||||
|
if (uCharCount < 2)
|
||||||
|
return -9;
|
||||||
|
int iTotalScore = 0;
|
||||||
|
for (int i1 = 0; i1 < 23; ++i1)
|
||||||
|
{
|
||||||
|
int iCounts1 = iCounts[i1];
|
||||||
|
iTotalScore += iCounts1*(iCounts1 - 1)*Blosum62[i1][i1];
|
||||||
|
for (int i2 = i1 + 1; i2 < 23; ++i2)
|
||||||
|
iTotalScore += iCounts[i2]*iCounts1*2*Blosum62[i1][i2];
|
||||||
|
}
|
||||||
|
int iPairCount = uCharCount*(uCharCount - 1);
|
||||||
|
return (double) iTotalScore / (double) iPairCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
Consider a column with 5 As and 3 Bs.
|
||||||
|
A residue of type Q scores:
|
||||||
|
5xAQ + 3xBQ
|
||||||
|
***/
|
||||||
|
static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
|
||||||
|
{
|
||||||
|
int iCounts[23];
|
||||||
|
memset(iCounts, 0, sizeof(iCounts));
|
||||||
|
const unsigned uSeqCount = a.GetSeqCount();
|
||||||
|
unsigned uCharCount = 0;
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
{
|
||||||
|
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||||
|
if (IsGapChar(c))
|
||||||
|
continue;
|
||||||
|
int iChar = toi(c);
|
||||||
|
++iCounts[iChar];
|
||||||
|
++uCharCount;
|
||||||
|
}
|
||||||
|
int iMostConservedType = -1;
|
||||||
|
int iMostConservedCount = -1;
|
||||||
|
for (unsigned i = 0; i < 23; ++i)
|
||||||
|
{
|
||||||
|
if (iCounts[i] > iMostConservedCount)
|
||||||
|
{
|
||||||
|
iMostConservedType = i;
|
||||||
|
iMostConservedCount = iCounts[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double dColScore = BlosumScoreCol(a, uColIndex);
|
||||||
|
int c;
|
||||||
|
if (dColScore >= 3.0)
|
||||||
|
c = 3;
|
||||||
|
//else if (dColScore >= 1.0)
|
||||||
|
// c = 2;
|
||||||
|
else if (dColScore >= 0.2)
|
||||||
|
c = 1;
|
||||||
|
else
|
||||||
|
c = 0;
|
||||||
|
|
||||||
|
int Color[23];
|
||||||
|
for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
|
||||||
|
{
|
||||||
|
double dScore = Blosum62[uLetter][iMostConservedType];
|
||||||
|
if (dScore >= dColScore)
|
||||||
|
Color[uLetter] = c;
|
||||||
|
else
|
||||||
|
Color[uLetter] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
{
|
||||||
|
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||||
|
if (IsGapChar(c))
|
||||||
|
{
|
||||||
|
Colors[uSeqIndex][uColIndex] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int iLetter = toi(c);
|
||||||
|
if (iLetter >= 0 && iLetter < 23)
|
||||||
|
Colors[uSeqIndex][uColIndex] = Color[iLetter];
|
||||||
|
else
|
||||||
|
Colors[uSeqIndex][uColIndex] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AssignColors(const MSA &a, int **Colors)
|
||||||
|
{
|
||||||
|
const unsigned uColCount = a.GetColCount();
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||||
|
AssignColorsCol(a, uColIndex, Colors);
|
||||||
|
}
|
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
/***
|
||||||
|
Conservation value for a column in an MSA is defined as the number
|
||||||
|
of times the most common letter appears divided by the number of
|
||||||
|
sequences.
|
||||||
|
***/
|
||||||
|
|
||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double MSA::GetAvgCons() const
|
||||||
|
{
|
||||||
|
assert(GetSeqCount() > 0);
|
||||||
|
double dSum = 0;
|
||||||
|
unsigned uNonGapColCount = 0;
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)
|
||||||
|
{
|
||||||
|
if (!IsGapColumn(uColIndex))
|
||||||
|
{
|
||||||
|
dSum += GetCons(uColIndex);
|
||||||
|
++uNonGapColCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(uNonGapColCount > 0);
|
||||||
|
double dAvg = dSum / uNonGapColCount;
|
||||||
|
assert(dAvg > 0 && dAvg <= 1);
|
||||||
|
return dAvg;
|
||||||
|
}
|
||||||
|
|
||||||
|
double MSA::GetCons(unsigned uColIndex) const
|
||||||
|
{
|
||||||
|
unsigned Counts[MAX_ALPHA];
|
||||||
|
for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
|
||||||
|
Counts[uLetter] = 0;
|
||||||
|
|
||||||
|
unsigned uMaxCount = 0;
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||||
|
{
|
||||||
|
if (IsGap(uSeqIndex, uColIndex))
|
||||||
|
continue;
|
||||||
|
char c = GetChar(uSeqIndex, uColIndex);
|
||||||
|
c = toupper(c);
|
||||||
|
if ('X' == c || 'B' == c || 'Z' == c)
|
||||||
|
continue;
|
||||||
|
unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||||
|
unsigned uCount = Counts[uLetter] + 1;
|
||||||
|
if (uCount > uMaxCount)
|
||||||
|
uMaxCount = uCount;
|
||||||
|
Counts[uLetter] = uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cons is undefined for all-gap column
|
||||||
|
if (0 == uMaxCount)
|
||||||
|
{
|
||||||
|
// assert(false);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double dCons = (double) uMaxCount / (double) GetSeqCount();
|
||||||
|
assert(dCons > 0 && dCons <= 1);
|
||||||
|
return dCons;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perecent identity of a pair of sequences.
|
||||||
|
// Positions with one or both gapped are ignored.
|
||||||
|
double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
|
||||||
|
{
|
||||||
|
const unsigned uColCount = GetColCount();
|
||||||
|
unsigned uPosCount = 0;
|
||||||
|
unsigned uSameCount = 0;
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||||
|
{
|
||||||
|
const char c1 = GetChar(uSeqIndex1, uColIndex);
|
||||||
|
const char c2 = GetChar(uSeqIndex2, uColIndex);
|
||||||
|
if (IsGapChar(c1) || IsGapChar(c2))
|
||||||
|
continue;
|
||||||
|
if (c1 == c2)
|
||||||
|
++uSameCount;
|
||||||
|
++uPosCount;
|
||||||
|
}
|
||||||
|
if (0 == uPosCount)
|
||||||
|
return 0;
|
||||||
|
return (double) uSameCount / (double) uPosCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perecent group identity of a pair of sequences.
|
||||||
|
// Positions with one or both gapped are ignored.
|
||||||
|
double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
|
||||||
|
unsigned uSeqIndex2) const
|
||||||
|
{
|
||||||
|
extern unsigned ResidueGroup[];
|
||||||
|
|
||||||
|
const unsigned uColCount = GetColCount();
|
||||||
|
unsigned uPosCount = 0;
|
||||||
|
unsigned uSameCount = 0;
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||||
|
{
|
||||||
|
if (IsGap(uSeqIndex1, uColIndex))
|
||||||
|
continue;
|
||||||
|
if (IsGap(uSeqIndex2, uColIndex))
|
||||||
|
continue;
|
||||||
|
if (IsWildcard(uSeqIndex1, uColIndex))
|
||||||
|
continue;
|
||||||
|
if (IsWildcard(uSeqIndex2, uColIndex))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
|
||||||
|
const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
|
||||||
|
const unsigned uGroup1 = ResidueGroup[uLetter1];
|
||||||
|
const unsigned uGroup2 = ResidueGroup[uLetter2];
|
||||||
|
if (uGroup1 == uGroup2)
|
||||||
|
++uSameCount;
|
||||||
|
++uPosCount;
|
||||||
|
}
|
||||||
|
if (0 == uPosCount)
|
||||||
|
return 0;
|
||||||
|
return (double) uSameCount / (double) uPosCount;
|
||||||
|
}
|
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
@ -0,0 +1,378 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "diaglist.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
|
||||||
|
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||||
|
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||||
|
|
||||||
|
void DiagList::Add(const Diag &d)
|
||||||
|
{
|
||||||
|
if (m_uCount == MAX_DIAGS)
|
||||||
|
Quit("DiagList::Add, overflow %u", m_uCount);
|
||||||
|
m_Diags[m_uCount] = d;
|
||||||
|
++m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
|
||||||
|
{
|
||||||
|
Diag d;
|
||||||
|
d.m_uStartPosA = uStartPosA;
|
||||||
|
d.m_uStartPosB = uStartPosB;
|
||||||
|
d.m_uLength = uLength;
|
||||||
|
Add(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Diag &DiagList::Get(unsigned uIndex) const
{
    // Bounds-checked read access; an index past the current count is
    // reported through Quit().
    const bool bInRange = (uIndex < m_uCount);
    if (!bInRange)
        Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
    return m_Diags[uIndex];
}
|
||||||
|
|
||||||
|
void DiagList::LogMe() const
|
||||||
|
{
|
||||||
|
Log("DiagList::LogMe, count=%u\n", m_uCount);
|
||||||
|
Log(" n StartA StartB Length\n");
|
||||||
|
Log("--- ------ ------ ------\n");
|
||||||
|
for (unsigned n = 0; n < m_uCount; ++n)
|
||||||
|
{
|
||||||
|
const Diag &d = m_Diags[n];
|
||||||
|
Log("%3u %6u %6u %6u\n",
|
||||||
|
n, d.m_uStartPosA, d.m_uStartPosB, d.m_uLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiagList::FromPath(const PWPath &Path)
|
||||||
|
{
|
||||||
|
Clear();
|
||||||
|
|
||||||
|
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||||
|
unsigned uLength = 0;
|
||||||
|
unsigned uStartPosA;
|
||||||
|
unsigned uStartPosB;
|
||||||
|
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
|
||||||
|
// Typical cases
|
||||||
|
if (Edge.cType == 'M')
|
||||||
|
{
|
||||||
|
if (0 == uLength)
|
||||||
|
{
|
||||||
|
uStartPosA = Edge.uPrefixLengthA - 1;
|
||||||
|
uStartPosB = Edge.uPrefixLengthB - 1;
|
||||||
|
}
|
||||||
|
++uLength;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (uLength >= g_uMinDiagLength)
|
||||||
|
Add(uStartPosA, uStartPosB, uLength);
|
||||||
|
uLength = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special case for last edge
|
||||||
|
if (uLength >= g_uMinDiagLength)
|
||||||
|
Add(uStartPosA, uStartPosB, uLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DiagList::NonZeroIntersection(const Diag &d) const
|
||||||
|
{
|
||||||
|
for (unsigned n = 0; n < m_uCount; ++n)
|
||||||
|
{
|
||||||
|
const Diag &d2 = m_Diags[n];
|
||||||
|
if (DiagOverlap(d, d2) > 0)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiagOverlap returns the length of the overlapping
|
||||||
|
// section of the two diagonals along the diagonals
|
||||||
|
// themselves; in other words, the length of
|
||||||
|
// the intersection of the two sets of cells in
|
||||||
|
// the matrix.
|
||||||
|
unsigned DiagOverlap(const Diag &d1, const Diag &d2)
|
||||||
|
{
|
||||||
|
// Determine where the diagonals intersect the A
|
||||||
|
// axis (extending them if required). If they
|
||||||
|
// intersect at different points, they do not
|
||||||
|
// overlap. Coordinates on a diagonal are
|
||||||
|
// given by B = A + c where c is the value of
|
||||||
|
// A at the intersection with the A axis.
|
||||||
|
// Hence, c = B - A for any point on the diagonal.
|
||||||
|
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||||
|
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||||
|
if (c1 != c2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
|
||||||
|
return DiagOverlapA(d1, d2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiagOverlapA returns the length of the overlapping
|
||||||
|
// section of the projection of the two diagonals onto
|
||||||
|
// the A axis.
|
||||||
|
unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
|
||||||
|
{
|
||||||
|
unsigned uMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||||
|
unsigned uMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||||
|
d2.m_uStartPosA + d2.m_uLength - 1);
|
||||||
|
|
||||||
|
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||||
|
if (iLength < 0)
|
||||||
|
return 0;
|
||||||
|
return (unsigned) iLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiagOverlapB returns the length of the overlapping
|
||||||
|
// section of the projection of the two diagonals onto
|
||||||
|
// the B axis.
|
||||||
|
unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
|
||||||
|
{
|
||||||
|
unsigned uMaxStart = MAX(d1.m_uStartPosB, d2.m_uStartPosB);
|
||||||
|
unsigned uMinEnd = MIN(d1.m_uStartPosB + d1.m_uLength - 1,
|
||||||
|
d2.m_uStartPosB + d2.m_uLength - 1);
|
||||||
|
|
||||||
|
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||||
|
if (iLength < 0)
|
||||||
|
return 0;
|
||||||
|
return (unsigned) iLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if the two diagonals can be on the
|
||||||
|
// same path through the DP matrix. If DiagCompatible
|
||||||
|
// returns false, they cannot be in the same path
|
||||||
|
// and hence "contradict" each other.
|
||||||
|
bool DiagCompatible(const Diag &d1, const Diag &d2)
|
||||||
|
{
|
||||||
|
if (DiagOverlap(d1, d2) > 0)
|
||||||
|
return true;
|
||||||
|
return 0 == DiagOverlapA(d1, d2) && 0 == DiagOverlapB(d1, d2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the length of the "break" between two diagonals.
|
||||||
|
unsigned DiagBreak(const Diag &d1, const Diag &d2)
|
||||||
|
{
|
||||||
|
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||||
|
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||||
|
if (c1 != c2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||||
|
int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||||
|
d2.m_uStartPosA + d1.m_uLength - 1);
|
||||||
|
int iBreak = iMaxStart - iMinEnd - 1;
|
||||||
|
if (iBreak < 0)
|
||||||
|
return 0;
|
||||||
|
return (unsigned) iBreak;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge diagonals that are continuations of each other with
|
||||||
|
// short breaks of up to length g_uMaxDiagBreak.
|
||||||
|
// In a sorted list of diagonals, we only have to check
|
||||||
|
// consecutive entries.
|
||||||
|
void MergeDiags(DiagList &DL)
{
    // Merging is currently switched off: this unconditional return makes
    // the function a no-op and leaves everything below unreachable. The
    // body is kept for reference (see the TODO further down).
    return;

#if DEBUG
    if (!DL.IsSorted())
        Quit("MergeDiags: !IsSorted");
#endif

    // TODO: Fix this!
    // Breaks must be with no offset (no gaps)
    const unsigned uDiagCount = DL.GetCount();
    if (uDiagCount < 2)
        return;

    DiagList Merged;

    // Accum holds the diagonal being grown; pCurrent points either at it
    // or at an unmerged entry of DL.
    Diag Accum;
    const Diag *pCurrent = &DL.Get(0);
    for (unsigned n = 1; n < uDiagCount; ++n)
    {
        const Diag *pNext = &DL.Get(n);
        const unsigned uGap = DiagBreak(*pCurrent, *pNext);
        if (uGap > g_uMaxDiagBreak)
        {
            // Gap too long: emit what we have and restart from pNext.
            Merged.Add(*pCurrent);
            pCurrent = pNext;
            continue;
        }

        // Short gap: extend the current diagonal across it.
        Accum.m_uStartPosA = pCurrent->m_uStartPosA;
        Accum.m_uStartPosB = pCurrent->m_uStartPosB;
        Accum.m_uLength = pCurrent->m_uLength + pNext->m_uLength
          + uGap;
        pCurrent = &Accum;
    }
    Merged.Add(*pCurrent);
    DL.Copy(Merged);
}
|
||||||
|
|
||||||
|
void DiagList::DeleteIncompatible()
|
||||||
|
{
|
||||||
|
assert(IsSorted());
|
||||||
|
|
||||||
|
if (m_uCount < 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
bool *bFlagForDeletion = new bool[m_uCount];
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
bFlagForDeletion[i] = false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
const Diag &di = m_Diags[i];
|
||||||
|
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||||
|
{
|
||||||
|
const Diag &dj = m_Diags[j];
|
||||||
|
|
||||||
|
// Verify sorted correctly
|
||||||
|
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||||
|
|
||||||
|
// If two diagonals are incompatible and
|
||||||
|
// one is is much longer than the other,
|
||||||
|
// keep the longer one.
|
||||||
|
if (!DiagCompatible(di, dj))
|
||||||
|
{
|
||||||
|
if (di.m_uLength > dj.m_uLength*4)
|
||||||
|
bFlagForDeletion[j] = true;
|
||||||
|
else if (dj.m_uLength > di.m_uLength*4)
|
||||||
|
bFlagForDeletion[i] = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bFlagForDeletion[i] = true;
|
||||||
|
bFlagForDeletion[j] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
const Diag &di = m_Diags[i];
|
||||||
|
if (bFlagForDeletion[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||||
|
{
|
||||||
|
const Diag &dj = m_Diags[j];
|
||||||
|
if (bFlagForDeletion[j])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Verify sorted correctly
|
||||||
|
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||||
|
|
||||||
|
// If sort order in B different from sorted order in A,
|
||||||
|
// either diags are incompatible or we detected a repeat
|
||||||
|
// or permutation.
|
||||||
|
if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
|
||||||
|
{
|
||||||
|
bFlagForDeletion[i] = true;
|
||||||
|
bFlagForDeletion[j] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uNewCount = 0;
|
||||||
|
Diag *NewDiags = new Diag[m_uCount];
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
if (bFlagForDeletion[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const Diag &d = m_Diags[i];
|
||||||
|
NewDiags[uNewCount] = d;
|
||||||
|
++uNewCount;
|
||||||
|
}
|
||||||
|
memcpy(m_Diags, NewDiags, uNewCount*sizeof(Diag));
|
||||||
|
m_uCount = uNewCount;
|
||||||
|
delete[] NewDiags;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiagList::Copy(const DiagList &DL)
|
||||||
|
{
|
||||||
|
Clear();
|
||||||
|
unsigned uCount = DL.GetCount();
|
||||||
|
for (unsigned i = 0; i < uCount; ++i)
|
||||||
|
Add(DL.Get(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if sorted in increasing order of m_uStartPosA
|
||||||
|
bool DiagList::IsSorted() const
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
unsigned uCount = GetCount();
|
||||||
|
for (unsigned i = 1; i < uCount; ++i)
|
||||||
|
if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort in increasing order of m_uStartPosA
|
||||||
|
// Dumb bubble sort, but don't care about speed
|
||||||
|
// because don't get long lists.
|
||||||
|
void DiagList::Sort()
|
||||||
|
{
|
||||||
|
if (m_uCount < 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
bool bContinue = true;
|
||||||
|
while (bContinue)
|
||||||
|
{
|
||||||
|
bContinue = false;
|
||||||
|
for (unsigned i = 0; i < m_uCount - 1; ++i)
|
||||||
|
{
|
||||||
|
if (m_Diags[i].m_uStartPosA > m_Diags[i+1].m_uStartPosA)
|
||||||
|
{
|
||||||
|
Diag Tmp = m_Diags[i];
|
||||||
|
m_Diags[i] = m_Diags[i+1];
|
||||||
|
m_Diags[i+1] = Tmp;
|
||||||
|
bContinue = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//void TestDiag()
|
||||||
|
// {
|
||||||
|
// Diag d1;
|
||||||
|
// Diag d2;
|
||||||
|
// Diag d3;
|
||||||
|
//
|
||||||
|
// d1.m_uStartPosA = 0;
|
||||||
|
// d1.m_uStartPosB = 1;
|
||||||
|
// d1.m_uLength = 32;
|
||||||
|
//
|
||||||
|
// d2.m_uStartPosA = 55;
|
||||||
|
// d2.m_uStartPosB = 70;
|
||||||
|
// d2.m_uLength = 36;
|
||||||
|
//
|
||||||
|
// d3.m_uStartPosA = 102;
|
||||||
|
// d3.m_uStartPosB = 122;
|
||||||
|
// d3.m_uLength = 50;
|
||||||
|
//
|
||||||
|
// DiagList DL;
|
||||||
|
// DL.Add(d1);
|
||||||
|
// DL.Add(d2);
|
||||||
|
// DL.Add(d3);
|
||||||
|
//
|
||||||
|
// Log("Before DeleteIncompatible:\n");
|
||||||
|
// DL.LogMe();
|
||||||
|
// DL.DeleteIncompatible();
|
||||||
|
//
|
||||||
|
// Log("After DeleteIncompatible:\n");
|
||||||
|
// DL.LogMe();
|
||||||
|
//
|
||||||
|
// MergeDiags(DL);
|
||||||
|
// Log("After Merge:\n");
|
||||||
|
// DL.LogMe();
|
||||||
|
//
|
||||||
|
// DPRegionList RL;
|
||||||
|
// DiagListToDPRegionList(DL, RL, 200, 200);
|
||||||
|
// RL.LogMe();
|
||||||
|
// }
|
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#ifndef diaglist_h
|
||||||
|
#define diaglist_h
|
||||||
|
|
||||||
|
// All-bits-set value; presumably used as a "no value" sentinel (not
// referenced in this header -- confirm against callers).
const unsigned EMPTY = ~0u;

// Capacity of the fixed-size array inside DiagList.
const unsigned MAX_DIAGS = 1024;
|
||||||
|
|
||||||
|
// A diagonal: a run of m_uLength consecutive cells starting at
// (m_uStartPosA, m_uStartPosB) in which the A and B positions advance
// together.
struct Diag
{
    unsigned m_uStartPosA;  // position of the first cell along A
    unsigned m_uStartPosB;  // position of the first cell along B
    unsigned m_uLength;     // number of cells in the run
};
|
||||||
|
|
||||||
|
// A start position and a length along each of A and B. Presumably a
// rectangular region of the A x B matrix -- not used in this header;
// confirm against callers.
struct Rect
{
    unsigned m_uStartPosA;
    unsigned m_uStartPosB;
    unsigned m_uLengthA;
    unsigned m_uLengthB;
};
|
||||||
|
|
||||||
|
class DiagList
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
DiagList()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
~DiagList()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Creation
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
void FromPath(const PWPath &Path);
|
||||||
|
void Add(const Diag &d);
|
||||||
|
void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength);
|
||||||
|
void DeleteIncompatible();
|
||||||
|
|
||||||
|
// Accessors
|
||||||
|
unsigned GetCount() const
|
||||||
|
{
|
||||||
|
return m_uCount;
|
||||||
|
}
|
||||||
|
const Diag &Get(unsigned uIndex) const;
|
||||||
|
|
||||||
|
// Operations
|
||||||
|
void Sort();
|
||||||
|
void Copy(const DiagList &DL);
|
||||||
|
|
||||||
|
// Query
|
||||||
|
// returns true iff given diagonal is included in the list
|
||||||
|
// in whole or in part.
|
||||||
|
bool NonZeroIntersection(const Diag &d) const;
|
||||||
|
bool IsSorted() const;
|
||||||
|
|
||||||
|
// Diagnostics
|
||||||
|
void LogMe() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Free()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned m_uCount;
|
||||||
|
Diag m_Diags[MAX_DIAGS];
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned DiagOverlap(const Diag &d1, const Diag &d2);
|
||||||
|
unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
|
||||||
|
unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
|
||||||
|
unsigned DiagBreak(const Diag &d1, const Diag &d2);
|
||||||
|
bool DiagCompatible(const Diag &d1, const Diag &d2);
|
||||||
|
void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||||
|
unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
|
||||||
|
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||||
|
unsigned uLengthY, DiagList &DL);
|
||||||
|
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||||
|
unsigned uLengthY, DiagList &DL);
|
||||||
|
void MergeDiags(DiagList &DL);
|
||||||
|
|
||||||
|
#endif // diaglist_h
|
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "objscore.h"
|
||||||
|
#include "profile.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
#define COMPARE_3_52 0
|
||||||
|
#define BRUTE_LETTERS 0
|
||||||
|
|
||||||
|
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
{
    // Letter-vs-letter score of one column: the sum over all pairs of
    // distinct sequences of w1*w2*Mx[letter1][letter2]. Entries whose
    // letter is >= g_AlphaSize are skipped. The sum is computed from
    // per-letter weight totals rather than by visiting every pair.
    SCOREMATRIX &Mx = *g_ptrScoreMatrix;
    const unsigned uSeqCount = msa.GetSeqCount();

#if BRUTE_LETTERS
    // Reference implementation: visit every pair explicitly.
    SCORE BruteScore = 0;
    for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
    {
        unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
        if (uLetter1 >= g_AlphaSize)
            continue;
        WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
        for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
        {
            unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
            if (uLetter2 >= g_AlphaSize)
                continue;
            WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
            BruteScore += w1*w2*Mx[uLetter1][uLetter2];
        }
    }
#endif

    // No score when the total sequence weight is not positive.
    double N = 0;
    for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
    {
        WEIGHT w = msa.GetSeqWeight(uSeq);
        N += w;
    }
    if (N <= 0)
        return 0;

    // Accumulate the weight total per letter. Each sequence's self-pair
    // term is subtracted up front, because the squared totals added below
    // would otherwise count it.
    FCOUNT Freqs[20];
    memset(Freqs, 0, sizeof(Freqs));
    SCORE Score = 0;
    for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
    {
        unsigned uLetter = msa.GetLetterEx(uSeq, uColIndex);
        if (uLetter < g_AlphaSize)
        {
            WEIGHT w = msa.GetSeqWeight(uSeq);
            Freqs[uLetter] += w;
            Score -= w*w*Mx[uLetter][uLetter];
        }
    }

    // Add every ordered letter pair; the off-diagonal terms are doubled
    // because only uLetter2 > uLetter1 is visited.
    for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
    {
        const FCOUNT f1 = Freqs[uLetter1];
        Score += f1*f1*Mx[uLetter1][uLetter1];
        unsigned uLetter2 = uLetter1 + 1;
        while (uLetter2 < g_AlphaSize)
        {
            const FCOUNT f2 = Freqs[uLetter2];
            Score += 2*f1*f2*Mx[uLetter1][uLetter2];
            ++uLetter2;
        }
    }

    // Ordered pairs were counted, so halve to get unordered pairs.
    Score /= 2;
#if BRUTE_LETTERS
    assert(BTEq(BruteScore, Score));
#endif
    return Score;
}
|
||||||
|
|
||||||
|
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
{
    // Sums ScoreColLetters() over the columns listed in Edges[0..uEdgeCount).
    // Each entry of Edges is used as a column index into msa.
    // (The unused local uSeqCount has been removed.)
    const unsigned uColCount = msa.GetColCount();
    (void) uColCount; // referenced only by the assert below

    // Letters
    SCORE Score = 0;
    for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
    {
        const unsigned uColIndex = Edges[uEdgeIndex];
        assert(uColIndex < uColCount);
        Score += ScoreColLetters(msa, uColIndex);
    }
    return Score;
}
|
||||||
|
|
||||||
|
void GetLetterScores(const MSA &msa, SCORE Scores[])
|
||||||
|
{
|
||||||
|
const unsigned uColCount = msa.GetColCount();
|
||||||
|
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||||
|
Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
SCORE DiffObjScore(
  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1,
  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
{
    // Returns (letters + gaps score of msa2) - (letters + gaps score of
    // msa1), each evaluated only over the columns listed in its Edges
    // array. Path1 and Path2 are not used by this function.
#if TRACE
    {
    Log("============DiffObjScore===========\n");
    Log("msa1:\n");
    msa1.LogMe();
    Log("\n");
    Log("Cols1: ");
    for (unsigned i = 0; i < uEdgeCount1; ++i)
        Log(" %u", Edges1[i]);
    Log("\n\n");
    Log("msa2:\n");
    msa2.LogMe();
    Log("Cols2: ");
    for (unsigned i = 0; i < uEdgeCount2; ++i)
        Log(" %u", Edges2[i]);
    Log("\n\n");
    }
#endif

#if COMPARE_3_52
    // Cross-check values from the full objective score. ObjScoreSP()
    // leaves its letter and gap parts in these globals.
    extern SCORE g_SPScoreLetters;
    extern SCORE g_SPScoreGaps;
    SCORE SP1 = ObjScoreSP(msa1);
    SCORE SPLetters1 = g_SPScoreLetters;
    SCORE SPGaps1 = g_SPScoreGaps;

    SCORE SP2 = ObjScoreSP(msa2);
    SCORE SPLetters2 = g_SPScoreLetters;
    SCORE SPGaps2 = g_SPScoreGaps;
    SCORE SPDiffLetters = SPLetters2 - SPLetters1;
    SCORE SPDiffGaps = SPGaps2 - SPGaps1;
    SCORE SPDiff = SPDiffLetters + SPDiffGaps;
#endif

    // Letter parts first, then gap parts.
    const SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
    const SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);

    const SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
    const SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);

    const SCORE DiffLetters = Letters2 - Letters1;
    const SCORE DiffGaps = Gaps2 - Gaps1;
    const SCORE Diff = DiffLetters + DiffGaps;

#if COMPARE_3_52
    Log("ObjScoreSP Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
      SPLetters1, SPLetters2, SPDiffLetters);

    Log("DiffObjScore Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
      Letters1, Letters2, DiffLetters);

    Log("ObjScoreSP Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
      SPGaps1, SPGaps2, SPDiffGaps);

    Log("DiffObjScore Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
      Gaps1, Gaps2, DiffGaps);

    Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
#endif

    return Diff;
}
|
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
|
||||||
|
unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("DiffPaths\n");
|
||||||
|
Log("p1=");
|
||||||
|
p1.LogMe();
|
||||||
|
Log("p2=");
|
||||||
|
p2.LogMe();
|
||||||
|
#endif
|
||||||
|
const unsigned uEdgeCount1 = p1.GetEdgeCount();
|
||||||
|
const unsigned uEdgeCount2 = p2.GetEdgeCount();
|
||||||
|
|
||||||
|
unsigned uDiffCount1 = 0;
|
||||||
|
unsigned uDiffCount2 = 0;
|
||||||
|
unsigned uEdgeIndex1 = 0;
|
||||||
|
unsigned uEdgeIndex2 = 0;
|
||||||
|
const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||||
|
const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
unsigned uEdgeIndexTop1 = uEdgeIndex1;
|
||||||
|
unsigned uEdgeIndexTop2 = uEdgeIndex2;
|
||||||
|
Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||||
|
Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||||
|
#if TRACE
|
||||||
|
Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c DC1=%u DC2=%u\n",
|
||||||
|
uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
|
||||||
|
uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
|
||||||
|
uDiffCount1, uDiffCount2);
|
||||||
|
#endif
|
||||||
|
if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
|
||||||
|
Edge1->uPrefixLengthB == Edge2->uPrefixLengthB)
|
||||||
|
{
|
||||||
|
if (!Edge1->Equal(*Edge2))
|
||||||
|
{
|
||||||
|
Edges1[uDiffCount1++] = uEdgeIndex1;
|
||||||
|
Edges2[uDiffCount2++] = uEdgeIndex2;
|
||||||
|
}
|
||||||
|
++uEdgeIndex1;
|
||||||
|
++uEdgeIndex2;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
|
||||||
|
Edge2->uPrefixLengthB < Edge1->uPrefixLengthB)
|
||||||
|
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||||
|
|
||||||
|
else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
|
||||||
|
Edge1->uPrefixLengthB < Edge2->uPrefixLengthB)
|
||||||
|
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||||
|
|
||||||
|
if (uEdgeCount1 == uEdgeIndex1)
|
||||||
|
{
|
||||||
|
while (uEdgeIndex2 < uEdgeCount2)
|
||||||
|
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||||
|
goto Done;
|
||||||
|
}
|
||||||
|
if (uEdgeCount2 == uEdgeIndex2)
|
||||||
|
{
|
||||||
|
while (uEdgeIndex1 < uEdgeCount1)
|
||||||
|
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||||
|
goto Done;
|
||||||
|
}
|
||||||
|
if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2)
|
||||||
|
Quit("DiffPaths stuck");
|
||||||
|
}
|
||||||
|
Done:;
|
||||||
|
#if TRACE
|
||||||
|
Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
|
||||||
|
Log("Diffs1=");
|
||||||
|
for (unsigned i = 0; i < uDiffCount1; ++i)
|
||||||
|
{
|
||||||
|
const PWEdge e = p1.GetEdge(Edges1[i]);
|
||||||
|
Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
Log("DiffCount2=%u\n", uDiffCount2);
|
||||||
|
Log("Diffs2=");
|
||||||
|
for (unsigned i = 0; i < uDiffCount2; ++i)
|
||||||
|
{
|
||||||
|
const PWEdge e = p2.GetEdge(Edges2[i]);
|
||||||
|
Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
#endif
|
||||||
|
*ptruDiffCount1 = uDiffCount1;
|
||||||
|
*ptruDiffCount2 = uDiffCount2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestDiffPaths()
|
||||||
|
{
|
||||||
|
PWPath p1;
|
||||||
|
PWPath p2;
|
||||||
|
|
||||||
|
p1.AppendEdge('M', 1, 1);
|
||||||
|
p1.AppendEdge('M', 2, 2);
|
||||||
|
p1.AppendEdge('M', 3, 3);
|
||||||
|
|
||||||
|
p2.AppendEdge('M', 1, 1);
|
||||||
|
p2.AppendEdge('D', 2, 1);
|
||||||
|
p2.AppendEdge('I', 2, 2);
|
||||||
|
p2.AppendEdge('M', 3, 3);
|
||||||
|
|
||||||
|
unsigned Edges1[64];
|
||||||
|
unsigned Edges2[64];
|
||||||
|
unsigned uDiffCount1;
|
||||||
|
unsigned uDiffCount2;
|
||||||
|
DiffPaths(p1, p2, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
|
||||||
|
}
|
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "tree.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
/***
|
||||||
|
Algorithm to compare two trees, X and Y.
|
||||||
|
|
||||||
|
A node x in X and node y in Y are defined to be
|
||||||
|
similar iff the set of leaves in the subtree under
|
||||||
|
x is identical to the set of leaves under y.
|
||||||
|
|
||||||
|
A node is defined to be dissimilar iff it is not
|
||||||
|
similar to any node in the other tree.
|
||||||
|
|
||||||
|
Nodes x and y are defined to be married iff every
|
||||||
|
node in the subtree under x is similar to a node
|
||||||
|
in the subtree under y. Married nodes are considered
|
||||||
|
to be equal. The subtrees under two married nodes can
|
||||||
|
at most differ by exchanges of left and right branches,
|
||||||
|
which we do not consider to be significant here.
|
||||||
|
|
||||||
|
A node is defined to be a bachelor iff it is not
|
||||||
|
married. If a node is a bachelor, then it has a
|
||||||
|
dissimilar node in its subtree, and it follows
|
||||||
|
immediately from the definition of marriage that its
|
||||||
|
parent is also a bachelor. Hence all nodes on the path
|
||||||
|
from a bachelor node to the root are bachelors.
|
||||||
|
|
||||||
|
We assume the trees have the same set of leaves, so
|
||||||
|
every leaf is trivially both similar and married to
|
||||||
|
the same leaf in the opposite tree. Bachelor nodes
|
||||||
|
are therefore always internal (i.e., non-leaf) nodes.
|
||||||
|
|
||||||
|
A node is defined to be a diff iff (a) it is married
|
||||||
|
and (b) its parent is a bachelor. The subtree under
|
||||||
|
a diff is maximally similar to the other tree. (In
|
||||||
|
other words, you cannot extend the subtree without
|
||||||
|
adding a bachelor).
|
||||||
|
|
||||||
|
The set of diffs is the subset of the two trees that
|
||||||
|
we consider to be identical.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
-----A
|
||||||
|
-----k
|
||||||
|
----j -----B
|
||||||
|
--i -----C
|
||||||
|
------D
|
||||||
|
|
||||||
|
|
||||||
|
-----A
|
||||||
|
-----p
|
||||||
|
----n -----B
|
||||||
|
--m -----D
|
||||||
|
------C
|
||||||
|
|
||||||
|
|
||||||
|
The following pairs of internal nodes are similar.
|
||||||
|
|
||||||
|
Nodes Set of leaves
|
||||||
|
----- -------------
|
||||||
|
k,p A,B
|
||||||
|
i,m A,B,C,D
|
||||||
|
|
||||||
|
Bachelors in the first tree are i and j, bachelors
|
||||||
|
in the second tree are m and n.
|
||||||
|
|
||||||
|
Node k and p are married, but i and m are not (because j
|
||||||
|
and n are bachelors). The diffs are C, D and k.
|
||||||
|
|
||||||
|
The set of bachelor nodes can be viewed as the internal
|
||||||
|
nodes of a tree, the leaves of which are diffs. (To see
|
||||||
|
that there can't be disjoint subtrees, note that the path
|
||||||
|
from a diff to a root is all bachelor nodes, so there is
|
||||||
|
always a path between two diffs that goes through the root).
|
||||||
|
We call this tree the "diffs tree".
|
||||||
|
|
||||||
|
There is a simple O(N) algorithm to build the diffs tree.
|
||||||
|
To achieve O(N) we avoid traversing a given subtree multiple
|
||||||
|
times and also avoid comparing lists of leaves.
|
||||||
|
|
||||||
|
We visit nodes in depth-first order (i.e., a node is visited
|
||||||
|
before its parent).
|
||||||
|
|
||||||
|
If either child of a node is a bachelor, we flag it as
|
||||||
|
a bachelor.
|
||||||
|
|
||||||
|
If both children of the node we are visiting are married,
|
||||||
|
we check whether the spouses of those children have the
|
||||||
|
same parent in the other tree. If the parents are different,
|
||||||
|
the current node is a bachelor. If they have the same parent,
|
||||||
|
then the node we are visiting is the spouse of that parent.
|
||||||
|
We assign this newly identified married couple a unique integer
|
||||||
|
id. The id of a node is in one-to-one correspondence with the
|
||||||
|
set of leaves in its subtree. Two nodes have the same set of
|
||||||
|
leaves iff they have the same id. Bachelor nodes do not get
|
||||||
|
an id.
|
||||||
|
***/
|
||||||
|
|
||||||
|
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
|
||||||
|
const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
|
||||||
|
unsigned IdToDiffsLeafNodeIndex[])
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
|
||||||
|
uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
|
||||||
|
#endif
|
||||||
|
if (bIsDiff[uTreeNodeIndex])
|
||||||
|
{
|
||||||
|
unsigned uLeafCount = tree.GetLeafCount();
|
||||||
|
unsigned *Leaves = new unsigned[uLeafCount];
|
||||||
|
GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
{
|
||||||
|
const unsigned uLeafNodeIndex = Leaves[n];
|
||||||
|
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||||
|
if (uId >= tree.GetLeafCount())
|
||||||
|
Quit("BuildDiffs, id out of range");
|
||||||
|
IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
|
||||||
|
#if TRACE
|
||||||
|
Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
delete[] Leaves;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tree.IsLeaf(uTreeNodeIndex))
|
||||||
|
Quit("BuildDiffs: should never reach leaf");
|
||||||
|
|
||||||
|
const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
|
||||||
|
const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);
|
||||||
|
|
||||||
|
const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
|
||||||
|
const unsigned uDiffsRight = uDiffsLeft + 1;
|
||||||
|
|
||||||
|
BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
|
||||||
|
BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
|
||||||
|
unsigned IdToDiffsLeafNodeIndex[])
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("Tree1:\n");
|
||||||
|
Tree1.LogMe();
|
||||||
|
Log("\n");
|
||||||
|
Log("Tree2:\n");
|
||||||
|
Tree2.LogMe();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (!Tree1.IsRooted() || !Tree2.IsRooted())
|
||||||
|
Quit("DiffTrees: requires rooted trees");
|
||||||
|
|
||||||
|
const unsigned uNodeCount = Tree1.GetNodeCount();
|
||||||
|
const unsigned uNodeCount2 = Tree2.GetNodeCount();
|
||||||
|
|
||||||
|
const unsigned uLeafCount = Tree1.GetLeafCount();
|
||||||
|
const unsigned uLeafCount2 = Tree2.GetLeafCount();
|
||||||
|
assert(uLeafCount == uLeafCount2);
|
||||||
|
|
||||||
|
if (uNodeCount != uNodeCount2)
|
||||||
|
Quit("DiffTrees: different node counts");
|
||||||
|
|
||||||
|
// Allocate tables so we can convert tree node index to
|
||||||
|
// and from the unique id with a O(1) lookup.
|
||||||
|
unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
|
||||||
|
unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
|
||||||
|
|
||||||
|
bool *bIsBachelor1 = new bool[uNodeCount];
|
||||||
|
bool *bIsDiff1 = new bool[uNodeCount];
|
||||||
|
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
NodeIndexToId1[uNodeIndex] = uNodeCount;
|
||||||
|
bIsBachelor1[uNodeIndex] = false;
|
||||||
|
bIsDiff1[uNodeIndex] = false;
|
||||||
|
|
||||||
|
// Use uNodeCount as value meaning "not set".
|
||||||
|
IdToNodeIndex2[uNodeIndex] = uNodeCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize node index <-> id lookup tables
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
if (Tree1.IsLeaf(uNodeIndex))
|
||||||
|
{
|
||||||
|
const unsigned uId = Tree1.GetLeafId(uNodeIndex);
|
||||||
|
if (uId >= uNodeCount)
|
||||||
|
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||||
|
NodeIndexToId1[uNodeIndex] = uId;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Tree2.IsLeaf(uNodeIndex))
|
||||||
|
{
|
||||||
|
const unsigned uId = Tree2.GetLeafId(uNodeIndex);
|
||||||
|
if (uId >= uNodeCount)
|
||||||
|
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||||
|
IdToNodeIndex2[uId] = uNodeIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validity check. This verifies that the ids
|
||||||
|
// pre-assigned to the leaves in Tree1 are unique
|
||||||
|
// (note that the id<N check above does not rule
|
||||||
|
// out two leaves having duplicate ids).
|
||||||
|
for (unsigned uId = 0; uId < uLeafCount; ++uId)
|
||||||
|
{
|
||||||
|
unsigned uNodeIndex2 = IdToNodeIndex2[uId];
|
||||||
|
if (uNodeCount == uNodeIndex2)
|
||||||
|
Quit("DiffTrees, check 2");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ids assigned to internal nodes are N, N+1 ...
|
||||||
|
// An internal node id uniquely identifies a set
|
||||||
|
// of two or more leaves.
|
||||||
|
unsigned uInternalNodeId = uLeafCount;
|
||||||
|
|
||||||
|
// Depth-first traversal of tree.
|
||||||
|
// The order guarantees that a node is visited before
|
||||||
|
// its parent is visited.
|
||||||
|
for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
|
||||||
|
NULL_NEIGHBOR != uNodeIndex1;
|
||||||
|
uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
|
||||||
|
uNodeIndex1,
|
||||||
|
Tree1.IsLeaf(uNodeIndex1),
|
||||||
|
bIsBachelor1[uNodeIndex1]);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Leaves are trivial; nothing to do.
|
||||||
|
if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If either child is a bachelor, flag
|
||||||
|
// this node as a bachelor and continue.
|
||||||
|
unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
|
||||||
|
if (bIsBachelor1[uLeft1])
|
||||||
|
{
|
||||||
|
bIsBachelor1[uNodeIndex1] = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
|
||||||
|
if (bIsBachelor1[uRight1])
|
||||||
|
{
|
||||||
|
bIsBachelor1[uNodeIndex1] = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both children are married.
|
||||||
|
// Married nodes are guaranteed to have an id.
|
||||||
|
unsigned uIdLeft = NodeIndexToId1[uLeft1];
|
||||||
|
unsigned uIdRight = NodeIndexToId1[uRight1];
|
||||||
|
|
||||||
|
if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
|
||||||
|
Quit("DiffTrees, check 5");
|
||||||
|
|
||||||
|
// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
|
||||||
|
unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
|
||||||
|
unsigned uRight2 = IdToNodeIndex2[uIdRight];
|
||||||
|
|
||||||
|
if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
|
||||||
|
Quit("DiffTrees, check 6");
|
||||||
|
|
||||||
|
// If the spouses of uLeft1 and uRight1 have the same
|
||||||
|
// parent, then this parent is the spouse of uNodeIndex1.
|
||||||
|
// Otherwise, uNodeIndex1 is a diff.
|
||||||
|
unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
|
||||||
|
unsigned uParentRight2 = Tree2.GetParent(uRight2);
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
|
||||||
|
uLeft1,
|
||||||
|
uRight1,
|
||||||
|
uLeft2,
|
||||||
|
uRight2,
|
||||||
|
uParentLeft2,
|
||||||
|
uParentRight2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (uParentLeft2 == uParentRight2)
|
||||||
|
{
|
||||||
|
NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
|
||||||
|
IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
|
||||||
|
++uInternalNodeId;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bIsBachelor1[uNodeIndex1] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uDiffCount = 0;
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
if (bIsBachelor1[uNodeIndex])
|
||||||
|
continue;
|
||||||
|
if (Tree1.IsRoot(uNodeIndex))
|
||||||
|
{
|
||||||
|
// Special case: if no bachelors, consider the
|
||||||
|
// root a diff.
|
||||||
|
if (!bIsBachelor1[uNodeIndex])
|
||||||
|
bIsDiff1[uNodeIndex] = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const unsigned uParent = Tree1.GetParent(uNodeIndex);
|
||||||
|
if (bIsBachelor1[uParent])
|
||||||
|
{
|
||||||
|
bIsDiff1[uNodeIndex] = true;
|
||||||
|
++uDiffCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("Tree1:\n");
|
||||||
|
Log("Node Id Bach Diff Name\n");
|
||||||
|
Log("---- ---- ---- ---- ----\n");
|
||||||
|
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||||
|
{
|
||||||
|
Log("%4u %4u %d %d",
|
||||||
|
n,
|
||||||
|
NodeIndexToId1[n],
|
||||||
|
bIsBachelor1[n],
|
||||||
|
bIsDiff1[n]);
|
||||||
|
if (Tree1.IsLeaf(n))
|
||||||
|
Log(" %s", Tree1.GetLeafName(n));
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
Log("Tree2:\n");
|
||||||
|
Log("Node Id Name\n");
|
||||||
|
Log("---- ---- ----\n");
|
||||||
|
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||||
|
{
|
||||||
|
Log("%4u ", n);
|
||||||
|
if (Tree2.IsLeaf(n))
|
||||||
|
Log(" %s", Tree2.GetLeafName(n));
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Diffs.CreateRooted();
|
||||||
|
const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
|
||||||
|
const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
IdToDiffsLeafNodeIndex[n] = uNodeCount;
|
||||||
|
|
||||||
|
BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
|
||||||
|
IdToDiffsLeafNodeIndex);
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
Log("\n");
|
||||||
|
Log("Diffs:\n");
|
||||||
|
Diffs.LogMe();
|
||||||
|
Log("\n");
|
||||||
|
Log("IdToDiffsLeafNodeIndex:");
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
{
|
||||||
|
if (n%16 == 0)
|
||||||
|
Log("\n");
|
||||||
|
else
|
||||||
|
Log(" ");
|
||||||
|
Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||||
|
if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
|
||||||
|
Quit("TreeDiffs check 7");
|
||||||
|
|
||||||
|
delete[] NodeIndexToId1;
|
||||||
|
delete[] IdToNodeIndex2;
|
||||||
|
|
||||||
|
delete[] bIsBachelor1;
|
||||||
|
delete[] bIsDiff1;
|
||||||
|
}
|
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "tree.h"
|
||||||
|
|
||||||
|
#define TRACE 0
|
||||||
|
|
||||||
|
/***
|
||||||
|
Algorithm to compare two trees, X and Y.
|
||||||
|
|
||||||
|
A node x in X and node y in Y are defined to be
|
||||||
|
similar iff the set of leaves in the subtree under
|
||||||
|
x is identical to the set of leaves under y.
|
||||||
|
|
||||||
|
A node is defined to be changed iff it is not
|
||||||
|
similar to any node in the other tree.
|
||||||
|
|
||||||
|
Nodes x and y are defined to be married iff every
|
||||||
|
node in the subtree under x is similar to a node
|
||||||
|
in the subtree under y. Married nodes are considered
|
||||||
|
to be equal. The subtrees under two married nodes can
|
||||||
|
at most differ by exchanges of left and right branches,
|
||||||
|
which we do not consider to be significant here.
|
||||||
|
|
||||||
|
A node is changed iff it is not married. If a node is
|
||||||
|
changed, then it has a dissimilar node in its subtree,
|
||||||
|
and it follows immediately from the definition of marriage
|
||||||
|
that its parent is also changed. Hence all nodes on the
|
||||||
|
path from a changed node to the root are changed.
|
||||||
|
|
||||||
|
We assume the trees have the same set of leaves, so
|
||||||
|
every leaf is trivially both similar and married to
|
||||||
|
the same leaf in the opposite tree. Changed nodes
|
||||||
|
are therefore always internal (i.e., non-leaf) nodes.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
-----A
|
||||||
|
-----k
|
||||||
|
----j -----B
|
||||||
|
--i -----C
|
||||||
|
------D
|
||||||
|
|
||||||
|
|
||||||
|
-----A
|
||||||
|
-----p
|
||||||
|
----n -----B
|
||||||
|
--m -----D
|
||||||
|
------C
|
||||||
|
|
||||||
|
|
||||||
|
The following pairs of internal nodes are similar.
|
||||||
|
|
||||||
|
Nodes Set of leaves
|
||||||
|
----- -------------
|
||||||
|
k,p A,B
|
||||||
|
i,m A,B,C,D
|
||||||
|
|
||||||
|
Changed nodes in the first tree are i and j, changed nodes
|
||||||
|
in the second tree are m and n.
|
||||||
|
|
||||||
|
Node k and p are married, but i and m are not (because j
|
||||||
|
and n are changed). The diffs are C, D and k.
|
||||||
|
|
||||||
|
To achieve O(N) we avoid traversing a given subtree multiple
|
||||||
|
times and also avoid comparing lists of leaves.
|
||||||
|
|
||||||
|
We visit nodes in depth-first order (i.e., a node is visited
|
||||||
|
before its parent).
|
||||||
|
|
||||||
|
If either child of a node is changed, we flag it as changed.
|
||||||
|
|
||||||
|
If both children of the node we are visiting are married,
|
||||||
|
we check whether the spouses of those children have the
|
||||||
|
same parent in the other tree. If the parents are different,
|
||||||
|
the current node is changed. If they have the same parent,
|
||||||
|
then the node we are visiting is the spouse of that parent.
|
||||||
|
We assign this newly identified married couple a unique integer
|
||||||
|
id. The id of a node is in one-to-one correspondence with the
|
||||||
|
set of leaves in its subtree. Two nodes have the same set of
|
||||||
|
leaves iff they have the same id. Changed nodes do not get
|
||||||
|
an id.
|
||||||
|
***/
|
||||||
|
|
||||||
|
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
|
||||||
|
unsigned NewNodeIndexToOldNodeIndex[])
|
||||||
|
{
|
||||||
|
#if TRACE
|
||||||
|
Log("DiffTreesE NewTree:\n");
|
||||||
|
NewTree.LogMe();
|
||||||
|
Log("\n");
|
||||||
|
Log("OldTree:\n");
|
||||||
|
OldTree.LogMe();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (!NewTree.IsRooted() || !OldTree.IsRooted())
|
||||||
|
Quit("DiffTrees: requires rooted trees");
|
||||||
|
|
||||||
|
const unsigned uNodeCount = NewTree.GetNodeCount();
|
||||||
|
const unsigned uOldNodeCount = OldTree.GetNodeCount();
|
||||||
|
const unsigned uLeafCount = NewTree.GetLeafCount();
|
||||||
|
const unsigned uOldLeafCount = OldTree.GetLeafCount();
|
||||||
|
if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
|
||||||
|
Quit("DiffTreesE: different node counts");
|
||||||
|
|
||||||
|
{
|
||||||
|
unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
|
||||||
|
for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
|
||||||
|
{
|
||||||
|
if (OldTree.IsLeaf(uOldNodeIndex))
|
||||||
|
{
|
||||||
|
unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
|
||||||
|
IdToOldNodeIndex[Id] = uOldNodeIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize NewNodeIndexToOldNodeIndex[]
|
||||||
|
// All internal nodes are marked as changed, but may be updated later.
|
||||||
|
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||||
|
{
|
||||||
|
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||||
|
{
|
||||||
|
unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
|
||||||
|
assert(uId < uLeafCount);
|
||||||
|
|
||||||
|
unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
|
||||||
|
assert(uOldNodeIndex < uNodeCount);
|
||||||
|
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||||
|
}
|
||||||
|
delete[] IdToOldNodeIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Depth-first traversal of tree.
|
||||||
|
// The order guarantees that a node is visited before
|
||||||
|
// its parent is visited.
|
||||||
|
for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
|
||||||
|
NULL_NEIGHBOR != uNewNodeIndex;
|
||||||
|
uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
|
||||||
|
{
|
||||||
|
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If either child is changed, flag this node as changed and continue.
|
||||||
|
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||||
|
unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||||
|
if (NODE_CHANGED == uOldLeft)
|
||||||
|
{
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||||
|
unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||||
|
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
|
||||||
|
{
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
|
||||||
|
unsigned uOldParentRight = OldTree.GetParent(uOldRight);
|
||||||
|
if (uOldParentLeft == uOldParentRight)
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
|
||||||
|
else
|
||||||
|
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if TRACE
|
||||||
|
{
|
||||||
|
Log("NewToOld ");
|
||||||
|
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||||
|
{
|
||||||
|
Log(" [%3u]=", uNewNodeIndex);
|
||||||
|
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
|
||||||
|
Log(" X");
|
||||||
|
else
|
||||||
|
Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
|
||||||
|
if ((uNewNodeIndex+1)%8 == 0)
|
||||||
|
Log("\n ");
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
{
|
||||||
|
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||||
|
{
|
||||||
|
unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
|
||||||
|
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||||
|
{
|
||||||
|
if (uOld >= uNodeCount)
|
||||||
|
{
|
||||||
|
Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
|
||||||
|
uNewNodeIndex, uOld, uNodeCount);
|
||||||
|
Quit("Diff check failed");
|
||||||
|
}
|
||||||
|
unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
|
||||||
|
unsigned uIdOld = OldTree.GetLeafId(uOld);
|
||||||
|
if (uIdNew != uIdOld)
|
||||||
|
{
|
||||||
|
Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
|
||||||
|
uNewNodeIndex, uOld, uIdNew, uIdOld);
|
||||||
|
Quit("Diff check failed");
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NODE_CHANGED == uOld)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||||
|
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||||
|
|
||||||
|
unsigned uOldLeft = OldTree.GetLeft(uOld);
|
||||||
|
unsigned uOldRight = OldTree.GetRight(uOld);
|
||||||
|
|
||||||
|
unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||||
|
unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||||
|
|
||||||
|
bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
|
||||||
|
bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
|
||||||
|
if (!bSameNotRotated && !bSameRotated)
|
||||||
|
{
|
||||||
|
Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
|
||||||
|
Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
|
||||||
|
Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
|
||||||
|
Quit("Diff check failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
#include "distcalc.h"
|
||||||
|
#include "msa.h"
|
||||||
|
|
||||||
|
void DistCalcDF::Init(const DistFunc &DF)
|
||||||
|
{
|
||||||
|
m_ptrDF = &DF;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||||
|
{
|
||||||
|
for (unsigned j = 0; j < i; ++j)
|
||||||
|
Dist[j] = m_ptrDF->GetDist(i, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistCalcDF::GetCount() const
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistCalcDF::GetId(unsigned i) const
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetId(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *DistCalcDF::GetName(unsigned i) const
|
||||||
|
{
|
||||||
|
return m_ptrDF->GetName(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
|
||||||
|
{
|
||||||
|
m_ptrMSA = &msa;
|
||||||
|
m_Distance = Distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||||
|
{
|
||||||
|
for (unsigned j = 0; j < i; ++j)
|
||||||
|
{
|
||||||
|
switch (m_Distance)
|
||||||
|
{
|
||||||
|
case DISTANCE_PctIdKimura:
|
||||||
|
{
|
||||||
|
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||||
|
Dist[j] = (float) KimuraDist(PctId);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DISTANCE_PctIdLog:
|
||||||
|
{
|
||||||
|
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||||
|
Dist[j] = (float) PctIdToMAFFTDist(PctId);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case DISTANCE_ScoreDist:
|
||||||
|
{
|
||||||
|
double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2);
|
||||||
|
Dist[j] = (float) GetScoreDist(*m_ptrMSA, i, j);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
case DISTANCE_Edit:
|
||||||
|
{
|
||||||
|
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||||
|
if (PctId > 1.0)
|
||||||
|
Quit("Internal error, DISTANCE_Edit, pct id=%.3g", PctId);
|
||||||
|
Dist[j] = (float) 1.0 - PctId;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistCalcMSA::GetCount() const
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistCalcMSA::GetId(unsigned i) const
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqId(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *DistCalcMSA::GetName(unsigned i) const
|
||||||
|
{
|
||||||
|
return m_ptrMSA->GetSeqName(i);
|
||||||
|
}
|
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#ifndef DistCalc_h
|
||||||
|
#define DistCalc_h
|
||||||
|
|
||||||
|
// Scalar type used for all pairwise distances.
typedef float dist_t;

// Very large distance constant.
const dist_t BIG_DIST = (dist_t) 1e29;

class DistFunc;

// Abstract source of pairwise distances between indexed items.
class DistCalc
{
public:
    // Virtual so that an implementation can be destroyed safely through a
    // DistCalc pointer.
    virtual ~DistCalc() {}

    // Writes into Dist[] the distances between item i and other items; the
    // implementations in this project fill Dist[j] for every j < i.
    virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;

    // Number of items.
    virtual unsigned GetCount() const = 0;

    // Id of item i.
    virtual unsigned GetId(unsigned i) const = 0;

    // Name of item i.
    virtual const char *GetName(unsigned i) const = 0;
};
|
||||||
|
|
||||||
|
class DistCalcDF : public DistCalc
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void Init(const DistFunc &DF);
|
||||||
|
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||||
|
virtual unsigned GetCount() const;
|
||||||
|
virtual unsigned GetId(unsigned i) const;
|
||||||
|
virtual const char *GetName(unsigned i) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DistFunc *m_ptrDF;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DistCalcMSA : public DistCalc
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void Init(const MSA &msa, DISTANCE Distance);
|
||||||
|
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||||
|
virtual unsigned GetCount() const;
|
||||||
|
virtual unsigned GetId(unsigned i) const;
|
||||||
|
virtual const char *GetName(unsigned i) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const MSA *m_ptrMSA;
|
||||||
|
DISTANCE m_Distance;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // DistCalc_h
|
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
// Creates an empty distance function: zero items and no storage allocated.
DistFunc::DistFunc() :
    m_uCount(0),
    m_uCacheCount(0),
    m_Dists(0),
    m_Names(0),
    m_Ids(0)
{
}
|
||||||
|
|
||||||
|
// Releases the distance matrix, the id table and every stored name.
DistFunc::~DistFunc()
{
    if (0 != m_Names)
    {
        // The name table holds m_uCacheCount slots (its allocated size),
        // which can exceed m_uCount after SetCount was called with a
        // smaller count. Free all of them; unused slots are null and
        // free(0) is a no-op.
        for (unsigned i = 0; i < m_uCacheCount; ++i)
            free(m_Names[i]);
    }
    delete[] m_Dists;
    delete[] m_Names;
    delete[] m_Ids;
}
|
||||||
|
|
||||||
|
float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||||
|
{
|
||||||
|
return m_Dists[VectorIndex(uIndex1, uIndex2)];
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistFunc::GetCount() const
|
||||||
|
{
|
||||||
|
return m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistFunc::SetCount(unsigned uCount)
|
||||||
|
{
|
||||||
|
m_uCount = uCount;
|
||||||
|
if (uCount <= m_uCacheCount)
|
||||||
|
return;
|
||||||
|
delete[] m_Dists;
|
||||||
|
m_Dists = new float[VectorLength()];
|
||||||
|
m_Names = new char *[m_uCount];
|
||||||
|
m_Ids = new unsigned[m_uCount];
|
||||||
|
m_uCacheCount = uCount;
|
||||||
|
|
||||||
|
memset(m_Names, 0, m_uCount*sizeof(char *));
|
||||||
|
memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
|
||||||
|
memset(m_Dists, 0, VectorLength()*sizeof(float));
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||||
|
{
|
||||||
|
m_Dists[VectorIndex(uIndex1, uIndex2)] = dDist;
|
||||||
|
m_Dists[VectorIndex(uIndex2, uIndex1)] = dDist;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||||
|
{
|
||||||
|
assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
|
||||||
|
return uIndex1*m_uCount + uIndex2;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistFunc::VectorLength() const
|
||||||
|
{
|
||||||
|
return m_uCount*m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistFunc::SetName(unsigned uIndex, const char szName[])
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
m_Names[uIndex] = strsave(szName);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistFunc::SetId(unsigned uIndex, unsigned uId)
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
m_Ids[uIndex] = uId;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *DistFunc::GetName(unsigned uIndex) const
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
return m_Names[uIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned DistFunc::GetId(unsigned uIndex) const
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
return m_Ids[uIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
void DistFunc::LogMe() const
|
||||||
|
{
|
||||||
|
Log("DistFunc::LogMe count=%u\n", m_uCount);
|
||||||
|
Log(" ");
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
Log(" %7u", i);
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
Log(" ");
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
Log(" %7.7s", m_Names[i] ? m_Names[i] : "");
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
Log("%4u %10.10s : ", i, m_Names[i] ? m_Names[i] : "");
|
||||||
|
for (unsigned j = 0; j <= i; ++j)
|
||||||
|
Log(" %7.4g", GetDist(i, j));
|
||||||
|
Log("\n");
|
||||||
|
}
|
||||||
|
}
|
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#ifndef DistFunc_h
|
||||||
|
#define DistFunc_h
|
||||||
|
|
||||||
|
// Square table of pairwise distances between m_uCount items, with an
// optional name and id per item. The object owns all of its storage.
class DistFunc
{
public:
    DistFunc();
    virtual ~DistFunc();

public:
    // Sets the number of items; see the definition for what happens to
    // existing data.
    virtual void SetCount(unsigned uCount);

    // Stores the distance between two items.
    virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);

    void SetName(unsigned uIndex, const char szName[]);
    void SetId(unsigned uIndex, unsigned uId);
    const char *GetName(unsigned uIndex) const;
    unsigned GetId(unsigned uIndex) const;

    virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
    virtual unsigned GetCount() const;

    // Writes the table to the log.
    void LogMe() const;

protected:
    // Offset of cell (uIndex1, uIndex2) in m_Dists.
    unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;

    // Number of cells in m_Dists for the current count.
    unsigned VectorLength() const;

private:
    // Not copyable: the destructor deletes the raw arrays below, so a
    // member-wise copy would lead to a double delete. Declared but
    // deliberately not defined.
    DistFunc(const DistFunc &);
    DistFunc &operator=(const DistFunc &);

private:
    unsigned m_uCount;        // current number of items
    unsigned m_uCacheCount;   // number of items the arrays were allocated for
    float *m_Dists;           // distance cells, addressed through VectorIndex
    char **m_Names;           // per-item names, released with free(); may be null
    unsigned *m_Ids;          // per-item ids
};
|
||||||
|
|
||||||
|
#endif // DistFunc_h
|
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "seqvect.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
|
||||||
|
void DistPWKimura(const SeqVect &v, DistFunc &DF)
|
||||||
|
{
|
||||||
|
SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
|
||||||
|
SetSeqWeightMethod(SEQWEIGHT_Henikoff);
|
||||||
|
|
||||||
|
const unsigned uSeqCount = v.Length();
|
||||||
|
DF.SetCount(uSeqCount);
|
||||||
|
|
||||||
|
const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
|
||||||
|
unsigned uCount = 0;
|
||||||
|
SetProgressDesc("PWKimura distance");
|
||||||
|
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||||
|
{
|
||||||
|
const Seq &s1 = v.GetSeq(uSeqIndex1);
|
||||||
|
MSA msa1;
|
||||||
|
msa1.FromSeq(s1);
|
||||||
|
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
|
||||||
|
{
|
||||||
|
if (0 == uCount%20)
|
||||||
|
Progress(uCount, uPairCount);
|
||||||
|
++uCount;
|
||||||
|
const Seq &s2 = v.GetSeq(uSeqIndex2);
|
||||||
|
MSA msa2;
|
||||||
|
msa2.FromSeq(s2);
|
||||||
|
|
||||||
|
PWPath Path;
|
||||||
|
MSA msaOut;
|
||||||
|
AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
|
||||||
|
|
||||||
|
double dPctId = msaOut.GetPctIdentityPair(0, 1);
|
||||||
|
float f = (float) KimuraDist(dPctId);
|
||||||
|
|
||||||
|
DF.SetDist(uSeqIndex1, uSeqIndex2, f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ProgressStepsDone();
|
||||||
|
|
||||||
|
SetSeqWeightMethod(SeqWeightSave);
|
||||||
|
}
|
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
@ -0,0 +1,299 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "textfile.h"
|
||||||
|
#include "seqvect.h"
|
||||||
|
#include "distfunc.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#include "profile.h"
|
||||||
|
#include "timing.h"
|
||||||
|
|
||||||
|
// Text of the warning about the -usetree option.
static char g_strUseTreeWarning[] =
    "\n******** WARNING ****************\n"
    "\nYou specified the -usetree option.\n"
    "Note that a good evolutionary tree may NOT be a good\n"
    "guide tree for multiple alignment. For more details,\n"
    "please refer to the user guide. To disable this\n"
    "warning, use -usetree_nowarn <treefilename>.\n\n";
|
||||||
|
|
||||||
|
void DoMuscle()
|
||||||
|
{
|
||||||
|
SetOutputFileName(g_pstrOutFileName);
|
||||||
|
SetInputFileName(g_pstrInFileName);
|
||||||
|
|
||||||
|
SetMaxIters(g_uMaxIters);
|
||||||
|
SetSeqWeightMethod(g_SeqWeight1);
|
||||||
|
|
||||||
|
TextFile fileIn(g_pstrInFileName);
|
||||||
|
SeqVect v;
|
||||||
|
v.FromFASTAFile(fileIn);
|
||||||
|
const unsigned uSeqCount = v.Length();
|
||||||
|
|
||||||
|
if (0 == uSeqCount)
|
||||||
|
Quit("No sequences in input file");
|
||||||
|
|
||||||
|
ALPHA Alpha = ALPHA_Undefined;
|
||||||
|
switch (g_SeqType)
|
||||||
|
{
|
||||||
|
case SEQTYPE_Auto:
|
||||||
|
Alpha = v.GuessAlpha();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_Protein:
|
||||||
|
Alpha = ALPHA_Amino;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_DNA:
|
||||||
|
Alpha = ALPHA_DNA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_RNA:
|
||||||
|
Alpha = ALPHA_RNA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
Quit("Invalid seq type");
|
||||||
|
}
|
||||||
|
SetAlpha(Alpha);
|
||||||
|
v.FixAlpha();
|
||||||
|
|
||||||
|
PTR_SCOREMATRIX UserMatrix = 0;
|
||||||
|
if (0 != g_pstrMatrixFileName)
|
||||||
|
{
|
||||||
|
const char *FileName = g_pstrMatrixFileName;
|
||||||
|
const char *Path = getenv("MUSCLE_MXPATH");
|
||||||
|
if (Path != 0)
|
||||||
|
{
|
||||||
|
size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
|
||||||
|
char *NewFileName = new char[n];
|
||||||
|
sprintf(NewFileName, "%s/%s", Path, FileName);
|
||||||
|
FileName = NewFileName;
|
||||||
|
}
|
||||||
|
TextFile File(FileName);
|
||||||
|
UserMatrix = ReadMx(File);
|
||||||
|
g_Alpha = ALPHA_Amino;
|
||||||
|
g_PPScore = PPSCORE_SP;
|
||||||
|
}
|
||||||
|
|
||||||
|
SetPPScore();
|
||||||
|
|
||||||
|
if (0 != UserMatrix)
|
||||||
|
g_ptrScoreMatrix = UserMatrix;
|
||||||
|
|
||||||
|
unsigned uMaxL = 0;
|
||||||
|
unsigned uTotL = 0;
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
{
|
||||||
|
unsigned L = v.GetSeq(uSeqIndex).Length();
|
||||||
|
uTotL += L;
|
||||||
|
if (L > uMaxL)
|
||||||
|
uMaxL = L;
|
||||||
|
}
|
||||||
|
|
||||||
|
SetIter(1);
|
||||||
|
g_bDiags = g_bDiags1;
|
||||||
|
SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
|
||||||
|
|
||||||
|
SetMuscleSeqVect(v);
|
||||||
|
|
||||||
|
MSA::SetIdCount(uSeqCount);
|
||||||
|
|
||||||
|
// Initialize sequence ids.
|
||||||
|
// From this point on, ids must somehow propagate from here.
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
v.SetSeqId(uSeqIndex, uSeqIndex);
|
||||||
|
|
||||||
|
if (0 == uSeqCount)
|
||||||
|
Quit("Input file '%s' has no sequences", g_pstrInFileName);
|
||||||
|
if (1 == uSeqCount)
|
||||||
|
{
|
||||||
|
TextFile fileOut(g_pstrOutFileName, true);
|
||||||
|
v.ToFile(fileOut);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (uSeqCount > 1)
|
||||||
|
MHackStart(v);
|
||||||
|
|
||||||
|
// First iteration
|
||||||
|
Tree GuideTree;
|
||||||
|
if (0 != g_pstrUseTreeFileName)
|
||||||
|
{
|
||||||
|
// Discourage users...
|
||||||
|
if (!g_bUseTreeNoWarn)
|
||||||
|
fprintf(stderr, "%s", g_strUseTreeWarning);
|
||||||
|
|
||||||
|
// Read tree from file
|
||||||
|
TextFile TreeFile(g_pstrUseTreeFileName);
|
||||||
|
GuideTree.FromFile(TreeFile);
|
||||||
|
|
||||||
|
// Make sure tree is rooted
|
||||||
|
if (!GuideTree.IsRooted())
|
||||||
|
Quit("User tree must be rooted");
|
||||||
|
|
||||||
|
if (GuideTree.GetLeafCount() != uSeqCount)
|
||||||
|
Quit("User tree does not match input sequences");
|
||||||
|
|
||||||
|
const unsigned uNodeCount = GuideTree.GetNodeCount();
|
||||||
|
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||||
|
{
|
||||||
|
if (!GuideTree.IsLeaf(uNodeIndex))
|
||||||
|
continue;
|
||||||
|
const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
|
||||||
|
unsigned uSeqIndex;
|
||||||
|
bool SeqFound = v.FindName(LeafName, &uSeqIndex);
|
||||||
|
if (!SeqFound)
|
||||||
|
Quit("Label %s in tree does not match sequences", LeafName);
|
||||||
|
unsigned uId = v.GetSeqIdFromName(LeafName);
|
||||||
|
GuideTree.SetLeafId(uNodeIndex, uId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
|
||||||
|
g_pstrDistMxFileName1);
|
||||||
|
|
||||||
|
const char *Tree1 = ValueOpt("Tree1");
|
||||||
|
if (0 != Tree1)
|
||||||
|
{
|
||||||
|
TextFile f(Tree1, true);
|
||||||
|
GuideTree.ToFile(f);
|
||||||
|
if (g_bClusterOnly)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SetMuscleTree(GuideTree);
|
||||||
|
ValidateMuscleIds(GuideTree);
|
||||||
|
|
||||||
|
MSA msa;
|
||||||
|
ProgNode *ProgNodes = 0;
|
||||||
|
if (g_bLow)
|
||||||
|
ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
|
||||||
|
else
|
||||||
|
ProgressiveAlign(v, GuideTree, msa);
|
||||||
|
SetCurrentAlignment(msa);
|
||||||
|
|
||||||
|
if (0 != g_pstrComputeWeightsFileName)
|
||||||
|
{
|
||||||
|
extern void OutWeights(const char *FileName, const MSA &msa);
|
||||||
|
SetMSAWeightsMuscle(msa);
|
||||||
|
OutWeights(g_pstrComputeWeightsFileName, msa);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ValidateMuscleIds(msa);
|
||||||
|
|
||||||
|
if (1 == g_uMaxIters || 2 == uSeqCount)
|
||||||
|
{
|
||||||
|
//TextFile fileOut(g_pstrOutFileName, true);
|
||||||
|
//MHackEnd(msa);
|
||||||
|
//msa.ToFile(fileOut);
|
||||||
|
MuscleOutput(msa);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == g_pstrUseTreeFileName)
|
||||||
|
{
|
||||||
|
g_bDiags = g_bDiags2;
|
||||||
|
SetIter(2);
|
||||||
|
|
||||||
|
if (g_bLow)
|
||||||
|
{
|
||||||
|
if (0 != g_uMaxTreeRefineIters)
|
||||||
|
RefineTreeE(msa, v, GuideTree, ProgNodes);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
RefineTree(msa, GuideTree);
|
||||||
|
|
||||||
|
const char *Tree2 = ValueOpt("Tree2");
|
||||||
|
if (0 != Tree2)
|
||||||
|
{
|
||||||
|
TextFile f(Tree2, true);
|
||||||
|
GuideTree.ToFile(f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SetSeqWeightMethod(g_SeqWeight2);
|
||||||
|
SetMuscleTree(GuideTree);
|
||||||
|
|
||||||
|
if (g_bAnchors)
|
||||||
|
RefineVert(msa, GuideTree, g_uMaxIters - 2);
|
||||||
|
else
|
||||||
|
RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Refining by subfamilies is disabled as it didn't give better
|
||||||
|
// results. I tried doing this before and after RefineHoriz.
|
||||||
|
// Should get back to this as it seems like this should work.
|
||||||
|
RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ValidateMuscleIds(msa);
|
||||||
|
ValidateMuscleIds(GuideTree);
|
||||||
|
|
||||||
|
//TextFile fileOut(g_pstrOutFileName, true);
|
||||||
|
//MHackEnd(msa);
|
||||||
|
//msa.ToFile(fileOut);
|
||||||
|
MuscleOutput(msa);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Run()
|
||||||
|
{
|
||||||
|
SetStartTime();
|
||||||
|
Log("Started %s\n", GetTimeAsStr());
|
||||||
|
for (int i = 0; i < g_argc; ++i)
|
||||||
|
Log("%s ", g_argv[i]);
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
#if TIMING
|
||||||
|
TICKS t1 = GetClockTicks();
|
||||||
|
#endif
|
||||||
|
if (g_bRefine)
|
||||||
|
Refine();
|
||||||
|
else if (g_bRefineW)
|
||||||
|
{
|
||||||
|
extern void DoRefineW();
|
||||||
|
DoRefineW();
|
||||||
|
}
|
||||||
|
else if (g_bProfDB)
|
||||||
|
ProfDB();
|
||||||
|
else if (g_bSW)
|
||||||
|
Local();
|
||||||
|
else if (0 != g_pstrSPFileName)
|
||||||
|
DoSP();
|
||||||
|
else if (g_bProfile)
|
||||||
|
Profile();
|
||||||
|
else if (g_bPPScore)
|
||||||
|
PPScore();
|
||||||
|
else if (g_bPAS)
|
||||||
|
ProgAlignSubFams();
|
||||||
|
else if (g_bMakeTree)
|
||||||
|
{
|
||||||
|
extern void DoMakeTree();
|
||||||
|
DoMakeTree();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
DoMuscle();
|
||||||
|
|
||||||
|
#if TIMING
|
||||||
|
extern TICKS g_ticksDP;
|
||||||
|
extern TICKS g_ticksObjScore;
|
||||||
|
TICKS t2 = GetClockTicks();
|
||||||
|
TICKS TotalTicks = t2 - t1;
|
||||||
|
TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
|
||||||
|
double dSecs = TicksToSecs(TotalTicks);
|
||||||
|
double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
|
||||||
|
double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
|
||||||
|
double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
|
||||||
|
Log(" Ticks Secs Pct\n");
|
||||||
|
Log(" ============ ======= =====\n");
|
||||||
|
Log("DP %12ld %7.2f %5.1f%%\n",
|
||||||
|
(long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
|
||||||
|
Log("OS %12ld %7.2f %5.1f%%\n",
|
||||||
|
(long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
|
||||||
|
Log("Other %12ld %7.2f %5.1f%%\n",
|
||||||
|
(long) ticksOther, TicksToSecs(ticksOther), PctOther);
|
||||||
|
Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ListDiagSavings();
|
||||||
|
Log("Finished %s\n", GetTimeAsStr());
|
||||||
|
}
|
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "textfile.h"
|
||||||
|
#include "msa.h"
|
||||||
|
#include "objscore.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#include "profile.h"
|
||||||
|
|
||||||
|
void DoSP()
|
||||||
|
{
|
||||||
|
TextFile f(g_pstrSPFileName);
|
||||||
|
|
||||||
|
MSA a;
|
||||||
|
a.FromFile(f);
|
||||||
|
|
||||||
|
ALPHA Alpha = ALPHA_Undefined;
|
||||||
|
switch (g_SeqType)
|
||||||
|
{
|
||||||
|
case SEQTYPE_Auto:
|
||||||
|
Alpha = a.GuessAlpha();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_Protein:
|
||||||
|
Alpha = ALPHA_Amino;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_DNA:
|
||||||
|
Alpha = ALPHA_DNA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SEQTYPE_RNA:
|
||||||
|
Alpha = ALPHA_RNA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
Quit("Invalid SeqType");
|
||||||
|
}
|
||||||
|
SetAlpha(Alpha);
|
||||||
|
a.FixAlpha();
|
||||||
|
|
||||||
|
SetPPScore();
|
||||||
|
|
||||||
|
const unsigned uSeqCount = a.GetSeqCount();
|
||||||
|
if (0 == uSeqCount)
|
||||||
|
Quit("No sequences in input file %s", g_pstrSPFileName);
|
||||||
|
|
||||||
|
MSA::SetIdCount(uSeqCount);
|
||||||
|
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||||
|
a.SetSeqId(uSeqIndex, uSeqIndex);
|
||||||
|
|
||||||
|
SetSeqWeightMethod(g_SeqWeight1);
|
||||||
|
Tree tree;
|
||||||
|
TreeFromMSA(a, tree, g_Cluster2, g_Distance2, g_Root2);
|
||||||
|
SetMuscleTree(tree);
|
||||||
|
SetMSAWeightsMuscle((MSA &) a);
|
||||||
|
|
||||||
|
SCORE SP = ObjScoreSP(a);
|
||||||
|
|
||||||
|
Log("File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||||
|
fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||||
|
}
|
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#ifndef DPRegionList_h
|
||||||
|
#define DPRegionList_h
|
||||||
|
|
||||||
|
#include "diaglist.h"
|
||||||
|
|
||||||
|
// Discriminator for the union stored in DPRegion.
enum DPREGIONTYPE
	{
	DPREGIONTYPE_Unknown = 0,	// not set
	DPREGIONTYPE_Diag = 1,		// DPRegion::m_Diag is the active member
	DPREGIONTYPE_Rect = 2		// DPRegion::m_Rect is the active member
	};
|
||||||
|
|
||||||
|
struct DPRegion
|
||||||
|
{
|
||||||
|
DPREGIONTYPE m_Type;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
Diag m_Diag;
|
||||||
|
Rect m_Rect;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const unsigned MAX_DPREGIONS = 1024;
|
||||||
|
|
||||||
|
class DPRegionList
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
DPRegionList()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
~DPRegionList()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Creation
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
void Add(const DPRegion &r);
|
||||||
|
|
||||||
|
// Accessors
|
||||||
|
unsigned GetCount() const
|
||||||
|
{
|
||||||
|
return m_uCount;
|
||||||
|
}
|
||||||
|
const DPRegion &Get(unsigned uIndex) const
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
return m_DPRegions[uIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Diagnostics
|
||||||
|
void LogMe() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Free()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned m_uCount;
|
||||||
|
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||||
|
};
|
||||||
|
|
||||||
|
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||||
|
unsigned uLengthA, unsigned uLengthB);
|
||||||
|
|
||||||
|
#endif // DPRegionList_h
|
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "dpreglist.h"
|
||||||
|
|
||||||
|
unsigned DPRegionList::GetDPArea() const
|
||||||
|
{
|
||||||
|
unsigned uArea = 0;
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
const DPRegion &r = m_DPRegions[i];
|
||||||
|
if (DPREGIONTYPE_Rect == r.m_Type)
|
||||||
|
uArea += r.m_Rect.m_uLengthA*r.m_Rect.m_uLengthB;
|
||||||
|
}
|
||||||
|
return uArea;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DPRegionList::Add(const DPRegion &r)
|
||||||
|
{
|
||||||
|
if (m_uCount == MAX_DPREGIONS)
|
||||||
|
Quit("DPRegionList::Add, overflow %d", m_uCount);
|
||||||
|
m_DPRegions[m_uCount] = r;
|
||||||
|
++m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DPRegionList::LogMe() const
|
||||||
|
{
|
||||||
|
Log("DPRegionList::LogMe, count=%u\n", m_uCount);
|
||||||
|
Log("Region Type StartA StartB EndA EndB\n");
|
||||||
|
Log("------ ---- ------ ------ ---- ----\n");
|
||||||
|
for (unsigned i = 0; i < m_uCount; ++i)
|
||||||
|
{
|
||||||
|
const DPRegion &r = m_DPRegions[i];
|
||||||
|
Log("%6u ", i);
|
||||||
|
if (DPREGIONTYPE_Diag == r.m_Type)
|
||||||
|
Log("Diag %6u %6u %6u %6u\n",
|
||||||
|
r.m_Diag.m_uStartPosA,
|
||||||
|
r.m_Diag.m_uStartPosB,
|
||||||
|
r.m_Diag.m_uStartPosA + r.m_Diag.m_uLength - 1,
|
||||||
|
r.m_Diag.m_uStartPosB + r.m_Diag.m_uLength - 1);
|
||||||
|
else if (DPREGIONTYPE_Rect == r.m_Type)
|
||||||
|
Log("Rect %6u %6u %6u %6u\n",
|
||||||
|
r.m_Rect.m_uStartPosA,
|
||||||
|
r.m_Rect.m_uStartPosB,
|
||||||
|
r.m_Rect.m_uStartPosA + r.m_Rect.m_uLengthA - 1,
|
||||||
|
r.m_Rect.m_uStartPosB + r.m_Rect.m_uLengthB - 1);
|
||||||
|
else
|
||||||
|
Log(" *** ERROR *** Type=%u\n", r.m_Type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||||
|
unsigned uLengthA, unsigned uLengthB)
|
||||||
|
{
|
||||||
|
if (g_uDiagMargin > g_uMinDiagLength/2)
|
||||||
|
Quit("Invalid parameters, diagmargin=%d must be <= 2*diaglength=%d",
|
||||||
|
g_uDiagMargin, g_uMinDiagLength);
|
||||||
|
|
||||||
|
unsigned uStartPosA = 0;
|
||||||
|
unsigned uStartPosB = 0;
|
||||||
|
const unsigned uDiagCount = DL.GetCount();
|
||||||
|
DPRegion r;
|
||||||
|
for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
|
||||||
|
{
|
||||||
|
const Diag &d = DL.Get(uDiagIndex);
|
||||||
|
assert(d.m_uLength >= g_uMinDiagLength);
|
||||||
|
const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
|
||||||
|
const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
|
||||||
|
const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
|
||||||
|
const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;
|
||||||
|
|
||||||
|
r.m_Type = DPREGIONTYPE_Rect;
|
||||||
|
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||||
|
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||||
|
|
||||||
|
assert(uStartVertexA + 1 >= uStartPosA);
|
||||||
|
assert(uStartVertexB + 1 >= uStartPosB);
|
||||||
|
r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
|
||||||
|
r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
|
||||||
|
RL.Add(r);
|
||||||
|
|
||||||
|
if (uEndVertexA > uStartVertexA + 1)
|
||||||
|
{
|
||||||
|
const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;
|
||||||
|
|
||||||
|
r.m_Type = DPREGIONTYPE_Diag;
|
||||||
|
r.m_Diag.m_uStartPosA = uStartVertexA + 1;
|
||||||
|
r.m_Diag.m_uStartPosB = uStartVertexB + 1;
|
||||||
|
assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
|
||||||
|
r.m_Diag.m_uLength = uEndVertexA - uStartVertexA - 1;
|
||||||
|
RL.Add(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
uStartPosA = uEndVertexA;
|
||||||
|
uStartPosB = uEndVertexB;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
|
||||||
|
assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);
|
||||||
|
|
||||||
|
r.m_Type = DPREGIONTYPE_Rect;
|
||||||
|
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||||
|
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||||
|
|
||||||
|
assert(uLengthA >= uStartPosA);
|
||||||
|
assert(uLengthB >= uStartPosB);
|
||||||
|
r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
|
||||||
|
r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
|
||||||
|
RL.Add(r);
|
||||||
|
}
|
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
#ifndef dpreglist_h
|
||||||
|
#define dpreglist_h
|
||||||
|
|
||||||
|
#include "diaglist.h"
|
||||||
|
|
||||||
|
// Discriminator for the union stored in DPRegion.
enum DPREGIONTYPE
	{
	DPREGIONTYPE_Unknown = 0,	// not set
	DPREGIONTYPE_Diag = 1,		// DPRegion::m_Diag is the active member
	DPREGIONTYPE_Rect = 2		// DPRegion::m_Rect is the active member
	};
|
||||||
|
|
||||||
|
struct DPRegion
|
||||||
|
{
|
||||||
|
DPREGIONTYPE m_Type;
|
||||||
|
union
|
||||||
|
{
|
||||||
|
Diag m_Diag;
|
||||||
|
Rect m_Rect;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const unsigned MAX_DPREGIONS = 1024;
|
||||||
|
|
||||||
|
class DPRegionList
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
DPRegionList()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
~DPRegionList()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Creation
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
Free();
|
||||||
|
}
|
||||||
|
void Add(const DPRegion &r);
|
||||||
|
|
||||||
|
// Accessors
|
||||||
|
unsigned GetCount() const
|
||||||
|
{
|
||||||
|
return m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DPRegion &Get(unsigned uIndex) const
|
||||||
|
{
|
||||||
|
assert(uIndex < m_uCount);
|
||||||
|
return m_DPRegions[uIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned GetDPArea() const;
|
||||||
|
|
||||||
|
// Diagnostics
|
||||||
|
void LogMe() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Free()
|
||||||
|
{
|
||||||
|
m_uCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned m_uCount;
|
||||||
|
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||||
|
};
|
||||||
|
|
||||||
|
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||||
|
unsigned uLengthA, unsigned uLengthB);
|
||||||
|
|
||||||
|
#endif // dpreglist_h
|
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "tree.h"
|
||||||
|
|
||||||
|
/***
|
||||||
|
Simple tree drawing algorithm.
|
||||||
|
|
||||||
|
y coordinate of node is index in depth-first traversal.
|
||||||
|
x coordinate is distance from root.
|
||||||
|
***/
|
||||||
|
|
||||||
|
static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
const unsigned uRoot = tree.GetRootNodeIndex();
|
||||||
|
unsigned uDist = 0;
|
||||||
|
while (uNodeIndex != uRoot)
|
||||||
|
{
|
||||||
|
++uDist;
|
||||||
|
uNodeIndex = tree.GetParent(uNodeIndex);
|
||||||
|
}
|
||||||
|
return uDist;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void DrawNode(const Tree &tree, unsigned uNodeIndex)
|
||||||
|
{
|
||||||
|
if (!tree.IsLeaf(uNodeIndex))
|
||||||
|
DrawNode(tree, tree.GetLeft(uNodeIndex));
|
||||||
|
|
||||||
|
unsigned uDist = DistFromRoot(tree, uNodeIndex);
|
||||||
|
for (unsigned i = 0; i < 5*uDist; ++i)
|
||||||
|
Log(" ");
|
||||||
|
Log("%d\n", uNodeIndex);
|
||||||
|
|
||||||
|
if (!tree.IsLeaf(uNodeIndex))
|
||||||
|
DrawNode(tree, tree.GetRight(uNodeIndex));
|
||||||
|
}
|
||||||
|
|
||||||
|
void DrawTree(const Tree &tree)
|
||||||
|
{
|
||||||
|
unsigned uRoot = tree.GetRootNodeIndex();
|
||||||
|
DrawNode(tree, uRoot);
|
||||||
|
}
|
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "edgelist.h"
|
||||||
|
|
||||||
|
// Constructs an empty list with no storage allocated.
EdgeList::EdgeList() :
  m_uCount(0),
  m_uCacheSize(0),
  m_uNode1(0),
  m_uNode2(0)
	{
	}
|
||||||
|
|
||||||
|
// Releases both node arrays by delegating to Clear().
EdgeList::~EdgeList()
	{
	this->Clear();
	}
|
||||||
|
|
||||||
|
void EdgeList::Clear()
|
||||||
|
{
|
||||||
|
delete[] m_uNode1;
|
||||||
|
delete[] m_uNode2;
|
||||||
|
m_uNode1 = 0;
|
||||||
|
m_uNode2 = 0;
|
||||||
|
m_uCount = 0;
|
||||||
|
m_uCacheSize = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EdgeList::Add(unsigned uNode1, unsigned uNode2)
|
||||||
|
{
|
||||||
|
if (m_uCount <= m_uCacheSize)
|
||||||
|
Expand();
|
||||||
|
m_uNode1[m_uCount] = uNode1;
|
||||||
|
m_uNode2[m_uCount] = uNode2;
|
||||||
|
++m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned EdgeList::GetCount() const
|
||||||
|
{
|
||||||
|
return m_uCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
|
||||||
|
{
|
||||||
|
if (uIndex > m_uCount)
|
||||||
|
Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
|
||||||
|
*ptruNode1 = m_uNode1[uIndex];
|
||||||
|
*ptruNode2 = m_uNode2[uIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
void EdgeList::Copy(const EdgeList &rhs)
|
||||||
|
{
|
||||||
|
Clear();
|
||||||
|
const unsigned uCount = rhs.GetCount();
|
||||||
|
for (unsigned n = 0; n < uCount; ++n)
|
||||||
|
{
|
||||||
|
unsigned uNode1;
|
||||||
|
unsigned uNode2;
|
||||||
|
rhs.GetEdge(n, &uNode1, &uNode2);
|
||||||
|
Add(uNode1, uNode2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EdgeList::Expand()
|
||||||
|
{
|
||||||
|
unsigned uNewCacheSize = m_uCacheSize + 512;
|
||||||
|
unsigned *NewNode1 = new unsigned[uNewCacheSize];
|
||||||
|
unsigned *NewNode2 = new unsigned[uNewCacheSize];
|
||||||
|
if (m_uCount > 0)
|
||||||
|
{
|
||||||
|
memcpy(NewNode1, m_uNode1, m_uCount*sizeof(unsigned));
|
||||||
|
memcpy(NewNode2, m_uNode2, m_uCount*sizeof(unsigned));
|
||||||
|
}
|
||||||
|
delete[] m_uNode1;
|
||||||
|
delete[] m_uNode2;
|
||||||
|
m_uNode1 = NewNode1;
|
||||||
|
m_uNode2 = NewNode2;
|
||||||
|
m_uCacheSize = uNewCacheSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EdgeList::LogMe() const
|
||||||
|
{
|
||||||
|
for (unsigned n = 0; n < m_uCount; ++n)
|
||||||
|
{
|
||||||
|
if (n > 0)
|
||||||
|
Log(" ");
|
||||||
|
Log("%u->%u", m_uNode1[n], m_uNode2[n]);
|
||||||
|
}
|
||||||
|
Log("\n");
|
||||||
|
}
|
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#ifndef EdgeList_h
|
||||||
|
#define EdgeList_h
|
||||||
|
|
||||||
|
// Growable list of edges, each a pair of unsigned node indexes held in two
// parallel heap arrays.
// NOTE(review): no copy constructor or assignment operator is declared, so
// the implicit ones would copy the raw array pointers; Copy() is the only
// copying operation declared here.
class EdgeList
	{
public:
	EdgeList();
	virtual ~EdgeList();

	// Discards all edges.
	void Clear();

	// Appends the edge uNode1->uNode2.
	void Add(unsigned uNode1, unsigned uNode2);

	// Number of edges stored.
	unsigned GetCount() const;

	// Fetches the edge at uIndex into the two output pointers.
	void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;

	// Replaces the contents with the edges of rhs.
	void Copy(const EdgeList &rhs);

	// Writes the edges to the log.
	void LogMe() const;

private:
	// Enlarges the two node arrays.
	void Expand();

	unsigned m_uCount;		// edges stored
	unsigned m_uCacheSize;	// allocated size of each array
	unsigned *m_uNode1;		// first node of each edge
	unsigned *m_uNode2;		// second node of each edge
	};
|
||||||
|
|
||||||
|
#endif // EdgeList_h
|
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "enumopts.h"
|
||||||
|
|
||||||
|
// Expand enums.h into one EnumOpt table per enum type T, named T_Opts:
//   s(T)    opens the array definition,
//   c(T, x) emits the entry { "x", T_x },
//   e(T)    appends the { 0, 0 } terminator and closes the array.
// enums.h #undefs s, c and e at its end.
#define s(t)	EnumOpt t##_Opts[] = {
#define c(t, x)	{ #x, t##_##x },
#define e(t)	{ 0, 0 } };

#include "enums.h"
|
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#ifndef enumopts_h
|
||||||
|
#define enumopts_h
|
||||||
|
|
||||||
|
// One entry of an enum option table: an option name paired with an integer
// value.
struct EnumOpt
	{
	const char *pstrOpt;	// option name
	int iValue;				// value paired with pstrOpt
	};
|
||||||
|
|
||||||
|
#define s(t) extern EnumOpt t##_Opts[];
|
||||||
|
#define c(t, x) /* empty */
|
||||||
|
#define e(t) /* empty */
|
||||||
|
#include "enums.h"
|
||||||
|
|
||||||
|
|
||||||
|
#endif // enumopts_h
|
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
// enums.h
|
||||||
|
// Define enum types.
|
||||||
|
// Exploit macro hacks to avoid lots of repetitive typing.
|
||||||
|
// Generally I am opposed to macro hacks because of the
|
||||||
|
// highly obscure code that results, but in this case it
|
||||||
|
// makes maintenance much easier and less error-prone.
|
||||||
|
// The idea is that this file can be included in different
|
||||||
|
// places with different definitions of s (Start), c (Case)
|
||||||
|
// and e (End). See types.h.
|
||||||
|
|
||||||
|
s(ALPHA)
|
||||||
|
c(ALPHA, Amino)
|
||||||
|
c(ALPHA, DNA)
|
||||||
|
c(ALPHA, RNA)
|
||||||
|
e(ALPHA)
|
||||||
|
|
||||||
|
s(SEQTYPE)
|
||||||
|
c(SEQTYPE, Protein)
|
||||||
|
c(SEQTYPE, DNA)
|
||||||
|
c(SEQTYPE, RNA)
|
||||||
|
c(SEQTYPE, Auto)
|
||||||
|
e(SEQTYPE)
|
||||||
|
|
||||||
|
s(ROOT)
|
||||||
|
c(ROOT, Pseudo)
|
||||||
|
c(ROOT, MidLongestSpan)
|
||||||
|
c(ROOT, MinAvgLeafDist)
|
||||||
|
e(ROOT)
|
||||||
|
|
||||||
|
s(CLUSTER)
|
||||||
|
c(CLUSTER, UPGMA)
|
||||||
|
c(CLUSTER, UPGMAMax)
|
||||||
|
c(CLUSTER, UPGMAMin)
|
||||||
|
c(CLUSTER, UPGMB)
|
||||||
|
c(CLUSTER, NeighborJoining)
|
||||||
|
e(CLUSTER)
|
||||||
|
|
||||||
|
s(JOIN)
|
||||||
|
c(JOIN, NearestNeighbor)
|
||||||
|
c(JOIN, NeighborJoining)
|
||||||
|
e(JOIN)
|
||||||
|
|
||||||
|
s(LINKAGE)
|
||||||
|
c(LINKAGE, Min)
|
||||||
|
c(LINKAGE, Avg)
|
||||||
|
c(LINKAGE, Max)
|
||||||
|
c(LINKAGE, NeighborJoining)
|
||||||
|
c(LINKAGE, Biased)
|
||||||
|
e(LINKAGE)
|
||||||
|
|
||||||
|
s(DISTANCE)
|
||||||
|
c(DISTANCE, Kmer6_6)
|
||||||
|
c(DISTANCE, Kmer20_3)
|
||||||
|
c(DISTANCE, Kmer20_4)
|
||||||
|
c(DISTANCE, Kbit20_3)
|
||||||
|
c(DISTANCE, Kmer4_6)
|
||||||
|
c(DISTANCE, PctIdKimura)
|
||||||
|
c(DISTANCE, PctIdLog)
|
||||||
|
c(DISTANCE, PWKimura)
|
||||||
|
c(DISTANCE, PWScoreDist)
|
||||||
|
c(DISTANCE, ScoreDist)
|
||||||
|
c(DISTANCE, Edit)
|
||||||
|
e(DISTANCE)
|
||||||
|
|
||||||
|
s(PPSCORE)
|
||||||
|
c(PPSCORE, LE)
|
||||||
|
c(PPSCORE, SP)
|
||||||
|
c(PPSCORE, SV)
|
||||||
|
c(PPSCORE, SPN)
|
||||||
|
e(PPSCORE)
|
||||||
|
|
||||||
|
s(SEQWEIGHT)
|
||||||
|
c(SEQWEIGHT, None)
|
||||||
|
c(SEQWEIGHT, Henikoff)
|
||||||
|
c(SEQWEIGHT, HenikoffPB)
|
||||||
|
c(SEQWEIGHT, GSC)
|
||||||
|
c(SEQWEIGHT, ClustalW)
|
||||||
|
c(SEQWEIGHT, ThreeWay)
|
||||||
|
e(SEQWEIGHT)
|
||||||
|
|
||||||
|
s(OBJSCORE)
|
||||||
|
c(OBJSCORE, SP) // Sum of Pairs of sequences
|
||||||
|
c(OBJSCORE, DP) // Dynamic Programming score
|
||||||
|
c(OBJSCORE, XP) // Cross Pairs = sum of pairs between two MSAs
|
||||||
|
c(OBJSCORE, PS) // sum of Prof-Seq score for all seqs in MSA
|
||||||
|
c(OBJSCORE, SPF) // sum of pairs, fast approximation
|
||||||
|
c(OBJSCORE, SPM) // sp if <= 100 seqs, spf otherwise
|
||||||
|
e(OBJSCORE)
|
||||||
|
|
||||||
|
s(TERMGAPS)
|
||||||
|
c(TERMGAPS, Full)
|
||||||
|
c(TERMGAPS, Half)
|
||||||
|
c(TERMGAPS, Ext)
|
||||||
|
e(TERMGAPS)
|
||||||
|
|
||||||
|
#undef s
|
||||||
|
#undef c
|
||||||
|
#undef e
|
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// Shared buffer for the fallback text returned by the XXXToStr functions
// when a value matches no known enumerator. Being a single static buffer,
// the returned pointer is overwritten by the next fallback conversion.
static char szMsg[64];

// Define XXXToStr(XXX x) functions for each enum type XXX.
//   s(T)    opens the function and its switch, handling T_Undefined,
//   c(T, x) adds a case returning the enumerator's name,
//   e(T)    closes the switch and formats "T_<number>" for anything else.
#define s(t)	const char *t##ToStr(t x) { switch (x) { case t##_Undefined: return "Undefined";
#define c(t, x)	case t##_##x: return #x;
#define e(t)	} sprintf(szMsg, #t "_%d", x); return szMsg; }
#include "enums.h"

// Define StrToXXX(const char *Str) functions for each enum type XXX.
//   s(T)    opens the function with a dummy "if (0)" so every case can
//           start with "else if",
//   c(T, x) compares Str to the enumerator name using stricmp,
//   e(T)    calls Quit() for an unknown name.
#define s(t)	t StrTo##t(const char *Str) { if (0) ;
#define c(t, x)	else if (0 == stricmp(#x, Str)) return t##_##x;
#define e(t)	Quit("Invalid value %s for type %s", Str, #t); return t##_Undefined; }
#include "enums.h"
|
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
@ -0,0 +1,689 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include "pwpath.h"
|
||||||
|
#include "estring.h"
|
||||||
|
#include "seq.h"
|
||||||
|
#include "msa.h"
|
||||||
|
|
||||||
|
/***
|
||||||
|
An "estring" is an edit string that operates on a sequence.
|
||||||
|
An estring is represented as a vector of integers.
|
||||||
|
It is interpreted in order of increasing suffix.
|
||||||
|
A positive value n means copy n letters.
|
||||||
|
A negative value -n means insert n indels.
|
||||||
|
Zero marks the end of the vector.
|
||||||
|
Consecutive entries must have opposite sign, i.e. the
|
||||||
|
shortest possible representation must be used.
|
||||||
|
|
||||||
|
A "tpair" is a traceback path for a pairwise alignment
|
||||||
|
represented as two estrings, one for each sequence.
|
||||||
|
***/
|
||||||
|
|
||||||
|
// Packs two characters into one integer (first in the high byte, second in
// the low byte) so a pair of edge types can be used as a single switch case.
// Arguments and the shift are fully parenthesized so that expression
// arguments are evaluated as a unit.
#define c2(c,d)	((((unsigned char) (c)) << 8) | ((unsigned char) (d)))
|
||||||
|
|
||||||
|
// Returns the number of entries in the estring es, not counting the
// terminating zero.
unsigned LengthEstring(const short es[])
	{
	unsigned uEntryCount = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		++uEntryCount;
	return uEntryCount;
	}
|
||||||
|
|
||||||
|
short *EstringNewCopy(const short es[])
|
||||||
|
{
|
||||||
|
unsigned n = LengthEstring(es) + 1;
|
||||||
|
short *esNew = new short[n];
|
||||||
|
memcpy(esNew, es, n*sizeof(short));
|
||||||
|
return esNew;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LogEstring(const short es[])
|
||||||
|
{
|
||||||
|
Log("<");
|
||||||
|
for (unsigned i = 0; es[i] != 0; ++i)
|
||||||
|
{
|
||||||
|
if (i > 0)
|
||||||
|
Log(" ");
|
||||||
|
Log("%d", es[i]);
|
||||||
|
}
|
||||||
|
Log(">");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when the two estrings hold the same entries up to and
// including the terminating zero.
static bool EstringsEq(const short es1[], const short es2[])
	{
	unsigned uPos = 0;
	while (es1[uPos] == es2[uPos])
		{
		// Both terminated at the same position with no mismatch.
		if (0 == es1[uPos])
			return true;
		++uPos;
		}
	return false;
	}
|
||||||
|
|
||||||
|
// Totals the estring es: *ptruSymbols receives the sum of the positive
// entries (letters copied) and *ptruIndels the sum of the magnitudes of
// the negative entries (indels inserted).
static void EstringCounts(const short es[], unsigned *ptruSymbols,
  unsigned *ptruIndels)
	{
	unsigned uLetterTotal = 0;
	unsigned uGapTotal = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		{
		const short sRun = *ptrEntry;
		if (sRun < 0)
			uGapTotal += -sRun;
		else if (sRun > 0)
			uLetterTotal += sRun;
		}
	*ptruSymbols = uLetterTotal;
	*ptruIndels = uGapTotal;
	}
|
||||||
|
|
||||||
|
static char *EstringOp(const short es[], const char s[])
|
||||||
|
{
|
||||||
|
unsigned uSymbols;
|
||||||
|
unsigned uIndels;
|
||||||
|
EstringCounts(es, &uSymbols, &uIndels);
|
||||||
|
assert((unsigned) strlen(s) == uSymbols);
|
||||||
|
char *sout = new char[uSymbols + uIndels + 1];
|
||||||
|
char *psout = sout;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int n = *es++;
|
||||||
|
if (0 == n)
|
||||||
|
break;
|
||||||
|
if (n > 0)
|
||||||
|
for (int i = 0; i < n; ++i)
|
||||||
|
*psout++ = *s++;
|
||||||
|
else
|
||||||
|
for (int i = 0; i < -n; ++i)
|
||||||
|
*psout++ = '-';
|
||||||
|
}
|
||||||
|
assert(0 == *s);
|
||||||
|
*psout = 0;
|
||||||
|
return sout;
|
||||||
|
}
|
||||||
|
|
||||||
|
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
|
||||||
|
{
|
||||||
|
#if DEBUG
|
||||||
|
unsigned uSymbols;
|
||||||
|
unsigned uIndels;
|
||||||
|
EstringCounts(es, &uSymbols, &uIndels);
|
||||||
|
assert(sIn.Length() == uSymbols);
|
||||||
|
#endif
|
||||||
|
sOut.Clear();
|
||||||
|
sOut.SetName(sIn.GetName());
|
||||||
|
int p = 0;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int n = *es++;
|
||||||
|
if (0 == n)
|
||||||
|
break;
|
||||||
|
if (n > 0)
|
||||||
|
for (int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
char c = sIn[p++];
|
||||||
|
sOut.push_back(c);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
for (int i = 0; i < -n; ++i)
|
||||||
|
sOut.push_back('-');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
|
||||||
|
{
|
||||||
|
unsigned uSymbols;
|
||||||
|
unsigned uIndels;
|
||||||
|
EstringCounts(es, &uSymbols, &uIndels);
|
||||||
|
assert(sIn.Length() == uSymbols);
|
||||||
|
|
||||||
|
unsigned uColCount = uSymbols + uIndels;
|
||||||
|
|
||||||
|
a.Clear();
|
||||||
|
a.SetSize(1, uColCount);
|
||||||
|
|
||||||
|
a.SetSeqName(0, sIn.GetName());
|
||||||
|
a.SetSeqId(0, sIn.GetId());
|
||||||
|
|
||||||
|
unsigned p = 0;
|
||||||
|
unsigned uColIndex = 0;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
int n = *es++;
|
||||||
|
if (0 == n)
|
||||||
|
break;
|
||||||
|
if (n > 0)
|
||||||
|
for (int i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
char c = sIn[p++];
|
||||||
|
a.SetChar(0, uColIndex++, c);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
for (int i = 0; i < -n; ++i)
|
||||||
|
a.SetChar(0, uColIndex++, '-');
|
||||||
|
}
|
||||||
|
assert(uColIndex == uColCount);
|
||||||
|
return uColCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
|
||||||
|
{
|
||||||
|
// First pass to determine size of estrings esA and esB
|
||||||
|
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||||
|
if (0 == uEdgeCount)
|
||||||
|
{
|
||||||
|
short *esA = new short[1];
|
||||||
|
short *esB = new short[1];
|
||||||
|
esA[0] = 0;
|
||||||
|
esB[0] = 0;
|
||||||
|
*ptresA = esA;
|
||||||
|
*ptresB = esB;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned iLengthA = 1;
|
||||||
|
unsigned iLengthB = 1;
|
||||||
|
const char cFirstEdgeType = Path.GetEdge(0).cType;
|
||||||
|
char cPrevEdgeType = cFirstEdgeType;
|
||||||
|
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
char cEdgeType = Edge.cType;
|
||||||
|
|
||||||
|
switch (c2(cPrevEdgeType, cEdgeType))
|
||||||
|
{
|
||||||
|
case c2('M', 'M'):
|
||||||
|
case c2('D', 'D'):
|
||||||
|
case c2('I', 'I'):
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('D', 'M'):
|
||||||
|
case c2('M', 'D'):
|
||||||
|
++iLengthB;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('I', 'M'):
|
||||||
|
case c2('M', 'I'):
|
||||||
|
++iLengthA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('I', 'D'):
|
||||||
|
case c2('D', 'I'):
|
||||||
|
++iLengthB;
|
||||||
|
++iLengthA;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
cPrevEdgeType = cEdgeType;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass2 for seq A
|
||||||
|
{
|
||||||
|
short *esA = new short[iLengthA+1];
|
||||||
|
unsigned iA = 0;
|
||||||
|
switch (Path.GetEdge(0).cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
case 'D':
|
||||||
|
esA[0] = 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'I':
|
||||||
|
esA[0] = -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
char cPrevEdgeType = cFirstEdgeType;
|
||||||
|
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
char cEdgeType = Edge.cType;
|
||||||
|
|
||||||
|
switch (c2(cPrevEdgeType, cEdgeType))
|
||||||
|
{
|
||||||
|
case c2('M', 'M'):
|
||||||
|
case c2('D', 'D'):
|
||||||
|
case c2('D', 'M'):
|
||||||
|
case c2('M', 'D'):
|
||||||
|
++(esA[iA]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('I', 'D'):
|
||||||
|
case c2('I', 'M'):
|
||||||
|
++iA;
|
||||||
|
esA[iA] = 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('M', 'I'):
|
||||||
|
case c2('D', 'I'):
|
||||||
|
++iA;
|
||||||
|
esA[iA] = -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('I', 'I'):
|
||||||
|
--(esA[iA]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
cPrevEdgeType = cEdgeType;
|
||||||
|
}
|
||||||
|
assert(iA == iLengthA - 1);
|
||||||
|
esA[iLengthA] = 0;
|
||||||
|
*ptresA = esA;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Pass2 for seq B
|
||||||
|
short *esB = new short[iLengthB+1];
|
||||||
|
unsigned iB = 0;
|
||||||
|
switch (Path.GetEdge(0).cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
case 'I':
|
||||||
|
esB[0] = 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'D':
|
||||||
|
esB[0] = -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
char cPrevEdgeType = cFirstEdgeType;
|
||||||
|
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||||
|
{
|
||||||
|
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||||
|
char cEdgeType = Edge.cType;
|
||||||
|
|
||||||
|
switch (c2(cPrevEdgeType, cEdgeType))
|
||||||
|
{
|
||||||
|
case c2('M', 'M'):
|
||||||
|
case c2('I', 'I'):
|
||||||
|
case c2('I', 'M'):
|
||||||
|
case c2('M', 'I'):
|
||||||
|
++(esB[iB]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('D', 'I'):
|
||||||
|
case c2('D', 'M'):
|
||||||
|
++iB;
|
||||||
|
esB[iB] = 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('M', 'D'):
|
||||||
|
case c2('I', 'D'):
|
||||||
|
++iB;
|
||||||
|
esB[iB] = -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case c2('D', 'D'):
|
||||||
|
--(esB[iB]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
cPrevEdgeType = cEdgeType;
|
||||||
|
}
|
||||||
|
assert(iB == iLengthB - 1);
|
||||||
|
esB[iLengthB] = 0;
|
||||||
|
*ptresB = esB;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
{
|
||||||
|
const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
|
||||||
|
unsigned uSymbols;
|
||||||
|
unsigned uIndels;
|
||||||
|
EstringCounts(*ptresA, &uSymbols, &uIndels);
|
||||||
|
assert(uSymbols == LastEdge.uPrefixLengthA);
|
||||||
|
assert(uSymbols + uIndels == uEdgeCount);
|
||||||
|
|
||||||
|
EstringCounts(*ptresB, &uSymbols, &uIndels);
|
||||||
|
assert(uSymbols == LastEdge.uPrefixLengthB);
|
||||||
|
assert(uSymbols + uIndels == uEdgeCount);
|
||||||
|
|
||||||
|
PWPath TmpPath;
|
||||||
|
EstringsToPath(*ptresA, *ptresB, TmpPath);
|
||||||
|
TmpPath.AssertEqual(Path);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
|
||||||
|
{
|
||||||
|
Path.Clear();
|
||||||
|
unsigned iA = 0;
|
||||||
|
unsigned iB = 0;
|
||||||
|
int nA = esA[iA++];
|
||||||
|
int nB = esB[iB++];
|
||||||
|
unsigned uPrefixLengthA = 0;
|
||||||
|
unsigned uPrefixLengthB = 0;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
char cType;
|
||||||
|
if (nA > 0)
|
||||||
|
{
|
||||||
|
if (nB > 0)
|
||||||
|
{
|
||||||
|
cType = 'M';
|
||||||
|
--nA;
|
||||||
|
--nB;
|
||||||
|
}
|
||||||
|
else if (nB < 0)
|
||||||
|
{
|
||||||
|
cType = 'D';
|
||||||
|
--nA;
|
||||||
|
++nB;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
else if (nA < 0)
|
||||||
|
{
|
||||||
|
if (nB > 0)
|
||||||
|
{
|
||||||
|
cType = 'I';
|
||||||
|
++nA;
|
||||||
|
--nB;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assert(false);
|
||||||
|
|
||||||
|
switch (cType)
|
||||||
|
{
|
||||||
|
case 'M':
|
||||||
|
++uPrefixLengthA;
|
||||||
|
++uPrefixLengthB;
|
||||||
|
break;
|
||||||
|
case 'D':
|
||||||
|
++uPrefixLengthA;
|
||||||
|
break;
|
||||||
|
case 'I':
|
||||||
|
++uPrefixLengthB;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PWEdge Edge;
|
||||||
|
Edge.cType = cType;
|
||||||
|
Edge.uPrefixLengthA = uPrefixLengthA;
|
||||||
|
Edge.uPrefixLengthB = uPrefixLengthB;
|
||||||
|
Path.AppendEdge(Edge);
|
||||||
|
|
||||||
|
if (nA == 0)
|
||||||
|
{
|
||||||
|
if (0 == esA[iA])
|
||||||
|
{
|
||||||
|
assert(0 == esB[iB]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
nA = esA[iA++];
|
||||||
|
}
|
||||||
|
if (nB == 0)
|
||||||
|
nB = esB[iB++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/***
|
||||||
|
Multiply two estrings to make a third estring.
|
||||||
|
The product of two estrings e1*e2 is defined to be
|
||||||
|
the estring that produces the same result as applying
|
||||||
|
e1 then e2. Multiplication is not commutative. In fact,
|
||||||
|
the reversed order is undefined unless both estrings
|
||||||
|
consist of a single, identical, positive entry.
|
||||||
|
A primary motivation for using estrings is that
|
||||||
|
multiplication is very fast, reducing the time
|
||||||
|
needed to construct the root alignment.
|
||||||
|
|
||||||
|
Example
|
||||||
|
|
||||||
|
<-1,3>(XXX) = -XXX
|
||||||
|
<2,-1,2>(-XXX) = -X-XX
|
||||||
|
|
||||||
|
Therefore,
|
||||||
|
|
||||||
|
<-1,3>*<2,-1,2> = <-1,1,-1,2>
|
||||||
|
***/
|
||||||
|
|
||||||
|
static bool CanMultiplyEstrings(const short es1[], const short es2[])
|
||||||
|
{
|
||||||
|
unsigned uSymbols1;
|
||||||
|
unsigned uSymbols2;
|
||||||
|
unsigned uIndels1;
|
||||||
|
unsigned uIndels2;
|
||||||
|
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||||
|
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||||
|
return uSymbols1 + uIndels1 == uSymbols2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append a gap run of n (n is negative) to the estring being built in esp.
//
// ip is the index of the last entry written so far, or -1 when esp is still
// empty. If that last entry is already a gap run, n is merged into it;
// otherwise a new entry is opened and ip is advanced to it.
static inline void AppendGaps(short esp[], int &ip, int n)
{
    const bool bLastIsGapRun = (ip != -1) && (esp[ip] < 0);
    if (bLastIsGapRun)
    {
        esp[ip] += n;
        return;
    }
    esp[++ip] = n;
}
|
||||||
|
|
||||||
|
// Append a symbol run of n (n is positive) to the estring being built in esp.
//
// ip is the index of the last entry written so far, or -1 when esp is still
// empty. If that last entry is already a symbol run, n is merged into it;
// otherwise a new entry is opened and ip is advanced to it.
static inline void AppendSymbols(short esp[], int &ip, int n)
{
    const bool bLastIsSymbolRun = (ip != -1) && (esp[ip] > 0);
    if (bLastIsSymbolRun)
    {
        esp[ip] += n;
        return;
    }
    esp[++ip] = n;
}
|
||||||
|
|
||||||
|
void MulEstrings(const short es1[], const short es2[], short esp[])
|
||||||
|
{
|
||||||
|
assert(CanMultiplyEstrings(es1, es2));
|
||||||
|
|
||||||
|
unsigned i1 = 0;
|
||||||
|
int ip = -1;
|
||||||
|
int n1 = es1[i1++];
|
||||||
|
for (unsigned i2 = 0; ; ++i2)
|
||||||
|
{
|
||||||
|
int n2 = es2[i2];
|
||||||
|
if (0 == n2)
|
||||||
|
break;
|
||||||
|
if (n2 > 0)
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (n1 < 0)
|
||||||
|
{
|
||||||
|
if (n2 > -n1)
|
||||||
|
{
|
||||||
|
AppendGaps(esp, ip, n1);
|
||||||
|
n2 += n1;
|
||||||
|
n1 = es1[i1++];
|
||||||
|
}
|
||||||
|
else if (n2 == -n1)
|
||||||
|
{
|
||||||
|
AppendGaps(esp, ip, n1);
|
||||||
|
n1 = es1[i1++];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(n2 < -n1);
|
||||||
|
AppendGaps(esp, ip, -n2);
|
||||||
|
n1 += n2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(n1 > 0);
|
||||||
|
if (n2 > n1)
|
||||||
|
{
|
||||||
|
AppendSymbols(esp, ip, n1);
|
||||||
|
n2 -= n1;
|
||||||
|
n1 = es1[i1++];
|
||||||
|
}
|
||||||
|
else if (n2 == n1)
|
||||||
|
{
|
||||||
|
AppendSymbols(esp, ip, n1);
|
||||||
|
n1 = es1[i1++];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(n2 < n1);
|
||||||
|
AppendSymbols(esp, ip, n2);
|
||||||
|
n1 -= n2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(n2 < 0);
|
||||||
|
AppendGaps(esp, ip, n2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
esp[++ip] = 0;
|
||||||
|
|
||||||
|
#if DEBUG
|
||||||
|
{
|
||||||
|
int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
|
||||||
|
assert(ip < MaxLen);
|
||||||
|
if (ip >= 2)
|
||||||
|
for (int i = 0; i < ip - 2; ++i)
|
||||||
|
{
|
||||||
|
if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
|
||||||
|
{
|
||||||
|
Log("Bad result of MulEstring: ");
|
||||||
|
LogEstring(esp);
|
||||||
|
Quit("Assert failed (alternating signs)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned uSymbols1;
|
||||||
|
unsigned uSymbols2;
|
||||||
|
unsigned uSymbolsp;
|
||||||
|
unsigned uIndels1;
|
||||||
|
unsigned uIndels2;
|
||||||
|
unsigned uIndelsp;
|
||||||
|
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||||
|
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||||
|
EstringCounts(esp, &uSymbolsp, &uIndelsp);
|
||||||
|
if (uSymbols1 + uIndels1 != uSymbols2)
|
||||||
|
{
|
||||||
|
Log("Bad result of MulEstring: ");
|
||||||
|
LogEstring(esp);
|
||||||
|
Quit("Assert failed (counts1 %u %u %u)",
|
||||||
|
uSymbols1, uIndels1, uSymbols2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test(const short es1[], const short es2[], const short esa[])
|
||||||
|
{
|
||||||
|
unsigned uSymbols1;
|
||||||
|
unsigned uSymbols2;
|
||||||
|
unsigned uIndels1;
|
||||||
|
unsigned uIndels2;
|
||||||
|
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||||
|
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||||
|
|
||||||
|
char s[4096];
|
||||||
|
memset(s, 'X', sizeof(s));
|
||||||
|
s[uSymbols1] = 0;
|
||||||
|
|
||||||
|
char *s1 = EstringOp(es1, s);
|
||||||
|
char *s12 = EstringOp(es2, s1);
|
||||||
|
|
||||||
|
memset(s, 'X', sizeof(s));
|
||||||
|
s[uSymbols2] = 0;
|
||||||
|
char *s2 = EstringOp(es2, s);
|
||||||
|
|
||||||
|
Log("%s * %s = %s\n", s1, s2, s12);
|
||||||
|
|
||||||
|
LogEstring(es1);
|
||||||
|
Log(" * ");
|
||||||
|
LogEstring(es2);
|
||||||
|
Log(" = ");
|
||||||
|
LogEstring(esa);
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
short esp[4096];
|
||||||
|
MulEstrings(es1, es2, esp);
|
||||||
|
LogEstring(esp);
|
||||||
|
if (!EstringsEq(esp, esa))
|
||||||
|
Log(" *ERROR* ");
|
||||||
|
Log("\n");
|
||||||
|
|
||||||
|
memset(s, 'X', sizeof(s));
|
||||||
|
s[uSymbols1] = 0;
|
||||||
|
char *sp = EstringOp(esp, s);
|
||||||
|
Log("%s\n", sp);
|
||||||
|
Log("\n==========\n\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void TestEstrings()
|
||||||
|
{
|
||||||
|
SetListFileName("c:\\tmp\\muscle.log", false);
|
||||||
|
//{
|
||||||
|
//short es1[] = { -1, 1, -1, 0 };
|
||||||
|
//short es2[] = { 1, -1, 2, 0 };
|
||||||
|
//short esa[] = { -2, 1, -1, 0 };
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
//{
|
||||||
|
//short es1[] = { 2, -1, 2, 0 };
|
||||||
|
//short es2[] = { 1, -1, 3, -1, 1, 0 };
|
||||||
|
//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
//{
|
||||||
|
//short es1[] = { -1, 3, 0 };
|
||||||
|
//short es2[] = { 2, -1, 2, 0 };
|
||||||
|
//short esa[] = { -1, 1, -1, 2, 0 };
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
//{
|
||||||
|
//short es1[] = { -1, 1, -1, 1, 0};
|
||||||
|
//short es2[] = { 4, 0 };
|
||||||
|
//short esa[] = { -1, 1, -1, 1, 0};
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
//{
|
||||||
|
//short es1[] = { 1, -1, 1, -1, 0};
|
||||||
|
//short es2[] = { 4, 0 };
|
||||||
|
//short esa[] = { 1, -1, 1, -1, 0};
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
//{
|
||||||
|
//short es1[] = { 1, -1, 1, -1, 0};
|
||||||
|
//short es2[] = { -1, 4, -1, 0 };
|
||||||
|
//short esa[] = { -1, 1, -1, 1, -2, 0};
|
||||||
|
//test(es1, es2, esa);
|
||||||
|
//}
|
||||||
|
{
|
||||||
|
short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
|
||||||
|
short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
|
||||||
|
short esa[] = { 0 };
|
||||||
|
test(es1, es2, esa);
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#ifndef pathsum_h
|
||||||
|
#define pathsum_h
|
||||||
|
|
||||||
|
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);
|
||||||
|
void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
|
||||||
|
void MulEstrings(const short es1[], const short es2[], short esp[]);
|
||||||
|
void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
|
||||||
|
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);
|
||||||
|
void LogEstring(const short es[]);
|
||||||
|
unsigned LengthEstring(const short es[]);
|
||||||
|
short *EstringNewCopy(const short es[]);
|
||||||
|
|
||||||
|
#endif // pathsum_h
|
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#include "muscle.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include "msa.h"
|
||||||
|
#include "textfile.h"
|
||||||
|
|
||||||
|
// Maximum number of alignment characters written per line of FASTA output.
const unsigned FASTA_BLOCK = 60;
|
||||||
|
|
||||||
|
// Replace the contents of this alignment with the sequences read from File.
//
// The alignment is cleared, then GetFastaSeq is called on the file's stdio
// handle until it returns a null data pointer; every sequence obtained is
// added with AppendSeq together with its length and label.
//
// NOTE(review): the buffers returned by GetFastaSeq (data and label) are not
// released here; presumably AppendSeq takes ownership of them -- confirm.
void MSA::FromFASTAFile(TextFile &File)
{
    Clear();

    FILE *f = File.GetStdioFile();

    for (;;)
    {
        char *Label;
        unsigned uSeqLength;
        char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
        if (0 == SeqData)
            break;      // no further sequence available
        AppendSeq(SeqData, uSeqLength, Label);
    }
}
|
||||||
|
|
||||||
|
// Write the alignment to File in FASTA format.
//
// Each sequence is written as a ">name" header line followed by its columns
// in lines of at most FASTA_BLOCK characters.
//
// An alignment with no columns is a caller error (asserted). The line loop
// is driven by the number of columns still to be written, so when asserts
// are compiled out an empty alignment produces header lines only instead of
// wrapping the unsigned line count and reading columns that do not exist.
void MSA::ToFASTAFile(TextFile &File) const
{
    const unsigned uColCount = GetColCount();
    assert(uColCount > 0);
    const unsigned uSeqCount = GetSeqCount();

    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
    {
        File.PutString(">");
        File.PutString(GetSeqName(uSeqIndex));
        File.PutString("\n");

        unsigned uColIndex = 0;     // next column to write
        while (uColIndex < uColCount)
        {
            // Letters on this line: whatever remains, capped at FASTA_BLOCK.
            unsigned uLetters = uColCount - uColIndex;
            if (uLetters > FASTA_BLOCK)
                uLetters = FASTA_BLOCK;

            for (unsigned i = 0; i < uLetters; ++i)
            {
                const char c = GetChar(uSeqIndex, uColIndex);
                File.PutChar(c);
                ++uColIndex;
            }
            File.PutChar('\n');
        }
    }
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user