Patch compilation of binaries
Former-commit-id: 688670c339643a282bdeabafafff3b451be83cb6 Former-commit-id: 60d3e42d2af73515fea50d9c97dc4eacda9c8abb
This commit is contained in:
@ -103,13 +103,16 @@ TAR = tar
|
||||
# PRTDIR : port dependent files location (libraries and binaries)
|
||||
# BINDIR : port binaries
|
||||
# LIBDIR : port libraries
|
||||
# INCDIR : port includes
|
||||
#
|
||||
|
||||
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
|
||||
|
||||
BINDIR = $(PRTDIR)/bin
|
||||
BINDIR = $(abspath $(PRTDIR))/bin
|
||||
|
||||
LIBDIR = $(PRTDIR)/lib
|
||||
LIBDIR = $(abspath $(PRTDIR))/lib
|
||||
|
||||
INCDIR = $(abspath $(PRTDIR))/include
|
||||
|
||||
# ------------------------------------
|
||||
# default gmake variable in implicit rules
|
||||
|
@ -18,9 +18,15 @@
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
CC = /usr/bin/gcc
|
||||
CXX = /usr/bin/g++
|
||||
CXXPP = /usr/bin/cpp
|
||||
CPP = /usr/bin/cpp
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHINE = MACOSX
|
||||
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN
|
||||
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN -DLINUX
|
||||
|
||||
#
|
||||
# MATH_LIBS : machine specific math librairies
|
||||
|
@ -15,6 +15,9 @@ PKGDIR ?= build.$(PORTNAME)
|
||||
|
||||
PRTPATH = $(abspath $(PRTDIR))
|
||||
|
||||
PRTPATH_BIN = $(PRTPATH)/bin
|
||||
PKG_CONFIG = $(PRTPATH)/bin/pkg-config
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
@ -28,7 +31,17 @@ pkg.expand::
|
||||
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
|
||||
|
||||
pkg.make:: pkg.expand
|
||||
test -f $(PKGDIR)/Makefile || (cd $(PKGDIR) && ./configure --prefix=$(PRTPATH))
|
||||
echo $(PKG_CONFIG)
|
||||
test -f $(PKGDIR)/Makefile || (export PATH="$(PRTPATH_BIN):$$PATH" && \
|
||||
export PKG_CONFIG=$(PKG_CONFIG) && \
|
||||
export CC="$(CC)" && \
|
||||
export CXX="$(CXX)" && \
|
||||
export CPP="$(CPP)" && \
|
||||
export CXXPP="$(CXXPP)" && \
|
||||
export CFLAGS="$(CFLAGS)" && \
|
||||
export LDFLAGS="$(LDFLAGS)" && \
|
||||
cd $(PKGDIR) && \
|
||||
./configure --prefix=$(PRTPATH) $(CONFIGURE_OPTIONS))
|
||||
$(MAKE) -C $(PKGDIR)
|
||||
|
||||
pkg.install:: pkg.make
|
||||
|
12
src/Makefile
12
src/Makefile
@ -17,12 +17,18 @@
|
||||
#
|
||||
include ../config/auto.conf
|
||||
|
||||
DIRS = exonerate \
|
||||
DIRS = aragorn \
|
||||
clustalo \
|
||||
exonerate \
|
||||
hmmer3 \
|
||||
kimono \
|
||||
muscle \
|
||||
ncbiblast \
|
||||
prokov \
|
||||
repseek \
|
||||
sequtils \
|
||||
aragorn \
|
||||
ncbiblast
|
||||
sumaclust \
|
||||
sumatra
|
||||
|
||||
include ../config/targets/propagate.targ
|
||||
|
||||
|
29
src/clustalo/Makefile
Executable file
29
src/clustalo/Makefile
Executable file
@ -0,0 +1,29 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for lxpack
|
||||
#
|
||||
# @history:
|
||||
# @history:
|
||||
# @+ <Gloup> : Apr 97 : Created
|
||||
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
include ../../config/auto.conf
|
||||
|
||||
DIRS = argtable \
|
||||
clustalo
|
||||
|
||||
include ../../config/targets/propagate.targ
|
||||
|
||||
include ../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
24
src/clustalo/argtable/Makefile
Normal file
24
src/clustalo/argtable/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = argtable2-13
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/clustalo/argtable/argtable2-13.tgz
Normal file
BIN
src/clustalo/argtable/argtable2-13.tgz
Normal file
Binary file not shown.
24
src/clustalo/clustalo/Makefile
Normal file
24
src/clustalo/clustalo/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = clustal-omega-1.2.1
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/clustalo/clustalo/clustal-omega-1.2.1.tgz
Normal file
BIN
src/clustalo/clustalo/clustal-omega-1.2.1.tgz
Normal file
Binary file not shown.
BIN
src/exonerate/.DS_Store
vendored
BIN
src/exonerate/.DS_Store
vendored
Binary file not shown.
20
src/exonerate/Makefile
Normal file → Executable file
20
src/exonerate/Makefile
Normal file → Executable file
@ -2,10 +2,12 @@
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
# @desc: makefile for lxpack
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
# @history:
|
||||
# @+ <Gloup> : Apr 97 : Created
|
||||
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
@ -13,12 +15,18 @@
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../config/auto.conf
|
||||
|
||||
PKG = exonerate-2.2.0
|
||||
DIRS = pkg-config \
|
||||
libffi \
|
||||
gettext \
|
||||
glib2 \
|
||||
exonerate
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
include ../../config/targets/propagate.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
include ../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
||||
|
Binary file not shown.
BIN
src/exonerate/exonerate/.DS_Store
vendored
Normal file
BIN
src/exonerate/exonerate/.DS_Store
vendored
Normal file
Binary file not shown.
24
src/exonerate/exonerate/Makefile
Normal file
24
src/exonerate/exonerate/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = exonerate-2.2.0_EC
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/exonerate/exonerate/exonerate-2.2.0_EC.tgz
Normal file
BIN
src/exonerate/exonerate/exonerate-2.2.0_EC.tgz
Normal file
Binary file not shown.
24
src/exonerate/gettext/Makefile
Normal file
24
src/exonerate/gettext/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = gettext-0.19
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/exonerate/gettext/gettext-0.19.tgz
Normal file
BIN
src/exonerate/gettext/gettext-0.19.tgz
Normal file
Binary file not shown.
25
src/exonerate/glib2/Makefile
Normal file
25
src/exonerate/glib2/Makefile
Normal file
@ -0,0 +1,25 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = glib-2.44.1
|
||||
CONFIGURE_OPTIONS = --disable-dtrace
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/exonerate/glib2/glib-2.44.1.tgz
Normal file
BIN
src/exonerate/glib2/glib-2.44.1.tgz
Normal file
Binary file not shown.
BIN
src/exonerate/libffi/.DS_Store
vendored
Normal file
BIN
src/exonerate/libffi/.DS_Store
vendored
Normal file
Binary file not shown.
24
src/exonerate/libffi/Makefile
Normal file
24
src/exonerate/libffi/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = libffi-3.2.1
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/exonerate/libffi/libffi-3.2.1.tgz
Normal file
BIN
src/exonerate/libffi/libffi-3.2.1.tgz
Normal file
Binary file not shown.
25
src/exonerate/pkg-config/Makefile
Normal file
25
src/exonerate/pkg-config/Makefile
Normal file
@ -0,0 +1,25 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = pkg-config-0.29
|
||||
CONFIGURE_OPTIONS= --with-internal-glib
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/exonerate/pkg-config/pkg-config-0.29.tgz
Normal file
BIN
src/exonerate/pkg-config/pkg-config-0.29.tgz
Normal file
Binary file not shown.
28
src/hmmer3/Makefile
Executable file
28
src/hmmer3/Makefile
Executable file
@ -0,0 +1,28 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for lxpack
|
||||
#
|
||||
# @history:
|
||||
# @history:
|
||||
# @+ <Gloup> : Apr 97 : Created
|
||||
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
include ../../config/auto.conf
|
||||
|
||||
DIRS = hmmer3
|
||||
|
||||
include ../../config/targets/propagate.targ
|
||||
|
||||
include ../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
Binary file not shown.
24
src/hmmer3/hmmer3/Makefile
Normal file
24
src/hmmer3/hmmer3/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
# ---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for package exonerate
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> : Sept 15 : Adapted to ORG.Annot
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../../../config/auto.conf
|
||||
|
||||
PKG = hmmer-3.1b1
|
||||
|
||||
include $(CFGDIR)targets/package.targ
|
||||
|
||||
include $(CFGDIR)targets/help.targ
|
||||
|
BIN
src/hmmer3/hmmer3/hmmer-3.1b1.tgz
Normal file
BIN
src/hmmer3/hmmer3/hmmer-3.1b1.tgz
Normal file
Binary file not shown.
@ -1,4 +1,4 @@
|
||||
# ---------------------------------------------------------------
|
||||
#---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
@ -25,9 +25,6 @@ include ../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
||||
test -d $(BINDIR) || mkdir $(BINDIR)
|
||||
\cp -f lxpack/ports/$(PORTNAME)/bin/* $(BINDIR)
|
||||
|
||||
clean::
|
||||
$(MAKE) -C lxpack portclean
|
||||
|
@ -15,14 +15,14 @@
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
include ./config/auto.conf
|
||||
include ../../../config/auto.conf
|
||||
|
||||
DIRS = src \
|
||||
tests
|
||||
|
||||
include ./config/targets/propagate.targ
|
||||
include ../../../config/targets/propagate.targ
|
||||
|
||||
include ./config/targets/help.targ
|
||||
include ../../../config/targets/help.targ
|
||||
|
||||
portclean::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
@ -1,51 +0,0 @@
|
||||
|
||||
$Id: README.txt 1825 2013-02-26 09:39:47Z viari $
|
||||
|
||||
This directory contains Makefile machine specific configuration files
|
||||
(and default targets to help you writing Makefile's)
|
||||
|
||||
These headers should be used with GNU make or compatible
|
||||
|
||||
#
|
||||
# portname
|
||||
#
|
||||
|
||||
To check your port, issue :
|
||||
|
||||
./guess_port
|
||||
|
||||
if output is 'unknown <mach>:<sys>:<rel>' then you should :
|
||||
- add a port entry in guess_port for <mach>:<sys>:<rel>
|
||||
- create a ports/<port>.conf configuration file
|
||||
(the best is to start from another port file,
|
||||
choose whatever looks closest)
|
||||
|
||||
#
|
||||
# configuration flags
|
||||
#
|
||||
|
||||
auto.conf : the main configuration file :
|
||||
- determine the machine port thru 'guess_port' shell
|
||||
- include 'default.conf' file
|
||||
- include the machine specific 'ports/<port>.conf' file
|
||||
|
||||
default.conf : default configuration (included by 'auto.conf')
|
||||
|
||||
ports/<port>.conf : machine specific configuration (included by 'auto.conf')
|
||||
|
||||
#
|
||||
# utility targets
|
||||
#
|
||||
|
||||
targets/help.targ : target for standard help
|
||||
|
||||
targets/propagate.targ : target for propagating targets to subdirectories
|
||||
|
||||
targets/package.targ : default targets for standard package with 'configure'
|
||||
|
||||
targets/empty.targ : default empty targets (defined as double colon rules)
|
||||
|
||||
targets/lxbin.targ : default make targets for standard lx binary (without libraries)
|
||||
|
||||
targets/debug.targ : target to print debug information (for dev.)
|
||||
|
@ -1,54 +0,0 @@
|
||||
#
|
||||
# $Id: auto.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# auto.conf
|
||||
# auto configuration file using guess_port
|
||||
#
|
||||
# this file is included in Makefile
|
||||
#
|
||||
|
||||
#
|
||||
# default shell for gnu-make
|
||||
#
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
#
|
||||
# CFGDIR : location of config files = this file directory location
|
||||
#
|
||||
# CFGPRT : port name (as returned by guess_port)
|
||||
#
|
||||
|
||||
# because builtin 'lastword' is missing in gnu-make 3.80
|
||||
|
||||
lastword = $(word $(words $1), $1)
|
||||
|
||||
CFGDIR := $(dir $(call lastword, $(MAKEFILE_LIST)))
|
||||
|
||||
CFGPRT := $(shell $(CFGDIR)guess_port)
|
||||
|
||||
# check if port is correctly defined
|
||||
|
||||
ifneq (1, $(words $(CFGPRT)))
|
||||
entry := $(call lastword, $(CFGPRT))
|
||||
$(error port is undefined - add entry for "$(entry)" in configuration file -)
|
||||
endif
|
||||
|
||||
#
|
||||
# PORTNAME : port name to use : default is CFGPRT but may be futher modified
|
||||
# by machine specific configuration
|
||||
|
||||
PORTNAME = $(CFGPRT)
|
||||
|
||||
#
|
||||
# default configuration
|
||||
# may be overriden by machine dependant definitions below
|
||||
#
|
||||
|
||||
include $(CFGDIR)default.conf
|
||||
|
||||
#
|
||||
# machine dependant definitions
|
||||
#
|
||||
|
||||
include $(CFGDIR)ports/$(CFGPRT).conf
|
@ -1,124 +0,0 @@
|
||||
#
|
||||
# $Id: default.conf 2007 2013-12-03 14:21:39Z viari $
|
||||
#
|
||||
# default.conf
|
||||
# default configuration flags
|
||||
# maybe further redefined by machine specific configuration
|
||||
#
|
||||
# this file is included by auto.conf
|
||||
#
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF =
|
||||
|
||||
#
|
||||
# CC : (ansi C) compiler command to use
|
||||
# you may add some machine specific flags (like -arch ...)
|
||||
# in the <machine>.conf configuration file
|
||||
#
|
||||
|
||||
CC = gcc
|
||||
|
||||
#
|
||||
# default compiler optimizer flag
|
||||
#
|
||||
|
||||
OPTIM = -O
|
||||
|
||||
#
|
||||
# CC_LIBS : additionnal machine specific $(CC) libraries
|
||||
# like '-lC' on some machines
|
||||
#
|
||||
|
||||
CC_LIBS =
|
||||
|
||||
#
|
||||
# MALLOC_LIBS : machine specific malloc librairies
|
||||
# like '-lmalloc' on SGI
|
||||
#
|
||||
|
||||
MALLOC_LIBS =
|
||||
|
||||
#
|
||||
# MATH_LIBS : machine specific math librairies
|
||||
# like '-lm' on Solaris
|
||||
#
|
||||
|
||||
MATH_LIBS =
|
||||
|
||||
#
|
||||
# LINT : looks like LINT command does not exist anymore
|
||||
# here is a rough replacement
|
||||
#
|
||||
|
||||
LINT = gcc -S -Wall -Wno-format-y2k -W -Wstrict-prototypes \
|
||||
-Wmissing-prototypes -Wpointer-arith -Wreturn-type \
|
||||
-Wcast-qual -Wwrite-strings -Wswitch -Wshadow \
|
||||
-Wcast-align -Wbad-function-cast -Wchar-subscripts \
|
||||
-Winline -Wnested-externs -Wredundant-decls
|
||||
|
||||
# ------------------------------------
|
||||
# General system commands
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# AR : AR archive command
|
||||
# ARFLAGS : $(AR) archiving flags
|
||||
# ARXFLAGS : $(AR) extraction flags
|
||||
#
|
||||
|
||||
AR = ar
|
||||
ARFLAGS = rcv
|
||||
ARXFLAGS = xv
|
||||
|
||||
#
|
||||
# RANLIB : ranlib command
|
||||
#
|
||||
|
||||
RANLIB = ranlib
|
||||
|
||||
#
|
||||
# DIFF : diff command
|
||||
#
|
||||
|
||||
DIFF = diff
|
||||
|
||||
#
|
||||
# TAR : tar command
|
||||
#
|
||||
|
||||
TAR = tar
|
||||
|
||||
# ------------------------------------
|
||||
# Default locations
|
||||
# ------------------------------------
|
||||
#
|
||||
# PRTDIR : port dependent files location (libraries and binaries)
|
||||
# BINDIR : port binaries
|
||||
# LIBDIR : port libraries
|
||||
#
|
||||
|
||||
PRTDIR = $(CFGDIR)../ports/$(PORTNAME)
|
||||
|
||||
BINDIR = $(PRTDIR)/bin
|
||||
|
||||
LIBDIR = $(PRTDIR)/lib
|
||||
|
||||
# ------------------------------------
|
||||
# default gmake variable in implicit rules
|
||||
# ------------------------------------
|
||||
|
||||
CFLAGS = $(OPTIM) $(MACHDEF) -I$(INCDIR)
|
||||
|
||||
LDFLAGS = -L$(LIBDIR) -L.
|
||||
|
||||
LDLIBS = $(LIBS) $(MALLOC_LIBS) $(MATH_LIBS) $(CC_LIBS)
|
||||
|
||||
LINTFLAGS = $(MACHDEF) -I$(INCDIR)
|
@ -1,33 +0,0 @@
|
||||
#! /bin/sh
|
||||
#
|
||||
# $Id: guess_port 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# @file: guess_port
|
||||
# @desc: attempt to guess the portname
|
||||
# @usage: guess_port
|
||||
#
|
||||
# @history:
|
||||
# @+ <Gloup> Nov. 2000 first draft adapted from GNU config.guess
|
||||
# @+ <Gloup> Feb. 2010 moved to sh
|
||||
#
|
||||
|
||||
mach=`uname -m`
|
||||
syst=`uname -s`
|
||||
rels=`uname -r`
|
||||
|
||||
case ${mach}:${syst}:${rels} in
|
||||
|
||||
alpha:OSF1:* ) echo alpha-osf1;;
|
||||
sun4*:SunOS:5.* ) echo sparc-solaris;;
|
||||
i86pc:SunOS:5.* ) echo i386-solaris;;
|
||||
sun4*:SunOS:* ) echo sparc-sunos;;
|
||||
Power*:Darwin:* ) echo ppc-darwin;;
|
||||
i*86:Linux:* ) echo i386-linux;;
|
||||
x*86*:Linux:* ) echo i386-linux;;
|
||||
i*86:Darwin:* ) echo i386-darwin;;
|
||||
IP*:IRIX*:* ) echo mips-irix;;
|
||||
i*86:MINGW32*:* ) echo x86-mingw32;;
|
||||
|
||||
*) echo unknown ${mach}:${syst}:${rels}; exit 1;;
|
||||
esac
|
||||
exit 0
|
@ -1,26 +0,0 @@
|
||||
#
|
||||
# $Id: i386-darwin.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# i386-darwin.conf
|
||||
# configuration file for MacOS-X/Intel-Based/Darwin 1.2 with gcc compiler
|
||||
# this file is included in Makefile
|
||||
#
|
||||
# system (uname -srp) : Darwin 8.7.1 i386
|
||||
# compiler (cc --version) : i686-apple-darwin8-gcc-4.0.1
|
||||
#
|
||||
# check tags
|
||||
# @uname:uname -srp:Darwin 8.7.1 i386
|
||||
# @cc:cc --version:i686-apple-darwin8-gcc-4.0.1
|
||||
#
|
||||
#
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF = -DLX_TARGET_MACINTEL -DLITTLE_ENDIAN -DMACOSX
|
||||
|
@ -1,32 +0,0 @@
|
||||
#
|
||||
# $Id: i386-linux.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# i386-linux.conf
|
||||
# configuration file for linux ix86 with GNU gcc compiler
|
||||
# this file is included in Makefile
|
||||
#
|
||||
# system (uname -srp) : Linux 2.2.14-5.0 unknown
|
||||
# compiler (gcc --version) : egcs-2.91.66
|
||||
#
|
||||
# check tags
|
||||
# @uname:uname -srp:Linux 2.2.14-5.0 unknown
|
||||
# @cc:cc --version:egcs-2.91.66
|
||||
#
|
||||
#
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF = -DLX_TARGET_LINUX -DLITTLE_ENDIAN
|
||||
|
||||
#
|
||||
# MATH_LIBS : machine specific math librairies
|
||||
#
|
||||
|
||||
MATH_LIBS = -lm
|
||||
|
@ -1,32 +0,0 @@
|
||||
#
|
||||
# $Id: ppc-darwin.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# ppc-darwin.conf
|
||||
# configuration file for MacOS-X/Darwin 1.2 with native cc compiler
|
||||
# this file is included in Makefile
|
||||
#
|
||||
# system (uname -srp) : Darwin 1.2 powerpc
|
||||
# compiler (cc --version) : 2.7.2.1
|
||||
#
|
||||
# check tags
|
||||
# @uname:uname -srp:Darwin 1.2 powerpc
|
||||
# @cc:cc --version:2.7.2.1
|
||||
#
|
||||
#
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF = -DLX_TARGET_MACPPC -DBIG_ENDIAN
|
||||
|
||||
#
|
||||
# CC : name of (ansi C) compiler to use
|
||||
#
|
||||
|
||||
CC = cc -arch ppc
|
||||
|
@ -1,31 +0,0 @@
|
||||
#
|
||||
# $Id: sparc-solaris.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# sparc-solaris.conf
|
||||
# configuration file for sparc solaris with GNU gcc compiler
|
||||
# this file is included in Makefile
|
||||
#
|
||||
# system (uname -srp) : SunOS 5.8 sparc
|
||||
# compiler (gcc --version) : 2.95.2
|
||||
#
|
||||
# check tags
|
||||
# @uname:uname -srp:SunOS 5.8 sparc
|
||||
# @cc:cc --version:2.95.2
|
||||
#
|
||||
#
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
|
||||
MACHDEF = -DLX_TARGET_SOLARIS -DBIG_ENDIAN
|
||||
|
||||
#
|
||||
# MATH_LIBS : machine specific math librairies
|
||||
#
|
||||
|
||||
MATH_LIBS = -lm
|
@ -1,54 +0,0 @@
|
||||
#
|
||||
# $Id: x86-mingw32.conf 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# x86-mingw32
|
||||
# configuration file for MinGW with GNU gcc compiler.
|
||||
#
|
||||
# this file is included in Makefile
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# rename PORTNAME safely since MinGW produce pure win32 executables
|
||||
# without dll's
|
||||
#
|
||||
|
||||
PORTNAME = x86-win32
|
||||
|
||||
# ------------------------------------
|
||||
# General compilation flags
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# CC_LIBS : additionnal machine specific $(CC) libraries
|
||||
#
|
||||
# libiberty is needed for some system extensions (like mkstemps)
|
||||
#
|
||||
|
||||
CC_LIBS = -liberty
|
||||
|
||||
#
|
||||
# MACHDEF : define machine and OS specific flags
|
||||
#
|
||||
# -DDLMALLOC : use dlmalloc instead of malloc (which does not have mallinfo)
|
||||
# -posix is a new replacement for several MinGW32 flags, including:
|
||||
# -D__USE_MINGW_ANSI_STDIO : mingw gcc flag to recognize the C99 "%zu" format
|
||||
#
|
||||
|
||||
MACHDEF = -posix -DLX_TARGET_WIN32 -DWIN_MINGW -DDLMALLOC -DLITTLE_ENDIAN
|
||||
|
||||
#
|
||||
# MATH_LIBS : machine specific math librairies
|
||||
#
|
||||
|
||||
MATH_LIBS = -lm
|
||||
|
||||
# ------------------------------------
|
||||
# General system commands
|
||||
# ------------------------------------
|
||||
|
||||
#
|
||||
# DIFF : diff command / should ignore cr on windows
|
||||
#
|
||||
|
||||
DIFF = diff --strip-trailing-cr
|
@ -1,25 +0,0 @@
|
||||
#
|
||||
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# debug.targ
|
||||
#
|
||||
# target to print debug information (dev. only)
|
||||
#
|
||||
# it defines the following targets:
|
||||
#
|
||||
# debug :
|
||||
# print debug
|
||||
#
|
||||
# it requires auto.conf
|
||||
#
|
||||
|
||||
.PHONY: debug
|
||||
|
||||
debug::
|
||||
@echo "+ PORTNAME: $(PORTNAME)"
|
||||
@echo "+ CFGPRT: $(CFGPRT)"
|
||||
@echo "+ CFGDIR: $(CFGDIR)"
|
||||
@echo "+ PRTDIR: $(PRTDIR)"
|
||||
@echo "+ MACHDEF: $(MACHDEF)"
|
||||
|
||||
|
@ -1,24 +0,0 @@
|
||||
#
|
||||
# $Id: $
|
||||
#
|
||||
# epty.targ
|
||||
#
|
||||
# default empty targets (defined as double colon rules)
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
|
||||
.PHONY: all test clean portclean help
|
||||
|
||||
all::
|
||||
|
||||
test::
|
||||
|
||||
clean::
|
||||
|
||||
portclean:: clean
|
||||
|
||||
test::
|
@ -1,23 +0,0 @@
|
||||
#
|
||||
# $Id: help.targ 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# help.targ
|
||||
#
|
||||
# default target to print help
|
||||
#
|
||||
# it defines the following targets:
|
||||
#
|
||||
# help :
|
||||
# print help
|
||||
#
|
||||
|
||||
.PHONY: help
|
||||
|
||||
help::
|
||||
@ echo "basic usage: make [<action>+]"
|
||||
@ echo "valid <action> :"
|
||||
@ echo " all : compile everything for current port [default target]"
|
||||
@ echo " clean : local cleanup"
|
||||
@ echo " portclean : cleanup distribution for current port"
|
||||
@ echo " test : run tests on current port"
|
||||
@ echo " help : print this help"
|
@ -1,51 +0,0 @@
|
||||
#
|
||||
# $Id: $
|
||||
#
|
||||
# lxbin.targ
|
||||
#
|
||||
# default make targets for standard lx binary
|
||||
#
|
||||
# you should define the 'PROGS' and 'OSRC' variables
|
||||
# and optionnaly 'LIBS' if binaries have to be linked with libraries
|
||||
#
|
||||
# note: if main source code for binary PROG is PROG.c, there is nothing to do,
|
||||
# else (e.g. if it involves several sources files) you should also add local
|
||||
# file dependencies. e.g under the form:
|
||||
#
|
||||
# mymain: $(OBJ) mymain_base.c mymain_help.c
|
||||
# $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS)
|
||||
#
|
||||
#
|
||||
# 'auto.conf' should have been included
|
||||
#
|
||||
|
||||
OBJ = $(OSRC:.c=.o)
|
||||
|
||||
INCDIR = ../include
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
|
||||
.PHONY: all prelib install test clean portclean
|
||||
|
||||
all:: prelib $(PROGS) install
|
||||
@echo "+++++++++++ binaries $(PROGS) done"
|
||||
|
||||
prelib::
|
||||
test -d $(PRTDIR) || mkdir $(PRTDIR) # because some linker may complain
|
||||
test -d $(LIBDIR) || mkdir $(LIBDIR) # if -L$(LIBDIR) does not exist
|
||||
|
||||
install::
|
||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
||||
test -d $(BINDIR) || mkdir $(BINDIR)
|
||||
-for f in $(PROGS) ; do \cp -f $$f $(BINDIR) ; done
|
||||
|
||||
test::
|
||||
|
||||
clean::
|
||||
-\rm -f *.o cvstatic* *% *.bak so_loc*
|
||||
-\rm -f $(PROGS)
|
||||
|
||||
portclean:: clean
|
||||
-(! test -d $(BINDIR)) || (cd $(BINDIR) && \rm -f $(PROGS))
|
@ -1,43 +0,0 @@
|
||||
#
|
||||
# $Id: $
|
||||
#
|
||||
# lxlib.targ
|
||||
#
|
||||
# default make targets for standard lx library
|
||||
#
|
||||
# you should define the 'LOCLIB' and 'OSRC' variables
|
||||
#
|
||||
# 'auto.conf' should have been included
|
||||
#
|
||||
|
||||
OBJ = $(OSRC:.c=.o)
|
||||
|
||||
INCDIR = ../include
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
|
||||
.PHONY: all lib install test clean portclean
|
||||
|
||||
all:: lib install
|
||||
@echo "+++++++++++ library $(LOCLIB) done"
|
||||
|
||||
lib:: $(OBJ)
|
||||
$(AR) $(ARFLAGS) $(LOCLIB) $(OBJ)
|
||||
$(RANLIB) $(LOCLIB)
|
||||
|
||||
install::
|
||||
test -d $(PRTDIR) || mkdir $(PRTDIR)
|
||||
test -d $(LIBDIR) || mkdir $(LIBDIR)
|
||||
\cp -f $(LOCLIB) $(LIBDIR)
|
||||
$(RANLIB) $(LIBDIR)/$(LOCLIB)
|
||||
|
||||
test::
|
||||
|
||||
clean::
|
||||
-\rm -f *.o cvstatic* *% *.bak so_loc*
|
||||
-\rm -f $(LOCLIB)
|
||||
|
||||
portclean:: clean
|
||||
-(! test -d $(LIBDIR)) || (cd $(LIBDIR) && \rm -f $(LOCLIB))
|
@ -1,48 +0,0 @@
|
||||
#
|
||||
# $Id: package.targ 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# package.targ
|
||||
#
|
||||
# default make targets for standard package with configure
|
||||
#
|
||||
# you should define the 'PKG' variable
|
||||
# (and optionaly 'PKGTAR', 'PKGDIR')
|
||||
#
|
||||
|
||||
PKGTAR ?= $(PKG).tgz
|
||||
|
||||
PKGDIR ?= build.$(PORTNAME)
|
||||
|
||||
PRTPATH = $(abspath $(PRTDIR))
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
|
||||
.PHONY: all clean test portclean pkg pkg.expand pkg.make pkg.install
|
||||
|
||||
all:: pkg
|
||||
|
||||
pkg.expand::
|
||||
test -d $(PKGDIR) || mkdir $(PKGDIR)
|
||||
test -f $(PKGDIR)/configure || $(TAR) zxf $(PKGTAR) -C $(PKGDIR) --strip-components 1
|
||||
|
||||
pkg.make:: pkg.expand
|
||||
test -f $(PKGDIR)/Makefile || (cd $(PKGDIR) && ./configure --prefix=$(PRTPATH))
|
||||
$(MAKE) -C $(PKGDIR)
|
||||
|
||||
pkg.install:: pkg.make
|
||||
$(MAKE) -C $(PKGDIR) install
|
||||
|
||||
pkg:: pkg.install
|
||||
@echo "+++++++++++ package $(PKG) done"
|
||||
|
||||
test::
|
||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) test
|
||||
|
||||
clean::
|
||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) clean
|
||||
|
||||
portclean::
|
||||
(! test -d $(PKGDIR)) || $(MAKE) -C $(PKGDIR) distclean
|
||||
(! test -d $(PKGDIR)) || \rm -r $(PKGDIR)
|
@ -1,30 +0,0 @@
|
||||
#
|
||||
# $Id: propagate.targ 1825 2013-02-26 09:39:47Z viari $
|
||||
#
|
||||
# propagate.targ
|
||||
#
|
||||
# default make targets for library containers
|
||||
#
|
||||
# you should define the 'DIRS' variable
|
||||
#
|
||||
# It will propagate 'MAKE <target>' to all
|
||||
# directories listed in DIRS
|
||||
#
|
||||
|
||||
#
|
||||
# Rules
|
||||
#
|
||||
|
||||
.PHONY: all _action $(DIRS)
|
||||
|
||||
.DEFAULT:
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=all _action
|
||||
|
||||
_action: $(DIRS)
|
||||
@echo "$(ACTION) done"
|
||||
|
||||
$(DIRS):
|
||||
$(MAKE) -C $@ $(ACTION)
|
@ -13,7 +13,7 @@
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../config/auto.conf
|
||||
include ../../../../config/auto.conf
|
||||
|
||||
PROGS = kimono kimfit
|
||||
|
||||
@ -29,8 +29,8 @@ OSRC = fasta_io.c \
|
||||
kim_genetic.c \
|
||||
kim_codonskew.c
|
||||
|
||||
include ../config/targets/lxbin.targ
|
||||
include ../config/targets/help.targ
|
||||
include ../../../../config/targets/lxbin.targ
|
||||
include ../../../../config/targets/help.targ
|
||||
|
||||
#
|
||||
# file dependencies
|
||||
|
@ -16,7 +16,7 @@
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
|
||||
include ../config/targets/empty.targ
|
||||
include ../../../../config/targets/empty.targ
|
||||
|
||||
clean::
|
||||
-\rm -f *.tst
|
||||
|
30
src/muscle/Makefile
Executable file
30
src/muscle/Makefile
Executable file
@ -0,0 +1,30 @@
|
||||
#---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for lxpack
|
||||
#
|
||||
# @history:
|
||||
# @history:
|
||||
# @+ <Gloup> : Apr 97 : Created
|
||||
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
include ../../config/auto.conf
|
||||
|
||||
DIRS = muscle3.8.31
|
||||
|
||||
include ../../config/targets/propagate.targ
|
||||
|
||||
include ../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
||||
clean::
|
||||
$(MAKE) -C lxpack portclean
|
30
src/muscle/muscle3.8.31/Makefile
Executable file
30
src/muscle/muscle3.8.31/Makefile
Executable file
@ -0,0 +1,30 @@
|
||||
#---------------------------------------------------------------
|
||||
# $Id: $
|
||||
# ---------------------------------------------------------------
|
||||
# @file: Makefile
|
||||
# @desc: makefile for lxpack
|
||||
#
|
||||
# @history:
|
||||
# @history:
|
||||
# @+ <Gloup> : Apr 97 : Created
|
||||
# @+ <Gloup> : Mar 02 : Updated for LXxware
|
||||
#
|
||||
# @note: should be processed with gnu compatible make
|
||||
# @note: helixware_compatible
|
||||
#
|
||||
# @end:
|
||||
# ---------------------------------------------------------------
|
||||
#
|
||||
include ../../../config/auto.conf
|
||||
|
||||
DIRS = src
|
||||
|
||||
include ../../../config/targets/propagate.targ
|
||||
|
||||
include ../../../config/targets/help.targ
|
||||
|
||||
all::
|
||||
$(MAKE) ACTION=$@ _action
|
||||
|
||||
clean::
|
||||
$(MAKE) -C lxpack portclean
|
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
include ../../../../config/auto.conf
|
||||
|
||||
all: muscle install
|
||||
|
||||
muscle:
|
||||
chmod +x ./mk
|
||||
(export CXX=$(CXX) && ./mk)
|
||||
|
||||
install:
|
||||
cp muscle $(BINDIR)
|
||||
|
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
@ -0,0 +1,27 @@
|
||||
MUSCLE v3.0 source code README
|
||||
------------------------------
|
||||
|
||||
http://www.drive5.com/muscle
|
||||
|
||||
This version of MUSCLE was built and tested on two platforms:
|
||||
Windows XP and Red Hat Linux 8.0.
|
||||
|
||||
On Windows, I used Microsoft Visual C++ .Net, which I find
|
||||
to be the best C++ compile / edit / test environment I've
|
||||
tried on any platform. The Microsoft project file is
|
||||
muscle.vcproj.
|
||||
|
||||
The Linux make file is Makefile. This is a very simple-minded
|
||||
make file (because I am a Linux development novice), so should
|
||||
be easy to understand. By default, it uses shared libraries,
|
||||
but I found this to give problems when copying between
|
||||
different Linux versions. The fix was to use the linker
|
||||
flag -lm static (commented out), which gives a much bigger
|
||||
but more portable binary. The posted binary was linked with
|
||||
static libraries.
|
||||
|
||||
The source code was not written to be maintained by anyone
|
||||
but me, so the usual apologies and caveats apply.
|
||||
|
||||
Bob Edgar,
|
||||
January 2004
|
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
@ -0,0 +1,802 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void LogPP(const ProfPos &PP)
|
||||
{
|
||||
Log("ResidueGroup %u\n", PP.m_uResidueGroup);
|
||||
Log("AllGaps %d\n", PP.m_bAllGaps);
|
||||
Log("Occ %.3g\n", PP.m_fOcc);
|
||||
Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
|
||||
Log("Freqs ");
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
if (PP.m_fcCounts[i] > 0)
|
||||
Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
|
||||
Log("\n");
|
||||
}
|
||||
|
||||
static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
|
||||
{
|
||||
const ProfPos &PPA = PA[i];
|
||||
const ProfPos &PPB = PB[i];
|
||||
#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||
#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||
eq(bAllGaps)
|
||||
eq(uResidueGroup)
|
||||
|
||||
be(LL)
|
||||
be(LG)
|
||||
be(GL)
|
||||
be(GG)
|
||||
be(fOcc)
|
||||
be(scoreGapOpen)
|
||||
be(scoreGapClose)
|
||||
|
||||
for (unsigned j = 0; j < 20; ++j)
|
||||
{
|
||||
#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
|
||||
#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
|
||||
bej(fcCounts[j]);
|
||||
// eqj(uSortOrder[j]) // may differ due to ties, don't check?
|
||||
bej(AAScores[j])
|
||||
#undef eqj
|
||||
#undef bej
|
||||
}
|
||||
#undef eq
|
||||
#undef be
|
||||
}
|
||||
|
||||
void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB)
|
||||
{
|
||||
if (uLengthA != uLengthB)
|
||||
Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
|
||||
for (unsigned i = 0; i < uLengthB; ++i)
|
||||
AssertProfPosEq(PA, PB, i);
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
static void ValidateProf(const ProfPos *Prof, unsigned uLength)
|
||||
{
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
const ProfPos &PP = Prof[i];
|
||||
|
||||
FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
|
||||
assert(BTEq(s1, 1.0));
|
||||
|
||||
if (i > 0)
|
||||
{
|
||||
const ProfPos &PPPrev = Prof[i-1];
|
||||
FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
|
||||
FCOUNT s3 = PP.m_LL + PP.m_LG;
|
||||
assert(BTEq(s2, s3));
|
||||
}
|
||||
if (i < uLength - 1)
|
||||
{
|
||||
const ProfPos &PPNext = Prof[i+1];
|
||||
FCOUNT s4 = PP.m_LL + PP.m_GL;
|
||||
FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
|
||||
assert(BTEq(s4, s5));
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define ValidateProf(Prof, Length) /* empty */
|
||||
#endif
|
||||
|
||||
static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
|
||||
{
|
||||
ProfPos &PP = Prof[uPos];
|
||||
SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
|
||||
PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
|
||||
|
||||
// "Occupancy"
|
||||
PP.m_fOcc = PP.m_LL + PP.m_GL;
|
||||
|
||||
// Frequency of gap-opens in this position (i)
|
||||
// Gap open = letter in i-1 and gap in i
|
||||
// = iff LG in i
|
||||
FCOUNT fcOpen = PP.m_LG;
|
||||
|
||||
// Frequency of gap-closes in this position
|
||||
// Gap close = gap in i and letter in i+1
|
||||
// = iff GL in i+1
|
||||
FCOUNT fcClose;
|
||||
if (uPos + 1 < uLength)
|
||||
fcClose = Prof[uPos + 1].m_GL;
|
||||
else
|
||||
fcClose = PP.m_GG + PP.m_LG;
|
||||
|
||||
PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
|
||||
PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
|
||||
#if DOUBLE_AFFINE
|
||||
PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
|
||||
PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
|
||||
#endif
|
||||
|
||||
for (unsigned i = 0; i < g_AlphaSize; ++i)
|
||||
{
|
||||
SCORE scoreSum = 0;
|
||||
for (unsigned j = 0; j < g_AlphaSize; ++j)
|
||||
scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
|
||||
PP.m_AAScores[i] = scoreSum;
|
||||
}
|
||||
}
|
||||
|
||||
void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
|
||||
{
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
ScoresFromFreqsPos(Prof, uLength, i);
|
||||
}
|
||||
|
||||
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexA;
|
||||
}
|
||||
|
||||
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexB;
|
||||
}
|
||||
|
||||
static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
const unsigned uLengthA = msaA.GetColCount();
|
||||
const unsigned uLengthB = msaB.GetColCount();
|
||||
|
||||
unsigned uNewColCount = uColCountA;
|
||||
if (uColCountB > uNewColCount)
|
||||
uNewColCount = uColCountB;
|
||||
|
||||
for (unsigned n = 0; n < uColCountA; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
for (unsigned n = 0; n < uColCountB; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
uColIndexCombined += uNewColCount;
|
||||
uColIndexA += uColCountA;
|
||||
uColIndexB += uColCountB;
|
||||
}
|
||||
|
||||
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexCombined;
|
||||
}
|
||||
|
||||
void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||
MSA &msaCombined)
|
||||
{
|
||||
msaCombined.Clear();
|
||||
|
||||
#if TRACE
|
||||
Log("FastAlignProfiles\n");
|
||||
Log("Template A:\n");
|
||||
msaA.LogMe();
|
||||
Log("Template B:\n");
|
||||
msaB.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uColCountA = msaA.GetColCount();
|
||||
const unsigned uColCountB = msaB.GetColCount();
|
||||
|
||||
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||
|
||||
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||
|
||||
// Copy sequence names into combined MSA
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||
}
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexCombined = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB);
|
||||
#endif
|
||||
const char cType = Edge.cType;
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
unsigned uColCountA = 0;
|
||||
if (uPrefixLengthA > 0)
|
||||
{
|
||||
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||
const unsigned uTplColIndexA = uNodeIndexA;
|
||||
if (uTplColIndexA > uColIndexA)
|
||||
uColCountA = uTplColIndexA - uColIndexA;
|
||||
}
|
||||
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
unsigned uColCountB = 0;
|
||||
if (uPrefixLengthB > 0)
|
||||
{
|
||||
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||
const unsigned uTplColIndexB = uNodeIndexB;
|
||||
if (uTplColIndexB > uColIndexB)
|
||||
uColCountB = uTplColIndexB - uColIndexB;
|
||||
}
|
||||
|
||||
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||
assert(uColCountA == 0);
|
||||
assert(uColCountB == 0);
|
||||
AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
assert(uColIndexB == uColB);
|
||||
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||
msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexB == uColB);
|
||||
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||
|
||||
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||
assert(uInsertColCountA == 0);
|
||||
assert(uInsertColCountB == 0);
|
||||
AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
assert(msaCombined.GetColCount() == uEdgeCount);
|
||||
}
|
||||
|
||||
static const ProfPos PPStart =
|
||||
{
|
||||
false, //m_bAllGaps;
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
|
||||
1.0, // m_LL;
|
||||
0.0, // m_LG;
|
||||
0.0, // m_GL;
|
||||
0.0, // m_GG;
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
|
||||
0, // m_uResidueGroup;
|
||||
1.0, // m_fOcc;
|
||||
0.0, // m_fcStartOcc;
|
||||
0.0, // m_fcEndOcc;
|
||||
0.0, // m_scoreGapOpen;
|
||||
0.0, // m_scoreGapClose;
|
||||
};
|
||||
|
||||
// MM
|
||||
// Ai<41>1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// Bj<42>1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsMM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
|
||||
}
|
||||
|
||||
// MD
|
||||
// Ai<41>1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// Bj (-)
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
static void SetGapsMD(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
|
||||
PPO.m_GL = wA*PPA.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// DD
|
||||
// Ai<41>1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// (-) (-)
|
||||
// - - ?? GG
|
||||
static void SetGapsDD(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB;
|
||||
}
|
||||
|
||||
// MI
|
||||
// Ai (-) Out
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
|
||||
// Bj<42>1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsMI(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
|
||||
PPO.m_GL = wB*PPB.m_GL;
|
||||
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||
}
|
||||
|
||||
// DM
|
||||
// Ai<41>1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// (-) Bj
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
static void SetGapsDM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// IM
|
||||
// (-) Ai Out
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
|
||||
// Bj<42>1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsIM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG;
|
||||
PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
|
||||
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||
}
|
||||
|
||||
// ID
|
||||
// (-) Ai Out
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
|
||||
// Bj (-)
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
static void SetGapsID(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = 0;
|
||||
PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||
PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// DI
|
||||
// Ai (-) Out
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
|
||||
// (-) Bj
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
static void SetGapsDI(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = 0;
|
||||
PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||
PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// II
|
||||
// (-) (-) Out
|
||||
// - - ?? GG
|
||||
|
||||
// Bj<42>1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsII(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG;
|
||||
PPO.m_GL = wB*PPB.m_GL;
|
||||
PPO.m_GG = wB*PPB.m_GG + wA;
|
||||
}
|
||||
|
||||
static void SetFreqs(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
if (g_bNormalizeCounts)
|
||||
{
|
||||
const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
|
||||
const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
|
||||
FCOUNT fTotal = 0;
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
{
|
||||
const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
|
||||
PPO.m_fcCounts[i] = f;
|
||||
fTotal += f;
|
||||
}
|
||||
if (fTotal > 0)
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
PPO.m_fcCounts[i] /= fTotal;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
|
||||
}
|
||||
}
|
||||
|
||||
void AlignTwoProfsGivenPath(const PWPath &Path,
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos **ptrPOut, unsigned *ptruLengthOut)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
|
||||
Path.LogMe();
|
||||
#endif
|
||||
assert(BTEq(wA + wB, 1.0));
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexOut = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
ProfPos *POut = new ProfPos[uEdgeCount];
|
||||
char cPrevType = 'M';
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
const char cType = Edge.cType;
|
||||
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB,
|
||||
uColIndexA,
|
||||
uColIndexB);
|
||||
#endif
|
||||
|
||||
POut[uColIndexOut].m_bAllGaps = false;
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsIM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, 0,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMD(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDD(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsID(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexA;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, 0,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMI(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDI(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsII(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexB;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
cPrevType = cType;
|
||||
}
|
||||
assert(uColIndexOut == uEdgeCount);
|
||||
|
||||
ProfScoresFromFreqs(POut, uEdgeCount);
|
||||
ValidateProf(POut, uEdgeCount);
|
||||
|
||||
*ptrPOut = POut;
|
||||
*ptruLengthOut = uEdgeCount;
|
||||
|
||||
#if TRACE
|
||||
Log("AlignTwoProfsGivenPath:\n");
|
||||
ListProfile(POut, uEdgeCount, 0);
|
||||
#endif
|
||||
}
|
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
@ -0,0 +1,237 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexA;
|
||||
}
|
||||
|
||||
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexB;
|
||||
}
|
||||
|
||||
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
const unsigned uLengthA = msaA.GetColCount();
|
||||
const unsigned uLengthB = msaB.GetColCount();
|
||||
|
||||
unsigned uNewColCount = uColCountA;
|
||||
if (uColCountB > uNewColCount)
|
||||
uNewColCount = uColCountB;
|
||||
|
||||
for (unsigned n = 0; n < uColCountA; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
for (unsigned n = 0; n < uColCountB; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
uColIndexCombined += uNewColCount;
|
||||
uColIndexA += uColCountA;
|
||||
uColIndexB += uColCountB;
|
||||
}
|
||||
|
||||
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexCombined;
|
||||
}
|
||||
|
||||
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||
MSA &msaCombined)
|
||||
{
|
||||
msaCombined.Clear();
|
||||
|
||||
#if TRACE
|
||||
Log("AlignTwoMSAsGivenPathSW\n");
|
||||
Log("Template A:\n");
|
||||
msaA.LogMe();
|
||||
Log("Template B:\n");
|
||||
msaB.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uColCountA = msaA.GetColCount();
|
||||
const unsigned uColCountB = msaB.GetColCount();
|
||||
|
||||
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||
|
||||
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||
|
||||
// Copy sequence names into combined MSA
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||
}
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexCombined = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB);
|
||||
#endif
|
||||
const char cType = Edge.cType;
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
unsigned uColCountA = 0;
|
||||
if (uPrefixLengthA > 0)
|
||||
{
|
||||
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||
const unsigned uTplColIndexA = uNodeIndexA;
|
||||
if (uTplColIndexA > uColIndexA)
|
||||
uColCountA = uTplColIndexA - uColIndexA;
|
||||
}
|
||||
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
unsigned uColCountB = 0;
|
||||
if (uPrefixLengthB > 0)
|
||||
{
|
||||
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||
const unsigned uTplColIndexB = uNodeIndexB;
|
||||
if (uTplColIndexB > uColIndexB)
|
||||
uColCountB = uTplColIndexB - uColIndexB;
|
||||
}
|
||||
|
||||
AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
assert(uColIndexB == uColB);
|
||||
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||
msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexB == uColB);
|
||||
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||
|
||||
AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
}
|
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
#include "textfile.h"
|
||||
#include "timing.h"
|
||||
|
||||
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
|
||||
bool bLockLeft, bool bLockRight)
|
||||
{
|
||||
const unsigned uLengthA = msa1.GetColCount();
|
||||
const unsigned uLengthB = msa2.GetColCount();
|
||||
|
||||
ProfPos *PA = ProfileFromMSA(msa1);
|
||||
ProfPos *PB = ProfileFromMSA(msa2);
|
||||
|
||||
if (bLockLeft)
|
||||
{
|
||||
PA[0].m_scoreGapOpen = MINUS_INFINITY;
|
||||
PB[0].m_scoreGapOpen = MINUS_INFINITY;
|
||||
}
|
||||
|
||||
if (bLockRight)
|
||||
{
|
||||
PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
|
||||
PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
|
||||
}
|
||||
|
||||
float r = (float) uLengthA/ (float) (uLengthB + 1); // +1 to prevent div 0
|
||||
if (r < 1)
|
||||
r = 1/r;
|
||||
|
||||
SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
|
||||
|
||||
AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
|
||||
|
||||
delete[] PA;
|
||||
delete[] PB;
|
||||
|
||||
return Score;
|
||||
}
|
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
@ -0,0 +1,31 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
|
||||
SCORE AlignTwoProfs(
|
||||
const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
|
||||
PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
|
||||
{
|
||||
assert(uLengthA < 100000);
|
||||
assert(uLengthB < 100000);
|
||||
|
||||
float r = (float) uLengthA/ (float) (uLengthB + 1); // +1 to prevent div 0
|
||||
if (r < 1)
|
||||
r = 1/r;
|
||||
|
||||
SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
|
||||
|
||||
AlignTwoProfsGivenPath(Path, PA, uLengthB, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
|
||||
ptrPout, ptruLengthOut);
|
||||
|
||||
#if HYDRO
|
||||
if (ALPHA_Amino == g_Alpha)
|
||||
Hydro(*ptrPout, *ptruLengthOut);
|
||||
#endif
|
||||
return Score;
|
||||
}
|
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
@ -0,0 +1,170 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
|
||||
const unsigned uCharsPerLine = 60;
|
||||
const int MIN_NAME = 10;
|
||||
const int MAX_NAME = 32;
|
||||
|
||||
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
|
||||
|
||||
void MSA::ToAlnFile(TextFile &File) const
|
||||
{
|
||||
if (g_bClwStrict)
|
||||
File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
|
||||
else
|
||||
{
|
||||
File.PutString("MUSCLE ("
|
||||
SHORT_VERSION ")"
|
||||
" multiple sequence alignment\n");
|
||||
File.PutString("\n");
|
||||
}
|
||||
|
||||
int iLongestNameLength = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
const char *ptrName = GetSeqName(uSeqIndex);
|
||||
const char *ptrBlank = strchr(ptrName, ' ');
|
||||
int iLength;
|
||||
if (0 != ptrBlank)
|
||||
iLength = (int) (ptrBlank - ptrName);
|
||||
else
|
||||
iLength = (int) strlen(ptrName);
|
||||
if (iLength > iLongestNameLength)
|
||||
iLongestNameLength = iLength;
|
||||
}
|
||||
if (iLongestNameLength > MAX_NAME)
|
||||
iLongestNameLength = MAX_NAME;
|
||||
if (iLongestNameLength < MIN_NAME)
|
||||
iLongestNameLength = MIN_NAME;
|
||||
|
||||
unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1;
|
||||
for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
|
||||
{
|
||||
File.PutString("\n");
|
||||
unsigned uStartColIndex = uLineIndex*uCharsPerLine;
|
||||
unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
|
||||
if (uEndColIndex >= GetColCount())
|
||||
uEndColIndex = GetColCount() - 1;
|
||||
char Name[MAX_NAME+1];
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
const char *ptrName = GetSeqName(uSeqIndex);
|
||||
const char *ptrBlank = strchr(ptrName, ' ');
|
||||
int iLength;
|
||||
if (0 != ptrBlank)
|
||||
iLength = (int) (ptrBlank - ptrName);
|
||||
else
|
||||
iLength = (int) strlen(ptrName);
|
||||
if (iLength > MAX_NAME)
|
||||
iLength = MAX_NAME;
|
||||
memset(Name, ' ', MAX_NAME);
|
||||
memcpy(Name, ptrName, iLength);
|
||||
Name[iLongestNameLength] = 0;
|
||||
|
||||
File.PutFormat("%s ", Name);
|
||||
for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
|
||||
++uColIndex)
|
||||
{
|
||||
const char c = GetChar(uSeqIndex, uColIndex);
|
||||
File.PutFormat("%c", toupper(c));
|
||||
}
|
||||
File.PutString("\n");
|
||||
}
|
||||
|
||||
memset(Name, ' ', MAX_NAME);
|
||||
Name[iLongestNameLength] = 0;
|
||||
File.PutFormat("%s ", Name);
|
||||
for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
|
||||
++uColIndex)
|
||||
{
|
||||
const char c = GetAlnConsensusChar(*this, uColIndex);
|
||||
File.PutChar(c);
|
||||
}
|
||||
File.PutString("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
|
||||
{
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned BitMap = 0;
|
||||
unsigned Count = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
|
||||
assert(uLetter < 32);
|
||||
unsigned Bit = (1 << uLetter);
|
||||
if (!(BitMap & Bit))
|
||||
++Count;
|
||||
BitMap |= Bit;
|
||||
}
|
||||
|
||||
// '*' indicates positions which have a single, fully conserved residue
|
||||
if (1 == Count)
|
||||
return '*';
|
||||
|
||||
if (ALPHA_Amino != g_Alpha)
|
||||
return ' ';
|
||||
|
||||
#define B(a) (1 << AX_##a)
|
||||
#define S2(a, b) S(B(a) | B(b))
|
||||
#define S3(a, b, c) S(B(a) | B(b) | B(c))
|
||||
#define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d))
|
||||
#define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';
|
||||
|
||||
#define W3(a, b, c) W(B(a) | B(b) | B(c))
|
||||
#define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d))
|
||||
#define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e))
|
||||
#define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
|
||||
#define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';
|
||||
|
||||
// ':' indicates that one of the following 'strong'
|
||||
// groups is fully conserved
|
||||
// STA
|
||||
// NEQK
|
||||
// NHQK
|
||||
// NDEQ
|
||||
// QHRK
|
||||
// MILV
|
||||
// MILF
|
||||
// HY
|
||||
// FYW
|
||||
//
|
||||
S3(S, T, A)
|
||||
S4(N, E, Q, K)
|
||||
S4(N, H, Q, K)
|
||||
S4(N, D, E, Q)
|
||||
S4(M, I, L, V)
|
||||
S4(M, I, L, F)
|
||||
S2(H, Y)
|
||||
S3(F, Y, W)
|
||||
|
||||
// '.' indicates that one of the following 'weaker'
|
||||
// groups is fully conserved
|
||||
// CSA
|
||||
// ATV
|
||||
// SAG
|
||||
// STNK
|
||||
// STPA
|
||||
// SGND
|
||||
// SNDEQK
|
||||
// NDEQHK
|
||||
// NEQHRK
|
||||
// FVLIM
|
||||
// HFY
|
||||
W3(C, S, A)
|
||||
W3(A, T, V)
|
||||
W3(S, A, G)
|
||||
W4(S, T, N, K)
|
||||
W4(S, T, P, A)
|
||||
W4(S, G, N, D)
|
||||
W6(S, N, D, E, Q, K)
|
||||
W6(N, W, Q, H, R, K)
|
||||
W5(F, V, L, I, M)
|
||||
W3(H, F, Y)
|
||||
|
||||
return ' ';
|
||||
}
|
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
@ -0,0 +1,283 @@
|
||||
#include "muscle.h"
|
||||
#include <ctype.h>
|
||||
|
||||
/***
|
||||
From Bioperl docs:
|
||||
Extended DNA / RNA alphabet
|
||||
------------------------------------------
|
||||
Symbol Meaning Nucleic Acid
|
||||
------------------------------------------
|
||||
A A Adenine
|
||||
C C Cytosine
|
||||
G G Guanine
|
||||
T T Thymine
|
||||
U U Uracil
|
||||
M A or C
|
||||
R A or G
|
||||
W A or T
|
||||
S C or G
|
||||
Y C or T
|
||||
K G or T
|
||||
V A or C or G
|
||||
H A or C or T
|
||||
D A or G or T
|
||||
B C or G or T
|
||||
X G or A or T or C
|
||||
N G or A or T or C
|
||||
|
||||
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
|
||||
Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
|
||||
***/
|
||||
|
||||
unsigned g_CharToLetter[MAX_CHAR];
|
||||
unsigned g_CharToLetterEx[MAX_CHAR];
|
||||
|
||||
char g_LetterToChar[MAX_ALPHA];
|
||||
char g_LetterExToChar[MAX_ALPHA_EX];
|
||||
|
||||
char g_UnalignChar[MAX_CHAR];
|
||||
char g_AlignChar[MAX_CHAR];
|
||||
|
||||
bool g_IsWildcardChar[MAX_CHAR];
|
||||
bool g_IsResidueChar[MAX_CHAR];
|
||||
|
||||
ALPHA g_Alpha = ALPHA_Undefined;
|
||||
unsigned g_AlphaSize = 0;
|
||||
|
||||
#define Res(c, Letter) \
|
||||
{ \
|
||||
const unsigned char Upper = (unsigned char) toupper(c); \
|
||||
const unsigned char Lower = (unsigned char) tolower(c); \
|
||||
g_CharToLetter[Upper] = Letter; \
|
||||
g_CharToLetter[Lower] = Letter; \
|
||||
g_CharToLetterEx[Upper] = Letter; \
|
||||
g_CharToLetterEx[Lower] = Letter; \
|
||||
g_LetterToChar[Letter] = Upper; \
|
||||
g_LetterExToChar[Letter] = Upper; \
|
||||
g_IsResidueChar[Upper] = true; \
|
||||
g_IsResidueChar[Lower] = true; \
|
||||
g_AlignChar[Upper] = Upper; \
|
||||
g_AlignChar[Lower] = Upper; \
|
||||
g_UnalignChar[Upper] = Lower; \
|
||||
g_UnalignChar[Lower] = Lower; \
|
||||
}
|
||||
|
||||
#define Wild(c, Letter) \
|
||||
{ \
|
||||
const unsigned char Upper = (unsigned char) toupper(c); \
|
||||
const unsigned char Lower = (unsigned char) tolower(c); \
|
||||
g_CharToLetterEx[Upper] = Letter; \
|
||||
g_CharToLetterEx[Lower] = Letter; \
|
||||
g_LetterExToChar[Letter] = Upper; \
|
||||
g_IsResidueChar[Upper] = true; \
|
||||
g_IsResidueChar[Lower] = true; \
|
||||
g_AlignChar[Upper] = Upper; \
|
||||
g_AlignChar[Lower] = Upper; \
|
||||
g_UnalignChar[Upper] = Lower; \
|
||||
g_UnalignChar[Lower] = Lower; \
|
||||
g_IsWildcardChar[Lower] = true; \
|
||||
g_IsWildcardChar[Upper] = true; \
|
||||
}
|
||||
|
||||
static unsigned GetAlphaSize(ALPHA Alpha)
|
||||
{
|
||||
switch (Alpha)
|
||||
{
|
||||
case ALPHA_Amino:
|
||||
return 20;
|
||||
|
||||
case ALPHA_RNA:
|
||||
case ALPHA_DNA:
|
||||
return 4;
|
||||
}
|
||||
Quit("Invalid Alpha=%d", Alpha);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void InitArrays()
|
||||
{
|
||||
memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
|
||||
memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
|
||||
|
||||
memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
|
||||
memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
|
||||
|
||||
memset(g_AlignChar, '?', sizeof(g_UnalignChar));
|
||||
memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
|
||||
|
||||
memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
|
||||
}
|
||||
|
||||
static void SetGapChar(char c)
|
||||
{
|
||||
unsigned char u = (unsigned char) c;
|
||||
|
||||
g_CharToLetterEx[u] = AX_GAP;
|
||||
g_LetterExToChar[AX_GAP] = u;
|
||||
g_AlignChar[u] = u;
|
||||
g_UnalignChar[u] = u;
|
||||
}
|
||||
|
||||
static void SetAlphaDNA()
|
||||
{
|
||||
Res('A', NX_A)
|
||||
Res('C', NX_C)
|
||||
Res('G', NX_G)
|
||||
Res('T', NX_T)
|
||||
Wild('M', NX_M)
|
||||
Wild('R', NX_R)
|
||||
Wild('W', NX_W)
|
||||
Wild('S', NX_S)
|
||||
Wild('Y', NX_Y)
|
||||
Wild('K', NX_K)
|
||||
Wild('V', NX_V)
|
||||
Wild('H', NX_H)
|
||||
Wild('D', NX_D)
|
||||
Wild('B', NX_B)
|
||||
Wild('X', NX_X)
|
||||
Wild('N', NX_N)
|
||||
}
|
||||
|
||||
static void SetAlphaRNA()
|
||||
{
|
||||
Res('A', NX_A)
|
||||
Res('C', NX_C)
|
||||
Res('G', NX_G)
|
||||
Res('U', NX_U)
|
||||
Res('T', NX_T)
|
||||
Wild('M', NX_M)
|
||||
Wild('R', NX_R)
|
||||
Wild('W', NX_W)
|
||||
Wild('S', NX_S)
|
||||
Wild('Y', NX_Y)
|
||||
Wild('K', NX_K)
|
||||
Wild('V', NX_V)
|
||||
Wild('H', NX_H)
|
||||
Wild('D', NX_D)
|
||||
Wild('B', NX_B)
|
||||
Wild('X', NX_X)
|
||||
Wild('N', NX_N)
|
||||
}
|
||||
|
||||
static void SetAlphaAmino()
|
||||
{
|
||||
Res('A', AX_A)
|
||||
Res('C', AX_C)
|
||||
Res('D', AX_D)
|
||||
Res('E', AX_E)
|
||||
Res('F', AX_F)
|
||||
Res('G', AX_G)
|
||||
Res('H', AX_H)
|
||||
Res('I', AX_I)
|
||||
Res('K', AX_K)
|
||||
Res('L', AX_L)
|
||||
Res('M', AX_M)
|
||||
Res('N', AX_N)
|
||||
Res('P', AX_P)
|
||||
Res('Q', AX_Q)
|
||||
Res('R', AX_R)
|
||||
Res('S', AX_S)
|
||||
Res('T', AX_T)
|
||||
Res('V', AX_V)
|
||||
Res('W', AX_W)
|
||||
Res('Y', AX_Y)
|
||||
|
||||
Wild('B', AX_B)
|
||||
Wild('X', AX_X)
|
||||
Wild('Z', AX_Z)
|
||||
}
|
||||
|
||||
void SetAlpha(ALPHA Alpha)
|
||||
{
|
||||
InitArrays();
|
||||
|
||||
SetGapChar('.');
|
||||
SetGapChar('-');
|
||||
|
||||
switch (Alpha)
|
||||
{
|
||||
case ALPHA_Amino:
|
||||
SetAlphaAmino();
|
||||
break;
|
||||
|
||||
case ALPHA_DNA:
|
||||
SetAlphaDNA();
|
||||
|
||||
case ALPHA_RNA:
|
||||
SetAlphaRNA();
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid Alpha=%d", Alpha);
|
||||
}
|
||||
|
||||
g_AlphaSize = GetAlphaSize(Alpha);
|
||||
g_Alpha = Alpha;
|
||||
|
||||
if (g_bVerbose)
|
||||
Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
|
||||
}
|
||||
|
||||
char GetWildcardChar()
|
||||
{
|
||||
switch (g_Alpha)
|
||||
{
|
||||
case ALPHA_Amino:
|
||||
return 'X';
|
||||
|
||||
case ALPHA_DNA:
|
||||
case ALPHA_RNA:
|
||||
return 'N';
|
||||
|
||||
default:
|
||||
Quit("Invalid Alpha=%d", g_Alpha);
|
||||
}
|
||||
return '?';
|
||||
}
|
||||
|
||||
bool IsNucleo(char c)
|
||||
{
|
||||
return strchr("ACGTURYNacgturyn", c) != 0;
|
||||
}
|
||||
|
||||
bool IsDNA(char c)
|
||||
{
|
||||
return strchr("AGCTNagctn", c) != 0;
|
||||
}
|
||||
|
||||
bool IsRNA(char c)
|
||||
{
|
||||
return strchr("AGCUNagcun", c) != 0;
|
||||
}
|
||||
|
||||
static char InvalidLetters[256];
|
||||
static int InvalidLetterCount = 0;
|
||||
|
||||
void ClearInvalidLetterWarning()
|
||||
{
|
||||
memset(InvalidLetters, 0, 256);
|
||||
}
|
||||
|
||||
void InvalidLetterWarning(char c, char w)
|
||||
{
|
||||
InvalidLetters[(unsigned char) c] = 1;
|
||||
++InvalidLetterCount;
|
||||
}
|
||||
|
||||
void ReportInvalidLetters()
|
||||
{
|
||||
if (0 == InvalidLetterCount)
|
||||
return;
|
||||
|
||||
char Str[257];
|
||||
memset(Str, 0, 257);
|
||||
|
||||
int n = 0;
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
if (InvalidLetters[i])
|
||||
Str[n++] = (char) i;
|
||||
}
|
||||
Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
|
||||
ALPHAToStr(g_Alpha), Str);
|
||||
}
|
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
@ -0,0 +1,106 @@
|
||||
#ifndef alpha_h
|
||||
#define alpha_h
|
||||
|
||||
bool StrHasAmino(const char *Str);
|
||||
bool StrHasGap(const char *Str);
|
||||
void ClearInvalidLetterWarning();
|
||||
void InvalidLetterWarning(char c, char w);
|
||||
void ReportInvalidLetters();
|
||||
|
||||
extern unsigned g_CharToLetter[];
|
||||
extern unsigned g_CharToLetterEx[];
|
||||
|
||||
extern char g_LetterToChar[];
|
||||
extern char g_LetterExToChar[];
|
||||
|
||||
extern char g_UnalignChar[];
|
||||
extern char g_AlignChar[];
|
||||
|
||||
extern bool g_IsWildcardChar[];
|
||||
extern bool g_IsResidueChar[];
|
||||
|
||||
#define CharToLetter(c) (g_CharToLetter[(unsigned char) (c)])
|
||||
#define CharToLetterEx(c) (g_CharToLetterEx[(unsigned char) (c)])
|
||||
|
||||
#define LetterToChar(u) (g_LetterToChar[u])
|
||||
#define LetterExToChar(u) (g_LetterExToChar[u])
|
||||
|
||||
#define IsResidueChar(c) (g_IsResidueChar[(unsigned char) (c)])
|
||||
#define IsGapChar(c) ('-' == (c) || '.' == (c))
|
||||
#define IsWildcardChar(c) (g_IsWildcardChar[(unsigned char) (c)])
|
||||
|
||||
#define AlignChar(c) (g_AlignChar[(unsigned char) (c)])
|
||||
#define UnalignChar(c) (g_UnalignChar[(unsigned char) (c)])
|
||||
|
||||
// AX=Amino alphabet with eXtensions (B, Z and X)
|
||||
enum AX
|
||||
{
|
||||
AX_A,
|
||||
AX_C,
|
||||
AX_D,
|
||||
AX_E,
|
||||
AX_F,
|
||||
AX_G,
|
||||
AX_H,
|
||||
AX_I,
|
||||
AX_K,
|
||||
AX_L,
|
||||
AX_M,
|
||||
AX_N,
|
||||
AX_P,
|
||||
AX_Q,
|
||||
AX_R,
|
||||
AX_S,
|
||||
AX_T,
|
||||
AX_V,
|
||||
AX_W,
|
||||
AX_Y,
|
||||
|
||||
AX_X, // Any
|
||||
|
||||
AX_B, // D or N
|
||||
AX_Z, // E or Q
|
||||
|
||||
AX_GAP,
|
||||
};
|
||||
const unsigned AX_COUNT = AX_GAP + 1;
|
||||
|
||||
// NX=Nucleotide alphabet with extensions
|
||||
enum NX
|
||||
{
|
||||
NX_A,
|
||||
NX_C,
|
||||
NX_G,
|
||||
NX_T,
|
||||
NX_U = NX_T,
|
||||
|
||||
NX_M, // AC
|
||||
NX_R, // AG
|
||||
NX_W, // AT
|
||||
NX_S, // CG
|
||||
NX_Y, // CT
|
||||
NX_K, // GT
|
||||
NX_V, // ACG
|
||||
NX_H, // ACT
|
||||
NX_D, // AGT
|
||||
NX_B, // CGT
|
||||
NX_X, // GATC
|
||||
NX_N, // GATC
|
||||
NX_GAP
|
||||
};
|
||||
const unsigned NX_COUNT = NX_GAP + 1;
|
||||
|
||||
const unsigned MAX_ALPHA = 20;
|
||||
const unsigned MAX_ALPHA_EX = AX_COUNT;
|
||||
const unsigned MAX_CHAR = 256;
|
||||
|
||||
extern ALPHA g_Alpha;
|
||||
extern unsigned g_AlphaSize;
|
||||
|
||||
void SetAlpha(ALPHA Alpha);
|
||||
char GetWildcardChar();
|
||||
bool IsNucleo(char c);
|
||||
bool IsDNA(char c);
|
||||
bool IsRNA(char c);
|
||||
|
||||
#endif // alpha_h
|
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
@ -0,0 +1,218 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
|
||||
SCORE SmoothScore[], double dCeil)
|
||||
{
|
||||
#define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))
|
||||
|
||||
if (1 != uWindowLength%2)
|
||||
Quit("WindowSmooth=%u must be odd", uWindowLength);
|
||||
|
||||
if (uCount <= uWindowLength)
|
||||
{
|
||||
for (unsigned i = 0; i < uCount; ++i)
|
||||
SmoothScore[i] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
const unsigned w2 = uWindowLength/2;
|
||||
for (unsigned i = 0; i < w2; ++i)
|
||||
{
|
||||
SmoothScore[i] = 0;
|
||||
SmoothScore[uCount - i - 1] = 0;
|
||||
}
|
||||
|
||||
SCORE scoreWindowTotal = 0;
|
||||
for (unsigned i = 0; i < uWindowLength; ++i)
|
||||
{
|
||||
scoreWindowTotal += Ceil(Score[i]);
|
||||
}
|
||||
|
||||
for (unsigned i = w2; ; ++i)
|
||||
{
|
||||
SmoothScore[i] = scoreWindowTotal/uWindowLength;
|
||||
if (i == uCount - w2 - 1)
|
||||
break;
|
||||
|
||||
scoreWindowTotal -= Ceil(Score[i - w2]);
|
||||
scoreWindowTotal += Ceil(Score[i + w2 + 1]);
|
||||
}
|
||||
#undef Ceil
|
||||
}
|
||||
|
||||
// Find columns that score above the given threshold.
|
||||
// A range of scores is defined between the average
|
||||
// and the maximum. The threshold is a fraction 0.0 .. 1.0
|
||||
// within that range, where 0.0 is the average score
|
||||
// and 1.0 is the maximum score.
|
||||
// "Grade" is by analogy with grading on a curve.
|
||||
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
|
||||
double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
|
||||
{
|
||||
SCORE scoreTotal = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
scoreTotal += Score[uIndex];
|
||||
const SCORE scoreAvg = scoreTotal / uCount;
|
||||
|
||||
SCORE scoreMax = MINUS_INFINITY;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
if (Score[uIndex] > scoreMax)
|
||||
scoreMax = Score[uIndex];
|
||||
|
||||
unsigned uBestColCount = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
{
|
||||
const SCORE s = Score[uIndex];
|
||||
const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
|
||||
if (dHeight >= dThreshold)
|
||||
{
|
||||
BestCols[uBestColCount] = uIndex;
|
||||
++uBestColCount;
|
||||
}
|
||||
}
|
||||
*ptruBestColCount = uBestColCount;
|
||||
}
|
||||
|
||||
// Best col only if all following criteria satisfied:
|
||||
// (1) Score >= min
|
||||
// (2) Smoothed score >= min
|
||||
// (3) No gaps.
|
||||
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
|
||||
const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
|
||||
unsigned BestCols[], unsigned *ptruBestColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
|
||||
unsigned uBestColCount = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
|
||||
{
|
||||
if (Score[uIndex] < dMinScore)
|
||||
continue;
|
||||
if (SmoothScore[uIndex] < dMinSmoothScore)
|
||||
continue;
|
||||
if (msa.ColumnHasGap(uIndex))
|
||||
continue;
|
||||
BestCols[uBestColCount] = uIndex;
|
||||
++uBestColCount;
|
||||
}
|
||||
*ptruBestColCount = uBestColCount;
|
||||
}
|
||||
|
||||
static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
|
||||
unsigned BestCols[], unsigned uBestColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
const unsigned uSeqCount = msa.GetSeqCount();
|
||||
|
||||
Log("Col ");
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
Log("%u", uSeqIndex%10);
|
||||
Log(" ");
|
||||
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
Log("%3u ", uColIndex);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
Log("%c", msa.GetChar(uSeqIndex, uColIndex));
|
||||
|
||||
Log(" %10.3f", Score[uColIndex]);
|
||||
Log(" %10.3f", SmoothScore[uColIndex]);
|
||||
|
||||
for (unsigned i = 0; i < uBestColCount; ++i)
|
||||
if (BestCols[i] == uColIndex)
|
||||
Log(" <-- Best");
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// If two best columns are found within a window, choose
|
||||
// the highest-scoring. If more than two, choose the one
|
||||
// closest to the center of the window.
|
||||
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
|
||||
unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
|
||||
unsigned *ptruAnchorColCount)
|
||||
{
|
||||
unsigned uAnchorColCount = 0;
|
||||
for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
|
||||
{
|
||||
unsigned uBestColIndex = BestCols[n];
|
||||
unsigned uCountWithinWindow = 0;
|
||||
for (unsigned i = n + 1; i < uBestColCount; ++i)
|
||||
{
|
||||
unsigned uBestColIndex2 = BestCols[i];
|
||||
if (uBestColIndex2 - uBestColIndex >= uWindowLength)
|
||||
break;
|
||||
++uCountWithinWindow;
|
||||
}
|
||||
unsigned uAnchorCol = uBestColIndex;
|
||||
if (1 == uCountWithinWindow)
|
||||
{
|
||||
unsigned uBestColIndex2 = BestCols[n+1];
|
||||
if (Scores[uBestColIndex] > Scores[uBestColIndex2])
|
||||
uAnchorCol = uBestColIndex;
|
||||
else
|
||||
uAnchorCol = uBestColIndex2;
|
||||
}
|
||||
else if (uCountWithinWindow > 1)
|
||||
{
|
||||
unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
|
||||
int iClosestDist = uWindowLength;
|
||||
unsigned uClosestCol = uBestColIndex;
|
||||
for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)
|
||||
{
|
||||
unsigned uColIndex = BestCols[i];
|
||||
int iDist = uColIndex - uBestColIndex;
|
||||
if (iDist < 0)
|
||||
iDist = -iDist;
|
||||
if (iDist < iClosestDist)
|
||||
{
|
||||
uClosestCol = uColIndex;
|
||||
iClosestDist = iDist;
|
||||
}
|
||||
}
|
||||
uAnchorCol = uClosestCol;
|
||||
}
|
||||
AnchorCols[uAnchorColCount] = uAnchorCol;
|
||||
++uAnchorColCount;
|
||||
n += uCountWithinWindow + 1;
|
||||
}
|
||||
*ptruAnchorColCount = uAnchorColCount;
|
||||
}
|
||||
|
||||
void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
|
||||
unsigned *ptruAnchorColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
if (uColCount < 16)
|
||||
{
|
||||
*ptruAnchorColCount = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
SCORE *MatchScore = new SCORE[uColCount];
|
||||
SCORE *SmoothScore = new SCORE[uColCount];
|
||||
unsigned *BestCols = new unsigned[uColCount];
|
||||
|
||||
GetLetterScores(msa, MatchScore);
|
||||
WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
|
||||
g_dSmoothScoreCeil);
|
||||
|
||||
unsigned uBestColCount;
|
||||
FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
|
||||
BestCols, &uBestColCount);
|
||||
|
||||
#if TRACE
|
||||
ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
|
||||
#endif
|
||||
|
||||
MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
|
||||
ptruAnchorColCount);
|
||||
|
||||
delete[] MatchScore;
|
||||
delete[] SmoothScore;
|
||||
delete[] BestCols;
|
||||
}
|
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
@ -0,0 +1,206 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static char XlatEdgeType(char c)
|
||||
{
|
||||
if ('E' == c)
|
||||
return 'D';
|
||||
if ('J' == c)
|
||||
return 'I';
|
||||
return c;
|
||||
}
|
||||
|
||||
static const char *BitsToStr(char Bits)
|
||||
{
|
||||
static char Str[] = "xM xD xI";
|
||||
|
||||
switch (Bits & BIT_xM)
|
||||
{
|
||||
case BIT_MM:
|
||||
Str[0] = 'M';
|
||||
break;
|
||||
case BIT_DM:
|
||||
Str[0] = 'D';
|
||||
break;
|
||||
case BIT_IM:
|
||||
Str[0] = 'I';
|
||||
break;
|
||||
}
|
||||
|
||||
switch (Bits & BIT_xD)
|
||||
{
|
||||
case BIT_MD:
|
||||
Str[3] = 'M';
|
||||
break;
|
||||
case BIT_DD:
|
||||
Str[3] = 'D';
|
||||
break;
|
||||
}
|
||||
|
||||
switch (Bits & BIT_xI)
|
||||
{
|
||||
case BIT_MI:
|
||||
Str[6] = 'M';
|
||||
break;
|
||||
case BIT_II:
|
||||
Str[6] = 'I';
|
||||
break;
|
||||
}
|
||||
|
||||
return Str;
|
||||
}
|
||||
|
||||
static inline char XChar(char Bits, char cType)
|
||||
{
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
switch (Bits & BIT_xM)
|
||||
{
|
||||
case BIT_MM:
|
||||
return 'M';
|
||||
case BIT_DM:
|
||||
return 'D';
|
||||
case BIT_IM:
|
||||
return 'I';
|
||||
#if DOUBLE_AFFINE
|
||||
case BIT_EM:
|
||||
return 'E';
|
||||
case BIT_JM:
|
||||
return 'J';
|
||||
#endif
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
switch (Bits & BIT_xD)
|
||||
{
|
||||
case BIT_MD:
|
||||
return 'M';
|
||||
case BIT_DD:
|
||||
return 'D';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
switch (Bits & BIT_xI)
|
||||
{
|
||||
case BIT_MI:
|
||||
return 'M';
|
||||
case BIT_II:
|
||||
return 'I';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
#if DOUBLE_AFFINE
|
||||
case 'E':
|
||||
{
|
||||
switch (Bits & BIT_xE)
|
||||
{
|
||||
case BIT_ME:
|
||||
return 'M';
|
||||
case BIT_EE:
|
||||
return 'E';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'J':
|
||||
{
|
||||
switch (Bits & BIT_xJ)
|
||||
{
|
||||
case BIT_MJ:
|
||||
return 'M';
|
||||
case BIT_JJ:
|
||||
return 'J';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
Quit("Huh?");
|
||||
return '?';
|
||||
}
|
||||
}
|
||||
|
||||
void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
|
||||
char LastEdge, PWPath &Path)
|
||||
{
|
||||
#if TRACE
|
||||
Log("BitTraceBack\n");
|
||||
#endif
|
||||
Path.Clear();
|
||||
|
||||
PWEdge Edge;
|
||||
Edge.uPrefixLengthA = uLengthA;
|
||||
Edge.uPrefixLengthB = uLengthB;
|
||||
char Bits = TraceBack[uLengthA][uLengthB];
|
||||
Edge.cType = LastEdge;
|
||||
for (;;)
|
||||
{
|
||||
#if TRACE
|
||||
Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
|
||||
#endif
|
||||
char cSave = Edge.cType;
|
||||
Edge.cType = XlatEdgeType(cSave);
|
||||
Path.PrependEdge(Edge);
|
||||
Edge.cType = cSave;
|
||||
|
||||
unsigned PLA = Edge.uPrefixLengthA;
|
||||
unsigned PLB = Edge.uPrefixLengthB;
|
||||
char Bits = TraceBack[PLA][PLB];
|
||||
char NextEdgeType = XChar(Bits, Edge.cType);
|
||||
#if TRACE
|
||||
Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
|
||||
#endif
|
||||
switch (Edge.cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
if (Edge.uPrefixLengthA == 0)
|
||||
Quit("BitTraceBack MA=0");
|
||||
if (Edge.uPrefixLengthB == 0)
|
||||
Quit("BitTraceBack MA=0");
|
||||
--(Edge.uPrefixLengthA);
|
||||
--(Edge.uPrefixLengthB);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
case 'E':
|
||||
{
|
||||
if (Edge.uPrefixLengthA == 0)
|
||||
Quit("BitTraceBack DA=0");
|
||||
--(Edge.uPrefixLengthA);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
case 'J':
|
||||
{
|
||||
if (Edge.uPrefixLengthB == 0)
|
||||
Quit("BitTraceBack IB=0");
|
||||
--(Edge.uPrefixLengthB);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
Quit("BitTraceBack: Invalid edge %c", Edge);
|
||||
}
|
||||
|
||||
if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
|
||||
break;
|
||||
|
||||
Edge.cType = NextEdgeType;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Path.LogMe();
|
||||
#endif
|
||||
}
|
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
#include "muscle.h"
|
||||
|
||||
int BLOSUM62[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L M N P Q R S T V W Y
|
||||
{ 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2}, // A
|
||||
{ 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2}, // C
|
||||
{-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3}, // D
|
||||
{-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2}, // E
|
||||
{-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3}, // F
|
||||
{ 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3}, // G
|
||||
{-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2}, // H
|
||||
{-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1}, // I
|
||||
{-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2}, // K
|
||||
{-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1}, // L
|
||||
{-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1}, // M
|
||||
{-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2}, // N
|
||||
{-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3}, // P
|
||||
{-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1}, // Q
|
||||
{-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2}, // R
|
||||
{ 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2}, // S
|
||||
{ 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2}, // T
|
||||
{ 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1}, // V
|
||||
{-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2}, // W
|
||||
{-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7}, // Y
|
||||
};
|
||||
|
||||
double BLOSUM62_Expected = -0.5209;
|
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
@ -0,0 +1,118 @@
|
||||
#include "muscle.h"
|
||||
|
||||
#define GAPVAL 0.3
|
||||
#define GAPGAPVAL 5.0
|
||||
|
||||
// Blosum62 log-average factor matrix
|
||||
static float Blosum62LA[20][20] =
|
||||
{
|
||||
#define v(x) ((float) x)
|
||||
#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
|
||||
{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
|
||||
v(R), v(S), v(T), v(V), v(W), v(Y) },
|
||||
|
||||
// Blosum62 log average matrix
|
||||
// A C D E F
|
||||
// G H I K L
|
||||
// M N P Q R
|
||||
// S T V W Y
|
||||
S_ROW( 0, 'A', 3.9029401, 0.8679881, 0.5446049, 0.7412640, 0.4648942,
|
||||
1.0568696, 0.5693654, 0.6324813, 0.7753898, 0.6019460,
|
||||
0.7231498, 0.5883077, 0.7541214, 0.7568035, 0.6126988,
|
||||
1.4721037, 0.9844022, 0.9364584, 0.4165484, 0.5426125)
|
||||
|
||||
S_ROW( 1, 'C', 0.8679881, 19.5765802, 0.3014542, 0.2859347, 0.4389910,
|
||||
0.4203886, 0.3550472, 0.6534589, 0.3491296, 0.6422760,
|
||||
0.6113537, 0.3978026, 0.3795628, 0.3657796, 0.3089379,
|
||||
0.7384148, 0.7405530, 0.7558448, 0.4499807, 0.4342013)
|
||||
|
||||
S_ROW( 2, 'D', 0.5446049, 0.3014542, 7.3979253, 1.6878109, 0.2989696,
|
||||
0.6343015, 0.6785593, 0.3390155, 0.7840905, 0.2866128,
|
||||
0.3464547, 1.5538520, 0.5987177, 0.8970811, 0.5732000,
|
||||
0.9135051, 0.6947898, 0.3365004, 0.2321050, 0.3456829)
|
||||
|
||||
S_ROW( 3, 'E', 0.7412640, 0.2859347, 1.6878109, 5.4695276, 0.3307441,
|
||||
0.4812675, 0.9600400, 0.3305223, 1.3082782, 0.3728734,
|
||||
0.5003421, 0.9112983, 0.6792027, 1.9017376, 0.9607983,
|
||||
0.9503570, 0.7414260, 0.4289431, 0.3743021, 0.4964664)
|
||||
|
||||
S_ROW( 4, 'F', 0.4648942, 0.4389910, 0.2989696, 0.3307441, 8.1287983,
|
||||
0.3406407, 0.6519893, 0.9457698, 0.3440433, 1.1545978,
|
||||
1.0043715, 0.3542882, 0.2874440, 0.3339729, 0.3807263,
|
||||
0.4399736, 0.4816930, 0.7450894, 1.3743775, 2.7693817)
|
||||
|
||||
S_ROW( 5, 'G', 1.0568696, 0.4203886, 0.6343015, 0.4812675, 0.3406407,
|
||||
6.8763075, 0.4929663, 0.2750096, 0.5888716, 0.2845039,
|
||||
0.3954865, 0.8637114, 0.4773858, 0.5386498, 0.4499840,
|
||||
0.9035965, 0.5792712, 0.3369551, 0.4216898, 0.3487141)
|
||||
|
||||
S_ROW( 6, 'H', 0.5693654, 0.3550472, 0.6785593, 0.9600400, 0.6519893,
|
||||
0.4929663, 13.5060070, 0.3262878, 0.7788884, 0.3806759,
|
||||
0.5841316, 1.2220028, 0.4728797, 1.1679835, 0.9170473,
|
||||
0.7367319, 0.5575021, 0.3394474, 0.4440859, 1.7979036)
|
||||
|
||||
S_ROW( 7, 'I', 0.6324813, 0.6534589, 0.3390155, 0.3305223, 0.9457698,
|
||||
0.2750096, 0.3262878, 3.9979299, 0.3963730, 1.6944349,
|
||||
1.4777449, 0.3279345, 0.3846629, 0.3829375, 0.3547509,
|
||||
0.4431634, 0.7798163, 2.4175121, 0.4088732, 0.6303898)
|
||||
|
||||
S_ROW( 8, 'K', 0.7753898, 0.3491296, 0.7840905, 1.3082782, 0.3440433,
|
||||
0.5888716, 0.7788884, 0.3963730, 4.7643359, 0.4282702,
|
||||
0.6253033, 0.9398419, 0.7037741, 1.5543233, 2.0768092,
|
||||
0.9319192, 0.7929060, 0.4565429, 0.3589319, 0.5321784)
|
||||
|
||||
S_ROW( 9, 'L', 0.6019460, 0.6422760, 0.2866128, 0.3728734, 1.1545978,
|
||||
0.2845039, 0.3806759, 1.6944349, 0.4282702, 3.7966214,
|
||||
1.9942957, 0.3100430, 0.3711219, 0.4773261, 0.4739194,
|
||||
0.4288939, 0.6603292, 1.3142355, 0.5680359, 0.6920589)
|
||||
|
||||
S_ROW(10, 'M', 0.7231498, 0.6113537, 0.3464547, 0.5003421, 1.0043715,
|
||||
0.3954865, 0.5841316, 1.4777449, 0.6253033, 1.9942957,
|
||||
6.4814549, 0.4745299, 0.4238960, 0.8642486, 0.6226249,
|
||||
0.5985578, 0.7938018, 1.2689365, 0.6103022, 0.7083636)
|
||||
|
||||
S_ROW(11, 'N', 0.5883077, 0.3978026, 1.5538520, 0.9112983, 0.3542882,
|
||||
0.8637114, 1.2220028, 0.3279345, 0.9398419, 0.3100430,
|
||||
0.4745299, 7.0940964, 0.4999337, 1.0005835, 0.8586298,
|
||||
1.2315289, 0.9841525, 0.3690340, 0.2777841, 0.4860309)
|
||||
|
||||
S_ROW(12, 'P', 0.7541214, 0.3795628, 0.5987177, 0.6792027, 0.2874440,
|
||||
0.4773858, 0.4728797, 0.3846629, 0.7037741, 0.3711219,
|
||||
0.4238960, 0.4999337, 12.8375452, 0.6412803, 0.4815348,
|
||||
0.7555033, 0.6888962, 0.4430825, 0.2818321, 0.3635216)
|
||||
|
||||
S_ROW(13, 'Q', 0.7568035, 0.3657796, 0.8970811, 1.9017376, 0.3339729,
|
||||
0.5386498, 1.1679835, 0.3829375, 1.5543233, 0.4773261,
|
||||
0.8642486, 1.0005835, 0.6412803, 6.2444210, 1.4057958,
|
||||
0.9655559, 0.7913219, 0.4667781, 0.5093584, 0.6110951)
|
||||
|
||||
S_ROW(14, 'R', 0.6126988, 0.3089379, 0.5732000, 0.9607983, 0.3807263,
|
||||
0.4499840, 0.9170473, 0.3547509, 2.0768092, 0.4739194,
|
||||
0.6226249, 0.8586298, 0.4815348, 1.4057958, 6.6655769,
|
||||
0.7671661, 0.6777544, 0.4200721, 0.3951049, 0.5559652)
|
||||
|
||||
S_ROW(15, 'S', 1.4721037, 0.7384148, 0.9135051, 0.9503570, 0.4399736,
|
||||
0.9035965, 0.7367319, 0.4431634, 0.9319192, 0.4288939,
|
||||
0.5985578, 1.2315289, 0.7555033, 0.9655559, 0.7671661,
|
||||
3.8428476, 1.6139205, 0.5652240, 0.3853031, 0.5575206)
|
||||
|
||||
S_ROW(16, 'T', 0.9844022, 0.7405530, 0.6947898, 0.7414260, 0.4816930,
|
||||
0.5792712, 0.5575021, 0.7798163, 0.7929060, 0.6603292,
|
||||
0.7938018, 0.9841525, 0.6888962, 0.7913219, 0.6777544,
|
||||
1.6139205, 4.8321048, 0.9809432, 0.4309317, 0.5731577)
|
||||
|
||||
S_ROW(17, 'V', 0.9364584, 0.7558448, 0.3365004, 0.4289431, 0.7450894,
|
||||
0.3369551, 0.3394474, 2.4175121, 0.4565429, 1.3142355,
|
||||
1.2689365, 0.3690340, 0.4430825, 0.4667781, 0.4200721,
|
||||
0.5652240, 0.9809432, 3.6921553, 0.3744576, 0.6580390)
|
||||
|
||||
S_ROW(18, 'W', 0.4165484, 0.4499807, 0.2321050, 0.3743021, 1.3743775,
|
||||
0.4216898, 0.4440859, 0.4088732, 0.3589319, 0.5680359,
|
||||
0.6103022, 0.2777841, 0.2818321, 0.5093584, 0.3951049,
|
||||
0.3853031, 0.4309317, 0.3744576, 38.1077830, 2.1098056)
|
||||
|
||||
S_ROW(19, 'Y', 0.5426125, 0.4342013, 0.3456829, 0.4964664, 2.7693817,
|
||||
0.3487141, 1.7979036, 0.6303898, 0.5321784, 0.6920589,
|
||||
0.7083636, 0.4860309, 0.3635216, 0.6110951, 0.5559652,
|
||||
0.5575206, 0.5731577, 0.6580390, 2.1098056, 9.8322054)
|
||||
};
|
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
@ -0,0 +1,666 @@
|
||||
#include "muscle.h"
|
||||
#include "clust.h"
|
||||
#include "clustset.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
Clust::Clust()
|
||||
{
|
||||
m_Nodes = 0;
|
||||
m_uNodeCount = 0;
|
||||
m_uLeafCount = 0;
|
||||
m_uClusterCount = 0;
|
||||
m_JoinStyle = JOIN_Undefined;
|
||||
m_dDist = 0;
|
||||
m_uLeafCount = 0;
|
||||
m_ptrSet = 0;
|
||||
}
|
||||
|
||||
Clust::~Clust()
|
||||
{
|
||||
delete[] m_Nodes;
|
||||
delete[] m_dDist;
|
||||
delete[] m_ClusterIndexToNodeIndex;
|
||||
}
|
||||
|
||||
void Clust::Create(ClustSet &Set, CLUSTER Method)
|
||||
{
|
||||
m_ptrSet = &Set;
|
||||
|
||||
SetLeafCount(Set.GetLeafCount());
|
||||
|
||||
switch (Method)
|
||||
{
|
||||
case CLUSTER_UPGMA:
|
||||
m_JoinStyle = JOIN_NearestNeighbor;
|
||||
m_CentroidStyle = LINKAGE_Avg;
|
||||
break;
|
||||
|
||||
case CLUSTER_UPGMAMax:
|
||||
m_JoinStyle = JOIN_NearestNeighbor;
|
||||
m_CentroidStyle = LINKAGE_Max;
|
||||
break;
|
||||
|
||||
case CLUSTER_UPGMAMin:
|
||||
m_JoinStyle = JOIN_NearestNeighbor;
|
||||
m_CentroidStyle = LINKAGE_Min;
|
||||
break;
|
||||
|
||||
case CLUSTER_UPGMB:
|
||||
m_JoinStyle = JOIN_NearestNeighbor;
|
||||
m_CentroidStyle = LINKAGE_Biased;
|
||||
break;
|
||||
|
||||
case CLUSTER_NeighborJoining:
|
||||
m_JoinStyle = JOIN_NeighborJoining;
|
||||
m_CentroidStyle = LINKAGE_NeighborJoining;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Clust::Create, invalid method %d", Method);
|
||||
}
|
||||
|
||||
if (m_uLeafCount <= 1)
|
||||
Quit("Clust::Create: no leaves");
|
||||
|
||||
m_uNodeCount = 2*m_uLeafCount - 1;
|
||||
m_Nodes = new ClustNode[m_uNodeCount];
|
||||
m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];
|
||||
|
||||
m_ptrClusterList = 0;
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
ClustNode &Node = m_Nodes[uNodeIndex];
|
||||
Node.m_uIndex = uNodeIndex;
|
||||
if (uNodeIndex < m_uLeafCount)
|
||||
{
|
||||
Node.m_uSize = 1;
|
||||
Node.m_uLeafIndexes = new unsigned[1];
|
||||
Node.m_uLeafIndexes[0] = uNodeIndex;
|
||||
AddToClusterList(uNodeIndex);
|
||||
}
|
||||
else
|
||||
Node.m_uSize = 0;
|
||||
}
|
||||
|
||||
// Compute initial distance matrix between leaves
|
||||
SetProgressDesc("Build dist matrix");
|
||||
unsigned uPairIndex = 0;
|
||||
const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
|
||||
for (unsigned i = 0; i < m_uLeafCount; ++i)
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
{
|
||||
const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
|
||||
SetDist(i, j, dDist);
|
||||
if (0 == uPairIndex%10000)
|
||||
Progress(uPairIndex, uPairCount);
|
||||
++uPairIndex;
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
// Call CreateCluster once for each internal node in the tree
|
||||
SetProgressDesc("Build guide tree");
|
||||
m_uClusterCount = m_uLeafCount;
|
||||
const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
|
||||
for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
unsigned i = uNodeIndex + 1 - m_uLeafCount;
|
||||
Progress(i, uInternalNodeCount);
|
||||
CreateCluster();
|
||||
}
|
||||
ProgressStepsDone();
|
||||
}
|
||||
|
||||
void Clust::CreateCluster()
|
||||
{
|
||||
unsigned uLeftNodeIndex;
|
||||
unsigned uRightNodeIndex;
|
||||
float dLeftLength;
|
||||
float dRightLength;
|
||||
ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);
|
||||
|
||||
const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;
|
||||
|
||||
JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
|
||||
uNewNodeIndex);
|
||||
|
||||
#if TRACE
|
||||
Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
|
||||
uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
|
||||
#endif
|
||||
|
||||
// Compute distances to other clusters
|
||||
--m_uClusterCount;
|
||||
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||
{
|
||||
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||
continue;
|
||||
|
||||
if (uNewNodeIndex == uNodeIndex)
|
||||
continue;
|
||||
|
||||
const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
|
||||
SetDist(uNewNodeIndex, uNodeIndex, dDist);
|
||||
}
|
||||
|
||||
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||
{
|
||||
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||
continue;
|
||||
|
||||
if (uNewNodeIndex == uNodeIndex)
|
||||
continue;
|
||||
|
||||
#if REDLACK
|
||||
const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
|
||||
InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
switch (m_JoinStyle)
|
||||
{
|
||||
case JOIN_NearestNeighbor:
|
||||
ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||
ptrdRightLength);
|
||||
return;
|
||||
case JOIN_NeighborJoining:
|
||||
ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||
ptrdRightLength);
|
||||
return;
|
||||
}
|
||||
Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
|
||||
}
|
||||
|
||||
void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
|
||||
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
|
||||
unsigned uMinLeftNodeIndex;
|
||||
unsigned uMinRightNodeIndex;
|
||||
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||
|
||||
float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||
|
||||
const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
|
||||
const float dRightHeight = GetHeight(uMinRightNodeIndex);
|
||||
|
||||
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||
*ptruRightIndex = uMinRightNodeIndex;
|
||||
*ptrdLeftLength = dMinDist/2 - dLeftHeight;
|
||||
*ptrdRightLength = dMinDist/2 - dRightHeight;
|
||||
}
|
||||
|
||||
void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
|
||||
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
|
||||
//unsigned uMinLeftNodeIndex = uInsane;
|
||||
//unsigned uMinRightNodeIndex = uInsane;
|
||||
//float dMinD = PLUS_INFINITY;
|
||||
//for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||
// {
|
||||
// const float ri = Calc_r(i);
|
||||
// for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
|
||||
// {
|
||||
// const float rj = Calc_r(j);
|
||||
// const float dij = GetDist(i, j);
|
||||
// const float Dij = dij - (ri + rj);
|
||||
// if (Dij < dMinD)
|
||||
// {
|
||||
// dMinD = Dij;
|
||||
// uMinLeftNodeIndex = i;
|
||||
// uMinRightNodeIndex = j;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
unsigned uMinLeftNodeIndex;
|
||||
unsigned uMinRightNodeIndex;
|
||||
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||
|
||||
const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||
const float rL = Calc_r(uMinLeftNodeIndex);
|
||||
const float rR = Calc_r(uMinRightNodeIndex);
|
||||
|
||||
const float dLeftLength = (dDistLR + rL - rR)/2;
|
||||
const float dRightLength = (dDistLR - rL + rR)/2;
|
||||
|
||||
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||
*ptruRightIndex = uMinRightNodeIndex;
|
||||
*ptrdLeftLength = dLeftLength;
|
||||
*ptrdRightLength = dRightLength;
|
||||
}
|
||||
|
||||
void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
|
||||
float dRightLength, unsigned uNodeIndex)
|
||||
{
|
||||
ClustNode &Parent = m_Nodes[uNodeIndex];
|
||||
ClustNode &Left = m_Nodes[uLeftIndex];
|
||||
ClustNode &Right = m_Nodes[uRightIndex];
|
||||
|
||||
Left.m_dLength = dLeftLength;
|
||||
Right.m_dLength = dRightLength;
|
||||
|
||||
Parent.m_ptrLeft = &Left;
|
||||
Parent.m_ptrRight = &Right;
|
||||
|
||||
Left.m_ptrParent = &Parent;
|
||||
Right.m_ptrParent = &Parent;
|
||||
|
||||
const unsigned uLeftSize = Left.m_uSize;
|
||||
const unsigned uRightSize = Right.m_uSize;
|
||||
const unsigned uParentSize = uLeftSize + uRightSize;
|
||||
Parent.m_uSize = uParentSize;
|
||||
|
||||
assert(0 == Parent.m_uLeafIndexes);
|
||||
Parent.m_uLeafIndexes = new unsigned[uParentSize];
|
||||
|
||||
const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
|
||||
const unsigned uRightBytes = uRightSize*sizeof(unsigned);
|
||||
memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
|
||||
memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);
|
||||
|
||||
DeleteFromClusterList(uLeftIndex);
|
||||
DeleteFromClusterList(uRightIndex);
|
||||
AddToClusterList(uNodeIndex);
|
||||
}
|
||||
|
||||
float Clust::Calc_r(unsigned uNodeIndex) const
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
if (2 == uClusterCount)
|
||||
return 0;
|
||||
|
||||
float dSum = 0;
|
||||
for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||
{
|
||||
if (i == uNodeIndex)
|
||||
continue;
|
||||
dSum += GetDist(uNodeIndex, i);
|
||||
}
|
||||
return dSum/(uClusterCount - 2);
|
||||
}
|
||||
|
||||
float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
switch (m_CentroidStyle)
|
||||
{
|
||||
case LINKAGE_Avg:
|
||||
return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Min:
|
||||
return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Max:
|
||||
return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Biased:
|
||||
return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_NeighborJoining:
|
||||
return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
|
||||
}
|
||||
Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
|
||||
return (float) g_dNAN;
|
||||
}
|
||||
|
||||
float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL < dDistR ? dDistL : dDistR);
|
||||
}
|
||||
|
||||
float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL > dDistR ? dDistL : dDistR);
|
||||
}
|
||||
|
||||
float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL + dDistR)/2;
|
||||
}
|
||||
|
||||
float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
const float dDist = (dDistL + dDistR - dDistLR)/2;
|
||||
return dDist;
|
||||
}
|
||||
|
||||
// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
|
||||
float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
|
||||
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
|
||||
const float dSumDistLR = dDistL + dDistR;
|
||||
const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
|
||||
return dDist;
|
||||
}
|
||||
|
||||
unsigned Clust::GetClusterCount() const
|
||||
{
|
||||
return m_uClusterCount;
|
||||
}
|
||||
|
||||
void Clust::LogMe() const
|
||||
{
|
||||
Log("Clust %u leaves, %u nodes, %u clusters.\n",
|
||||
m_uLeafCount, m_uNodeCount, m_uClusterCount);
|
||||
|
||||
Log("Distance matrix\n");
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
Log(" %7u", i);
|
||||
Log("\n");
|
||||
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
Log(" ------");
|
||||
Log("\n");
|
||||
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
{
|
||||
Log("%4u: ", i);
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
Log(" %7.2g", GetDist(i, j));
|
||||
Log("\n");
|
||||
}
|
||||
|
||||
Log("\n");
|
||||
Log("Node Size Prnt Left Rght Length Name\n");
|
||||
Log("---- ---- ---- ---- ---- ------ ----\n");
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
const ClustNode &Node = m_Nodes[uNodeIndex];
|
||||
Log("%4u %4u", uNodeIndex, Node.m_uSize);
|
||||
if (0 != Node.m_ptrParent)
|
||||
Log(" %4u", Node.m_ptrParent->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (0 != Node.m_ptrLeft)
|
||||
Log(" %4u", Node.m_ptrLeft->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (0 != Node.m_ptrRight)
|
||||
Log(" %4u", Node.m_ptrRight->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (uNodeIndex != m_uNodeCount - 1)
|
||||
Log(" %7.3g", Node.m_dLength);
|
||||
if (IsLeaf(uNodeIndex))
|
||||
{
|
||||
const char *ptrName = GetNodeName(uNodeIndex);
|
||||
if (0 != ptrName)
|
||||
Log(" %s", ptrName);
|
||||
}
|
||||
if (GetRootNodeIndex() == uNodeIndex)
|
||||
Log(" [ROOT]");
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
|
||||
{
|
||||
if (uNodeIndex >= m_uNodeCount)
|
||||
Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
|
||||
return m_Nodes[uNodeIndex];
|
||||
}
|
||||
|
||||
bool Clust::IsLeaf(unsigned uNodeIndex) const
|
||||
{
|
||||
return uNodeIndex < m_uLeafCount;
|
||||
}
|
||||
|
||||
unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
return Node.m_uSize;
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
if (0 == Node.m_ptrLeft)
|
||||
Quit("Clust::GetLeftIndex: leaf");
|
||||
return Node.m_ptrLeft->m_uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
if (0 == Node.m_ptrRight)
|
||||
Quit("Clust::GetRightIndex: leaf");
|
||||
return Node.m_ptrRight->m_uIndex;
|
||||
}
|
||||
|
||||
float Clust::GetLength(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
return Node.m_dLength;
|
||||
}
|
||||
|
||||
void Clust::SetLeafCount(unsigned uLeafCount)
|
||||
{
|
||||
if (uLeafCount <= 1)
|
||||
Quit("Clust::SetLeafCount(%u)", uLeafCount);
|
||||
|
||||
m_uLeafCount = uLeafCount;
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
|
||||
// Triangular matrix size excluding diagonal (all zeros in our case).
|
||||
m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
|
||||
m_dDist = new float[m_uTriangularMatrixSize];
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeafCount() const
|
||||
{
|
||||
return m_uLeafCount;
|
||||
}
|
||||
|
||||
unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
|
||||
Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
|
||||
unsigned v;
|
||||
if (uIndex1 >= uIndex2)
|
||||
v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
|
||||
else
|
||||
v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
|
||||
assert(v < m_uTriangularMatrixSize);
|
||||
return v;
|
||||
}
|
||||
|
||||
float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||
return m_dDist[v];
|
||||
}
|
||||
|
||||
void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||
{
|
||||
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||
m_dDist[v] = dDist;
|
||||
}
|
||||
|
||||
float Clust::GetHeight(unsigned uNodeIndex) const
|
||||
{
|
||||
if (IsLeaf(uNodeIndex))
|
||||
return 0;
|
||||
|
||||
const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
|
||||
const unsigned uRightIndex = GetRightIndex(uNodeIndex);
|
||||
const float dLeftLength = GetLength(uLeftIndex);
|
||||
const float dRightLength = GetLength(uRightIndex);
|
||||
const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
|
||||
const float dRightHeight = dRightLength + GetHeight(uRightIndex);
|
||||
return (dLeftHeight + dRightHeight)/2;
|
||||
}
|
||||
|
||||
const char *Clust::GetNodeName(unsigned uNodeIndex) const
|
||||
{
|
||||
if (!IsLeaf(uNodeIndex))
|
||||
Quit("Clust::GetNodeName, is not leaf");
|
||||
return m_ptrSet->GetLeafName(uNodeIndex);
|
||||
}
|
||||
|
||||
unsigned Clust::GetNodeId(unsigned uNodeIndex) const
|
||||
{
|
||||
if (uNodeIndex >= GetLeafCount())
|
||||
return 0;
|
||||
return m_ptrSet->GetLeafId(uNodeIndex);
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
const unsigned uLeafCount = Node.m_uSize;
|
||||
if (uLeafIndex >= uLeafCount)
|
||||
Quit("Clust::GetLeaf, invalid index");
|
||||
const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex];
|
||||
if (uIndex >= m_uNodeCount)
|
||||
Quit("Clust::GetLeaf, index out of range");
|
||||
return uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetFirstCluster() const
|
||||
{
|
||||
if (0 == m_ptrClusterList)
|
||||
return uInsane;
|
||||
return m_ptrClusterList->m_uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetNextCluster(unsigned uIndex) const
|
||||
{
|
||||
ClustNode *ptrNode = &m_Nodes[uIndex];
|
||||
if (0 == ptrNode->m_ptrNextCluster)
|
||||
return uInsane;
|
||||
return ptrNode->m_ptrNextCluster->m_uIndex;
|
||||
}
|
||||
|
||||
void Clust::DeleteFromClusterList(unsigned uNodeIndex)
|
||||
{
|
||||
assert(uNodeIndex < m_uNodeCount);
|
||||
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||
ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
|
||||
ClustNode *ptrNext = ptrNode->m_ptrNextCluster;
|
||||
|
||||
if (0 != ptrNext)
|
||||
ptrNext->m_ptrPrevCluster = ptrPrev;
|
||||
if (0 == ptrPrev)
|
||||
{
|
||||
assert(m_ptrClusterList == ptrNode);
|
||||
m_ptrClusterList = ptrNext;
|
||||
}
|
||||
else
|
||||
ptrPrev->m_ptrNextCluster = ptrNext;
|
||||
|
||||
ptrNode->m_ptrNextCluster = 0;
|
||||
ptrNode->m_ptrPrevCluster = 0;
|
||||
}
|
||||
|
||||
void Clust::AddToClusterList(unsigned uNodeIndex)
|
||||
{
|
||||
assert(uNodeIndex < m_uNodeCount);
|
||||
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||
|
||||
if (0 != m_ptrClusterList)
|
||||
m_ptrClusterList->m_ptrPrevCluster = ptrNode;
|
||||
|
||||
ptrNode->m_ptrNextCluster = m_ptrClusterList;
|
||||
ptrNode->m_ptrPrevCluster = 0;
|
||||
|
||||
m_ptrClusterList = ptrNode;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
switch (m_JoinStyle)
|
||||
{
|
||||
case JOIN_NearestNeighbor:
|
||||
return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
|
||||
|
||||
case JOIN_NeighborJoining:
|
||||
return ComputeMetricNeighborJoining(uIndex1, uIndex2);
|
||||
}
|
||||
Quit("Clust::ComputeMetric");
|
||||
return 0;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
|
||||
{
|
||||
float ri = Calc_r(i);
|
||||
float rj = Calc_r(j);
|
||||
float dij = GetDist(i, j);
|
||||
float dMetric = dij - (ri + rj);
|
||||
return (float) dMetric;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
|
||||
{
|
||||
return (float) GetDist(i, j);
|
||||
}
|
||||
|
||||
float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||
{
|
||||
unsigned uMinLeftNodeIndex = uInsane;
|
||||
unsigned uMinRightNodeIndex = uInsane;
|
||||
float dMinMetric = PLUS_INFINITY;
|
||||
for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane;
|
||||
uLeftNodeIndex = GetNextCluster(uLeftNodeIndex))
|
||||
{
|
||||
for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex);
|
||||
uRightNodeIndex != uInsane;
|
||||
uRightNodeIndex = GetNextCluster(uRightNodeIndex))
|
||||
{
|
||||
float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex);
|
||||
if (dMetric < dMinMetric)
|
||||
{
|
||||
dMinMetric = dMetric;
|
||||
uMinLeftNodeIndex = uLeftNodeIndex;
|
||||
uMinRightNodeIndex = uRightNodeIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
*ptruIndex1 = uMinLeftNodeIndex;
|
||||
*ptruIndex2 = uMinRightNodeIndex;
|
||||
return dMinMetric;
|
||||
}
|
||||
|
||||
float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||
{
|
||||
return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
|
||||
}
|
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
@ -0,0 +1,148 @@
|
||||
#ifndef Clust_h
|
||||
#define Clust_h
|
||||
|
||||
class Clust;
|
||||
class ClustNode;
|
||||
class ClustSet;
|
||||
class Phylip;
|
||||
class SortedNode;
|
||||
|
||||
const unsigned RB_NIL = ((unsigned) 0xfff0);
|
||||
|
||||
class ClustNode
|
||||
{
|
||||
public:
|
||||
ClustNode()
|
||||
{
|
||||
m_uIndex = uInsane;
|
||||
m_uSize = uInsane;
|
||||
m_dLength = (float) dInsane;
|
||||
m_ptrLeft = 0;
|
||||
m_ptrRight = 0;
|
||||
m_ptrParent = 0;
|
||||
m_ptrNextCluster = 0;
|
||||
m_ptrPrevCluster = 0;
|
||||
m_uLeafIndexes = 0;
|
||||
}
|
||||
~ClustNode()
|
||||
{
|
||||
delete[] m_uLeafIndexes;
|
||||
}
|
||||
unsigned m_uIndex;
|
||||
unsigned m_uSize;
|
||||
float m_dLength;
|
||||
ClustNode *m_ptrLeft;
|
||||
ClustNode *m_ptrRight;
|
||||
ClustNode *m_ptrParent;
|
||||
ClustNode *m_ptrNextCluster;
|
||||
ClustNode *m_ptrPrevCluster;
|
||||
unsigned *m_uLeafIndexes;
|
||||
};
|
||||
|
||||
class Clust
|
||||
{
|
||||
public:
|
||||
Clust();
|
||||
virtual ~Clust();
|
||||
|
||||
void Create(ClustSet &Set, CLUSTER Method);
|
||||
|
||||
unsigned GetLeafCount() const;
|
||||
|
||||
unsigned GetClusterCount() const;
|
||||
unsigned GetClusterSize(unsigned uNodeIndex) const;
|
||||
unsigned GetLeaf(unsigned uClusterIndex, unsigned uLeafIndex) const;
|
||||
|
||||
unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
|
||||
const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
|
||||
unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }
|
||||
|
||||
const ClustNode &GetNode(unsigned uNodeIndex) const;
|
||||
bool IsLeaf(unsigned uNodeIndex) const;
|
||||
unsigned GetLeftIndex(unsigned uNodeIndex) const;
|
||||
unsigned GetRightIndex(unsigned uNodeIndex) const;
|
||||
float GetLength(unsigned uNodeIndex) const;
|
||||
float GetHeight(unsigned uNodeIndex) const;
|
||||
const char *GetNodeName(unsigned uNodeIndex) const;
|
||||
unsigned GetNodeId(unsigned uNodeIndex) const;
|
||||
|
||||
JOIN GetJoinStyle() const { return m_JoinStyle; }
|
||||
LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }
|
||||
|
||||
void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
|
||||
float GetDist(unsigned uIndex1, unsigned uIndex2) const;
|
||||
|
||||
void ToPhylip(Phylip &tree);
|
||||
|
||||
void LogMe() const;
|
||||
|
||||
//private:
|
||||
void SetLeafCount(unsigned uLeafCount);
|
||||
|
||||
void CreateCluster();
|
||||
void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex,
|
||||
float dLeftLength, float dRightLength, unsigned uNewNodeIndex);
|
||||
|
||||
void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
|
||||
float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistNeighborJoining(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMAFFT(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||
|
||||
float Calc_r(unsigned uNodeIndex) const;
|
||||
|
||||
unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
|
||||
|
||||
unsigned GetFirstCluster() const;
|
||||
unsigned GetNextCluster(unsigned uNodeIndex) const;
|
||||
|
||||
float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
|
||||
float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
|
||||
float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;
|
||||
|
||||
void InitMetric(unsigned uMaxNodeIndex);
|
||||
void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
|
||||
float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||
float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||
void DeleteMetric(unsigned uIndex);
|
||||
void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
|
||||
void ListMetric() const;
|
||||
|
||||
void DeleteFromClusterList(unsigned uNodeIndex);
|
||||
void AddToClusterList(unsigned uNodeIndex);
|
||||
|
||||
void RBDelete(unsigned RBNode);
|
||||
unsigned RBInsert(unsigned i, unsigned j, float fMetric);
|
||||
|
||||
unsigned RBNext(unsigned RBNode) const;
|
||||
unsigned RBPrev(unsigned RBNode) const;
|
||||
unsigned RBMin(unsigned RBNode) const;
|
||||
unsigned RBMax(unsigned RBNode) const;
|
||||
|
||||
void ValidateRB(const char szMsg[] = 0) const;
|
||||
void ValidateRBNode(unsigned Node, const char szMsg[]) const;
|
||||
|
||||
//private:
|
||||
JOIN m_JoinStyle;
|
||||
LINKAGE m_CentroidStyle;
|
||||
ClustNode *m_Nodes;
|
||||
unsigned *m_ClusterIndexToNodeIndex;
|
||||
unsigned *m_NodeIndexToClusterIndex;
|
||||
unsigned m_uLeafCount;
|
||||
unsigned m_uNodeCount;
|
||||
unsigned m_uClusterCount;
|
||||
unsigned m_uTriangularMatrixSize;
|
||||
float *m_dDist;
|
||||
ClustSet *m_ptrSet;
|
||||
ClustNode *m_ptrClusterList;
|
||||
};
|
||||
|
||||
#endif // Clust_h
|
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
@ -0,0 +1,339 @@
|
||||
#include "muscle.h"
|
||||
#include "cluster.h"
|
||||
#include "distfunc.h"
|
||||
|
||||
static inline float Min(float d1, float d2)
|
||||
{
|
||||
return d1 < d2 ? d1 : d2;
|
||||
}
|
||||
|
||||
static inline float Max(float d1, float d2)
|
||||
{
|
||||
return d1 > d2 ? d1 : d2;
|
||||
}
|
||||
|
||||
static inline float Mean(float d1, float d2)
|
||||
{
|
||||
return (float) ((d1 + d2)/2.0);
|
||||
}
|
||||
|
||||
#if _DEBUG
|
||||
void ClusterTree::Validate(unsigned uNodeCount)
|
||||
{
|
||||
unsigned n;
|
||||
ClusterNode *pNode;
|
||||
unsigned uDisjointListCount = 0;
|
||||
for (pNode = m_ptrDisjoints; pNode; pNode = pNode->GetNextDisjoint())
|
||||
{
|
||||
ClusterNode *pPrev = pNode->GetPrevDisjoint();
|
||||
ClusterNode *pNext = pNode->GetNextDisjoint();
|
||||
if (0 != pPrev)
|
||||
{
|
||||
if (pPrev->GetNextDisjoint() != pNode)
|
||||
{
|
||||
Log("Prev->This mismatch, prev=\n");
|
||||
pPrev->LogMe();
|
||||
Log("This=\n");
|
||||
pNode->LogMe();
|
||||
Quit("ClusterTree::Validate()");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pNode != m_ptrDisjoints)
|
||||
{
|
||||
Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
|
||||
pNode->GetIndex(),
|
||||
m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
|
||||
pNode->LogMe();
|
||||
Quit("ClusterTree::Validate()");
|
||||
}
|
||||
}
|
||||
if (0 != pNext)
|
||||
{
|
||||
if (pNext->GetPrevDisjoint() != pNode)
|
||||
{
|
||||
Log("Next->This mismatch, next=\n");
|
||||
pNext->LogMe();
|
||||
Log("This=\n");
|
||||
pNode->LogMe();
|
||||
Quit("ClusterTree::Validate()");
|
||||
}
|
||||
}
|
||||
++uDisjointListCount;
|
||||
if (uDisjointListCount > m_uNodeCount)
|
||||
Quit("Loop in disjoint list");
|
||||
}
|
||||
|
||||
unsigned uParentlessNodeCount = 0;
|
||||
for (n = 0; n < uNodeCount; ++n)
|
||||
if (0 == m_Nodes[n].GetParent())
|
||||
++uParentlessNodeCount;
|
||||
|
||||
if (uDisjointListCount != uParentlessNodeCount)
|
||||
Quit("Disjoints = %u Parentless = %u\n", uDisjointListCount,
|
||||
uParentlessNodeCount);
|
||||
}
|
||||
#else // !_DEBUG
|
||||
#define Validate(uNodeCount) // empty
|
||||
#endif
|
||||
|
||||
void ClusterNode::LogMe() const
|
||||
{
|
||||
unsigned uClusterSize = GetClusterSize();
|
||||
Log("[%02u] w=%5.3f CW=%5.3f LBW=%5.3f RBW=%5.3f LWT=%5.3f RWT=%5.3f L=%02d R=%02d P=%02d NxDj=%02d PvDj=%02d Sz=%02d {",
|
||||
m_uIndex,
|
||||
m_dWeight,
|
||||
GetClusterWeight(),
|
||||
GetLeftBranchWeight(),
|
||||
GetRightBranchWeight(),
|
||||
GetLeftWeight(),
|
||||
GetRightWeight(),
|
||||
m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,
|
||||
m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,
|
||||
m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,
|
||||
m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,
|
||||
m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,
|
||||
uClusterSize);
|
||||
for (unsigned i = 0; i < uClusterSize; ++i)
|
||||
Log(" %u", GetClusterLeaf(i)->GetIndex());
|
||||
Log(" }\n");
|
||||
}
|
||||
|
||||
// How many leaves in the sub-tree under this node?
|
||||
unsigned ClusterNode::GetClusterSize() const
|
||||
{
|
||||
unsigned uLeafCount = 0;
|
||||
|
||||
if (0 == m_ptrLeft && 0 == m_ptrRight)
|
||||
return 1;
|
||||
|
||||
if (0 != m_ptrLeft)
|
||||
uLeafCount += m_ptrLeft->GetClusterSize();
|
||||
if (0 != m_ptrRight)
|
||||
uLeafCount += m_ptrRight->GetClusterSize();
|
||||
assert(uLeafCount > 0);
|
||||
return uLeafCount;
|
||||
}
|
||||
|
||||
double ClusterNode::GetClusterWeight() const
|
||||
{
|
||||
double dWeight = 0.0;
|
||||
if (0 != m_ptrLeft)
|
||||
dWeight += m_ptrLeft->GetClusterWeight();
|
||||
if (0 != m_ptrRight)
|
||||
dWeight += m_ptrRight->GetClusterWeight();
|
||||
return dWeight + GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetLeftBranchWeight() const
|
||||
{
|
||||
const ClusterNode *ptrLeft = GetLeft();
|
||||
if (0 == ptrLeft)
|
||||
return 0.0;
|
||||
|
||||
return GetWeight() - ptrLeft->GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetRightBranchWeight() const
|
||||
{
|
||||
const ClusterNode *ptrRight = GetRight();
|
||||
if (0 == ptrRight)
|
||||
return 0.0;
|
||||
|
||||
return GetWeight() - ptrRight->GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetRightWeight() const
|
||||
{
|
||||
const ClusterNode *ptrRight = GetRight();
|
||||
if (0 == ptrRight)
|
||||
return 0.0;
|
||||
return ptrRight->GetClusterWeight() + GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetLeftWeight() const
|
||||
{
|
||||
const ClusterNode *ptrLeft = GetLeft();
|
||||
if (0 == ptrLeft)
|
||||
return 0.0;
|
||||
return ptrLeft->GetClusterWeight() + GetWeight();
|
||||
}
|
||||
|
||||
// Return n'th leaf in the sub-tree under this node.
|
||||
const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
|
||||
{
|
||||
if (0 != m_ptrLeft)
|
||||
{
|
||||
if (0 == m_ptrRight)
|
||||
return this;
|
||||
|
||||
unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();
|
||||
|
||||
if (uLeafIndex < uLeftLeafCount)
|
||||
return m_ptrLeft->GetClusterLeaf(uLeafIndex);
|
||||
|
||||
assert(uLeafIndex >= uLeftLeafCount);
|
||||
return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);
|
||||
}
|
||||
if (0 == m_ptrRight)
|
||||
return this;
|
||||
return m_ptrRight->GetClusterLeaf(uLeafIndex);
|
||||
}
|
||||
|
||||
void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
|
||||
{
|
||||
ClusterNode *ptrPrev = ptrNode->GetPrevDisjoint();
|
||||
ClusterNode *ptrNext = ptrNode->GetNextDisjoint();
|
||||
|
||||
if (0 != ptrPrev)
|
||||
ptrPrev->SetNextDisjoint(ptrNext);
|
||||
else
|
||||
m_ptrDisjoints = ptrNext;
|
||||
|
||||
if (0 != ptrNext)
|
||||
ptrNext->SetPrevDisjoint(ptrPrev);
|
||||
|
||||
#if _DEBUG
|
||||
// not algorithmically necessary, but improves clarity
|
||||
// and supports Validate().
|
||||
ptrNode->SetPrevDisjoint(0);
|
||||
ptrNode->SetNextDisjoint(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
|
||||
{
|
||||
ptrNode->SetNextDisjoint(m_ptrDisjoints);
|
||||
ptrNode->SetPrevDisjoint(0);
|
||||
if (0 != m_ptrDisjoints)
|
||||
m_ptrDisjoints->SetPrevDisjoint(ptrNode);
|
||||
m_ptrDisjoints = ptrNode;
|
||||
}
|
||||
|
||||
ClusterTree::ClusterTree()
|
||||
{
|
||||
m_ptrDisjoints = 0;
|
||||
m_Nodes = 0;
|
||||
m_uNodeCount = 0;
|
||||
}
|
||||
|
||||
ClusterTree::~ClusterTree()
|
||||
{
|
||||
delete[] m_Nodes;
|
||||
}
|
||||
|
||||
void ClusterTree::LogMe() const
|
||||
{
|
||||
Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
|
||||
for (unsigned i = 0; i < m_uNodeCount; ++i)
|
||||
{
|
||||
m_Nodes[i].LogMe();
|
||||
}
|
||||
}
|
||||
|
||||
ClusterNode *ClusterTree::GetRoot() const
|
||||
{
|
||||
return &m_Nodes[m_uNodeCount - 1];
|
||||
}
|
||||
|
||||
// This is the UPGMA algorithm as described in Durbin et al. p166.
|
||||
void ClusterTree::Create(const DistFunc &Dist)
|
||||
{
|
||||
unsigned i;
|
||||
m_uLeafCount = Dist.GetCount();
|
||||
m_uNodeCount = 2*m_uLeafCount - 1;
|
||||
|
||||
delete[] m_Nodes;
|
||||
m_Nodes = new ClusterNode[m_uNodeCount];
|
||||
|
||||
for (i = 0; i < m_uNodeCount; ++i)
|
||||
m_Nodes[i].SetIndex(i);
|
||||
|
||||
for (i = 0; i < m_uLeafCount - 1; ++i)
|
||||
m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);
|
||||
|
||||
for (i = 1; i < m_uLeafCount; ++i)
|
||||
m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
|
||||
|
||||
m_ptrDisjoints = &m_Nodes[0];
|
||||
|
||||
// Log("Initial state\n");
|
||||
// LogMe();
|
||||
// Log("\n");
|
||||
|
||||
DistFunc ClusterDist;
|
||||
ClusterDist.SetCount(m_uNodeCount);
|
||||
double dMaxDist = 0.0;
|
||||
for (i = 0; i < m_uLeafCount; ++i)
|
||||
for (unsigned j = 0; j < m_uLeafCount; ++j)
|
||||
{
|
||||
float dDist = Dist.GetDist(i, j);
|
||||
ClusterDist.SetDist(i, j, dDist);
|
||||
}
|
||||
|
||||
Validate(m_uLeafCount);
|
||||
|
||||
// Iteration. N-1 joins needed to create a binary tree from N leaves.
|
||||
for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
|
||||
++uJoinIndex)
|
||||
{
|
||||
// Find closest pair of clusters
|
||||
unsigned uIndexClosest1;
|
||||
unsigned uIndexClosest2;
|
||||
bool bFound = false;
|
||||
double dDistClosest = 9e99;
|
||||
for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
|
||||
ptrNode1 = ptrNode1->GetNextDisjoint())
|
||||
{
|
||||
for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
|
||||
ptrNode2 = ptrNode2->GetNextDisjoint())
|
||||
{
|
||||
unsigned i1 = ptrNode1->GetIndex();
|
||||
unsigned i2 = ptrNode2->GetIndex();
|
||||
double dDist = ClusterDist.GetDist(i1, i2);
|
||||
if (dDist < dDistClosest)
|
||||
{
|
||||
bFound = true;
|
||||
dDistClosest = dDist;
|
||||
uIndexClosest1 = i1;
|
||||
uIndexClosest2 = i2;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(bFound);
|
||||
|
||||
ClusterNode &Join = m_Nodes[uJoinIndex];
|
||||
ClusterNode &Child1 = m_Nodes[uIndexClosest1];
|
||||
ClusterNode &Child2 = m_Nodes[uIndexClosest2];
|
||||
|
||||
Join.SetLeft(&Child1);
|
||||
Join.SetRight(&Child2);
|
||||
Join.SetWeight(dDistClosest);
|
||||
|
||||
Child1.SetParent(&Join);
|
||||
Child2.SetParent(&Join);
|
||||
|
||||
DeleteFromDisjoints(&Child1);
|
||||
DeleteFromDisjoints(&Child2);
|
||||
AddToDisjoints(&Join);
|
||||
|
||||
// Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);
|
||||
// LogMe();
|
||||
|
||||
// Calculate distance of every remaining disjoint cluster to the
|
||||
// new cluster created by the join
|
||||
for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
|
||||
ptrNode = ptrNode->GetNextDisjoint())
|
||||
{
|
||||
unsigned uNodeIndex = ptrNode->GetIndex();
|
||||
float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
|
||||
float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
|
||||
float dDist = Min(dDist1, dDist2);
|
||||
ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
|
||||
}
|
||||
Validate(uJoinIndex+1);
|
||||
}
|
||||
GetRoot()->GetClusterWeight();
|
||||
// LogMe();
|
||||
}
|
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
@ -0,0 +1,86 @@
|
||||
class DistFunc;
|
||||
|
||||
class ClusterNode
|
||||
{
|
||||
friend class ClusterTree;
|
||||
public:
|
||||
ClusterNode()
|
||||
{
|
||||
m_dWeight = 0.0;
|
||||
m_dWeight2 = 0.0;
|
||||
m_ptrLeft = 0;
|
||||
m_ptrRight = 0;
|
||||
m_ptrParent = 0;
|
||||
m_uIndex = 0;
|
||||
m_ptrPrevDisjoint = 0;
|
||||
m_ptrNextDisjoint = 0;
|
||||
}
|
||||
~ClusterNode() {}
|
||||
|
||||
public:
|
||||
unsigned GetIndex() const { return m_uIndex; }
|
||||
ClusterNode *GetLeft() const { return m_ptrLeft; }
|
||||
ClusterNode *GetRight() const { return m_ptrRight; }
|
||||
ClusterNode *GetParent() const { return m_ptrParent; }
|
||||
double GetWeight() const { return m_dWeight; }
|
||||
|
||||
const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
|
||||
unsigned GetClusterSize() const;
|
||||
double GetClusterWeight() const;
|
||||
double GetLeftBranchWeight() const;
|
||||
double GetRightBranchWeight() const;
|
||||
double GetLeftWeight() const;
|
||||
double GetRightWeight() const;
|
||||
|
||||
void LogMe() const;
|
||||
|
||||
double GetWeight2() const { return m_dWeight2; }
|
||||
void SetWeight2(double dWeight2) { m_dWeight2 = dWeight2; }
|
||||
|
||||
protected:
|
||||
void SetIndex(unsigned uIndex) { m_uIndex = uIndex; }
|
||||
void SetWeight(double dWeight) { m_dWeight = dWeight; }
|
||||
void SetLeft(ClusterNode *ptrLeft) { m_ptrLeft = ptrLeft; }
|
||||
void SetRight(ClusterNode *ptrRight) { m_ptrRight = ptrRight; }
|
||||
void SetParent(ClusterNode *ptrParent) { m_ptrParent = ptrParent; }
|
||||
void SetNextDisjoint(ClusterNode *ptrNode) { m_ptrNextDisjoint = ptrNode; }
|
||||
void SetPrevDisjoint(ClusterNode *ptrNode) { m_ptrPrevDisjoint = ptrNode; }
|
||||
|
||||
ClusterNode *GetNextDisjoint() { return m_ptrNextDisjoint; }
|
||||
ClusterNode *GetPrevDisjoint() { return m_ptrPrevDisjoint; }
|
||||
|
||||
private:
|
||||
double m_dWeight;
|
||||
double m_dWeight2;
|
||||
unsigned m_uIndex;
|
||||
ClusterNode *m_ptrLeft;
|
||||
ClusterNode *m_ptrRight;
|
||||
ClusterNode *m_ptrParent;
|
||||
ClusterNode *m_ptrNextDisjoint;
|
||||
ClusterNode *m_ptrPrevDisjoint;
|
||||
};
|
||||
|
||||
class ClusterTree
|
||||
{
|
||||
public:
|
||||
ClusterTree();
|
||||
virtual ~ClusterTree();
|
||||
|
||||
void Create(const DistFunc &DF);
|
||||
|
||||
ClusterNode *GetRoot() const;
|
||||
void LogMe() const;
|
||||
|
||||
protected:
|
||||
void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
|
||||
ClusterNode *ptrJoin);
|
||||
void AddToDisjoints(ClusterNode *ptrNode);
|
||||
void DeleteFromDisjoints(ClusterNode *ptrNode);
|
||||
void Validate(unsigned uNodeCount);
|
||||
|
||||
private:
|
||||
ClusterNode *m_ptrDisjoints;
|
||||
ClusterNode *m_Nodes;
|
||||
unsigned m_uNodeCount;
|
||||
unsigned m_uLeafCount;
|
||||
};
|
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
@ -0,0 +1,21 @@
|
||||
#ifndef ClustSet_h
|
||||
#define ClustSet_h
|
||||
|
||||
enum JOIN;
|
||||
enum LINKAGE;
|
||||
class Clust;
|
||||
|
||||
class ClustSet
|
||||
{
|
||||
public:
|
||||
virtual unsigned GetLeafCount() = 0;
|
||||
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||
unsigned uNodeIndex2) = 0;
|
||||
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||
double *ptrdLeftLength, double *ptrdRightLength) = 0;
|
||||
virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
|
||||
virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
|
||||
};
|
||||
|
||||
#endif // ClustSet_h
|
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
@ -0,0 +1,48 @@
|
||||
#ifndef ClustSetDF_h
|
||||
#define ClustSetDF_h
|
||||
|
||||
class MSA;
|
||||
class Clust;
|
||||
|
||||
#include "clustset.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
|
||||
class ClustSetDF : public ClustSet
|
||||
{
|
||||
public:
|
||||
ClustSetDF(const DistFunc &DF) :
|
||||
m_ptrDF(&DF)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual unsigned GetLeafCount()
|
||||
{
|
||||
return m_ptrDF->GetCount();
|
||||
}
|
||||
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrDF->GetName(uNodeIndex);
|
||||
}
|
||||
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrDF->GetId(uNodeIndex);
|
||||
}
|
||||
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||
double *ptrdLeftLength, double *ptrdRightLength)
|
||||
{
|
||||
Quit("ClustSetDF::JoinNodes, should never be called");
|
||||
}
|
||||
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||
unsigned uNodeIndex2)
|
||||
{
|
||||
return m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
|
||||
}
|
||||
|
||||
private:
|
||||
const DistFunc *m_ptrDF;
|
||||
};
|
||||
|
||||
#endif // ClustSetDF_h
|
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
@ -0,0 +1,55 @@
|
||||
#ifndef ClustSetMSA_h
|
||||
#define ClustSetMSA_h
|
||||
|
||||
class MSA;
|
||||
class Clust;
|
||||
|
||||
#include "clustset.h"
|
||||
#include "msadist.h"
|
||||
|
||||
// Distance matrix based set.
|
||||
// Computes distances between leaves, never between
|
||||
// joined clusters (leaves this to distance matrix method).
|
||||
class ClustSetMSA : public ClustSet
|
||||
{
|
||||
public:
|
||||
ClustSetMSA(const MSA &msa, MSADist &MD) :
|
||||
m_ptrMSA(&msa),
|
||||
m_ptrMSADist(&MD)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual unsigned GetLeafCount()
|
||||
{
|
||||
return m_ptrMSA->GetSeqCount();
|
||||
}
|
||||
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrMSA->GetSeqName(uNodeIndex);
|
||||
}
|
||||
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrMSA->GetSeqId(uNodeIndex);
|
||||
}
|
||||
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||
double *ptrdLeftLength, double *ptrdRightLength)
|
||||
{
|
||||
Quit("ClustSetMSA::JoinNodes, should never be called");
|
||||
}
|
||||
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||
unsigned uNodeIndex2)
|
||||
{
|
||||
return m_ptrMSADist->ComputeDist(*m_ptrMSA, uNodeIndex1, uNodeIndex2);
|
||||
}
|
||||
|
||||
public:
|
||||
const MSA &GetMSA();
|
||||
|
||||
private:
|
||||
const MSA *m_ptrMSA;
|
||||
MSADist *m_ptrMSADist;
|
||||
};
|
||||
|
||||
#endif // ClustSetMSA_h
|
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
@ -0,0 +1,190 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
Compute weights by the CLUSTALW method.
|
||||
Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
|
||||
see also CLUSTALW paper.
|
||||
|
||||
Weights are computed from the edge lengths of a rooted tree.
|
||||
|
||||
Define the strength of an edge to be its length divided by the number
|
||||
of leaves under that edge. The weight of a sequence is then the sum
|
||||
of edge strengths on the path from the root to the leaf.
|
||||
|
||||
Example.
|
||||
|
||||
0.2
|
||||
-----A 0.1
|
||||
-x ------- B 0.7
|
||||
--------y ----------- C
|
||||
0.3 ----------z
|
||||
0.4 -------------- D
|
||||
0.8
|
||||
|
||||
Edge Length Leaves Strength
|
||||
---- ----- ------ --------
|
||||
xy 0.3 3 0.1
|
||||
xA 0.2 1 0.2
|
||||
yz 0.4 2 0.2
|
||||
yB 0.1 1 0.1
|
||||
zC 0.7 1 0.7
|
||||
zD 0.8 1 0.8
|
||||
|
||||
Leaf Path Strengths Weight
|
||||
---- ---- --------- ------
|
||||
A xA 0.2 0.2
|
||||
B xy-yB 0.1 + 0.1 0.2
|
||||
C xy-yz-zC 0.1 + 0.2 + 0.7 1.0
|
||||
D xy-yz-zD 0.1 + 0.2 + 0.8 1.1
|
||||
|
||||
***/
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
|
||||
unsigned LeavesUnderNode[])
|
||||
{
|
||||
if (tree.IsLeaf(uNodeIndex))
|
||||
{
|
||||
LeavesUnderNode[uNodeIndex] = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const unsigned uLeft = tree.GetLeft(uNodeIndex);
|
||||
const unsigned uRight = tree.GetRight(uNodeIndex);
|
||||
const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
|
||||
const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
|
||||
const unsigned uCount = uRightCount + uLeftCount;
|
||||
LeavesUnderNode[uNodeIndex] = uCount;
|
||||
return uCount;
|
||||
}
|
||||
|
||||
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("CalcClustalWWeights\n");
|
||||
tree.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uLeafCount = tree.GetLeafCount();
|
||||
if (0 == uLeafCount)
|
||||
return;
|
||||
else if (1 == uLeafCount)
|
||||
{
|
||||
Weights[0] = (WEIGHT) 1.0;
|
||||
return;
|
||||
}
|
||||
else if (2 == uLeafCount)
|
||||
{
|
||||
Weights[0] = (WEIGHT) 0.5;
|
||||
Weights[1] = (WEIGHT) 0.5;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!tree.IsRooted())
|
||||
Quit("CalcClustalWWeights requires rooted tree");
|
||||
|
||||
const unsigned uNodeCount = tree.GetNodeCount();
|
||||
unsigned *LeavesUnderNode = new unsigned[uNodeCount];
|
||||
memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
|
||||
|
||||
const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
|
||||
unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
|
||||
if (uLeavesUnderRoot != uLeafCount)
|
||||
Quit("WeightsFromTreee: Internal error, root count %u %u",
|
||||
uLeavesUnderRoot, uLeafCount);
|
||||
|
||||
#if TRACE
|
||||
Log("Node Leaves Length Strength\n");
|
||||
Log("---- ------ -------- --------\n");
|
||||
// 1234 123456 12345678 12345678
|
||||
#endif
|
||||
|
||||
double *Strengths = new double[uNodeCount];
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (tree.IsRoot(uNodeIndex))
|
||||
{
|
||||
Strengths[uNodeIndex] = 0.0;
|
||||
continue;
|
||||
}
|
||||
const unsigned uParent = tree.GetParent(uNodeIndex);
|
||||
const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
|
||||
const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
|
||||
const double dStrength = dLength / (double) uLeaves;
|
||||
Strengths[uNodeIndex] = dStrength;
|
||||
#if TRACE
|
||||
Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("\n");
|
||||
Log(" Seq Path..Weight\n");
|
||||
Log("-------------------- ------------\n");
|
||||
#endif
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||
#if TRACE
|
||||
Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
|
||||
#endif
|
||||
if (!tree.IsLeaf(uLeafNodeIndex))
|
||||
Quit("CalcClustalWWeights: leaf");
|
||||
|
||||
double dWeight = 0;
|
||||
unsigned uNode = uLeafNodeIndex;
|
||||
while (!tree.IsRoot(uNode))
|
||||
{
|
||||
dWeight += Strengths[uNode];
|
||||
uNode = tree.GetParent(uNode);
|
||||
#if TRACE
|
||||
Log("->%u(%g)", uNode, Strengths[uNode]);
|
||||
#endif
|
||||
}
|
||||
if (dWeight < 0.0001)
|
||||
{
|
||||
#if TRACE
|
||||
Log("zero->one");
|
||||
#endif
|
||||
dWeight = 1.0;
|
||||
}
|
||||
Weights[n] = (WEIGHT) dWeight;
|
||||
#if TRACE
|
||||
Log(" = %g\n", dWeight);
|
||||
#endif
|
||||
}
|
||||
|
||||
delete[] Strengths;
|
||||
delete[] LeavesUnderNode;
|
||||
|
||||
Normalize(Weights, uLeafCount);
|
||||
}
|
||||
|
||||
void MSA::SetClustalWWeights(const Tree &tree)
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
const unsigned uLeafCount = tree.GetLeafCount();
|
||||
|
||||
WEIGHT *Weights = new WEIGHT[uSeqCount];
|
||||
|
||||
CalcClustalWWeights(tree, Weights);
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const WEIGHT w = Weights[n];
|
||||
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||
const unsigned uSeqIndex = GetSeqIndex(uId);
|
||||
#if DEBUG
|
||||
if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
|
||||
Quit("MSA::SetClustalWWeights: names don't match");
|
||||
#endif
|
||||
SetSeqWeight(uSeqIndex, w);
|
||||
}
|
||||
NormalizeWeights((WEIGHT) 1.0);
|
||||
|
||||
delete[] Weights;
|
||||
}
|
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
@ -0,0 +1,189 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
|
||||
static int Blosum62[23][23] =
|
||||
{
|
||||
// A B C D E F G H I K L M N P Q R S T V W X Y Z
|
||||
+4, -2, +0, -2, -1, -2, +0, -2, -1, -1, -1, -1, -2, -1, -1, -1, +1, +0, +0, -3, -1, -2, -1, // A
|
||||
-2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2, // B
|
||||
+0, -3, +9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -1, -2, -4, // C
|
||||
-2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2, // D
|
||||
-1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5, // E
|
||||
|
||||
-2, -3, -2, -3, -3, +6, -3, -1, +0, -3, +0, +0, -3, -4, -3, -3, -2, -2, -1, +1, -1, +3, -3, // F
|
||||
+0, -1, -3, -1, -2, -3, +6, -2, -4, -2, -4, -3, +0, -2, -2, -2, +0, -2, -3, -2, -1, -3, -2, // G
|
||||
-2, -1, -3, -1, +0, -1, -2, +8, -3, -1, -3, -2, +1, -2, +0, +0, -1, -2, -3, -2, -1, +2, +0, // H
|
||||
-1, -3, -1, -3, -3, +0, -4, -3, +4, -3, +2, +1, -3, -3, -3, -3, -2, -1, +3, -3, -1, -1, -3, // I
|
||||
-1, -1, -3, -1, +1, -3, -2, -1, -3, +5, -2, -1, +0, -1, +1, +2, +0, -1, -2, -3, -1, -2, +1, // K
|
||||
|
||||
-1, -4, -1, -4, -3, +0, -4, -3, +2, -2, +4, +2, -3, -3, -2, -2, -2, -1, +1, -2, -1, -1, -3, // L
|
||||
-1, -3, -1, -3, -2, +0, -3, -2, +1, -1, +2, +5, -2, -2, +0, -1, -1, -1, +1, -1, -1, -1, -2, // M
|
||||
-2, +1, -3, +1, +0, -3, +0, +1, -3, +0, -3, -2, +6, -2, +0, +0, +1, +0, -3, -4, -1, -2, +0, // N
|
||||
-1, -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, +7, -1, -2, -1, -1, -2, -4, -1, -3, -1, // P
|
||||
-1, +0, -3, +0, +2, -3, -2, +0, -3, +1, -2, +0, +0, -1, +5, +1, +0, -1, -2, -2, -1, -1, +2, // Q
|
||||
|
||||
-1, -2, -3, -2, +0, -3, -2, +0, -3, +2, -2, -1, +0, -2, +1, +5, -1, -1, -3, -3, -1, -2, +0, // R
|
||||
+1, +0, -1, +0, +0, -2, +0, -1, -2, +0, -2, -1, +1, -1, +0, -1, +4, +1, -2, -3, -1, -2, +0, // S
|
||||
+0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, +0, -1, -1, -1, +1, +5, +0, -2, -1, -2, -1, // T
|
||||
+0, -3, -1, -3, -2, -1, -3, -3, +3, -2, +1, +1, -3, -2, -2, -3, -2, +0, +4, -3, -1, -1, -2, // V
|
||||
-3, -4, -2, -4, -3, +1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3,+11, -1, +2, -3, // W
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // X
|
||||
-2, -3, -2, -3, -2, +3, -3, +2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, +2, -1, +7, -2, // Y
|
||||
-1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5, // Z
|
||||
};
|
||||
|
||||
static int toi_tab[26] =
|
||||
{
|
||||
0, // A
|
||||
1, // B
|
||||
2, // C
|
||||
3, // D
|
||||
4, // E
|
||||
5, // F
|
||||
6, // G
|
||||
7, // H
|
||||
8, // I
|
||||
-1, // J
|
||||
9, // K
|
||||
10, // L
|
||||
11, // M
|
||||
12, // N
|
||||
-1, // O
|
||||
13, // P
|
||||
14, // Q
|
||||
15, // R
|
||||
16, // S
|
||||
17, // T
|
||||
17, // U
|
||||
18, // V
|
||||
19, // W
|
||||
20, // X
|
||||
21, // Y
|
||||
22, // Z
|
||||
};
|
||||
|
||||
static int toi(char c)
|
||||
{
|
||||
c = toupper(c);
|
||||
return toi_tab[c - 'A'];
|
||||
}
|
||||
|
||||
static int BlosumScore(char c1, char c2)
|
||||
{
|
||||
int i1 = toi(c1);
|
||||
int i2 = toi(c2);
|
||||
return Blosum62[i1][i2];
|
||||
}
|
||||
|
||||
/***
|
||||
Consider a column with 5 As and 3 Bs.
|
||||
There are:
|
||||
5x4 pairs of As.
|
||||
3x2 pairs of Bs.
|
||||
5x3x2 AB pairs
|
||||
8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters
|
||||
***/
|
||||
static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
|
||||
{
|
||||
int iCounts[23];
|
||||
memset(iCounts, 0, sizeof(iCounts));
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned uCharCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
continue;
|
||||
int iChar = toi(c);
|
||||
++iCounts[iChar];
|
||||
++uCharCount;
|
||||
}
|
||||
if (uCharCount < 2)
|
||||
return -9;
|
||||
int iTotalScore = 0;
|
||||
for (int i1 = 0; i1 < 23; ++i1)
|
||||
{
|
||||
int iCounts1 = iCounts[i1];
|
||||
iTotalScore += iCounts1*(iCounts1 - 1)*Blosum62[i1][i1];
|
||||
for (int i2 = i1 + 1; i2 < 23; ++i2)
|
||||
iTotalScore += iCounts[i2]*iCounts1*2*Blosum62[i1][i2];
|
||||
}
|
||||
int iPairCount = uCharCount*(uCharCount - 1);
|
||||
return (double) iTotalScore / (double) iPairCount;
|
||||
}
|
||||
|
||||
/***
|
||||
Consider a column with 5 As and 3 Bs.
|
||||
A residue of type Q scores:
|
||||
5xAQ + 3xBQ
|
||||
***/
|
||||
static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
|
||||
{
|
||||
int iCounts[23];
|
||||
memset(iCounts, 0, sizeof(iCounts));
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned uCharCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
continue;
|
||||
int iChar = toi(c);
|
||||
++iCounts[iChar];
|
||||
++uCharCount;
|
||||
}
|
||||
int iMostConservedType = -1;
|
||||
int iMostConservedCount = -1;
|
||||
for (unsigned i = 0; i < 23; ++i)
|
||||
{
|
||||
if (iCounts[i] > iMostConservedCount)
|
||||
{
|
||||
iMostConservedType = i;
|
||||
iMostConservedCount = iCounts[i];
|
||||
}
|
||||
}
|
||||
|
||||
double dColScore = BlosumScoreCol(a, uColIndex);
|
||||
int c;
|
||||
if (dColScore >= 3.0)
|
||||
c = 3;
|
||||
//else if (dColScore >= 1.0)
|
||||
// c = 2;
|
||||
else if (dColScore >= 0.2)
|
||||
c = 1;
|
||||
else
|
||||
c = 0;
|
||||
|
||||
int Color[23];
|
||||
for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
|
||||
{
|
||||
double dScore = Blosum62[uLetter][iMostConservedType];
|
||||
if (dScore >= dColScore)
|
||||
Color[uLetter] = c;
|
||||
else
|
||||
Color[uLetter] = 0;
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
{
|
||||
Colors[uSeqIndex][uColIndex] = 0;
|
||||
continue;
|
||||
}
|
||||
int iLetter = toi(c);
|
||||
if (iLetter >= 0 && iLetter < 23)
|
||||
Colors[uSeqIndex][uColIndex] = Color[iLetter];
|
||||
else
|
||||
Colors[uSeqIndex][uColIndex] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AssignColors(const MSA &a, int **Colors)
|
||||
{
|
||||
const unsigned uColCount = a.GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
AssignColorsCol(a, uColIndex, Colors);
|
||||
}
|
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
@ -0,0 +1,118 @@
|
||||
/***
|
||||
Conservation value for a column in an MSA is defined as the number
|
||||
of times the most common letter appears divided by the number of
|
||||
sequences.
|
||||
***/
|
||||
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include <math.h>
|
||||
|
||||
double MSA::GetAvgCons() const
|
||||
{
|
||||
assert(GetSeqCount() > 0);
|
||||
double dSum = 0;
|
||||
unsigned uNonGapColCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)
|
||||
{
|
||||
if (!IsGapColumn(uColIndex))
|
||||
{
|
||||
dSum += GetCons(uColIndex);
|
||||
++uNonGapColCount;
|
||||
}
|
||||
}
|
||||
assert(uNonGapColCount > 0);
|
||||
double dAvg = dSum / uNonGapColCount;
|
||||
assert(dAvg > 0 && dAvg <= 1);
|
||||
return dAvg;
|
||||
}
|
||||
|
||||
double MSA::GetCons(unsigned uColIndex) const
|
||||
{
|
||||
unsigned Counts[MAX_ALPHA];
|
||||
for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
|
||||
Counts[uLetter] = 0;
|
||||
|
||||
unsigned uMaxCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex, uColIndex))
|
||||
continue;
|
||||
char c = GetChar(uSeqIndex, uColIndex);
|
||||
c = toupper(c);
|
||||
if ('X' == c || 'B' == c || 'Z' == c)
|
||||
continue;
|
||||
unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||
unsigned uCount = Counts[uLetter] + 1;
|
||||
if (uCount > uMaxCount)
|
||||
uMaxCount = uCount;
|
||||
Counts[uLetter] = uCount;
|
||||
}
|
||||
|
||||
// Cons is undefined for all-gap column
|
||||
if (0 == uMaxCount)
|
||||
{
|
||||
// assert(false);
|
||||
return 1;
|
||||
}
|
||||
|
||||
double dCons = (double) uMaxCount / (double) GetSeqCount();
|
||||
assert(dCons > 0 && dCons <= 1);
|
||||
return dCons;
|
||||
}
|
||||
|
||||
// Perecent identity of a pair of sequences.
|
||||
// Positions with one or both gapped are ignored.
|
||||
double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
unsigned uPosCount = 0;
|
||||
unsigned uSameCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
const char c1 = GetChar(uSeqIndex1, uColIndex);
|
||||
const char c2 = GetChar(uSeqIndex2, uColIndex);
|
||||
if (IsGapChar(c1) || IsGapChar(c2))
|
||||
continue;
|
||||
if (c1 == c2)
|
||||
++uSameCount;
|
||||
++uPosCount;
|
||||
}
|
||||
if (0 == uPosCount)
|
||||
return 0;
|
||||
return (double) uSameCount / (double) uPosCount;
|
||||
}
|
||||
|
||||
// Perecent group identity of a pair of sequences.
|
||||
// Positions with one or both gapped are ignored.
|
||||
double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
|
||||
unsigned uSeqIndex2) const
|
||||
{
|
||||
extern unsigned ResidueGroup[];
|
||||
|
||||
const unsigned uColCount = GetColCount();
|
||||
unsigned uPosCount = 0;
|
||||
unsigned uSameCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex1, uColIndex))
|
||||
continue;
|
||||
if (IsGap(uSeqIndex2, uColIndex))
|
||||
continue;
|
||||
if (IsWildcard(uSeqIndex1, uColIndex))
|
||||
continue;
|
||||
if (IsWildcard(uSeqIndex2, uColIndex))
|
||||
continue;
|
||||
|
||||
const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
|
||||
const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
|
||||
const unsigned uGroup1 = ResidueGroup[uLetter1];
|
||||
const unsigned uGroup2 = ResidueGroup[uLetter2];
|
||||
if (uGroup1 == uGroup2)
|
||||
++uSameCount;
|
||||
++uPosCount;
|
||||
}
|
||||
if (0 == uPosCount)
|
||||
return 0;
|
||||
return (double) uSameCount / (double) uPosCount;
|
||||
}
|
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
@ -0,0 +1,378 @@
|
||||
#include "muscle.h"
|
||||
#include "diaglist.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
void DiagList::Add(const Diag &d)
|
||||
{
|
||||
if (m_uCount == MAX_DIAGS)
|
||||
Quit("DiagList::Add, overflow %u", m_uCount);
|
||||
m_Diags[m_uCount] = d;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
|
||||
{
|
||||
Diag d;
|
||||
d.m_uStartPosA = uStartPosA;
|
||||
d.m_uStartPosB = uStartPosB;
|
||||
d.m_uLength = uLength;
|
||||
Add(d);
|
||||
}
|
||||
|
||||
const Diag &DiagList::Get(unsigned uIndex) const
|
||||
{
|
||||
if (uIndex >= m_uCount)
|
||||
Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
|
||||
return m_Diags[uIndex];
|
||||
}
|
||||
|
||||
void DiagList::LogMe() const
|
||||
{
|
||||
Log("DiagList::LogMe, count=%u\n", m_uCount);
|
||||
Log(" n StartA StartB Length\n");
|
||||
Log("--- ------ ------ ------\n");
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
const Diag &d = m_Diags[n];
|
||||
Log("%3u %6u %6u %6u\n",
|
||||
n, d.m_uStartPosA, d.m_uStartPosB, d.m_uLength);
|
||||
}
|
||||
}
|
||||
|
||||
void DiagList::FromPath(const PWPath &Path)
|
||||
{
|
||||
Clear();
|
||||
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
unsigned uLength = 0;
|
||||
unsigned uStartPosA;
|
||||
unsigned uStartPosB;
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
|
||||
// Typical cases
|
||||
if (Edge.cType == 'M')
|
||||
{
|
||||
if (0 == uLength)
|
||||
{
|
||||
uStartPosA = Edge.uPrefixLengthA - 1;
|
||||
uStartPosB = Edge.uPrefixLengthB - 1;
|
||||
}
|
||||
++uLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
Add(uStartPosA, uStartPosB, uLength);
|
||||
uLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for last edge
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
Add(uStartPosA, uStartPosB, uLength);
|
||||
}
|
||||
|
||||
bool DiagList::NonZeroIntersection(const Diag &d) const
|
||||
{
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
const Diag &d2 = m_Diags[n];
|
||||
if (DiagOverlap(d, d2) > 0)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// DialogOverlap returns the length of the overlapping
|
||||
// section of the two diagonals along the diagonals
|
||||
// themselves; in other words, the length of
|
||||
// the intersection of the two sets of cells in
|
||||
// the matrix.
|
||||
unsigned DiagOverlap(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
// Determine where the diagonals intersect the A
|
||||
// axis (extending them if required). If they
|
||||
// intersect at different points, they do not
|
||||
// overlap. Coordinates on a diagonal are
|
||||
// given by B = A + c where c is the value of
|
||||
// A at the intersection with the A axis.
|
||||
// Hence, c = B - A for any point on the diagonal.
|
||||
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||
if (c1 != c2)
|
||||
return 0;
|
||||
|
||||
assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
|
||||
return DiagOverlapA(d1, d2);
|
||||
}
|
||||
|
||||
// DialogOverlapA returns the length of the overlapping
|
||||
// section of the projection of the two diagonals onto
|
||||
// the A axis.
|
||||
unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
unsigned uMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||
unsigned uMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||
d2.m_uStartPosA + d2.m_uLength - 1);
|
||||
|
||||
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||
if (iLength < 0)
|
||||
return 0;
|
||||
return (unsigned) iLength;
|
||||
}
|
||||
|
||||
// DialogOverlapB returns the length of the overlapping
|
||||
// section of the projection of the two diagonals onto
|
||||
// the B axis.
|
||||
unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
unsigned uMaxStart = MAX(d1.m_uStartPosB, d2.m_uStartPosB);
|
||||
unsigned uMinEnd = MIN(d1.m_uStartPosB + d1.m_uLength - 1,
|
||||
d2.m_uStartPosB + d2.m_uLength - 1);
|
||||
|
||||
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||
if (iLength < 0)
|
||||
return 0;
|
||||
return (unsigned) iLength;
|
||||
}
|
||||
|
||||
// Returns true if the two diagonals can be on the
|
||||
// same path through the DP matrix. If DiagCompatible
|
||||
// returns false, they cannot be in the same path
|
||||
// and hence "contradict" each other.
|
||||
bool DiagCompatible(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
if (DiagOverlap(d1, d2) > 0)
|
||||
return true;
|
||||
return 0 == DiagOverlapA(d1, d2) && 0 == DiagOverlapB(d1, d2);
|
||||
}
|
||||
|
||||
// Returns the length of the "break" between two diagonals.
|
||||
unsigned DiagBreak(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||
if (c1 != c2)
|
||||
return 0;
|
||||
|
||||
int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||
int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||
d2.m_uStartPosA + d1.m_uLength - 1);
|
||||
int iBreak = iMaxStart - iMinEnd - 1;
|
||||
if (iBreak < 0)
|
||||
return 0;
|
||||
return (unsigned) iBreak;
|
||||
}
|
||||
|
||||
// Merge diagonals that are continuations of each other with
|
||||
// short breaks of up to length g_uMaxDiagBreak.
|
||||
// In a sorted list of diagonals, we only have to check
|
||||
// consecutive entries.
|
||||
void MergeDiags(DiagList &DL)
|
||||
{
|
||||
return;
|
||||
#if DEBUG
|
||||
if (!DL.IsSorted())
|
||||
Quit("MergeDiags: !IsSorted");
|
||||
#endif
|
||||
|
||||
// TODO: Fix this!
|
||||
// Breaks must be with no offset (no gaps)
|
||||
const unsigned uCount = DL.GetCount();
|
||||
if (uCount <= 1)
|
||||
return;
|
||||
|
||||
DiagList NewList;
|
||||
|
||||
Diag MergedDiag;
|
||||
const Diag *ptrPrev = &DL.Get(0);
|
||||
for (unsigned i = 1; i < uCount; ++i)
|
||||
{
|
||||
const Diag *ptrDiag = &DL.Get(i);
|
||||
unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag);
|
||||
if (uBreakLength <= g_uMaxDiagBreak)
|
||||
{
|
||||
MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA;
|
||||
MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB;
|
||||
MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength
|
||||
+ uBreakLength;
|
||||
ptrPrev = &MergedDiag;
|
||||
}
|
||||
else
|
||||
{
|
||||
NewList.Add(*ptrPrev);
|
||||
ptrPrev = ptrDiag;
|
||||
}
|
||||
}
|
||||
NewList.Add(*ptrPrev);
|
||||
DL.Copy(NewList);
|
||||
}
|
||||
|
||||
void DiagList::DeleteIncompatible()
|
||||
{
|
||||
assert(IsSorted());
|
||||
|
||||
if (m_uCount < 2)
|
||||
return;
|
||||
|
||||
bool *bFlagForDeletion = new bool[m_uCount];
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
bFlagForDeletion[i] = false;
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const Diag &di = m_Diags[i];
|
||||
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||
{
|
||||
const Diag &dj = m_Diags[j];
|
||||
|
||||
// Verify sorted correctly
|
||||
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||
|
||||
// If two diagonals are incompatible and
|
||||
// one is is much longer than the other,
|
||||
// keep the longer one.
|
||||
if (!DiagCompatible(di, dj))
|
||||
{
|
||||
if (di.m_uLength > dj.m_uLength*4)
|
||||
bFlagForDeletion[j] = true;
|
||||
else if (dj.m_uLength > di.m_uLength*4)
|
||||
bFlagForDeletion[i] = true;
|
||||
else
|
||||
{
|
||||
bFlagForDeletion[i] = true;
|
||||
bFlagForDeletion[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const Diag &di = m_Diags[i];
|
||||
if (bFlagForDeletion[i])
|
||||
continue;
|
||||
|
||||
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||
{
|
||||
const Diag &dj = m_Diags[j];
|
||||
if (bFlagForDeletion[j])
|
||||
continue;
|
||||
|
||||
// Verify sorted correctly
|
||||
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||
|
||||
// If sort order in B different from sorted order in A,
|
||||
// either diags are incompatible or we detected a repeat
|
||||
// or permutation.
|
||||
if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
|
||||
{
|
||||
bFlagForDeletion[i] = true;
|
||||
bFlagForDeletion[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned uNewCount = 0;
|
||||
Diag *NewDiags = new Diag[m_uCount];
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
if (bFlagForDeletion[i])
|
||||
continue;
|
||||
|
||||
const Diag &d = m_Diags[i];
|
||||
NewDiags[uNewCount] = d;
|
||||
++uNewCount;
|
||||
}
|
||||
memcpy(m_Diags, NewDiags, uNewCount*sizeof(Diag));
|
||||
m_uCount = uNewCount;
|
||||
delete[] NewDiags;
|
||||
}
|
||||
|
||||
void DiagList::Copy(const DiagList &DL)
|
||||
{
|
||||
Clear();
|
||||
unsigned uCount = DL.GetCount();
|
||||
for (unsigned i = 0; i < uCount; ++i)
|
||||
Add(DL.Get(i));
|
||||
}
|
||||
|
||||
// Check if sorted in increasing order of m_uStartPosA
|
||||
bool DiagList::IsSorted() const
|
||||
{
|
||||
return true;
|
||||
unsigned uCount = GetCount();
|
||||
for (unsigned i = 1; i < uCount; ++i)
|
||||
if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sort in increasing order of m_uStartPosA
|
||||
// Dumb bubble sort, but don't care about speed
|
||||
// because don't get long lists.
|
||||
void DiagList::Sort()
|
||||
{
|
||||
if (m_uCount < 2)
|
||||
return;
|
||||
|
||||
bool bContinue = true;
|
||||
while (bContinue)
|
||||
{
|
||||
bContinue = false;
|
||||
for (unsigned i = 0; i < m_uCount - 1; ++i)
|
||||
{
|
||||
if (m_Diags[i].m_uStartPosA > m_Diags[i+1].m_uStartPosA)
|
||||
{
|
||||
Diag Tmp = m_Diags[i];
|
||||
m_Diags[i] = m_Diags[i+1];
|
||||
m_Diags[i+1] = Tmp;
|
||||
bContinue = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//void TestDiag()
|
||||
// {
|
||||
// Diag d1;
|
||||
// Diag d2;
|
||||
// Diag d3;
|
||||
//
|
||||
// d1.m_uStartPosA = 0;
|
||||
// d1.m_uStartPosB = 1;
|
||||
// d1.m_uLength = 32;
|
||||
//
|
||||
// d2.m_uStartPosA = 55;
|
||||
// d2.m_uStartPosB = 70;
|
||||
// d2.m_uLength = 36;
|
||||
//
|
||||
// d3.m_uStartPosA = 102;
|
||||
// d3.m_uStartPosB = 122;
|
||||
// d3.m_uLength = 50;
|
||||
//
|
||||
// DiagList DL;
|
||||
// DL.Add(d1);
|
||||
// DL.Add(d2);
|
||||
// DL.Add(d3);
|
||||
//
|
||||
// Log("Before DeleteIncompatible:\n");
|
||||
// DL.LogMe();
|
||||
// DL.DeleteIncompatible();
|
||||
//
|
||||
// Log("After DeleteIncompatible:\n");
|
||||
// DL.LogMe();
|
||||
//
|
||||
// MergeDiags(DL);
|
||||
// Log("After Merge:\n");
|
||||
// DL.LogMe();
|
||||
//
|
||||
// DPRegionList RL;
|
||||
// DiagListToDPRegionList(DL, RL, 200, 200);
|
||||
// RL.LogMe();
|
||||
// }
|
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
@ -0,0 +1,89 @@
|
||||
#ifndef diaglist_h
|
||||
#define diaglist_h
|
||||
|
||||
const unsigned EMPTY = (unsigned) ~0;
|
||||
const unsigned MAX_DIAGS = 1024;
|
||||
|
||||
struct Diag
|
||||
{
|
||||
unsigned m_uStartPosA;
|
||||
unsigned m_uStartPosB;
|
||||
unsigned m_uLength;
|
||||
};
|
||||
|
||||
struct Rect
|
||||
{
|
||||
unsigned m_uStartPosA;
|
||||
unsigned m_uStartPosB;
|
||||
unsigned m_uLengthA;
|
||||
unsigned m_uLengthB;
|
||||
};
|
||||
|
||||
class DiagList
|
||||
{
|
||||
public:
|
||||
DiagList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DiagList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void FromPath(const PWPath &Path);
|
||||
void Add(const Diag &d);
|
||||
void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength);
|
||||
void DeleteIncompatible();
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
const Diag &Get(unsigned uIndex) const;
|
||||
|
||||
// Operations
|
||||
void Sort();
|
||||
void Copy(const DiagList &DL);
|
||||
|
||||
// Query
|
||||
// returns true iff given diagonal is included in the list
|
||||
// in whole or in part.
|
||||
bool NonZeroIntersection(const Diag &d) const;
|
||||
bool IsSorted() const;
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
Diag m_Diags[MAX_DIAGS];
|
||||
};
|
||||
|
||||
unsigned DiagOverlap(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagBreak(const Diag &d1, const Diag &d2);
|
||||
bool DiagCompatible(const Diag &d1, const Diag &d2);
|
||||
void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
|
||||
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL);
|
||||
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL);
|
||||
void MergeDiags(DiagList &DL);
|
||||
|
||||
#endif // diaglist_h
|
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
@ -0,0 +1,162 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
#define COMPARE_3_52 0
|
||||
#define BRUTE_LETTERS 0
|
||||
|
||||
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
|
||||
{
|
||||
SCOREMATRIX &Mx = *g_ptrScoreMatrix;
|
||||
const unsigned uSeqCount = msa.GetSeqCount();
|
||||
|
||||
#if BRUTE_LETTERS
|
||||
SCORE BruteScore = 0;
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
|
||||
if (uLetter1 >= g_AlphaSize)
|
||||
continue;
|
||||
WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
|
||||
for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
|
||||
{
|
||||
unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
|
||||
if (uLetter2 >= g_AlphaSize)
|
||||
continue;
|
||||
WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
|
||||
BruteScore += w1*w2*Mx[uLetter1][uLetter2];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
double N = 0;
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
|
||||
N += w;
|
||||
}
|
||||
if (N <= 0)
|
||||
return 0;
|
||||
|
||||
FCOUNT Freqs[20];
|
||||
memset(Freqs, 0, sizeof(Freqs));
|
||||
SCORE Score = 0;
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex);
|
||||
if (uLetter >= g_AlphaSize)
|
||||
continue;
|
||||
WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
|
||||
Freqs[uLetter] += w;
|
||||
Score -= w*w*Mx[uLetter][uLetter];
|
||||
}
|
||||
|
||||
for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
|
||||
{
|
||||
const FCOUNT f1 = Freqs[uLetter1];
|
||||
Score += f1*f1*Mx[uLetter1][uLetter1];
|
||||
for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2)
|
||||
{
|
||||
const FCOUNT f2 = Freqs[uLetter2];
|
||||
Score += 2*f1*f2*Mx[uLetter1][uLetter2];
|
||||
}
|
||||
}
|
||||
Score /= 2;
|
||||
#if BRUTE_LETTERS
|
||||
assert(BTEq(BruteScore, Score));
|
||||
#endif
|
||||
return Score;
|
||||
}
|
||||
|
||||
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
|
||||
unsigned uEdgeCount)
|
||||
{
|
||||
const unsigned uSeqCount = msa.GetSeqCount();
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
|
||||
// Letters
|
||||
SCORE Score = 0;
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const unsigned uColIndex = Edges[uEdgeIndex];
|
||||
assert(uColIndex < uColCount);
|
||||
Score += ScoreColLetters(msa, uColIndex);
|
||||
}
|
||||
return Score;
|
||||
}
|
||||
|
||||
void GetLetterScores(const MSA &msa, SCORE Scores[])
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
|
||||
}
|
||||
|
||||
SCORE DiffObjScore(
|
||||
const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1,
|
||||
const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
|
||||
{
|
||||
#if TRACE
|
||||
{
|
||||
Log("============DiffObjScore===========\n");
|
||||
Log("msa1:\n");
|
||||
msa1.LogMe();
|
||||
Log("\n");
|
||||
Log("Cols1: ");
|
||||
for (unsigned i = 0; i < uEdgeCount1; ++i)
|
||||
Log(" %u", Edges1[i]);
|
||||
Log("\n\n");
|
||||
Log("msa2:\n");
|
||||
msa2.LogMe();
|
||||
Log("Cols2: ");
|
||||
for (unsigned i = 0; i < uEdgeCount2; ++i)
|
||||
Log(" %u", Edges2[i]);
|
||||
Log("\n\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if COMPARE_3_52
|
||||
extern SCORE g_SPScoreLetters;
|
||||
extern SCORE g_SPScoreGaps;
|
||||
SCORE SP1 = ObjScoreSP(msa1);
|
||||
SCORE SPLetters1 = g_SPScoreLetters;
|
||||
SCORE SPGaps1 = g_SPScoreGaps;
|
||||
|
||||
SCORE SP2 = ObjScoreSP(msa2);
|
||||
SCORE SPLetters2 = g_SPScoreLetters;
|
||||
SCORE SPGaps2 = g_SPScoreGaps;
|
||||
SCORE SPDiffLetters = SPLetters2 - SPLetters1;
|
||||
SCORE SPDiffGaps = SPGaps2 - SPGaps1;
|
||||
SCORE SPDiff = SPDiffLetters + SPDiffGaps;
|
||||
#endif
|
||||
|
||||
SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
|
||||
SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);
|
||||
|
||||
SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
|
||||
SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);
|
||||
|
||||
SCORE DiffLetters = Letters2 - Letters1;
|
||||
SCORE DiffGaps = Gaps2 - Gaps1;
|
||||
SCORE Diff = DiffLetters + DiffGaps;
|
||||
|
||||
#if COMPARE_3_52
|
||||
Log("ObjScoreSP Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
|
||||
SPLetters1, SPLetters2, SPDiffLetters);
|
||||
|
||||
Log("DiffObjScore Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
|
||||
Letters1, Letters2, DiffLetters);
|
||||
|
||||
Log("ObjScoreSP Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
|
||||
SPGaps1, SPGaps2, SPDiffGaps);
|
||||
|
||||
Log("DiffObjScore Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
|
||||
Gaps1, Gaps2, DiffGaps);
|
||||
|
||||
Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
|
||||
#endif
|
||||
|
||||
return Diff;
|
||||
}
|
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
|
||||
unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
|
||||
{
|
||||
#if TRACE
|
||||
Log("DiffPaths\n");
|
||||
Log("p1=");
|
||||
p1.LogMe();
|
||||
Log("p2=");
|
||||
p2.LogMe();
|
||||
#endif
|
||||
const unsigned uEdgeCount1 = p1.GetEdgeCount();
|
||||
const unsigned uEdgeCount2 = p2.GetEdgeCount();
|
||||
|
||||
unsigned uDiffCount1 = 0;
|
||||
unsigned uDiffCount2 = 0;
|
||||
unsigned uEdgeIndex1 = 0;
|
||||
unsigned uEdgeIndex2 = 0;
|
||||
const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||
const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||
for (;;)
|
||||
{
|
||||
unsigned uEdgeIndexTop1 = uEdgeIndex1;
|
||||
unsigned uEdgeIndexTop2 = uEdgeIndex2;
|
||||
Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||
Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||
#if TRACE
|
||||
Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c DC1=%u DC2=%u\n",
|
||||
uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
|
||||
uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
|
||||
uDiffCount1, uDiffCount2);
|
||||
#endif
|
||||
if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
|
||||
Edge1->uPrefixLengthB == Edge2->uPrefixLengthB)
|
||||
{
|
||||
if (!Edge1->Equal(*Edge2))
|
||||
{
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1;
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2;
|
||||
}
|
||||
++uEdgeIndex1;
|
||||
++uEdgeIndex2;
|
||||
}
|
||||
|
||||
else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
|
||||
Edge2->uPrefixLengthB < Edge1->uPrefixLengthB)
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||
|
||||
else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
|
||||
Edge1->uPrefixLengthB < Edge2->uPrefixLengthB)
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||
|
||||
if (uEdgeCount1 == uEdgeIndex1)
|
||||
{
|
||||
while (uEdgeIndex2 < uEdgeCount2)
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||
goto Done;
|
||||
}
|
||||
if (uEdgeCount2 == uEdgeIndex2)
|
||||
{
|
||||
while (uEdgeIndex1 < uEdgeCount1)
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||
goto Done;
|
||||
}
|
||||
if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2)
|
||||
Quit("DiffPaths stuck");
|
||||
}
|
||||
Done:;
|
||||
#if TRACE
|
||||
Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
|
||||
Log("Diffs1=");
|
||||
for (unsigned i = 0; i < uDiffCount1; ++i)
|
||||
{
|
||||
const PWEdge e = p1.GetEdge(Edges1[i]);
|
||||
Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||
}
|
||||
Log("\n");
|
||||
Log("DiffCount2=%u\n", uDiffCount2);
|
||||
Log("Diffs2=");
|
||||
for (unsigned i = 0; i < uDiffCount2; ++i)
|
||||
{
|
||||
const PWEdge e = p2.GetEdge(Edges2[i]);
|
||||
Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||
}
|
||||
Log("\n");
|
||||
#endif
|
||||
*ptruDiffCount1 = uDiffCount1;
|
||||
*ptruDiffCount2 = uDiffCount2;
|
||||
}
|
||||
|
||||
void TestDiffPaths()
|
||||
{
|
||||
PWPath p1;
|
||||
PWPath p2;
|
||||
|
||||
p1.AppendEdge('M', 1, 1);
|
||||
p1.AppendEdge('M', 2, 2);
|
||||
p1.AppendEdge('M', 3, 3);
|
||||
|
||||
p2.AppendEdge('M', 1, 1);
|
||||
p2.AppendEdge('D', 2, 1);
|
||||
p2.AppendEdge('I', 2, 2);
|
||||
p2.AppendEdge('M', 3, 3);
|
||||
|
||||
unsigned Edges1[64];
|
||||
unsigned Edges2[64];
|
||||
unsigned uDiffCount1;
|
||||
unsigned uDiffCount2;
|
||||
DiffPaths(p1, p2, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
|
||||
}
|
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
@ -0,0 +1,381 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
/***
|
||||
Algorithm to compare two trees, X and Y.
|
||||
|
||||
A node x in X and node y in Y are defined to be
|
||||
similar iff the set of leaves in the subtree under
|
||||
x is identical to the set of leaves under y.
|
||||
|
||||
A node is defined to be dissimilar iff it is not
|
||||
similar to any node in the other tree.
|
||||
|
||||
Nodes x and y are defined to be married iff every
|
||||
node in the subtree under x is similar to a node
|
||||
in the subtree under y. Married nodes are considered
|
||||
to be equal. The subtrees under two married nodes can
|
||||
at most differ by exchanges of left and right branches,
|
||||
which we do not consider to be significant here.
|
||||
|
||||
A node is defined to be a bachelor iff it is not
|
||||
married. If a node is a bachelor, then it has a
|
||||
dissimilar node in its subtree, and it follows
|
||||
immediately from the definition of marriage that its
|
||||
parent is also a bachelor. Hence all nodes on the path
|
||||
from a bachelor node to the root are bachelors.
|
||||
|
||||
We assume the trees have the same set of leaves, so
|
||||
every leaf is trivially both similar and married to
|
||||
the same leaf in the opposite tree. Bachelor nodes
|
||||
are therefore always internal (i.e., non-leaf) nodes.
|
||||
|
||||
A node is defined to be a diff iff (a) it is married
|
||||
and (b) its parent is a bachelor. The subtree under
|
||||
a diff is maximally similar to the other tree. (In
|
||||
other words, you cannot extend the subtree without
|
||||
adding a bachelor).
|
||||
|
||||
The set of diffs is the subset of the two trees that
|
||||
we consider to be identical.
|
||||
|
||||
Example:
|
||||
|
||||
-----A
|
||||
-----k
|
||||
----j -----B
|
||||
--i -----C
|
||||
------D
|
||||
|
||||
|
||||
-----A
|
||||
-----p
|
||||
----n -----B
|
||||
--m -----D
|
||||
------C
|
||||
|
||||
|
||||
The following pairs of internal nodes are similar.
|
||||
|
||||
Nodes Set of leaves
|
||||
----- -------------
|
||||
k,p A,B
|
||||
i,m A,B,C,D
|
||||
|
||||
Bachelors in the first tree are i and j, bachelors
|
||||
in the second tree are m and n.
|
||||
|
||||
Node k and p are married, but i and m are not (because j
|
||||
and n are bachelors). The diffs are C, D and k.
|
||||
|
||||
The set of bachelor nodes can be viewed as the internal
|
||||
nodes of a tree, the leaves of which are diffs. (To see
|
||||
that there can't be disjoint subtrees, note that the path
|
||||
from a diff to a root is all bachelor nodes, so there is
|
||||
always a path between two diffs that goes through the root).
|
||||
We call this tree the "diffs tree".
|
||||
|
||||
There is a simple O(N) algorithm to build the diffs tree.
|
||||
To achieve O(N) we avoid traversing a given subtree multiple
|
||||
times and also avoid comparing lists of leaves.
|
||||
|
||||
We visit nodes in depth-first order (i.e., a node is visited
|
||||
before its parent).
|
||||
|
||||
If either child of a node is a bachelor, we flag it as
|
||||
a bachelor.
|
||||
|
||||
If both children of the node we are visiting are married,
|
||||
we check whether the spouses of those children have the
|
||||
same parent in the other tree. If the parents are different,
|
||||
the current node is a bachelor. If they have the same parent,
|
||||
then the node we are visiting is the spouse of that parent.
|
||||
We assign this newly identified married couple a unique integer
|
||||
id. The id of a node is in one-to-one correspondence with the
|
||||
set of leaves in its subtree. Two nodes have the same set of
|
||||
leaves iff they have the same id. Bachelor nodes do not get
|
||||
an id.
|
||||
***/
|
||||
|
||||
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
|
||||
const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
|
||||
unsigned IdToDiffsLeafNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
|
||||
uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
|
||||
#endif
|
||||
if (bIsDiff[uTreeNodeIndex])
|
||||
{
|
||||
unsigned uLeafCount = tree.GetLeafCount();
|
||||
unsigned *Leaves = new unsigned[uLeafCount];
|
||||
GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const unsigned uLeafNodeIndex = Leaves[n];
|
||||
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||
if (uId >= tree.GetLeafCount())
|
||||
Quit("BuildDiffs, id out of range");
|
||||
IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
|
||||
#if TRACE
|
||||
Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
|
||||
#endif
|
||||
}
|
||||
delete[] Leaves;
|
||||
return;
|
||||
}
|
||||
|
||||
if (tree.IsLeaf(uTreeNodeIndex))
|
||||
Quit("BuildDiffs: should never reach leaf");
|
||||
|
||||
const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
|
||||
const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);
|
||||
|
||||
const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
|
||||
const unsigned uDiffsRight = uDiffsLeft + 1;
|
||||
|
||||
BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
|
||||
BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
|
||||
}
|
||||
|
||||
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
|
||||
unsigned IdToDiffsLeafNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("Tree1:\n");
|
||||
Tree1.LogMe();
|
||||
Log("\n");
|
||||
Log("Tree2:\n");
|
||||
Tree2.LogMe();
|
||||
#endif
|
||||
|
||||
if (!Tree1.IsRooted() || !Tree2.IsRooted())
|
||||
Quit("DiffTrees: requires rooted trees");
|
||||
|
||||
const unsigned uNodeCount = Tree1.GetNodeCount();
|
||||
const unsigned uNodeCount2 = Tree2.GetNodeCount();
|
||||
|
||||
const unsigned uLeafCount = Tree1.GetLeafCount();
|
||||
const unsigned uLeafCount2 = Tree2.GetLeafCount();
|
||||
assert(uLeafCount == uLeafCount2);
|
||||
|
||||
if (uNodeCount != uNodeCount2)
|
||||
Quit("DiffTrees: different node counts");
|
||||
|
||||
// Allocate tables so we can convert tree node index to
|
||||
// and from the unique id with a O(1) lookup.
|
||||
unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
|
||||
unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
|
||||
|
||||
bool *bIsBachelor1 = new bool[uNodeCount];
|
||||
bool *bIsDiff1 = new bool[uNodeCount];
|
||||
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
NodeIndexToId1[uNodeIndex] = uNodeCount;
|
||||
bIsBachelor1[uNodeIndex] = false;
|
||||
bIsDiff1[uNodeIndex] = false;
|
||||
|
||||
// Use uNodeCount as value meaning "not set".
|
||||
IdToNodeIndex2[uNodeIndex] = uNodeCount;
|
||||
}
|
||||
|
||||
// Initialize node index <-> id lookup tables
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (Tree1.IsLeaf(uNodeIndex))
|
||||
{
|
||||
const unsigned uId = Tree1.GetLeafId(uNodeIndex);
|
||||
if (uId >= uNodeCount)
|
||||
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||
NodeIndexToId1[uNodeIndex] = uId;
|
||||
}
|
||||
|
||||
if (Tree2.IsLeaf(uNodeIndex))
|
||||
{
|
||||
const unsigned uId = Tree2.GetLeafId(uNodeIndex);
|
||||
if (uId >= uNodeCount)
|
||||
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||
IdToNodeIndex2[uId] = uNodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Validity check. This verifies that the ids
|
||||
// pre-assigned to the leaves in Tree1 are unique
|
||||
// (note that the id<N check above does not rule
|
||||
// out two leaves having duplicate ids).
|
||||
for (unsigned uId = 0; uId < uLeafCount; ++uId)
|
||||
{
|
||||
unsigned uNodeIndex2 = IdToNodeIndex2[uId];
|
||||
if (uNodeCount == uNodeIndex2)
|
||||
Quit("DiffTrees, check 2");
|
||||
}
|
||||
|
||||
// Ids assigned to internal nodes are N, N+1 ...
|
||||
// An internal node id uniquely identifies a set
|
||||
// of two or more leaves.
|
||||
unsigned uInternalNodeId = uLeafCount;
|
||||
|
||||
// Depth-first traversal of tree.
|
||||
// The order guarantees that a node is visited before
|
||||
// its parent is visited.
|
||||
for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
|
||||
NULL_NEIGHBOR != uNodeIndex1;
|
||||
uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
|
||||
{
|
||||
#if TRACE
|
||||
Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
|
||||
uNodeIndex1,
|
||||
Tree1.IsLeaf(uNodeIndex1),
|
||||
bIsBachelor1[uNodeIndex1]);
|
||||
#endif
|
||||
|
||||
// Leaves are trivial; nothing to do.
|
||||
if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
|
||||
continue;
|
||||
|
||||
// If either child is a bachelor, flag
|
||||
// this node as a bachelor and continue.
|
||||
unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
|
||||
if (bIsBachelor1[uLeft1])
|
||||
{
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
|
||||
if (bIsBachelor1[uRight1])
|
||||
{
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Both children are married.
|
||||
// Married nodes are guaranteed to have an id.
|
||||
unsigned uIdLeft = NodeIndexToId1[uLeft1];
|
||||
unsigned uIdRight = NodeIndexToId1[uRight1];
|
||||
|
||||
if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
|
||||
Quit("DiffTrees, check 5");
|
||||
|
||||
// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
|
||||
unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
|
||||
unsigned uRight2 = IdToNodeIndex2[uIdRight];
|
||||
|
||||
if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
|
||||
Quit("DiffTrees, check 6");
|
||||
|
||||
// If the spouses of uLeft1 and uRight1 have the same
|
||||
// parent, then this parent is the spouse of uNodeIndex1.
|
||||
// Otherwise, uNodeIndex1 is a diff.
|
||||
unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
|
||||
unsigned uParentRight2 = Tree2.GetParent(uRight2);
|
||||
|
||||
#if TRACE
|
||||
Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
|
||||
uLeft1,
|
||||
uRight1,
|
||||
uLeft2,
|
||||
uRight2,
|
||||
uParentLeft2,
|
||||
uParentRight2);
|
||||
#endif
|
||||
|
||||
if (uParentLeft2 == uParentRight2)
|
||||
{
|
||||
NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
|
||||
IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
|
||||
++uInternalNodeId;
|
||||
}
|
||||
else
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
}
|
||||
|
||||
unsigned uDiffCount = 0;
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (bIsBachelor1[uNodeIndex])
|
||||
continue;
|
||||
if (Tree1.IsRoot(uNodeIndex))
|
||||
{
|
||||
// Special case: if no bachelors, consider the
|
||||
// root a diff.
|
||||
if (!bIsBachelor1[uNodeIndex])
|
||||
bIsDiff1[uNodeIndex] = true;
|
||||
continue;
|
||||
}
|
||||
const unsigned uParent = Tree1.GetParent(uNodeIndex);
|
||||
if (bIsBachelor1[uParent])
|
||||
{
|
||||
bIsDiff1[uNodeIndex] = true;
|
||||
++uDiffCount;
|
||||
}
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("Tree1:\n");
|
||||
Log("Node Id Bach Diff Name\n");
|
||||
Log("---- ---- ---- ---- ----\n");
|
||||
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||
{
|
||||
Log("%4u %4u %d %d",
|
||||
n,
|
||||
NodeIndexToId1[n],
|
||||
bIsBachelor1[n],
|
||||
bIsDiff1[n]);
|
||||
if (Tree1.IsLeaf(n))
|
||||
Log(" %s", Tree1.GetLeafName(n));
|
||||
Log("\n");
|
||||
}
|
||||
Log("\n");
|
||||
Log("Tree2:\n");
|
||||
Log("Node Id Name\n");
|
||||
Log("---- ---- ----\n");
|
||||
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||
{
|
||||
Log("%4u ", n);
|
||||
if (Tree2.IsLeaf(n))
|
||||
Log(" %s", Tree2.GetLeafName(n));
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
Diffs.CreateRooted();
|
||||
const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
|
||||
const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
IdToDiffsLeafNodeIndex[n] = uNodeCount;
|
||||
|
||||
BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
|
||||
IdToDiffsLeafNodeIndex);
|
||||
|
||||
#if TRACE
|
||||
Log("\n");
|
||||
Log("Diffs:\n");
|
||||
Diffs.LogMe();
|
||||
Log("\n");
|
||||
Log("IdToDiffsLeafNodeIndex:");
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
if (n%16 == 0)
|
||||
Log("\n");
|
||||
else
|
||||
Log(" ");
|
||||
Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
|
||||
}
|
||||
Log("\n");
|
||||
#endif
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
|
||||
Quit("TreeDiffs check 7");
|
||||
|
||||
delete[] NodeIndexToId1;
|
||||
delete[] IdToNodeIndex2;
|
||||
|
||||
delete[] bIsBachelor1;
|
||||
delete[] bIsDiff1;
|
||||
}
|
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
@ -0,0 +1,235 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
/***
|
||||
Algorithm to compare two trees, X and Y.
|
||||
|
||||
A node x in X and node y in Y are defined to be
|
||||
similar iff the set of leaves in the subtree under
|
||||
x is identical to the set of leaves under y.
|
||||
|
||||
A node is defined to be changed iff it is not
|
||||
similar to any node in the other tree.
|
||||
|
||||
Nodes x and y are defined to be married iff every
|
||||
node in the subtree under x is similar to a node
|
||||
in the subtree under y. Married nodes are considered
|
||||
to be equal. The subtrees under two married nodes can
|
||||
at most differ by exchanges of left and right branches,
|
||||
which we do not consider to be significant here.
|
||||
|
||||
A node is changed iff it is not married. If a node is
|
||||
changed, then it has a dissimilar node in its subtree,
|
||||
and it follows immediately from the definition of marriage
|
||||
that its parent is also a bachelor. Hence all nodes on the
|
||||
path from a changed node to the root are changed.
|
||||
|
||||
We assume the trees have the same set of leaves, so
|
||||
every leaf is trivially both similar and married to
|
||||
the same leaf in the opposite tree. Changed nodes
|
||||
are therefore always internal (i.e., non-leaf) nodes.
|
||||
|
||||
Example:
|
||||
|
||||
-----A
|
||||
-----k
|
||||
----j -----B
|
||||
--i -----C
|
||||
------D
|
||||
|
||||
|
||||
-----A
|
||||
-----p
|
||||
----n -----B
|
||||
--m -----D
|
||||
------C
|
||||
|
||||
|
||||
The following pairs of internal nodes are similar.
|
||||
|
||||
Nodes Set of leaves
|
||||
----- -------------
|
||||
k,p A,B
|
||||
i,m A,B,C,D
|
||||
|
||||
Changed nodes in the first tree are i and j, changed nodes
|
||||
in the second tree are m and n.
|
||||
|
||||
Node k and p are married, but i and m are not (because j
|
||||
and n are changed). The diffs are C, D and k.
|
||||
|
||||
To achieve O(N) we avoid traversing a given subtree multiple
|
||||
times and also avoid comparing lists of leaves.
|
||||
|
||||
We visit nodes in depth-first order (i.e., a node is visited
|
||||
before its parent).
|
||||
|
||||
If either child of a node is changed, we flag it as changed.
|
||||
|
||||
If both children of the node we are visiting are married,
|
||||
we check whether the spouses of those children have the
|
||||
same parent in the other tree. If the parents are different,
|
||||
the current node is a bachelor. If they have the same parent,
|
||||
then the node we are visiting is the spouse of that parent.
|
||||
We assign this newly identified married couple a unique integer
|
||||
id. The id of a node is in one-to-one correspondence with the
|
||||
set of leaves in its subtree. Two nodes have the same set of
|
||||
leaves iff they have the same id. Changed nodes do not get
|
||||
an id.
|
||||
***/
|
||||
|
||||
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
|
||||
unsigned NewNodeIndexToOldNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("DiffTreesE NewTree:\n");
|
||||
NewTree.LogMe();
|
||||
Log("\n");
|
||||
Log("OldTree:\n");
|
||||
OldTree.LogMe();
|
||||
#endif
|
||||
|
||||
if (!NewTree.IsRooted() || !OldTree.IsRooted())
|
||||
Quit("DiffTrees: requires rooted trees");
|
||||
|
||||
const unsigned uNodeCount = NewTree.GetNodeCount();
|
||||
const unsigned uOldNodeCount = OldTree.GetNodeCount();
|
||||
const unsigned uLeafCount = NewTree.GetLeafCount();
|
||||
const unsigned uOldLeafCount = OldTree.GetLeafCount();
|
||||
if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
|
||||
Quit("DiffTreesE: different node counts");
|
||||
|
||||
{
|
||||
unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
|
||||
for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
|
||||
{
|
||||
if (OldTree.IsLeaf(uOldNodeIndex))
|
||||
{
|
||||
unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
|
||||
IdToOldNodeIndex[Id] = uOldNodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize NewNodeIndexToOldNodeIndex[]
|
||||
// All internal nodes are marked as changed, but may be updated later.
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
{
|
||||
unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
|
||||
assert(uId < uLeafCount);
|
||||
|
||||
unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
|
||||
assert(uOldNodeIndex < uNodeCount);
|
||||
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
|
||||
}
|
||||
else
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||
}
|
||||
delete[] IdToOldNodeIndex;
|
||||
}
|
||||
|
||||
// Depth-first traversal of tree.
|
||||
// The order guarantees that a node is visited before
|
||||
// its parent is visited.
|
||||
for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
|
||||
NULL_NEIGHBOR != uNewNodeIndex;
|
||||
uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
|
||||
{
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
continue;
|
||||
|
||||
// If either child is changed, flag this node as changed and continue.
|
||||
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||
unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||
if (NODE_CHANGED == uOldLeft)
|
||||
{
|
||||
NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||
unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
|
||||
{
|
||||
NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
|
||||
unsigned uOldParentRight = OldTree.GetParent(uOldRight);
|
||||
if (uOldParentLeft == uOldParentRight)
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
|
||||
else
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("NewToOld ");
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
Log(" [%3u]=", uNewNodeIndex);
|
||||
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
|
||||
Log(" X");
|
||||
else
|
||||
Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
|
||||
if ((uNewNodeIndex+1)%8 == 0)
|
||||
Log("\n ");
|
||||
}
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
{
|
||||
if (uOld >= uNodeCount)
|
||||
{
|
||||
Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
|
||||
uNewNodeIndex, uOld, uNodeCount);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
|
||||
unsigned uIdOld = OldTree.GetLeafId(uOld);
|
||||
if (uIdNew != uIdOld)
|
||||
{
|
||||
Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
|
||||
uNewNodeIndex, uOld, uIdNew, uIdOld);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (NODE_CHANGED == uOld)
|
||||
continue;
|
||||
|
||||
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||
|
||||
unsigned uOldLeft = OldTree.GetLeft(uOld);
|
||||
unsigned uOldRight = OldTree.GetRight(uOld);
|
||||
|
||||
unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||
unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||
|
||||
bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
|
||||
bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
|
||||
if (!bSameNotRotated && !bSameRotated)
|
||||
{
|
||||
Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
|
||||
Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
|
||||
Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
@ -0,0 +1,89 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "distcalc.h"
|
||||
#include "msa.h"
|
||||
|
||||
void DistCalcDF::Init(const DistFunc &DF)
|
||||
{
|
||||
m_ptrDF = &DF;
|
||||
}
|
||||
|
||||
void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||
{
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
Dist[j] = m_ptrDF->GetDist(i, j);
|
||||
}
|
||||
|
||||
unsigned DistCalcDF::GetCount() const
|
||||
{
|
||||
return m_ptrDF->GetCount();
|
||||
}
|
||||
|
||||
unsigned DistCalcDF::GetId(unsigned i) const
|
||||
{
|
||||
return m_ptrDF->GetId(i);
|
||||
}
|
||||
|
||||
const char *DistCalcDF::GetName(unsigned i) const
|
||||
{
|
||||
return m_ptrDF->GetName(i);
|
||||
}
|
||||
|
||||
void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
|
||||
{
|
||||
m_ptrMSA = &msa;
|
||||
m_Distance = Distance;
|
||||
}
|
||||
|
||||
void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||
{
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
{
|
||||
switch (m_Distance)
|
||||
{
|
||||
case DISTANCE_PctIdKimura:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
Dist[j] = (float) KimuraDist(PctId);
|
||||
break;
|
||||
}
|
||||
case DISTANCE_PctIdLog:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
Dist[j] = (float) PctIdToMAFFTDist(PctId);
|
||||
break;
|
||||
}
|
||||
case DISTANCE_ScoreDist:
|
||||
{
|
||||
double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2);
|
||||
Dist[j] = (float) GetScoreDist(*m_ptrMSA, i, j);
|
||||
continue;
|
||||
}
|
||||
case DISTANCE_Edit:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
if (PctId > 1.0)
|
||||
Quit("Internal error, DISTANCE_Edit, pct id=%.3g", PctId);
|
||||
Dist[j] = (float) 1.0 - PctId;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned DistCalcMSA::GetCount() const
|
||||
{
|
||||
return m_ptrMSA->GetSeqCount();
|
||||
}
|
||||
|
||||
unsigned DistCalcMSA::GetId(unsigned i) const
|
||||
{
|
||||
return m_ptrMSA->GetSeqId(i);
|
||||
}
|
||||
|
||||
const char *DistCalcMSA::GetName(unsigned i) const
|
||||
{
|
||||
return m_ptrMSA->GetSeqName(i);
|
||||
}
|
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef DistCalc_h
|
||||
#define DistCalc_h
|
||||
|
||||
typedef float dist_t;
|
||||
const dist_t BIG_DIST = (dist_t) 1e29;
|
||||
|
||||
class DistFunc;
|
||||
|
||||
class DistCalc
|
||||
{
|
||||
public:
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;
|
||||
virtual unsigned GetCount() const = 0;
|
||||
virtual unsigned GetId(unsigned i) const = 0;
|
||||
virtual const char *GetName(unsigned i) const = 0;
|
||||
};
|
||||
|
||||
class DistCalcDF : public DistCalc
|
||||
{
|
||||
public:
|
||||
void Init(const DistFunc &DF);
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||
virtual unsigned GetCount() const;
|
||||
virtual unsigned GetId(unsigned i) const;
|
||||
virtual const char *GetName(unsigned i) const;
|
||||
|
||||
private:
|
||||
const DistFunc *m_ptrDF;
|
||||
};
|
||||
|
||||
class DistCalcMSA : public DistCalc
|
||||
{
|
||||
public:
|
||||
void Init(const MSA &msa, DISTANCE Distance);
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||
virtual unsigned GetCount() const;
|
||||
virtual unsigned GetId(unsigned i) const;
|
||||
virtual const char *GetName(unsigned i) const;
|
||||
|
||||
private:
|
||||
const MSA *m_ptrMSA;
|
||||
DISTANCE m_Distance;
|
||||
};
|
||||
|
||||
#endif // DistCalc_h
|
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
@ -0,0 +1,113 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include <assert.h>
|
||||
|
||||
DistFunc::DistFunc()
|
||||
{
|
||||
m_Dists = 0;
|
||||
m_uCount = 0;
|
||||
m_uCacheCount = 0;
|
||||
m_Names = 0;
|
||||
m_Ids = 0;
|
||||
}
|
||||
|
||||
DistFunc::~DistFunc()
|
||||
{
|
||||
if (0 != m_Names)
|
||||
{
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
free(m_Names[i]);
|
||||
}
|
||||
delete[] m_Dists;
|
||||
delete[] m_Names;
|
||||
delete[] m_Ids;
|
||||
}
|
||||
|
||||
float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
return m_Dists[VectorIndex(uIndex1, uIndex2)];
|
||||
}
|
||||
|
||||
unsigned DistFunc::GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
void DistFunc::SetCount(unsigned uCount)
|
||||
{
|
||||
m_uCount = uCount;
|
||||
if (uCount <= m_uCacheCount)
|
||||
return;
|
||||
delete[] m_Dists;
|
||||
m_Dists = new float[VectorLength()];
|
||||
m_Names = new char *[m_uCount];
|
||||
m_Ids = new unsigned[m_uCount];
|
||||
m_uCacheCount = uCount;
|
||||
|
||||
memset(m_Names, 0, m_uCount*sizeof(char *));
|
||||
memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
|
||||
memset(m_Dists, 0, VectorLength()*sizeof(float));
|
||||
}
|
||||
|
||||
void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||
{
|
||||
m_Dists[VectorIndex(uIndex1, uIndex2)] = dDist;
|
||||
m_Dists[VectorIndex(uIndex2, uIndex1)] = dDist;
|
||||
}
|
||||
|
||||
unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
|
||||
return uIndex1*m_uCount + uIndex2;
|
||||
}
|
||||
|
||||
unsigned DistFunc::VectorLength() const
|
||||
{
|
||||
return m_uCount*m_uCount;
|
||||
}
|
||||
|
||||
void DistFunc::SetName(unsigned uIndex, const char szName[])
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
m_Names[uIndex] = strsave(szName);
|
||||
}
|
||||
|
||||
void DistFunc::SetId(unsigned uIndex, unsigned uId)
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
m_Ids[uIndex] = uId;
|
||||
}
|
||||
|
||||
const char *DistFunc::GetName(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_Names[uIndex];
|
||||
}
|
||||
|
||||
unsigned DistFunc::GetId(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_Ids[uIndex];
|
||||
}
|
||||
|
||||
void DistFunc::LogMe() const
|
||||
{
|
||||
Log("DistFunc::LogMe count=%u\n", m_uCount);
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
Log(" %7u", i);
|
||||
Log("\n");
|
||||
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
Log(" %7.7s", m_Names[i] ? m_Names[i] : "");
|
||||
Log("\n");
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
Log("%4u %10.10s : ", i, m_Names[i] ? m_Names[i] : "");
|
||||
for (unsigned j = 0; j <= i; ++j)
|
||||
Log(" %7.4g", GetDist(i, j));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
@ -0,0 +1,36 @@
|
||||
#ifndef DistFunc_h
|
||||
#define DistFunc_h
|
||||
|
||||
class DistFunc
|
||||
{
|
||||
public:
|
||||
DistFunc();
|
||||
virtual ~DistFunc();
|
||||
|
||||
public:
|
||||
virtual void SetCount(unsigned uCount);
|
||||
virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
|
||||
|
||||
void SetName(unsigned uIndex, const char szName[]);
|
||||
void SetId(unsigned uIndex, unsigned uId);
|
||||
const char *GetName(unsigned uIndex) const;
|
||||
unsigned GetId(unsigned uIndex) const;
|
||||
|
||||
virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
|
||||
virtual unsigned GetCount() const;
|
||||
|
||||
void LogMe() const;
|
||||
|
||||
protected:
|
||||
unsigned VectorIndex(unsigned uIndex, unsigned uIndex2) const;
|
||||
unsigned VectorLength() const;
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
unsigned m_uCacheCount;
|
||||
float *m_Dists;
|
||||
char **m_Names;
|
||||
unsigned *m_Ids;
|
||||
};
|
||||
|
||||
#endif // DistFunc_h
|
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
#include "seqvect.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
void DistPWKimura(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
|
||||
SetSeqWeightMethod(SEQWEIGHT_Henikoff);
|
||||
|
||||
const unsigned uSeqCount = v.Length();
|
||||
DF.SetCount(uSeqCount);
|
||||
|
||||
const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
|
||||
unsigned uCount = 0;
|
||||
SetProgressDesc("PWKimura distance");
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
const Seq &s1 = v.GetSeq(uSeqIndex1);
|
||||
MSA msa1;
|
||||
msa1.FromSeq(s1);
|
||||
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
|
||||
{
|
||||
if (0 == uCount%20)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
const Seq &s2 = v.GetSeq(uSeqIndex2);
|
||||
MSA msa2;
|
||||
msa2.FromSeq(s2);
|
||||
|
||||
PWPath Path;
|
||||
MSA msaOut;
|
||||
AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
|
||||
|
||||
double dPctId = msaOut.GetPctIdentityPair(0, 1);
|
||||
float f = (float) KimuraDist(dPctId);
|
||||
|
||||
DF.SetDist(uSeqIndex1, uSeqIndex2, f);
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
SetSeqWeightMethod(SeqWeightSave);
|
||||
}
|
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
@ -0,0 +1,299 @@
|
||||
#include "muscle.h"
|
||||
#include "textfile.h"
|
||||
#include "seqvect.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
#include "tree.h"
|
||||
#include "profile.h"
|
||||
#include "timing.h"
|
||||
|
||||
static char g_strUseTreeWarning[] =
|
||||
"\n******** WARNING ****************\n"
|
||||
"\nYou specified the -usetree option.\n"
|
||||
"Note that a good evolutionary tree may NOT be a good\n"
|
||||
"guide tree for multiple alignment. For more details,\n"
|
||||
"please refer to the user guide. To disable this\n"
|
||||
"warning, use -usetree_nowarn <treefilename>.\n\n";
|
||||
|
||||
void DoMuscle()
|
||||
{
|
||||
SetOutputFileName(g_pstrOutFileName);
|
||||
SetInputFileName(g_pstrInFileName);
|
||||
|
||||
SetMaxIters(g_uMaxIters);
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
|
||||
TextFile fileIn(g_pstrInFileName);
|
||||
SeqVect v;
|
||||
v.FromFASTAFile(fileIn);
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
if (0 == uSeqCount)
|
||||
Quit("No sequences in input file");
|
||||
|
||||
ALPHA Alpha = ALPHA_Undefined;
|
||||
switch (g_SeqType)
|
||||
{
|
||||
case SEQTYPE_Auto:
|
||||
Alpha = v.GuessAlpha();
|
||||
break;
|
||||
|
||||
case SEQTYPE_Protein:
|
||||
Alpha = ALPHA_Amino;
|
||||
break;
|
||||
|
||||
case SEQTYPE_DNA:
|
||||
Alpha = ALPHA_DNA;
|
||||
break;
|
||||
|
||||
case SEQTYPE_RNA:
|
||||
Alpha = ALPHA_RNA;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid seq type");
|
||||
}
|
||||
SetAlpha(Alpha);
|
||||
v.FixAlpha();
|
||||
|
||||
PTR_SCOREMATRIX UserMatrix = 0;
|
||||
if (0 != g_pstrMatrixFileName)
|
||||
{
|
||||
const char *FileName = g_pstrMatrixFileName;
|
||||
const char *Path = getenv("MUSCLE_MXPATH");
|
||||
if (Path != 0)
|
||||
{
|
||||
size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
|
||||
char *NewFileName = new char[n];
|
||||
sprintf(NewFileName, "%s/%s", Path, FileName);
|
||||
FileName = NewFileName;
|
||||
}
|
||||
TextFile File(FileName);
|
||||
UserMatrix = ReadMx(File);
|
||||
g_Alpha = ALPHA_Amino;
|
||||
g_PPScore = PPSCORE_SP;
|
||||
}
|
||||
|
||||
SetPPScore();
|
||||
|
||||
if (0 != UserMatrix)
|
||||
g_ptrScoreMatrix = UserMatrix;
|
||||
|
||||
unsigned uMaxL = 0;
|
||||
unsigned uTotL = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned L = v.GetSeq(uSeqIndex).Length();
|
||||
uTotL += L;
|
||||
if (L > uMaxL)
|
||||
uMaxL = L;
|
||||
}
|
||||
|
||||
SetIter(1);
|
||||
g_bDiags = g_bDiags1;
|
||||
SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
|
||||
|
||||
SetMuscleSeqVect(v);
|
||||
|
||||
MSA::SetIdCount(uSeqCount);
|
||||
|
||||
// Initialize sequence ids.
|
||||
// From this point on, ids must somehow propogate from here.
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
v.SetSeqId(uSeqIndex, uSeqIndex);
|
||||
|
||||
if (0 == uSeqCount)
|
||||
Quit("Input file '%s' has no sequences", g_pstrInFileName);
|
||||
if (1 == uSeqCount)
|
||||
{
|
||||
TextFile fileOut(g_pstrOutFileName, true);
|
||||
v.ToFile(fileOut);
|
||||
return;
|
||||
}
|
||||
|
||||
if (uSeqCount > 1)
|
||||
MHackStart(v);
|
||||
|
||||
// First iteration
|
||||
Tree GuideTree;
|
||||
if (0 != g_pstrUseTreeFileName)
|
||||
{
|
||||
// Discourage users...
|
||||
if (!g_bUseTreeNoWarn)
|
||||
fprintf(stderr, "%s", g_strUseTreeWarning);
|
||||
|
||||
// Read tree from file
|
||||
TextFile TreeFile(g_pstrUseTreeFileName);
|
||||
GuideTree.FromFile(TreeFile);
|
||||
|
||||
// Make sure tree is rooted
|
||||
if (!GuideTree.IsRooted())
|
||||
Quit("User tree must be rooted");
|
||||
|
||||
if (GuideTree.GetLeafCount() != uSeqCount)
|
||||
Quit("User tree does not match input sequences");
|
||||
|
||||
const unsigned uNodeCount = GuideTree.GetNodeCount();
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (!GuideTree.IsLeaf(uNodeIndex))
|
||||
continue;
|
||||
const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
|
||||
unsigned uSeqIndex;
|
||||
bool SeqFound = v.FindName(LeafName, &uSeqIndex);
|
||||
if (!SeqFound)
|
||||
Quit("Label %s in tree does not match sequences", LeafName);
|
||||
unsigned uId = v.GetSeqIdFromName(LeafName);
|
||||
GuideTree.SetLeafId(uNodeIndex, uId);
|
||||
}
|
||||
}
|
||||
else
|
||||
TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
|
||||
g_pstrDistMxFileName1);
|
||||
|
||||
const char *Tree1 = ValueOpt("Tree1");
|
||||
if (0 != Tree1)
|
||||
{
|
||||
TextFile f(Tree1, true);
|
||||
GuideTree.ToFile(f);
|
||||
if (g_bClusterOnly)
|
||||
return;
|
||||
}
|
||||
|
||||
SetMuscleTree(GuideTree);
|
||||
ValidateMuscleIds(GuideTree);
|
||||
|
||||
MSA msa;
|
||||
ProgNode *ProgNodes = 0;
|
||||
if (g_bLow)
|
||||
ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
|
||||
else
|
||||
ProgressiveAlign(v, GuideTree, msa);
|
||||
SetCurrentAlignment(msa);
|
||||
|
||||
if (0 != g_pstrComputeWeightsFileName)
|
||||
{
|
||||
extern void OutWeights(const char *FileName, const MSA &msa);
|
||||
SetMSAWeightsMuscle(msa);
|
||||
OutWeights(g_pstrComputeWeightsFileName, msa);
|
||||
return;
|
||||
}
|
||||
|
||||
ValidateMuscleIds(msa);
|
||||
|
||||
if (1 == g_uMaxIters || 2 == uSeqCount)
|
||||
{
|
||||
//TextFile fileOut(g_pstrOutFileName, true);
|
||||
//MHackEnd(msa);
|
||||
//msa.ToFile(fileOut);
|
||||
MuscleOutput(msa);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 == g_pstrUseTreeFileName)
|
||||
{
|
||||
g_bDiags = g_bDiags2;
|
||||
SetIter(2);
|
||||
|
||||
if (g_bLow)
|
||||
{
|
||||
if (0 != g_uMaxTreeRefineIters)
|
||||
RefineTreeE(msa, v, GuideTree, ProgNodes);
|
||||
}
|
||||
else
|
||||
RefineTree(msa, GuideTree);
|
||||
|
||||
const char *Tree2 = ValueOpt("Tree2");
|
||||
if (0 != Tree2)
|
||||
{
|
||||
TextFile f(Tree2, true);
|
||||
GuideTree.ToFile(f);
|
||||
}
|
||||
}
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight2);
|
||||
SetMuscleTree(GuideTree);
|
||||
|
||||
if (g_bAnchors)
|
||||
RefineVert(msa, GuideTree, g_uMaxIters - 2);
|
||||
else
|
||||
RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
|
||||
|
||||
#if 0
|
||||
// Refining by subfamilies is disabled as it didn't give better
|
||||
// results. I tried doing this before and after RefineHoriz.
|
||||
// Should get back to this as it seems like this should work.
|
||||
RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
|
||||
#endif
|
||||
|
||||
ValidateMuscleIds(msa);
|
||||
ValidateMuscleIds(GuideTree);
|
||||
|
||||
//TextFile fileOut(g_pstrOutFileName, true);
|
||||
//MHackEnd(msa);
|
||||
//msa.ToFile(fileOut);
|
||||
MuscleOutput(msa);
|
||||
}
|
||||
|
||||
void Run()
|
||||
{
|
||||
SetStartTime();
|
||||
Log("Started %s\n", GetTimeAsStr());
|
||||
for (int i = 0; i < g_argc; ++i)
|
||||
Log("%s ", g_argv[i]);
|
||||
Log("\n");
|
||||
|
||||
#if TIMING
|
||||
TICKS t1 = GetClockTicks();
|
||||
#endif
|
||||
if (g_bRefine)
|
||||
Refine();
|
||||
else if (g_bRefineW)
|
||||
{
|
||||
extern void DoRefineW();
|
||||
DoRefineW();
|
||||
}
|
||||
else if (g_bProfDB)
|
||||
ProfDB();
|
||||
else if (g_bSW)
|
||||
Local();
|
||||
else if (0 != g_pstrSPFileName)
|
||||
DoSP();
|
||||
else if (g_bProfile)
|
||||
Profile();
|
||||
else if (g_bPPScore)
|
||||
PPScore();
|
||||
else if (g_bPAS)
|
||||
ProgAlignSubFams();
|
||||
else if (g_bMakeTree)
|
||||
{
|
||||
extern void DoMakeTree();
|
||||
DoMakeTree();
|
||||
}
|
||||
else
|
||||
DoMuscle();
|
||||
|
||||
#if TIMING
|
||||
extern TICKS g_ticksDP;
|
||||
extern TICKS g_ticksObjScore;
|
||||
TICKS t2 = GetClockTicks();
|
||||
TICKS TotalTicks = t2 - t1;
|
||||
TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
|
||||
double dSecs = TicksToSecs(TotalTicks);
|
||||
double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
|
||||
double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
|
||||
double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
|
||||
Log(" Ticks Secs Pct\n");
|
||||
Log(" ============ ======= =====\n");
|
||||
Log("DP %12ld %7.2f %5.1f%%\n",
|
||||
(long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
|
||||
Log("OS %12ld %7.2f %5.1f%%\n",
|
||||
(long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
|
||||
Log("Other %12ld %7.2f %5.1f%%\n",
|
||||
(long) ticksOther, TicksToSecs(ticksOther), PctOther);
|
||||
Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);
|
||||
#endif
|
||||
|
||||
ListDiagSavings();
|
||||
Log("Finished %s\n", GetTimeAsStr());
|
||||
}
|
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include "muscle.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
#include "tree.h"
|
||||
#include "profile.h"
|
||||
|
||||
void DoSP()
|
||||
{
|
||||
TextFile f(g_pstrSPFileName);
|
||||
|
||||
MSA a;
|
||||
a.FromFile(f);
|
||||
|
||||
ALPHA Alpha = ALPHA_Undefined;
|
||||
switch (g_SeqType)
|
||||
{
|
||||
case SEQTYPE_Auto:
|
||||
Alpha = a.GuessAlpha();
|
||||
break;
|
||||
|
||||
case SEQTYPE_Protein:
|
||||
Alpha = ALPHA_Amino;
|
||||
break;
|
||||
|
||||
case SEQTYPE_DNA:
|
||||
Alpha = ALPHA_DNA;
|
||||
break;
|
||||
|
||||
case SEQTYPE_RNA:
|
||||
Alpha = ALPHA_RNA;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid SeqType");
|
||||
}
|
||||
SetAlpha(Alpha);
|
||||
a.FixAlpha();
|
||||
|
||||
SetPPScore();
|
||||
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
if (0 == uSeqCount)
|
||||
Quit("No sequences in input file %s", g_pstrSPFileName);
|
||||
|
||||
MSA::SetIdCount(uSeqCount);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
a.SetSeqId(uSeqIndex, uSeqIndex);
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
Tree tree;
|
||||
TreeFromMSA(a, tree, g_Cluster2, g_Distance2, g_Root2);
|
||||
SetMuscleTree(tree);
|
||||
SetMSAWeightsMuscle((MSA &) a);
|
||||
|
||||
SCORE SP = ObjScoreSP(a);
|
||||
|
||||
Log("File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||
fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||
}
|
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
@ -0,0 +1,73 @@
|
||||
#ifndef DPRegionList_h
|
||||
#define DPRegionList_h
|
||||
|
||||
#include "diaglist.h"
|
||||
|
||||
enum DPREGIONTYPE
|
||||
{
|
||||
DPREGIONTYPE_Unknown,
|
||||
DPREGIONTYPE_Diag,
|
||||
DPREGIONTYPE_Rect
|
||||
};
|
||||
|
||||
struct DPRegion
|
||||
{
|
||||
DPREGIONTYPE m_Type;
|
||||
union
|
||||
{
|
||||
Diag m_Diag;
|
||||
Rect m_Rect;
|
||||
};
|
||||
};
|
||||
|
||||
const unsigned MAX_DPREGIONS = 1024;
|
||||
|
||||
class DPRegionList
|
||||
{
|
||||
public:
|
||||
DPRegionList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DPRegionList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void Add(const DPRegion &r);
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
const DPRegion &Get(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_DPRegions[uIndex];
|
||||
}
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||
};
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB);
|
||||
|
||||
#endif // DPRegionList_h
|
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
#include "muscle.h"
|
||||
#include "dpreglist.h"
|
||||
|
||||
unsigned DPRegionList::GetDPArea() const
|
||||
{
|
||||
unsigned uArea = 0;
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const DPRegion &r = m_DPRegions[i];
|
||||
if (DPREGIONTYPE_Rect == r.m_Type)
|
||||
uArea += r.m_Rect.m_uLengthA*r.m_Rect.m_uLengthB;
|
||||
}
|
||||
return uArea;
|
||||
}
|
||||
|
||||
void DPRegionList::Add(const DPRegion &r)
|
||||
{
|
||||
if (m_uCount == MAX_DPREGIONS)
|
||||
Quit("DPRegionList::Add, overflow %d", m_uCount);
|
||||
m_DPRegions[m_uCount] = r;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
void DPRegionList::LogMe() const
|
||||
{
|
||||
Log("DPRegionList::LogMe, count=%u\n", m_uCount);
|
||||
Log("Region Type StartA StartB EndA EndB\n");
|
||||
Log("------ ---- ------ ------ ---- ----\n");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const DPRegion &r = m_DPRegions[i];
|
||||
Log("%6u ", i);
|
||||
if (DPREGIONTYPE_Diag == r.m_Type)
|
||||
Log("Diag %6u %6u %6u %6u\n",
|
||||
r.m_Diag.m_uStartPosA,
|
||||
r.m_Diag.m_uStartPosB,
|
||||
r.m_Diag.m_uStartPosA + r.m_Diag.m_uLength - 1,
|
||||
r.m_Diag.m_uStartPosB + r.m_Diag.m_uLength - 1);
|
||||
else if (DPREGIONTYPE_Rect == r.m_Type)
|
||||
Log("Rect %6u %6u %6u %6u\n",
|
||||
r.m_Rect.m_uStartPosA,
|
||||
r.m_Rect.m_uStartPosB,
|
||||
r.m_Rect.m_uStartPosA + r.m_Rect.m_uLengthA - 1,
|
||||
r.m_Rect.m_uStartPosB + r.m_Rect.m_uLengthB - 1);
|
||||
else
|
||||
Log(" *** ERROR *** Type=%u\n", r.m_Type);
|
||||
}
|
||||
}
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
if (g_uDiagMargin > g_uMinDiagLength/2)
|
||||
Quit("Invalid parameters, diagmargin=%d must be <= 2*diaglength=%d",
|
||||
g_uDiagMargin, g_uMinDiagLength);
|
||||
|
||||
unsigned uStartPosA = 0;
|
||||
unsigned uStartPosB = 0;
|
||||
const unsigned uDiagCount = DL.GetCount();
|
||||
DPRegion r;
|
||||
for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
|
||||
{
|
||||
const Diag &d = DL.Get(uDiagIndex);
|
||||
assert(d.m_uLength >= g_uMinDiagLength);
|
||||
const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
|
||||
const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
|
||||
const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
|
||||
const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Rect;
|
||||
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||
|
||||
assert(uStartVertexA + 1 >= uStartPosA);
|
||||
assert(uStartVertexB + 1 >= uStartPosB);
|
||||
r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
|
||||
r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
|
||||
RL.Add(r);
|
||||
|
||||
if (uEndVertexA > uStartVertexA + 1)
|
||||
{
|
||||
const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Diag;
|
||||
r.m_Diag.m_uStartPosA = uStartVertexA + 1;
|
||||
r.m_Diag.m_uStartPosB = uStartVertexB + 1;
|
||||
assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
|
||||
r.m_Diag.m_uLength = uEndVertexA - uStartVertexA - 1;
|
||||
RL.Add(r);
|
||||
}
|
||||
|
||||
uStartPosA = uEndVertexA;
|
||||
uStartPosB = uEndVertexB;
|
||||
}
|
||||
|
||||
assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
|
||||
assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Rect;
|
||||
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||
|
||||
assert(uLengthA >= uStartPosA);
|
||||
assert(uLengthB >= uStartPosB);
|
||||
r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
|
||||
r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
|
||||
RL.Add(r);
|
||||
}
|
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
@ -0,0 +1,76 @@
|
||||
#ifndef dpreglist_h
|
||||
#define dpreglist_h
|
||||
|
||||
#include "diaglist.h"
|
||||
|
||||
enum DPREGIONTYPE
|
||||
{
|
||||
DPREGIONTYPE_Unknown,
|
||||
DPREGIONTYPE_Diag,
|
||||
DPREGIONTYPE_Rect
|
||||
};
|
||||
|
||||
struct DPRegion
|
||||
{
|
||||
DPREGIONTYPE m_Type;
|
||||
union
|
||||
{
|
||||
Diag m_Diag;
|
||||
Rect m_Rect;
|
||||
};
|
||||
};
|
||||
|
||||
const unsigned MAX_DPREGIONS = 1024;
|
||||
|
||||
class DPRegionList
|
||||
{
|
||||
public:
|
||||
DPRegionList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DPRegionList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void Add(const DPRegion &r);
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
const DPRegion &Get(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_DPRegions[uIndex];
|
||||
}
|
||||
|
||||
unsigned GetDPArea() const;
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||
};
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB);
|
||||
|
||||
#endif // dpreglist_h
|
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
/***
|
||||
Simple tree drawing algorithm.
|
||||
|
||||
y coordinate of node is index in depth-first traversal.
|
||||
x coordinate is distance from root.
|
||||
***/
|
||||
|
||||
static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uRoot = tree.GetRootNodeIndex();
|
||||
unsigned uDist = 0;
|
||||
while (uNodeIndex != uRoot)
|
||||
{
|
||||
++uDist;
|
||||
uNodeIndex = tree.GetParent(uNodeIndex);
|
||||
}
|
||||
return uDist;
|
||||
}
|
||||
|
||||
static void DrawNode(const Tree &tree, unsigned uNodeIndex)
|
||||
{
|
||||
if (!tree.IsLeaf(uNodeIndex))
|
||||
DrawNode(tree, tree.GetLeft(uNodeIndex));
|
||||
|
||||
unsigned uDist = DistFromRoot(tree, uNodeIndex);
|
||||
for (unsigned i = 0; i < 5*uDist; ++i)
|
||||
Log(" ");
|
||||
Log("%d\n", uNodeIndex);
|
||||
|
||||
if (!tree.IsLeaf(uNodeIndex))
|
||||
DrawNode(tree, tree.GetRight(uNodeIndex));
|
||||
}
|
||||
|
||||
void DrawTree(const Tree &tree)
|
||||
{
|
||||
unsigned uRoot = tree.GetRootNodeIndex();
|
||||
DrawNode(tree, uRoot);
|
||||
}
|
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
@ -0,0 +1,88 @@
|
||||
#include "muscle.h"
|
||||
#include "edgelist.h"
|
||||
|
||||
EdgeList::EdgeList()
|
||||
{
|
||||
m_uNode1 = 0;
|
||||
m_uNode2 = 0;
|
||||
m_uCount = 0;
|
||||
m_uCacheSize = 0;
|
||||
}
|
||||
|
||||
EdgeList::~EdgeList()
|
||||
{
|
||||
Clear();
|
||||
}
|
||||
|
||||
void EdgeList::Clear()
|
||||
{
|
||||
delete[] m_uNode1;
|
||||
delete[] m_uNode2;
|
||||
m_uNode1 = 0;
|
||||
m_uNode2 = 0;
|
||||
m_uCount = 0;
|
||||
m_uCacheSize = 0;
|
||||
}
|
||||
|
||||
void EdgeList::Add(unsigned uNode1, unsigned uNode2)
|
||||
{
|
||||
if (m_uCount <= m_uCacheSize)
|
||||
Expand();
|
||||
m_uNode1[m_uCount] = uNode1;
|
||||
m_uNode2[m_uCount] = uNode2;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
unsigned EdgeList::GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
|
||||
{
|
||||
if (uIndex > m_uCount)
|
||||
Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
|
||||
*ptruNode1 = m_uNode1[uIndex];
|
||||
*ptruNode2 = m_uNode2[uIndex];
|
||||
}
|
||||
|
||||
void EdgeList::Copy(const EdgeList &rhs)
|
||||
{
|
||||
Clear();
|
||||
const unsigned uCount = rhs.GetCount();
|
||||
for (unsigned n = 0; n < uCount; ++n)
|
||||
{
|
||||
unsigned uNode1;
|
||||
unsigned uNode2;
|
||||
rhs.GetEdge(n, &uNode1, &uNode2);
|
||||
Add(uNode1, uNode2);
|
||||
}
|
||||
}
|
||||
|
||||
void EdgeList::Expand()
|
||||
{
|
||||
unsigned uNewCacheSize = m_uCacheSize + 512;
|
||||
unsigned *NewNode1 = new unsigned[uNewCacheSize];
|
||||
unsigned *NewNode2 = new unsigned[uNewCacheSize];
|
||||
if (m_uCount > 0)
|
||||
{
|
||||
memcpy(NewNode1, m_uNode1, m_uCount*sizeof(unsigned));
|
||||
memcpy(NewNode2, m_uNode2, m_uCount*sizeof(unsigned));
|
||||
}
|
||||
delete[] m_uNode1;
|
||||
delete[] m_uNode2;
|
||||
m_uNode1 = NewNode1;
|
||||
m_uNode2 = NewNode2;
|
||||
m_uCacheSize = uNewCacheSize;
|
||||
}
|
||||
|
||||
void EdgeList::LogMe() const
|
||||
{
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
if (n > 0)
|
||||
Log(" ");
|
||||
Log("%u->%u", m_uNode1[n], m_uNode2[n]);
|
||||
}
|
||||
Log("\n");
|
||||
}
|
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
@ -0,0 +1,28 @@
|
||||
#ifndef EdgeList_h
|
||||
#define EdgeList_h
|
||||
|
||||
class EdgeList
|
||||
{
|
||||
public:
|
||||
EdgeList();
|
||||
virtual ~EdgeList();
|
||||
|
||||
public:
|
||||
void Clear();
|
||||
void Add(unsigned uNode1, unsigned uNode2);
|
||||
unsigned GetCount() const;
|
||||
void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;
|
||||
void Copy(const EdgeList &rhs);
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Expand();
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
unsigned m_uCacheSize;
|
||||
unsigned *m_uNode1;
|
||||
unsigned *m_uNode2;
|
||||
};
|
||||
|
||||
#endif // EdgeList_h
|
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include "muscle.h"
|
||||
#include "enumopts.h"
|
||||
|
||||
#define s(t) EnumOpt t##_Opts[] = {
|
||||
#define c(t, x) #x, t##_##x,
|
||||
#define e(t) 0, 0 };
|
||||
|
||||
#include "enums.h"
|
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef enumopts_h
|
||||
#define enumopts_h
|
||||
|
||||
struct EnumOpt
|
||||
{
|
||||
const char *pstrOpt;
|
||||
int iValue;
|
||||
};
|
||||
|
||||
#define s(t) extern EnumOpt t##_Opts[];
|
||||
#define c(t, x) /* empty */
|
||||
#define e(t) /* empty */
|
||||
#include "enums.h"
|
||||
|
||||
|
||||
#endif // enumopts_h
|
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
@ -0,0 +1,98 @@
|
||||
// enums.h
|
||||
// Define enum types.
|
||||
// Exploit macro hacks to avoid lots of repetetive typing.
|
||||
// Generally I am opposed to macro hacks because of the
|
||||
// highly obscure code that results, but in this case it
|
||||
// makes maintenance much easier and less error-prone.
|
||||
// The idea is that this file can be included in different
|
||||
// places with different definitions of s (Start), c (Case)
|
||||
// and e (End). See types.h.
|
||||
|
||||
s(ALPHA)
|
||||
c(ALPHA, Amino)
|
||||
c(ALPHA, DNA)
|
||||
c(ALPHA, RNA)
|
||||
e(ALPHA)
|
||||
|
||||
s(SEQTYPE)
|
||||
c(SEQTYPE, Protein)
|
||||
c(SEQTYPE, DNA)
|
||||
c(SEQTYPE, RNA)
|
||||
c(SEQTYPE, Auto)
|
||||
e(SEQTYPE)
|
||||
|
||||
s(ROOT)
|
||||
c(ROOT, Pseudo)
|
||||
c(ROOT, MidLongestSpan)
|
||||
c(ROOT, MinAvgLeafDist)
|
||||
e(ROOT)
|
||||
|
||||
s(CLUSTER)
|
||||
c(CLUSTER, UPGMA)
|
||||
c(CLUSTER, UPGMAMax)
|
||||
c(CLUSTER, UPGMAMin)
|
||||
c(CLUSTER, UPGMB)
|
||||
c(CLUSTER, NeighborJoining)
|
||||
e(CLUSTER)
|
||||
|
||||
s(JOIN)
|
||||
c(JOIN, NearestNeighbor)
|
||||
c(JOIN, NeighborJoining)
|
||||
e(JOIN)
|
||||
|
||||
s(LINKAGE)
|
||||
c(LINKAGE, Min)
|
||||
c(LINKAGE, Avg)
|
||||
c(LINKAGE, Max)
|
||||
c(LINKAGE, NeighborJoining)
|
||||
c(LINKAGE, Biased)
|
||||
e(LINKAGE)
|
||||
|
||||
s(DISTANCE)
|
||||
c(DISTANCE, Kmer6_6)
|
||||
c(DISTANCE, Kmer20_3)
|
||||
c(DISTANCE, Kmer20_4)
|
||||
c(DISTANCE, Kbit20_3)
|
||||
c(DISTANCE, Kmer4_6)
|
||||
c(DISTANCE, PctIdKimura)
|
||||
c(DISTANCE, PctIdLog)
|
||||
c(DISTANCE, PWKimura)
|
||||
c(DISTANCE, PWScoreDist)
|
||||
c(DISTANCE, ScoreDist)
|
||||
c(DISTANCE, Edit)
|
||||
e(DISTANCE)
|
||||
|
||||
s(PPSCORE)
|
||||
c(PPSCORE, LE)
|
||||
c(PPSCORE, SP)
|
||||
c(PPSCORE, SV)
|
||||
c(PPSCORE, SPN)
|
||||
e(PPSCORE)
|
||||
|
||||
s(SEQWEIGHT)
|
||||
c(SEQWEIGHT, None)
|
||||
c(SEQWEIGHT, Henikoff)
|
||||
c(SEQWEIGHT, HenikoffPB)
|
||||
c(SEQWEIGHT, GSC)
|
||||
c(SEQWEIGHT, ClustalW)
|
||||
c(SEQWEIGHT, ThreeWay)
|
||||
e(SEQWEIGHT)
|
||||
|
||||
s(OBJSCORE)
|
||||
c(OBJSCORE, SP) // Sum of Pairs of sequences
|
||||
c(OBJSCORE, DP) // Dynamic Programming score
|
||||
c(OBJSCORE, XP) // Cross Pairs = sum of pairs between two MSAs
|
||||
c(OBJSCORE, PS) // sum of Prof-Seq score for all seqs in MSA
|
||||
c(OBJSCORE, SPF) // sum of pairs, fast approximation
|
||||
c(OBJSCORE, SPM) // sp if <= 100 seqs, spf otherwise
|
||||
e(OBJSCORE)
|
||||
|
||||
s(TERMGAPS)
|
||||
c(TERMGAPS, Full)
|
||||
c(TERMGAPS, Half)
|
||||
c(TERMGAPS, Ext)
|
||||
e(TERMGAPS)
|
||||
|
||||
#undef s
|
||||
#undef c
|
||||
#undef e
|
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
@ -0,0 +1,16 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
|
||||
static char szMsg[64];
|
||||
|
||||
// Define XXXToStr(XXX x) functions for each enum type XXX.
|
||||
#define s(t) const char *t##ToStr(t x) { switch (x) { case t##_Undefined: return "Undefined";
|
||||
#define c(t, x) case t##_##x: return #x;
|
||||
#define e(t) } sprintf(szMsg, #t "_%d", x); return szMsg; }
|
||||
#include "enums.h"
|
||||
|
||||
// Define StrToXXX(const char *Str) functions for each enum type XXX.
|
||||
#define s(t) t StrTo##t(const char *Str) { if (0) ;
|
||||
#define c(t, x) else if (0 == stricmp(#x, Str)) return t##_##x;
|
||||
#define e(t) Quit("Invalid value %s for type %s", Str, #t); return t##_Undefined; }
|
||||
#include "enums.h"
|
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
@ -0,0 +1,689 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
#include "estring.h"
|
||||
#include "seq.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
An "estring" is an edit string that operates on a sequence.
|
||||
An estring is represented as a vector of integers.
|
||||
It is interpreted in order of increasing suffix.
|
||||
A positive value n means copy n letters.
|
||||
A negative value -n means insert n indels.
|
||||
Zero marks the end of the vector.
|
||||
Consecutive entries must have opposite sign, i.e. the
|
||||
shortest possible representation must be used.
|
||||
|
||||
A "tpair" is a traceback path for a pairwise alignment
|
||||
represented as two estrings, one for each sequence.
|
||||
***/
|
||||
|
||||
#define c2(c,d) (((unsigned char) c) << 8 | (unsigned char) d)
|
||||
|
||||
unsigned LengthEstring(const short es[])
|
||||
{
|
||||
unsigned i = 0;
|
||||
while (*es++ != 0)
|
||||
++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
short *EstringNewCopy(const short es[])
|
||||
{
|
||||
unsigned n = LengthEstring(es) + 1;
|
||||
short *esNew = new short[n];
|
||||
memcpy(esNew, es, n*sizeof(short));
|
||||
return esNew;
|
||||
}
|
||||
|
||||
void LogEstring(const short es[])
|
||||
{
|
||||
Log("<");
|
||||
for (unsigned i = 0; es[i] != 0; ++i)
|
||||
{
|
||||
if (i > 0)
|
||||
Log(" ");
|
||||
Log("%d", es[i]);
|
||||
}
|
||||
Log(">");
|
||||
}
|
||||
|
||||
static bool EstringsEq(const short es1[], const short es2[])
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (*es1 != *es2)
|
||||
return false;
|
||||
if (0 == *es1)
|
||||
break;
|
||||
++es1;
|
||||
++es2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void EstringCounts(const short es[], unsigned *ptruSymbols,
|
||||
unsigned *ptruIndels)
|
||||
{
|
||||
unsigned uSymbols = 0;
|
||||
unsigned uIndels = 0;
|
||||
for (unsigned i = 0; es[i] != 0; ++i)
|
||||
{
|
||||
short n = es[i];
|
||||
if (n > 0)
|
||||
uSymbols += n;
|
||||
else if (n < 0)
|
||||
uIndels += -n;
|
||||
}
|
||||
*ptruSymbols = uSymbols;
|
||||
*ptruIndels = uIndels;
|
||||
}
|
||||
|
||||
static char *EstringOp(const short es[], const char s[])
|
||||
{
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert((unsigned) strlen(s) == uSymbols);
|
||||
char *sout = new char[uSymbols + uIndels + 1];
|
||||
char *psout = sout;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
*psout++ = *s++;
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
*psout++ = '-';
|
||||
}
|
||||
assert(0 == *s);
|
||||
*psout = 0;
|
||||
return sout;
|
||||
}
|
||||
|
||||
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
|
||||
{
|
||||
#if DEBUG
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert(sIn.Length() == uSymbols);
|
||||
#endif
|
||||
sOut.Clear();
|
||||
sOut.SetName(sIn.GetName());
|
||||
int p = 0;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
char c = sIn[p++];
|
||||
sOut.push_back(c);
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
sOut.push_back('-');
|
||||
}
|
||||
}
|
||||
|
||||
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
|
||||
{
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert(sIn.Length() == uSymbols);
|
||||
|
||||
unsigned uColCount = uSymbols + uIndels;
|
||||
|
||||
a.Clear();
|
||||
a.SetSize(1, uColCount);
|
||||
|
||||
a.SetSeqName(0, sIn.GetName());
|
||||
a.SetSeqId(0, sIn.GetId());
|
||||
|
||||
unsigned p = 0;
|
||||
unsigned uColIndex = 0;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
char c = sIn[p++];
|
||||
a.SetChar(0, uColIndex++, c);
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
a.SetChar(0, uColIndex++, '-');
|
||||
}
|
||||
assert(uColIndex == uColCount);
|
||||
return uColCount;
|
||||
}
|
||||
|
||||
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
|
||||
{
|
||||
// First pass to determine size of estrings esA and esB
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
if (0 == uEdgeCount)
|
||||
{
|
||||
short *esA = new short[1];
|
||||
short *esB = new short[1];
|
||||
esA[0] = 0;
|
||||
esB[0] = 0;
|
||||
*ptresA = esA;
|
||||
*ptresB = esB;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned iLengthA = 1;
|
||||
unsigned iLengthB = 1;
|
||||
const char cFirstEdgeType = Path.GetEdge(0).cType;
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('D', 'D'):
|
||||
case c2('I', 'I'):
|
||||
break;
|
||||
|
||||
case c2('D', 'M'):
|
||||
case c2('M', 'D'):
|
||||
++iLengthB;
|
||||
break;
|
||||
|
||||
case c2('I', 'M'):
|
||||
case c2('M', 'I'):
|
||||
++iLengthA;
|
||||
break;
|
||||
|
||||
case c2('I', 'D'):
|
||||
case c2('D', 'I'):
|
||||
++iLengthB;
|
||||
++iLengthA;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
|
||||
// Pass2 for seq A
|
||||
{
|
||||
short *esA = new short[iLengthA+1];
|
||||
unsigned iA = 0;
|
||||
switch (Path.GetEdge(0).cType)
|
||||
{
|
||||
case 'M':
|
||||
case 'D':
|
||||
esA[0] = 1;
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
esA[0] = -1;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('D', 'D'):
|
||||
case c2('D', 'M'):
|
||||
case c2('M', 'D'):
|
||||
++(esA[iA]);
|
||||
break;
|
||||
|
||||
case c2('I', 'D'):
|
||||
case c2('I', 'M'):
|
||||
++iA;
|
||||
esA[iA] = 1;
|
||||
break;
|
||||
|
||||
case c2('M', 'I'):
|
||||
case c2('D', 'I'):
|
||||
++iA;
|
||||
esA[iA] = -1;
|
||||
break;
|
||||
|
||||
case c2('I', 'I'):
|
||||
--(esA[iA]);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
assert(iA == iLengthA - 1);
|
||||
esA[iLengthA] = 0;
|
||||
*ptresA = esA;
|
||||
}
|
||||
|
||||
{
|
||||
// Pass2 for seq B
|
||||
short *esB = new short[iLengthB+1];
|
||||
unsigned iB = 0;
|
||||
switch (Path.GetEdge(0).cType)
|
||||
{
|
||||
case 'M':
|
||||
case 'I':
|
||||
esB[0] = 1;
|
||||
break;
|
||||
|
||||
case 'D':
|
||||
esB[0] = -1;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('I', 'I'):
|
||||
case c2('I', 'M'):
|
||||
case c2('M', 'I'):
|
||||
++(esB[iB]);
|
||||
break;
|
||||
|
||||
case c2('D', 'I'):
|
||||
case c2('D', 'M'):
|
||||
++iB;
|
||||
esB[iB] = 1;
|
||||
break;
|
||||
|
||||
case c2('M', 'D'):
|
||||
case c2('I', 'D'):
|
||||
++iB;
|
||||
esB[iB] = -1;
|
||||
break;
|
||||
|
||||
case c2('D', 'D'):
|
||||
--(esB[iB]);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
assert(iB == iLengthB - 1);
|
||||
esB[iLengthB] = 0;
|
||||
*ptresB = esB;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(*ptresA, &uSymbols, &uIndels);
|
||||
assert(uSymbols == LastEdge.uPrefixLengthA);
|
||||
assert(uSymbols + uIndels == uEdgeCount);
|
||||
|
||||
EstringCounts(*ptresB, &uSymbols, &uIndels);
|
||||
assert(uSymbols == LastEdge.uPrefixLengthB);
|
||||
assert(uSymbols + uIndels == uEdgeCount);
|
||||
|
||||
PWPath TmpPath;
|
||||
EstringsToPath(*ptresA, *ptresB, TmpPath);
|
||||
TmpPath.AssertEqual(Path);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
|
||||
{
|
||||
Path.Clear();
|
||||
unsigned iA = 0;
|
||||
unsigned iB = 0;
|
||||
int nA = esA[iA++];
|
||||
int nB = esB[iB++];
|
||||
unsigned uPrefixLengthA = 0;
|
||||
unsigned uPrefixLengthB = 0;
|
||||
for (;;)
|
||||
{
|
||||
char cType;
|
||||
if (nA > 0)
|
||||
{
|
||||
if (nB > 0)
|
||||
{
|
||||
cType = 'M';
|
||||
--nA;
|
||||
--nB;
|
||||
}
|
||||
else if (nB < 0)
|
||||
{
|
||||
cType = 'D';
|
||||
--nA;
|
||||
++nB;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
}
|
||||
else if (nA < 0)
|
||||
{
|
||||
if (nB > 0)
|
||||
{
|
||||
cType = 'I';
|
||||
++nA;
|
||||
--nB;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
++uPrefixLengthA;
|
||||
++uPrefixLengthB;
|
||||
break;
|
||||
case 'D':
|
||||
++uPrefixLengthA;
|
||||
break;
|
||||
case 'I':
|
||||
++uPrefixLengthB;
|
||||
break;
|
||||
}
|
||||
|
||||
PWEdge Edge;
|
||||
Edge.cType = cType;
|
||||
Edge.uPrefixLengthA = uPrefixLengthA;
|
||||
Edge.uPrefixLengthB = uPrefixLengthB;
|
||||
Path.AppendEdge(Edge);
|
||||
|
||||
if (nA == 0)
|
||||
{
|
||||
if (0 == esA[iA])
|
||||
{
|
||||
assert(0 == esB[iB]);
|
||||
break;
|
||||
}
|
||||
nA = esA[iA++];
|
||||
}
|
||||
if (nB == 0)
|
||||
nB = esB[iB++];
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
Multiply two estrings to make a third estring.
|
||||
The product of two estrings e1*e2 is defined to be
|
||||
the estring that produces the same result as applying
|
||||
e1 then e2. Multiplication is not commutative. In fact,
|
||||
the reversed order is undefined unless both estrings
|
||||
consist of a single, identical, positive entry.
|
||||
A primary motivation for using estrings is that
|
||||
multiplication is very fast, reducing the time
|
||||
needed to construct the root alignment.
|
||||
|
||||
Example
|
||||
|
||||
<-1,3>(XXX) = -XXX
|
||||
<2,-1,2>(-XXX) = -X-XX
|
||||
|
||||
Therefore,
|
||||
|
||||
<-1,3>*<2,-1,2> = <-1,1,-1,2>
|
||||
***/
|
||||
|
||||
static bool CanMultiplyEstrings(const short es1[], const short es2[])
|
||||
{
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
return uSymbols1 + uIndels1 == uSymbols2;
|
||||
}
|
||||
|
||||
static inline void AppendGaps(short esp[], int &ip, int n)
|
||||
{
|
||||
if (-1 == ip)
|
||||
esp[++ip] = n;
|
||||
else if (esp[ip] < 0)
|
||||
esp[ip] += n;
|
||||
else
|
||||
esp[++ip] = n;
|
||||
}
|
||||
|
||||
static inline void AppendSymbols(short esp[], int &ip, int n)
|
||||
{
|
||||
if (-1 == ip)
|
||||
esp[++ip] = n;
|
||||
else if (esp[ip] > 0)
|
||||
esp[ip] += n;
|
||||
else
|
||||
esp[++ip] = n;
|
||||
}
|
||||
|
||||
void MulEstrings(const short es1[], const short es2[], short esp[])
|
||||
{
|
||||
assert(CanMultiplyEstrings(es1, es2));
|
||||
|
||||
unsigned i1 = 0;
|
||||
int ip = -1;
|
||||
int n1 = es1[i1++];
|
||||
for (unsigned i2 = 0; ; ++i2)
|
||||
{
|
||||
int n2 = es2[i2];
|
||||
if (0 == n2)
|
||||
break;
|
||||
if (n2 > 0)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (n1 < 0)
|
||||
{
|
||||
if (n2 > -n1)
|
||||
{
|
||||
AppendGaps(esp, ip, n1);
|
||||
n2 += n1;
|
||||
n1 = es1[i1++];
|
||||
}
|
||||
else if (n2 == -n1)
|
||||
{
|
||||
AppendGaps(esp, ip, n1);
|
||||
n1 = es1[i1++];
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < -n1);
|
||||
AppendGaps(esp, ip, -n2);
|
||||
n1 += n2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n1 > 0);
|
||||
if (n2 > n1)
|
||||
{
|
||||
AppendSymbols(esp, ip, n1);
|
||||
n2 -= n1;
|
||||
n1 = es1[i1++];
|
||||
}
|
||||
else if (n2 == n1)
|
||||
{
|
||||
AppendSymbols(esp, ip, n1);
|
||||
n1 = es1[i1++];
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < n1);
|
||||
AppendSymbols(esp, ip, n2);
|
||||
n1 -= n2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < 0);
|
||||
AppendGaps(esp, ip, n2);
|
||||
}
|
||||
}
|
||||
esp[++ip] = 0;
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
|
||||
assert(ip < MaxLen);
|
||||
if (ip >= 2)
|
||||
for (int i = 0; i < ip - 2; ++i)
|
||||
{
|
||||
if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
|
||||
{
|
||||
Log("Bad result of MulEstring: ");
|
||||
LogEstring(esp);
|
||||
Quit("Assert failed (alternating signs)");
|
||||
}
|
||||
}
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uSymbolsp;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
unsigned uIndelsp;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
EstringCounts(esp, &uSymbolsp, &uIndelsp);
|
||||
if (uSymbols1 + uIndels1 != uSymbols2)
|
||||
{
|
||||
Log("Bad result of MulEstring: ");
|
||||
LogEstring(esp);
|
||||
Quit("Assert failed (counts1 %u %u %u)",
|
||||
uSymbols1, uIndels1, uSymbols2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test(const short es1[], const short es2[], const short esa[])
|
||||
{
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
|
||||
char s[4096];
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols1] = 0;
|
||||
|
||||
char *s1 = EstringOp(es1, s);
|
||||
char *s12 = EstringOp(es2, s1);
|
||||
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols2] = 0;
|
||||
char *s2 = EstringOp(es2, s);
|
||||
|
||||
Log("%s * %s = %s\n", s1, s2, s12);
|
||||
|
||||
LogEstring(es1);
|
||||
Log(" * ");
|
||||
LogEstring(es2);
|
||||
Log(" = ");
|
||||
LogEstring(esa);
|
||||
Log("\n");
|
||||
|
||||
short esp[4096];
|
||||
MulEstrings(es1, es2, esp);
|
||||
LogEstring(esp);
|
||||
if (!EstringsEq(esp, esa))
|
||||
Log(" *ERROR* ");
|
||||
Log("\n");
|
||||
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols1] = 0;
|
||||
char *sp = EstringOp(esp, s);
|
||||
Log("%s\n", sp);
|
||||
Log("\n==========\n\n");
|
||||
}
|
||||
|
||||
void TestEstrings()
|
||||
{
|
||||
SetListFileName("c:\\tmp\\muscle.log", false);
|
||||
//{
|
||||
//short es1[] = { -1, 1, -1, 0 };
|
||||
//short es2[] = { 1, -1, 2, 0 };
|
||||
//short esa[] = { -2, 1, -1, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 2, -1, 2, 0 };
|
||||
//short es2[] = { 1, -1, 3, -1, 1, 0 };
|
||||
//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { -1, 3, 0 };
|
||||
//short es2[] = { 2, -1, 2, 0 };
|
||||
//short esa[] = { -1, 1, -1, 2, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { -1, 1, -1, 1, 0};
|
||||
//short es2[] = { 4, 0 };
|
||||
//short esa[] = { -1, 1, -1, 1, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 1, -1, 1, -1, 0};
|
||||
//short es2[] = { 4, 0 };
|
||||
//short esa[] = { 1, -1, 1, -1, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 1, -1, 1, -1, 0};
|
||||
//short es2[] = { -1, 4, -1, 0 };
|
||||
//short esa[] = { -1, 1, -1, 1, -2, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
{
|
||||
short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
|
||||
short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
|
||||
short esa[] = { 0 };
|
||||
test(es1, es2, esa);
|
||||
}
|
||||
exit(0);
|
||||
}
|
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef pathsum_h
|
||||
#define pathsum_h
|
||||
|
||||
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);
|
||||
void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
|
||||
void MulEstrings(const short es1[], const short es2[], short esp[]);
|
||||
void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
|
||||
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);
|
||||
void LogEstring(const short es[]);
|
||||
unsigned LengthEstring(const short es[]);
|
||||
short *EstringNewCopy(const short es[]);
|
||||
|
||||
#endif // pathsum_h
|
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
|
||||
const unsigned FASTA_BLOCK = 60;
|
||||
|
||||
void MSA::FromFASTAFile(TextFile &File)
|
||||
{
|
||||
Clear();
|
||||
|
||||
FILE *f = File.GetStdioFile();
|
||||
|
||||
unsigned uSeqCount = 0;
|
||||
unsigned uColCount = uInsane;
|
||||
for (;;)
|
||||
{
|
||||
char *Label;
|
||||
unsigned uSeqLength;
|
||||
char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
|
||||
if (0 == SeqData)
|
||||
break;
|
||||
AppendSeq(SeqData, uSeqLength, Label);
|
||||
}
|
||||
}
|
||||
|
||||
void MSA::ToFASTAFile(TextFile &File) const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
assert(uColCount > 0);
|
||||
const unsigned uLinesPerSeq = (GetColCount() - 1)/FASTA_BLOCK + 1;
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
File.PutString(">");
|
||||
File.PutString(GetSeqName(uSeqIndex));
|
||||
File.PutString("\n");
|
||||
|
||||
unsigned n = 0;
|
||||
for (unsigned uLine = 0; uLine < uLinesPerSeq; ++uLine)
|
||||
{
|
||||
unsigned uLetters = uColCount - uLine*FASTA_BLOCK;
|
||||
if (uLetters > FASTA_BLOCK)
|
||||
uLetters = FASTA_BLOCK;
|
||||
for (unsigned i = 0; i < uLetters; ++i)
|
||||
{
|
||||
char c = GetChar(uSeqIndex, n);
|
||||
File.PutChar(c);
|
||||
++n;
|
||||
}
|
||||
File.PutChar('\n');
|
||||
}
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user