6f0ba35953ccb6caa718309c2cd52e46d536ddfa

@@ -0,0 +1,30 @@
#---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for lxpack
#
# @history:
# @history:
# @+	    <Gloup> : Apr 97 : Created
# @+	    <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# Shared build configuration (its contents are not visible from this file).
include ../../config/auto.conf

# Sub-directories handled from here; presumably consumed by propagate.targ
# below -- confirm in that file.
DIRS = muscle3.8.31

include ../../config/targets/propagate.targ

include ../../config/targets/help.targ

# Re-invokes make on the `_action` target with ACTION set to this target's
# name ($@ expands to "all"). `_action` is not defined in this file;
# presumably it is provided by propagate.targ -- confirm.
all::
	$(MAKE) ACTION=$@ _action

# NOTE(review): this runs the `portclean` target inside a directory named
# `lxpack`. The file header also still reads "makefile for lxpack", so this
# rule may have been inherited from a template -- confirm that ./lxpack
# exists relative to this Makefile.
clean::
	$(MAKE) -C lxpack portclean
@@ -0,0 +1,30 @@
#---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for lxpack
#
# @history:
# @history:
# @+	    <Gloup> : Apr 97 : Created
# @+	    <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
# Shared build configuration (its contents are not visible from this file).
include ../../../config/auto.conf

# Sub-directories handled from here; presumably consumed by propagate.targ
# below -- confirm in that file.
DIRS = src

include ../../../config/targets/propagate.targ

include ../../../config/targets/help.targ

# Re-invokes make on the `_action` target with ACTION set to this target's
# name ($@ expands to "all"). `_action` is not defined in this file;
# presumably it is provided by propagate.targ -- confirm.
all::
	$(MAKE) ACTION=$@ _action

# NOTE(review): this runs the `portclean` target inside a directory named
# `lxpack`. The file header also still reads "makefile for lxpack", so this
# rule may have been inherited from a template -- confirm that ./lxpack
# exists relative to this Makefile.
clean::
	$(MAKE) -C lxpack portclean
@@ -0,0 +1,11 @@
include ../../../../config/auto.conf

# Default goal: build the muscle binary, then copy it into $(BINDIR).
all: muscle install

# `all` and `install` name actions, not files; declaring them phony keeps a
# stray file called "all" or "install" from silently disabling them.
.PHONY: all install

# Build the binary with the ./mk script found in this directory. CXX is
# handed to the script through its environment; the value is quoted so a
# compiler setting that contains spaces (e.g. "g++ -m64") is passed intact.
muscle:
	chmod +x ./mk
	CXX='$(CXX)' ./mk

# `install` copies the built binary, so it must depend on `muscle`;
# without this prerequisite a parallel build (make -j) could run the copy
# before the binary exists.
install: muscle
	cp muscle $(BINDIR)
	
@@ -0,0 +1,27 @@
MUSCLE v3.0 source code README
------------------------------

http://www.drive5.com/muscle

This version of MUSCLE was built and tested on two platforms:
Windows XP and Red Hat Linux 8.0.

On Windows, I used Microsoft Visual C++ .Net, which I find
to be the best C++ compile / edit / test environment I've
tried on any platform. The Microsoft project file is
muscle.vcproj.

The Linux make file is Makefile. This is a very simple-minded
make file (because I am a Linux development novice), so should
be easy to understand. By default, it uses shared libraries,
but I found this to give problems when copying between
different Linux versions. The fix was to use the linker
flags -lm -static (commented out), which gives a much bigger
but more portable binary. The posted binary was linked with
static libraries.

The source code was not written to be maintained by anyone
but me, so the usual apologies and caveats apply.

Bob Edgar,
January 2004
@@ -0,0 +1,802 @@
#include "muscle.h"
#include "msa.h"
#include "pwpath.h"
#include "profile.h"

#define	TRACE	0

// Write a human-readable dump of one profile position to the log:
// residue group, all-gaps flag, occupancy, the four gap-transition
// frequencies, and every strictly positive letter count.
static void LogPP(const ProfPos &PP)
	{
	Log("ResidueGroup   %u\n", PP.m_uResidueGroup);
	Log("AllGaps      %d\n", PP.m_bAllGaps);
	Log("Occ          %.3g\n", PP.m_fOcc);
	Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
	Log("Freqs        ");

// Only letters with a positive count are listed.
	unsigned uLetter = 0;
	while (uLetter < 20)
		{
		if (PP.m_fcCounts[uLetter] > 0)
			Log("%c=%.3g ", LetterToChar(uLetter), PP.m_fcCounts[uLetter]);
		++uLetter;
		}
	Log("\n");
	}

// Compare position i of profile PA with position i of profile PB and call
// Quit() on the first field that differs. Scalar fields are compared
// either exactly (eq) or through BTEq (be); the per-letter arrays
// m_fcCounts and m_AAScores are compared through BTEq for j = 0..19.
static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
	{
	const ProfPos &PPA = PA[i];
	const ProfPos &PPB = PB[i];
// eq(x): exact comparison of member m_<x>; on mismatch both positions are
// logged before quitting. be(x): same, but compared with BTEq.
#define	eq(x)	if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
#define be(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
	eq(bAllGaps)
	eq(uResidueGroup)

	be(LL)
	be(LG)
	be(GL)
	be(GG)
	be(fOcc)
	be(scoreGapOpen)
	be(scoreGapClose)

	for (unsigned j = 0; j < 20; ++j)
		{
// eqj/bej: per-letter variants; they report j in the Quit message and do
// not log the positions. eqj is only referenced by the disabled check below.
#define	eqj(x)	if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
#define bej(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
		bej(fcCounts[j]);
//		eqj(uSortOrder[j]) // may differ due to ties, don't check?
		bej(AAScores[j])
#undef eqj
#undef bej
		}
#undef eq
#undef be
	}

// Check that two profiles are equal: first their lengths, then every
// position via AssertProfPosEq. Any difference is reported through Quit().
void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB)
	{
	const bool bSameLength = (uLengthA == uLengthB);
	if (!bSameLength)
		Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);

	unsigned uPos = 0;
	while (uPos < uLengthB)
		{
		AssertProfPosEq(PA, PB, uPos);
		++uPos;
		}
	}

#if	DEBUG
// Debug-only consistency check of the gap-transition frequencies:
//  - at every position LL + LG + GL + GG must be 1 (per BTEq);
//  - for neighbouring positions, LL + GL of the left one must match
//    LL + LG of the right one (per BTEq).
// The neighbour relation is asserted from both sides, as in the two
// separate checks below.
static void ValidateProf(const ProfPos *Prof, unsigned uLength)
	{
	for (unsigned uPos = 0; uPos < uLength; ++uPos)
		{
		const ProfPos *ptrCur = Prof + uPos;

		FCOUNT fcTotal = ptrCur->m_LL + ptrCur->m_LG + ptrCur->m_GL + ptrCur->m_GG;
		assert(BTEq(fcTotal, 1.0));

		if (uPos != 0)
			{
			const ProfPos *ptrPrev = ptrCur - 1;
			FCOUNT fcPrevOut = ptrPrev->m_LL + ptrPrev->m_GL;
			FCOUNT fcCurIn = ptrCur->m_LL + ptrCur->m_LG;
			assert(BTEq(fcPrevOut, fcCurIn));
			}

// uPos < uLength here, so uPos + 1 cannot wrap around.
		if (uPos + 1 < uLength)
			{
			const ProfPos *ptrNext = ptrCur + 1;
			FCOUNT fcCurOut = ptrCur->m_LL + ptrCur->m_GL;
			FCOUNT fcNextIn = ptrNext->m_LL + ptrNext->m_LG;
			assert(BTEq(fcCurOut, fcNextIn));
			}
		}
	}
#else
#define ValidateProf(Prof, Length)	/* empty */
#endif

// Derive the score-related fields of position uPos from its frequencies:
// sort order, residue group, occupancy, gap open/close scores and the
// per-letter scores m_AAScores.
static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
	{
	ProfPos *ptrCol = Prof + uPos;

	SortCounts(ptrCol->m_fcCounts, ptrCol->m_uSortOrder);
	ptrCol->m_uResidueGroup = ResidueGroupFromFCounts(ptrCol->m_fcCounts);

// Occupancy is the sum of the two "letter in this column" transitions.
	ptrCol->m_fOcc = ptrCol->m_LL + ptrCol->m_GL;

// A gap opens here when the previous column has a letter and this one a
// gap, i.e. LG of this column.
	FCOUNT fcGapOpens = ptrCol->m_LG;

// A gap closes here when this column has a gap and the next a letter,
// i.e. GL of the next column. The last column has no successor, so the
// sum of its own gap transitions (GG + LG) is used instead.
	const bool bHasNext = (uPos + 1 < uLength);
	FCOUNT fcGapCloses = bHasNext ? Prof[uPos + 1].m_GL : ptrCol->m_GG + ptrCol->m_LG;

	ptrCol->m_scoreGapOpen = (SCORE) ((1.0 - fcGapOpens)*g_scoreGapOpen/2.0);
	ptrCol->m_scoreGapClose = (SCORE) ((1.0 - fcGapCloses)*g_scoreGapOpen/2.0);
#if	DOUBLE_AFFINE
	ptrCol->m_scoreGapOpen2 = (SCORE) ((1.0 - fcGapOpens)*g_scoreGapOpen2/2.0);
	ptrCol->m_scoreGapClose2 = (SCORE) ((1.0 - fcGapCloses)*g_scoreGapOpen2/2.0);
#endif

// Score of each letter against this column: frequency-weighted sum over
// the corresponding row of the score matrix.
	for (unsigned uRow = 0; uRow < g_AlphaSize; ++uRow)
		{
		SCORE scoreRow = 0;
		unsigned uCol = 0;
		while (uCol < g_AlphaSize)
			{
			scoreRow += ptrCol->m_fcCounts[uCol]*(*g_ptrScoreMatrix)[uRow][uCol];
			++uCol;
			}
		ptrCol->m_AAScores[uRow] = scoreRow;
		}
	}

// Recompute the score fields of every position in a profile of uLength
// columns, front to back.
void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
	{
	unsigned uPos = 0;
	while (uPos < uLength)
		{
		ScoresFromFreqsPos(Prof, uLength, uPos);
		++uPos;
		}
	}

// Emit one combined column for a 'D' edge: rows 0..uSeqCountA-1 receive
// column uColIndexA of msaA, the following uSeqCountB rows receive '-'.
// Both uColIndexA and uColIndexCombined are advanced by one.
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexCombined);
#endif
	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaA.GetChar(uRow, uColIndexA));

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, '-');

	uColIndexA += 1;
	uColIndexCombined += 1;
	}

// Emit one combined column for an 'I' edge: rows 0..uSeqCountA-1 receive
// '-', the following uSeqCountB rows receive column uColIndexB of msaB.
// Both uColIndexB and uColIndexCombined are advanced by one.
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
	  uColIndexB, uColIndexCombined);
#endif
	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, '-');

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaB.GetChar(uRowB, uColIndexB));

	uColIndexB += 1;
	uColIndexCombined += 1;
	}

// Append a run of columns that are not aligned to each other.
//
// uColCountA columns of msaA (starting at uColIndexA) and uColCountB
// columns of msaB (starting at uColIndexB) are copied into msaCombined
// starting at uColIndexCombined, each character passed through
// UnalignChar(). The shorter run is padded with '.' so both groups of rows
// span max(uColCountA, uColCountB) columns. On return the three column
// indexes have been advanced past the appended region.
static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif
// Width of the appended region: the longer of the two runs.
	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

// Rows of A: real columns first, then '.' padding up to the region width.
	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

// Rows of B (stored after the uSeqCountA rows of A): same scheme.
	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}

// Emit one combined column for an 'M' edge: rows 0..uSeqCountA-1 receive
// column uColIndexA of msaA, the following uSeqCountB rows receive column
// uColIndexB of msaB. All three column indexes are advanced by one.
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
  MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif

	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaA.GetChar(uRow, uColIndexA));

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaB.GetChar(uRowB, uColIndexB));

	uColIndexA += 1;
	uColIndexB += 1;
	uColIndexCombined += 1;
	}

// Build msaCombined from msaA and msaB by walking the edges of Path.
//
// msaCombined is cleared first and then given uSeqCountA + uSeqCountB
// rows: the rows of msaA come first, followed by the rows of msaB
// (names and ids are copied in that order). Each edge contributes one
// combined column:
//   'M' - column from A and column from B (AppendMatch)
//   'D' - column from A, gaps for B       (AppendDelete)
//   'I' - gaps for A, column from B       (AppendInsert)
// The function asserts that no columns are skipped between edges and that
// the result has exactly one column per edge.
void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("FastAlignProfiles\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

// Total column counts of the inputs. NOTE: inside the edge loop below,
// locals with the same names shadow these two constants.
	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

// Next unconsumed column of A / B and next column to write in the output.
// These are advanced by the Append* helpers through reference parameters.
	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
// Number of A columns lying between the current position and the column
// this edge refers to (prefix length - 1). Shadows the outer uColCountA.
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
// Same for B. Shadows the outer uColCountB.
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
// With both counts zero (as asserted) the call below appends nothing and
// leaves the three indexes unchanged.
		assert(uColCountA == 0);
		assert(uColCountB == 0);
		AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
// Unknown edge type: only caught when assertions are enabled.
			assert(false);
			}
		}
// Columns of A / B not consumed by the path. Here uColCountA/uColCountB are
// the outer constants (total column counts) again.
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
	assert(uInsertColCountA == 0);
	assert(uInsertColCountB == 0);
	AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

	assert(msaCombined.GetColCount() == uEdgeCount);
	}

// Stand-in profile position used by the SetGaps*/SetFreqs helpers when a
// prefix length is 0, i.e. there is no real column before the start of a
// profile. Its only non-zero values are m_LL and m_fOcc (both 1.0).
// The initializer is positional, so the order below must match the member
// order of ProfPos (declared elsewhere -- presumably profile.h; confirm).
static const ProfPos PPStart =
	{
	false,		//m_bAllGaps;
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
	1.0,	// m_LL;
	0.0,	// m_LG;
	0.0,	// m_GL;
	0.0,	// m_GG;
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // presumably m_AAScores[20] (was labelled m_ALScores) -- confirm against the struct
	0,		// m_uResidueGroup;
	1.0,	// m_fOcc;
	0.0,	// m_fcStartOcc;
	0.0,	// m_fcEndOcc;
	0.0,	// m_scoreGapOpen;
	0.0,	// m_scoreGapClose;
	};

// MM
//  Ai–1	Ai		Out
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
//  
//  Bj–1	Bj
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
static void SetGapsMM(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
	PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
	PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
	PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
	}

// MD
//  Ai–1	Ai		Out
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
//  
//  Bj		(-)
//  X		-	?L	LG
//  -		-	?G	GG
static void SetGapsMD(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wA*PPA.m_LL;
	PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
	PPO.m_GL = wA*PPA.m_GL;
	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
	}

// DD
//  Ai–1	Ai		Out
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
//  
//  (-)		(-)
//  -		-	??	GG
static void SetGapsDD(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wA*PPA.m_LL;
	PPO.m_LG = wA*PPA.m_LG;
	PPO.m_GL = wA*PPA.m_GL;
	PPO.m_GG = wA*PPA.m_GG + wB;
	}

// MI
//  Ai		(-)		Out
//  X		-	?L	LG
//  -		-	?G	GG

//  Bj–1	Bj
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
static void SetGapsMI(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wB*PPB.m_LL;
	PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
	PPO.m_GL = wB*PPB.m_GL;
	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
	}

// DM
//  Ai–1	Ai		Out
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
//  
//  (-)		Bj		
//  -		X		?L	GL
//  -		-		?G	GG
static void SetGapsDM(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wA*PPA.m_LL;
	PPO.m_LG = wA*PPA.m_LG;
	PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
	}

// IM
//  (-)		Ai		Out		
//  -		X	?L	GL
//  -		-	?G	GG

//  Bj–1	Bj
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
static void SetGapsIM(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wB*PPB.m_LL;
	PPO.m_LG = wB*PPB.m_LG;
	PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
	}

// ID
//  (-)		Ai		Out
//  -		X	?L	GL
//  -		-	?G	GG

//  Bj		(-)
//  X		-	?L	LG
//  -		-	?G	GG
static void SetGapsID(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = 0;
	PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
	PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
	}

// DI
//  Ai		(-)		Out
//  X		-	?L	LG
//  -		-	?G	GG

//  (-)		Bj
//  -		X	?L	GL
//  -		-	?G	GG
static void SetGapsDI(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = 0;
	PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
	PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
	}

// II
//  (-)		(-)		Out
//  -		-	??	GG

//  Bj–1	Bj
//  X		X	LL	LL
//  X		-	LG	LG
//  -		X	GL	GL
//  -		-	GG	GG
static void SetGapsII(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	PPO.m_LL = wB*PPB.m_LL;
	PPO.m_LG = wB*PPB.m_LG;
	PPO.m_GL = wB*PPB.m_GL;
	PPO.m_GG = wB*PPB.m_GG + wA;
	}

static void SetFreqs(
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos *POut, unsigned uColIndexOut)
	{
	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
	ProfPos &PPO = POut[uColIndexOut];

	if (g_bNormalizeCounts)
		{
		const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
		const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
		FCOUNT fTotal = 0;
		for (unsigned i = 0; i < 20; ++i)
			{
			const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
			PPO.m_fcCounts[i] = f;
			fTotal += f;
			}
		if (fTotal > 0)
			for (unsigned i = 0; i < 20; ++i)
				PPO.m_fcCounts[i] /= fTotal;
		}
	else
		{
		for (unsigned i = 0; i < 20; ++i)
			PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
		}
	}

// Build a new profile from profiles PA and PB by walking the edges of Path.
//
// One output column is produced per edge. For each edge the letter counts
// are filled by SetFreqs and the gap-transition frequencies by the
// SetGapsXY helper selected by (previous edge type X, current edge type Y);
// the edge before the first one is treated as 'M'. Afterwards the score
// fields are derived with ProfScoresFromFreqs.
//
// wA + wB is asserted to be 1 (per BTEq).
// The result array is allocated here with new[] and handed back through
// *ptrPOut, its length (the edge count) through *ptruLengthOut; this
// function does not free it.
// NOTE: the parameters uPrefixLengthA / uPrefixLengthB are not read; the
// loop body declares locals of the same names that shadow them.
void AlignTwoProfsGivenPath(const PWPath &Path,
  const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
  ProfPos **ptrPOut, unsigned *ptruLengthOut)
	{
#if	TRACE
	Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
	Path.LogMe();
#endif
	assert(BTEq(wA + wB, 1.0));

// uColIndexA / uColIndexB are only incremented and (under TRACE) logged;
// the source columns are addressed via the edge's prefix lengths instead.
	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexOut = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	ProfPos *POut = new ProfPos[uEdgeCount];
// Type of the previous edge; the first edge is handled as if it followed 'M'.
	char cPrevType = 'M';
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		const char cType = Edge.cType;

// These shadow the function parameters of the same names.
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;

#if	TRACE
		Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB,
		  uColIndexA,
		  uColIndexB);
#endif

		POut[uColIndexOut].m_bAllGaps = false;
		switch (cType)
			{
		case 'M':
			{
// Match: both profiles contribute letter counts with their own weights.
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			SetFreqs(
			  PA, uPrefixLengthA, wA,
			  PB, uPrefixLengthB, wB,
			  POut, uColIndexOut);
			switch (cPrevType)
				{
			case 'M':
				SetGapsMM(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
			  break;
			case 'D':
				SetGapsDM(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			case 'I':
				SetGapsIM(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			default:
				Quit("Bad cPrevType");
				}
			++uColIndexA;
			++uColIndexB;
			++uColIndexOut;
			break;
			}
		case 'D':
			{
// Delete: B is passed to SetFreqs with weight 0, so only A's counts
// contribute; the gap helpers still receive the real wB.
			assert(uPrefixLengthA > 0);
			SetFreqs(
			  PA, uPrefixLengthA, wA,
			  PB, uPrefixLengthB, 0,
			  POut, uColIndexOut);
			switch (cPrevType)
				{
			case 'M':
				SetGapsMD(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
			  break;
			case 'D':
				SetGapsDD(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			case 'I':
				SetGapsID(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			default:
				Quit("Bad cPrevType");
				}
			++uColIndexA;
			++uColIndexOut;
			break;
			}
		case 'I':
			{
// Insert: A is passed to SetFreqs with weight 0, so only B's counts
// contribute; the gap helpers still receive the real wA.
			assert(uPrefixLengthB > 0);
			SetFreqs(
			  PA, uPrefixLengthA, 0,
			  PB, uPrefixLengthB, wB,
			  POut, uColIndexOut);
			switch (cPrevType)
				{
			case 'M':
				SetGapsMI(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
			  break;
			case 'D':
				SetGapsDI(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			case 'I':
				SetGapsII(
				  PA, uPrefixLengthA, wA,
				  PB, uPrefixLengthB, wB,
				  POut, uColIndexOut);
				break;
			default:
				Quit("Bad cPrevType");
				}
			++uColIndexB;
			++uColIndexOut;
			break;
			}
		default:
// Unknown edge type: only caught when assertions are enabled.
			assert(false);
			}
		cPrevType = cType;
		}
	assert(uColIndexOut == uEdgeCount);

// Derive the score fields from the frequencies just written; ValidateProf
// expands to nothing unless DEBUG is set.
	ProfScoresFromFreqs(POut, uEdgeCount);
	ValidateProf(POut, uEdgeCount);

	*ptrPOut = POut;
	*ptruLengthOut = uEdgeCount;

#if	TRACE
	Log("AlignTwoProfsGivenPath:\n");
	ListProfile(POut, uEdgeCount, 0);
#endif
	}
@@ -0,0 +1,237 @@
#include "muscle.h"
#include "msa.h"
#include "pwpath.h"
#include "profile.h"

#define	TRACE	0

// Emit one combined column for a 'D' edge: rows 0..uSeqCountA-1 receive
// column uColIndexA of msaA, the following uSeqCountB rows receive '-'.
// Both uColIndexA and uColIndexCombined are advanced by one.
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexCombined);
#endif
	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaA.GetChar(uRow, uColIndexA));

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, '-');

	uColIndexA += 1;
	uColIndexCombined += 1;
	}

// Emit one combined column for an 'I' edge: rows 0..uSeqCountA-1 receive
// '-', the following uSeqCountB rows receive column uColIndexB of msaB.
// Both uColIndexB and uColIndexCombined are advanced by one.
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
	  uColIndexB, uColIndexCombined);
#endif
	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, '-');

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaB.GetChar(uRowB, uColIndexB));

	uColIndexB += 1;
	uColIndexCombined += 1;
	}

// Append a run of columns that are not aligned to each other.
//
// uColCountA columns of msaA (starting at uColIndexA) and uColCountB
// columns of msaB (starting at uColIndexB) are copied into msaCombined
// starting at uColIndexCombined, each character passed through
// UnalignChar(). The shorter run is padded with '.' so both groups of rows
// span max(uColCountA, uColCountB) columns. On return the three column
// indexes have been advanced past the appended region.
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif
// Width of the appended region: the longer of the two runs.
	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

// Rows of A: real columns first, then '.' padding up to the region width.
	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

// Rows of B (stored after the uSeqCountA rows of A): same scheme.
	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}

// Emit one combined column for an 'M' edge: rows 0..uSeqCountA-1 receive
// column uColIndexA of msaA, the following uSeqCountB rows receive column
// uColIndexB of msaB. All three column indexes are advanced by one.
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
  MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif

	unsigned uRow = 0;
	for (; uRow < uSeqCountA; ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaA.GetChar(uRow, uColIndexA));

// uRow now equals uSeqCountA: continue with the rows that belong to B.
	for (unsigned uRowB = 0; uRowB < uSeqCountB; ++uRowB, ++uRow)
		msaCombined.SetChar(uRow, uColIndexCombined, msaB.GetChar(uRowB, uColIndexB));

	uColIndexA += 1;
	uColIndexB += 1;
	uColIndexCombined += 1;
	}

// Build msaCombined from msaA and msaB by walking the edges of Path,
// allowing columns that the path does not cover.
//
// msaCombined is cleared first and then given uSeqCountA + uSeqCountB
// rows: the rows of msaA come first, followed by the rows of msaB
// (names and ids are copied in that order). Each edge contributes one
// combined column ('M' via AppendMatch, 'D' via AppendDelete, 'I' via
// AppendInsert). Columns of A or B that lie before an edge's column, or
// after the last edge, are emitted through AppendUnalignedTerminals.
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("AlignTwoMSAsGivenPathSW\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

// Total column counts of the inputs. NOTE: inside the edge loop below,
// locals with the same names shadow these two constants.
	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

// Next unconsumed column of A / B and next column to write in the output.
// These are advanced by the Append* helpers through reference parameters.
	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
// Number of A columns lying between the current position and the column
// this edge refers to (prefix length - 1). Shadows the outer uColCountA.
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
// Same for B. Shadows the outer uColCountB.
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

// Emit any skipped columns before the edge's own column; with both counts
// zero this appends nothing.
		AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
// Unknown edge type: only caught when assertions are enabled.
			assert(false);
			}
		}
// Columns of A / B remaining after the last edge. Here uColCountA/uColCountB
// are the outer constants (total column counts) again.
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

	AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
	}
@@ -0,0 +1,41 @@
#include "muscle.h"
#include "msa.h"
#include "profile.h"
#include "pwpath.h"
#include "textfile.h"
#include "timing.h"

// Align two MSAs profile-to-profile.
//
// A profile is built from each input with ProfileFromMSA, the two profiles
// are aligned with GlobalAlign (which fills Path), and msaOut is then
// assembled from msa1, msa2 and Path by AlignTwoMSAsGivenPath.
//
// bLockLeft  - set the gap-open score of the first column of both
//              profiles to MINUS_INFINITY.
// bLockRight - set the gap-close score of the last column of both
//              profiles to MINUS_INFINITY.
// Returns the score reported by GlobalAlign.
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
  bool bLockLeft, bool bLockRight)
	{
	const unsigned uLengthA = msa1.GetColCount();
	const unsigned uLengthB = msa2.GetColCount();

	ProfPos *PA = ProfileFromMSA(msa1);
	ProfPos *PB = ProfileFromMSA(msa2);

// The lock writes are skipped for a zero-column input: there is no first
// or last column to modify, and uLength-1 would wrap around for an
// unsigned zero.
	if (bLockLeft)
		{
		if (uLengthA > 0)
			PA[0].m_scoreGapOpen = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[0].m_scoreGapOpen = MINUS_INFINITY;
		}

	if (bLockRight)
		{
		if (uLengthA > 0)
			PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
		}

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);

// The profiles are owned by this function and released with delete[]
// (presumably ProfileFromMSA allocates them with new[] -- confirm).
	delete[] PA;
	delete[] PB;

	return Score;
	}
@@ -0,0 +1,31 @@
#include "muscle.h"
#include "msa.h"
#include "profile.h"
#include "pwpath.h"

SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
  unsigned uLengthB, PWPath &Path);

// Align two profiles and build the merged profile.
//
//   PA, uLengthA, wA   First profile, its length in columns and its weight.
//   PB, uLengthB, wB   Second profile, its length in columns and its weight.
//   Path               Receives the pair-wise path found by GlobalAlign.
//   ptrPout            Receives the merged profile.
//   ptruLengthOut      Receives the length of the merged profile.
//
// The weights are normalised to wA/(wA + wB) and wB/(wA + wB) before the
// profiles are merged. Returns the score reported by GlobalAlign.
SCORE AlignTwoProfs(
  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
	{
	assert(uLengthA < 100000);
	assert(uLengthB < 100000);

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

// Each profile is passed together with its own length (profile A used to be
// passed with the length of profile B).
	AlignTwoProfsGivenPath(Path, PA, uLengthA, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
	  ptrPout, ptruLengthOut);

#if	HYDRO
	if (ALPHA_Amino == g_Alpha)
		Hydro(*ptrPout, *ptruLengthOut);
#endif
	return Score;
	}
@@ -0,0 +1,170 @@
#include "muscle.h"
#include <stdio.h>
#include <ctype.h>
#include "msa.h"
#include "textfile.h"

const unsigned uCharsPerLine = 60;
const int MIN_NAME = 10;
const int MAX_NAME = 32;

static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);

// Length of the part of a sequence label that precedes the first blank,
// or of the whole label if it contains no blank.
static int GetAlnNameLength(const char *ptrName)
	{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
	}

// Write the alignment to File in CLUSTAL (.aln) format.
//
// The header line is the CLUSTAL W banner when g_bClwStrict is set, and a
// MUSCLE banner followed by an empty line otherwise. The alignment is then
// written in blocks of uCharsPerLine columns. Each block has one line per
// sequence (label truncated at its first blank, padded to a common width
// between MIN_NAME and MAX_NAME, residues upper-cased) followed by a
// consensus line produced by GetAlnConsensusChar.
//
// An alignment with no columns produces the header only.
void MSA::ToAlnFile(TextFile &File) const
	{
	if (g_bClwStrict)
		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
	else
		{
		File.PutString("MUSCLE ("
		  SHORT_VERSION ")"
		  " multiple sequence alignment\n");
		File.PutString("\n");
		}

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

// The block count below is computed as (uColCount - 1)/n + 1, which wraps
// around for an unsigned zero, so stop here when there is nothing to print.
	if (0 == uColCount)
		return;

// Common label width: longest label, clamped to [MIN_NAME, MAX_NAME].
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const int iLength = GetAlnNameLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
		}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

	const unsigned uLineCount = (uColCount - 1)/uCharsPerLine + 1;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
		{
		File.PutString("\n");
		const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;

		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = GetAlnNameLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;

		// Blank-pad the label, then cut it at the common width.
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

			File.PutFormat("%s      ", Name);
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
				{
				const char c = GetChar(uSeqIndex, uColIndex);
			// toupper requires a value representable as unsigned char.
				File.PutFormat("%c", toupper((unsigned char) c));
				}
			File.PutString("\n");
			}

	// Consensus line: an all-blank label followed by one symbol per column.
		memset(Name, ' ', MAX_NAME);
		Name[iLongestNameLength] = 0;
		File.PutFormat("%s      ", Name);
		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
		  ++uColIndex)
			{
			const char c = GetAlnConsensusChar(*this, uColIndex);
			File.PutChar(c);
			}
		File.PutString("\n");
		}
	}

// Return the CLUSTAL consensus symbol for one column of an alignment.
//
//   '*'  every sequence has the same letter in the column
//   ':'  all letters belong to one of the 'strong' groups listed below
//   '.'  all letters belong to one of the 'weaker' groups listed below
//   ' '  otherwise, and always for non-amino alphabets when the column
//        is not fully conserved
//
// The letters seen in the column are collected as a bit set indexed by the
// extended letter code returned by MSA::GetLetterEx.
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
	{
	const unsigned uSeqCount = a.GetSeqCount();
	unsigned BitMap = 0;
	unsigned Count = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
		assert(uLetter < 32);
		unsigned Bit = (1u << uLetter);
		if (!(BitMap & Bit))
			++Count;
		BitMap |= Bit;
		}

//	'*' indicates positions which have a single, fully conserved residue
	if (1 == Count)
		return '*';

	if (ALPHA_Amino != g_Alpha)
		return ' ';

// A group test succeeds when the column's letters are a non-empty subset
// of the group.
#define B(a)	(1u << AX_##a)
#define S2(a, b)		S(B(a) | B(b))
#define S3(a, b, c)		S(B(a) | B(b) | B(c))
#define S4(a, b, c, d)	S(B(a) | B(b) | B(c) | B(d))
#define S(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';

#define W3(a, b, c)				W(B(a) | B(b) | B(c))
#define W4(a, b, c, d)			W(B(a) | B(b) | B(c) | B(d))
#define W5(a, b, c, d, e)		W(B(a) | B(b) | B(c) | B(d) | B(e))
#define W6(a, b, c, d, e, f)	W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
#define W(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';

//	':' indicates that one of the following 'strong'
// groups is fully conserved
//                 STA
//                 NEQK
//                 NHQK
//                 NDEQ
//                 QHRK
//                 MILV
//                 MILF
//                 HY
//                 FYW
//
	S3(S, T, A)
	S4(N, E, Q, K)
	S4(N, H, Q, K)
	S4(N, D, E, Q)
	S4(Q, H, R, K)
	S4(M, I, L, V)
	S4(M, I, L, F)
	S2(H, Y)
	S3(F, Y, W)

//	'.' indicates that one of the following 'weaker' 
// groups is fully conserved
//                 CSA
//                 ATV
//                 SAG
//                 STNK
//                 STPA
//                 SGND
//                 SNDEQK
//                 NDEQHK
//                 NEQHRK
//                 FVLIM
//                 HFY
	W3(C, S, A)
	W3(A, T, V)
	W3(S, A, G)
	W4(S, T, N, K)
	W4(S, T, P, A)
	W4(S, G, N, D)
	W6(S, N, D, E, Q, K)
	W6(N, D, E, Q, H, K)
	W6(N, E, Q, H, R, K)
	W5(F, V, L, I, M)
	W3(H, F, Y)

// The helper macros are only meaningful inside this function.
#undef W
#undef W6
#undef W5
#undef W4
#undef W3
#undef S
#undef S4
#undef S3
#undef S2
#undef B

	return ' ';
	}
@@ -0,0 +1,283 @@
#include "muscle.h"
#include <ctype.h>

/***
From Bioperl docs:
Extended DNA / RNA alphabet
------------------------------------------
Symbol       Meaning      Nucleic Acid
------------------------------------------
    A            A           Adenine
    C            C           Cytosine
    G            G           Guanine
    T            T           Thymine
    U            U           Uracil
    M          A or C
    R          A or G
    W          A or T
    S          C or G
    Y          C or T
    K          G or T
    V        A or C or G
    H        A or C or T
    D        A or G or T
    B        C or G or T
    X      G or A or T or C
    N      G or A or T or C

IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
         Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
***/

unsigned g_CharToLetter[MAX_CHAR];
unsigned g_CharToLetterEx[MAX_CHAR];

char g_LetterToChar[MAX_ALPHA];
char g_LetterExToChar[MAX_ALPHA_EX];

char g_UnalignChar[MAX_CHAR];
char g_AlignChar[MAX_CHAR];

bool g_IsWildcardChar[MAX_CHAR];
bool g_IsResidueChar[MAX_CHAR];

ALPHA g_Alpha = ALPHA_Undefined;
unsigned g_AlphaSize = 0;

// Res(c, Letter): register character c as a standard residue whose letter
// code is Letter. Both the upper- and lower-case forms of c are mapped to
// Letter in g_CharToLetter and g_CharToLetterEx, the reverse tables map
// Letter back to the upper-case form, both forms are flagged in
// g_IsResidueChar, and the align / unalign tables map either form to the
// upper-case / lower-case form respectively.
// The expansion is a complete brace-enclosed block, so it is invoked
// without a trailing semicolon.
#define Res(c, Letter)												\
	{																\
	const unsigned char Upper = (unsigned char) toupper(c);			\
	const unsigned char Lower = (unsigned char) tolower(c);			\
	g_CharToLetter[Upper] = Letter;									\
	g_CharToLetter[Lower] = Letter;									\
	g_CharToLetterEx[Upper] = Letter;								\
	g_CharToLetterEx[Lower] = Letter;								\
	g_LetterToChar[Letter] = Upper;									\
	g_LetterExToChar[Letter] = Upper;								\
	g_IsResidueChar[Upper] = true;									\
	g_IsResidueChar[Lower] = true;									\
	g_AlignChar[Upper] = Upper;										\
	g_AlignChar[Lower] = Upper;										\
	g_UnalignChar[Upper] = Lower;									\
	g_UnalignChar[Lower] = Lower;									\
	}

// Wild(c, Letter): register character c as a wildcard whose extended letter
// code is Letter. Unlike Res, only the extended tables (g_CharToLetterEx,
// g_LetterExToChar) are updated; g_CharToLetter and g_LetterToChar are left
// untouched. Both case forms are flagged in g_IsResidueChar and in
// g_IsWildcardChar, and the align / unalign tables map either form to the
// upper-case / lower-case form respectively.
// The expansion is a complete brace-enclosed block, so it is invoked
// without a trailing semicolon.
#define Wild(c, Letter)												\
	{																\
	const unsigned char Upper = (unsigned char) toupper(c);			\
	const unsigned char Lower = (unsigned char) tolower(c);			\
	g_CharToLetterEx[Upper] = Letter;								\
	g_CharToLetterEx[Lower] = Letter;								\
	g_LetterExToChar[Letter] = Upper;								\
	g_IsResidueChar[Upper] = true;									\
	g_IsResidueChar[Lower] = true;									\
	g_AlignChar[Upper] = Upper;										\
	g_AlignChar[Lower] = Upper;										\
	g_UnalignChar[Upper] = Lower;									\
	g_UnalignChar[Lower] = Lower;									\
	g_IsWildcardChar[Lower] = true;									\
	g_IsWildcardChar[Upper] = true;									\
	}

// Number of standard (non-wildcard) letters in the given alphabet:
// 20 for amino acids, 4 for DNA and RNA. Any other value is reported
// through Quit; 0 is returned should Quit return.
static unsigned GetAlphaSize(ALPHA Alpha)
	{
	if (ALPHA_Amino == Alpha)
		return 20;

	if (ALPHA_RNA == Alpha || ALPHA_DNA == Alpha)
		return 4;

	Quit("Invalid Alpha=%d", Alpha);
	return 0;
	}

static void InitArrays()
	{
	memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
	memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));

	memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
	memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));

	memset(g_AlignChar, '?', sizeof(g_UnalignChar));
	memset(g_UnalignChar, '?', sizeof(g_UnalignChar));

	memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
	}

// Register c as a gap symbol: it maps to AX_GAP in the extended
// character table, becomes the character reported for AX_GAP, and is
// left unchanged by both the align and the unalign translation.
static void SetGapChar(char c)
	{
	const unsigned char uGap = (unsigned char) c;

	g_AlignChar[uGap] = uGap;
	g_UnalignChar[uGap] = uGap;

	g_LetterExToChar[AX_GAP] = uGap;
	g_CharToLetterEx[uGap] = AX_GAP;
	}

static void SetAlphaDNA()
	{
	Res('A', NX_A)
	Res('C', NX_C)
	Res('G', NX_G)
	Res('T', NX_T)
	Wild('M', NX_M)
	Wild('R', NX_R)
	Wild('W', NX_W)
	Wild('S', NX_S)
	Wild('Y', NX_Y)
	Wild('K', NX_K)
	Wild('V', NX_V)
	Wild('H', NX_H)
	Wild('D', NX_D)
	Wild('B', NX_B)
	Wild('X', NX_X)
	Wild('N', NX_N)
	}

// Fill the lookup tables for the RNA alphabet: A, C, G, U and T are
// registered as residues, the remaining symbols as wildcards.
// NX_U is an alias of NX_T, so 'U' and 'T' map to the same letter. The
// order of the two registrations matters: Res('T', ...) runs last, so the
// reverse tables (letter -> character) report 'T' for that letter.
static void SetAlphaRNA()
	{
	Res('A', NX_A)
	Res('C', NX_C)
	Res('G', NX_G)
	Res('U', NX_U)
	Res('T', NX_T)
	Wild('M', NX_M)
	Wild('R', NX_R)
	Wild('W', NX_W)
	Wild('S', NX_S)
	Wild('Y', NX_Y)
	Wild('K', NX_K)
	Wild('V', NX_V)
	Wild('H', NX_H)
	Wild('D', NX_D)
	Wild('B', NX_B)
	Wild('X', NX_X)
	Wild('N', NX_N)
	}

static void SetAlphaAmino()
	{
	Res('A', AX_A)
	Res('C', AX_C)
	Res('D', AX_D)
	Res('E', AX_E)
	Res('F', AX_F)
	Res('G', AX_G)
	Res('H', AX_H)
	Res('I', AX_I)
	Res('K', AX_K)
	Res('L', AX_L)
	Res('M', AX_M)
	Res('N', AX_N)
	Res('P', AX_P)
	Res('Q', AX_Q)
	Res('R', AX_R)
	Res('S', AX_S)
	Res('T', AX_T)
	Res('V', AX_V)
	Res('W', AX_W)
	Res('Y', AX_Y)

	Wild('B', AX_B)
	Wild('X', AX_X)
	Wild('Z', AX_Z)
	}

// Select the active alphabet.
//
// All character / letter tables are reset, '.' and '-' are registered as
// gap symbols ('-' last, so it is the character reported for AX_GAP), the
// tables are filled for the requested alphabet, and g_Alpha / g_AlphaSize
// are updated. An unknown alphabet is reported through Quit.
void SetAlpha(ALPHA Alpha)
	{
	InitArrays();

	SetGapChar('.');
	SetGapChar('-');

	switch (Alpha)
		{
	case ALPHA_Amino:
		SetAlphaAmino();
		break;

	case ALPHA_DNA:
		SetAlphaDNA();
	// Without this break control fell through into the RNA case, which
	// additionally registered 'U' as a residue for DNA input.
		break;

	case ALPHA_RNA:
		SetAlphaRNA();
		break;

	default:
		Quit("Invalid Alpha=%d", Alpha);
		}

	g_AlphaSize = GetAlphaSize(Alpha);
	g_Alpha = Alpha;

	if (g_bVerbose)
		Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
	}

// Character used to represent "any residue" in the active alphabet:
// 'X' for amino acids, 'N' for DNA and RNA. If no valid alphabet is
// selected the error is reported through Quit, and '?' is returned
// should Quit return.
char GetWildcardChar()
	{
	if (ALPHA_Amino == g_Alpha)
		return 'X';

	if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
		return 'N';

	Quit("Invalid Alpha=%d", g_Alpha);
	return '?';
	}

// True if c is one of the letters A, C, G, T, U, R, Y or N in either case.
// The NUL character is rejected explicitly: strchr treats the terminating
// NUL as part of the searched string and would otherwise report a match.
bool IsNucleo(char c)
	{
	return 0 != c && strchr("ACGTURYNacgturyn", c) != 0;
	}

// True if c is one of the letters A, G, C, T or N in either case.
// The NUL character is rejected explicitly: strchr treats the terminating
// NUL as part of the searched string and would otherwise report a match.
bool IsDNA(char c)
	{
	return 0 != c && strchr("AGCTNagctn", c) != 0;
	}

// True if c is one of the letters A, G, C, U or N in either case.
// The NUL character is rejected explicitly: strchr treats the terminating
// NUL as part of the searched string and would otherwise report a match.
bool IsRNA(char c)
	{
	return 0 != c && strchr("AGCUNagcun", c) != 0;
	}

static char InvalidLetters[256];
static int InvalidLetterCount = 0;

void ClearInvalidLetterWarning()
	{
	memset(InvalidLetters, 0, 256);
	}

// Record that the invalid character c was seen: its flag is set and
// the total number of occurrences is incremented. The second argument
// is not used by this function.
void InvalidLetterWarning(char c, char w)
	{
	const unsigned char uIndex = (unsigned char) c;

	InvalidLetterCount += 1;
	InvalidLetters[uIndex] = 1;
	}

// Issue a single warning listing every distinct invalid character that
// has been recorded, in ascending character-code order. Nothing is
// reported if no invalid character was recorded.
void ReportInvalidLetters()
	{
	if (0 != InvalidLetterCount)
		{
	// One slot per possible character plus the terminating NUL.
		char Letters[257];
		memset(Letters, 0, sizeof(Letters));

		int iLength = 0;
		int iCode = 0;
		while (iCode < 256)
			{
			if (InvalidLetters[iCode])
				{
				Letters[iLength] = (char) iCode;
				++iLength;
				}
			++iCode;
			}

		Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
		  ALPHAToStr(g_Alpha), Letters);
		}
	}
@@ -0,0 +1,106 @@
#ifndef	alpha_h
#define	alpha_h

bool StrHasAmino(const char *Str);
bool StrHasGap(const char *Str);
void ClearInvalidLetterWarning();
void InvalidLetterWarning(char c, char w);
void ReportInvalidLetters();

extern unsigned g_CharToLetter[];
extern unsigned g_CharToLetterEx[];

extern char g_LetterToChar[];
extern char g_LetterExToChar[];

extern char g_UnalignChar[];
extern char g_AlignChar[];

extern bool g_IsWildcardChar[];
extern bool g_IsResidueChar[];

// Table look-up helpers.
// Macros taking a character cast it to unsigned char before indexing, so
// that a char with a negative value cannot produce a negative index.
// Macros taking a letter code index the table directly with no range check.

// Character -> letter code (standard letters only / including extensions).
#define CharToLetter(c)		(g_CharToLetter[(unsigned char) (c)])
#define CharToLetterEx(c)	(g_CharToLetterEx[(unsigned char) (c)])

// Letter code -> character (standard letters only / including extensions).
#define LetterToChar(u)		(g_LetterToChar[u])
#define LetterExToChar(u)	(g_LetterExToChar[u])

// Character classification. IsGapChar evaluates its argument twice.
#define IsResidueChar(c)	(g_IsResidueChar[(unsigned char) (c)])
#define IsGapChar(c)		('-' == (c) || '.' == (c))
#define IsWildcardChar(c)	(g_IsWildcardChar[(unsigned char) (c)])

// Character translation through the align / unalign tables.
#define AlignChar(c)		(g_AlignChar[(unsigned char) (c)])
#define UnalignChar(c)		(g_UnalignChar[(unsigned char) (c)])

// AX=Amino alphabet with eXtensions (B, Z and X)
// The twenty standard residues come first (values 0..19, in alphabetical
// order of their one-letter codes), followed by the wildcard codes and
// finally the gap code.
enum AX
	{
	AX_A,
	AX_C,
	AX_D,
	AX_E,
	AX_F,
	AX_G,
	AX_H,
	AX_I,
	AX_K,
	AX_L,
	AX_M,
	AX_N,
	AX_P,
	AX_Q,
	AX_R,
	AX_S,
	AX_T,
	AX_V,
	AX_W,
	AX_Y,

	AX_X,	// Any

	AX_B,	// D or N
	AX_Z,	// E or Q

	AX_GAP,
	};
// Number of AX codes, gap included.
const unsigned AX_COUNT = AX_GAP + 1;

// NX=Nucleotide alphabet with extensions
// The four bases come first (values 0..3), followed by the ambiguity codes
// (each annotated with the bases it stands for) and finally the gap code.
enum NX
	{
	NX_A,
	NX_C,
	NX_G,
	NX_T,
	NX_U = NX_T,	// U shares the letter code of T

    NX_M, // AC
    NX_R, // AG
    NX_W, // AT
    NX_S, // CG
    NX_Y, // CT
    NX_K, // GT
    NX_V, // ACG
    NX_H, // ACT
    NX_D, // AGT
    NX_B, // CGT
    NX_X, // GATC
    NX_N, // GATC
	NX_GAP
	};
// Number of NX codes, gap included.
const unsigned NX_COUNT = NX_GAP + 1;

const unsigned MAX_ALPHA = 20;
const unsigned MAX_ALPHA_EX = AX_COUNT;
const unsigned MAX_CHAR = 256;

extern ALPHA g_Alpha;
extern unsigned g_AlphaSize;

void SetAlpha(ALPHA Alpha);
char GetWildcardChar();
bool IsNucleo(char c);
bool IsDNA(char c);
bool IsRNA(char c);

#endif	// alpha_h
@@ -0,0 +1,218 @@
#include "muscle.h"
#include "msa.h"
#include "objscore.h"

#define	TRACE	0

static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
  SCORE SmoothScore[], double dCeil)
	{
#define	Ceil(x)	((SCORE) ((x) > dCeil ? dCeil : (x)))

	if (1 != uWindowLength%2)
		Quit("WindowSmooth=%u must be odd", uWindowLength);

	if (uCount <= uWindowLength)
		{
		for (unsigned i = 0; i < uCount; ++i)
			SmoothScore[i] = 0;
		return;
		}

	const unsigned w2 = uWindowLength/2;
	for (unsigned i = 0; i < w2; ++i)
		{
		SmoothScore[i] = 0;
		SmoothScore[uCount - i - 1] = 0;
		}

	SCORE scoreWindowTotal = 0;
	for (unsigned i = 0; i < uWindowLength; ++i)
		{
		scoreWindowTotal += Ceil(Score[i]);
		}

	for (unsigned i = w2; ; ++i)
		{
		SmoothScore[i] = scoreWindowTotal/uWindowLength;
		if (i == uCount - w2 - 1)
			break;

		scoreWindowTotal -= Ceil(Score[i - w2]);
		scoreWindowTotal += Ceil(Score[i + w2 + 1]);
		}
#undef Ceil
	}

// Find columns that score above the given threshold.
// A range of scores is defined between the average
// and the maximum. The threshold is a fraction 0.0 .. 1.0
// within that range, where 0.0 is the average score
// and 1.0 is the maximum score.
// "Grade" is by analogy with grading on a curve.
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
  double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
	{
	SCORE scoreTotal = 0;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		scoreTotal += Score[uIndex];
	const SCORE scoreAvg = scoreTotal / uCount;

	SCORE scoreMax = MINUS_INFINITY;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		if (Score[uIndex] > scoreMax)
			scoreMax = Score[uIndex];

	unsigned uBestColCount = 0;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		{
		const SCORE s = Score[uIndex];
		const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
		if (dHeight >= dThreshold)
			{
			BestCols[uBestColCount] = uIndex;
			++uBestColCount;
			}
		}
	*ptruBestColCount = uBestColCount;
	}

// Select the columns that pass all three tests, applied in this order:
// (1) the score is not below dMinScore,
// (2) the smoothed score is not below dMinSmoothScore,
// (3) the column contains no gap.
// The indexes of the selected columns are written to BestCols in ascending
// order and their number to *ptruBestColCount.
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
  const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
  unsigned BestCols[], unsigned *ptruBestColCount)
	{
	const unsigned uCols = msa.GetColCount();

	unsigned uSelected = 0;
	for (unsigned uCol = 0; uCol < uCols; ++uCol)
		{
	// Short-circuit evaluation keeps the gap test for columns that already
	// passed both score tests.
		const bool bRejected = Score[uCol] < dMinScore ||
		  SmoothScore[uCol] < dMinSmoothScore ||
		  msa.ColumnHasGap(uCol);
		if (!bRejected)
			BestCols[uSelected++] = uCol;
		}
	*ptruBestColCount = uSelected;
	}

// Write a per-column listing of the alignment to the log: the column
// index, the character of every sequence in that column, the raw and the
// smoothed score, and a marker for each entry of BestCols that names the
// column. A heading with the last digit of each sequence index is
// written first.
static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
  unsigned BestCols[], unsigned uBestColCount)
	{
	const unsigned uSeqs = msa.GetSeqCount();
	const unsigned uCols = msa.GetColCount();

	Log("Col  ");
	for (unsigned uSeq = 0; uSeq < uSeqs; ++uSeq)
		Log("%u", uSeq%10);
	Log("  ");

	for (unsigned uCol = 0; uCol < uCols; ++uCol)
		{
		Log("%3u  ", uCol);

		unsigned uSeq = 0;
		while (uSeq < uSeqs)
			{
			Log("%c", msa.GetChar(uSeq, uCol));
			++uSeq;
			}

		Log("  %10.3f", Score[uCol]);
		Log("  %10.3f", SmoothScore[uCol]);

		for (unsigned uBest = 0; uBest < uBestColCount; ++uBest)
			{
			if (uCol == BestCols[uBest])
				Log(" <-- Best");
			}
		Log("\n");
		}
	}

// Thin out best columns that lie close together, keeping one anchor column
// per group.
//
// BestCols must be in ascending order. Starting from the first column not
// yet consumed, every following best column less than uWindowLength
// columns away is placed in the same group. From each group one column is
// kept:
//   - a single column is kept as is;
//   - of two columns, the higher-scoring one (the second on a tie);
//   - of more than two, the one closest to the center of the window that
//     starts at the first column of the group (the earliest on a tie).
//
// The anchors are written to AnchorCols in ascending order and their number
// to *ptruAnchorColCount.
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
  unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	unsigned uAnchorColCount = 0;
	for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
		{
		const unsigned uBestColIndex = BestCols[n];

	// Count the best columns that follow within the window.
		unsigned uCountWithinWindow = 0;
		for (unsigned i = n + 1; i < uBestColCount; ++i)
			{
			const unsigned uBestColIndex2 = BestCols[i];
			if (uBestColIndex2 - uBestColIndex >= uWindowLength)
				break;
			++uCountWithinWindow;
			}

		unsigned uAnchorCol = uBestColIndex;
		if (1 == uCountWithinWindow)
			{
			const unsigned uBestColIndex2 = BestCols[n+1];
			if (Scores[uBestColIndex] > Scores[uBestColIndex2])
				uAnchorCol = uBestColIndex;
			else
				uAnchorCol = uBestColIndex2;
			}
		else if (uCountWithinWindow > 1)
			{
		// Every member of the group, from BestCols[n] up to and including
		// BestCols[n + uCountWithinWindow], is a candidate, and the
		// distance is measured to the window center. (The distance used to
		// be measured from the window start over a range that skipped both
		// the first and the last member, which always selected
		// BestCols[n+1].)
			const unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
			unsigned uClosestDist = uWindowLength;
			for (unsigned i = n; i <= n + uCountWithinWindow; ++i)
				{
				const unsigned uColIndex = BestCols[i];
				const unsigned uDist = uColIndex > uWindowCenter ?
				  uColIndex - uWindowCenter : uWindowCenter - uColIndex;
				if (uDist < uClosestDist)
					{
					uAnchorCol = uColIndex;
					uClosestDist = uDist;
					}
				}
			}
		AnchorCols[uAnchorColCount] = uAnchorCol;
		++uAnchorColCount;
		n += uCountWithinWindow + 1;
		}
	*ptruAnchorColCount = uAnchorColCount;
	}

void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	const unsigned uColCount = msa.GetColCount();
	if (uColCount < 16)
		{
		*ptruAnchorColCount = 0;
		return;
		}

	SCORE *MatchScore = new SCORE[uColCount];
	SCORE *SmoothScore = new SCORE[uColCount];
	unsigned *BestCols = new unsigned[uColCount];

	GetLetterScores(msa, MatchScore);
	WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
	  g_dSmoothScoreCeil);

	unsigned uBestColCount;
	FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
	  BestCols, &uBestColCount);

#if	TRACE
	ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
#endif

	MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
	  ptruAnchorColCount);

	delete[] MatchScore;
	delete[] SmoothScore;
	delete[] BestCols;
	}
@@ -0,0 +1,206 @@
#include "muscle.h"
#include "pwpath.h"

#define TRACE 0

// Map the extra edge types 'E' and 'J' onto 'D' and 'I' respectively;
// every other character is returned unchanged.
static char XlatEdgeType(char c)
	{
	switch (c)
		{
	case 'E':
		return 'D';
	case 'J':
		return 'I';
	default:
		return c;
		}
	}

// Format trace-back bits as text of the form "xM xD xI" for logging,
// where each 'x' is replaced by the predecessor state recorded in Bits
// for the M, D and I state respectively. An 'x' is left in place when
// the corresponding field matches none of the known values.
// The result points to a static buffer that is overwritten by the next
// call.
static const char *BitsToStr(char Bits)
	{
	static char Str[] = "xM xD xI";

// The buffer persists between calls, so restore the placeholders first;
// otherwise a letter written by an earlier call would be reported again
// whenever a field matches no case below.
	Str[0] = 'x';
	Str[3] = 'x';
	Str[6] = 'x';

	switch (Bits & BIT_xM)
		{
	case BIT_MM:
		Str[0] = 'M';
		break;
	case BIT_DM:
		Str[0] = 'D';
		break;
	case BIT_IM:
		Str[0] = 'I';
		break;
		}

	switch (Bits & BIT_xD)
		{
	case BIT_MD:
		Str[3] = 'M';
		break;
	case BIT_DD:
		Str[3] = 'D';
		break;
		}

	switch (Bits & BIT_xI)
		{
	case BIT_MI:
		Str[6] = 'M';
		break;
	case BIT_II:
		Str[6] = 'I';
		break;
		}

	return Str;
	}

// Decode the predecessor state recorded in Bits for the state cType.
//
//   Bits    Trace-back bits of one cell.
//   cType   State whose predecessor is wanted: 'M', 'D' or 'I' (and
//           'E' or 'J' when DOUBLE_AFFINE is enabled).
//
// Returns the letter of the predecessor state. An unknown state or an
// unrecognised bit pattern is reported through Quit; '?' is returned
// should Quit return.
static inline char XChar(char Bits, char cType)
	{
	if ('M' == cType)
		{
		if ((Bits & BIT_xM) == BIT_MM)
			return 'M';
		if ((Bits & BIT_xM) == BIT_DM)
			return 'D';
		if ((Bits & BIT_xM) == BIT_IM)
			return 'I';
#if	DOUBLE_AFFINE
		if ((Bits & BIT_xM) == BIT_EM)
			return 'E';
		if ((Bits & BIT_xM) == BIT_JM)
			return 'J';
#endif
		Quit("Huh!?");
		return '?';
		}

	if ('D' == cType)
		{
		if ((Bits & BIT_xD) == BIT_MD)
			return 'M';
		if ((Bits & BIT_xD) == BIT_DD)
			return 'D';
		Quit("Huh!?");
		return '?';
		}

	if ('I' == cType)
		{
		if ((Bits & BIT_xI) == BIT_MI)
			return 'M';
		if ((Bits & BIT_xI) == BIT_II)
			return 'I';
		Quit("Huh!?");
		return '?';
		}

#if	DOUBLE_AFFINE
	if ('E' == cType)
		{
		if ((Bits & BIT_xE) == BIT_ME)
			return 'M';
		if ((Bits & BIT_xE) == BIT_EE)
			return 'E';
		Quit("Huh!?");
		return '?';
		}

	if ('J' == cType)
		{
		if ((Bits & BIT_xJ) == BIT_MJ)
			return 'M';
		if ((Bits & BIT_xJ) == BIT_JJ)
			return 'J';
		Quit("Huh!?");
		return '?';
		}
#endif

	Quit("Huh?");
	return '?';
	}

// Reconstruct an alignment path from a bit-encoded trace-back matrix.
//
//   TraceBack   Matrix of trace-back bits, indexed [prefix length A]
//               [prefix length B].
//   uLengthA    Length of sequence / profile A.
//   uLengthB    Length of sequence / profile B.
//   LastEdge    Type of the final edge of the path.
//   Path        Cleared, then filled with the edges of the path.
//
// The walk starts at (uLengthA, uLengthB) and prepends one edge per step
// until both prefix lengths reach zero. Edge types 'E' and 'J' are stored
// in the path as 'D' and 'I' (see XlatEdgeType) but are kept unchanged for
// decoding the next step. An inconsistent matrix is reported through Quit.
void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
  char LastEdge, PWPath &Path)
	{
#if	TRACE
	Log("BitTraceBack\n");
#endif
	Path.Clear();

	PWEdge Edge;
	Edge.uPrefixLengthA = uLengthA;
	Edge.uPrefixLengthB = uLengthB;
	Edge.cType = LastEdge;
	for (;;)
		{
#if	TRACE
		Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
#endif
	// Store the translated type in the path, then restore the original
	// type, which is the one the trace-back bits are keyed on.
		char cSave = Edge.cType;
		Edge.cType = XlatEdgeType(cSave);
		Path.PrependEdge(Edge);
		Edge.cType = cSave;

		unsigned PLA = Edge.uPrefixLengthA;
		unsigned PLB = Edge.uPrefixLengthB;
		char Bits = TraceBack[PLA][PLB];
		char NextEdgeType = XChar(Bits, Edge.cType);
#if	TRACE
		Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
#endif
	// Step back: a match consumes a position of both A and B, a delete a
	// position of A only, an insert a position of B only.
		switch (Edge.cType)
			{
		case 'M':
			{
			if (Edge.uPrefixLengthA == 0)
				Quit("BitTraceBack MA=0");
			if (Edge.uPrefixLengthB == 0)
				Quit("BitTraceBack MB=0");
			--(Edge.uPrefixLengthA);
			--(Edge.uPrefixLengthB);
			break;
			}
		case 'D':
		case 'E':
			{
			if (Edge.uPrefixLengthA == 0)
				Quit("BitTraceBack DA=0");
			--(Edge.uPrefixLengthA);
			break;
			}
		case 'I':
		case 'J':
			{
			if (Edge.uPrefixLengthB == 0)
				Quit("BitTraceBack IB=0");
			--(Edge.uPrefixLengthB);
			break;
			}
		default:
		// %c needs the edge type character, not the whole edge structure.
			Quit("BitTraceBack: Invalid edge %c", Edge.cType);
			}

		if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
			break;

		Edge.cType = NextEdgeType;
		}

#if	TRACE
	Path.LogMe();
#endif
	}
@@ -0,0 +1,28 @@
#include "muscle.h"

int BLOSUM62[20][20] =
    {
//    A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y 
    { 4,  0, -2, -1, -2,  0, -2, -1, -1, -1, -1, -2, -1, -1, -1,  1,  0,  0, -3, -2},  // A
    { 0,  9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2},  // C
    {-2, -3,  6,  2, -3, -1, -1, -3, -1, -4, -3,  1, -1,  0, -2,  0, -1, -3, -4, -3},  // D
    {-1, -4,  2,  5, -3, -2,  0, -3,  1, -3, -2,  0, -1,  2,  0,  0, -1, -2, -3, -2},  // E
    {-2, -2, -3, -3,  6, -3, -1,  0, -3,  0,  0, -3, -4, -3, -3, -2, -2, -1,  1,  3},  // F
    { 0, -3, -1, -2, -3,  6, -2, -4, -2, -4, -3,  0, -2, -2, -2,  0, -2, -3, -2, -3},  // G
    {-2, -3, -1,  0, -1, -2,  8, -3, -1, -3, -2,  1, -2,  0,  0, -1, -2, -3, -2,  2},  // H
    {-1, -1, -3, -3,  0, -4, -3,  4, -3,  2,  1, -3, -3, -3, -3, -2, -1,  3, -3, -1},  // I
    {-1, -3, -1,  1, -3, -2, -1, -3,  5, -2, -1,  0, -1,  1,  2,  0, -1, -2, -3, -2},  // K
    {-1, -1, -4, -3,  0, -4, -3,  2, -2,  4,  2, -3, -3, -2, -2, -2, -1,  1, -2, -1},  // L
    {-1, -1, -3, -2,  0, -3, -2,  1, -1,  2,  5, -2, -2,  0, -1, -1, -1,  1, -1, -1},  // M
    {-2, -3,  1,  0, -3,  0,  1, -3,  0, -3, -2,  6, -2,  0,  0,  1,  0, -3, -4, -2},  // N
    {-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2,  7, -1, -2, -1, -1, -2, -4, -3},  // P
    {-1, -3,  0,  2, -3, -2,  0, -3,  1, -2,  0,  0, -1,  5,  1,  0, -1, -2, -2, -1},  // Q
    {-1, -3, -2,  0, -3, -2,  0, -3,  2, -2, -1,  0, -2,  1,  5, -1, -1, -3, -3, -2},  // R
    { 1, -1,  0,  0, -2,  0, -1, -2,  0, -2, -1,  1, -1,  0, -1,  4,  1, -2, -3, -2},  // S
    { 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  1,  5,  0, -2, -2},  // T
    { 0, -1, -3, -2, -1, -3, -3,  3, -2,  1,  1, -3, -2, -2, -3, -2,  0,  4, -3, -1},  // V
    {-3, -2, -4, -3,  1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11,  2},  // W
    {-2, -2, -3, -2,  3, -3,  2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1,  2,  7},  // Y
    };

double BLOSUM62_Expected = -0.5209;
@@ -0,0 +1,118 @@
#include "muscle.h"

#define GAPVAL		0.3
#define GAPGAPVAL	5.0

// Blosum62 log-average factor matrix
static float Blosum62LA[20][20] =
	{
#define v(x)	((float) x)
#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
	v(R), v(S), v(T), v(V), v(W), v(Y) },

// Blosum62	log	average matrix
//				A			C			D			E			F
//				G			H			I			K			L
//				M			N			P			Q			R
//				S			T			V			W			Y
S_ROW( 0, 'A',  3.9029401,  0.8679881,  0.5446049,  0.7412640,  0.4648942, 
                1.0568696,  0.5693654,  0.6324813,  0.7753898,  0.6019460, 
                0.7231498,  0.5883077,  0.7541214,  0.7568035,  0.6126988, 
                1.4721037,  0.9844022,  0.9364584,  0.4165484,  0.5426125)

S_ROW( 1, 'C',  0.8679881, 19.5765802,  0.3014542,  0.2859347,  0.4389910, 
                0.4203886,  0.3550472,  0.6534589,  0.3491296,  0.6422760, 
                0.6113537,  0.3978026,  0.3795628,  0.3657796,  0.3089379, 
                0.7384148,  0.7405530,  0.7558448,  0.4499807,  0.4342013)

S_ROW( 2, 'D',  0.5446049,  0.3014542,  7.3979253,  1.6878109,  0.2989696, 
                0.6343015,  0.6785593,  0.3390155,  0.7840905,  0.2866128, 
                0.3464547,  1.5538520,  0.5987177,  0.8970811,  0.5732000, 
                0.9135051,  0.6947898,  0.3365004,  0.2321050,  0.3456829)

S_ROW( 3, 'E',  0.7412640,  0.2859347,  1.6878109,  5.4695276,  0.3307441, 
                0.4812675,  0.9600400,  0.3305223,  1.3082782,  0.3728734, 
                0.5003421,  0.9112983,  0.6792027,  1.9017376,  0.9607983, 
                0.9503570,  0.7414260,  0.4289431,  0.3743021,  0.4964664)

S_ROW( 4, 'F',  0.4648942,  0.4389910,  0.2989696,  0.3307441,  8.1287983, 
                0.3406407,  0.6519893,  0.9457698,  0.3440433,  1.1545978, 
                1.0043715,  0.3542882,  0.2874440,  0.3339729,  0.3807263, 
                0.4399736,  0.4816930,  0.7450894,  1.3743775,  2.7693817)

S_ROW( 5, 'G',  1.0568696,  0.4203886,  0.6343015,  0.4812675,  0.3406407, 
                6.8763075,  0.4929663,  0.2750096,  0.5888716,  0.2845039, 
                0.3954865,  0.8637114,  0.4773858,  0.5386498,  0.4499840, 
                0.9035965,  0.5792712,  0.3369551,  0.4216898,  0.3487141)

S_ROW( 6, 'H',  0.5693654,  0.3550472,  0.6785593,  0.9600400,  0.6519893, 
                0.4929663, 13.5060070,  0.3262878,  0.7788884,  0.3806759, 
                0.5841316,  1.2220028,  0.4728797,  1.1679835,  0.9170473, 
                0.7367319,  0.5575021,  0.3394474,  0.4440859,  1.7979036)

S_ROW( 7, 'I',  0.6324813,  0.6534589,  0.3390155,  0.3305223,  0.9457698, 
                0.2750096,  0.3262878,  3.9979299,  0.3963730,  1.6944349, 
                1.4777449,  0.3279345,  0.3846629,  0.3829375,  0.3547509, 
                0.4431634,  0.7798163,  2.4175121,  0.4088732,  0.6303898)

S_ROW( 8, 'K',  0.7753898,  0.3491296,  0.7840905,  1.3082782,  0.3440433, 
                0.5888716,  0.7788884,  0.3963730,  4.7643359,  0.4282702, 
                0.6253033,  0.9398419,  0.7037741,  1.5543233,  2.0768092, 
                0.9319192,  0.7929060,  0.4565429,  0.3589319,  0.5321784)

S_ROW( 9, 'L',  0.6019460,  0.6422760,  0.2866128,  0.3728734,  1.1545978, 
                0.2845039,  0.3806759,  1.6944349,  0.4282702,  3.7966214, 
                1.9942957,  0.3100430,  0.3711219,  0.4773261,  0.4739194, 
                0.4288939,  0.6603292,  1.3142355,  0.5680359,  0.6920589)

S_ROW(10, 'M',  0.7231498,  0.6113537,  0.3464547,  0.5003421,  1.0043715, 
                0.3954865,  0.5841316,  1.4777449,  0.6253033,  1.9942957, 
                6.4814549,  0.4745299,  0.4238960,  0.8642486,  0.6226249, 
                0.5985578,  0.7938018,  1.2689365,  0.6103022,  0.7083636)

S_ROW(11, 'N',  0.5883077,  0.3978026,  1.5538520,  0.9112983,  0.3542882, 
                0.8637114,  1.2220028,  0.3279345,  0.9398419,  0.3100430, 
                0.4745299,  7.0940964,  0.4999337,  1.0005835,  0.8586298, 
                1.2315289,  0.9841525,  0.3690340,  0.2777841,  0.4860309)

S_ROW(12, 'P',  0.7541214,  0.3795628,  0.5987177,  0.6792027,  0.2874440, 
                0.4773858,  0.4728797,  0.3846629,  0.7037741,  0.3711219, 
                0.4238960,  0.4999337, 12.8375452,  0.6412803,  0.4815348, 
                0.7555033,  0.6888962,  0.4430825,  0.2818321,  0.3635216)

S_ROW(13, 'Q',  0.7568035,  0.3657796,  0.8970811,  1.9017376,  0.3339729, 
                0.5386498,  1.1679835,  0.3829375,  1.5543233,  0.4773261, 
                0.8642486,  1.0005835,  0.6412803,  6.2444210,  1.4057958, 
                0.9655559,  0.7913219,  0.4667781,  0.5093584,  0.6110951)

S_ROW(14, 'R',  0.6126988,  0.3089379,  0.5732000,  0.9607983,  0.3807263, 
                0.4499840,  0.9170473,  0.3547509,  2.0768092,  0.4739194, 
                0.6226249,  0.8586298,  0.4815348,  1.4057958,  6.6655769, 
                0.7671661,  0.6777544,  0.4200721,  0.3951049,  0.5559652)

S_ROW(15, 'S',  1.4721037,  0.7384148,  0.9135051,  0.9503570,  0.4399736, 
                0.9035965,  0.7367319,  0.4431634,  0.9319192,  0.4288939, 
                0.5985578,  1.2315289,  0.7555033,  0.9655559,  0.7671661, 
                3.8428476,  1.6139205,  0.5652240,  0.3853031,  0.5575206)

S_ROW(16, 'T',  0.9844022,  0.7405530,  0.6947898,  0.7414260,  0.4816930, 
                0.5792712,  0.5575021,  0.7798163,  0.7929060,  0.6603292, 
                0.7938018,  0.9841525,  0.6888962,  0.7913219,  0.6777544, 
                1.6139205,  4.8321048,  0.9809432,  0.4309317,  0.5731577)

S_ROW(17, 'V',  0.9364584,  0.7558448,  0.3365004,  0.4289431,  0.7450894, 
                0.3369551,  0.3394474,  2.4175121,  0.4565429,  1.3142355, 
                1.2689365,  0.3690340,  0.4430825,  0.4667781,  0.4200721, 
                0.5652240,  0.9809432,  3.6921553,  0.3744576,  0.6580390)

S_ROW(18, 'W',  0.4165484,  0.4499807,  0.2321050,  0.3743021,  1.3743775, 
                0.4216898,  0.4440859,  0.4088732,  0.3589319,  0.5680359, 
                0.6103022,  0.2777841,  0.2818321,  0.5093584,  0.3951049, 
                0.3853031,  0.4309317,  0.3744576, 38.1077830,  2.1098056)

S_ROW(19, 'Y',  0.5426125,  0.4342013,  0.3456829,  0.4964664,  2.7693817, 
                0.3487141,  1.7979036,  0.6303898,  0.5321784,  0.6920589, 
                0.7083636,  0.4860309,  0.3635216,  0.6110951,  0.5559652, 
                0.5575206,  0.5731577,  0.6580390,  2.1098056,  9.8322054)
	};
@@ -0,0 +1,666 @@
#include "muscle.h"
#include "clust.h"
#include "clustset.h"
#include <stdio.h>

#define TRACE		0

// Construct an empty clustering; Create() does the actual work.
// Every pointer that the destructor passes to delete[] is set to null
// here, so that destroying an object on which Create() was never called
// is safe.
Clust::Clust()
	{
	m_Nodes = 0;
	m_dDist = 0;
	m_ClusterIndexToNodeIndex = 0;
	m_ptrClusterList = 0;
	m_ptrSet = 0;

	m_uNodeCount = 0;
	m_uLeafCount = 0;
	m_uClusterCount = 0;

	m_JoinStyle = JOIN_Undefined;
	}

// Release the arrays owned by the clustering: the cluster-to-node index
// map, the distance array and the node array.
Clust::~Clust()
	{
	delete[] m_ClusterIndexToNodeIndex;
	delete[] m_dDist;
	delete[] m_Nodes;
	}

// Build a cluster tree over the leaves of Set.
//
//   Set      Supplies the number of leaves and the leaf-to-leaf distances
//            (through ComputeDist).
//   Method   Clustering method; selects the join style and the linkage
//            (centroid) style. An unknown method is reported through Quit.
//
// At least two leaves are required. A tree over N leaves has 2N - 1 nodes:
// nodes 0..N-1 are the leaves, the remaining N - 1 nodes are internal and
// are produced by one CreateCluster() call each.
void Clust::Create(ClustSet &Set, CLUSTER Method)
	{
	m_ptrSet = &Set;

	SetLeafCount(Set.GetLeafCount());

// Translate the method into a join style and a linkage style.
	switch (Method)
		{
	case CLUSTER_UPGMA:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Avg;
		break;

	case CLUSTER_UPGMAMax:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Max;
		break;

	case CLUSTER_UPGMAMin:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Min;
		break;

	case CLUSTER_UPGMB:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Biased;
		break;

	case CLUSTER_NeighborJoining:
		m_JoinStyle = JOIN_NeighborJoining;
		m_CentroidStyle = LINKAGE_NeighborJoining;
		break;

	default:
		Quit("Clust::Create, invalid method %d", Method);
		}

// Zero leaves and a single leaf are both rejected.
	if (m_uLeafCount <= 1)
		Quit("Clust::Create: no leaves");

	m_uNodeCount = 2*m_uLeafCount - 1;
	m_Nodes = new ClustNode[m_uNodeCount];
	m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];

// Initialise the nodes. Each leaf starts as a cluster of size one that
// contains only itself and is put on the cluster list; internal nodes
// start empty.
	m_ptrClusterList = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		ClustNode &Node = m_Nodes[uNodeIndex];
		Node.m_uIndex = uNodeIndex;
		if (uNodeIndex < m_uLeafCount)
			{
			Node.m_uSize = 1;
			Node.m_uLeafIndexes = new unsigned[1];
			Node.m_uLeafIndexes[0] = uNodeIndex;
			AddToClusterList(uNodeIndex);
			}
		else
			Node.m_uSize = 0;
		}

// Compute initial distance matrix between leaves
// (one distance per unordered pair i > j; progress is reported once every
// 10000 pairs).
	SetProgressDesc("Build dist matrix");
	unsigned uPairIndex = 0;
	const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
	for (unsigned i = 0; i < m_uLeafCount; ++i)
		for (unsigned j = 0; j < i; ++j)
			{
			const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
			SetDist(i, j, dDist);
			if (0 == uPairIndex%10000)
				Progress(uPairIndex, uPairCount);
			++uPairIndex;
			}
	ProgressStepsDone();

// Call CreateCluster once for each internal node in the tree
	SetProgressDesc("Build guide tree");
	m_uClusterCount = m_uLeafCount;
	const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
	for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
	// i is the 1-based count of internal nodes, used for progress only.
		unsigned i = uNodeIndex + 1 - m_uLeafCount;
		Progress(i, uInternalNodeCount);
		CreateCluster();
		}
	ProgressStepsDone();
	}

// Perform one join step: pick the pair of clusters to merge, create the new
// internal node for them, and compute the distance from the new node to
// every cluster still on the cluster list.
void Clust::CreateCluster()
	{
	unsigned uLeftNodeIndex;
	unsigned uRightNodeIndex;
	float dLeftLength;
	float dRightLength;
	ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);

// Internal nodes are handed out in increasing index order as the cluster
// count shrinks.
	const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;

	JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
	  uNewNodeIndex);

#if	TRACE
	Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
	  uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
#endif

// Compute distances to other clusters
	--m_uClusterCount;
	for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
	  uNodeIndex = GetNextCluster(uNodeIndex))
		{
		if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
			continue;

		if (uNewNodeIndex == uNodeIndex)
			continue;

		const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
		SetDist(uNewNodeIndex, uNodeIndex, dDist);
		}

#if	REDLACK
// Metric bookkeeping is only needed in REDLACK builds, so the whole
// traversal is compiled out otherwise instead of walking the list for nothing.
	for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
	  uNodeIndex = GetNextCluster(uNodeIndex))
		{
		if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
			continue;

		if (uNewNodeIndex == uNodeIndex)
			continue;

		const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
		InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
		}
#endif
	}

// Select the next pair of clusters to join, dispatching on m_JoinStyle.
// The chosen node indexes and their branch lengths are written through the
// four output pointers. Calls Quit() for an unrecognised join style.
void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
  float *ptrdLeftLength, float *ptrdRightLength)
	{
	if (JOIN_NearestNeighbor == m_JoinStyle)
		{
		ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
		  ptrdRightLength);
		return;
		}

	if (JOIN_NeighborJoining == m_JoinStyle)
		{
		ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
		  ptrdRightLength);
		return;
		}

	Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
	}

// Nearest-neighbor join: take the pair with the smallest metric and give
// each side a branch length of half the pair distance minus the height
// already accumulated below that node.
// Outputs: *ptruLeftIndex / *ptruRightIndex receive the chosen node indexes,
// *ptrdLeftLength / *ptrdRightLength the corresponding branch lengths.
void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
	{
	unsigned uMinLeftNodeIndex;
	unsigned uMinRightNodeIndex;
	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);

	const float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);

	const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
	const float dRightHeight = GetHeight(uMinRightNodeIndex);

	*ptruLeftIndex = uMinLeftNodeIndex;
	*ptruRightIndex = uMinRightNodeIndex;
	*ptrdLeftLength = dMinDist/2 - dLeftHeight;
	*ptrdRightLength = dMinDist/2 - dRightHeight;
	}

// Neighbor-joining join: take the pair with the smallest metric (see
// GetMinMetric) and split the pair distance between the two branches using
// the r values of the two nodes:
//   left  = (d + rL - rR)/2
//   right = (d - rL + rR)/2
// Outputs: *ptruLeftIndex / *ptruRightIndex receive the chosen node indexes,
// *ptrdLeftLength / *ptrdRightLength the corresponding branch lengths.
void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
	{
	unsigned uMinLeftNodeIndex;
	unsigned uMinRightNodeIndex;
	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);

	const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
	const float rL = Calc_r(uMinLeftNodeIndex);
	const float rR = Calc_r(uMinRightNodeIndex);

	const float dLeftLength = (dDistLR + rL - rR)/2;
	const float dRightLength = (dDistLR - rL + rR)/2;

	*ptruLeftIndex = uMinLeftNodeIndex;
	*ptruRightIndex = uMinRightNodeIndex;
	*ptrdLeftLength = dLeftLength;
	*ptrdRightLength = dRightLength;
	}

void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
  float dRightLength, unsigned uNodeIndex)
	{
	ClustNode &Parent = m_Nodes[uNodeIndex];
	ClustNode &Left = m_Nodes[uLeftIndex];
	ClustNode &Right = m_Nodes[uRightIndex];

	Left.m_dLength = dLeftLength;
	Right.m_dLength = dRightLength;

	Parent.m_ptrLeft = &Left;
	Parent.m_ptrRight = &Right;

	Left.m_ptrParent = &Parent;
	Right.m_ptrParent = &Parent;

	const unsigned uLeftSize = Left.m_uSize;
	const unsigned uRightSize = Right.m_uSize;
	const unsigned uParentSize = uLeftSize + uRightSize;
	Parent.m_uSize = uParentSize;

	assert(0 == Parent.m_uLeafIndexes);
	Parent.m_uLeafIndexes = new unsigned[uParentSize];

	const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
	const unsigned uRightBytes = uRightSize*sizeof(unsigned);
	memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
	memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);

	DeleteFromClusterList(uLeftIndex);
	DeleteFromClusterList(uRightIndex);
	AddToClusterList(uNodeIndex);
	}

// Return the sum of distances from uNodeIndex to every other cluster on the
// cluster list, divided by (cluster count - 2). Returns 0 when exactly two
// clusters remain, which also avoids dividing by zero.
float Clust::Calc_r(unsigned uNodeIndex) const
	{
	const unsigned uClusterCount = GetClusterCount();
	if (2 == uClusterCount)
		return 0;

	float dSum = 0;
	unsigned uOther = GetFirstCluster();
	while (uInsane != uOther)
		{
		if (uOther != uNodeIndex)
			dSum += GetDist(uNodeIndex, uOther);
		uOther = GetNextCluster(uOther);
		}
	return dSum/(uClusterCount - 2);
	}

// Distance between the newly joined node uNewNodeIndex and another cluster
// uNodeIndex, computed with the linkage rule held in m_CentroidStyle.
// Calls Quit() for an unrecognised style.
float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	if (LINKAGE_Avg == m_CentroidStyle)
		return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);

	if (LINKAGE_Min == m_CentroidStyle)
		return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);

	if (LINKAGE_Max == m_CentroidStyle)
		return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);

	if (LINKAGE_Biased == m_CentroidStyle)
		return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);

	if (LINKAGE_NeighborJoining == m_CentroidStyle)
		return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);

	Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
	return (float) g_dNAN;
	}

// Minimum linkage: the smaller of the distances from uNodeIndex to the two
// children of uNewNodeIndex.
float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
	const unsigned uRight = GetRightIndex(uNewNodeIndex);
	const float dToLeft = GetDist(uLeft, uNodeIndex);
	const float dToRight = GetDist(uRight, uNodeIndex);
	if (dToLeft < dToRight)
		return dToLeft;
	return dToRight;
	}

// Maximum linkage: the larger of the distances from uNodeIndex to the two
// children of uNewNodeIndex.
float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
	const unsigned uRight = GetRightIndex(uNewNodeIndex);
	const float dToLeft = GetDist(uLeft, uNodeIndex);
	const float dToRight = GetDist(uRight, uNodeIndex);
	if (dToLeft > dToRight)
		return dToLeft;
	return dToRight;
	}

// Average linkage as implemented here: the unweighted mean of the distances
// from uNodeIndex to the two children of uNewNodeIndex.
float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
	const unsigned uRight = GetRightIndex(uNewNodeIndex);
	const float dToLeft = GetDist(uLeft, uNodeIndex);
	const float dToRight = GetDist(uRight, uNodeIndex);
	return (dToLeft + dToRight)/2;
	}

// Neighbor-joining update: with L and R the children of uNewNodeIndex,
// returns (d(L,node) + d(R,node) - d(L,R))/2.
float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
	const unsigned uRight = GetRightIndex(uNewNodeIndex);
	const float dBetweenChildren = GetDist(uLeft, uRight);
	const float dToLeft = GetDist(uLeft, uNodeIndex);
	const float dToRight = GetDist(uRight, uNodeIndex);
	return (dToLeft + dToRight - dBetweenChildren)/2;
	}

// A UPGMA variant reverse-engineered from the MAFFT source.
// Blends the minimum and the mean of the two child distances, weighted by
// g_dSUEFF:  min*(1 - g_dSUEFF) + (dL + dR)*g_dSUEFF/2.
float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeft = GetLeftIndex(uNewNodeIndex);
	const unsigned uRight = GetRightIndex(uNewNodeIndex);

// The child-to-child distance is fetched as in the original code; it does
// not enter the result.
	const float dBetweenChildren = GetDist(uLeft, uRight);
	(void) dBetweenChildren;

	const float dToLeft = GetDist(uLeft, uNodeIndex);
	const float dToRight = GetDist(uRight, uNodeIndex);

	float dSmaller = dToRight;
	if (dToLeft < dToRight)
		dSmaller = dToLeft;
	const float dTotal = dToLeft + dToRight;

	const float dBlend = dSmaller*(1 - g_dSUEFF) + dTotal*g_dSUEFF/2;
	return dBlend;
	}

// Return the current value of m_uClusterCount. Create() sets it to the leaf
// count and CreateCluster() decrements it once per join.
unsigned Clust::GetClusterCount() const
	{
	return m_uClusterCount;
	}

void Clust::LogMe() const
	{
	Log("Clust %u leaves, %u nodes, %u clusters.\n",
	  m_uLeafCount, m_uNodeCount, m_uClusterCount);

	Log("Distance matrix\n");
	const unsigned uNodeCount = GetNodeCount();
	Log("       ");
	for (unsigned i = 0; i < uNodeCount - 1; ++i)
		Log(" %7u", i);
	Log("\n");

	Log("       ");
	for (unsigned i = 0; i < uNodeCount - 1; ++i)
		Log("  ------");
	Log("\n");

	for (unsigned i = 0; i < uNodeCount - 1; ++i)
		{
		Log("%4u:  ", i);
		for (unsigned j = 0; j < i; ++j)
			Log(" %7.2g", GetDist(i, j));
		Log("\n");
		}

	Log("\n");
	Log("Node  Size  Prnt  Left  Rght   Length  Name\n");
	Log("----  ----  ----  ----  ----   ------  ----\n");
	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		const ClustNode &Node = m_Nodes[uNodeIndex];
		Log("%4u  %4u", uNodeIndex, Node.m_uSize);
		if (0 != Node.m_ptrParent)
			Log("  %4u", Node.m_ptrParent->m_uIndex);
		else
			Log("      ");

		if (0 != Node.m_ptrLeft)
			Log("  %4u", Node.m_ptrLeft->m_uIndex);
		else
			Log("      ");

		if (0 != Node.m_ptrRight)
			Log("  %4u", Node.m_ptrRight->m_uIndex);
		else
			Log("      ");

		if (uNodeIndex != m_uNodeCount - 1)
			Log("  %7.3g", Node.m_dLength);
		if (IsLeaf(uNodeIndex))
			{
			const char *ptrName = GetNodeName(uNodeIndex);
			if (0 != ptrName)
				Log("  %s", ptrName);
			}
		if (GetRootNodeIndex() == uNodeIndex)
			Log("    [ROOT]");
		Log("\n");
		}
	}

// Bounds-checked access to a node; calls Quit() when uNodeIndex is not
// below m_uNodeCount.
const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
	{
	const bool bInRange = (uNodeIndex < m_uNodeCount);
	if (!bInRange)
		Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
	return *(m_Nodes + uNodeIndex);
	}

// A node is a leaf when its index is below the leaf count; Create() places
// the leaves in the first m_uLeafCount slots of the node array.
bool Clust::IsLeaf(unsigned uNodeIndex) const
	{
	return uNodeIndex < m_uLeafCount;
	}

unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
	{
	const ClustNode &Node = GetNode(uNodeIndex);
	return Node.m_uSize;
	}

unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
	{
	const ClustNode &Node = GetNode(uNodeIndex);
	if (0 == Node.m_ptrLeft)
		Quit("Clust::GetLeftIndex: leaf");
	return Node.m_ptrLeft->m_uIndex;
	}

unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
	{
	const ClustNode &Node = GetNode(uNodeIndex);
	if (0 == Node.m_ptrRight)
		Quit("Clust::GetRightIndex: leaf");
	return Node.m_ptrRight->m_uIndex;
	}

float Clust::GetLength(unsigned uNodeIndex) const
	{
	const ClustNode &Node = GetNode(uNodeIndex);
	return Node.m_dLength;
	}

// Record the number of leaves and allocate the distance matrix sized for
// all 2*leaves - 1 nodes. Calls Quit() when uLeafCount is 0 or 1.
// The matrix contents are left uninitialised; entries are filled through
// SetDist().
void Clust::SetLeafCount(unsigned uLeafCount)
	{
	if (uLeafCount <= 1)
		Quit("Clust::SetLeafCount(%u)", uLeafCount);

	m_uLeafCount = uLeafCount;
	const unsigned uNodeCount = GetNodeCount();

// Triangular matrix size excluding diagonal (all zeros in our case).
	m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;

// Release any matrix from an earlier call so repeated calls do not leak.
// m_dDist is null after construction, which delete[] accepts.
	delete[] m_dDist;
	m_dDist = new float[m_uTriangularMatrixSize];
	}

// Return the leaf count recorded by SetLeafCount().
unsigned Clust::GetLeafCount() const
	{
	return m_uLeafCount;
	}

// Map an unordered pair of node indexes onto a position in the packed
// triangular distance array: lower + higher*(higher - 1)/2.
// Calls Quit() when either index is not below the node count.
// NOTE(review): two equal indexes do not get a slot of their own (the
// diagonal is not stored); callers in this file avoid that case.
unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
	{
	const unsigned uNodeCount = GetNodeCount();
	if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
		Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);

	unsigned uHigh = uIndex1;
	unsigned uLow = uIndex2;
	if (uIndex1 < uIndex2)
		{
		uHigh = uIndex2;
		uLow = uIndex1;
		}

	const unsigned v = uLow + (uHigh*(uHigh - 1))/2;
	assert(v < m_uTriangularMatrixSize);
	return v;
	}

// Read the stored distance for the pair (uIndex1, uIndex2); the order of
// the two indexes does not matter.
float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
	{
	return m_dDist[VectorIndex(uIndex1, uIndex2)];
	}

// Store dDist as the distance for the pair (uIndex1, uIndex2); the order of
// the two indexes does not matter.
void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
	{
	m_dDist[VectorIndex(uIndex1, uIndex2)] = dDist;
	}

float Clust::GetHeight(unsigned uNodeIndex) const
	{
	if (IsLeaf(uNodeIndex))
		return 0;

	const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
	const unsigned uRightIndex = GetRightIndex(uNodeIndex);
	const float dLeftLength = GetLength(uLeftIndex);
	const float dRightLength = GetLength(uRightIndex);
	const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
	const float dRightHeight = dRightLength + GetHeight(uRightIndex);
	return (dLeftHeight + dRightHeight)/2;
	}

const char *Clust::GetNodeName(unsigned uNodeIndex) const
	{
	if (!IsLeaf(uNodeIndex))
		Quit("Clust::GetNodeName, is not leaf");
	return m_ptrSet->GetLeafName(uNodeIndex);
	}

// Id of a leaf node as reported by the ClustSet; returns 0 for any index
// that is not below the leaf count.
unsigned Clust::GetNodeId(unsigned uNodeIndex) const
	{
	if (uNodeIndex < GetLeafCount())
		return m_ptrSet->GetLeafId(uNodeIndex);
	return 0;
	}

// Return entry uLeafIndex of the leaf list of node uNodeIndex.
// Calls Quit() when uLeafIndex is not below the node's size, or when the
// stored value is not below m_uNodeCount.
unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
	{
	const ClustNode &Node = GetNode(uNodeIndex);
	if (uLeafIndex >= Node.m_uSize)
		Quit("Clust::GetLeaf, invalid index");

	const unsigned uLeaf = Node.m_uLeafIndexes[uLeafIndex];
	if (uLeaf >= m_uNodeCount)
		Quit("Clust::GetLeaf, index out of range");
	return uLeaf;
	}

// Index of the node at the head of the cluster list, or uInsane when the
// list is empty.
unsigned Clust::GetFirstCluster() const
	{
	if (0 != m_ptrClusterList)
		return m_ptrClusterList->m_uIndex;
	return uInsane;
	}

unsigned Clust::GetNextCluster(unsigned uIndex) const
	{
	ClustNode *ptrNode = &m_Nodes[uIndex];
	if (0 == ptrNode->m_ptrNextCluster)
		return uInsane;
	return ptrNode->m_ptrNextCluster->m_uIndex;
	}

void Clust::DeleteFromClusterList(unsigned uNodeIndex)
	{
	assert(uNodeIndex < m_uNodeCount);
	ClustNode *ptrNode = &m_Nodes[uNodeIndex];
	ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
	ClustNode *ptrNext = ptrNode->m_ptrNextCluster;

	if (0 != ptrNext)
		ptrNext->m_ptrPrevCluster = ptrPrev;
	if (0 == ptrPrev)
		{
		assert(m_ptrClusterList == ptrNode);
		m_ptrClusterList = ptrNext;
		}
	else
		ptrPrev->m_ptrNextCluster = ptrNext;

	ptrNode->m_ptrNextCluster = 0;
	ptrNode->m_ptrPrevCluster = 0;
	}

void Clust::AddToClusterList(unsigned uNodeIndex)
	{
	assert(uNodeIndex < m_uNodeCount);
	ClustNode *ptrNode = &m_Nodes[uNodeIndex];

	if (0 != m_ptrClusterList)
		m_ptrClusterList->m_ptrPrevCluster = ptrNode;

	ptrNode->m_ptrNextCluster = m_ptrClusterList;
	ptrNode->m_ptrPrevCluster = 0;

	m_ptrClusterList = ptrNode;
	}

// Metric used to rank candidate pairs for joining, chosen by m_JoinStyle.
// Calls Quit() for an unrecognised join style.
float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
	{
	if (JOIN_NearestNeighbor == m_JoinStyle)
		return ComputeMetricNearestNeighbor(uIndex1, uIndex2);

	if (JOIN_NeighborJoining == m_JoinStyle)
		return ComputeMetricNeighborJoining(uIndex1, uIndex2);

	Quit("Clust::ComputeMetric");
	return 0;
	}

// Neighbor-joining metric for the pair (i, j): d(i,j) - (r(i) + r(j)),
// where r is computed by Calc_r.
float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
	{
	const float dRi = Calc_r(i);
	const float dRj = Calc_r(j);
	const float dPair = GetDist(i, j);
	const float dResult = dPair - (dRi + dRj);
	return dResult;
	}

// Nearest-neighbor metric for the pair (i, j): simply the stored distance.
float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
	{
	return (float) GetDist(i, j);
	}

// Scan every unordered pair of clusters on the cluster list and return the
// smallest metric. The pair achieving it is written to *ptruIndex1 and
// *ptruIndex2; both receive uInsane when no pair has a metric below
// PLUS_INFINITY. Ties keep the first pair found.
float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
	{
	float dBest = PLUS_INFINITY;
	unsigned uBestLeft = uInsane;
	unsigned uBestRight = uInsane;

	unsigned uLeft = GetFirstCluster();
	while (uInsane != uLeft)
		{
	// Only pair uLeft with clusters that come after it on the list.
		unsigned uRight = GetNextCluster(uLeft);
		while (uInsane != uRight)
			{
			const float dCandidate = ComputeMetric(uLeft, uRight);
			if (dCandidate < dBest)
				{
				dBest = dCandidate;
				uBestLeft = uLeft;
				uBestRight = uRight;
				}
			uRight = GetNextCluster(uRight);
			}
		uLeft = GetNextCluster(uLeft);
		}

	*ptruIndex1 = uBestLeft;
	*ptruIndex2 = uBestRight;
	return dBest;
	}

// Find the pair of clusters with the smallest metric. Currently forwards
// straight to the brute-force scan; outputs and return value are those of
// GetMinMetricBruteForce().
float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
	{
	return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
	}
@@ -0,0 +1,148 @@
#ifndef Clust_h
#define Clust_h

class Clust;
class ClustNode;
class ClustSet;
class Phylip;
class SortedNode;

const unsigned RB_NIL = ((unsigned) 0xfff0);

// One node of a Clust tree. All members are public and are manipulated
// directly by Clust.
// NOTE(review): the destructor frees m_uLeafIndexes but no copy constructor
// or assignment operator is declared, so copying a ClustNode would lead to
// a double delete; confirm that nodes are never copied.
class ClustNode
	{
public:
	// Start with "insane" sentinel values for index, size and length, and
	// with every pointer null.
	ClustNode() :
		m_uIndex(uInsane),
		m_uSize(uInsane),
		m_dLength((float) dInsane),
		m_ptrLeft(0),
		m_ptrRight(0),
		m_ptrParent(0),
		m_ptrNextCluster(0),
		m_ptrPrevCluster(0),
		m_uLeafIndexes(0)
		{
		}
	// The node owns its leaf-index array.
	~ClustNode()
		{
		delete[] m_uLeafIndexes;
		}

	unsigned m_uIndex;				// position of this node in the node array
	unsigned m_uSize;				// number of entries in m_uLeafIndexes
	float m_dLength;				// branch length, set when joined to a parent
	ClustNode *m_ptrLeft;			// left child, null if none
	ClustNode *m_ptrRight;			// right child, null if none
	ClustNode *m_ptrParent;			// parent, null if none
	ClustNode *m_ptrNextCluster;	// next entry on the cluster list
	ClustNode *m_ptrPrevCluster;	// previous entry on the cluster list
	unsigned *m_uLeafIndexes;		// owned array of leaf indexes under this node
	};

// Builds a binary cluster tree over the leaves of a ClustSet.
// Create() does all the work; the remaining public methods query the
// resulting nodes and the pairwise distance matrix.
// NOTE(review): several members declared below (InitMetric, InsertMetric,
// DeleteMetric, ListMetric, the RB* and ValidateRB* functions, ToPhylip)
// have no definition in the part of the sources reviewed here; presumably
// they live in other translation units -- confirm before relying on them.
class Clust
	{
public:
	Clust();
	virtual ~Clust();

	// Build the tree for the given set using the given clustering method.
	void Create(ClustSet &Set, CLUSTER Method);

	unsigned GetLeafCount() const;

	unsigned GetClusterCount() const;
	unsigned GetClusterSize(unsigned uNodeIndex) const;
	unsigned GetLeaf(unsigned uClusterIndex, unsigned uLeafIndex) const;

	// Node count is derived from the leaf count: a binary tree over N leaves
	// has 2N - 1 nodes. The root is the last node.
	unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
	const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
	unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }

	// Per-node queries.
	const ClustNode &GetNode(unsigned uNodeIndex) const;
	bool IsLeaf(unsigned uNodeIndex) const;
	unsigned GetLeftIndex(unsigned uNodeIndex) const;
	unsigned GetRightIndex(unsigned uNodeIndex) const;
	float GetLength(unsigned uNodeIndex) const;
	float GetHeight(unsigned uNodeIndex) const;
	const char *GetNodeName(unsigned uNodeIndex) const;
	unsigned GetNodeId(unsigned uNodeIndex) const;

	JOIN GetJoinStyle() const { return m_JoinStyle; }
	LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }

	// Access to the packed triangular distance matrix; index order is
	// irrelevant.
	void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
	float GetDist(unsigned uIndex1, unsigned uIndex2) const;

	void ToPhylip(Phylip &tree);

	void LogMe() const;

//private:
	// Records the leaf count and allocates the distance matrix.
	void SetLeafCount(unsigned uLeafCount);

	// One join step, and the node wiring it performs.
	void CreateCluster();
	void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex, 
	  float dLeftLength, float dRightLength, unsigned uNewNodeIndex);

	// Selection of the next pair to join; results are returned through the
	// pointer arguments.
	void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
	  float *ptrdLeftLength, float *ptrdRightLength);
	void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
	  float *ptrdLeftLength, float *ptrdRightLength);
	void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
	  float *ptrdLeftLength, float *ptrdRightLength);

	// Distance from a newly joined node to another cluster, one function per
	// linkage rule; ComputeDist dispatches on m_CentroidStyle.
	float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
	float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
	float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
	float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
	float ComputeDistNeighborJoining(unsigned uNewNewIndex, unsigned uNodeIndex);
	float ComputeDistMAFFT(unsigned uNewNewIndex, unsigned uNodeIndex);

	float Calc_r(unsigned uNodeIndex) const;

	// Maps a pair of node indexes to a position in m_dDist.
	unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;

	// Iteration over the cluster list; both return uInsane at the end.
	unsigned GetFirstCluster() const;
	unsigned GetNextCluster(unsigned uNodeIndex) const;

	// Pair-ranking metric; ComputeMetric dispatches on m_JoinStyle.
	float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
	float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
	float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;

	void InitMetric(unsigned uMaxNodeIndex);
	void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
	float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
	float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
	void DeleteMetric(unsigned uIndex);
	void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
	void ListMetric() const;

	// Maintenance of the doubly linked cluster list.
	void DeleteFromClusterList(unsigned uNodeIndex);
	void AddToClusterList(unsigned uNodeIndex);

	void RBDelete(unsigned RBNode);
	unsigned RBInsert(unsigned i, unsigned j, float fMetric);

	unsigned RBNext(unsigned RBNode) const;
	unsigned RBPrev(unsigned RBNode) const;
	unsigned RBMin(unsigned RBNode) const;
	unsigned RBMax(unsigned RBNode) const;

	void ValidateRB(const char szMsg[] = 0) const;
	void ValidateRBNode(unsigned Node, const char szMsg[]) const;

//private:
	JOIN m_JoinStyle;						// how the next pair to join is chosen
	LINKAGE m_CentroidStyle;				// how distances to a joined node are computed
	ClustNode *m_Nodes;						// owned array of m_uNodeCount nodes
	unsigned *m_ClusterIndexToNodeIndex;	// owned array, allocated in Create()
	unsigned *m_NodeIndexToClusterIndex;
	unsigned m_uLeafCount;
	unsigned m_uNodeCount;
	unsigned m_uClusterCount;				// clusters not yet joined
	unsigned m_uTriangularMatrixSize;		// number of floats in m_dDist
	float *m_dDist;							// owned packed triangular distance matrix
	ClustSet *m_ptrSet;						// not owned; set by Create()
	ClustNode *m_ptrClusterList;			// head of the list of unjoined clusters
	};

#endif // Clust_h
@@ -0,0 +1,339 @@
#include "muscle.h"
#include "cluster.h"
#include "distfunc.h"

// Smaller of two floats; returns d2 when the values compare equal or
// unordered.
static inline float Min(float d1, float d2)
	{
	if (d1 < d2)
		return d1;
	return d2;
	}

// Larger of two floats; returns d2 when the values compare equal or
// unordered.
static inline float Max(float d1, float d2)
	{
	if (d1 > d2)
		return d1;
	return d2;
	}

// Arithmetic mean of two floats; the halving is done in double precision
// and the result narrowed back to float.
static inline float Mean(float d1, float d2)
	{
	const double dHalfSum = (d1 + d2)/2.0;
	return (float) dHalfSum;
	}

#if	_DEBUG
// Debug-only consistency check of the disjoint list.
// Verifies that every prev/next link is mirrored by its neighbour, that the
// only node without a predecessor is the list head, that the list does not
// loop, and that its length equals the number of parentless nodes among the
// first uNodeCount nodes. Any violation is logged and ends in Quit().
void ClusterTree::Validate(unsigned uNodeCount)
	{
	unsigned uListLength = 0;
	for (ClusterNode *pNode = m_ptrDisjoints; pNode;
	  pNode = pNode->GetNextDisjoint())
		{
		ClusterNode *pBefore = pNode->GetPrevDisjoint();
		ClusterNode *pAfter = pNode->GetNextDisjoint();

		if (0 == pBefore)
			{
		// No predecessor: this node must be the head.
			if (m_ptrDisjoints != pNode)
				{
				Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
				  pNode->GetIndex(),
				  m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
				pNode->LogMe();
				Quit("ClusterTree::Validate()");
				}
			}
		else if (pNode != pBefore->GetNextDisjoint())
			{
			Log("Prev->This mismatch, prev=\n");
			pBefore->LogMe();
			Log("This=\n");
			pNode->LogMe();
			Quit("ClusterTree::Validate()");
			}

		if (0 != pAfter && pNode != pAfter->GetPrevDisjoint())
			{
			Log("Next->This mismatch, next=\n");
			pAfter->LogMe();
			Log("This=\n");
			pNode->LogMe();
			Quit("ClusterTree::Validate()");
			}

	// More entries than nodes means the list has a cycle.
		++uListLength;
		if (uListLength > m_uNodeCount)
			Quit("Loop in disjoint list");
		}

	unsigned uOrphanCount = 0;
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		if (0 == m_Nodes[n].GetParent())
			++uOrphanCount;
		}

	if (uListLength != uOrphanCount)
		Quit("Disjoints = %u Parentless = %u\n", uListLength,
		  uOrphanCount);
	}
#else	// !_DEBUG
// In non-debug builds calls to Validate() expand to nothing.
#define	Validate(uNodeCount)	// empty
#endif

void ClusterNode::LogMe() const
	{
	unsigned uClusterSize = GetClusterSize();
	Log("[%02u] w=%5.3f  CW=%5.3f  LBW=%5.3f  RBW=%5.3f  LWT=%5.3f  RWT=%5.3f  L=%02d  R=%02d  P=%02d  NxDj=%02d  PvDj=%02d  Sz=%02d  {",
		m_uIndex,
		m_dWeight,
		GetClusterWeight(),
		GetLeftBranchWeight(),
		GetRightBranchWeight(),
		GetLeftWeight(),
		GetRightWeight(),
		m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,
		m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,
		m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,
		m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,
		m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,
		uClusterSize);
	for (unsigned i = 0; i < uClusterSize; ++i)
		Log(" %u", GetClusterLeaf(i)->GetIndex());
	Log(" }\n");
	}

// Number of leaves in the sub-tree rooted at this node. A node with no
// children counts as a single leaf.
unsigned ClusterNode::GetClusterSize() const
	{
	if (0 == m_ptrLeft && 0 == m_ptrRight)
		return 1;

	const unsigned uUnderLeft = (0 != m_ptrLeft) ? m_ptrLeft->GetClusterSize() : 0;
	const unsigned uUnderRight = (0 != m_ptrRight) ? m_ptrRight->GetClusterSize() : 0;
	const unsigned uTotal = uUnderLeft + uUnderRight;
	assert(uTotal > 0);
	return uTotal;
	}

double ClusterNode::GetClusterWeight() const
	{
	double dWeight = 0.0;
	if (0 != m_ptrLeft)
		dWeight += m_ptrLeft->GetClusterWeight();
	if (0 != m_ptrRight)
		dWeight += m_ptrRight->GetClusterWeight();
	return dWeight + GetWeight();
	}

double ClusterNode::GetLeftBranchWeight() const
	{
	const ClusterNode *ptrLeft = GetLeft();
	if (0 == ptrLeft)
		return 0.0;

	return GetWeight() - ptrLeft->GetWeight();
	}

double ClusterNode::GetRightBranchWeight() const
	{
	const ClusterNode *ptrRight = GetRight();
	if (0 == ptrRight)
		return 0.0;

	return GetWeight() - ptrRight->GetWeight();
	}

double ClusterNode::GetRightWeight() const
	{
	const ClusterNode *ptrRight = GetRight();
	if (0 == ptrRight)
		return 0.0;
	return ptrRight->GetClusterWeight() + GetWeight();
	}

double ClusterNode::GetLeftWeight() const
	{
	const ClusterNode *ptrLeft = GetLeft();
	if (0 == ptrLeft)
		return 0.0;
	return ptrLeft->GetClusterWeight() + GetWeight();
	}

// Return the uLeafIndex'th leaf of the sub-tree rooted at this node,
// counting the left sub-tree's leaves before the right sub-tree's.
// A node without a right child is returned as-is, whether or not it has a
// left child.
const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
	{
	if (0 == m_ptrRight)
		return this;

	if (0 == m_ptrLeft)
		return m_ptrRight->GetClusterLeaf(uLeafIndex);

// Both children present: pick the side that holds the requested leaf.
	const unsigned uUnderLeft = m_ptrLeft->GetClusterSize();
	if (uLeafIndex >= uUnderLeft)
		return m_ptrRight->GetClusterLeaf(uLeafIndex - uUnderLeft);
	return m_ptrLeft->GetClusterLeaf(uLeafIndex);
	}

// Unlink ptrNode from the doubly linked list of disjoint clusters.
void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
	{
	ClusterNode *ptrBefore = ptrNode->GetPrevDisjoint();
	ClusterNode *ptrAfter = ptrNode->GetNextDisjoint();

	if (0 != ptrAfter)
		ptrAfter->SetPrevDisjoint(ptrBefore);

// A node without a predecessor is the head, so the head moves on.
	if (0 == ptrBefore)
		m_ptrDisjoints = ptrAfter;
	else
		ptrBefore->SetNextDisjoint(ptrAfter);

#if	_DEBUG
// not algorithmically necessary, but improves clarity
// and supports Validate().
	ptrNode->SetNextDisjoint(0);
	ptrNode->SetPrevDisjoint(0);
#endif
	}

// Push ptrNode onto the front of the list of disjoint clusters.
void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
	{
	ClusterNode *ptrOldHead = m_ptrDisjoints;

	ptrNode->SetPrevDisjoint(0);
	ptrNode->SetNextDisjoint(ptrOldHead);
	if (0 != ptrOldHead)
		ptrOldHead->SetPrevDisjoint(ptrNode);
	m_ptrDisjoints = ptrNode;
	}

// Construct an empty tree: no nodes, no disjoint list, zero counts.
// m_uLeafCount is initialised as well so that no member is left
// indeterminate before Create() is called.
ClusterTree::ClusterTree()
	{
	m_ptrDisjoints = 0;
	m_Nodes = 0;
	m_uNodeCount = 0;
	m_uLeafCount = 0;
	}

// Free the node array allocated by Create(); m_Nodes is null if Create()
// was never called, which delete[] accepts.
ClusterTree::~ClusterTree()
	{
	delete[] m_Nodes;
	}

// Log the index of the disjoint-list head (0xffffffff when the list is
// empty) followed by one line per node.
void ClusterTree::LogMe() const
	{
	Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);

	unsigned n = 0;
	while (n < m_uNodeCount)
		{
		m_Nodes[n].LogMe();
		++n;
		}
	}

// The root is the last node of the array. No check is made that the tree
// has been created.
ClusterNode *ClusterTree::GetRoot() const
	{
	const unsigned uRootIndex = m_uNodeCount - 1;
	return m_Nodes + uRootIndex;
	}

// This is the UPGMA algorithm as described in Durbin et al. p166.
void ClusterTree::Create(const DistFunc &Dist)
	{
	unsigned i;
	m_uLeafCount = Dist.GetCount();
	m_uNodeCount = 2*m_uLeafCount - 1;

	delete[] m_Nodes;
	m_Nodes = new ClusterNode[m_uNodeCount];

	for (i = 0; i < m_uNodeCount; ++i)
		m_Nodes[i].SetIndex(i);

	for (i = 0; i < m_uLeafCount - 1; ++i)
		m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);

	for (i = 1; i < m_uLeafCount; ++i)
		m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
	
	m_ptrDisjoints = &m_Nodes[0];

//	Log("Initial state\n");
//	LogMe();
//	Log("\n");

	DistFunc ClusterDist;
	ClusterDist.SetCount(m_uNodeCount);
	double dMaxDist = 0.0;
	for (i = 0; i < m_uLeafCount; ++i)
		for (unsigned j = 0; j < m_uLeafCount; ++j)
			{
			float dDist = Dist.GetDist(i, j);
			ClusterDist.SetDist(i, j, dDist);
			}

	Validate(m_uLeafCount);

// Iteration. N-1 joins needed to create a binary tree from N leaves.
	for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
	  ++uJoinIndex)
		{
	// Find closest pair of clusters
		unsigned uIndexClosest1;
		unsigned uIndexClosest2;
		bool bFound = false;
		double dDistClosest = 9e99;
		for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
		  ptrNode1 = ptrNode1->GetNextDisjoint())
			{
			for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
			  ptrNode2 = ptrNode2->GetNextDisjoint())
				{
				unsigned i1 = ptrNode1->GetIndex();
				unsigned i2 = ptrNode2->GetIndex();
				double dDist = ClusterDist.GetDist(i1, i2);
				if (dDist < dDistClosest)
					{
					bFound = true;
					dDistClosest = dDist;
					uIndexClosest1 = i1;
					uIndexClosest2 = i2;
					}
				}
			}
		assert(bFound);

		ClusterNode &Join = m_Nodes[uJoinIndex];
		ClusterNode &Child1 = m_Nodes[uIndexClosest1];
		ClusterNode &Child2 = m_Nodes[uIndexClosest2];

		Join.SetLeft(&Child1);
		Join.SetRight(&Child2);
		Join.SetWeight(dDistClosest);

		Child1.SetParent(&Join);
		Child2.SetParent(&Join);

		DeleteFromDisjoints(&Child1);
		DeleteFromDisjoints(&Child2);
		AddToDisjoints(&Join);

//		Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);
//		LogMe();

	// Calculate distance of every remaining disjoint cluster to the
	// new cluster created by the join
		for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
		  ptrNode = ptrNode->GetNextDisjoint())
			{
			unsigned uNodeIndex = ptrNode->GetIndex();
			float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
			float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
			float dDist = Min(dDist1, dDist2);
			ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
			}
		Validate(uJoinIndex+1);
		}
	GetRoot()->GetClusterWeight();
//	LogMe();
	}
@@ -0,0 +1,86 @@
class DistFunc;

// One node of a ClusterTree. Read access is public; the setters and the
// disjoint-list accessors are protected and used by ClusterTree, which is
// declared a friend.
class ClusterNode
	{
	friend class ClusterTree;
public:
	// All weights and the index start at zero, all links at null.
	ClusterNode() :
		m_dWeight(0.0),
		m_dWeight2(0.0),
		m_uIndex(0),
		m_ptrLeft(0),
		m_ptrRight(0),
		m_ptrParent(0),
		m_ptrNextDisjoint(0),
		m_ptrPrevDisjoint(0)
		{
		}
	// The node owns nothing, so there is nothing to release.
	~ClusterNode() {}

public:
	// Simple accessors.
	unsigned GetIndex() const { return m_uIndex; }
	ClusterNode *GetLeft() const { return m_ptrLeft; }
	ClusterNode *GetRight() const { return m_ptrRight; }
	ClusterNode *GetParent() const { return m_ptrParent; }
	double GetWeight() const { return m_dWeight; }

	// Queries computed over the sub-tree rooted at this node.
	const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
	unsigned GetClusterSize() const;
	double GetClusterWeight() const;
	double GetLeftBranchWeight() const;
	double GetRightBranchWeight() const;
	double GetLeftWeight() const;
	double GetRightWeight() const;

	void LogMe() const;

	// Second weight value, freely readable and writable by any caller.
	double GetWeight2() const { return m_dWeight2; }
	void SetWeight2(double dWeight2) { m_dWeight2 = dWeight2; }

protected:
	// Mutators reserved for ClusterTree.
	void SetIndex(unsigned uIndex) { m_uIndex = uIndex; }
	void SetWeight(double dWeight) { m_dWeight = dWeight; }
	void SetLeft(ClusterNode *ptrLeft) { m_ptrLeft = ptrLeft; }
	void SetRight(ClusterNode *ptrRight) { m_ptrRight = ptrRight; }
	void SetParent(ClusterNode *ptrParent) { m_ptrParent = ptrParent; }
	void SetNextDisjoint(ClusterNode *ptrNode) { m_ptrNextDisjoint = ptrNode; }
	void SetPrevDisjoint(ClusterNode *ptrNode) { m_ptrPrevDisjoint = ptrNode; }

	// Neighbours on the list of disjoint clusters.
	ClusterNode *GetNextDisjoint() { return m_ptrNextDisjoint; }
	ClusterNode *GetPrevDisjoint() { return m_ptrPrevDisjoint; }

private:
	double m_dWeight;
	double m_dWeight2;
	unsigned m_uIndex;				// position in the owning tree's node array
	ClusterNode *m_ptrLeft;
	ClusterNode *m_ptrRight;
	ClusterNode *m_ptrParent;
	ClusterNode *m_ptrNextDisjoint;
	ClusterNode *m_ptrPrevDisjoint;
	};

// Binary tree of ClusterNode objects built from a distance function by
// repeatedly joining the closest pair of disjoint clusters.
class ClusterTree
	{
public:
	ClusterTree();
	virtual ~ClusterTree();

	// Build the tree from the distances in DF, replacing any previous tree.
	void Create(const DistFunc &DF);

	// The root is the last node in the node array.
	ClusterNode *GetRoot() const;
	void LogMe() const;

protected:
	// NOTE(review): Join is declared here but no definition appears in the
	// reviewed part of cluster.cpp -- confirm whether it is still used.
	void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
	  ClusterNode *ptrJoin);
	// Maintenance of the doubly linked list of disjoint clusters.
	void AddToDisjoints(ClusterNode *ptrNode);
	void DeleteFromDisjoints(ClusterNode *ptrNode);
	// Consistency check; cluster.cpp only defines it when _DEBUG is set.
	void Validate(unsigned uNodeCount);

private:
	ClusterNode *m_ptrDisjoints;	// head of the list of clusters not yet joined
	ClusterNode *m_Nodes;			// owned array of m_uNodeCount nodes
	unsigned m_uNodeCount;
	unsigned m_uLeafCount;
	};
@@ -0,0 +1,21 @@
#ifndef ClustSet_h
#define ClustSet_h

enum JOIN;
enum LINKAGE;
class Clust;

// Abstract interface through which Clust obtains its leaves: how many there
// are, their names and ids, and the distance between two of them.
class ClustSet
	{
public:
	// Virtual destructor so that an implementation can be destroyed safely
	// through a ClustSet pointer.
	virtual ~ClustSet() {}

	// Number of leaves to cluster.
	virtual unsigned GetLeafCount() = 0;

	// Distance between two nodes of the clustering C.
	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
	  unsigned uNodeIndex2) = 0;

	// Hook for joining two nodes into uJoinedNodeIndex, with the two branch
	// lengths passed by pointer. Clust itself does not call this in the
	// code reviewed here.
	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
	  double *ptrdLeftLength, double *ptrdRightLength) = 0;

	// Name and id of a leaf.
	virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
	virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
	};

#endif	// ClustSet_h
@@ -0,0 +1,48 @@
#ifndef ClustSetDF_h
#define ClustSetDF_h

class MSA;
class Clust;

#include "clustset.h"
#include "distfunc.h"
#include "msa.h"

class ClustSetDF : public ClustSet
	{
public:
	ClustSetDF(const DistFunc &DF) :
		m_ptrDF(&DF)
		{
		}

public:
	virtual unsigned GetLeafCount()
		{
		return m_ptrDF->GetCount();
		}
	virtual const char *GetLeafName(unsigned uNodeIndex)
		{
		return m_ptrDF->GetName(uNodeIndex);
		}
	virtual unsigned GetLeafId(unsigned uNodeIndex)
		{
		return m_ptrDF->GetId(uNodeIndex);
		}
	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
	  double *ptrdLeftLength, double *ptrdRightLength)
		{
		Quit("ClustSetDF::JoinNodes, should never be called");
		}
	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
	  unsigned uNodeIndex2)
		{
		return m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
		}

private:
	const DistFunc *m_ptrDF;
	};

#endif	// ClustSetDF_h
@@ -0,0 +1,55 @@
#ifndef ClustSetMSA_h
#define ClustSetMSA_h

class MSA;
class Clust;

#include "clustset.h"
#include "msadist.h"

// Distance matrix based set.
// Computes distances between leaves, never between
// joined clusters (leaves this to distance matrix method).
class ClustSetMSA : public ClustSet
	{
public:
	ClustSetMSA(const MSA &msa, MSADist &MD) :
		m_ptrMSA(&msa),
		m_ptrMSADist(&MD)
		{
		}

public:
	virtual unsigned GetLeafCount()
		{
		return m_ptrMSA->GetSeqCount();
		}
	virtual const char *GetLeafName(unsigned uNodeIndex)
		{
		return m_ptrMSA->GetSeqName(uNodeIndex);
		}
	virtual unsigned GetLeafId(unsigned uNodeIndex)
		{
		return m_ptrMSA->GetSeqId(uNodeIndex);
		}
	virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
	  unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
	  double *ptrdLeftLength, double *ptrdRightLength)
		{
		Quit("ClustSetMSA::JoinNodes, should never be called");
		}
	virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
	  unsigned uNodeIndex2)
		{
		return m_ptrMSADist->ComputeDist(*m_ptrMSA, uNodeIndex1, uNodeIndex2);
		}

public:
	const MSA &GetMSA();

private:
	const MSA *m_ptrMSA;
	MSADist *m_ptrMSADist;
	};

#endif	// ClustSetMSA_h
@@ -0,0 +1,190 @@
#include "muscle.h"
#include "tree.h"
#include "msa.h"

/***
Compute weights by the CLUSTALW method.
Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
see also CLUSTALW paper.

Weights are computed from the edge lengths of a rooted tree.

Define the strength of an edge to be its length divided by the number
of leaves under that edge. The weight of a sequence is then the sum
of edge strengths on the path from the root to the leaf.

Example.

        0.2
       -----A     0.1
	 -x         ------- B     0.7
	   --------y           ----------- C
	    0.3     ----------z
                    0.4    -------------- D
                                 0.8

Edge	Length	Leaves	Strength
----	-----	------	--------
xy		0.3		3		0.1
xA		0.2		1		0.2
yz		0.4		2		0.2
yB		0.1		1		0.1
zC		0.7		1		0.7
zD		0.8		1		0.8

Leaf	Path		Strengths			Weight
----	----		---------			------
A		xA			0.2					0.2
B		xy-yB		0.1 + 0.1			0.2
C		xy-yz-zC	0.1 + 0.2 + 0.7		1.0
D		xy-yz-zD	0.1 + 0.2 + 0.8		1.1

***/

#define TRACE 0

// Recursively count the leaves below a node.
// For uNodeIndex and every node beneath it, LeavesUnderNode[node] is set
// to the number of leaves in that node's subtree (1 for a leaf).
// Returns the count for uNodeIndex itself.
static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
  unsigned LeavesUnderNode[])
	{
	unsigned uTotal = 1;
	if (!tree.IsLeaf(uNodeIndex))
		{
		const unsigned uLeftChild = tree.GetLeft(uNodeIndex);
		const unsigned uRightChild = tree.GetRight(uNodeIndex);

		// The right subtree is visited before the left one.
		const unsigned uUnderRight =
		  CountLeaves(tree, uRightChild, LeavesUnderNode);
		const unsigned uUnderLeft =
		  CountLeaves(tree, uLeftChild, LeavesUnderNode);
		uTotal = uUnderRight + uUnderLeft;
		}
	LeavesUnderNode[uNodeIndex] = uTotal;
	return uTotal;
	}

void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
	{
#if	TRACE
	Log("CalcClustalWWeights\n");
	tree.LogMe();
#endif

	const unsigned uLeafCount = tree.GetLeafCount();
	if (0 == uLeafCount)
		return;
	else if (1 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 1.0;
		return;
		}
	else if (2 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 0.5;
		Weights[1] = (WEIGHT) 0.5;
		return;
		}

	if (!tree.IsRooted())
		Quit("CalcClustalWWeights requires rooted tree");

	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *LeavesUnderNode = new unsigned[uNodeCount];
	memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));

	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
	unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
	if (uLeavesUnderRoot != uLeafCount)
		Quit("WeightsFromTreee: Internal error, root count %u %u",
		  uLeavesUnderRoot, uLeafCount);

#if	TRACE
	Log("Node  Leaves    Length  Strength\n");
	Log("----  ------  --------  --------\n");
	//    1234  123456  12345678  12345678
#endif

	double *Strengths = new double[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (tree.IsRoot(uNodeIndex))
			{
			Strengths[uNodeIndex] = 0.0;
			continue;
			}
		const unsigned uParent = tree.GetParent(uNodeIndex);
		const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
		const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
		const double dStrength = dLength / (double) uLeaves;
		Strengths[uNodeIndex] = dStrength;
#if	TRACE
		Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
#endif
		}

#if	TRACE
	Log("\n");
	Log("                 Seq  Path..Weight\n");
	Log("--------------------  ------------\n");
#endif
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
#if	TRACE
		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
#endif
		if (!tree.IsLeaf(uLeafNodeIndex))
			Quit("CalcClustalWWeights: leaf");

		double dWeight = 0;
		unsigned uNode = uLeafNodeIndex;
		while (!tree.IsRoot(uNode))
			{
			dWeight += Strengths[uNode];
			uNode = tree.GetParent(uNode);
#if	TRACE
			Log("->%u(%g)", uNode, Strengths[uNode]);
#endif
			}
		if (dWeight < 0.0001)
			{
#if	TRACE
			Log("zero->one");
#endif
			dWeight = 1.0;
			}
		Weights[n] = (WEIGHT) dWeight;
#if	TRACE
		Log(" = %g\n", dWeight);
#endif
		}

	delete[] Strengths;
	delete[] LeavesUnderNode;

	Normalize(Weights, uLeafCount);
	}

// Assign CLUSTALW weights, computed from the given tree, to the sequences
// of this alignment.
// Each leaf of the tree is matched to a sequence through its id
// (tree.GetLeafId -> GetSeqIndex). After all weights are set they are
// rescaled with NormalizeWeights(1.0).
void MSA::SetClustalWWeights(const Tree &tree)
	{
	const unsigned uLeafCount = tree.GetLeafCount();

// CalcClustalWWeights writes one entry per leaf of the tree, so the
// buffer is sized by the leaf count (not by the sequence count of the
// alignment, which could be smaller).
	WEIGHT *Weights = new WEIGHT[uLeafCount];

	CalcClustalWWeights(tree, Weights);

	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const WEIGHT w = Weights[n];
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
		const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
		const unsigned uSeqIndex = GetSeqIndex(uId);
#if	DEBUG
	// Compare the name strings, not the pointers.
		if (0 != strcmp(GetSeqName(uSeqIndex), tree.GetLeafName(uLeafNodeIndex)))
			Quit("MSA::SetClustalWWeights: names don't match");
#endif
		SetSeqWeight(uSeqIndex, w);
		}
	NormalizeWeights((WEIGHT) 1.0);

	delete[] Weights;
	}
@@ -0,0 +1,189 @@
#include "muscle.h"
#include "msa.h"

// 23x23 table of integer substitution scores. Rows and columns are in the
// order given by the letter labels below (A B C D E F G H I K L M N P Q R
// S T V W X Y Z), which is the index order produced by toi_tab. The
// initializer is a flat list of 23*23 values filling the table row by row.
static int Blosum62[23][23] =
	{
//   A   B   C   D   E    F   G   H   I   K    L   M   N   P   Q    R   S   T   V   W    X   Y   Z 
	+4, -2, +0, -2, -1,  -2, +0, -2, -1, -1,  -1, -1, -2, -1, -1,  -1, +1, +0, +0, -3,  -1, -2, -1,  // A
	-2, +6, -3, +6, +2,  -3, -1, -1, -3, -1,  -4, -3, +1, -1, +0,  -2, +0, -1, -3, -4,  -1, -3, +2,  // B
	+0, -3, +9, -3, -4,  -2, -3, -3, -1, -3,  -1, -1, -3, -3, -3,  -3, -1, -1, -1, -2,  -1, -2, -4,  // C
	-2, +6, -3, +6, +2,  -3, -1, -1, -3, -1,  -4, -3, +1, -1, +0,  -2, +0, -1, -3, -4,  -1, -3, +2,  // D
	-1, +2, -4, +2, +5,  -3, -2, +0, -3, +1,  -3, -2, +0, -1, +2,  +0, +0, -1, -2, -3,  -1, -2, +5,  // E
	
	-2, -3, -2, -3, -3,  +6, -3, -1, +0, -3,  +0, +0, -3, -4, -3,  -3, -2, -2, -1, +1,  -1, +3, -3,  // F
	+0, -1, -3, -1, -2,  -3, +6, -2, -4, -2,  -4, -3, +0, -2, -2,  -2, +0, -2, -3, -2,  -1, -3, -2,  // G
	-2, -1, -3, -1, +0,  -1, -2, +8, -3, -1,  -3, -2, +1, -2, +0,  +0, -1, -2, -3, -2,  -1, +2, +0,  // H
	-1, -3, -1, -3, -3,  +0, -4, -3, +4, -3,  +2, +1, -3, -3, -3,  -3, -2, -1, +3, -3,  -1, -1, -3,  // I
	-1, -1, -3, -1, +1,  -3, -2, -1, -3, +5,  -2, -1, +0, -1, +1,  +2, +0, -1, -2, -3,  -1, -2, +1,  // K
	
	-1, -4, -1, -4, -3,  +0, -4, -3, +2, -2,  +4, +2, -3, -3, -2,  -2, -2, -1, +1, -2,  -1, -1, -3,  // L
	-1, -3, -1, -3, -2,  +0, -3, -2, +1, -1,  +2, +5, -2, -2, +0,  -1, -1, -1, +1, -1,  -1, -1, -2,  // M
	-2, +1, -3, +1, +0,  -3, +0, +1, -3, +0,  -3, -2, +6, -2, +0,  +0, +1, +0, -3, -4,  -1, -2, +0,  // N
	-1, -1, -3, -1, -1,  -4, -2, -2, -3, -1,  -3, -2, -2, +7, -1,  -2, -1, -1, -2, -4,  -1, -3, -1,  // P
	-1, +0, -3, +0, +2,  -3, -2, +0, -3, +1,  -2, +0, +0, -1, +5,  +1, +0, -1, -2, -2,  -1, -1, +2,  // Q
	
	-1, -2, -3, -2, +0,  -3, -2, +0, -3, +2,  -2, -1, +0, -2, +1,  +5, -1, -1, -3, -3,  -1, -2, +0,  // R
	+1, +0, -1, +0, +0,  -2, +0, -1, -2, +0,  -2, -1, +1, -1, +0,  -1, +4, +1, -2, -3,  -1, -2, +0,  // S
	+0, -1, -1, -1, -1,  -2, -2, -2, -1, -1,  -1, -1, +0, -1, -1,  -1, +1, +5, +0, -2,  -1, -2, -1,  // T
	+0, -3, -1, -3, -2,  -1, -3, -3, +3, -2,  +1, +1, -3, -2, -2,  -3, -2, +0, +4, -3,  -1, -1, -2,  // V
	-3, -4, -2, -4, -3,  +1, -2, -2, -3, -3,  -2, -1, -4, -4, -2,  -3, -3, -2, -3,+11,  -1, +2, -3,  // W
	
	-1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1, -1, -1,  -1, -1, -1,  // X
	-2, -3, -2, -3, -2,  +3, -3, +2, -1, -2,  -1, -1, -2, -3, -1,  -2, -2, -2, -1, +2,  -1, +7, -2,  // Y
	-1, +2, -4, +2, +5,  -3, -2, +0, -3, +1,  -3, -2, +0, -1, +2,  +0, +0, -1, -2, -3,  -1, -2, +5,  // Z
	};

// Maps a letter (offset from 'A') to its row/column index in the 23x23
// score table, or -1 when the letter has no entry (J and O).
// Note that U shares index 17 with T.
static int toi_tab[26] =
	{
	0,	// A
	1,	// B
	2,	// C
	3,	// D
	4,	// E
	5,	// F
	6,	// G
	7,	// H
	8,	// I
	-1,	// J
	9,	// K
	10,	// L
	11,	// M
	12,	// N
	-1,	// O
	13,	// P
	14,	// Q
	15,	// R
	16,	// S
	17,	// T
	17,	// U
	18,	// V
	19,	// W
	20,	// X
	21,	// Y
	22,	// Z
	};

// Convert a residue character (either case) to its score-table index.
// Returns -1 for J, O and for any character that is not a letter A-Z;
// the table is never indexed out of range.
static int toi(char c)
	{
	// Cast first: passing a negative char value to toupper() is undefined.
	const int iUpper = toupper((unsigned char) c);
	if (iUpper < 'A' || iUpper > 'Z')
		return -1;
	return toi_tab[iUpper - 'A'];
	}

// Score-table entry for a pair of residue characters.
// NOTE(review): toi_tab maps J and O to -1, and the result of toi() is
// used here as an array index without a range check -- confirm callers
// never pass such characters.
static int BlosumScore(char c1, char c2)
	{
	const int iRow = toi(c1);
	const int iCol = toi(c2);
	const int iScore = Blosum62[iRow][iCol];
	return iScore;
	}

/***
Average score-table value over all ordered pairs of letters in a column.

Consider a column with 5 As and 3 Bs.
There are:
	5x4 pairs of As.
	3x2 pairs of Bs.
	5x3x2 AB pairs
	8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters

Gaps, and characters that have no entry in the score table (toi()
outside 0..22, e.g. J and O), are left out of the counts.
Returns -9 when fewer than two letters remain in the column.
***/
static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
	{
	int iCounts[23];
	memset(iCounts, 0, sizeof(iCounts));
	const unsigned uSeqCount = a.GetSeqCount();
	unsigned uCharCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		char c = a.GetChar(uSeqIndex, uColIndex);
		if (IsGapChar(c))
			continue;
		int iChar = toi(c);
	// Never index iCounts with -1 or any other out-of-table value.
		if (iChar < 0 || iChar >= 23)
			continue;
		++iCounts[iChar];
		++uCharCount;
		}
	if (uCharCount < 2)
		return -9;

// Accumulate in double: count*count*score exceeds the range of int
// for columns with many sequences.
	double dTotalScore = 0;
	for (int i1 = 0; i1 < 23; ++i1)
		{
		const double dCount1 = (double) iCounts[i1];
		dTotalScore += dCount1*(dCount1 - 1)*Blosum62[i1][i1];
		for (int i2 = i1 + 1; i2 < 23; ++i2)
			dTotalScore += (double) iCounts[i2]*dCount1*2*Blosum62[i1][i2];
		}
	const double dPairCount = (double) uCharCount*(double) (uCharCount - 1);
	return dTotalScore / dPairCount;
	}

/***
Assign a color level to every cell of one alignment column.

The column gets a level from its BlosumScoreCol() value:
	score >= 3.0	level 3
	score >= 0.2	level 1
	otherwise		level 0
(level 2 is currently not assigned).

A letter receives the column's level if its score-table value against
the most frequent letter of the column is at least the column score;
otherwise it receives 0. Gaps and characters with no score-table entry
always receive 0 and are not counted when finding the most frequent
letter.

Colors[uSeqIndex][uColIndex] is written for every sequence.
***/
static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
	{
	int iCounts[23];
	memset(iCounts, 0, sizeof(iCounts));
	const unsigned uSeqCount = a.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char c = a.GetChar(uSeqIndex, uColIndex);
		if (IsGapChar(c))
			continue;
		const int iChar = toi(c);
	// Never index iCounts with -1 or any other out-of-table value.
		if (iChar < 0 || iChar >= 23)
			continue;
		++iCounts[iChar];
		}

// Most frequent letter; ties go to the lowest index. Because the
// initial best count is -1, a valid index (0) is chosen even when
// every count is zero.
	int iMostConservedType = -1;
	int iMostConservedCount = -1;
	for (unsigned i = 0; i < 23; ++i)
		{
		if (iCounts[i] > iMostConservedCount)
			{
			iMostConservedType = i;
			iMostConservedCount = iCounts[i];
			}
		}

	const double dColScore = BlosumScoreCol(a, uColIndex);
	int iColLevel;
	if (dColScore >= 3.0)
		iColLevel = 3;
	else if (dColScore >= 0.2)
		iColLevel = 1;
	else
		iColLevel = 0;

// Level per letter type.
	int Color[23];
	for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
		{
		const double dScore = Blosum62[uLetter][iMostConservedType];
		if (dScore >= dColScore)
			Color[uLetter] = iColLevel;
		else
			Color[uLetter] = 0;
		}

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char c = a.GetChar(uSeqIndex, uColIndex);
		if (IsGapChar(c))
			{
			Colors[uSeqIndex][uColIndex] = 0;
			continue;
			}
		const int iLetter = toi(c);
		if (iLetter >= 0 && iLetter < 23)
			Colors[uSeqIndex][uColIndex] = Color[iLetter];
		else
			Colors[uSeqIndex][uColIndex] = 0;
		}
	}

// Fill Colors[seq][col] for every column of the alignment, one column
// at a time, in increasing column order.
void AssignColors(const MSA &a, int **Colors)
	{
	const unsigned uCols = a.GetColCount();
	unsigned uCol = 0;
	while (uCol < uCols)
		{
		AssignColorsCol(a, uCol, Colors);
		++uCol;
		}
	}
@@ -0,0 +1,118 @@
/***
Conservation value for a column in an MSA is defined as the number
of times the most common letter appears divided by the number of
sequences.
***/

#include "muscle.h"
#include "msa.h"
#include <math.h>

// Mean of GetCons() over all columns that are not gap columns.
// Asserts that the alignment has at least one sequence, that at least
// one column was counted, and that the mean lies in (0, 1].
double MSA::GetAvgCons() const
	{
	assert(GetSeqCount() > 0);

	double dTotal = 0;
	unsigned uCounted = 0;
	for (unsigned uCol = 0; uCol < GetColCount(); ++uCol)
		{
		if (IsGapColumn(uCol))
			continue;
		dTotal += GetCons(uCol);
		++uCounted;
		}

	assert(uCounted > 0);
	const double dMean = dTotal / uCounted;
	assert(dMean > 0 && dMean <= 1);
	return dMean;
	}

double MSA::GetCons(unsigned uColIndex) const
	{
	unsigned Counts[MAX_ALPHA];
	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
		Counts[uLetter] = 0;

	unsigned uMaxCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		{
		if (IsGap(uSeqIndex, uColIndex))
			continue;
		char c = GetChar(uSeqIndex, uColIndex);
		c = toupper(c);
		if ('X' == c || 'B' == c || 'Z' == c)
			continue;
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		unsigned uCount = Counts[uLetter] + 1;
		if (uCount > uMaxCount)
			uMaxCount = uCount;
		Counts[uLetter] = uCount;
		}

// Cons is undefined for all-gap column
	if (0 == uMaxCount)
		{
//		assert(false);
		return 1;
		}

	double dCons = (double) uMaxCount / (double) GetSeqCount();
	assert(dCons > 0 && dCons <= 1);
	return dCons;
	}

// Perecent identity of a pair of sequences.
// Positions with one or both gapped are ignored.
double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
	{
	const unsigned uColCount = GetColCount();
	unsigned uPosCount = 0;
	unsigned uSameCount = 0;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		const char c1 = GetChar(uSeqIndex1, uColIndex);
		const char c2 = GetChar(uSeqIndex2, uColIndex);
		if (IsGapChar(c1) || IsGapChar(c2))
			continue;
		if (c1 == c2)
			++uSameCount;
		++uPosCount;
		}
	if (0 == uPosCount)
		return 0;
	return (double) uSameCount / (double) uPosCount;
	}

// Perecent group identity of a pair of sequences.
// Positions with one or both gapped are ignored.
double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
  unsigned uSeqIndex2) const
	{
	extern unsigned ResidueGroup[];

	const unsigned uColCount = GetColCount();
	unsigned uPosCount = 0;
	unsigned uSameCount = 0;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		if (IsGap(uSeqIndex1, uColIndex))
			continue;
		if (IsGap(uSeqIndex2, uColIndex))
			continue;
		if (IsWildcard(uSeqIndex1, uColIndex))
			continue;
		if (IsWildcard(uSeqIndex2, uColIndex))
			continue;

		const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
		const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
		const unsigned uGroup1 = ResidueGroup[uLetter1];
		const unsigned uGroup2 = ResidueGroup[uLetter2];
		if (uGroup1 == uGroup2)
			++uSameCount;
		++uPosCount;
		}
	if (0 == uPosCount)
		return 0;
	return (double) uSameCount / (double) uPosCount;
	}
@@ -0,0 +1,378 @@
#include "muscle.h"
#include "diaglist.h"
#include "pwpath.h"

#define MAX(x, y)	((x) > (y) ? (x) : (y))
#define MIN(x, y)	((x) < (y) ? (x) : (y))

// Append a copy of d to the list. Quits if the list already holds
// MAX_DIAGS entries.
void DiagList::Add(const Diag &d)
	{
	const bool bFull = (MAX_DIAGS == m_uCount);
	if (bFull)
		Quit("DiagList::Add, overflow %u", m_uCount);
	m_Diags[m_uCount++] = d;
	}

void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
	{
	Diag d;
	d.m_uStartPosA = uStartPosA;
	d.m_uStartPosB = uStartPosB;
	d.m_uLength = uLength;
	Add(d);
	}

// Reference to the diagonal at uIndex. Quits if uIndex is not below the
// current count.
const Diag &DiagList::Get(unsigned uIndex) const
	{
	const bool bInRange = (uIndex < m_uCount);
	if (!bInRange)
		Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
	return *(m_Diags + uIndex);
	}

void DiagList::LogMe() const
	{
	Log("DiagList::LogMe, count=%u\n", m_uCount);
	Log("  n  StartA  StartB  Length\n");
	Log("---  ------  ------  ------\n");
	for (unsigned n = 0; n < m_uCount; ++n)
		{
		const Diag &d = m_Diags[n];
		Log("%3u  %6u  %6u  %6u\n",
		  n, d.m_uStartPosA, d.m_uStartPosB, d.m_uLength);
		}
	}

// Rebuild this list from an alignment path.
// Every maximal run of consecutive 'M' edges whose length is at least
// g_uMinDiagLength becomes one diagonal. The start positions of a run
// are the prefix lengths of its first edge minus one. Any previous
// contents of the list are discarded.
void DiagList::FromPath(const PWPath &Path)
	{
	Clear();

	const unsigned uEdgeCount = Path.GetEdgeCount();
	unsigned uLength = 0;

// Initialized so they are never read uninitialized; they are only
// meaningful while uLength > 0.
	unsigned uStartPosA = 0;
	unsigned uStartPosB = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);

	// Typical cases
		if (Edge.cType == 'M')
			{
			if (0 == uLength)
				{
				uStartPosA = Edge.uPrefixLengthA - 1;
				uStartPosB = Edge.uPrefixLengthB - 1;
				}
			++uLength;
			}
		else
			{
		// uLength > 0: never add an empty run, even if the
		// minimum length is configured as zero.
			if (uLength > 0 && uLength >= g_uMinDiagLength)
				Add(uStartPosA, uStartPosB, uLength);
			uLength = 0;
			}
		}

// Special case for last edge
	if (uLength > 0 && uLength >= g_uMinDiagLength)
		Add(uStartPosA, uStartPosB, uLength);
	}

bool DiagList::NonZeroIntersection(const Diag &d) const
	{
	for (unsigned n = 0; n < m_uCount; ++n)
		{
		const Diag &d2 = m_Diags[n];
		if (DiagOverlap(d, d2) > 0)
			return true;
		}
	return false;
	}

// DiagOverlap returns the number of matrix cells shared by two
// diagonals, i.e. the length of their overlap measured along the
// diagonals themselves.
//
// Points on a diagonal satisfy B = A + c, so c = B - A is the same for
// every point on it. Two diagonals with different c lie on different
// lines and share no cells. With equal c, the overlap equals the
// overlap of their projections onto the A axis (which, as asserted,
// is the same as the overlap of their projections onto the B axis).
unsigned DiagOverlap(const Diag &d1, const Diag &d2)
	{
	const int iOffset1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
	const int iOffset2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
	if (iOffset1 == iOffset2)
		{
		assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
		return DiagOverlapA(d1, d2);
		}
	return 0;
	}

// DiagOverlapA returns the length of the overlap between the
// projections of the two diagonals onto the A axis (0 if the
// projections do not overlap).
unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
	{
	const unsigned uEnd1 = d1.m_uStartPosA + d1.m_uLength - 1;
	const unsigned uEnd2 = d2.m_uStartPosA + d2.m_uLength - 1;

	const unsigned uLatestStart =
	  d1.m_uStartPosA > d2.m_uStartPosA ? d1.m_uStartPosA : d2.m_uStartPosA;
	const unsigned uEarliestEnd = uEnd1 < uEnd2 ? uEnd1 : uEnd2;

	// Signed arithmetic: the difference is negative when there is a gap
	// between the two projections.
	const int iOverlap = (int) uEarliestEnd - (int) uLatestStart + 1;
	if (iOverlap >= 0)
		return (unsigned) iOverlap;
	return 0;
	}

// DiagOverlapB returns the length of the overlap between the
// projections of the two diagonals onto the B axis (0 if the
// projections do not overlap).
unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
	{
	const unsigned uEnd1 = d1.m_uStartPosB + d1.m_uLength - 1;
	const unsigned uEnd2 = d2.m_uStartPosB + d2.m_uLength - 1;

	const unsigned uLatestStart =
	  d1.m_uStartPosB > d2.m_uStartPosB ? d1.m_uStartPosB : d2.m_uStartPosB;
	const unsigned uEarliestEnd = uEnd1 < uEnd2 ? uEnd1 : uEnd2;

	// Signed arithmetic: the difference is negative when there is a gap
	// between the two projections.
	const int iOverlap = (int) uEarliestEnd - (int) uLatestStart + 1;
	if (iOverlap >= 0)
		return (unsigned) iOverlap;
	return 0;
	}

// Returns true if the two diagonals can lie on the same path through
// the DP matrix, false if they "contradict" each other.
// They are compatible when they share cells (DiagOverlap > 0), or when
// their projections overlap on neither the A axis nor the B axis.
bool DiagCompatible(const Diag &d1, const Diag &d2)
	{
	if (0 != DiagOverlap(d1, d2))
		return true;
	if (0 != DiagOverlapA(d1, d2))
		return false;
	return 0 == DiagOverlapB(d1, d2);
	}

// Returns the length of the "break" between two diagonals.
unsigned DiagBreak(const Diag &d1, const Diag &d2)
	{
	int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
	int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
	if (c1 != c2)
		return 0;

	int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
	int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
	  d2.m_uStartPosA + d1.m_uLength - 1);
	int iBreak = iMaxStart - iMinEnd - 1;
	if (iBreak < 0)
		return 0;
	return (unsigned) iBreak;
	}

// Merge diagonals that are continuations of each other with
// short breaks of up to length g_uMaxDiagBreak.
// In a sorted list of diagonals, we only have to check
// consecutive entries.
//
// NOTE: this function is currently a no-op. The unconditional return
// at the top of the body disables everything below it (see the TODO);
// DL is left unchanged.
void MergeDiags(DiagList &DL)
	{
	// Merging is disabled; the code below is never executed.
	return;
#if	DEBUG
	if (!DL.IsSorted())
		Quit("MergeDiags: !IsSorted");
#endif

// TODO: Fix this!
// Breaks must be with no offset (no gaps)
	const unsigned uCount = DL.GetCount();
	if (uCount <= 1)
		return;

	DiagList NewList;

	// ptrPrev points either at an entry of DL or at MergedDiag, the
	// diagonal accumulated so far from consecutive mergeable entries.
	Diag MergedDiag;
	const Diag *ptrPrev = &DL.Get(0);
	for (unsigned i = 1; i < uCount; ++i)
		{
		const Diag *ptrDiag = &DL.Get(i);
		unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag);
		if (uBreakLength <= g_uMaxDiagBreak)
			{
			MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA;
			MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB;
			MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength
			  + uBreakLength;
			ptrPrev = &MergedDiag;
			}
		else
			{
			NewList.Add(*ptrPrev);
			ptrPrev = ptrDiag;
			}
		}
	NewList.Add(*ptrPrev);
	DL.Copy(NewList);
	}

// Remove diagonals that cannot all lie on one path.
//
// Pass 1: for every incompatible pair, flag the shorter one if the other
// is more than four times as long; otherwise flag both.
// Pass 2: among the diagonals still unflagged, flag both members of any
// pair whose order in B differs from its order in A, or that is
// incompatible.
// Finally the unflagged diagonals are kept, in their original order.
//
// The list is expected to be sorted by m_uStartPosA (asserted).
void DiagList::DeleteIncompatible()
	{
	assert(IsSorted());

	if (m_uCount < 2)
		return;

	bool *bFlagForDeletion = new bool[m_uCount];
	for (unsigned i = 0; i < m_uCount; ++i)
		bFlagForDeletion[i] = false;

	for (unsigned i = 0; i < m_uCount; ++i)
		{
		const Diag &di = m_Diags[i];
		for (unsigned j = i + 1; j < m_uCount; ++j)
			{
			const Diag &dj = m_Diags[j];

		// Verify sorted correctly
			assert(di.m_uStartPosA <= dj.m_uStartPosA);

		// If two diagonals are incompatible and
		// one is much longer than the other,
		// keep the longer one.
			if (!DiagCompatible(di, dj))
				{
				if (di.m_uLength > dj.m_uLength*4)
					bFlagForDeletion[j] = true;
				else if (dj.m_uLength > di.m_uLength*4)
					bFlagForDeletion[i] = true;
				else
					{
					bFlagForDeletion[i] = true;
					bFlagForDeletion[j] = true;
					}
				}
			}
		}

	for (unsigned i = 0; i < m_uCount; ++i)
		{
		const Diag &di = m_Diags[i];
		if (bFlagForDeletion[i])
			continue;

		for (unsigned j = i + 1; j < m_uCount; ++j)
			{
			const Diag &dj = m_Diags[j];
			if (bFlagForDeletion[j])
				continue;

		// Verify sorted correctly
			assert(di.m_uStartPosA <= dj.m_uStartPosA);

		// If sort order in B different from sorted order in A,
		// either diags are incompatible or we detected a repeat
		// or permutation.
			if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
				{
				bFlagForDeletion[i] = true;
				bFlagForDeletion[j] = true;
				}
			}
		}

// Compact the survivors in place. The write index never passes the
// read index, so no entry is overwritten before it has been read.
	unsigned uNewCount = 0;
	for (unsigned i = 0; i < m_uCount; ++i)
		{
		if (bFlagForDeletion[i])
			continue;

		m_Diags[uNewCount] = m_Diags[i];
		++uNewCount;
		}
	m_uCount = uNewCount;

// Release the flag array (previously leaked on every call).
	delete[] bFlagForDeletion;
	}

// Replace the contents of this list with a copy of DL's diagonals,
// in the same order.
void DiagList::Copy(const DiagList &DL)
	{
	Clear();
	const unsigned uSrcCount = DL.GetCount();
	unsigned uSrc = 0;
	while (uSrc < uSrcCount)
		{
		Add(DL.Get(uSrc));
		++uSrc;
		}
	}

// Check if sorted in increasing order of m_uStartPosA
//
// NOTE: the check is currently disabled. The unconditional
// "return true" at the top of the body makes this function report
// every list as sorted; the loop below is never executed.
bool DiagList::IsSorted() const
	{
	// Check disabled: always reports sorted.
	return true;
	unsigned uCount = GetCount();
	for (unsigned i = 1; i < uCount; ++i)
		if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA)
			return false;
	return true;
	}

// Sort in increasing order of m_uStartPosA
// Dumb bubble sort, but don't care about speed
// because don't get long lists.
void DiagList::Sort()
	{
	if (m_uCount < 2)
		return;

	bool bContinue = true;
	while (bContinue)
		{
		bContinue = false;
		for (unsigned i = 0; i < m_uCount - 1; ++i)
			{
			if (m_Diags[i].m_uStartPosA > m_Diags[i+1].m_uStartPosA)
				{
				Diag Tmp = m_Diags[i];
				m_Diags[i] = m_Diags[i+1];
				m_Diags[i+1] = Tmp;
				bContinue = true;
				}
			}
		}
	}

//void TestDiag()
//	{
//	Diag d1;
//	Diag d2;
//	Diag d3;
//
//	d1.m_uStartPosA = 0;
//	d1.m_uStartPosB = 1;
//	d1.m_uLength = 32;
//
//	d2.m_uStartPosA = 55;
//	d2.m_uStartPosB = 70;
//	d2.m_uLength = 36;
//
//	d3.m_uStartPosA = 102;
//	d3.m_uStartPosB = 122;
//	d3.m_uLength = 50;
//
//	DiagList DL;
//	DL.Add(d1);
//	DL.Add(d2);
//	DL.Add(d3);
//
//	Log("Before DeleteIncompatible:\n");
//	DL.LogMe();
//	DL.DeleteIncompatible();
//
//	Log("After DeleteIncompatible:\n");
//	DL.LogMe();
//
//	MergeDiags(DL);
//	Log("After Merge:\n");
//	DL.LogMe();
//
//	DPRegionList RL;
//	DiagListToDPRegionList(DL, RL, 200, 200);
//	RL.LogMe();
//	}
@@ -0,0 +1,89 @@
#ifndef diaglist_h
#define diaglist_h

const unsigned EMPTY = (unsigned) ~0;
const unsigned MAX_DIAGS = 1024;

// A diagonal: a run of m_uLength cells starting at position
// (m_uStartPosA, m_uStartPosB), with both positions advancing together.
struct Diag
	{
	unsigned m_uStartPosA;	// start position on the A axis
	unsigned m_uStartPosB;	// start position on the B axis
	unsigned m_uLength;		// number of cells in the run
	};

// A rectangle given by a start position and a length on each axis.
// NOTE(review): no use of Rect is visible in this header; presumably a
// region of the same A x B matrix that Diag refers to -- confirm.
struct Rect
	{
	unsigned m_uStartPosA;	// start position on the A axis
	unsigned m_uStartPosB;	// start position on the B axis
	unsigned m_uLengthA;	// extent along the A axis
	unsigned m_uLengthB;	// extent along the B axis
	};

// List of diagonals with a fixed capacity of MAX_DIAGS entries.
// The entries are stored in an array inside the object, so the class
// owns no heap memory; clearing the list just resets the count.
class DiagList
	{
public:
	DiagList() : m_uCount(0)
		{
		}
	~DiagList()
		{
		Free();
		}

// Creation
	// Empty the list.
	void Clear()
		{
		Free();
		}
	// Rebuild the list from an alignment path.
	void FromPath(const PWPath &Path);
	// Append one diagonal.
	void Add(const Diag &d);
	void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength);
	// Remove diagonals that conflict with each other.
	void DeleteIncompatible();

// Accessors
	// Number of diagonals currently in the list.
	unsigned GetCount() const
		{
		return m_uCount;
		}
	// Diagonal at the given index.
	const Diag &Get(unsigned uIndex) const;

// Operations
	void Sort();
	void Copy(const DiagList &DL);

// Query
	// Returns true iff the given diagonal is included in the list,
	// in whole or in part.
	bool NonZeroIntersection(const Diag &d) const;
	bool IsSorted() const;

// Diagnostics
	void LogMe() const;

private:
	// Reset to the empty state.
	void Free()
		{
		m_uCount = 0;
		}

private:
	unsigned m_uCount;			// number of valid entries in m_Diags
	Diag m_Diags[MAX_DIAGS];	// entries 0 .. m_uCount-1 are in use
	};

unsigned DiagOverlap(const Diag &d1, const Diag &d2);
unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
unsigned DiagBreak(const Diag &d1, const Diag &d2);
bool DiagCompatible(const Diag &d1, const Diag &d2);
void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL);
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL);
void MergeDiags(DiagList &DL);

#endif // diaglist_h
@@ -0,0 +1,162 @@
#include "muscle.h"
#include "msa.h"
#include "objscore.h"
#include "profile.h"

#define TRACE				0
#define COMPARE_3_52		0
#define BRUTE_LETTERS		0

// Weighted sum-of-pairs letter score of one column:
//
//	sum over sequence pairs i < j of  w_i * w_j * Mx[letter_i][letter_j]
//
// where only sequences whose letter is below g_AlphaSize take part.
// Instead of visiting every pair, the weights are first summed per
// letter (f), and the score is obtained as
//
//	( sum_a f_a^2 Mx[a][a] + 2 * sum_{a<b} f_a f_b Mx[a][b]
//	  - sum_i w_i^2 Mx[letter_i][letter_i] ) / 2
//
// With BRUTE_LETTERS enabled the pairwise sum is also computed and
// asserted to agree. Returns 0 if the sequence weights sum to <= 0.
// NOTE(review): the per-letter array has a hard-coded size of 20 while
// it is indexed by letters below g_AlphaSize -- confirm g_AlphaSize
// never exceeds 20.
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
	{
	SCOREMATRIX &Mx = *g_ptrScoreMatrix;
	const unsigned uSeqCount = msa.GetSeqCount();

#if	BRUTE_LETTERS
	SCORE BruteScore = 0;
	for (unsigned uSeqA = 0; uSeqA < uSeqCount; ++uSeqA)
		{
		unsigned uLetterA = msa.GetLetterEx(uSeqA, uColIndex);
		if (uLetterA >= g_AlphaSize)
			continue;
		WEIGHT wA = msa.GetSeqWeight(uSeqA);
		for (unsigned uSeqB = uSeqA + 1; uSeqB < uSeqCount; ++uSeqB)
			{
			unsigned uLetterB = msa.GetLetterEx(uSeqB, uColIndex);
			if (uLetterB >= g_AlphaSize)
				continue;
			WEIGHT wB = msa.GetSeqWeight(uSeqB);
			BruteScore += wA*wB*Mx[uLetterA][uLetterB];
			}
		}
#endif

// Total weight of all sequences.
	double dWeightTotal = 0;
	for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
		{
		const WEIGHT w = msa.GetSeqWeight(uSeq);
		dWeightTotal += w;
		}
	if (dWeightTotal <= 0)
		return 0;

// Per-letter weight totals, and removal of each sequence's
// pairing with itself.
	FCOUNT LetterWeight[20];
	memset(LetterWeight, 0, sizeof(LetterWeight));
	SCORE Total = 0;
	for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
		{
		const unsigned uLetter = msa.GetLetterEx(uSeq, uColIndex);
		if (uLetter < g_AlphaSize)
			{
			const WEIGHT w = msa.GetSeqWeight(uSeq);
			LetterWeight[uLetter] += w;
			Total -= w*w*Mx[uLetter][uLetter];
			}
		}

// All letter pairs (upper triangle counted twice).
	for (unsigned uRow = 0; uRow < g_AlphaSize; ++uRow)
		{
		const FCOUNT fRow = LetterWeight[uRow];
		Total += fRow*fRow*Mx[uRow][uRow];
		for (unsigned uCol = uRow + 1; uCol < g_AlphaSize; ++uCol)
			{
			const FCOUNT fCol = LetterWeight[uCol];
			Total += 2*fRow*fCol*Mx[uRow][uCol];
			}
		}
	Total /= 2;
#if	BRUTE_LETTERS
	assert(BTEq(BruteScore, Total));
#endif
	return Total;
	}

// Sum of ScoreColLetters() over a list of columns.
// Despite its name, each entry of Edges[] is used as a column index of
// msa (asserted to be below the column count).
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
	{
	const unsigned uColCount = msa.GetColCount();

	SCORE Sum = 0;
	unsigned uEdge = 0;
	while (uEdge < uEdgeCount)
		{
		const unsigned uCol = Edges[uEdge];
		assert(uCol < uColCount);
		Sum += ScoreColLetters(msa, uCol);
		++uEdge;
		}
	return Sum;
	}

// Store the letter score of every column of msa in Scores[], which must
// have room for msa.GetColCount() entries.
void GetLetterScores(const MSA &msa, SCORE Scores[])
	{
	const unsigned uCols = msa.GetColCount();
	unsigned uCol = 0;
	while (uCol < uCols)
		{
		Scores[uCol] = ScoreColLetters(msa, uCol);
		++uCol;
		}
	}

// Difference in objective score between two alignments, restricted to
// the listed columns: (letters + gaps of msa2 over Edges2) minus
// (letters + gaps of msa1 over Edges1).
// Path1 and Path2 are not used by this function.
// With COMPARE_3_52 enabled, the same difference is also derived from
// ObjScoreSP() on the full alignments and both are logged.
SCORE DiffObjScore(
  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1, 
  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
	{
#if	TRACE
	{
	Log("============DiffObjScore===========\n");
	Log("msa1:\n");
	msa1.LogMe();
	Log("\n");
	Log("Cols1: ");
	for (unsigned uEdge = 0; uEdge < uEdgeCount1; ++uEdge)
		Log(" %u", Edges1[uEdge]);
	Log("\n\n");
	Log("msa2:\n");
	msa2.LogMe();
	Log("Cols2: ");
	for (unsigned uEdge = 0; uEdge < uEdgeCount2; ++uEdge)
		Log(" %u", Edges2[uEdge]);
	Log("\n\n");
	}
#endif

#if	COMPARE_3_52
	// ObjScoreSP() leaves its letter and gap components in these
	// globals, so they are read right after each call.
	extern SCORE g_SPScoreLetters;
	extern SCORE g_SPScoreGaps;
	SCORE SP1 = ObjScoreSP(msa1);
	SCORE SPLetters1 = g_SPScoreLetters;
	SCORE SPGaps1 = g_SPScoreGaps;

	SCORE SP2 = ObjScoreSP(msa2);
	SCORE SPLetters2 = g_SPScoreLetters;
	SCORE SPGaps2 = g_SPScoreGaps;
	SCORE SPDiffLetters = SPLetters2 - SPLetters1;
	SCORE SPDiffGaps = SPGaps2 - SPGaps1;
	SCORE SPDiff = SPDiffLetters + SPDiffGaps;
#endif

// Letter components first, then gap components.
	const SCORE LetterScore1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
	const SCORE LetterScore2 = ScoreLetters(msa2, Edges2, uEdgeCount2);

	const SCORE GapScore1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
	const SCORE GapScore2 = ScoreGaps(msa2, Edges2, uEdgeCount2);

	const SCORE LetterDelta = LetterScore2 - LetterScore1;
	const SCORE GapDelta = GapScore2 - GapScore1;
	const SCORE TotalDelta = LetterDelta + GapDelta;

#if	COMPARE_3_52
	Log("ObjScoreSP    Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  SPLetters1, SPLetters2, SPDiffLetters);

	Log("DiffObjScore  Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  LetterScore1, LetterScore2, LetterDelta);

	Log("ObjScoreSP    Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  SPGaps1, SPGaps2, SPDiffGaps);

	Log("DiffObjScore  Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  GapScore1, GapScore2, GapDelta);

	Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, TotalDelta);
#endif

	return TotalDelta;
	}
@@ -0,0 +1,114 @@
#include "muscle.h"
#include "pwpath.h"

#define TRACE	0

// Find the edges on which two alignment paths differ.
//
//	p1, p2			Paths to compare.
//	Edges1			Out: indexes of edges of p1 that are not matched by
//					an equal edge of p2. Must have room for
//					p1.GetEdgeCount() entries.
//	ptruDiffCount1	Out: number of entries written to Edges1.
//	Edges2, ptruDiffCount2
//					Same for p2.
//
// Both paths are walked in parallel. When the current edges have the
// same prefix lengths they are compared with Equal() and both advance;
// otherwise the edge that is behind (smaller prefix length in A or B)
// is recorded as a difference and only that path advances. When one
// path is exhausted, all remaining edges of the other are differences;
// this also covers the case where either path is empty.
void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
  unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
	{
#if	TRACE
	Log("DiffPaths\n");
	Log("p1=");
	p1.LogMe();
	Log("p2=");
	p2.LogMe();
#endif
	const unsigned uEdgeCount1 = p1.GetEdgeCount();
	const unsigned uEdgeCount2 = p2.GetEdgeCount();

	unsigned uDiffCount1 = 0;
	unsigned uDiffCount2 = 0;
	unsigned uEdgeIndex1 = 0;
	unsigned uEdgeIndex2 = 0;
	for (;;)
		{
	// Exhaustion is checked before any edge is fetched, so GetEdge
	// is never called with an index past the end of a path.
		if (uEdgeCount1 == uEdgeIndex1)
			{
			while (uEdgeIndex2 < uEdgeCount2)
				Edges2[uDiffCount2++] = uEdgeIndex2++;
			break;
			}
		if (uEdgeCount2 == uEdgeIndex2)
			{
			while (uEdgeIndex1 < uEdgeCount1)
				Edges1[uDiffCount1++] = uEdgeIndex1++;
			break;
			}

		const unsigned uEdgeIndexTop1 = uEdgeIndex1;
		const unsigned uEdgeIndexTop2 = uEdgeIndex2;
		const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1);
		const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
#if	TRACE
		Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c  DC1=%u DC2=%u\n",
		  uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
		  uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
		  uDiffCount1, uDiffCount2);
#endif
		if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
		  Edge1->uPrefixLengthB == Edge2->uPrefixLengthB)
			{
			if (!Edge1->Equal(*Edge2))
				{
				Edges1[uDiffCount1++] = uEdgeIndex1;
				Edges2[uDiffCount2++] = uEdgeIndex2;
				}
			++uEdgeIndex1;
			++uEdgeIndex2;
			}

		else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
		  Edge2->uPrefixLengthB < Edge1->uPrefixLengthB)
			Edges2[uDiffCount2++] = uEdgeIndex2++;

		else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
		  Edge1->uPrefixLengthB < Edge2->uPrefixLengthB)
			Edges1[uDiffCount1++] = uEdgeIndex1++;

	// Safety net: every iteration must advance at least one path.
		if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2)
			Quit("DiffPaths stuck");
		}

#if	TRACE
	Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
	Log("Diffs1=");
	for (unsigned i = 0; i < uDiffCount1; ++i)
		{
		const PWEdge e = p1.GetEdge(Edges1[i]);
		Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB); 
		}
	Log("\n");
	Log("DiffCount2=%u\n", uDiffCount2);
	Log("Diffs2=");
	for (unsigned i = 0; i < uDiffCount2; ++i)
		{
		const PWEdge e = p2.GetEdge(Edges2[i]);
		Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB); 
		}
	Log("\n");
#endif
	*ptruDiffCount1 = uDiffCount1;
	*ptruDiffCount2 = uDiffCount2;
	}

void TestDiffPaths()
	{
	PWPath p1;
	PWPath p2;

	p1.AppendEdge('M', 1, 1);
	p1.AppendEdge('M', 2, 2);
	p1.AppendEdge('M', 3, 3);

	p2.AppendEdge('M', 1, 1);
	p2.AppendEdge('D', 2, 1);
	p2.AppendEdge('I', 2, 2);
	p2.AppendEdge('M', 3, 3);

	unsigned Edges1[64];
	unsigned Edges2[64];
	unsigned uDiffCount1;
	unsigned uDiffCount2;
	DiffPaths(p1, p2, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
	}
@@ -0,0 +1,381 @@
#include "muscle.h"
#include "tree.h"

#define TRACE	0

/***
Algorithm to compare two trees, X and Y.

A node x in X and node y in Y are defined to be
similar iff the set of leaves in the subtree under
x is identical to the set of leaves under y.

A node is defined to be dissimilar iff it is not
similar to any node in the other tree.

Nodes x and y are defined to be married iff every
node in the subtree under x is similar to a node
in the subtree under y. Married nodes are considered
to be equal. The subtrees under two married nodes can
at most differ by exchanges of left and right branches,
which we do not consider to be significant here.

A node is defined to be a bachelor iff it is not
married. If a node is a bachelor, then it has a
dissimilar node in its subtree, and it follows
immediately from the definition of marriage that its
parent is also a bachelor. Hence all nodes on the path
from a bachelor node to the root are bachelors.

We assume the trees have the same set of leaves, so
every leaf is trivially both similar and married to
the same leaf in the opposite tree. Bachelor nodes
are therefore always internal (i.e., non-leaf) nodes.

A node is defined to be a diff iff (a) it is married
and (b) its parent is a bachelor. The subtree under
a diff is maximally similar to the other tree. (In
other words, you cannot extend the subtree without
adding a bachelor). 

The set of diffs is the subset of the two trees that
we consider to be identical.

Example:

              -----A
        -----k
   ----j      -----B
--i     -----C
   ------D


              -----A
        -----p
   ----n      -----B
--m     -----D
   ------C


The following pairs of internal nodes are similar.

	Nodes	Set of leaves
	-----	-------------
	k,p		A,B
	i,m		A,B,C,D

Bachelors in the first tree are i and j, bachelors
in the second tree are m and n.

Node k and p are married, but i and m are not (because j
and n are bachelors). The diffs are C, D and k.

The set of bachelor nodes can be viewed as the internal
nodes of a tree, the leaves of which are diffs. (To see
that there can't be disjoint subtrees, note that the path
from a diff to a root is all bachelor nodes, so there is
always a path between two diffs that goes through the root).
We call this tree the "diffs tree".

There is a simple O(N) algorithm to build the diffs tree.
To achieve O(N) we avoid traversing a given subtree multiple
times and also avoid comparing lists of leaves. 

We visit nodes in depth-first order (i.e., a node is visited
before its parent).

If either child of a node is a bachelor, we flag it as
a bachelor.

If both children of the node we are visiting are married,
we check whether the spouses of those children have the
same parent in the other tree. If the parents are different,
the current node is a bachelor. If they have the same parent,
then the node we are visiting is the spouse of that parent.
We assign this newly identified married couple a unique integer
id. The id of a node is in one-to-one correspondence with the
set of leaves in its subtree. Two nodes have the same set of
leaves iff they have the same id. Bachelor nodes do not get
an id.
***/

static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
  const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
	  uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
#endif
	if (bIsDiff[uTreeNodeIndex])
		{
		unsigned uLeafCount = tree.GetLeafCount();
		unsigned *Leaves = new unsigned[uLeafCount];
		GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
		for (unsigned n = 0; n < uLeafCount; ++n)
			{
			const unsigned uLeafNodeIndex = Leaves[n];
			const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
			if (uId >= tree.GetLeafCount())
				Quit("BuildDiffs, id out of range");
			IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
#if	TRACE
			Log("  Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
#endif
			}
		delete[] Leaves;
		return;
		}

	if (tree.IsLeaf(uTreeNodeIndex))
		Quit("BuildDiffs: should never reach leaf");

	const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
	const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);

	const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
	const unsigned uDiffsRight = uDiffsLeft + 1;

	BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
	BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
	}

// Compare two rooted trees that carry the same leaf ids and build the
// "diffs tree" described in the comment at the top of this file.
//
//   Tree1, Tree2             Trees to compare; must be rooted and have
//                            the same node count. Leaf ids must lie in
//                            0 .. (leaf count - 1).
//   Diffs                    Out: tree whose internal nodes correspond to
//                            bachelor nodes of Tree1 and whose leaves
//                            correspond to diffs.
//   IdToDiffsLeafNodeIndex   Out: for each leaf id, the node index in
//                            Diffs of the diff that contains that leaf.
//                            Caller supplies storage for one entry per
//                            leaf.
//
// Any violated precondition or failed internal check terminates via Quit.
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("Tree1:\n");
	Tree1.LogMe();
	Log("\n");
	Log("Tree2:\n");
	Tree2.LogMe();
#endif

	if (!Tree1.IsRooted() || !Tree2.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = Tree1.GetNodeCount();
	const unsigned uNodeCount2 = Tree2.GetNodeCount();
	
	const unsigned uLeafCount = Tree1.GetLeafCount();
	const unsigned uLeafCount2 = Tree2.GetLeafCount();
	assert(uLeafCount == uLeafCount2);

	if (uNodeCount != uNodeCount2)
		Quit("DiffTrees: different node counts");

// Allocate tables so we can convert tree node index to
// and from the unique id with a O(1) lookup.
	unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
	unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];

	bool *bIsBachelor1 = new bool[uNodeCount];
	bool *bIsDiff1 = new bool[uNodeCount];

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
	// Use uNodeCount as value meaning "not set" in both tables.
		NodeIndexToId1[uNodeIndex] = uNodeCount;
		bIsBachelor1[uNodeIndex] = false;
		bIsDiff1[uNodeIndex] = false;
		IdToNodeIndex2[uNodeIndex] = uNodeCount;
		}

// Initialize node index <-> id lookup tables
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Tree1.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree1.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			NodeIndexToId1[uNodeIndex] = uId;
			}

		if (Tree2.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree2.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			IdToNodeIndex2[uId] = uNodeIndex;
			}
		}

// Validity check. This verifies that every id in 0 .. (L-1),
// where L is the leaf count, is carried by some leaf of Tree2.
// As Tree2 has L leaves, this also means the ids of its leaves
// are distinct (the range check above does not rule out two
// leaves having duplicate ids). The ids pre-assigned to the
// leaves of Tree1 are not checked here.
	for (unsigned uId = 0; uId < uLeafCount; ++uId)
		{
		unsigned uNodeIndex2 = IdToNodeIndex2[uId];
		if (uNodeCount == uNodeIndex2)
			Quit("DiffTrees, check 2");
		}

// Ids assigned to internal nodes are L, L+1 ... where L is
// the leaf count. An internal node id uniquely identifies a
// set of two or more leaves.
	unsigned uInternalNodeId = uLeafCount;

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex1;
	  uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
		{
#if	TRACE
		Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
		  uNodeIndex1,
		  Tree1.IsLeaf(uNodeIndex1),
		  bIsBachelor1[uNodeIndex1]);
#endif

	// Leaves are trivial; nothing to do. Nodes already flagged
	// as bachelors need no further work either.
		if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
			continue;

	// If either child is a bachelor, flag
	// this node as a bachelor and continue.
		unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
		if (bIsBachelor1[uLeft1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

		unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
		if (bIsBachelor1[uRight1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

	// Both children are married.
	// Married nodes are guaranteed to have an id.
		unsigned uIdLeft = NodeIndexToId1[uLeft1];
		unsigned uIdRight = NodeIndexToId1[uRight1];

		if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
			Quit("DiffTrees, check 5");

	// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
		unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
		unsigned uRight2 = IdToNodeIndex2[uIdRight];

		if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
			Quit("DiffTrees, check 6");

	// If the spouses of uLeft1 and uRight1 have the same
	// parent, then this parent is the spouse of uNodeIndex1.
	// Otherwise, uNodeIndex1 is a bachelor.
		unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
		unsigned uParentRight2 = Tree2.GetParent(uRight2);

#if	TRACE
		Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
		  uLeft1,
		  uRight1,
		  uLeft2,
		  uRight2,
		  uParentLeft2,
		  uParentRight2);
#endif

		if (uParentLeft2 == uParentRight2)
			{
			NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
			IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
			++uInternalNodeId;
			}
		else
			bIsBachelor1[uNodeIndex1] = true;
		}

// Flag the diffs: married nodes whose parent is a bachelor.
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (bIsBachelor1[uNodeIndex])
			continue;
		if (Tree1.IsRoot(uNodeIndex))
			{
		// Special case: a married root means there are no
		// bachelors at all; consider the root a diff.
			bIsDiff1[uNodeIndex] = true;
			continue;
			}
		const unsigned uParent = Tree1.GetParent(uNodeIndex);
		if (bIsBachelor1[uParent])
			bIsDiff1[uNodeIndex] = true;
		}

#if	TRACE
	Log("Tree1:\n");
	Log("Node    Id  Bach  Diff  Name\n");
	Log("----  ----  ----  ----  ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u  %4u     %d     %d",
		  n,
		  NodeIndexToId1[n],
		  bIsBachelor1[n],
		  bIsDiff1[n]);
		if (Tree1.IsLeaf(n))
			Log("  %s", Tree1.GetLeafName(n));
		Log("\n");
		}
	Log("\n");
	Log("Tree2:\n");
	Log("Node    Id              Name\n");
	Log("----  ----              ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u                  ", n);
		if (Tree2.IsLeaf(n))
			Log("  %s", Tree2.GetLeafName(n));
		Log("\n");
		}
#endif

	Diffs.CreateRooted();
	const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
	const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();

// uNodeCount again serves as the "not set" marker, so that the
// final check can detect any leaf id BuildDiffs failed to map.
	for (unsigned n = 0; n < uLeafCount; ++n)
		IdToDiffsLeafNodeIndex[n] = uNodeCount;

	BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
	  IdToDiffsLeafNodeIndex);

#if TRACE
	Log("\n");
	Log("Diffs:\n");
	Diffs.LogMe();
	Log("\n");
	Log("IdToDiffsLeafNodeIndex:");
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		if (n%16 == 0)
			Log("\n");
		else
			Log(" ");
		Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
		}
	Log("\n");
#endif

	for (unsigned n = 0; n < uLeafCount; ++n)
		if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
			Quit("TreeDiffs check 7");

	delete[] NodeIndexToId1;
	delete[] IdToNodeIndex2;

	delete[] bIsBachelor1;
	delete[] bIsDiff1;
	}
@@ -0,0 +1,235 @@
#include "muscle.h"
#include "tree.h"

#define TRACE	0

/***
Algorithm to compare two trees, X and Y.

A node x in X and node y in Y are defined to be
similar iff the set of leaves in the subtree under
x is identical to the set of leaves under y.

A node is defined to be dissimilar iff it is not
similar to any node in the other tree.

Nodes x and y are defined to be married iff every
node in the subtree under x is similar to a node
in the subtree under y. Married nodes are considered
to be equal. The subtrees under two married nodes can
at most differ by exchanges of left and right branches,
which we do not consider to be significant here.

A node is changed iff it is not married. If a node is
changed, then it has a dissimilar node in its subtree,
and it follows immediately from the definition of marriage
that its parent is also changed. Hence all nodes on the
path from a changed node to the root are changed.

We assume the trees have the same set of leaves, so
every leaf is trivially both similar and married to
the same leaf in the opposite tree. Changed nodes
are therefore always internal (i.e., non-leaf) nodes.

Example:

              -----A
        -----k
   ----j      -----B
--i     -----C
   ------D


              -----A
        -----p
   ----n      -----B
--m     -----D
   ------C


The following pairs of internal nodes are similar.

	Nodes	Set of leaves
	-----	-------------
	k,p		A,B
	i,m		A,B,C,D

Changed nodes in the first tree are i and j, changed nodes
in the second tree are m and n.

Node k and p are married, but i and m are not (because j
and n are changed). The diffs are C, D and k.

To achieve O(N) we avoid traversing a given subtree multiple
times and also avoid comparing lists of leaves. 

We visit nodes in depth-first order (i.e., a node is visited
before its parent).

If either child of a node is changed, we flag it as changed.

If both children of the node we are visiting are married,
we check whether the spouses of those children have the
same parent in the other tree. If the parents are different,
the current node is changed. If they have the same parent,
then the node we are visiting is the spouse of that parent.
We assign this newly identified married couple a unique integer
id. The id of a node is in one-to-one correspondence with the
set of leaves in its subtree. Two nodes have the same set of
leaves iff they have the same id. Changed nodes do not get
an id.
***/

// Map each node of NewTree to the equivalent ("married") node of OldTree.
//
//   NewTree, OldTree              Rooted trees with identical node and
//                                 leaf counts and matching leaf ids.
//   NewNodeIndexToOldNodeIndex    Out: one entry per node of NewTree.
//                                 Leaves map to the old leaf with the
//                                 same id. An internal node maps to the
//                                 common old parent of its children's
//                                 partners, or to NODE_CHANGED if a child
//                                 is changed or the partners have
//                                 different parents.
//
// Terminates via Quit if the trees are unrooted, their sizes differ, or
// an old leaf id is too large to be used as a table index.
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
  unsigned NewNodeIndexToOldNodeIndex[])
	{
#if	TRACE
	Log("DiffTreesE NewTree:\n");
	NewTree.LogMe();
	Log("\n");
	Log("OldTree:\n");
	OldTree.LogMe();
#endif

	if (!NewTree.IsRooted() || !OldTree.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = NewTree.GetNodeCount();
	const unsigned uOldNodeCount = OldTree.GetNodeCount();
	const unsigned uLeafCount = NewTree.GetLeafCount();
	const unsigned uOldLeafCount = OldTree.GetLeafCount();
	if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
		Quit("DiffTreesE: different node counts");

	{
	unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];

// Use uNodeCount as value meaning "not set", so that an id which
// no old leaf carries is caught by the assert below rather than
// yielding an indeterminate node index.
	for (unsigned uId = 0; uId < uNodeCount; ++uId)
		IdToOldNodeIndex[uId] = uNodeCount;

	for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
		{
		if (OldTree.IsLeaf(uOldNodeIndex))
			{
			unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
		// Guard the table write against an out-of-range id.
			if (Id >= uNodeCount)
				Quit("DiffTreesE: leaf id out of range");
			IdToOldNodeIndex[Id] = uOldNodeIndex;
			}
		}

// Initialize NewNodeIndexToOldNodeIndex[]
// All internal nodes are marked as changed, but may be updated later.
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
			assert(uId < uLeafCount);

			unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
			assert(uOldNodeIndex < uNodeCount);

			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
			}
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}
	delete[] IdToOldNodeIndex;
	}

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNewNodeIndex;
	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			continue;

	// If either child is changed, flag this node as changed and continue.
	// (The node already holds NODE_CHANGED from the initialization
	// above; the store just makes the intent explicit.)
		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
		if (NODE_CHANGED == uOldLeft)
			{
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
			continue;
			}

		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
		unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
		if (NODE_CHANGED == uOldRight)
			{
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
			continue;
			}

	// Both children have partners in the old tree. This node is
	// unchanged only if those partners share a parent.
		unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
		unsigned uOldParentRight = OldTree.GetParent(uOldRight);
		if (uOldParentLeft == uOldParentRight)
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}

#if TRACE
	{
	Log("NewToOld ");
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		Log(" [%3u]=", uNewNodeIndex);
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
			Log("  X");
		else
			Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
		if ((uNewNodeIndex+1)%8 == 0)
			Log("\n         ");
		}
	Log("\n");
	}
#endif

#if	DEBUG
// Self-check: leaves must map to a leaf with the same id, and the
// children of every unchanged internal node must map to the children
// of its partner (possibly with left and right exchanged).
	{
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			if (uOld >= uNodeCount)
				{
				Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
				  uNewNodeIndex, uOld, uNodeCount);
				Quit("Diff check failed");
				}
			unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
			unsigned uIdOld = OldTree.GetLeafId(uOld);
			if (uIdNew != uIdOld)
				{
				Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
				  uNewNodeIndex, uOld, uIdNew, uIdOld);
				Quit("Diff check failed");
				}
			continue;
			}

		if (NODE_CHANGED == uOld)
			continue;

		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);

		unsigned uOldLeft = OldTree.GetLeft(uOld);
		unsigned uOldRight = OldTree.GetRight(uOld);

		unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
		unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];

		bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
		bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
		if (!bSameNotRotated && !bSameRotated)
			{
			Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
			Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
			Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
			Quit("Diff check failed");
			}
		}
	}
#endif
	}
@@ -0,0 +1,89 @@
#include "muscle.h"
#include "distfunc.h"
#include "distcalc.h"
#include "msa.h"

void DistCalcDF::Init(const DistFunc &DF)
	{
	m_ptrDF = &DF;
	}

void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
	{
	for (unsigned j = 0; j < i; ++j)
		Dist[j] = m_ptrDF->GetDist(i, j);
	}

// Number of items, forwarded from the bound distance function.
unsigned DistCalcDF::GetCount() const
	{
	const unsigned uCount = m_ptrDF->GetCount();
	return uCount;
	}

// Id of item i, forwarded from the bound distance function.
unsigned DistCalcDF::GetId(unsigned i) const
	{
	const unsigned uId = m_ptrDF->GetId(i);
	return uId;
	}

// Name of item i, forwarded from the bound distance function. The
// returned pointer is whatever DistFunc::GetName hands back.
const char *DistCalcDF::GetName(unsigned i) const
	{
	const char *ptrName = m_ptrDF->GetName(i);
	return ptrName;
	}

// Bind this calculator to an alignment and choose the distance measure.
// Only the address of msa is kept, so msa must outlive every later call
// on this object.
void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
	{
	m_Distance = Distance;
	m_ptrMSA = &msa;
	}

void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
	{
	for (unsigned j = 0; j < i; ++j)
		{
		switch (m_Distance)
			{
		case DISTANCE_PctIdKimura:
			{
			const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
			Dist[j] = (float) KimuraDist(PctId);
			break;
			}
		case DISTANCE_PctIdLog:
			{
			const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
			Dist[j] = (float) PctIdToMAFFTDist(PctId);
			break;
			}
		case DISTANCE_ScoreDist:
			{
			double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2);
			Dist[j] = (float) GetScoreDist(*m_ptrMSA, i, j);
			continue;
			}
		case DISTANCE_Edit:
			{
			const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
			if (PctId > 1.0)
				Quit("Internal error, DISTANCE_Edit, pct id=%.3g", PctId);
			Dist[j] = (float) 1.0 - PctId;
			break;
			}
		default:
			Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
			}
		}
	}

// Number of items, i.e. the sequence count of the bound alignment.
unsigned DistCalcMSA::GetCount() const
	{
	const unsigned uSeqCount = m_ptrMSA->GetSeqCount();
	return uSeqCount;
	}

// Id of sequence i in the bound alignment.
unsigned DistCalcMSA::GetId(unsigned i) const
	{
	const unsigned uSeqId = m_ptrMSA->GetSeqId(i);
	return uSeqId;
	}

// Name of sequence i in the bound alignment. The returned pointer is
// whatever MSA::GetSeqName hands back.
const char *DistCalcMSA::GetName(unsigned i) const
	{
	const char *ptrSeqName = m_ptrMSA->GetSeqName(i);
	return ptrSeqName;
	}
@@ -0,0 +1,45 @@
#ifndef DistCalc_h
#define DistCalc_h

typedef float dist_t;
const dist_t BIG_DIST = (dist_t) 1e29;

class DistFunc;

// Abstract source of pair-wise distances between a set of items, each of
// which has an index, an id and a name.
class DistCalc
	{
public:
// Virtual so that a derived calculator can safely be destroyed
// through a DistCalc pointer.
	virtual ~DistCalc() {}

// Fill Dist[0 .. i-1] with the distances between item i and every
// item with a lower index.
	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;

// Number of items.
	virtual unsigned GetCount() const = 0;

// Id and name of item i.
	virtual unsigned GetId(unsigned i) const = 0;
	virtual const char *GetName(unsigned i) const = 0;
	};

class DistCalcDF : public DistCalc
	{
public:
	void Init(const DistFunc &DF);
	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
	virtual unsigned GetCount() const;
	virtual unsigned GetId(unsigned i) const;
	virtual const char *GetName(unsigned i) const;

private:
	const DistFunc *m_ptrDF;
	};

class DistCalcMSA : public DistCalc
	{
public:
	void Init(const MSA &msa, DISTANCE Distance);
	virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
	virtual unsigned GetCount() const;
	virtual unsigned GetId(unsigned i) const;
	virtual const char *GetName(unsigned i) const;

private:
	const MSA *m_ptrMSA;
	DISTANCE m_Distance;
	};

#endif	// DistCalc_h
@@ -0,0 +1,113 @@
#include "muscle.h"
#include "distfunc.h"
#include <assert.h>

// Construct an empty distance function: no items and no storage.
// Call SetCount before using any other member.
DistFunc::DistFunc() :
  m_uCount(0),
  m_uCacheCount(0),
  m_Dists(0),
  m_Names(0),
  m_Ids(0)
	{
	}

// Release the distance matrix, the id table and the name table together
// with every name string it holds.
DistFunc::~DistFunc()
	{
	if (0 != m_Names)
		{
	// The name table has m_uCacheCount entries (its allocated size),
	// which can exceed m_uCount after SetCount shrinks the logical
	// size. Walk the whole table so no name is leaked; unset
	// entries are null and free() ignores them.
		for (unsigned i = 0; i < m_uCacheCount; ++i)
			free(m_Names[i]);
		}
	delete[] m_Dists;
	delete[] m_Names;
	delete[] m_Ids;
	}

// Stored distance between items uIndex1 and uIndex2.
float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
	{
	const unsigned uCell = VectorIndex(uIndex1, uIndex2);
	return m_Dists[uCell];
	}

// Current number of items, as last set by SetCount.
unsigned DistFunc::GetCount() const
	{
	const unsigned uItems = m_uCount;
	return uItems;
	}

// Set the number of items to uCount.
//
// If the tables already have room for uCount items they are reused
// as-is: nothing is cleared, so names, ids and distances from earlier
// use remain in place. Otherwise the old tables (including all stored
// name strings) are released and new ones allocated, with distances
// zeroed, names null and ids set to all-ones.
void DistFunc::SetCount(unsigned uCount)
	{
	m_uCount = uCount;
	if (uCount <= m_uCacheCount)
		return;

// Release the old tables. The name table has m_uCacheCount
// entries; unset entries are null and free() ignores them.
	if (0 != m_Names)
		{
		for (unsigned i = 0; i < m_uCacheCount; ++i)
			free(m_Names[i]);
		}
	delete[] m_Dists;
	delete[] m_Names;
	delete[] m_Ids;

// Leave the object in a consistent empty state in case one of
// the allocations below throws.
	m_Dists = 0;
	m_Names = 0;
	m_Ids = 0;
	m_uCacheCount = 0;

	m_Dists = new float[VectorLength()];
	m_Names = new char *[m_uCount];
	m_Ids = new unsigned[m_uCount];
	m_uCacheCount = uCount;

	memset(m_Names, 0, m_uCount*sizeof(char *));
	memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
	memset(m_Dists, 0, VectorLength()*sizeof(float));
	}

// Store dDist as the distance between items uIndex1 and uIndex2. Both
// (uIndex1, uIndex2) and (uIndex2, uIndex1) are written, so the matrix
// stays symmetric.
void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
	{
	const unsigned uCell = VectorIndex(uIndex1, uIndex2);
	m_Dists[uCell] = dDist;

	const unsigned uMirrorCell = VectorIndex(uIndex2, uIndex1);
	m_Dists[uMirrorCell] = dDist;
	}

// Offset of cell (uIndex1, uIndex2) in m_Dists, which is laid out row by
// row with m_uCount cells per row. Both indexes must be below m_uCount
// (asserted).
unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
	{
	assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
	const unsigned uRowStart = uIndex1*m_uCount;
	return uRowStart + uIndex2;
	}

// Number of cells needed for a full m_uCount x m_uCount matrix.
unsigned DistFunc::VectorLength() const
	{
	const unsigned uSide = m_uCount;
	return uSide*uSide;
	}

// Set the name of item uIndex to a private copy of szName. uIndex must
// be below the current count (asserted). Any name previously stored for
// the item is released.
void DistFunc::SetName(unsigned uIndex, const char szName[])
	{
	assert(uIndex < m_uCount);

// Copy first, then release the old string, so that passing the
// item's current name (as returned by GetName) is still safe.
	char *ptrCopy = strsave(szName);
	free(m_Names[uIndex]);
	m_Names[uIndex] = ptrCopy;
	}

// Set the id of item uIndex. uIndex must be below the current count
// (asserted).
void DistFunc::SetId(unsigned uIndex, unsigned uId)
	{
	assert(uIndex < m_uCount);
	unsigned &uSlot = m_Ids[uIndex];
	uSlot = uId;
	}

// Name of item uIndex, or null if none has been set since the tables
// were allocated. The string remains owned by this object. uIndex must
// be below the current count (asserted).
const char *DistFunc::GetName(unsigned uIndex) const
	{
	assert(uIndex < m_uCount);
	const char *ptrName = m_Names[uIndex];
	return ptrName;
	}

// Id of item uIndex. uIndex must be below the current count (asserted).
unsigned DistFunc::GetId(unsigned uIndex) const
	{
	assert(uIndex < m_uCount);
	const unsigned uId = m_Ids[uIndex];
	return uId;
	}

// Write the distance matrix to the log: a header row of item indexes, a
// header row of (truncated) names, then one row per item showing its
// index, name and the distances to items 0 .. itself. Items without a
// name are shown with an empty one.
void DistFunc::LogMe() const
	{
	const unsigned uItems = m_uCount;
	Log("DistFunc::LogMe count=%u\n", uItems);

// Header: column indexes.
	Log("                     ");
	for (unsigned uCol = 0; uCol < uItems; ++uCol)
		Log(" %7u", uCol);
	Log("\n");

// Header: column names.
	Log("                     ");
	for (unsigned uCol = 0; uCol < uItems; ++uCol)
		{
		const char *ptrColName = m_Names[uCol];
		if (0 == ptrColName)
			ptrColName = "";
		Log(" %7.7s", ptrColName);
		}
	Log("\n");

// Body: lower triangle including the diagonal.
	for (unsigned uRow = 0; uRow < uItems; ++uRow)
		{
		const char *ptrRowName = m_Names[uRow];
		if (0 == ptrRowName)
			ptrRowName = "";
		Log("%4u  %10.10s  :  ", uRow, ptrRowName);
		for (unsigned uCol = 0; uCol <= uRow; ++uCol)
			Log(" %7.4g", GetDist(uRow, uCol));
		Log("\n");
		}
	}
@@ -0,0 +1,36 @@
#ifndef DistFunc_h
#define DistFunc_h

// Square table of pair-wise distances between items, with an optional
// name and id per item. The object owns all of its storage.
// NOTE(review): no copy constructor or assignment operator is declared
// although the destructor frees the owned arrays, so copying an
// instance would lead to a double free -- confirm no caller copies.
class DistFunc
	{
public:
	DistFunc();
	virtual ~DistFunc();

public:
// Set the number of items; storage is (re)allocated only when the
// count grows beyond what is already allocated.
	virtual void SetCount(unsigned uCount);
// Store a distance; both (uIndex1, uIndex2) and (uIndex2, uIndex1)
// are written.
	virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);

// Per-item name (copied on set) and id.
	void SetName(unsigned uIndex, const char szName[]);
	void SetId(unsigned uIndex, unsigned uId);
	const char *GetName(unsigned uIndex) const;
	unsigned GetId(unsigned uIndex) const;

	virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
	virtual unsigned GetCount() const;

// Write the table to the log.
	void LogMe() const;

protected:
// Offset of a cell in m_Dists, and total number of cells.
	unsigned VectorIndex(unsigned uIndex, unsigned uIndex2) const;
	unsigned VectorLength() const;

private:
// Current number of items.
	unsigned m_uCount;
// Number of items the arrays below were allocated for.
	unsigned m_uCacheCount;
// m_uCount x m_uCount distances, stored row by row.
	float *m_Dists;
// Item names (each entry null or an owned string) and ids.
	char **m_Names;
	unsigned *m_Ids;
	};

#endif	// DistFunc_h
@@ -0,0 +1,45 @@
#include "muscle.h"
#include "distfunc.h"
#include "msa.h"
#include "seqvect.h"
#include "pwpath.h"

// Compute a Kimura distance for every pair of sequences in v and store
// it in DF.
//
// Each pair is aligned with AlignTwoMSAs (each sequence wrapped in a
// one-sequence MSA); the pair-wise identity of the result is converted
// with KimuraDist. The sequence weighting method is switched to
// SEQWEIGHT_Henikoff for the duration of the call and restored before
// returning. DF is resized to v.Length() items; only distances between
// distinct sequences are written.
void DistPWKimura(const SeqVect &v, DistFunc &DF)
	{
	SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
	SetSeqWeightMethod(SEQWEIGHT_Henikoff);

	const unsigned uSeqCount = v.Length();
	DF.SetCount(uSeqCount);

// The loops below visit each unordered pair of distinct sequences
// once, i.e. N(N-1)/2 pairs. (With unsigned arithmetic this also
// yields 0 for N == 0.)
	const unsigned uPairCount = (uSeqCount*(uSeqCount - 1))/2;
	unsigned uCount = 0;
	SetProgressDesc("PWKimura distance");
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const Seq &s1 = v.GetSeq(uSeqIndex1);
		MSA msa1;
		msa1.FromSeq(s1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
			{
		// Report progress on every 20th pair only.
			if (0 == uCount%20)
				Progress(uCount, uPairCount);
			++uCount;
			const Seq &s2 = v.GetSeq(uSeqIndex2);
			MSA msa2;
			msa2.FromSeq(s2);
		
			PWPath Path;
			MSA msaOut;
			AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);

			double dPctId = msaOut.GetPctIdentityPair(0, 1);
			float f = (float) KimuraDist(dPctId);

			DF.SetDist(uSeqIndex1, uSeqIndex2, f);
			}
		}
	ProgressStepsDone();

	SetSeqWeightMethod(SeqWeightSave);
	}
@@ -0,0 +1,299 @@
#include "muscle.h"
#include "textfile.h"
#include "seqvect.h"
#include "distfunc.h"
#include "msa.h"
#include "tree.h"
#include "profile.h"
#include "timing.h"

// Warning text written to stderr by DoMuscle when a user-supplied guide
// tree is given and the warning has not been disabled.
static char g_strUseTreeWarning[] =
"\n******** WARNING ****************\n"
"\nYou specified the -usetree option.\n"
"Note that a good evolutionary tree may NOT be a good\n"
"guide tree for multiple alignment. For more details,\n"
"please refer to the user guide. To disable this\n"
"warning, use -usetree_nowarn <treefilename>.\n\n";

// Top-level driver for a standard alignment run, controlled entirely by
// the global option variables (g_pstr..., g_b..., g_u...).
//
// Steps, in order:
//   1. Read the input sequences; choose and apply the alphabet.
//   2. Optionally load a user score matrix (searched for under
//      $MUSCLE_MXPATH when that variable is set).
//   3. Assign sequence ids 0 .. N-1.
//   4. Obtain a guide tree, either from the -usetree file or computed
//      from the sequences; optionally save it (Tree1).
//   5. Progressive alignment along the guide tree.
//   6. Unless limited to one iteration or two sequences: optionally
//      refine the tree (skipped for a user tree) and refine the
//      alignment.
//   7. Write the result via MuscleOutput.
// A single input sequence is copied straight to the output file.
// Fatal conditions terminate via Quit.
void DoMuscle()
	{
	SetOutputFileName(g_pstrOutFileName);
	SetInputFileName(g_pstrInFileName);

	SetMaxIters(g_uMaxIters);
	SetSeqWeightMethod(g_SeqWeight1);

	TextFile fileIn(g_pstrInFileName);
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

// From here on uSeqCount is at least 1 (it is used as a divisor
// below), so no later check for an empty input is needed.
	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

	PTR_SCOREMATRIX UserMatrix = 0;
	if (0 != g_pstrMatrixFileName)
		{
		const char *FileName = g_pstrMatrixFileName;
		char *PathFileName = 0;
		const char *Path = getenv("MUSCLE_MXPATH");
		if (Path != 0)
			{
		// Room for "<Path>/<FileName>" plus the terminating nul.
			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
			PathFileName = new char[n];
			sprintf(PathFileName, "%s/%s", Path, FileName);
			FileName = PathFileName;
			}
			{
		// Own scope, so the file object is gone before the name
		// buffer it was opened with is released.
			TextFile File(FileName);
			UserMatrix = ReadMx(File);
			}
		delete[] PathFileName;
		g_Alpha = ALPHA_Amino;
		g_PPScore = PPSCORE_SP;
		}

	SetPPScore();

// Installed after SetPPScore so the user matrix is the one in effect.
	if (0 != UserMatrix)
		g_ptrScoreMatrix = UserMatrix;

	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags = g_bDiags1;
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propagate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

// Nothing to align: copy the single sequence to the output.
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName, true);
		v.ToFile(fileOut);
		return;
		}

	if (uSeqCount > 1)
		MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName)
		{
	// Discourage users...
		if (!g_bUseTreeNoWarn)
			fprintf(stderr, "%s", g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName);
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

	// Give every leaf the id of the input sequence with the same name.
		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
		  g_pstrDistMxFileName1);

	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly)
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	MSA msa;
	ProgNode *ProgNodes = 0;
	if (g_bLow)
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName)
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName, msa);
		return;
		}

	ValidateMuscleIds(msa);

// With a single iteration, or only two sequences, the progressive
// alignment is the final result.
	if (1 == g_uMaxIters || 2 == uSeqCount)
		{
		//TextFile fileOut(g_pstrOutFileName, true);
		//MHackEnd(msa);
		//msa.ToFile(fileOut);
		MuscleOutput(msa);
		return;
		}

// Second iteration: tree refinement, skipped for a user tree.
	if (0 == g_pstrUseTreeFileName)
		{
		g_bDiags = g_bDiags2;
		SetIter(2);

		if (g_bLow)
			{
			if (0 != g_uMaxTreeRefineIters)
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2);
	SetMuscleTree(GuideTree);

// Remaining iterations: alignment refinement.
	if (g_bAnchors)
		RefineVert(msa, GuideTree, g_uMaxIters - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	//TextFile fileOut(g_pstrOutFileName, true);
	//MHackEnd(msa);
	//msa.ToFile(fileOut);
	MuscleOutput(msa);
	}

void Run()
	{
	SetStartTime();
	Log("Started %s\n", GetTimeAsStr());
	for (int i = 0; i < g_argc; ++i)
		Log("%s ", g_argv[i]);
	Log("\n");

#if	TIMING
	TICKS t1 = GetClockTicks();
#endif
	if (g_bRefine)
		Refine();
	else if (g_bRefineW)
		{
		extern void DoRefineW();
		DoRefineW();
		}
	else if (g_bProfDB)
		ProfDB();
	else if (g_bSW)
		Local();
	else if (0 != g_pstrSPFileName)
		DoSP();
	else if (g_bProfile)
		Profile();
	else if (g_bPPScore)
		PPScore();
	else if (g_bPAS)
		ProgAlignSubFams();
	else if (g_bMakeTree)
		{
		extern void DoMakeTree();
		DoMakeTree();
		}
	else
		DoMuscle();

#if	TIMING
	extern TICKS g_ticksDP;
	extern TICKS g_ticksObjScore;
	TICKS t2 = GetClockTicks();
	TICKS TotalTicks = t2 - t1;
	TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
	double dSecs = TicksToSecs(TotalTicks);
	double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
	double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
	double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
	Log("                 Ticks     Secs    Pct\n");
	Log("          ============  =======  =====\n");
	Log("DP        %12ld  %7.2f  %5.1f%%\n",
	  (long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
	Log("OS        %12ld  %7.2f  %5.1f%%\n",
	  (long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
	Log("Other     %12ld  %7.2f  %5.1f%%\n",
	  (long) ticksOther, TicksToSecs(ticksOther), PctOther);
	Log("Total     %12ld  %7.2f  100.0%%\n", (long) TotalTicks, dSecs);
#endif

	ListDiagSavings();
	Log("Finished %s\n", GetTimeAsStr());
	}
@@ -0,0 +1,60 @@
#include "muscle.h"
#include "textfile.h"
#include "msa.h"
#include "objscore.h"
#include "tree.h"
#include "profile.h"

void DoSP()
	{
	TextFile f(g_pstrSPFileName);

	MSA a;
	a.FromFile(f);

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = a.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid SeqType");
		}
	SetAlpha(Alpha);
	a.FixAlpha();

	SetPPScore();

	const unsigned uSeqCount = a.GetSeqCount();
	if (0 == uSeqCount)
		Quit("No sequences in input file %s", g_pstrSPFileName);

	MSA::SetIdCount(uSeqCount);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		a.SetSeqId(uSeqIndex, uSeqIndex);

	SetSeqWeightMethod(g_SeqWeight1);
	Tree tree;
	TreeFromMSA(a, tree, g_Cluster2, g_Distance2, g_Root2);
	SetMuscleTree(tree);
	SetMSAWeightsMuscle((MSA &) a);

	SCORE SP = ObjScoreSP(a);

	Log("File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
	fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
	}
@@ -0,0 +1,73 @@
#ifndef DPRegionList_h
#define DPRegionList_h

#include "diaglist.h"

// Tag stored in DPRegion::m_Type telling which kind of region it holds.
enum DPREGIONTYPE
	{
	DPREGIONTYPE_Unknown,
	DPREGIONTYPE_Diag,
	DPREGIONTYPE_Rect
	};

// One region of a DP matrix: a type tag plus either a diagonal or a
// rectangle sharing the same storage.
struct DPRegion
	{
	DPREGIONTYPE m_Type;
	// Anonymous union: m_Diag and m_Rect overlap; m_Type records which
	// member was written.
	union
		{
		Diag m_Diag;
		Rect m_Rect;
		};
	};

const unsigned MAX_DPREGIONS = 1024;

// Fixed-capacity list of DP regions (at most MAX_DPREGIONS entries) held
// in an embedded array, so no heap allocation is involved.
class DPRegionList
	{
public:
	DPRegionList()
		{
		m_uCount = 0;
		}
	~DPRegionList()
		{
		Free();
		}

public:
// Creation
	// Empties the list.
	void Clear()
		{
		Free();
		}
	// Appends a copy of r (defined out of line).
	void Add(const DPRegion &r);

// Accessors
	// Number of regions currently stored.
	unsigned GetCount() const
		{
		return m_uCount;
		}

	// Region at uIndex; the index is checked by assert only.
	const DPRegion &Get(unsigned uIndex) const
		{
		assert(uIndex < m_uCount);
		return m_DPRegions[uIndex];
		}

	// Declared here so that this header matches the out-of-line
	// definition DPRegionList::GetDPArea() and the declaration in
	// dpreglist.h.
	unsigned GetDPArea() const;

// Diagnostics
	void LogMe() const;

private:
	// Resets the count; the array itself is left untouched.
	void Free()
		{
		m_uCount = 0;
		}

private:
	unsigned m_uCount;
	DPRegion m_DPRegions[MAX_DPREGIONS];
	};

void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
  unsigned uLengthA, unsigned uLengthB);

#endif	// DPRegionList_h
@@ -0,0 +1,108 @@
#include "muscle.h"
#include "dpreglist.h"

unsigned DPRegionList::GetDPArea() const
	{
	unsigned uArea = 0;
	for (unsigned i = 0; i < m_uCount; ++i)
		{
		const DPRegion &r = m_DPRegions[i];
		if (DPREGIONTYPE_Rect == r.m_Type)
			uArea += r.m_Rect.m_uLengthA*r.m_Rect.m_uLengthB;
		}
	return uArea;
	}

// Appends a copy of r to the list. Calls Quit() when the fixed-size
// array is already full.
void DPRegionList::Add(const DPRegion &r)
	{
	// ">=" rather than "==" so a count that somehow ran past the limit is
	// still rejected; "%u" because the count is unsigned.
	if (m_uCount >= MAX_DPREGIONS)
		Quit("DPRegionList::Add, overflow %u", m_uCount);
	m_DPRegions[m_uCount] = r;
	++m_uCount;
	}

void DPRegionList::LogMe() const
	{
	Log("DPRegionList::LogMe, count=%u\n", m_uCount);
	Log("Region  Type  StartA  StartB    EndA    EndB\n");
	Log("------  ----  ------  ------    ----    ----\n");
	for (unsigned i = 0; i < m_uCount; ++i)
		{
		const DPRegion &r = m_DPRegions[i];
		Log("%6u  ", i);
		if (DPREGIONTYPE_Diag == r.m_Type)
			Log("Diag  %6u  %6u  %6u  %6u\n",
			  r.m_Diag.m_uStartPosA,
			  r.m_Diag.m_uStartPosB,
			  r.m_Diag.m_uStartPosA + r.m_Diag.m_uLength - 1,
			  r.m_Diag.m_uStartPosB + r.m_Diag.m_uLength - 1);
		else if (DPREGIONTYPE_Rect == r.m_Type)
			Log("Rect  %6u  %6u  %6u  %6u\n",
			  r.m_Rect.m_uStartPosA,
			  r.m_Rect.m_uStartPosB,
			  r.m_Rect.m_uStartPosA + r.m_Rect.m_uLengthA - 1,
			  r.m_Rect.m_uStartPosB + r.m_Rect.m_uLengthB - 1);
		else
			Log(" *** ERROR *** Type=%u\n", r.m_Type);
		}
	}

// Converts a list of diagonals into a list of DP regions appended to RL.
//
// For each diagonal, in list order, this emits:
//   1. a rectangle from the end of the previous trimmed diagonal (or from
//      position 0,0 for the first one) up to the start of this trimmed
//      diagonal, and
//   2. the diagonal itself with g_uDiagMargin positions removed from each
//      end, provided anything is left after trimming.
// A final rectangle covers the remainder up to (uLengthA, uLengthB).
//
// Calls Quit() if g_uDiagMargin exceeds half of g_uMinDiagLength.
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
  unsigned uLengthA, unsigned uLengthB)
	{
	// The message now states the constraint that is actually tested and
	// uses %u for the unsigned arguments.
	if (g_uDiagMargin > g_uMinDiagLength/2)
		Quit("Invalid parameters, diagmargin=%u must be <= diaglength/2, diaglength=%u",
		  g_uDiagMargin, g_uMinDiagLength);

	unsigned uStartPosA = 0;
	unsigned uStartPosB = 0;
	const unsigned uDiagCount = DL.GetCount();
	DPRegion r;
	for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
		{
		const Diag &d = DL.Get(uDiagIndex);
		assert(d.m_uLength >= g_uMinDiagLength);

		// Start vertex = last position before the trimmed diagonal,
		// end vertex = first position after it. With a zero margin and a
		// zero start position the "- 1" wraps around; the "+ 1" applied
		// below wraps it back.
		const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
		const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
		const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
		const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;

		// Rectangle leading up to the trimmed diagonal.
		r.m_Type = DPREGIONTYPE_Rect;
		r.m_Rect.m_uStartPosA = uStartPosA;
		r.m_Rect.m_uStartPosB = uStartPosB;

		assert(uStartVertexA + 1 >= uStartPosA);
		assert(uStartVertexB + 1 >= uStartPosB);
		r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
		r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
		RL.Add(r);

		// Trimmed diagonal, only if it is non-empty.
		if (uEndVertexA > uStartVertexA + 1)
			{
			const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;

			r.m_Type = DPREGIONTYPE_Diag;
			r.m_Diag.m_uStartPosA = uStartVertexA + 1;
			r.m_Diag.m_uStartPosB = uStartVertexB + 1;
			assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
			r.m_Diag.m_uLength = uDiagLengthMinusCaps;
			RL.Add(r);
			}

		// The next rectangle starts where this trimmed diagonal ends.
		uStartPosA = uEndVertexA;
		uStartPosB = uEndVertexB;
		}

	assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
	assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);

	// Closing rectangle from the last diagonal to the end of both sequences.
	r.m_Type = DPREGIONTYPE_Rect;
	r.m_Rect.m_uStartPosA = uStartPosA;
	r.m_Rect.m_uStartPosB = uStartPosB;

	assert(uLengthA >= uStartPosA);
	assert(uLengthB >= uStartPosB);
	r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
	r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
	RL.Add(r);
	}
@@ -0,0 +1,76 @@
#ifndef dpreglist_h
#define dpreglist_h

#include "diaglist.h"

// Tag stored in DPRegion::m_Type telling which kind of region it holds.
enum DPREGIONTYPE
	{
	DPREGIONTYPE_Unknown,
	DPREGIONTYPE_Diag,
	DPREGIONTYPE_Rect
	};

// One region of a DP matrix: a type tag plus either a diagonal or a
// rectangle sharing the same storage.
struct DPRegion
	{
	DPREGIONTYPE m_Type;
	// Anonymous union: m_Diag and m_Rect overlap; m_Type records which
	// member was written.
	union
		{
		Diag m_Diag;
		Rect m_Rect;
		};
	};

const unsigned MAX_DPREGIONS = 1024;

// Fixed-capacity list of DP regions (at most MAX_DPREGIONS entries) held
// in an embedded array, so no heap allocation is involved.
class DPRegionList
	{
public:
	DPRegionList() : m_uCount(0)
		{
		}

	~DPRegionList()
		{
		Free();
		}

public:
// Creation
	// Empties the list.
	void Clear()
		{
		Free();
		}

	// Appends a copy of r (defined out of line).
	void Add(const DPRegion &r);

// Accessors
	// Number of regions currently stored.
	unsigned GetCount() const
		{
		return m_uCount;
		}

	// Region at uIndex; the index is checked by assert only.
	const DPRegion &Get(unsigned uIndex) const
		{
		assert(uIndex < m_uCount);
		return m_DPRegions[uIndex];
		}

	// Total area of the rectangular regions (defined out of line).
	unsigned GetDPArea() const;

// Diagnostics
	void LogMe() const;

private:
	// Resets the count; the array itself is left untouched.
	void Free()
		{
		m_uCount = 0;
		}

private:
	unsigned m_uCount;
	DPRegion m_DPRegions[MAX_DPREGIONS];
	};

void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
  unsigned uLengthA, unsigned uLengthB);

#endif	// dpreglist_h
@@ -0,0 +1,41 @@
#include "muscle.h"
#include "tree.h"

/***
Simple tree drawing algorithm.

y coordinate of node is index in depth-first traversal.
x coordinate is distance from root.
***/

// Number of parent links between uNodeIndex and the root of the tree
// (0 for the root itself).
static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
	{
	const unsigned uRootIndex = tree.GetRootNodeIndex();
	unsigned uEdgeCount = 0;
	for (unsigned uNode = uNodeIndex; uNode != uRootIndex;
	  uNode = tree.GetParent(uNode))
		++uEdgeCount;
	return uEdgeCount;
	}

// Logs the subtree under uNodeIndex in-order: left subtree, then one line
// for the node itself indented by five blanks per level below the root,
// then the right subtree.
static void DrawNode(const Tree &tree, unsigned uNodeIndex)
	{
	const bool bInternal = !tree.IsLeaf(uNodeIndex);

	if (bInternal)
		DrawNode(tree, tree.GetLeft(uNodeIndex));

	const unsigned uIndent = 5*DistFromRoot(tree, uNodeIndex);
	unsigned uBlank = 0;
	while (uBlank < uIndent)
		{
		Log(" ");
		++uBlank;
		}
	Log("%d\n", uNodeIndex);

	if (bInternal)
		DrawNode(tree, tree.GetRight(uNodeIndex));
	}

// Logs a simple text drawing of the whole tree, starting at its root.
void DrawTree(const Tree &tree)
	{
	DrawNode(tree, tree.GetRootNodeIndex());
	}
@@ -0,0 +1,88 @@
#include "muscle.h"
#include "edgelist.h"

// Creates an empty list that owns no arrays yet.
EdgeList::EdgeList() :
  m_uCount(0),
  m_uCacheSize(0),
  m_uNode1(0),
  m_uNode2(0)
	{
	}

// Releases both node arrays via Clear().
EdgeList::~EdgeList()
	{
	Clear();
	}

// Frees both node arrays and returns the list to its empty state.
void EdgeList::Clear()
	{
	delete[] m_uNode1;
	m_uNode1 = 0;

	delete[] m_uNode2;
	m_uNode2 = 0;

	m_uCacheSize = 0;
	m_uCount = 0;
	}

// Appends the edge uNode1->uNode2, growing the arrays when they are full.
void EdgeList::Add(unsigned uNode1, unsigned uNode2)
	{
	// Grow only when there is no free slot left. The previous test
	// (m_uCount <= m_uCacheSize) was true on every call, so each Add
	// reallocated and copied both arrays.
	if (m_uCount >= m_uCacheSize)
		Expand();
	m_uNode1[m_uCount] = uNode1;
	m_uNode2[m_uCount] = uNode2;
	++m_uCount;
	}

// Returns the number of edges currently stored.
unsigned EdgeList::GetCount() const
	{
	return m_uCount;
	}

// Stores the two node indexes of edge uIndex in *ptruNode1 and *ptruNode2.
// Calls Quit() when uIndex is not a valid edge index.
void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
	{
	// Valid indexes are 0..m_uCount-1; the previous ">" test let
	// uIndex == m_uCount through and read one slot past the stored edges.
	if (uIndex >= m_uCount)
		Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
	*ptruNode1 = m_uNode1[uIndex];
	*ptruNode2 = m_uNode2[uIndex];
	}

void EdgeList::Copy(const EdgeList &rhs)
	{
	Clear();
	const unsigned uCount = rhs.GetCount();
	for (unsigned n = 0; n < uCount; ++n)
		{
		unsigned uNode1;
		unsigned uNode2;
		rhs.GetEdge(n, &uNode1, &uNode2);
		Add(uNode1, uNode2);
		}
	}

void EdgeList::Expand()
	{
	unsigned uNewCacheSize = m_uCacheSize + 512;
	unsigned *NewNode1 = new unsigned[uNewCacheSize];
	unsigned *NewNode2 = new unsigned[uNewCacheSize];
	if (m_uCount > 0)
		{
		memcpy(NewNode1, m_uNode1, m_uCount*sizeof(unsigned));
		memcpy(NewNode2, m_uNode2, m_uCount*sizeof(unsigned));
		}
	delete[] m_uNode1;
	delete[] m_uNode2;
	m_uNode1 = NewNode1;
	m_uNode2 = NewNode2;
	m_uCacheSize = uNewCacheSize;
	}

// Logs all edges on one line as "a->b" pairs separated by single blanks.
void EdgeList::LogMe() const
	{
	if (m_uCount > 0)
		{
		Log("%u->%u", m_uNode1[0], m_uNode2[0]);
		for (unsigned uEdge = 1; uEdge < m_uCount; ++uEdge)
			{
			Log(" ");
			Log("%u->%u", m_uNode1[uEdge], m_uNode2[uEdge]);
			}
		}
	Log("\n");
	}
@@ -0,0 +1,28 @@
#ifndef EdgeList_h
#define EdgeList_h

// Growable list of edges, each stored as a pair of node indexes held in
// two parallel heap arrays.
// NOTE(review): the class owns raw arrays but declares no copy constructor
// or assignment operator, so a compiler-generated copy would share the
// arrays between two objects. Use Copy() to duplicate a list; confirm no
// caller copies by value.
class EdgeList
	{
public:
	EdgeList();
	virtual ~EdgeList();

public:
	// Frees the arrays and empties the list.
	void Clear();
	// Appends the edge uNode1->uNode2.
	void Add(unsigned uNode1, unsigned uNode2);
	// Number of edges stored.
	unsigned GetCount() const;
	// Returns the two nodes of edge uIndex through the pointer arguments.
	void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;
	// Replaces the contents with a copy of rhs.
	void Copy(const EdgeList &rhs);
	// Writes all edges to the log.
	void LogMe() const;

private:
	// Enlarges both arrays.
	void Expand();

private:
	unsigned m_uCount;		// edges in use
	unsigned m_uCacheSize;	// allocated entries per array
	unsigned *m_uNode1;		// first node of each edge
	unsigned *m_uNode2;		// second node of each edge
	};

#endif	// EdgeList_h
@@ -0,0 +1,8 @@
#include "muscle.h"
#include "enumopts.h"

// X-macro definitions for enums.h: each s(T) ... c(T, X) ... e(T) group
// expands to the definition of a table
//     EnumOpt T_Opts[] = { "X", T_X, ..., 0, 0 };
// i.e. one name/value pair per case, closed by an all-zero entry.
// enums.h #undefs s, c and e at its end.
#define	s(t)		EnumOpt t##_Opts[] = {
#define c(t, x)		#x, t##_##x,
#define e(t)		0, 0 };

#include "enums.h"
@@ -0,0 +1,16 @@
#ifndef enumopts_h
#define enumopts_h

// One entry of an option table: the textual name of an enum case and its
// integer value.
struct EnumOpt
	{
	const char *pstrOpt;
	int iValue;
	};

// X-macro definitions for enums.h: each s(T) expands to the declaration
// "extern EnumOpt T_Opts[];" while c() and e() expand to nothing.
#define	s(t)		extern EnumOpt t##_Opts[];
#define c(t, x)		/* empty */
#define e(t)		/* empty */
#include "enums.h"	


#endif // enumopts_h
@@ -0,0 +1,98 @@
// enums.h
// Define enum types.
// Exploit macro hacks to avoid lots of repetitive typing.
// Generally I am opposed to macro hacks because of the
// highly obscure code that results, but in this case it
// makes maintenance much easier and less error-prone.
// The idea is that this file can be included in different
// places with different definitions of s (Start), c (Case)
// and e (End). See types.h.

// Each group below is s(TYPE), one c(TYPE, Case) per value, then e(TYPE).
// What a group expands to depends on how the including file defined
// s, c and e before the #include.

// Sequence alphabets.
s(ALPHA)
c(ALPHA, Amino)
c(ALPHA, DNA)
c(ALPHA, RNA)
e(ALPHA)

// Requested sequence type; Auto means the alphabet is guessed from the data.
s(SEQTYPE)
c(SEQTYPE, Protein)
c(SEQTYPE, DNA)
c(SEQTYPE, RNA)
c(SEQTYPE, Auto)
e(SEQTYPE)

// Tree rooting methods.
s(ROOT)
c(ROOT, Pseudo)
c(ROOT, MidLongestSpan)
c(ROOT, MinAvgLeafDist)
e(ROOT)

// Tree clustering methods.
s(CLUSTER)
c(CLUSTER, UPGMA)
c(CLUSTER, UPGMAMax)
c(CLUSTER, UPGMAMin)
c(CLUSTER, UPGMB)
c(CLUSTER, NeighborJoining)
e(CLUSTER)

s(JOIN)
c(JOIN, NearestNeighbor)
c(JOIN, NeighborJoining)
e(JOIN)

s(LINKAGE)
c(LINKAGE, Min)
c(LINKAGE, Avg)
c(LINKAGE, Max)
c(LINKAGE, NeighborJoining)
c(LINKAGE, Biased)
e(LINKAGE)

// Distance measures.
s(DISTANCE)
c(DISTANCE, Kmer6_6)
c(DISTANCE, Kmer20_3)
c(DISTANCE, Kmer20_4)
c(DISTANCE, Kbit20_3)
c(DISTANCE, Kmer4_6)
c(DISTANCE, PctIdKimura)
c(DISTANCE, PctIdLog)
c(DISTANCE, PWKimura)
c(DISTANCE, PWScoreDist)
c(DISTANCE, ScoreDist)
c(DISTANCE, Edit)
e(DISTANCE)

s(PPSCORE)
c(PPSCORE, LE)
c(PPSCORE, SP)
c(PPSCORE, SV)
c(PPSCORE, SPN)
e(PPSCORE)

// Sequence weighting schemes.
s(SEQWEIGHT)
c(SEQWEIGHT, None)
c(SEQWEIGHT, Henikoff)
c(SEQWEIGHT, HenikoffPB)
c(SEQWEIGHT, GSC)
c(SEQWEIGHT, ClustalW)
c(SEQWEIGHT, ThreeWay)
e(SEQWEIGHT)

// Objective score functions.
s(OBJSCORE)
c(OBJSCORE, SP)				// Sum of Pairs of sequences
c(OBJSCORE, DP)				// Dynamic Programming score
c(OBJSCORE, XP)				// Cross Pairs = sum of pairs between two MSAs
c(OBJSCORE, PS)				// sum of Prof-Seq score for all seqs in MSA
c(OBJSCORE, SPF)			// sum of pairs, fast approximation
c(OBJSCORE, SPM)			// sp if <= 100 seqs, spf otherwise
e(OBJSCORE)

s(TERMGAPS)
c(TERMGAPS, Full)
c(TERMGAPS, Half)
c(TERMGAPS, Ext)
e(TERMGAPS)

// Remove the macros so the next includer can define its own versions.
#undef s
#undef c
#undef e
@@ -0,0 +1,16 @@
#include "muscle.h"
#include <stdio.h>

// Shared scratch buffer for values that match no case. A pointer returned
// into it is overwritten by the next unmatched conversion.
static char szMsg[64];

// Define XXXToStr(XXX x) functions for each enum type XXX.
// Each function returns "Undefined" for XXX_Undefined, the bare case name
// for a listed case, and otherwise formats "XXX_<number>" into szMsg.
#define s(t)	const char *t##ToStr(t x) { switch (x) { case t##_Undefined: return "Undefined";
#define c(t, x)	case t##_##x: return #x;
#define e(t)	} sprintf(szMsg, #t "_%d", x); return szMsg; }
#include "enums.h"

// Define StrToXXX(const char *Str) functions for each enum type XXX.
// Each function compares Str against every case name with stricmp and
// returns the matching value; when nothing matches it calls Quit() (the
// trailing return of XXX_Undefined follows that call).
#define s(t)	t StrTo##t(const char *Str) { if (0) ;
#define c(t, x)	else if (0 == stricmp(#x, Str)) return t##_##x;
#define e(t)	Quit("Invalid value %s for type %s", Str, #t); return t##_Undefined; }
#include "enums.h"
@@ -0,0 +1,689 @@
#include "muscle.h"
#include "pwpath.h"
#include "estring.h"
#include "seq.h"
#include "msa.h"

/***
An "estring" is an edit string that operates on a sequence.
An estring is represented as a vector of integers.
It is interpreted in order of increasing suffix.
A positive value n means copy n letters.
A negative value -n means insert n indels.
Zero marks the end of the vector.
Consecutive entries must have opposite sign, i.e. the
shortest possible representation must be used.

A "tpair" is a traceback path for a pairwise alignment
represented as two estrings, one for each sequence.
***/

// Packs two characters into one integer (c in the high byte, d in the low
// byte) so that a pair of edge types can be used as a single switch label.
#define c2(c,d)	(((unsigned char) c) << 8 | (unsigned char) d)

// Number of entries in an estring, not counting the terminating zero.
unsigned LengthEstring(const short es[])
	{
	unsigned uLength = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		++uLength;
	return uLength;
	}

// Returns a newly allocated copy of es, including its terminating zero.
// The copy is created with new[]; the caller is responsible for delete[].
short *EstringNewCopy(const short es[])
	{
	const unsigned uEntryCount = LengthEstring(es) + 1;
	short *esCopy = new short[uEntryCount];
	for (unsigned i = 0; i < uEntryCount; ++i)
		esCopy[i] = es[i];
	return esCopy;
	}

// Logs an estring as "<n1 n2 ...>" with single blanks between entries.
void LogEstring(const short es[])
	{
	Log("<");
	if (0 != es[0])
		{
		Log("%d", es[0]);
		for (const short *ptrEntry = es + 1; 0 != *ptrEntry; ++ptrEntry)
			{
			Log(" ");
			Log("%d", *ptrEntry);
			}
		}
	Log(">");
	}

// True if both estrings hold the same entries up to and including the
// terminating zero.
static bool EstringsEq(const short es1[], const short es2[])
	{
	unsigned i = 0;
	while (es1[i] == es2[i])
		{
		// Both reached their terminator at the same position.
		if (0 == es1[i])
			return true;
		++i;
		}
	return false;
	}

// Totals an estring: *ptruSymbols receives the sum of the positive entries
// (letters copied) and *ptruIndels the sum of the magnitudes of the
// negative entries (indels inserted).
static void EstringCounts(const short es[], unsigned *ptruSymbols,
  unsigned *ptruIndels)
	{
	unsigned uSymbolTotal = 0;
	unsigned uIndelTotal = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		{
		const short n = *ptrEntry;
		// n is never zero here, so it is either a copy or an indel run.
		if (n > 0)
			uSymbolTotal += n;
		else
			uIndelTotal += -n;
		}
	*ptruSymbols = uSymbolTotal;
	*ptruIndels = uIndelTotal;
	}

// Applies estring es to the nul-terminated string s and returns the result
// as a new nul-terminated string allocated with new[]: positive entries
// copy that many characters from s, negative entries emit that many '-'.
// s must contain exactly as many characters as es copies (asserted).
static char *EstringOp(const short es[], const char s[])
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert((unsigned) strlen(s) == uSymbols);

	char *sout = new char[uSymbols + uIndels + 1];
	unsigned uOutPos = 0;
	unsigned uInPos = 0;
	for (unsigned uEntry = 0; 0 != es[uEntry]; ++uEntry)
		{
		const int n = es[uEntry];
		if (n < 0)
			{
			for (int k = n; k < 0; ++k)
				sout[uOutPos++] = '-';
			}
		else
			{
			for (int k = 0; k < n; ++k)
				sout[uOutPos++] = s[uInPos++];
			}
		}
	// The whole input must have been consumed.
	assert(0 == s[uInPos]);
	sout[uOutPos] = 0;
	return sout;
	}

// Applies estring es to sequence sIn, writing the result to sOut: sOut is
// cleared, given the name of sIn, then filled with letters copied from sIn
// (positive entries) and '-' characters (negative entries).
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
	{
#if	DEBUG
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);
#endif
	sOut.Clear();
	sOut.SetName(sIn.GetName());

	int iInPos = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		{
		const int n = *ptrEntry;
		if (n < 0)
			{
			// Indel run.
			for (int k = n; k < 0; ++k)
				sOut.push_back('-');
			continue;
			}
		// Copy run.
		for (int k = 0; k < n; ++k)
			{
			const char cLetter = sIn[iInPos++];
			sOut.push_back(cLetter);
			}
		}
	}

// Applies estring es to sequence sIn and stores the result as the single
// row of alignment a (a is cleared first and takes the name and id of
// sIn). Returns the number of columns written.
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	const unsigned uColCount = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uColCount);
	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned uInPos = 0;
	unsigned uCol = 0;
	for (const short *ptrEntry = es; 0 != *ptrEntry; ++ptrEntry)
		{
		const int n = *ptrEntry;
		if (n < 0)
			{
			// Indel run.
			for (int k = n; k < 0; ++k)
				a.SetChar(0, uCol++, '-');
			continue;
			}
		// Copy run.
		for (int k = 0; k < n; ++k)
			{
			const char cLetter = sIn[uInPos++];
			a.SetChar(0, uCol++, cLetter);
			}
		}
	assert(uCol == uColCount);
	return uColCount;
	}

// Converts a pairwise alignment path into two estrings, one per sequence.
// The estrings are allocated with new[] and returned through *ptresA and
// *ptresB. An empty path yields two estrings holding only the terminator.
//
// Edge types as used below: 'M' advances in both sequences, 'D' takes a
// letter from A and puts an indel in B, 'I' takes a letter from B and puts
// an indel in A.
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
	{
// First pass to determine size of estrings esA and esB
	const unsigned uEdgeCount = Path.GetEdgeCount();
	if (0 == uEdgeCount)
		{
		short *esA = new short[1];
		short *esB = new short[1];
		esA[0] = 0;
		esB[0] = 0;
		*ptresA = esA;
		*ptresB = esB;
		return;
		}

	// Each estring starts with one entry; a new entry is needed whenever
	// consecutive edges switch between "letter" and "indel" for that
	// sequence.
	unsigned iLengthA = 1;
	unsigned iLengthB = 1;
	const char cFirstEdgeType = Path.GetEdge(0).cType;
	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
		case c2('M', 'M'):
		case c2('D', 'D'):
		case c2('I', 'I'):
			break;

		// M<->D changes letter/indel state for B only.
		case c2('D', 'M'):
		case c2('M', 'D'):
			++iLengthB;
			break;

		// M<->I changes letter/indel state for A only.
		case c2('I', 'M'):
		case c2('M', 'I'):
			++iLengthA;
			break;

		// I<->D changes state for both.
		case c2('I', 'D'):
		case c2('D', 'I'):
			++iLengthB;
			++iLengthA;
			break;

		default:
			assert(false);
			}
		cPrevEdgeType = cEdgeType;
		}

// Pass2 for seq A
	{
	short *esA = new short[iLengthA+1];
	unsigned iA = 0;
	// M and D consume a letter of A; I is an indel in A.
	switch (Path.GetEdge(0).cType)
		{
	case 'M':
	case 'D':
		esA[0] = 1;
		break;

	case 'I':
		esA[0] = -1;
		break;

	default:
		assert(false);
		}

	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
		// Still copying letters of A: extend the current positive run.
		case c2('M', 'M'):
		case c2('D', 'D'):
		case c2('D', 'M'):
		case c2('M', 'D'):
			++(esA[iA]);
			break;

		// Indel run ends: start a new positive run.
		case c2('I', 'D'):
		case c2('I', 'M'):
			++iA;
			esA[iA] = 1;
			break;

		// Letter run ends: start a new negative run.
		case c2('M', 'I'):
		case c2('D', 'I'):
			++iA;
			esA[iA] = -1;
			break;

		// Still inserting indels: extend the current negative run.
		case c2('I', 'I'):
			--(esA[iA]);
			break;

		default:
			assert(false);
			}

		cPrevEdgeType = cEdgeType;
		}
	assert(iA == iLengthA - 1);
	esA[iLengthA] = 0;
	*ptresA = esA;
	}

	{
// Pass2 for seq B
	short *esB = new short[iLengthB+1];
	unsigned iB = 0;
	// M and I consume a letter of B; D is an indel in B.
	switch (Path.GetEdge(0).cType)
		{
	case 'M':
	case 'I':
		esB[0] = 1;
		break;

	case 'D':
		esB[0] = -1;
		break;

	default:
		assert(false);
		}

	char cPrevEdgeType = cFirstEdgeType;
	for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cEdgeType = Edge.cType;

		switch (c2(cPrevEdgeType, cEdgeType))
			{
		// Still copying letters of B: extend the current positive run.
		case c2('M', 'M'):
		case c2('I', 'I'):
		case c2('I', 'M'):
		case c2('M', 'I'):
			++(esB[iB]);
			break;

		// Indel run ends: start a new positive run.
		case c2('D', 'I'):
		case c2('D', 'M'):
			++iB;
			esB[iB] = 1;
			break;

		// Letter run ends: start a new negative run.
		case c2('M', 'D'):
		case c2('I', 'D'):
			++iB;
			esB[iB] = -1;
			break;

		// Still inserting indels: extend the current negative run.
		case c2('D', 'D'):
			--(esB[iB]);
			break;

		default:
			assert(false);
			}

		cPrevEdgeType = cEdgeType;
		}
	assert(iB == iLengthB - 1);
	esB[iLengthB] = 0;
	*ptresB = esB;
	}

#if	DEBUG
	{
	// Cross-check: counts must agree with the last edge of the path, and
	// converting the estrings back must reproduce the original path.
	const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(*ptresA, &uSymbols, &uIndels);
	assert(uSymbols == LastEdge.uPrefixLengthA);
	assert(uSymbols + uIndels == uEdgeCount);

	EstringCounts(*ptresB, &uSymbols, &uIndels);
	assert(uSymbols == LastEdge.uPrefixLengthB);
	assert(uSymbols + uIndels == uEdgeCount);

	PWPath TmpPath;
	EstringsToPath(*ptresA, *ptresB, TmpPath);
	TmpPath.AssertEqual(Path);
	}
#endif
	}

// Rebuilds a pairwise alignment path from the two estrings of a tpair.
// Path is cleared first, then one edge is appended per alignment column.
// The estrings must describe the same number of columns; inconsistent
// input is caught only by asserts.
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
	{
	Path.Clear();
	unsigned iA = 0;
	unsigned iB = 0;
	// nA and nB hold what is left of the current run in each estring
	// (positive = letters, negative = indels).
	int nA = esA[iA++];
	int nB = esB[iB++];
	unsigned uPrefixLengthA = 0;
	unsigned uPrefixLengthB = 0;
	for (;;)
		{
		// Consume one column from both runs and derive its edge type.
		// NOTE(review): cType stays uninitialized if one of the
		// assert(false) branches is reached in a build without asserts.
		char cType;
		if (nA > 0)
			{
			if (nB > 0)
				{
				cType = 'M';
				--nA;
				--nB;
				}
			else if (nB < 0)
				{
				cType = 'D';
				--nA;
				++nB;
				}
			else
				assert(false);
			}
		else if (nA < 0)
			{
			if (nB > 0)
				{
				cType = 'I';
				++nA;
				--nB;
				}
			else
				assert(false);
			}
		else
			assert(false);

		// Prefix lengths count the letters consumed so far in A and in B.
		switch (cType)
			{
		case 'M':
			++uPrefixLengthA;
			++uPrefixLengthB;
			break;
		case 'D':
			++uPrefixLengthA;
			break;
		case 'I':
			++uPrefixLengthB;
			break;
			}

		PWEdge Edge;
		Edge.cType = cType;
		Edge.uPrefixLengthA = uPrefixLengthA;
		Edge.uPrefixLengthB = uPrefixLengthB;
		Path.AppendEdge(Edge);

		// Fetch the next run when the current one is used up; the end of
		// esA terminates the loop.
		if (nA == 0)
			{
			if (0 == esA[iA])
				{
				assert(0 == esB[iB]);
				break;
				}
			nA = esA[iA++];
			}
		if (nB == 0)
			nB = esB[iB++];
		}
	}

/***
Multiply two estrings to make a third estring.
The product of two estrings e1*e2 is defined to be
the estring that produces the same result as applying
e1 then e2. Multiplication is not commutative. In fact,
the reversed order is undefined unless both estrings
consist of a single, identical, positive entry.
A primary motivation for using estrings is that
multiplication is very fast, reducing the time
needed to construct the root alignment.

Example

	<-1,3>(XXX)	= -XXX
	<2,-1,2>(-XXX) = -X-XX

Therefore,

	<-1,3>*<2,-1,2> = <-1,1,-1,2>
***/

// True if es1*es2 is defined, i.e. the length of the output of es1
// (symbols plus indels) equals the number of symbols es2 consumes.
static bool CanMultiplyEstrings(const short es1[], const short es2[])
	{
	unsigned uSymbols1, uIndels1;
	EstringCounts(es1, &uSymbols1, &uIndels1);

	unsigned uSymbols2, uIndels2;
	EstringCounts(es2, &uSymbols2, &uIndels2);

	const unsigned uOutputLength1 = uSymbols1 + uIndels1;
	return uOutputLength1 == uSymbols2;
	}

// Appends an indel run n (a negative count) to the estring being built in
// esp, where ip is the index of its last entry (-1 while empty). The run
// is merged into the last entry when that entry is also an indel run;
// otherwise a new entry is started.
static inline void AppendGaps(short esp[], int &ip, int n)
	{
	const bool bMerge = (-1 != ip) && (esp[ip] < 0);
	if (bMerge)
		esp[ip] += n;
	else
		esp[++ip] = n;
	}

// Appends a copy run n (a positive count) to the estring being built in
// esp, where ip is the index of its last entry (-1 while empty). The run
// is merged into the last entry when that entry is also a copy run;
// otherwise a new entry is started.
static inline void AppendSymbols(short esp[], int &ip, int n)
	{
	const bool bMerge = (-1 != ip) && (esp[ip] > 0);
	if (bMerge)
		esp[ip] += n;
	else
		esp[++ip] = n;
	}

// Computes the product es1*es2 into esp: the estring that has the same
// effect as applying es1 and then es2. esp is supplied by the caller and
// must be large enough for the result including its terminating zero.
// Precondition (asserted): the output length of es1 equals the number of
// symbols consumed by es2.
void MulEstrings(const short es1[], const short es2[], short esp[])
	{
	assert(CanMultiplyEstrings(es1, es2));

	unsigned i1 = 0;
	// ip is the index of the last entry written to esp, -1 while empty.
	int ip = -1;
	// n1 is what is left of the current run of es1.
	int n1 = es1[i1++];
	for (unsigned i2 = 0; ; ++i2)
		{
		int n2 = es2[i2];
		if (0 == n2)
			break;
		if (n2 > 0)
			{
			// es2 copies n2 characters of the output of es1. Walk through
			// the runs of es1 until n2 characters have been emitted.
			for (;;)
				{
				if (n1 < 0)
					{
					// Current run of es1 is indels.
					if (n2 > -n1)
						{
						// Run is used up before n2 is: emit it all, go on.
						AppendGaps(esp, ip, n1);
						n2 += n1;
						n1 = es1[i1++];
						}
					else if (n2 == -n1)
						{
						// Run and n2 end together.
						AppendGaps(esp, ip, n1);
						n1 = es1[i1++];
						break;
						}
					else
						{
						// n2 ends inside the run: emit part, keep the rest.
						assert(n2 < -n1);
						AppendGaps(esp, ip, -n2);
						n1 += n2;
						break;
						}
					}
				else
					{
					// Current run of es1 is letters; same three cases.
					assert(n1 > 0);
					if (n2 > n1)
						{
						AppendSymbols(esp, ip, n1);
						n2 -= n1;
						n1 = es1[i1++];
						}
					else if (n2 == n1)
						{
						AppendSymbols(esp, ip, n1);
						n1 = es1[i1++];
						break;
						}
					else
						{
						assert(n2 < n1);
						AppendSymbols(esp, ip, n2);
						n1 -= n2;
						break;
						}
					}
				}
			}
		else
			{
			// Indels inserted by es2 pass straight through to the product.
			assert(n2 < 0);
			AppendGaps(esp, ip, n2);
			}
		}
	esp[++ip] = 0;

#if	DEBUG
	{
	int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
	assert(ip < MaxLen);
	// Adjacent entries must alternate in sign.
	// NOTE(review): ip is now the index of the terminator, so the bound
	// "ip - 2" appears to leave the last adjacent pair unchecked -- confirm.
	if (ip >= 2)
		for (int i = 0; i < ip - 2; ++i)
			{
			if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
				{
				Log("Bad result of MulEstring: ");
				LogEstring(esp);
				Quit("Assert failed (alternating signs)");
				}
			}
	unsigned uSymbols1;
	unsigned uSymbols2;
	unsigned uSymbolsp;
	unsigned uIndels1;
	unsigned uIndels2;
	unsigned uIndelsp;
	EstringCounts(es1, &uSymbols1, &uIndels1);
	EstringCounts(es2, &uSymbols2, &uIndels2);
	EstringCounts(esp, &uSymbolsp, &uIndelsp);
	// NOTE(review): this repeats the precondition on es1 and es2; the
	// counts of the product (uSymbolsp, uIndelsp) are computed but not
	// checked.
	if (uSymbols1 + uIndels1 != uSymbols2)
		{
		Log("Bad result of MulEstring: ");
		LogEstring(esp);
		Quit("Assert failed (counts1 %u %u %u)",
		  uSymbols1, uIndels1, uSymbols2);
		}
	}
#endif
	}

static void test(const short es1[], const short es2[], const short esa[])
	{
	unsigned uSymbols1;
	unsigned uSymbols2;
	unsigned uIndels1;
	unsigned uIndels2;
	EstringCounts(es1, &uSymbols1, &uIndels1);
	EstringCounts(es2, &uSymbols2, &uIndels2);

	char s[4096];
	memset(s, 'X', sizeof(s));
	s[uSymbols1] = 0;

	char *s1 = EstringOp(es1, s);
	char *s12 = EstringOp(es2, s1);

	memset(s, 'X', sizeof(s));
	s[uSymbols2] = 0;
	char *s2 = EstringOp(es2, s);

	Log("%s * %s = %s\n", s1, s2, s12);

	LogEstring(es1);
	Log(" * ");
	LogEstring(es2);
	Log(" = ");
	LogEstring(esa);
	Log("\n");

	short esp[4096];
	MulEstrings(es1, es2, esp);
	LogEstring(esp);
	if (!EstringsEq(esp, esa))
		Log(" *ERROR* ");
	Log("\n");

	memset(s, 'X', sizeof(s));
	s[uSymbols1] = 0;
	char *sp = EstringOp(esp, s);
	Log("%s\n", sp);
	Log("\n==========\n\n");
	}

// Developer self-test for estring multiplication. Directs the log to a
// hard-coded file, runs the one enabled case through test(), and then
// terminates the process with exit(0), so it never returns.
void TestEstrings()
	{
	SetListFileName("c:\\tmp\\muscle.log", false);
	// Earlier small cases, currently disabled.
	//{
	//short es1[] = { -1, 1, -1, 0 };
	//short es2[] = { 1, -1, 2, 0 };
	//short esa[] = { -2, 1, -1, 0 };
	//test(es1, es2, esa);
	//}
	//{
	//short es1[] = { 2, -1, 2, 0 };
	//short es2[] = { 1, -1, 3, -1, 1, 0 };
	//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
	//test(es1, es2, esa);
	//}
	//{
	//short es1[] = { -1, 3, 0 };
	//short es2[] = { 2, -1, 2, 0 };
	//short esa[] = { -1, 1, -1, 2, 0 };
	//test(es1, es2, esa);
	//}
	//{
	//short es1[] = { -1, 1, -1, 1, 0};
	//short es2[] = { 4, 0 };
	//short esa[] = { -1, 1, -1, 1, 0};
	//test(es1, es2, esa);
	//}
	//{
	//short es1[] = { 1, -1, 1, -1, 0};
	//short es2[] = { 4, 0 };
	//short esa[] = { 1, -1, 1, -1, 0};
	//test(es1, es2, esa);
	//}
	//{
	//short es1[] = { 1, -1, 1, -1, 0};
	//short es2[] = { -1, 4, -1, 0 };
	//short esa[] = { -1, 1, -1, 1, -2, 0};
	//test(es1, es2, esa);
	//}
	{
	// The expected product esa is left empty here, so test() will log
	// " *ERROR* " for any non-empty result.
	short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
	short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
	short esa[] = { 0 };
	test(es1, es2, esa);
	}
	exit(0);
	}
@@ -0,0 +1,13 @@
#ifndef pathsum_h
#define pathsum_h

// Path <-> estring pair conversion. PathToEstrings allocates both results
// with new[].
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);
void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
// esp = es1*es2; esp is a caller-supplied buffer.
void MulEstrings(const short es1[], const short es2[], short esp[]);
// Apply an estring to a sequence; the MSA form returns the column count.
void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);
void LogEstring(const short es[]);
// Number of entries, excluding the terminating zero.
unsigned LengthEstring(const short es[]);
// Copy allocated with new[].
short *EstringNewCopy(const short es[]);

#endif	// pathsum_h
@@ -0,0 +1,56 @@
#include "muscle.h"
#include <stdio.h>
#include <ctype.h>
#include "msa.h"
#include "textfile.h"

const unsigned FASTA_BLOCK = 60;

// Replaces the contents of this alignment with the sequences read from a
// FASTA file. Records are read with GetFastaSeq() (gap characters are
// kept) until it reports end of file, and each one is handed to
// AppendSeq().
// NOTE(review): the buffers returned by GetFastaSeq are allocated with
// new[] and are not freed here; presumably AppendSeq takes ownership of
// them -- confirm.
void MSA::FromFASTAFile(TextFile &File)
	{
	Clear();

	FILE *f = File.GetStdioFile();

	for (;;)
		{
		char *Label;
		unsigned uSeqLength;
		char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
		if (0 == SeqData)
			break;
		AppendSeq(SeqData, uSeqLength, Label);
		}
	}

// Writes the alignment in FASTA format: for each sequence a ">name" line
// followed by its columns in lines of at most FASTA_BLOCK characters.
//
// An alignment without columns is a caller error (asserted). Should it
// occur in a build without asserts, only the label lines are written; the
// previous line-count arithmetic underflowed in that case.
void MSA::ToFASTAFile(TextFile &File) const
	{
	const unsigned uColCount = GetColCount();
	assert(uColCount > 0);
	const unsigned uSeqCount = GetSeqCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		File.PutString(">");
		File.PutString(GetSeqName(uSeqIndex));
		File.PutString("\n");

		// uColIndex never exceeds uColCount, so the subtraction is safe.
		unsigned uColIndex = 0;
		while (uColIndex < uColCount)
			{
			unsigned uLetters = uColCount - uColIndex;
			if (uLetters > FASTA_BLOCK)
				uLetters = FASTA_BLOCK;
			for (unsigned i = 0; i < uLetters; ++i)
				{
				char c = GetChar(uSeqIndex, uColIndex);
				File.PutChar(c);
				++uColIndex;
				}
			File.PutChar('\n');
			}
		}
	}
@@ -0,0 +1,114 @@
#include "muscle.h"
#include <stdio.h>
#include <errno.h>

//const int BUFFER_BYTES = 16*1024;
const int BUFFER_BYTES = 128;
const int CR = '\r';
const int NL = '\n';

// Appends one character to the growable buffer made up of the locals
// Buffer, BufferLength and Pos of the enclosing function, enlarging it by
// BUFFER_BYTES when it is full. The copy is skipped while Buffer is still
// null, because memcpy must not be given a null pointer even for a zero
// length.
#define ADD(c)															\
		{																\
		if (Pos >= BufferLength)										\
			{															\
			const unsigned NewBufferLength = BufferLength + BUFFER_BYTES;	\
			char *NewBuffer	= new char[NewBufferLength];				\
			if (0 != Buffer)											\
				memcpy(NewBuffer, Buffer, BufferLength);				\
			delete[] Buffer;											\
			Buffer = NewBuffer;											\
			BufferLength = NewBufferLength;								\
			}															\
		Buffer[Pos++] = c;												\
		}

// Get next sequence from file.
//
// Reads one FASTA record starting at the current position of f.
//   f             open input stream
//   ptrSeqLength  receives the number of characters in the returned data
//   ptrLabel      receives the nul-terminated label (text after '>')
//   DeleteGaps    when true, gap characters are dropped from the data
// Returns the sequence data, which is NOT nul-terminated, or 0 at end of
// file. Both the data and the label are allocated with new[]. When 0 is
// returned, *ptrSeqLength is not set.
//
// Letters are converted to upper case, white space is skipped, and any
// other character is ignored with a warning. A record without sequence
// data is skipped. Malformed input ends the program through Quit().
char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
	{
	unsigned BufferLength = 0;
	unsigned Pos = 0;
	char *Buffer = 0;

	int c = fgetc(f);
	if (EOF == c)
		return 0;
	if ('>' != c)
		Quit("Invalid file format, expected '>' to start FASTA label");

	for (;;)
		{
		c = fgetc(f);
		if (EOF == c)
			Quit("End-of-file or input error in FASTA label");

	// NL or CR terminates label
		if (NL == c || CR == c)
			break;

	// All other characters added to label
		ADD(c)
		}

// Nul-terminate label
	ADD(0)
	*ptrLabel = Buffer;

// Start a fresh buffer for the sequence data
	BufferLength = 0;
	Pos = 0;
	Buffer = 0;
	int PreviousChar = NL;
	for (;;)
		{
		c = fgetc(f);
		if (EOF == c)
			{
			if (feof(f))
				break;
			else if (ferror(f))
				Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
				  errno, strerror(errno));
			else
				Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
				  errno, strerror(errno));
			}

		if ('>' == c)
			{
		// '>' at the start of a line begins the next record; push it
		// back so the next call sees it.
			if (NL == PreviousChar || CR == PreviousChar)
				{
				ungetc(c, f);
				break;
				}
			else
				Quit("Unexpected '>' in FASTA sequence data");
			}
		else if (isspace(c))
			;
		else if (IsGapChar(c))
			{
			if (!DeleteGaps)
				ADD(c)
			}
		else if (isalpha(c))
			{
			c = toupper(c);
			ADD(c)
			}
		else if (isprint(c))
			{
			Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
			continue;
			}
		else
			{
			Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
			continue;
			}
		PreviousChar = c;
		}

	if (0 == Pos)
		{
	// Record had no sequence data: move on to the next one. The label
	// just read is released first, otherwise the recursive call would
	// overwrite *ptrLabel and leak it. No data buffer exists at this
	// point because nothing was added.
		delete[] *ptrLabel;
		*ptrLabel = 0;
		return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);
		}

	*ptrSeqLength = Pos;
	return Buffer;
	}
@@ -0,0 +1,77 @@
#include "muscle.h"
#include "seqvect.h"
#include "distfunc.h"
#include "clust.h"
#include "clustsetdf.h"
#include "tree.h"
#include "clust.h"
#include "distcalc.h"
#include <math.h>

static void TreeFromSeqVect_NJ(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
	{
    ClustSetDF CSD(DF);

    Clust C;
    C.Create(CSD, Cluster);

    tree.FromClust(C);
	}

static void TreeFromSeqVect_UPGMA(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
	{
	LINKAGE Linkage = LINKAGE_Undefined;
	switch (Cluster)
		{
	case CLUSTER_UPGMA:
		Linkage = LINKAGE_Avg;
		break;
	case CLUSTER_UPGMAMin:
		Linkage = LINKAGE_Min;
		break;
	case CLUSTER_UPGMAMax:
		Linkage = LINKAGE_Max;
		break;
	case CLUSTER_UPGMB:
		Linkage = LINKAGE_Biased;
		break;
	default:
		Quit("TreeFromSeqVect_UPGMA, CLUSTER_%u not supported", Cluster);
		}
	
	DistCalcDF DC;
	DC.Init(DF);
	UPGMA2(DC, tree, Linkage);
	}

// Write the distance function to FileName as text: the sequence count on
// the first line, then one line per sequence holding its name (padded or
// truncated to 10 characters) followed by the distances to every earlier
// sequence, i.e. a lower-triangular matrix without the diagonal.
// Quits if the file cannot be created.
static void SaveDF(const SeqVect &v, DistFunc &d, const char *FileName)
	{
	FILE *out = fopen(FileName, "w");
	if (0 == out)
		Quit("Cannot create %s", FileName);

	const unsigned uSeqCount = v.GetSeqCount();
	fprintf(out, "%u\n", uSeqCount);

	for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
		{
		fprintf(out, "%10.10s  ", v.GetSeqName(uRow));

		// Only columns below the diagonal are written.
		unsigned uCol = 0;
		while (uCol < uRow)
			{
			fprintf(out, "  %9g", d.GetDist(uRow, uCol));
			++uCol;
			}
		fprintf(out, "\n");
		}

	fclose(out);
	}

void TreeFromSeqVect(const SeqVect &v, Tree &tree, CLUSTER Cluster,
  DISTANCE Distance, ROOT Root, const char *SaveFileName)
	{
	DistFunc DF;
	DistUnaligned(v, Distance, DF);
	if (SaveFileName != 0)
		SaveDF(v, DF, SaveFileName);
	if (CLUSTER_NeighborJoining == Cluster)
		TreeFromSeqVect_NJ(DF, Cluster, tree);
	else
		TreeFromSeqVect_UPGMA(DF, Cluster, tree);
	FixRoot(tree, Root);
	}
@@ -0,0 +1,56 @@
#include "muscle.h"
#include "distfunc.h"
#include "seqvect.h"

void DistPWScoreDist(const SeqVect &v, DistFunc &DF);

void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF)
	{
	const unsigned uSeqCount = v.Length();

	switch (DistMethod)
		{
	case DISTANCE_Kmer6_6:
		DistKmer6_6(v, DF);
		break;

	case DISTANCE_Kmer20_3:
		DistKmer20_3(v, DF);
		break;

	case DISTANCE_Kmer20_4:
		FastDistKmer(v, DF);
		break;

	case DISTANCE_Kbit20_3:
		DistKbit20_3(v, DF);
		break;

	case DISTANCE_Kmer4_6:
		DistKmer4_6(v, DF);
		break;

	case DISTANCE_PWKimura:
		DistPWKimura(v, DF);
		break;

	case DISTANCE_PWScoreDist:
		DistPWScoreDist(v, DF);
		break;

	default:
		Quit("DistUnaligned, unsupported distance method %d", DistMethod);
		}

//	const char **SeqNames = (const char **) malloc(uSeqCount*sizeof(char *));
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const Seq &s = *(v[uSeqIndex]);

		const char *ptrName = s.GetName();
		unsigned uId = s.GetId();

		DF.SetName(uSeqIndex, ptrName);
		DF.SetId(uSeqIndex, uId);
		}
	}
@@ -0,0 +1,206 @@
#include "muscle.h"
#include "distfunc.h"
#include "seqvect.h"
#include <math.h>

// Number of distinct triples (3-mers) over a 20-letter alphabet.
const unsigned TRIPLE_COUNT = 20*20*20;

// Per-triple occurrence record; DistKmer20_3 keeps one of these for each
// of the TRIPLE_COUNT possible triples.
struct TripleCount
	{
	unsigned m_uSeqCount;			// How many sequences have this triple?
	unsigned short *m_Counts;		// m_Counts[s] = nr of times triple found in seq s
	};
// Table of TRIPLE_COUNT entries, allocated and released inside DistKmer20_3.
static TripleCount *TripleCounts;

// Compute a triple-based (3-mer, 20-letter alphabet) score for every pair
// of sequences in v and store it in DF.
//
// For each of the 20^3 possible triples the number of occurrences in each
// sequence is counted. The raw pair score is the sum over all triples of
// min(count in seq 1, count in seq 2); it is then divided by
// (length of the shorter sequence - 2). Pairs in which either sequence is
// shorter than 3 letters, or which share no triple, are given 1.0.
// Triples containing a letter code >= 20 are not counted.
//
// WARNING: Sequences MUST be stripped of gaps and upper case!
void DistKmer20_3(const SeqVect &v, DistFunc &DF)
	{
	const unsigned uSeqCount = v.Length();

	DF.SetCount(uSeqCount);
	if (0 == uSeqCount)
		return;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			DF.SetDist(uSeq1, uSeq2, 0);
		}

	const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);
	TripleCounts = (TripleCount *) malloc(uTripleArrayBytes);
	if (0 == TripleCounts)
		Quit("Not enough memory (TripleCounts)");
	memset(TripleCounts, 0, uTripleArrayBytes);

	const unsigned uCountBytes = uSeqCount*sizeof(unsigned short);
	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		TripleCount &tc = *(TripleCounts + uWord);
		tc.m_Counts = (unsigned short *) malloc(uCountBytes);
		if (0 == tc.m_Counts)
			Quit("Not enough memory (TripleCounts)");
		memset(tc.m_Counts, 0, uCountBytes);
		}

	// Largest value stored in a per-sequence counter. Counters saturate
	// here instead of wrapping to zero, which would make the triple look
	// unseen and bump m_uSeqCount a second time for the same sequence.
	const unsigned short usMaxCount = 0xFFFF;

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();

		// A sequence shorter than 3 holds no triple; skipping it also
		// avoids the unsigned underflow of (uSeqLength - 2).
		if (uSeqLength < 3)
			continue;

		for (unsigned uPos = 0; uPos + 2 < uSeqLength; ++uPos)
			{
			const unsigned uLetter1 = CharToLetterEx(s[uPos]);
			if (uLetter1 >= 20)
				continue;
			const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);
			if (uLetter2 >= 20)
				continue;
			const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);
			if (uLetter3 >= 20)
				continue;

			const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;
			assert(uWord < TRIPLE_COUNT);

			TripleCount &tc = *(TripleCounts + uWord);
			unsigned short &usCount = tc.m_Counts[uSeqIndex];
			if (0 == usCount)
				++(tc.m_uSeqCount);

			if (usCount < usMaxCount)
				++usCount;
			}
		}

#if TRACE
	{
	Log("TripleCounts\n");
	unsigned uGrandTotal = 0;
	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		const TripleCount &tc = *(TripleCounts + uWord);
		if (0 == tc.m_uSeqCount)
			continue;

		const unsigned uLetter3 = uWord/(20*20);
		const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;
		const unsigned uLetter1 = uWord%20;
		Log("Word %6u %c%c%c   %6u",
		  uWord,
		  LetterToCharAmino(uLetter1),
		  LetterToCharAmino(uLetter2),
		  LetterToCharAmino(uLetter3),
		  tc.m_uSeqCount);

		unsigned uSeqCountWithThisWord = 0;
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const unsigned uCount = tc.m_Counts[uSeqIndex];
			if (uCount > 0)
				{
				++uSeqCountWithThisWord;
				Log(" %u=%u", uSeqIndex, uCount);
				uGrandTotal += uCount;
				}
			}
		if (uSeqCountWithThisWord != tc.m_uSeqCount)
			Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);
		Log("\n");
		}
	
	unsigned uTotalBySeqLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();
		if (uSeqLength >= 3)
			uTotalBySeqLength += uSeqLength - 2;
		}
	if (uGrandTotal != uTotalBySeqLength)
		Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);
	}
#endif

	// Scratch list of the sequences containing the current triple. It
	// holds sequence indexes, so it must be as wide as the index type.
	const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
	unsigned *SeqList = (unsigned *) malloc(uSeqListBytes);
	if (0 == SeqList)
		Quit("Not enough memory (SeqList)");

	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		const TripleCount &tc = *(TripleCounts + uWord);
		if (0 == tc.m_uSeqCount)
			continue;

		unsigned uSeqCountFound = 0;
		memset(SeqList, 0, uSeqListBytes);

		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			if (tc.m_Counts[uSeqIndex] > 0)
				{
				SeqList[uSeqCountFound] = uSeqIndex;
				++uSeqCountFound;
				if (uSeqCountFound == tc.m_uSeqCount)
					break;
				}
			}
		assert(uSeqCountFound == tc.m_uSeqCount);

		// Add min(count1, count2) for this triple to every pair of
		// sequences that both contain it.
		for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)
			{
			const unsigned uSeqIndex1 = SeqList[uSeq1];
			const unsigned uCount1 = tc.m_Counts[uSeqIndex1];
			for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
				{
				const unsigned uSeqIndex2 = SeqList[uSeq2];
				const unsigned uCount2 = tc.m_Counts[uSeqIndex2];
				const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;
				const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);
				DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));
				}
			}
		}

	// Everything here came from malloc, so it is released with free;
	// the per-triple count arrays are released as well.
	free(SeqList);
	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		free(TripleCounts[uWord].m_Counts);
	free(TripleCounts);
	TripleCounts = 0;

	unsigned uDone = 0;
	const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0.0);

		const Seq &s1 = *(v[uSeq1]);
		const unsigned uLength1 = s1.Length();

		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			{
			if (uDone%1000 == 0)
				Progress(uDone, uTotal);
			++uDone;

			const Seq &s2 = *(v[uSeq2]);
			const unsigned uLength2 = s2.Length();
			unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;
			if (uMinLength < 3)
				{
				DF.SetDist(uSeq1, uSeq2, 1.0);
				continue;
				}

			const double dTripleCount = DF.GetDist(uSeq1, uSeq2);
			if (dTripleCount == 0)
				{
				DF.SetDist(uSeq1, uSeq2, 1.0);
				continue;
				}
			double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);
			DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);

#if	TRACE
			{
			// Identity estimate is only reported, never stored in DF.
			double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));
			if (dEstimatedPairwiseIdentity > 1)
				dEstimatedPairwiseIdentity = 1;
			Log("%s - %s  Triplet count = %g  Lengths %u, %u Estimated pwid = %g\n",
			  s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,
			  dEstimatedPairwiseIdentity);
			}
#endif
			}
		}
	ProgressStepsDone();
	}
@@ -0,0 +1,109 @@
#include "muscle.h"
#include "distfunc.h"
#include "seqvect.h"
#include <math.h>

#define	MIN(x, y)	((x) < (y) ? (x) : (y))

// Set, in Bits, one bit for every 3-mer that occurs in s.
//
// Bits is a bit vector with one bit per possible 3-mer over the 20-letter
// alphabet (8,000 bits); bit number c corresponds to the 3-mer whose
// base-20 value is c, with the first letter most significant. A 3-mer is
// only recorded when all three of its letters have a code <= AX_Y; any
// other letter restarts the window. Bits are only ever set, never cleared,
// so the caller is expected to pass a zeroed vector.
static void SetKmerBitVector(const Seq &s, byte Bits[])
	{
	const unsigned uLength = s.Length();
	const unsigned k = 3;	// kmer length

	// Sequences shorter than k contain no k-mer; nothing is read from them.
	if (uLength < k)
		return;

	unsigned c = 0;			// base-20 value of the current window
	unsigned uRunLength = 0;	// consecutive valid letters ending at i

	// Each position is visited exactly once so that every k-mer is built
	// from k adjacent residues.
	for (unsigned i = 0; i < uLength; ++i)
		{
		const unsigned x = CharToLetterEx(s[i]);
		if (x <= AX_Y)
			{
			c = (c*20 + x)%8000;
			++uRunLength;
			}
		else
			{
			c = 0;
			uRunLength = 0;
			}

		if (uRunLength >= k)
			{
			const unsigned ByteOffset = c/8;
			const unsigned BitOffset = c%8;
			Bits[ByteOffset] |= (1 << BitOffset);
			}
		}
	}

// Return the number of bit positions that are set in both Bits1 and Bits2.
// Both arguments are 1000-byte (8,000-bit) vectors.
static unsigned CommonBitCount(const byte Bits1[], const byte Bits2[])
	{
	const byte * const p1end = Bits1 + 1000;
	const byte *p2 = Bits2;

	unsigned uCount = 0;
	for (const byte *p1 = Bits1; p1 != p1end; ++p1)
		{
	// A bit is common only if it is set in both bytes, so AND them
	// and count the surviving bits; (b & (b - 1)) clears the lowest
	// set bit on each pass.
		unsigned b = (unsigned) (*p1 & *p2);
		while (b != 0)
			{
			b &= b - 1;
			++uCount;
			}
		++p2;
		}
	return uCount;
	}

// Fill DF with the k-bit measure for every pair of sequences in v:
// the number of distinct 3-mers found in both sequences divided by the
// length of the shorter sequence. If the shorter sequence is empty the
// value is 0 (no shared k-mers) rather than the NaN that 0/0 would give.
void DistKbit20_3(const SeqVect &v, DistFunc &DF)
	{
	const unsigned uSeqCount = v.Length();
	DF.SetCount(uSeqCount);

// There are 20^3 = 8,000 distinct kmers in the 20-letter alphabet.
// For each sequence, we create a bit vector of length 8,000, i.e.
// 1,000 bytes, having one bit per kmer. The bit is set to 1 if the
// kmer is present in the sequence.
	const unsigned uBytes = uSeqCount*1000;
	byte *BitVector = new byte[uBytes];
	memset(BitVector, 0, uBytes);

	SetProgressDesc("K-bit distance matrix");
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		SetKmerBitVector(*v[uSeqIndex], BitVector + uSeqIndex*1000);

	unsigned uDone = 0;
	const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const byte *Bits1 = BitVector + uSeqIndex1*1000;
		const unsigned uLength1 = v[uSeqIndex1]->Length();
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
			{
			const byte *Bits2 = BitVector + uSeqIndex2*1000;
			const unsigned uLength2 = v[uSeqIndex2]->Length();
			const float fCount = (float) CommonBitCount(Bits1, Bits2);

		// Distance measure = K / min(L1, L2)
		// K is number of distinct kmers that are found in both sequences
			const unsigned uMinLength = MIN(uLength1, uLength2);
			const float fDist = (0 == uMinLength) ? 0.0f : fCount / uMinLength;
			DF.SetDist(uSeqIndex1, uSeqIndex2, fDist);
			if (uDone%10000 == 0)
				Progress(uDone, uTotal);
			++uDone;
			}
		}
	ProgressStepsDone();

	delete[] BitVector;
	}
@@ -0,0 +1,247 @@
#include "muscle.h"
#include "msa.h"
#include "seqvect.h"
#include "seq.h"
#include "distfunc.h"
#include <math.h>

#define TRACE	0

/***
Some candidate alphabets considered because they
have high correlations and small table sizes.
Correlation coefficient is between k-mer distance
and %id D measured from a CLUSTALW alignment.
Table size is N^k where N is size of alphabet.
A is standard (uncompressed) amino alphabet.

                           Correlation
Alpha   N  k  Table Size   all   25-50%
-----  --  -  ----------   ----  ------
A      20  3       8,000  0.943   0.575
A      20  4     160,000  0.962   0.685 <<
LiA    14  4      38,416  0.966   0.645
SEB    14  4      38,416  0.964   0.634
LiA    13  4      28,561  0.965   0.640
LiA    12  4      20,736  0.963   0.620
LiA    10  5     100,000  0.964   0.652

We select A with k=4 because it has the best
correlations. The only drawback is a large table
size, but space is readily available and the only 
additional time cost is in resetting the table to
zero, which can be done quickly with memset or by
keeping a list of the k-mers that were found (should
test to see which is faster, and may vary by compiler
and processor type). It also has the minor advantage
that we don't need to convert the alphabet.

Fractional identity d is estimated as follows.

	F = fractional k-mer count
	if F is 0: F = 0.01
	Y = log(0.02 + F)
	d = -4.1 + 4.12*Y

The constant 0.02 was chosen to make the relationship
between Y and D linear. The constants -4.1 and 4.12
were chosen to fit a straight line to the scatterplot
of Y vs D.
***/

#define MIN(x, y)	(((x) < (y)) ? (x) : (y))

// k-mer length (K) and alphabet size (N) used by the k-mer counting code
// in this file, plus the powers of N used as place values when a k-mer is
// encoded as a base-N number.
const unsigned K = 4;
const unsigned N = 20;
const unsigned N_2 = 20*20;
const unsigned N_3 = 20*20*20;
const unsigned N_4 = 20*20*20*20;

// Number of distinct k-mers (N^K); the size of every k-mer count table.
const unsigned TABLE_SIZE = N_4;

// For debug output: render a k-mer index as its four letters, most
// significant letter first. The result lives in a static buffer that is
// overwritten by the next call.
const char *KmerToStr(unsigned Kmer)
	{
	static char s[5];

	// Split the index into its four base-N digits, least significant first.
	unsigned uDigits[4];
	unsigned uRest = Kmer;
	for (unsigned n = 0; n < 4; ++n)
		{
		uDigits[n] = uRest%N;
		uRest /= N;
		}

	s[0] = LetterToChar(uDigits[3]);
	s[1] = LetterToChar(uDigits[2]);
	s[2] = LetterToChar(uDigits[1]);
	s[3] = LetterToChar(uDigits[0]);
	return s;
	}

void CountKmers(const byte s[], unsigned uSeqLength, byte KmerCounts[])
	{
#if	TRACE
	Log("CountKmers\n");
#endif
	memset(KmerCounts, 0, TABLE_SIZE*sizeof(byte));

	const byte *ptrKmerStart = s;
	const byte *ptrKmerEnd = s + 4;
	const byte *ptrSeqEnd = s + uSeqLength;

	unsigned c3 = s[0]*N_3;
	unsigned c2 = s[1]*N_2;
	unsigned c1 = s[2]*N;
	unsigned c0 = s[3];

	unsigned Kmer = c3 + c2 + c1 + c0;

	for (;;)
		{
		assert(Kmer < TABLE_SIZE);

#if	TRACE
		Log("Kmer=%d=%s\n", Kmer, KmerToStr(Kmer));
#endif
		++(KmerCounts[Kmer]);

		if (ptrKmerEnd == ptrSeqEnd)
			break;

	// Compute k-mer as function of previous k-mer:
	// 1. Subtract first letter from previous k-mer.
	// 2. Multiply by N.
	// 3. Add next letter.
		c3 = (*ptrKmerStart++) * N_3;
		Kmer = (Kmer - c3)*N;
		Kmer += *ptrKmerEnd++;
		}
	}

unsigned CommonKmerCount(const byte Seq[], unsigned uSeqLength,
  const byte KmerCounts1[], const byte Seq2[], unsigned uSeqLength2)
	{
	byte KmerCounts2[TABLE_SIZE];
	CountKmers(Seq2, uSeqLength2, KmerCounts2);

	const byte *ptrKmerStart = Seq;
	const byte *ptrKmerEnd = Seq + 4;
	const byte *ptrSeqEnd = Seq + uSeqLength;

	unsigned c3 = Seq[0]*N_3;
	unsigned c2 = Seq[1]*N_2;
	unsigned c1 = Seq[2]*N;
	unsigned c0 = Seq[3];

	unsigned Kmer = c3 + c2 + c1 + c0;

	unsigned uCommonCount = 0;
	for (;;)
		{
		assert(Kmer < TABLE_SIZE);

		const byte Count1 = KmerCounts1[Kmer];
		const byte Count2 = KmerCounts2[Kmer];

		uCommonCount += MIN(Count1, Count2);

	// Hack so we don't double-count
		KmerCounts2[Kmer] = 0;

		if (ptrKmerEnd == ptrSeqEnd)
			break;

	// Compute k-mer as function of previous k-mer:
	// 1. Subtract first letter from previous k-mer.
	// 2. Multiply by N.
	// 3. Add next letter.
		c3 = (*ptrKmerStart++) * N_3;
		Kmer = (Kmer - c3)*N;
		Kmer += *ptrKmerEnd++;
		}
	return uCommonCount;
	}

// Convert sequence s to letter codes, writing one byte per residue into
// Letters (which must have room for s.Length() entries).
static void SeqToLetters(const Seq &s, byte Letters[])
	{
	const unsigned uLength = s.Length();
	for (unsigned uPos = 0; uPos != uLength; ++uPos)
		{
		const char cResidue = s.GetChar(uPos);
	// The k-mer counting code is not wildcard-aware, so wildcards are
	// arbitrarily replaced by one specific amino acid ('A').
		const char cMapped = IsWildcardChar(cResidue) ? 'A' : cResidue;
		Letters[uPos] = CharToLetter(cMapped);
		}
	}

// Fill DF with a k-mer (k = 4, 20-letter alphabet) distance for every pair
// of sequences in v.
//
// For a pair, F is the number of shared k-mer occurrences divided by the
// number of k-mers in the shorter sequence (min length - K + 1); an F of
// zero is replaced by 0.01. The stored distance is 1 - F.
// A pair in which either sequence is shorter than K has no k-mers; it is
// treated like a pair with no shared k-mers instead of being passed to the
// k-mer counters, which require at least K letters.
// The identity estimate and Kimura distance are computed but only used in
// TRACE output.
void FastDistKmer(const SeqVect &v, DistFunc &DF)
	{
	byte KmerCounts[TABLE_SIZE];

	const unsigned uSeqCount = v.GetSeqCount();

	DF.SetCount(uSeqCount);
	if (0 == uSeqCount)
		return;

// Initialize distance matrix to zero
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			DF.SetDist(uSeq1, uSeq2, 0);
		}

	unsigned uMaxLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const Seq &s = v.GetSeq(uSeqIndex);
		unsigned uSeqLength = s.Length();
		if (uSeqLength > uMaxLength)
			uMaxLength = uSeqLength;
		}
	if (0 == uMaxLength)
		return;

	byte *Seq1Letters = new byte[uMaxLength];
	byte *Seq2Letters = new byte[uMaxLength];

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount - 1; ++uSeqIndex1)
		{
		const Seq &s1 = v.GetSeq(uSeqIndex1);
		const unsigned uSeqLength1 = s1.Length();

		SeqToLetters(s1, Seq1Letters);

		// KmerCounts is only filled (and only consulted below) when
		// sequence 1 is long enough to contain a k-mer.
		const bool bSeq1HasKmers = (uSeqLength1 >= K);
		if (bSeq1HasKmers)
			CountKmers(Seq1Letters, uSeqLength1, KmerCounts);

		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount;
		  ++uSeqIndex2)
			{
			const Seq &s2 = v.GetSeq(uSeqIndex2);
			const unsigned uSeqLength2 = s2.Length();

			SeqToLetters(s2, Seq2Letters);

			unsigned uMinLength = MIN(uSeqLength1, uSeqLength2);

			unsigned uCommonKmerCount = 0;
			double F = 0.0;
			if (bSeq1HasKmers && uSeqLength2 >= K)
				{
				uCommonKmerCount = CommonKmerCount(Seq1Letters, uSeqLength1,
				  KmerCounts, Seq2Letters, uSeqLength2);

				// uMinLength >= K here, so the divisor is at least 1.
				F = (double) uCommonKmerCount / (uMinLength - K + 1);
				}
			if (0.0 == F)
				F = 0.01;
			double Y = log(0.02 + F);
			double EstimatedPctId = Y/4.12 + 0.995;
			double KD = KimuraDist(EstimatedPctId);
//			DF.SetDist(uSeqIndex1, uSeqIndex2, (float) KD);
			DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (1 - F));
#if	TRACE
			Log("CommonCount=%u, MinLength=%u, F=%6.4f Y=%6.4f, %%id=%6.4f, KimuraDist=%8.4f\n",
			  uCommonKmerCount, uMinLength, F, Y, EstimatedPctId, KD);
#endif
			}
		}

	delete[] Seq1Letters;
	delete[] Seq2Letters;
	}
@@ -0,0 +1,290 @@
#include "muscle.h"
#include "distfunc.h"
#include "seqvect.h"
#include <math.h>

#define TRACE 0

#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
#define MAX(x, y)	(((x) > (y)) ? (x) : (y))

// Number of distinct 6-tuples when each position is a base-6 digit (6^6).
const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
// Scratch tuple-count tables, one counter per tuple; DistKmer6_6 fills
// Count1 for the first sequence of a pair and Count2 for the second.
static unsigned char Count1[TUPLE_COUNT];
static unsigned char Count2[TUPLE_COUNT];

// Amino acid groups according to MAFFT (sextet5)
// 0 =  A G P S T
// 1 =  I L M V
// 2 =  N D Q E B Z
// 3 =  R H K
// 4 =  F W Y
// 5 =  C
// 6 =  X . - U
//
// ResidueGroup maps a letter code (table index, in the order noted beside
// each entry) to its group number. Group 6 is never produced: unknown (X)
// and gap entries are mapped to group 0 instead, see the TODO below.
unsigned ResidueGroup[] =
	{
	0,		// AX_A,
	5,		// AX_C,
	2,		// AX_D,
	2,		// AX_E,
	4,		// AX_F,
	0,		// AX_G,
	3,		// AX_H,
	1,		// AX_I,
	3,		// AX_K,
	1,		// AX_L,
	1,		// AX_M,
	2,		// AX_N,
	0,		// AX_P,
	2,		// AX_Q,
	3,		// AX_R,
	0,		// AX_S,
	0,		// AX_T,
	1,		// AX_V,
	4,		// AX_W,
	4,		// AX_Y,

	2,		// AX_B,	// D or N
	2,		// AX_Z,	// E or Q
	0,		// AX_X,	// Unknown		// ******** TODO *************
										// This isn't the correct way of avoiding group 6
	0		// AX_GAP,					// ******** TODO ******************
	};
// Number of entries in ResidueGroup; letter codes must be below this.
unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);

// Format tuple index t as its six base-6 digits, most significant first.
// The result lives in a static buffer that is overwritten by the next call.
static char *TupleToStr(int t)
	{
	static char s[7];

	// Fill the buffer from the last digit backwards.
	int nRest = t;
	for (int nPos = 5; nPos >= 0; --nPos)
		{
		s[nPos] = '0' + nRest%6;
		nRest /= 6;
		}
	return s;
	}

// Return the index of the 6-tuple that starts at position n of uLetters.
// Each of the six letters is replaced by its residue group and the groups
// are read as a base-6 number with the first letter most significant.
static unsigned GetTuple(const unsigned uLetters[], unsigned n)
	{
	assert(uLetters[n] < uResidueGroupCount);
	assert(uLetters[n+1] < uResidueGroupCount);
	assert(uLetters[n+2] < uResidueGroupCount);
	assert(uLetters[n+3] < uResidueGroupCount);
	assert(uLetters[n+4] < uResidueGroupCount);
	assert(uLetters[n+5] < uResidueGroupCount);

	// Horner evaluation of the base-6 number.
	unsigned uTuple = 0;
	for (unsigned uOffset = 0; uOffset < 6; ++uOffset)
		uTuple = uTuple*6 + ResidueGroup[uLetters[n + uOffset]];
	return uTuple;
	}

static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
	{
	memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
	for (unsigned n = 0; n < uTupleCount; ++n)
		{
		const unsigned uTuple = GetTuple(L, n);
		++(Count[uTuple]);
		}
	}

// Log every tuple with a non-zero count, one "tuple  count" line each.
static void ListCount(const unsigned char Count[])
	{
	for (unsigned uTuple = 0; uTuple < TUPLE_COUNT; ++uTuple)
		{
		if (0 != Count[uTuple])
			Log("%s  %u\n", TupleToStr(uTuple), Count[uTuple]);
		}
	}

// Fill DF with a 6-tuple distance over the compressed (residue group)
// amino alphabet for every pair of sequences in v.
//
// Pass 1 counts, for every pair including each sequence with itself, the
// tuples the two sequences have in common. Pass 2 turns the counts into
// distances: 3*(self count - common count)/self count is computed from
// each sequence's point of view and the smaller value is stored.
// All temporary arrays, including the per-sequence letter arrays, are
// released before returning.
void DistKmer6_6(const SeqVect &v, DistFunc &DF)
	{
	const unsigned uSeqCount = v.Length();

	DF.SetCount(uSeqCount);
	if (0 == uSeqCount)
		return;

// Initialize distance matrix to zero
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			DF.SetDist(uSeq1, uSeq2, 0);
		}

// Convert to letters
	unsigned **Letters = new unsigned *[uSeqCount];
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();
		unsigned *L = new unsigned[uSeqLength];
		Letters[uSeqIndex] = L;
		for (unsigned n = 0; n < uSeqLength; ++n)
			{
			char c = s[n];
			L[n] = CharToLetterEx(c);
			assert(L[n] < uResidueGroupCount);
			}
		}

	unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		uCommonTupleCount[n] = new unsigned[uSeqCount];
		memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
		}

	const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
	unsigned uCount = 0;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		Seq &seq1 = *(v[uSeq1]);
		const unsigned uSeqLength1 = seq1.Length();
		if (uSeqLength1 < 5)
			continue;

		// Number of tuple start positions (zero for a length-5 sequence).
		const unsigned uTupleCount = uSeqLength1 - 5;
		const unsigned *L = Letters[uSeq1];
		CountTuples(L, uTupleCount, Count1);
#if	TRACE
		{
		Log("Seq1=%d\n", uSeq1);
		Log("Groups:\n");
		for (unsigned n = 0; n < uSeqLength1; ++n)
			Log("%u", ResidueGroup[L[n]]);
		Log("\n");

		Log("Tuples:\n");
		ListCount(Count1);
		}
#endif

		SetProgressDesc("K-mer dist pass 1");
		for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
			{
			if (0 == uCount%500)
				Progress(uCount, uPairCount);
			++uCount;
			Seq &seq2 = *(v[uSeq2]);
			const unsigned uSeqLength2 = seq2.Length();
			if (uSeqLength2 < 5)
				{
				if (uSeq1 == uSeq2)
					DF.SetDist(uSeq1, uSeq2, 0);
				else
					DF.SetDist(uSeq1, uSeq2, 1);
				continue;
				}

		// First pass through seq 2 to count tuples
			const unsigned uTupleCount = uSeqLength2 - 5;
			const unsigned *L = Letters[uSeq2];
			CountTuples(L, uTupleCount, Count2);
#if	TRACE
			Log("Seq2=%d Counts=\n", uSeq2);
			ListCount(Count2);
#endif

		// Second pass to accumulate sum of shared tuples
		// MAFFT defines this as the sum over unique tuples
		// in seq2 of the minimum of the number of tuples found
		// in the two sequences.
			unsigned uSum = 0;
			for (unsigned n = 0; n < uTupleCount; ++n)
				{
				const unsigned uTuple = GetTuple(L, n);
				uSum += MIN(Count1[uTuple], Count2[uTuple]);

			// This is a hack to make sure each unique tuple counted only once.
				Count2[uTuple] = 0;
				}
#if	TRACE
			{
			Seq &s1 = *(v[uSeq1]);
			Seq &s2 = *(v[uSeq2]);
			const char *pName1 = s1.GetName();
			const char *pName2 = s2.GetName();
			Log("Common count %s(%d) - %s(%d) =%u\n",
			  pName1, uSeq1, pName2, uSeq2, uSum);
			}
#endif
			uCommonTupleCount[uSeq1][uSeq2] = uSum;
			uCommonTupleCount[uSeq2][uSeq1] = uSum;
			}
		}
	ProgressStepsDone();

	uCount = 0;
	SetProgressDesc("K-mer dist pass 2");
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		Seq &s1 = *(v[uSeq1]);
		const char *pName1 = s1.GetName();

		// A zero self count is replaced by 1 so it can be used as a divisor.
		double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
		if (0 == dCommonTupleCount11)
			dCommonTupleCount11 = 1;

		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			{
			if (0 == uCount%500)
				Progress(uCount, uPairCount);
			++uCount;

			double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
			if (0 == dCommonTupleCount22)
				dCommonTupleCount22 = 1;

			const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
			  /dCommonTupleCount11;
			const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
			  /dCommonTupleCount22;

		// dMinDist is the value used for tree-building in MAFFT
			const double dMinDist = MIN(dDist1, dDist2);
			DF.SetDist(uSeq1, uSeq2, (float) dMinDist);

			//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
			//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
		// **** TODO **** why does this make score slightly worse??
			//const double dKimuraDist = KimuraDist(dEstimatedPctId);
			//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
			}
		}
	ProgressStepsDone();

	// Release the per-sequence rows before the arrays that hold them.
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		delete[] uCommonTupleCount[n];
		delete[] Letters[n];
		}
	delete[] uCommonTupleCount;
	delete[] Letters;
	}

// Convert a fractional identity to a distance: -log(dPctId), where
// identities below 0.05 are first raised to 0.05.
double PctIdToMAFFTDist(double dPctId)
	{
	const double dFloor = 0.05;
	const double dClamped = (dPctId < dFloor) ? dFloor : dPctId;
	return -log(dClamped);
	}

// Height for a fractional identity; identical to PctIdToMAFFTDist.
double PctIdToHeightMAFFT(double dPctId)
	{
	const double dHeight = PctIdToMAFFTDist(dPctId);
	return dHeight;
	}
@@ -0,0 +1,265 @@
#include "muscle.h"
#include "distfunc.h"
#include "seqvect.h"
#include <math.h>

#define TRACE 0

#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
#define MAX(x, y)	(((x) > (y)) ? (x) : (y))

// Number of distinct 6-tuples when each position is a base-6 digit (6^6).
const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
// Scratch tuple-count tables, one counter per tuple; DistKmer4_6 fills
// Count1 for the first sequence of a pair and Count2 for the second.
static unsigned char Count1[TUPLE_COUNT];
static unsigned char Count2[TUPLE_COUNT];

// Nucleotide groups according to MAFFT (sextet5)
// 0 =  A
// 1 =  C
// 2 =  G
// 3 =  T
// 4 =  other
//
// ResidueGroup maps a letter code (table index, in the order noted beside
// each entry) to its group number. Only groups 0..4 are produced, although
// tuples are still encoded with base 6 (see GetTuple).

static unsigned ResidueGroup[] =
	{
	0,		// NX_A,
	1,		// NX_C,
	2,		// NX_G,
	3,		// NX_T/U
	4,		// NX_N,
	4,		// NX_R,
	4,		// NX_Y,
	4,		// NX_GAP
	};
// Number of entries in ResidueGroup; letter codes must be below this.
static unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);

// Format tuple index t as its six base-6 digits, most significant first.
// The result lives in a static buffer that is overwritten by the next call.
static char *TupleToStr(int t)
	{
	static char s[7];

	// Write digits right to left, dropping the lowest digit each step.
	int nRemaining = t;
	int nSlot = 6;
	while (nSlot > 0)
		{
		--nSlot;
		s[nSlot] = '0' + nRemaining%6;
		nRemaining /= 6;
		}
	return s;
	}

// Return the index of the 6-tuple that starts at position n of uLetters.
// Each of the six letters is replaced by its residue group and the groups
// are read as a base-6 number with the first letter most significant.
static unsigned GetTuple(const unsigned uLetters[], unsigned n)
	{
	assert(uLetters[n] < uResidueGroupCount);
	assert(uLetters[n+1] < uResidueGroupCount);
	assert(uLetters[n+2] < uResidueGroupCount);
	assert(uLetters[n+3] < uResidueGroupCount);
	assert(uLetters[n+4] < uResidueGroupCount);
	assert(uLetters[n+5] < uResidueGroupCount);

	// Accumulate the digits left to right (Horner's scheme).
	const unsigned *ptrLetter = uLetters + n;
	unsigned uIndex = 0;
	for (unsigned uDigit = 0; uDigit < 6; ++uDigit)
		uIndex = uIndex*6 + ResidueGroup[ptrLetter[uDigit]];
	return uIndex;
	}

static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
	{
	memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
	for (unsigned n = 0; n < uTupleCount; ++n)
		{
		const unsigned uTuple = GetTuple(L, n);
		++(Count[uTuple]);
		}
	}

// Log every tuple with a non-zero count, one "tuple  count" line each.
static void ListCount(const unsigned char Count[])
	{
	unsigned uTuple = 0;
	while (uTuple < TUPLE_COUNT)
		{
		if (Count[uTuple] != 0)
			Log("%s  %u\n", TupleToStr(uTuple), Count[uTuple]);
		++uTuple;
		}
	}

// Fill DF with a 6-tuple distance over the nucleotide alphabet for every
// pair of sequences in v. Quits unless the global alphabet is DNA or RNA.
//
// Pass 1 counts, for every pair including each sequence with itself, the
// tuples the two sequences have in common. Pass 2 turns the counts into
// distances: 3*(self count - common count)/self count is computed from
// each sequence's point of view and the smaller value is stored.
void DistKmer4_6(const SeqVect &v, DistFunc &DF)
	{
	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("DistKmer4_6 requires nucleo alphabet");

	const unsigned uSeqCount = v.Length();

	DF.SetCount(uSeqCount);
	if (0 == uSeqCount)
		return;

// Initialize distance matrix to zero
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			DF.SetDist(uSeq1, uSeq2, 0);
		}

// Convert to letters
	unsigned **Letters = new unsigned *[uSeqCount];
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();
		unsigned *L = new unsigned[uSeqLength];
		Letters[uSeqIndex] = L;
		for (unsigned n = 0; n < uSeqLength; ++n)
			{
			char c = s[n];
			L[n] = CharToLetterEx(c);
			// Every code from 4 upwards is collapsed onto code 4, which
			// ResidueGroup maps to the "other" group.
			if (L[n] >= 4)
				L[n] = 4;
			}
		}

	// Square table of common tuple counts, indexed [seq][seq].
	unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		uCommonTupleCount[n] = new unsigned[uSeqCount];
		memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
		}

	// Pairs visited in pass 1, self pairs included; used for progress only.
	const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
	unsigned uCount = 0;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		Seq &seq1 = *(v[uSeq1]);
		const unsigned uSeqLength1 = seq1.Length();
		if (uSeqLength1 < 5)
			continue;

		// Number of tuple start positions (zero for a length-5 sequence).
		const unsigned uTupleCount = uSeqLength1 - 5;
		const unsigned *L = Letters[uSeq1];
		CountTuples(L, uTupleCount, Count1);
#if	TRACE
		{
		Log("Seq1=%d\n", uSeq1);
		Log("Groups:\n");
		for (unsigned n = 0; n < uSeqLength1; ++n)
			Log("%u", ResidueGroup[L[n]]);
		Log("\n");

		Log("Tuples:\n");
		ListCount(Count1);
		}
#endif

		SetProgressDesc("K-mer dist pass 1");
		for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
			{
			if (0 == uCount%500)
				Progress(uCount, uPairCount);
			++uCount;
			Seq &seq2 = *(v[uSeq2]);
			const unsigned uSeqLength2 = seq2.Length();
			if (uSeqLength2 < 5)
				{
				// These values are overwritten by pass 2 below.
				if (uSeq1 == uSeq2)
					DF.SetDist(uSeq1, uSeq2, 0);
				else
					DF.SetDist(uSeq1, uSeq2, 1);
				continue;
				}

		// First pass through seq 2 to count tuples
			const unsigned uTupleCount = uSeqLength2 - 5;
			const unsigned *L = Letters[uSeq2];
			CountTuples(L, uTupleCount, Count2);
#if	TRACE
			Log("Seq2=%d Counts=\n", uSeq2);
			ListCount(Count2);
#endif

		// Second pass to accumulate sum of shared tuples
		// MAFFT defines this as the sum over unique tuples
		// in seq2 of the minimum of the number of tuples found
		// in the two sequences.
			unsigned uSum = 0;
			for (unsigned n = 0; n < uTupleCount; ++n)
				{
				const unsigned uTuple = GetTuple(L, n);
				uSum += MIN(Count1[uTuple], Count2[uTuple]);

			// This is a hack to make sure each unique tuple counted only once.
				Count2[uTuple] = 0;
				}
#if	TRACE
			{
			Seq &s1 = *(v[uSeq1]);
			Seq &s2 = *(v[uSeq2]);
			const char *pName1 = s1.GetName();
			const char *pName2 = s2.GetName();
			Log("Common count %s(%d) - %s(%d) =%u\n",
			  pName1, uSeq1, pName2, uSeq2, uSum);
			}
#endif
			uCommonTupleCount[uSeq1][uSeq2] = uSum;
			uCommonTupleCount[uSeq2][uSeq1] = uSum;
			}
		}
	ProgressStepsDone();

	uCount = 0;
	SetProgressDesc("K-mer dist pass 2");
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		// NOTE(review): s1 and pName1 are not used in this loop.
		Seq &s1 = *(v[uSeq1]);
		const char *pName1 = s1.GetName();

		// A zero self count is replaced by 1 so it can be used as a divisor.
		double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
		if (0 == dCommonTupleCount11)
			dCommonTupleCount11 = 1;

		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			{
			if (0 == uCount%500)
				Progress(uCount, uPairCount);
			++uCount;

			double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
			if (0 == dCommonTupleCount22)
				dCommonTupleCount22 = 1;

			const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
			  /dCommonTupleCount11;
			const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
			  /dCommonTupleCount22;

		// dMinDist is the value used for tree-building in MAFFT
			const double dMinDist = MIN(dDist1, dDist2);
			DF.SetDist(uSeq1, uSeq2, (float) dMinDist);

			//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
			//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
		// **** TODO **** why does this make score slightly worse??
			//const double dKimuraDist = KimuraDist(dEstimatedPctId);
			//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
			}
		}
	ProgressStepsDone();

	// Release the per-sequence rows before the arrays that hold them.
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		delete[] uCommonTupleCount[n];
		delete[] Letters[n];
		}
	delete[] uCommonTupleCount;
	delete[] Letters;
	}
@@ -0,0 +1,165 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// Score an alignment path between two profiles and log a per-edge table.
//   PA, uLengthA   positions of profile A and their count
//   PB, uLengthB   positions of profile B and their count
//   Path           edges to score; each has a type ('M', 'D' or 'I') and
//                  the prefix lengths in A and B that it ends at
// Each 'M' edge contributes ScoreProfPos2 of the two aligned positions.
// A gap-open score is added where a 'D' or 'I' run starts, a gap-close
// score where it ends (including at the end of the path), and nothing for
// extending a run. Returns the total. Quits on a malformed path
// (zero prefix length, D directly followed by I or vice versa, 'U' edges,
// unknown edge types).
SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
  const ProfPos *PB, unsigned uLengthB, const PWPath &Path)
	{
	const unsigned uEdgeCount = Path.GetEdgeCount();
	Log("Edge  SS     PLA   PLB   Match     Gap    Total\n");
	Log("----  --     ---   ---   -----     ---    -----\n");
	// 'S' is the type reported as "previous" for the first edge.
	char cType = 'S';
	SCORE scoreTotal = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		const char cPrevType = cType;
		cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
		// bGap / bMatch only control which columns of the log line are filled.
		bool bGap = false;
		bool bMatch = false;
		SCORE scoreGap = 0;
		SCORE scoreMatch = 0;

		switch (cType)
			{
		case 'M':
			{
			if (0 == uPrefixLengthA || 0 == uPrefixLengthB)
				Quit("FastScorePath2, M zero length");

			const ProfPos &PPA = PA[uPrefixLengthA - 1];
			const ProfPos &PPB = PB[uPrefixLengthB - 1];

			bMatch = true;
			scoreMatch = ScoreProfPos2(PPA, PPB);

			// A match right after a gap run closes that gap; the close
			// score is taken from the position before the matched one.
			if ('D' == cPrevType)
				{
				bGap = true;
				assert(uPrefixLengthA > 1);
				scoreGap = PA[uPrefixLengthA-2].m_scoreGapClose;
				}
			else if ('I' == cPrevType)
				{
				bGap = true;
				assert(uPrefixLengthB > 1);
				scoreGap = PB[uPrefixLengthB-2].m_scoreGapClose;
				}
			break;
			}

		case 'D':
			{
			if (0 == uPrefixLengthA)
				Quit("FastScorePath2, D zero length");

			const ProfPos &PPA = PA[uPrefixLengthA - 1];
			bGap = true;
			switch (cPrevType)
				{
			case 'S':
				scoreGap = PPA.m_scoreGapOpen;
				break;
			case 'M':
				scoreGap = PPA.m_scoreGapOpen;
				break;
			case 'D':
//				scoreGap = g_scoreGapExtend;
				scoreGap = 0;
				break;
			case 'I':
				Quit("FastScorePath2 DI");
				}
			break;
			}

		case 'I':
			{
			if (0 == uPrefixLengthB)
				Quit("FastScorePath2, I zero length");

			const ProfPos &PPB = PB[uPrefixLengthB - 1];
			bGap = true;
			switch (cPrevType)
				{
			case 'S':
				scoreGap = PPB.m_scoreGapOpen;
				break;
			case 'M':
				scoreGap = PPB.m_scoreGapOpen;
				break;
			case 'I':
				scoreGap = 0;
//				scoreGap = g_scoreGapExtend;
				break;
			case 'D':
				Quit("FastScorePath2 DI");
				}
			break;
			}

		// NOTE(review): no break after Quit here; this relies on Quit
		// not returning -- confirm.
		case 'U':
			{
			Quit("FastScorePath2 U");
			}

		default:
			Quit("FastScorePath2: invalid type %c", cType);
			}

		Log("%4u  %c%c  %4u  %4u  ", uEdgeIndex, cPrevType, cType,
		  uPrefixLengthA, uPrefixLengthB);
		if (bMatch)
			Log("%7.1f  ", scoreMatch);
		else
			Log("         ");
		if (bGap)
			Log("%7.1f  ", scoreGap);
		else
			Log("         ");
		SCORE scoreEdge = scoreMatch + scoreGap;
		scoreTotal += scoreEdge;
		Log("%7.1f  %7.1f", scoreEdge, scoreTotal);
		Log("\n");
		}

	// Terminal gap: a path that ends inside a gap run still pays the close
	// score, taken from the last position of the corresponding profile.
	SCORE scoreGap = 0;
//	if (!g_bTermGapsHalf)
		switch (cType)
			{
		case 'M':
			scoreGap = 0;
			break;

		case 'D':
			{
			const ProfPos &LastPPA = PA[uLengthA - 1];
			scoreGap = LastPPA.m_scoreGapClose;
			break;
			}

		case 'I':
			{
			const ProfPos &LastPPB = PB[uLengthB - 1];
			scoreGap = LastPPB.m_scoreGapClose;
			break;
			}

		case 'U':
			Quit("Unaligned regions not supported");

		// 'S' is still the current type only when the path had no edges.
		case 'S':
			break;

		default:
			Quit("Invalid type %c", cType);
			}

	Log("      %cE  %4u  %4u           %7.1f\n", cType, uLengthA, uLengthB, scoreGap);
	scoreTotal += scoreGap;

	Log("Total = %g\n", scoreTotal);
	return scoreTotal;
	}
@@ -0,0 +1,161 @@
#include "muscle.h"
#include "profile.h"
#include "diaglist.h"

// Set to 1 to log every diagonal match found by FindDiags.
#define TRACE	0

// Tuple length in profile columns.
const unsigned KTUP = 5;
// Number of distinct tuple codes: GetTuple encodes KTUP residue groups
// as base-6 digits, giving 6^5 possible values.
const unsigned KTUPS = 6*6*6*6*6;
// Indexed by tuple code; FindDiags stores a start position in the longer
// profile here, or EMPTY when the tuple does not occur.
static unsigned TuplePos[KTUPS];

// Render a tuple code as five base-6 digits ('0'..'5'), most significant
// digit first. The result lives in a static buffer that is overwritten
// by the next call; the unused tail of the buffer stays zero and so
// terminates the string.
static char *TupleToStr(int t)
	{
	static char s[7];

// Peel digits off from least to most significant, filling right to left.
	int iPlaceValue = 1;
	for (int iChar = 4; iChar >= 0; --iChar)
		{
		s[iChar] = '0' + (t/iPlaceValue)%6;
		iPlaceValue *= 6;
		}
	return s;
	}

static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
	{
	const unsigned t0 = PP[uPos].m_uResidueGroup;
	if (RESIDUE_GROUP_MULTIPLE == t0)
		return EMPTY;

	const unsigned t1 = PP[uPos+1].m_uResidueGroup;
	if (RESIDUE_GROUP_MULTIPLE == t1)
		return EMPTY;

	const unsigned t2 = PP[uPos+2].m_uResidueGroup;
	if (RESIDUE_GROUP_MULTIPLE == t2)
		return EMPTY;

	const unsigned t3 = PP[uPos+3].m_uResidueGroup;
	if (RESIDUE_GROUP_MULTIPLE == t3)
		return EMPTY;

	const unsigned t4 = PP[uPos+4].m_uResidueGroup;
	if (RESIDUE_GROUP_MULTIPLE == t4)
		return EMPTY;

	return t0 + t1*6 + t2*6*6 + t3*6*6*6 + t4*6*6*6*6;
	}

// FindDiags: find diagonals (ungapped runs of columns with identical
// residue groups) shared by two amino acid profiles.
//
//	PX, uLengthX	first profile and its length
//	PY, uLengthY	second profile and its length
//	DL				output; cleared first, then receives one entry per
//					diagonal of at least g_uMinDiagLength columns, with
//					start positions given in (X, Y) order
//
// Calls Quit() unless g_Alpha is ALPHA_Amino. Returns with an empty list
// when either profile has fewer than 12 columns.
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
	if (ALPHA_Amino != g_Alpha)
		Quit("FindDiags: requires amino acid alphabet");

	DL.Clear();

	if (uLengthX < 12 || uLengthY < 12)
		return;

// Set A to shorter profile, B to longer
// (on equal lengths the else-branch is taken: A = Y, B = X).
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

// Build tuple map for the longer profile, B
// (this check cannot fire after the 12-column test above).
	if (uLengthB < KTUP)
		Quit("FindDiags: profile too short");

// NOTE(review): memset fills bytes, so this only yields EMPTY in every
// element if all bytes of EMPTY are equal -- confirm its definition.
	memset(TuplePos, EMPTY, sizeof(TuplePos));

// When a tuple occurs more than once in B the last occurrence wins.
// NOTE(review): the '<' bound leaves out the tuple starting at
// uLengthB - KTUP -- confirm whether that is intended.
	for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + KTUP - 1;
		unsigned uEndPosB = uPosB + KTUP - 1;
		for (;;)
			{
		// Stop at the end of either profile, at a column with multiple
		// residue groups, or at the first mismatch.
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
	// Resume the scan just past this match (the loop's ++uPosA follows).
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", 'A' + PA[n].m_uResidueGroup);
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", 'A' + PB[n].m_uResidueGroup);
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
		// Undo the A/B swap so the caller sees (X, Y) coordinates.
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}
@@ -0,0 +1,152 @@
#include "muscle.h"
#include "profile.h"
#include "diaglist.h"

// Set to 1 to log every diagonal match found by FindDiagsNuc.
#define TRACE	0

// 4^i = 2^(2*i). The argument is parenthesized so that an expression
// such as pow4(n + 1) expands to 1 << (2*(n + 1)) rather than
// 1 << (2*n + 1).
#define pow4(i)	(1 << (2*(i)))
// Tuple length in profile columns.
const unsigned K = 7;
// Number of distinct tuple codes: K letters over a 4-letter alphabet.
const unsigned KTUPS = pow4(K);
// Indexed by tuple code; FindDiagsNuc stores a start position in the
// longer profile here, or EMPTY when the tuple does not occur.
static unsigned TuplePos[KTUPS];

// Render a tuple code as K letters, one per base-4 digit of t, with the
// most significant digit first. Each digit is mapped through
// LetterToChar. The result lives in a static buffer that is overwritten
// by the next call.
//
// The buffer holds K characters plus a terminating NUL; without the extra
// element the returned string would not be terminated.
static char *TupleToStr(int t)
	{
	static char s[K + 1];

	for (unsigned i = 0; i < K; ++i)
		{
		unsigned Letter = (t/(pow4(i)))%4;
	// A negative t yields a negative remainder, which wraps to a large
	// unsigned value and is caught here.
		assert(Letter < 4);
		s[K-i-1] = LetterToChar(Letter);
		}
	s[K] = 0;

	return s;
	}

static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
	{
	unsigned t = 0;

	for (unsigned i = 0; i < K; ++i)
		{
		const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
		if (RESIDUE_GROUP_MULTIPLE == uLetter)
			return EMPTY;
		t = t*4 + uLetter;
		}

	return t;
	}

// FindDiagsNuc: find diagonals (ungapped runs of columns with identical
// residue groups) shared by two nucleotide profiles.
//
//	PX, uLengthX	first profile and its length
//	PY, uLengthY	second profile and its length
//	DL				output; cleared first, then receives one entry per
//					diagonal of at least g_uMinDiagLength columns, with
//					start positions given in (X, Y) order
//
// Calls Quit() unless g_Alpha is ALPHA_DNA or ALPHA_RNA. Returns with an
// empty list when either profile has fewer than K + 16 columns.
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("FindDiagsNuc: requires nucleo alphabet");

	DL.Clear();

// 16 is arbitrary slop, no principled reason for this.
	if (uLengthX < K + 16 || uLengthY < K + 16)
		return;

// Set A to shorter profile, B to longer
// (on equal lengths the else-branch is taken: A = Y, B = X).
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

#if	TRACE
	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
#endif

// Build tuple map for the longer profile, B
// (this check cannot fire after the K + 16 test above).
	if (uLengthB < K)
		Quit("FindDiags: profile too short");

// NOTE(review): memset fills bytes, so this only yields EMPTY in every
// element if all bytes of EMPTY are equal -- confirm its definition.
	memset(TuplePos, EMPTY, sizeof(TuplePos));

// When a tuple occurs more than once in B the last occurrence wins.
// NOTE(review): the '<' bound leaves out the tuple starting at
// uLengthB - K -- confirm whether that is intended.
	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + K - 1;
		unsigned uEndPosB = uPosB + K - 1;
		for (;;)
			{
		// Stop at the end of either profile, at a column with multiple
		// residue groups, or at the first mismatch.
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
	// Resume the scan just past this match (the loop's ++uPosA follows).
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
		// Undo the A/B swap so the caller sees (X, Y) coordinates.
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}
@@ -0,0 +1,69 @@
// source code generated by dimer.py

// Gap score term for the "MM" case (presumably a match state followed by a
// match state -- confirm against dimer.py). Returns a sum of products of
// the m_LL/m_LG/m_GL/m_GG fields of the two positions, each group of
// products weighted by g_scoreGapOpen, g_scoreGapExtend or g_scoreGapAmbig.
static SCORE GapScoreMM(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_LG) +
	  g_scoreGapExtend*(PPA.m_LL*PPB.m_GG + PPA.m_GG*PPB.m_LL) +
	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
	}

// Gap score term for the "MD" case (presumably match followed by delete --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, each group of products
// weighted by g_scoreGapOpen, g_scoreGapExtend or g_scoreGapAmbig.
static SCORE GapScoreMD(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
	}

// Gap score term for the "MI" case (presumably match followed by insert --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, each group of products
// weighted by g_scoreGapOpen, g_scoreGapExtend or g_scoreGapAmbig.
static SCORE GapScoreMI(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
	  g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
	  g_scoreGapAmbig*(PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_GL);
	}

// Gap score term for the "DM" case (presumably delete followed by match --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, each group of products
// weighted by g_scoreGapOpen, g_scoreGapExtend or g_scoreGapAmbig.
static SCORE GapScoreDM(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL) +
	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
	}

// Gap score term for the "DD" case (presumably delete followed by delete --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, weighted by
// g_scoreGapExtend or g_scoreGapAmbig; there is no gap-open term.
static SCORE GapScoreDD(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GL + PPA.m_LL*PPB.m_GG) +
	  g_scoreGapAmbig*(PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GL + PPA.m_GL*PPB.m_GG);
	}

// Gap score term for the "DI" case (presumably delete followed by insert --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, weighted by
// g_scoreGapOpen or g_scoreGapAmbig; there is no gap-extend term.
static SCORE GapScoreDI(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
	  g_scoreGapAmbig*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
	}

// Gap score term for the "IM" case (presumably insert followed by match --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, each group of products
// weighted by g_scoreGapOpen, g_scoreGapExtend or g_scoreGapAmbig.
static SCORE GapScoreIM(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_GL*PPB.m_LG) +
	  g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_GG + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
	}

// Gap score term for the "ID" case (presumably insert followed by delete --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, weighted by
// g_scoreGapOpen or g_scoreGapAmbig; there is no gap-extend term.
static SCORE GapScoreID(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
	}

// Gap score term for the "II" case (presumably insert followed by insert --
// confirm against dimer.py). Returns a sum of products of the
// m_LL/m_LG/m_GL/m_GG fields of the two positions, weighted by
// g_scoreGapExtend or g_scoreGapAmbig; there is no gap-open term.
static SCORE GapScoreII(const ProfPos &PPA, const ProfPos &PPB)
	{
	return
	  g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LG*PPB.m_LL + PPA.m_GL*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
	  g_scoreGapAmbig*(PPA.m_LL*PPB.m_GL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GL + PPA.m_GG*PPB.m_GL);
	}
@@ -0,0 +1,32 @@
#include "muscle.h"
#include "pwpath.h"
#include "timing.h"
#include "textfile.h"
#include "msa.h"
#include "profile.h"

// Align two profiles globally, writing the alignment to Path.
// Dispatches on g_bDiags: the diagonal-accelerated aligner when set,
// the plain aligner otherwise. Returns whatever the chosen aligner returns.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	return g_bDiags ?
	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
	}

// Align two profiles globally without the diagonal speed-up, choosing the
// aligner from g_PPScore:
//	PPSCORE_LE -> GlobalAlignLA
//	PPSCORE_SP -> GlobalAlignNS
//	PPSCORE_SV -> GlobalAlignSimple
// Any other value falls through and returns 0 without touching Path.
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	if (PPSCORE_LE == g_PPScore)
		return GlobalAlignLA(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SP == g_PPScore)
		return GlobalAlignNS(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SV == g_PPScore)
		return GlobalAlignSimple(PA, uLengthA, PB, uLengthB, Path);

	return 0;
	}
@@ -0,0 +1,165 @@
#include "muscle.h"
#include "pwpath.h"
#include "timing.h"
#include "textfile.h"
#include "msa.h"
#include "profile.h"

#if	!VER_3_52

// Set to 1 to build the GlobalAlign variant that also runs
// GlobalAlignSimple and quits if the two paths differ.
#define COMPARE_SIMPLE	0

#if	TIMING
// Accumulated clock ticks spent inside GlobalAlign.
TICKS g_ticksDP = 0;
#endif

// The "#if 1" branch below is the active one; the "#else" branch is not compiled.
#if	1
// Defined elsewhere; set to true by the COMPARE_SIMPLE build of GlobalAlign.
extern bool g_bKeepSimpleDP;
// Aligners defined in other source files. All take two profiles with
// their lengths and write the resulting alignment path to Path.
SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path);
SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path);
SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path);
SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path);
SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path);

// In this build there is no separate no-diagonals aligner: the call is
// forwarded unchanged to GlobalAlign and its score is returned.
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	const SCORE scoreAlign = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
	return scoreAlign;
	}

#if	COMPARE_SIMPLE

// Self-checking build of GlobalAlign (COMPARE_SIMPLE != 0).
// Aligns the profiles twice -- once with GlobalAlignSimple into a scratch
// path, once with NWSmall into Path -- and quits after logging both paths
// if they are not equal. Returns the NWSmall score. When TIMING is set,
// the ticks spent in the whole call are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	const TICKS ticksStart = GetClockTicks();
#endif
	g_bKeepSimpleDP = true;

// Reference alignment; its score is not needed, only its path.
	PWPath RefPath;
	GlobalAlignSimple(PA, uLengthA, PB, uLengthB, RefPath);

	const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);

	const bool bSamePath = Path.Equal(RefPath);
	if (!bSamePath)
		{
		Log("Simple:\n");
		RefPath.LogMe();
		Log("Small:\n");
		Path.LogMe();
		Quit("Paths differ");
		}

#if	TIMING
	const TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksStart);
#endif
	return scoreSmall;
	}

#else // COMPARE_SIMPLE

// Align two profiles globally with NWSmall, writing the alignment to Path
// and returning the NWSmall score. When TIMING is set, the ticks spent in
// the call are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	const TICKS ticksStart = GetClockTicks();
#endif
	const SCORE scoreAlign = NWSmall(PA, uLengthA, PB, uLengthB, Path);
#if	TIMING
	const TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksStart);
#endif
	return scoreAlign;
	}

#endif

#else // 1

// Replace Path with uLengthB insert edges: edge types are all 'I', the
// A prefix length stays 0 and the B prefix length runs from 1 to uLengthB.
static void AllInserts(PWPath &Path, unsigned uLengthB)
	{
	Path.Clear();

	PWEdge InsertEdge;
	InsertEdge.cType = 'I';
	InsertEdge.uPrefixLengthA = 0;

	unsigned uColB = 0;
	while (uColB < uLengthB)
		{
		++uColB;
		InsertEdge.uPrefixLengthB = uColB;
		Path.AppendEdge(InsertEdge);
		}
	}

// Replace Path with uLengthA delete edges: edge types are all 'D', the
// B prefix length stays 0 and the A prefix length runs from 1 to uLengthA.
static void AllDeletes(PWPath &Path, unsigned uLengthA)
	{
	Path.Clear();

	PWEdge DeleteEdge;
	DeleteEdge.cType = 'D';
	DeleteEdge.uPrefixLengthB = 0;

	unsigned uColA = 0;
	while (uColA < uLengthA)
		{
		++uColA;
		DeleteEdge.uPrefixLengthA = uColA;
		Path.AppendEdge(DeleteEdge);
		}
	}

// Align two profiles globally, writing the alignment to Path.
// An empty profile A yields a path of inserts only, an empty profile B a
// path of deletes only; both return 0 and skip the timing update.
// Otherwise the work goes to GlobalAlignDiags or GlobalAlignNoDiags
// depending on g_bDiags, and that aligner's score is returned. When TIMING
// is set, the ticks spent in the call are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	const TICKS ticksStart = GetClockTicks();
#endif
	if (0 == uLengthA)
		{
		AllInserts(Path, uLengthB);
		return 0;
		}
	if (0 == uLengthB)
		{
		AllDeletes(Path, uLengthA);
		return 0;
		}

	const SCORE scoreAlign = g_bDiags ?
	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
#if	TIMING
	const TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksStart);
#endif
	return scoreAlign;
	}

// Align two profiles globally without the diagonal speed-up.
// g_bDimer takes priority and selects GlobalAlignDimer; otherwise the
// aligner is chosen from g_PPScore:
//	PPSCORE_LE				-> GlobalAlignLE
//	PPSCORE_SP, PPSCORE_SV	-> GlobalAlignSP
//	PPSCORE_SPN				-> GlobalAlignSPN
// Any other value ends in Quit().
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	if (g_bDimer)
		return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_LE == g_PPScore)
		return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
		return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SPN == g_PPScore)
		return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);

	Quit("Invalid PP score (GlobalAlignNoDiags)");
	return 0;
	}

#endif

#endif	// !VER_3_52
@@ -0,0 +1,55 @@
#include "muscle.h"
#include "pwpath.h"
#include "timing.h"
#include "textfile.h"
#include "msa.h"
#include "profile.h"

#if	VER_3_52

#if	TIMING
// Accumulated clock ticks spent inside GlobalAlign.
TICKS g_ticksDP = 0;
#endif

// Align two profiles globally, writing the alignment to Path.
// Uses GlobalAlignDiags when g_bDiags is set and GlobalAlignNoDiags
// otherwise, returning that aligner's score. When TIMING is set, the
// ticks spent in the call are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	TIMING
	const TICKS ticksStart = GetClockTicks();
#endif
	const SCORE scoreAlign = g_bDiags ?
	  GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path) :
	  GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
#if	TIMING
	const TICKS ticksEnd = GetClockTicks();
	g_ticksDP += (ticksEnd - ticksStart);
#endif
	return scoreAlign;
	}

// Align two profiles globally without the diagonal speed-up.
// g_bDimer takes priority and selects GlobalAlignDimer; otherwise the
// aligner is chosen from g_PPScore:
//	PPSCORE_LE				-> GlobalAlignLE
//	PPSCORE_SP, PPSCORE_SV	-> GlobalAlignSP
//	PPSCORE_SPN				-> GlobalAlignSPN
// Any other value ends in Quit().
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	if (g_bDimer)
		return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_LE == g_PPScore)
		return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
		return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);

	if (PPSCORE_SPN == g_PPScore)
		return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);

	Quit("Invalid PP score (GlobalAlignNoDiags)");
	return 0;
	}

#endif	// VER_3_52
@@ -0,0 +1,172 @@
#include "muscle.h"
#include "dpreglist.h"
#include "diaglist.h"
#include "pwpath.h"
#include "profile.h"
#include "timing.h"

// Compile-time logging switches (0 = off):
//	TRACE		log the diagonal and region lists at each stage
//	TRACE_PATH	log the path after each region is processed
//	LIST_DIAGS	log ticks and DP area per call
#define TRACE		0
#define TRACE_PATH	0
#define LIST_DIAGS	0

// Running totals over all GlobalAlignDiags calls, reported by
// ListDiagSavings: the full uLengthA*uLengthB area of each call, and the
// area of the rectangular regions that were actually aligned.
static double g_dDPAreaWithoutDiags = 0.0;
static double g_dDPAreaWithDiags = 0.0;

// Shift every edge of Path in place: uOffsetA is added to each edge's
// A prefix length and uOffsetB to its B prefix length.
static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
	{
	const unsigned uEdgeTotal = Path.GetEdgeCount();
	unsigned uIndex = 0;
	while (uIndex < uEdgeTotal)
		{
	// GetEdge only hands out a const reference, so constness is cast
	// away to write the shifted values straight into the path's storage.
		PWEdge &WritableEdge = (PWEdge &) Path.GetEdge(uIndex);
		WritableEdge.uPrefixLengthA += uOffsetA;
		WritableEdge.uPrefixLengthB += uOffsetB;
		++uIndex;
		}
	}

static void DiagToPath(const Diag &d, PWPath &Path)
	{
	Path.Clear();
	const unsigned uLength = d.m_uLength;
	for (unsigned i = 0; i < uLength; ++i)
		{
		PWEdge Edge;
		Edge.cType = 'M';
		Edge.uPrefixLengthA = d.m_uStartPosA + i + 1;
		Edge.uPrefixLengthB = d.m_uStartPosB + i + 1;
		Path.AppendEdge(Edge);
		}
	}

// Append every edge of RegPath, in order, to the end of Path.
static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
	{
	const unsigned uEdgeTotal = RegPath.GetEdgeCount();
	for (unsigned uIndex = 0; uIndex != uEdgeTotal; ++uIndex)
		Path.AppendEdge(RegPath.GetEdge(uIndex));
	}

// GlobalAlignDiags: global alignment of two profiles accelerated by
// diagonals.
//
// Diagonals shared by the profiles are found (FindDiags for amino acid
// alphabets, FindDiagsNuc for DNA/RNA), sorted, pruned of incompatible
// entries and merged. The result is converted to a list of DP regions:
// a diagonal region becomes a run of match edges, a rectangular region
// is aligned with GlobalAlignNoDiags. The per-region paths are appended
// to Path in region order.
//
//	PA, uLengthA	profile A and its length
//	PB, uLengthB	profile B and its length
//	Path			receives the edges; it is appended to, not cleared here
//
// Always returns 0; the scores of the sub-alignments are discarded.
// Calls Quit() on any other alphabet or on an unknown region type.
//
// The DP areas are accumulated as doubles. The operands are converted
// before multiplying so that the product of two long profile lengths does
// not wrap around in unsigned arithmetic.
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
#if	LIST_DIAGS
	TICKS t1 = GetClockTicks();
#endif

	DiagList DL;

	if (ALPHA_Amino == g_Alpha)
		FindDiags(PA, uLengthA, PB, uLengthB, DL);
	else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
		FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
	else
		Quit("GlobalAlignDiags: bad alpha");

#if	TRACE
	Log("GlobalAlignDiags, diag list:\n");
	DL.LogMe();
#endif

	DL.Sort();
	DL.DeleteIncompatible();

#if	TRACE
	Log("After DeleteIncompatible:\n");
	DL.LogMe();
#endif

	MergeDiags(DL);

#if	TRACE
	Log("After MergeDiags:\n");
	DL.LogMe();
#endif

	DPRegionList RL;
	DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);

#if	TRACE
	Log("RegionList:\n");
	RL.LogMe();
#endif

#if	LIST_DIAGS
	{
	TICKS t2 = GetClockTicks();
	unsigned uArea = RL.GetDPArea();
	Log("ticks=%ld\n", (long) (t2 - t1));
	Log("area=%u\n", uArea);
	}
#endif

// Area a full DP matrix would have needed for this pair.
	g_dDPAreaWithoutDiags += (double) uLengthA*(double) uLengthB;

	double dDPAreaWithDiags = 0.0;
	const unsigned uRegionCount = RL.GetCount();
	for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
		{
		const DPRegion &r = RL.Get(uRegionIndex);

		PWPath RegPath;
		if (DPREGIONTYPE_Diag == r.m_Type)
			{
		// No DP needed: the diagonal itself is the alignment.
			DiagToPath(r.m_Diag, RegPath);
#if	TRACE_PATH
			Log("DiagToPath, path=\n");
			RegPath.LogMe();
#endif
			}
		else if (DPREGIONTYPE_Rect == r.m_Type)
			{
			const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
			const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
			const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
			const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
			const ProfPos *RegPA = PA + uRegStartPosA;
			const ProfPos *RegPB = PB + uRegStartPosB;

			dDPAreaWithDiags += (double) uRegLengthA*(double) uRegLengthB;
			GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
#if	TRACE_PATH
			Log("GlobalAlignNoDiags RegPath=\n");
			RegPath.LogMe();
#endif
		// The sub-alignment is relative to the rectangle's corner;
		// shift it back into whole-profile coordinates.
			OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
#if	TRACE_PATH
			Log("After offset path, RegPath=\n");
			RegPath.LogMe();
#endif
			}
		else
			Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);

		AppendRegPath(Path, RegPath);
#if	TRACE_PATH
		Log("After AppendPath, path=");
		Path.LogMe();
#endif
		}

#if	TRACE
	{
	double dDPAreaWithoutDiags = (double) uLengthA*(double) uLengthB;
	Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
	  dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
	}
#endif
	g_dDPAreaWithDiags += dDPAreaWithDiags;
	return 0;
	}

// Log the percentage of dynamic-programming area saved by diagonals,
// accumulated over all GlobalAlignDiags calls so far. Does nothing unless
// both g_bVerbose and g_bDiags are set.
//
// When no area has been accumulated yet the saving is reported as 0.0%;
// dividing by the zero total would otherwise log a NaN.
void ListDiagSavings()
	{
	if (!g_bVerbose || !g_bDiags)
		return;
	double dAreaSaved = g_dDPAreaWithoutDiags - g_dDPAreaWithDiags;
	double dPct = 0.0;
	if (g_dDPAreaWithoutDiags > 0.0)
		dPct = dAreaSaved*100.0/g_dDPAreaWithoutDiags;
	Log("DP area saved by diagonals %-4.1f%%\n", dPct);
	}
@@ -0,0 +1,432 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// When non-zero, match scores are scaled by the m_fOcc values of the two
// columns and the OccA/OccB buffers are compiled in.
#define	OCC	1

// Scratch buffers for GlobalAlignLA, kept between calls and regrown by
// AllocDPMem when a longer profile arrives.
struct DP_MEMORY
	{
	// Capacity: the number of entries in each per-position buffer below
	// (0 until the first allocation).
	unsigned uLength;
	// Copies of m_scoreGapOpen / m_scoreGapClose for each column of A and B.
	SCORE *GapOpenA;
	SCORE *GapOpenB;
	SCORE *GapCloseA;
	SCORE *GapCloseB;
	// Match-state score rows: previous row, current row, and a third
	// buffer passed to Rotate().
	SCORE *MPrev;
	SCORE *MCurr;
	SCORE *MWork;
	// Delete-state score rows, same arrangement as the M rows.
	SCORE *DPrev;
	SCORE *DCurr;
	SCORE *DWork;
	// ScoreMxB[letter][j] = PB[j].m_AAScores[letter]; 20 rows.
	SCORE **ScoreMxB;
#if	OCC
	// Copies of m_fOcc for each column of A and B.
	FCOUNT *OccA;
	FCOUNT *OccB;
#endif
	// SortOrderA[i] is a copy of PA[i].m_uSortOrder (20 entries per column).
	unsigned **SortOrderA;
	// uDeletePos[j]: row index recorded when the best delete score for
	// column j was last improved.
	unsigned *uDeletePos;
	// FreqsA[i] is a copy of PA[i].m_fcCounts (20 entries per column).
	FCOUNT **FreqsA;
	// uLength x uLength trace-back matrix.
	int **TraceBack;
	};

// Single file-wide instance; static storage, so uLength starts at 0.
static struct DP_MEMORY DPM;

// Make sure the buffers in DPM can hold an alignment of profiles with
// uLengthA and uLengthB columns. Nothing happens while the current
// capacity exceeds the required size; otherwise every buffer is released
// and reallocated at a larger capacity. Buffer contents are not kept.
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// Buffers are indexed by prefix length, hence one more than the longer
// of the two profiles.
	const unsigned uLonger = (uLengthB < uLengthA) ? uLengthA : uLengthB;
	unsigned uLength = uLonger + 1;
	if (DPM.uLength > uLength)
		return;

// Leave 256 entries of headroom, then move up to the next multiple of 32
// (a size that is already a multiple still grows by 32).
	uLength = ((uLength + 256)/32 + 1)*32;

// Release the previous generation of buffers, if there is one.
	const unsigned uOldLength = DPM.uLength;
	if (0 != uOldLength)
		{
		for (unsigned uRow = 0; uRow < uOldLength; ++uRow)
			{
			delete[] DPM.TraceBack[uRow];
			delete[] DPM.FreqsA[uRow];
			delete[] DPM.SortOrderA[uRow];
			}
		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			delete[] DPM.ScoreMxB[uLetter];

		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.uDeletePos;
		delete[] DPM.GapOpenA;
		delete[] DPM.GapOpenB;
		delete[] DPM.GapCloseA;
		delete[] DPM.GapCloseB;
		delete[] DPM.SortOrderA;
		delete[] DPM.FreqsA;
		delete[] DPM.ScoreMxB;
		delete[] DPM.TraceBack;
#if	OCC
		delete[] DPM.OccA;
		delete[] DPM.OccB;
#endif
		}

	DPM.uLength = uLength;

// One entry per position.
	DPM.GapOpenA = new SCORE[uLength];
	DPM.GapOpenB = new SCORE[uLength];
	DPM.GapCloseA = new SCORE[uLength];
	DPM.GapCloseB = new SCORE[uLength];
#if	OCC
	DPM.OccA = new FCOUNT[uLength];
	DPM.OccB = new FCOUNT[uLength];
#endif
	DPM.MPrev = new SCORE[uLength];
	DPM.MCurr = new SCORE[uLength];
	DPM.MWork = new SCORE[uLength];
	DPM.DPrev = new SCORE[uLength];
	DPM.DCurr = new SCORE[uLength];
	DPM.DWork = new SCORE[uLength];
	DPM.uDeletePos = new unsigned[uLength];

// One row of uLength scores per letter (20 letters).
	DPM.ScoreMxB = new SCORE*[20];
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		DPM.ScoreMxB[uLetter] = new SCORE[uLength];

// One row per position: 20 letters for the A-side tables, uLength
// columns for the trace-back matrix.
	DPM.SortOrderA = new unsigned*[uLength];
	DPM.FreqsA = new FCOUNT*[uLength];
	DPM.TraceBack = new int*[uLength];
	for (unsigned uRow = 0; uRow < uLength; ++uRow)
		{
		DPM.SortOrderA[uRow] = new unsigned[20];
		DPM.FreqsA[uRow] = new FCOUNT[20];
		DPM.TraceBack[uRow] = new int[uLength];
		}
	}

// GlobalAlignLA: global profile-profile alignment by dynamic programming,
// keeping only two rows of scores at a time.
//
//	PA, uLengthA	profile A and its length in columns
//	PB, uLengthB	profile B and its length in columns
//	Path			receives the alignment via TraceBackToPath
//
// Returns the best score found for the full alignment.
//
// The match score of column i of A against column j of B is
// log(sum over letters of count_A[letter]*m_AAScores_B[letter]) minus
// g_scoreCenter, scaled by the two occupancies when OCC is set; a zero
// sum scores -2.5 instead. Gap-open and gap-close scores come from the
// profile columns.
//
// TraceBack entries encode how a cell was reached: 0 for a match, a
// positive value after a delete and a negative value after an insert
// (see the asserts on *ptrTraceBack_ij below). The values are differences
// of row/column indices and are interpreted by TraceBackToPath.
//
// NOTE(review): the code indexes MPrev[uLengthB-1] and GapCloseA[uLengthA-1],
// so it appears to assume both lengths are at least 1 -- confirm callers.
SCORE GlobalAlignLA(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

// Local aliases for the shared scratch buffers.
	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;

#if	OCC
	FCOUNT *OccA = DPM.OccA;
	FCOUNT *OccB = DPM.OccB;
#endif

	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Copy the per-column data of A into flat arrays.
	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;
#if	OCC
		OccA[i] = PA[i].m_fOcc;
#endif

		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

// Same for the gap scores and occupancies of B.
	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
#if	OCC
		OccB[j] = PB[j].m_fOcc;
#endif
		}

// Transpose B's per-column letter scores into one row per letter.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

// Clear the part of the trace-back matrix used by this alignment.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

// The letter loop stops at the first letter with a zero count.
// (presumably m_uSortOrder lists letters by decreasing count, so no
// non-zero counts follow -- confirm where the profile is built)
	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	if (0 == scoreSum)
		MPrev[0] = -2.5;
	else
		{
#if	OCC
		MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
#else
		MPrev[0] = (logf(scoreSum) - g_scoreCenter);
#endif
		}

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
	// Note: the -2.5 fallback does not get the gap scores added.
		if (0 == scoreSum)
			MPrev[j] = -2.5;
		else
			{
#if	OCC
			MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
			  GapOpenB[0] + GapCloseB[j-1];
#else
			MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
			  GapOpenB[0] + GapCloseB[j-1];
#endif
			}
	// Negative entry: reached by an insert of length j.
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

// Main recursion over the remaining rows of A.
	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

	// First pass: accumulate the raw letter-score sums for the whole
	// row, one letter of A at a time.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

	// Second pass: turn the sums into match scores.
#if	OCC
		const FCOUNT OccAi = OccA[i];
#endif
		for (unsigned j = 0; j < uLengthB; ++j)
			{
			if (MCurr[j] == 0)
				MCurr[j] = -2.5;
			else
#if	OCC
				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
#else
				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
#endif
			}

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

	// Positive entry: reached by a delete of length i.
		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// Best delete score for column 0: continue the existing delete or
	// open a new one here, remembering where it was opened.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Pick the best predecessor: match (trace-back stays 0),
		// closing a delete, or closing an insert.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

		// MCurr[j] already holds the match score of (i, j).
			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update the best delete score for column j, as done for
		// column 0 above.
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

	// NOTE(review): presumably Rotate cycles the three buffer pointers
	// so that the row just computed becomes the "previous" row -- the
	// code below reads MPrev/DPrev as the last row; confirm.
		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,435 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// When non-zero, the OccA/OccB buffers are compiled in.
#define	OCC	1

// Scratch buffers kept between alignment calls and regrown by AllocDPMem
// when a longer profile arrives. Unless noted, each pointer refers to an
// array of uLength entries.
// NOTE(review): the per-field descriptions are inferred from the names
// and from how AllocDPMem sizes them -- confirm against GlobalAlignLE.
struct DP_MEMORY
	{
	// Current capacity (0 until the first allocation).
	unsigned uLength;
	// Gap-open / gap-close scores for profiles A and B.
	SCORE *GapOpenA;
	SCORE *GapOpenB;
	SCORE *GapCloseA;
	SCORE *GapCloseB;
	// Three score rows each for the M and D states.
	SCORE *MPrev;
	SCORE *MCurr;
	SCORE *MWork;
	SCORE *DPrev;
	SCORE *DCurr;
	SCORE *DWork;
	// 20 rows of uLength scores.
	SCORE **ScoreMxB;
#if	OCC
	FCOUNT *OccA;
	FCOUNT *OccB;
#endif
	// uLength rows of 20 entries.
	unsigned **SortOrderA;
	unsigned *uDeletePos;
	// uLength rows of 20 entries.
	FCOUNT **FreqsA;
	// uLength x uLength matrix.
	int **TraceBack;
	};

// Single file-wide instance; static storage, so uLength starts at 0.
static struct DP_MEMORY DPM;

// Make sure the buffers in DPM can hold an alignment of profiles with
// uLengthA and uLengthB columns. Nothing happens while the current
// capacity exceeds the required size; otherwise every buffer is released
// and reallocated at a larger capacity. Buffer contents are not kept.
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// Buffers are indexed by prefix length, hence one more than the longer
// of the two profiles.
	const unsigned uLonger = (uLengthB < uLengthA) ? uLengthA : uLengthB;
	unsigned uLength = uLonger + 1;
	if (DPM.uLength > uLength)
		return;

// Leave 256 entries of headroom, then move up to the next multiple of 32
// (a size that is already a multiple still grows by 32).
	uLength = ((uLength + 256)/32 + 1)*32;

// Release the previous generation of buffers, if there is one.
	const unsigned uOldLength = DPM.uLength;
	if (0 != uOldLength)
		{
		for (unsigned uRow = 0; uRow < uOldLength; ++uRow)
			{
			delete[] DPM.TraceBack[uRow];
			delete[] DPM.FreqsA[uRow];
			delete[] DPM.SortOrderA[uRow];
			}
		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			delete[] DPM.ScoreMxB[uLetter];

		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.uDeletePos;
		delete[] DPM.GapOpenA;
		delete[] DPM.GapOpenB;
		delete[] DPM.GapCloseA;
		delete[] DPM.GapCloseB;
		delete[] DPM.SortOrderA;
		delete[] DPM.FreqsA;
		delete[] DPM.ScoreMxB;
		delete[] DPM.TraceBack;
#if	OCC
		delete[] DPM.OccA;
		delete[] DPM.OccB;
#endif
		}

	DPM.uLength = uLength;

// One entry per position.
	DPM.GapOpenA = new SCORE[uLength];
	DPM.GapOpenB = new SCORE[uLength];
	DPM.GapCloseA = new SCORE[uLength];
	DPM.GapCloseB = new SCORE[uLength];
#if	OCC
	DPM.OccA = new FCOUNT[uLength];
	DPM.OccB = new FCOUNT[uLength];
#endif
	DPM.MPrev = new SCORE[uLength];
	DPM.MCurr = new SCORE[uLength];
	DPM.MWork = new SCORE[uLength];
	DPM.DPrev = new SCORE[uLength];
	DPM.DCurr = new SCORE[uLength];
	DPM.DWork = new SCORE[uLength];
	DPM.uDeletePos = new unsigned[uLength];

// One row of uLength scores per letter (20 letters).
	DPM.ScoreMxB = new SCORE*[20];
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		DPM.ScoreMxB[uLetter] = new SCORE[uLength];

// One row per position: 20 letters for the A-side tables, uLength
// columns for the trace-back matrix.
	DPM.SortOrderA = new unsigned*[uLength];
	DPM.FreqsA = new FCOUNT*[uLength];
	DPM.TraceBack = new int*[uLength];
	for (unsigned uRow = 0; uRow < uLength; ++uRow)
		{
		DPM.SortOrderA[uRow] = new unsigned[20];
		DPM.FreqsA[uRow] = new FCOUNT[20];
		DPM.TraceBack[uRow] = new int[uLength];
		}
	}

// Global alignment of profile A against profile B.
//
// The per-cell match score is computed as shown below: the sum over A's
// letters (visited in m_uSortOrder order until a zero count is met) of
// count*B-score is passed through logf(), g_scoreCenter is subtracted and,
// when OCC is enabled, the result is multiplied by the m_fOcc values of
// both columns. A sum of exactly zero is scored as -2.5 instead.
//
//	PA, uLengthA	profile A and its number of positions (must be > 0)
//	PB, uLengthB	profile B and its number of positions (must be > 0)
//	Path		filled in by TraceBackToPath from the trace-back matrix
//
// Returns the best of the M, D and I scores for the last cell.
//
// Only single rows of M and D are kept; Rotate() is called on the
// Prev/Curr/Work triples after each row (presumably cycling the three
// pointers). Trace-back entries are signed offsets: left at 0 when the
// M predecessor wins, set to i - uDeletePos (> 0) when D wins and to
// uInsertPos - j (< 0) when I wins.
//
// Uses the file-static DPM buffers, so it is not reentrant.
SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
// Empty profiles are not supported: element [uLength-1] of several
// arrays is read unconditionally below.
	assert(uLengthB > 0 && uLengthA > 0);

	SetTermGaps(PA, uLengthA);
	SetTermGaps(PB, uLengthB);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Make sure the shared buffers are big enough, then alias them locally.
	AllocDPMem(uLengthA, uLengthB);

	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;

#if	OCC
	FCOUNT *OccA = DPM.OccA;
	FCOUNT *OccB = DPM.OccB;
#endif

	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Copy the parts of profile A that the inner loops need into flat arrays.
	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;
#if	OCC
		OccA[i] = PA[i].m_fOcc;
#endif

		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

// Same for profile B.
	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
#if	OCC
		OccB[j] = PB[j].m_fOcc;
#endif
		}

// B's letter scores are stored letter-major so that the inner loop can
// walk one letter's row with a single pointer.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

// Zero means "M predecessor" in the trace-back, so start from all zeros.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
	// The loop stops at the first zero count in sort order.
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	if (0 == scoreSum)
		MPrev[0] = -2.5;
	else
		{
#if	OCC
		MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
#else
		MPrev[0] = (logf(scoreSum) - g_scoreCenter);
#endif
		}

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
		if (0 == scoreSum)
			MPrev[j] = -2.5;
		else
			{
#if	OCC
			MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
			  GapOpenB[0] + GapCloseB[j-1];
#else
			MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
			  GapOpenB[0] + GapCloseB[j-1];
#endif
			}
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

	// First pass over the row: accumulate the raw frequency-weighted
	// sums for every column j into MCurr.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

	// Second pass: turn each raw sum into the match score.
#if	OCC
		const FCOUNT OccAi = OccA[i];
#endif
		for (unsigned j = 0; j < uLengthB; ++j)
			{
			if (MCurr[j] == 0)
				MCurr[j] = -2.5;
			else
#if	OCC
				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
#else
				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
#endif
			}

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// D for column 0: either keep the carried-over value or open a
	// new delete here, remembering the row where it was opened.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos = 0;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Pick the best predecessor; the trace-back entry stays 0
		// when the M predecessor is kept.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

		// MCurr[j] already holds the match score for this cell.
			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update D for column j (same rule as for column 0 above).
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

// Initialised so that the read below is well defined even when the loop
// never assigns it (e.g. uLengthB == 1).
	unsigned uInsertPos = 0;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,374 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// Scratch buffers used by GlobalAlignNS. They are allocated (and grown)
// by AllocDPMem and kept in the file-static DPM between calls.
struct DP_MEMORY
	{
// Length the buffers below were allocated with; 0 means nothing has been
// allocated yet (DPM has static storage, so it starts zeroed).
	unsigned uLength;
// Per-position gap scores, copied from m_scoreGapOpen / m_scoreGapClose
// of profile A and profile B.
	SCORE *GapOpenA;
	SCORE *GapOpenB;
	SCORE *GapCloseA;
	SCORE *GapCloseB;
// Row buffers for the M scores; GlobalAlignNS passes the three of them
// to Rotate() after each row.
	SCORE *MPrev;
	SCORE *MCurr;
	SCORE *MWork;
// Row buffers for the D scores, handled the same way.
	SCORE *DPrev;
	SCORE *DCurr;
	SCORE *DWork;
// [20][uLength]: ScoreMxB[uLetter][j] is a copy of PB[j].m_AAScores[uLetter].
	SCORE **ScoreMxB;
// [uLength][20]: copy of PA[i].m_uSortOrder.
	unsigned **SortOrderA;
// Per column j: the row index i stored when the "open a delete" score
// beat the carried-over D score.
	unsigned *uDeletePos;
// [uLength][20]: copy of PA[i].m_fcCounts.
	FCOUNT **FreqsA;
// [uLength][uLength] trace-back offsets handed to TraceBackToPath.
	int **TraceBack;
	};

// Single shared instance; not reentrant, every call reuses these buffers.
static struct DP_MEMORY DPM;

// Make sure the shared scratch buffers in DPM can hold the DP working set
// for profiles with uLengthA and uLengthB positions. If the current
// allocation is already larger than required nothing is done; otherwise
// every buffer is released and allocated again at a padded size (previous
// contents are not preserved).
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// One more than the longer of the two profiles.
	const unsigned uLonger = (uLengthB < uLengthA) ? uLengthA : uLengthB;
	const unsigned uNeeded = uLonger + 1;
	if (DPM.uLength > uNeeded)
		return;

// Leave 256 entries of headroom for later, longer inputs, then move up
// to the following multiple of 32.
	unsigned uNewLength = uNeeded + 256;
	uNewLength += 32 - uNewLength%32;

// Release the previous allocation, if there was one.
	const unsigned uAllocated = DPM.uLength;
	if (0 != uAllocated)
		{
	// Rows first, then the pointer arrays that hold them.
		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			delete[] DPM.ScoreMxB[uLetter];
		for (unsigned uRow = 0; uRow < uAllocated; ++uRow)
			{
			delete[] DPM.SortOrderA[uRow];
			delete[] DPM.FreqsA[uRow];
			delete[] DPM.TraceBack[uRow];
			}
		delete[] DPM.ScoreMxB;
		delete[] DPM.SortOrderA;
		delete[] DPM.FreqsA;
		delete[] DPM.TraceBack;

		delete[] DPM.GapOpenA;
		delete[] DPM.GapOpenB;
		delete[] DPM.GapCloseA;
		delete[] DPM.GapCloseB;
		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.uDeletePos;
		}

	DPM.uLength = uNewLength;

// Flat per-position arrays.
	DPM.GapOpenA = new SCORE[uNewLength];
	DPM.GapOpenB = new SCORE[uNewLength];
	DPM.GapCloseA = new SCORE[uNewLength];
	DPM.GapCloseB = new SCORE[uNewLength];

// Pointer arrays and DP rows.
	DPM.SortOrderA = new unsigned*[uNewLength];
	DPM.FreqsA = new FCOUNT*[uNewLength];
	DPM.ScoreMxB = new SCORE*[20];
	DPM.MPrev = new SCORE[uNewLength];
	DPM.MCurr = new SCORE[uNewLength];
	DPM.MWork = new SCORE[uNewLength];

	DPM.DPrev = new SCORE[uNewLength];
	DPM.DCurr = new SCORE[uNewLength];
	DPM.DWork = new SCORE[uNewLength];
	DPM.uDeletePos = new unsigned[uNewLength];

	DPM.TraceBack = new int*[uNewLength];

// One score row per letter (20 letters), each uNewLength long.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		DPM.ScoreMxB[uLetter] = new SCORE[uNewLength];

// Per-position rows: 20 letters for the profile data, a full
// uNewLength-wide row for the trace-back matrix.
	for (unsigned uRow = 0; uRow < uNewLength; ++uRow)
		{
		DPM.SortOrderA[uRow] = new unsigned[20];
		DPM.FreqsA[uRow] = new FCOUNT[20];
		DPM.TraceBack[uRow] = new int[uNewLength];
		}
	}

// Global alignment of profile A against profile B.
//
// The per-cell match score is the sum over A's letters (visited in
// m_uSortOrder order until a zero count is met) of count*B-score, minus
// g_scoreCenter. Unlike GlobalAlignSimple, this function does not call
// SetTermGaps on its inputs.
//
//	PA, uLengthA	profile A and its number of positions (must be > 0)
//	PB, uLengthB	profile B and its number of positions (must be > 0)
//	Path		filled in by TraceBackToPath from the trace-back matrix
//
// Returns the best of the M, D and I scores for the last cell.
//
// Only single rows of M and D are kept; Rotate() is called on the
// Prev/Curr/Work triples after each row (presumably cycling the three
// pointers). Trace-back entries are signed offsets: left at 0 when the
// M predecessor wins, set to i - uDeletePos (> 0) when D wins and to
// uInsertPos - j (< 0) when I wins.
//
// Uses the file-static DPM buffers, so it is not reentrant.
SCORE GlobalAlignNS(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
// Empty profiles are not supported: element [uLength-1] of several
// arrays is read unconditionally below.
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Make sure the shared buffers are big enough, then alias them locally.
	AllocDPMem(uLengthA, uLengthB);

	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Copy the parts of profile A that the inner loops need into flat arrays.
	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;

		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

// Same for profile B.
	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
		}

// B's letter scores are stored letter-major so that the inner loop can
// walk one letter's row with a single pointer.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

// Zero means "M predecessor" in the trace-back, so start from all zeros.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
	// The loop stops at the first zero count in sort order.
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	MPrev[0] = scoreSum - g_scoreCenter;

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

	// First pass over the row: accumulate the raw frequency-weighted
	// sums for every column j into MCurr.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

	// Second pass: centre each sum to obtain the match score.
		for (unsigned j = 0; j < uLengthB; ++j)
			MCurr[j] -= g_scoreCenter;

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// D for column 0: either keep the carried-over value or open a
	// new delete here, remembering the row where it was opened.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

	// Initialised so that a read on the insert branch is well defined
	// even if no INew ever beat IPrev_j_1.
		unsigned uInsertPos = 0;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Pick the best predecessor; the trace-back entry stays 0
		// when the M predecessor is kept.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

		// MCurr[j] already holds the match score for this cell.
			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update D for column j (same rule as for column 0 above).
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

// Initialised so that the read below is well defined even when the loop
// never assigns it (e.g. uLengthB == 1).
	unsigned uInsertPos = 0;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,368 @@
#include "muscle.h"
#include <math.h>
#include "pwpath.h"
#include "profile.h"
#include <stdio.h>

#define	TRACE	0

#if	1 // SINGLE_AFFINE

extern bool g_bKeepSimpleDP;
extern SCORE *g_DPM;
extern SCORE *g_DPD;
extern SCORE *g_DPI;
extern char *g_TBM;
extern char *g_TBD;
extern char *g_TBI;

// Format a score for the debug matrix listings.
// Scores below -100000 are shown as "     *"; anything else is printed
// with "%6.1f".
// Returns either a string literal or a pointer to a static buffer, so the
// result is overwritten by the next call and must not be freed.
static const char *LocalScoreToStr(SCORE s)
	{
	static char str[16];
	if (s < -100000)
		return "     *";
// Bounded write: the 6 in "%6.1f" is only a minimum width, so a very
// large score would otherwise run past the end of str.
	snprintf(str, sizeof(str), "%6.1f", s);
	return str;
	}

// Write a trace-back matrix to the log as a table.
// The first line holds one heading per prefix length of B; each following
// line starts with a prefix length of A and then lists the TBM() entry for
// every prefix length of B. A heading shows the prefix length followed by
// the consensus character of the last position in that prefix (blank for
// the empty prefix).
static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
  unsigned uPrefixCountA, unsigned uPrefixCountB)
	{
// Column headings
	Log("        ");
	for (unsigned uColB = 0; uColB != uPrefixCountB; ++uColB)
		{
		const char cB = (0 == uColB) ? ' ' : ConsensusChar(PB[uColB - 1]);
		Log(" %4u:%c", uColB, cB);
		}
	Log("\n");

// One line per prefix of A
	for (unsigned uRowA = 0; uRowA != uPrefixCountA; ++uRowA)
		{
		const char cA = (0 == uRowA) ? ' ' : ConsensusChar(PA[uRowA - 1]);
		Log("%4u:%c  ", uRowA, cA);

		unsigned uColB = 0;
		while (uColB < uPrefixCountB)
			{
			Log(" %6c", TBM(uRowA, uColB));
			++uColB;
			}
		Log("\n");
		}
	}

// Write a DP score matrix to the log as a table.
// The first line holds one heading per prefix length of B; each following
// line starts with a prefix length of A and then lists the DPM() entry,
// formatted by LocalScoreToStr, for every prefix length of B. A heading
// shows the prefix length followed by the consensus character of the last
// position in that prefix (blank for the empty prefix).
static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
  unsigned uPrefixCountA, unsigned uPrefixCountB)
	{
// Column headings
	Log("        ");
	for (unsigned uColB = 0; uColB != uPrefixCountB; ++uColB)
		{
		const char cB = (0 == uColB) ? ' ' : ConsensusChar(PB[uColB - 1]);
		Log(" %4u:%c", uColB, cB);
		}
	Log("\n");

// One line per prefix of A
	for (unsigned uRowA = 0; uRowA != uPrefixCountA; ++uRowA)
		{
		const char cA = (0 == uRowA) ? ' ' : ConsensusChar(PA[uRowA - 1]);
		Log("%4u:%c  ", uRowA, cA);

		unsigned uColB = 0;
		while (uColB < uPrefixCountB)
			{
			Log(" %s", LocalScoreToStr(DPM(uRowA, uColB)));
			++uColB;
			}
		Log("\n");
		}
	}

// Global alignment of profile A against profile B using full DP matrices.
//
// Three score matrices (M, D, I) and three matching char trace-back
// matrices are allocated with one cell per pair of prefix lengths and are
// accessed through the DPM/DPD/DPI and TBM/TBD/TBI macros. A fourth
// matrix, DPL, records the per-cell ScoreProfPos2 value and is only read
// by the TRACE listings.
//
//	PA, uLengthA	profile A and its number of positions (must be > 0)
//	PB, uLengthB	profile B and its number of positions (must be > 0)
//	Path		cleared, then filled by walking the trace-back
//			matrices from the last cell to (0, 0)
//
// Returns the best of the M, D and I scores for the last cell (D and I
// include the closing gap score of the last position).
//
// When g_bKeepSimpleDP is set, the M/D/I score and trace-back matrices are
// not freed; their pointers are stored in g_DPM, g_DPD, g_DPI, g_TBM,
// g_TBD and g_TBI instead (whoever reads them presumably frees them --
// confirm against the caller).
SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	assert(uLengthB > 0 && uLengthA > 0);

	SetTermGaps(PA, uLengthA);
	SetTermGaps(PB, uLengthB);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Allocate DP matrices
	const size_t LM = uPrefixCountA*uPrefixCountB;
	SCORE *DPL_ = new SCORE[LM];
	SCORE *DPM_ = new SCORE[LM];
	SCORE *DPD_ = new SCORE[LM];
	SCORE *DPI_ = new SCORE[LM];

	char *TBM_ = new char[LM];
	char *TBD_ = new char[LM];
	char *TBI_ = new char[LM];

// '?' marks cells that were never assigned; the trace-back loop below
// quits on any edge type other than M, D or I.
	memset(TBM_, '?', LM);
	memset(TBD_, '?', LM);
	memset(TBI_, '?', LM);

	DPM(0, 0) = 0;
	DPD(0, 0) = MINUS_INFINITY;
	DPI(0, 0) = MINUS_INFINITY;

	DPM(1, 0) = MINUS_INFINITY;
	DPD(1, 0) = PA[0].m_scoreGapOpen;
	TBD(1, 0) = 'D';
	DPI(1, 0) = MINUS_INFINITY;

	DPM(0, 1) = MINUS_INFINITY;
	DPD(0, 1) = MINUS_INFINITY;
	DPI(0, 1) = PB[0].m_scoreGapOpen;
	TBI(0, 1) = 'I';

// Empty prefix of B is special case
	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

	// D=LetterA+GapB
		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
		TBD(uPrefixLengthA, 0) = 'D';

	// I=GapA+LetterB, impossible with empty prefix
		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
		}

// Empty prefix of A is special case
	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(0, uPrefixLengthB) = MINUS_INFINITY;

	// D=LetterA+GapB, impossible with empty prefix
		DPD(0, uPrefixLengthB) = MINUS_INFINITY;

	// I=GapA+LetterB
		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
		TBI(0, uPrefixLengthB) = 'I';
		}

// Special case to agree with NWFast, no D-I transitions so...
	DPD(uLengthA, 0) = MINUS_INFINITY;
//	DPI(0, uLengthB) = MINUS_INFINITY;

// ============
// Main DP loop
// ============
// scoreGapCloseA / scoreGapCloseB hold the gap-close score of the
// previous position of A / B; they are -infinity for the first position.
	SCORE scoreGapCloseB = MINUS_INFINITY;
	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		const ProfPos &PPB = PB[uPrefixLengthB - 1];

		SCORE scoreGapCloseA = MINUS_INFINITY;
		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
			{
			const ProfPos &PPA = PA[uPrefixLengthA - 1];

			{
		// Match M=LetterA+LetterB
			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;

			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;

		// Ties are resolved in the order M, D, I.
			SCORE scoreBest;
			if (scoreMM >= scoreDM && scoreMM >= scoreIM)
				{
				scoreBest = scoreMM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
				{
				scoreBest = scoreDM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			else 
				{
				assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
				scoreBest = scoreIM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
			}

			{
		// Delete D=LetterA+GapB
			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen;
			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;

			SCORE scoreBest;
			if (scoreMD >= scoreDD)
				{
				scoreBest = scoreMD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else
				{
				assert(scoreDD >= scoreMD);
				scoreBest = scoreDD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert I=GapA+LetterB
			{
			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;

			SCORE scoreBest;
			if (scoreMI >= scoreII)
				{
				scoreBest = scoreMI;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else 
				{
				assert(scoreII > scoreMI);
				scoreBest = scoreII;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			scoreGapCloseA = PPA.m_scoreGapClose;
			}
		scoreGapCloseB = PPB.m_scoreGapClose;
		}

#if TRACE
	Log("\n");
	Log("Simple DPL:\n");
	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple DPM:\n");
	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple DPD:\n");
	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple DPI:\n");
	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple TBM:\n");
	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple TBD:\n");
	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("Simple TBI:\n");
	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

// Trace-back
// ==========
	Path.Clear();

// Find last edge
	SCORE M = DPM(uLengthA, uLengthB);
	SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
	SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
	char cEdgeType = '?';

	SCORE BestScore = MINUS_INFINITY;
	if (M >= D && M >= I)
		{
		cEdgeType = 'M';
		BestScore = M;
		}
	else if (D >= M && D >= I)
		{
		cEdgeType = 'D';
		BestScore = D;
		}
	else 
		{
		assert(I >= M && I >= D);
		cEdgeType = 'I';
		BestScore = I;
		}

#if	TRACE
	Log("Simple: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n", M, D, I, cEdgeType);
#endif

// Walk back from the last cell, prepending one edge per step, until
// both prefix lengths reach zero.
	unsigned PLA = uLengthA;
	unsigned PLB = uLengthB;
	for (;;)
		{
		PWEdge Edge;
		Edge.cType = cEdgeType;
		Edge.uPrefixLengthA = PLA;
		Edge.uPrefixLengthB = PLB;
#if	TRACE
		Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
#endif
		Path.PrependEdge(Edge);

		switch (cEdgeType)
			{
		case 'M':
			assert(PLA > 0);
			assert(PLB > 0);
			cEdgeType = TBM(PLA, PLB);
			--PLA;
			--PLB;
			break;

		case 'D':
			assert(PLA > 0);
			cEdgeType = TBD(PLA, PLB);
			--PLA;
			break;

		case 'I':
			assert(PLB > 0);
			cEdgeType = TBI(PLA, PLB);
			--PLB;
			break;
		
		default:
			Quit("Invalid edge %c", cEdgeType);
			}
		if (0 == PLA && 0 == PLB)
			break;
		}
	Path.Validate();

//	SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);

#if	TRACE
	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
	Path.LogMe();
	Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
#endif

// DPL_ is never handed to a global, so it is always released here.
	delete[] DPL_;

	if (g_bKeepSimpleDP)
		{
	// Ownership of the remaining matrices passes to the globals.
		g_DPM = DPM_;
		g_DPD = DPD_;
		g_DPI = DPI_;

		g_TBM = TBM_;
		g_TBD = TBD_;
		g_TBI = TBI_;
		}
	else
		{
		delete[] DPM_;
		delete[] DPD_;
		delete[] DPI_;

		delete[] TBM_;
		delete[] TBD_;
		delete[] TBI_;
		}

	return BestScore;
	}

#endif // SINGLE_AFFINE
@@ -0,0 +1,374 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// Scratch buffers used by GlobalAlignSP. They are allocated (and grown)
// by AllocDPMem and kept in the file-static DPM between calls.
struct DP_MEMORY
	{
// Length the buffers below were allocated with; 0 means nothing has been
// allocated yet (DPM has static storage, so it starts zeroed).
	unsigned uLength;
// Per-position gap scores, copied from m_scoreGapOpen / m_scoreGapClose
// of profile A and profile B.
	SCORE *GapOpenA;
	SCORE *GapOpenB;
	SCORE *GapCloseA;
	SCORE *GapCloseB;
// Row buffers for the M scores (previous, current and work row, going by
// the names).
	SCORE *MPrev;
	SCORE *MCurr;
	SCORE *MWork;
// Row buffers for the D scores, named the same way.
	SCORE *DPrev;
	SCORE *DCurr;
	SCORE *DWork;
// [20][uLength]: ScoreMxB[uLetter][j] is a copy of PB[j].m_AAScores[uLetter].
	SCORE **ScoreMxB;
// [uLength][20]: copy of PA[i].m_uSortOrder.
	unsigned **SortOrderA;
// One entry per column; presumably the row at which the current delete
// was opened -- confirm against GlobalAlignSP.
	unsigned *uDeletePos;
// [uLength][20]: copy of PA[i].m_fcCounts.
	FCOUNT **FreqsA;
// [uLength][uLength] ints; GlobalAlignSP zeroes the used part of each row
// before filling it.
	int **TraceBack;
	};

// Single shared instance; not reentrant, every call reuses these buffers.
static struct DP_MEMORY DPM;

// Make sure the shared scratch buffers in DPM can hold the DP working set
// for profiles with uLengthA and uLengthB positions. If the current
// allocation is already larger than required nothing is done; otherwise
// every buffer is released and allocated again at a padded size (previous
// contents are not preserved).
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// One more than the longer of the two profiles.
	const unsigned uLonger = (uLengthB < uLengthA) ? uLengthA : uLengthB;
	const unsigned uNeeded = uLonger + 1;
	if (DPM.uLength > uNeeded)
		return;

// Leave 256 entries of headroom for later, longer inputs, then move up
// to the following multiple of 32.
	unsigned uNewLength = uNeeded + 256;
	uNewLength += 32 - uNewLength%32;

// Release the previous allocation, if there was one.
	const unsigned uAllocated = DPM.uLength;
	if (0 != uAllocated)
		{
	// Rows first, then the pointer arrays that hold them.
		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			delete[] DPM.ScoreMxB[uLetter];
		for (unsigned uRow = 0; uRow < uAllocated; ++uRow)
			{
			delete[] DPM.SortOrderA[uRow];
			delete[] DPM.FreqsA[uRow];
			delete[] DPM.TraceBack[uRow];
			}
		delete[] DPM.ScoreMxB;
		delete[] DPM.SortOrderA;
		delete[] DPM.FreqsA;
		delete[] DPM.TraceBack;

		delete[] DPM.GapOpenA;
		delete[] DPM.GapOpenB;
		delete[] DPM.GapCloseA;
		delete[] DPM.GapCloseB;
		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.uDeletePos;
		}

	DPM.uLength = uNewLength;

// Flat per-position arrays.
	DPM.GapOpenA = new SCORE[uNewLength];
	DPM.GapOpenB = new SCORE[uNewLength];
	DPM.GapCloseA = new SCORE[uNewLength];
	DPM.GapCloseB = new SCORE[uNewLength];

// Pointer arrays and DP rows.
	DPM.SortOrderA = new unsigned*[uNewLength];
	DPM.FreqsA = new FCOUNT*[uNewLength];
	DPM.ScoreMxB = new SCORE*[20];
	DPM.MPrev = new SCORE[uNewLength];
	DPM.MCurr = new SCORE[uNewLength];
	DPM.MWork = new SCORE[uNewLength];

	DPM.DPrev = new SCORE[uNewLength];
	DPM.DCurr = new SCORE[uNewLength];
	DPM.DWork = new SCORE[uNewLength];
	DPM.uDeletePos = new unsigned[uNewLength];

	DPM.TraceBack = new int*[uNewLength];

// One score row per letter (20 letters), each uNewLength long.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		DPM.ScoreMxB[uLetter] = new SCORE[uNewLength];

// Per-position rows: 20 letters for the profile data, a full
// uNewLength-wide row for the trace-back matrix.
	for (unsigned uRow = 0; uRow < uNewLength; ++uRow)
		{
		DPM.SortOrderA[uRow] = new unsigned[20];
		DPM.FreqsA[uRow] = new FCOUNT[20];
		DPM.TraceBack[uRow] = new int[uNewLength];
		}
	}

// Global alignment of profile PA (uLengthA positions) against profile PB
// (uLengthB positions) over a 20-letter alphabet, with position-specific
// gap-open / gap-close scores taken from each ProfPos.
//
// Works in the file-static DPM scratch buffers (AllocDPMem is called first).
// Only two rows of the match (M) and delete (D) score vectors are kept and
// rotated after each row of A; the insert score is carried in a scalar while
// scanning a row. One int per cell is stored in TraceBack: it is zeroed up
// front, set to a positive value (i - delete start) when a delete wins and
// to a negative value (insert start - j) when an insert wins.
//
// The path is produced by TraceBackToPath() into Path; the return value is
// the best score found for the final cell.
//
// NOTE(review): MPrev[uLengthB-1], GapCloseA[uLengthA-1] etc. are indexed
// unconditionally, so both lengths presumably must be >= 1 -- confirm callers.
SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

// Local aliases for the shared scratch buffers.
	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Copy gap scores, letter sort order and letter counts of A into flat arrays.
	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;

		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

// Copy gap scores of B.
	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
		}

// Transpose B's per-position letter scores so that each letter owns one
// contiguous row indexed by j.
	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

// Zero the traceback; the main loop only overwrites a cell when a delete
// or an insert beats the diagonal move.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

// M(0,0): sum of count*score over the letters of A[0], stopping at the
// first zero count (presumably the sort order lists letters by decreasing
// count -- confirm against the profile builder).
	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	MPrev[0] = scoreSum - g_scoreCenter;

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

	// First fill MCurr[j] with the letter-pair score of A[i] vs B[j]
	// for the whole row; the best predecessor score is added later.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

		for (unsigned j = 0; j < uLengthB; ++j)
			MCurr[j] -= g_scoreCenter;

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// D for column 0: either extend the delete carried in DPrev[0] or
	// open a new one from M(i-1,0); remember where the delete started.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Best predecessor of M(i,j): diagonal, closed delete or closed insert.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update D for column j (extend vs. open from M(i-1,j)).
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

	// Rotate row buffers (Rotate is defined elsewhere); the code below reads
	// MPrev/DPrev as the row just computed.
		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,409 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"

// Scratch buffers reused across calls to GlobalAlignSPN.
// All arrays are (re)allocated together by AllocDPMem and released by
// FreeDPMemSPN; uLength is the capacity they were allocated for, and a
// value of 0 means nothing is currently allocated.
struct DP_MEMORY
	{
	unsigned uLength;

// Position-specific gap scores copied from profiles A and B
	SCORE *GapOpenA, *GapOpenB;
	SCORE *GapCloseA, *GapCloseB;

// Previous / current / spare rows of the match scores
	SCORE *MPrev, *MCurr, *MWork;

// Previous / current / spare rows of the delete scores
	SCORE *DPrev, *DCurr, *DWork;

// ScoreMxB[letter][j]: score of letter against position j of B
	SCORE **ScoreMxB;

// SortOrderA[i][n]: letter order copied from position i of A
	unsigned **SortOrderA;

// uDeletePos[j]: row at which the delete currently tracked in column j began
	unsigned *uDeletePos;

// FreqsA[i][letter]: letter counts copied from position i of A
	FCOUNT **FreqsA;

// TraceBack[i][j]: one traceback code per DP cell
	int **TraceBack;
	};

static DP_MEMORY DPM;

// Release every buffer held in the file-static DPM scratch area.
// Does nothing if no buffers are allocated (DPM.uLength == 0).
// After the buffers are freed DPM is reset to the "nothing allocated" state,
// so a later AllocDPMem starts from scratch instead of deleting the same
// pointers a second time or handing out freed memory.
void FreeDPMemSPN()
	{
	const unsigned uOldLength = DPM.uLength;
	if (0 == uOldLength)
		return;

// Per-position rows
	for (unsigned i = 0; i < uOldLength; ++i)
		{
		delete[] DPM.TraceBack[i];
		delete[] DPM.FreqsA[i];
		delete[] DPM.SortOrderA[i];
		}
// One score row per nucleotide letter
	for (unsigned n = 0; n < 4; ++n)
		delete[] DPM.ScoreMxB[n];

	delete[] DPM.MPrev;
	delete[] DPM.MCurr;
	delete[] DPM.MWork;
	delete[] DPM.DPrev;
	delete[] DPM.DCurr;
	delete[] DPM.DWork;
	delete[] DPM.uDeletePos;
	delete[] DPM.GapOpenA;
	delete[] DPM.GapOpenB;
	delete[] DPM.GapCloseA;
	delete[] DPM.GapCloseB;
	delete[] DPM.SortOrderA;
	delete[] DPM.FreqsA;
	delete[] DPM.ScoreMxB;
	delete[] DPM.TraceBack;

// Forget the freed buffers. Resetting uLength is what prevents the
// double free; clearing the pointers avoids leaving dangling values behind.
	DPM.uLength = 0;
	DPM.MPrev = 0;
	DPM.MCurr = 0;
	DPM.MWork = 0;
	DPM.DPrev = 0;
	DPM.DCurr = 0;
	DPM.DWork = 0;
	DPM.uDeletePos = 0;
	DPM.GapOpenA = 0;
	DPM.GapOpenB = 0;
	DPM.GapCloseA = 0;
	DPM.GapCloseB = 0;
	DPM.SortOrderA = 0;
	DPM.FreqsA = 0;
	DPM.ScoreMxB = 0;
	DPM.TraceBack = 0;
	}

// Make sure the DPM scratch buffers can hold a DP over uLengthA x uLengthB
// positions (4-letter alphabet). If the current capacity is already larger
// than the longest prefix count nothing happens; otherwise all buffers are
// released and reallocated with 256 entries of slack, rounded up to the next
// multiple of 32.
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// Longest prefix count that must fit
	unsigned uNeeded = uLengthB;
	if (uLengthA > uLengthB)
		uNeeded = uLengthA;
	++uNeeded;

	if (DPM.uLength > uNeeded)
		return;

// Slack for future growth, then bump to the next multiple of 32
	unsigned uNewLength = uNeeded + 256;
	uNewLength += 32 - uNewLength%32;

// Drop the old buffers, if there are any
	const unsigned uPrevLength = DPM.uLength;
	if (0 != uPrevLength)
		{
		for (unsigned uRow = 0; uRow < uPrevLength; ++uRow)
			{
			delete[] DPM.TraceBack[uRow];
			delete[] DPM.FreqsA[uRow];
			delete[] DPM.SortOrderA[uRow];
			}
		for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
			delete[] DPM.ScoreMxB[uLetter];

		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.uDeletePos;
		delete[] DPM.GapOpenA;
		delete[] DPM.GapOpenB;
		delete[] DPM.GapCloseA;
		delete[] DPM.GapCloseB;
		delete[] DPM.SortOrderA;
		delete[] DPM.FreqsA;
		delete[] DPM.ScoreMxB;
		delete[] DPM.TraceBack;
		}

	DPM.uLength = uNewLength;

// Flat per-position vectors
	DPM.GapOpenA = new SCORE[uNewLength];
	DPM.GapOpenB = new SCORE[uNewLength];
	DPM.GapCloseA = new SCORE[uNewLength];
	DPM.GapCloseB = new SCORE[uNewLength];

// Tables of row pointers; the rows themselves are allocated below
	DPM.SortOrderA = new unsigned*[uNewLength];
	DPM.FreqsA = new FCOUNT*[uNewLength];
	DPM.ScoreMxB = new SCORE*[4];

	DPM.MPrev = new SCORE[uNewLength];
	DPM.MCurr = new SCORE[uNewLength];
	DPM.MWork = new SCORE[uNewLength];

	DPM.DPrev = new SCORE[uNewLength];
	DPM.DCurr = new SCORE[uNewLength];
	DPM.DWork = new SCORE[uNewLength];
	DPM.uDeletePos = new unsigned[uNewLength];

	DPM.TraceBack = new int*[uNewLength];

	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
		DPM.ScoreMxB[uLetter] = new SCORE[uNewLength];

	for (unsigned uRow = 0; uRow < uNewLength; ++uRow)
		{
		DPM.SortOrderA[uRow] = new unsigned[4];
		DPM.FreqsA[uRow] = new FCOUNT[4];
		DPM.TraceBack[uRow] = new int[uNewLength];
		}
	}

// Global alignment of profile PA (uLengthA positions) against profile PB
// (uLengthB positions) over a 4-letter (nucleotide) alphabet, with
// position-specific gap-open / gap-close scores taken from each ProfPos.
//
// Quits unless the global alphabet g_Alpha is ALPHA_DNA or ALPHA_RNA.
//
// Works in the file-static DPM scratch buffers (AllocDPMem is called first).
// Only two rows of the match (M) and delete (D) score vectors are kept and
// rotated after each row of A; the insert score is carried in a scalar while
// scanning a row. One int per cell is stored in TraceBack: it is zeroed up
// front, set to a positive value (i - delete start) when a delete wins and
// to a negative value (insert start - j) when an insert wins.
//
// The path is produced by TraceBackToPath() into Path; the return value is
// the best score found for the final cell.
//
// NOTE(review): MPrev[uLengthB-1], GapCloseA[uLengthA-1] etc. are indexed
// unconditionally, so both lengths presumably must be >= 1 -- confirm callers.
SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
// Accept either nucleotide alphabet. (The test used to be
// "DNA != alpha || RNA == alpha", which rejected RNA input.)
	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("GlobalAlignSPN: must be nucleo");

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

// Local aliases for the shared scratch buffers.
	SCORE *GapOpenA = DPM.GapOpenA;
	SCORE *GapOpenB = DPM.GapOpenB;
	SCORE *GapCloseA = DPM.GapCloseA;
	SCORE *GapCloseB = DPM.GapCloseB;

	unsigned **SortOrderA = DPM.SortOrderA;
	FCOUNT **FreqsA = DPM.FreqsA;
	SCORE **ScoreMxB = DPM.ScoreMxB;
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Copy gap scores, letter sort order and letter counts of A into flat arrays.
	for (unsigned i = 0; i < uLengthA; ++i)
		{
		GapOpenA[i] = PA[i].m_scoreGapOpen;
		GapCloseA[i] = PA[i].m_scoreGapClose;

		for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
			{
			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
			}
		}

// Copy gap scores of B.
	for (unsigned j = 0; j < uLengthB; ++j)
		{
		GapOpenB[j] = PB[j].m_scoreGapOpen;
		GapCloseB[j] = PB[j].m_scoreGapClose;
		}

// Transpose B's per-position letter scores so that each letter owns one
// contiguous row indexed by j.
	for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
		{
		for (unsigned j = 0; j < uLengthB; ++j)
			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
		}

// Zero the traceback; the main loop only overwrites a cell when a delete
// or an insert beats the diagonal move.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	unsigned **ptrSortOrderA = SortOrderA;
	FCOUNT **ptrFreqsA = FreqsA;
	assert(ptrSortOrderA == &(SortOrderA[0]));
	assert(ptrFreqsA == &(FreqsA[0]));
	TraceBack[0][0] = 0;

// M(0,0): sum of count*score over the letters of A[0], stopping at the
// first zero count (presumably the sort order lists letters by decreasing
// count -- confirm against the profile builder).
	SCORE scoreSum = 0;
	unsigned *ptrSortOrderAi = SortOrderA[0];
	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
	FCOUNT *ptrFreqsAi = FreqsA[0];
	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
		{
		const unsigned uLetter = *ptrSortOrderAi;
		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
		if (0 == fcLetter)
			break;
		scoreSum += fcLetter*ScoreMxB[uLetter][0];
		}
	MPrev[0] = scoreSum - g_scoreCenter;

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		SCORE scoreSum = 0;
		unsigned *ptrSortOrderAi = SortOrderA[0];
		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
		FCOUNT *ptrFreqsAi = FreqsA[0];
		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			scoreSum += fcLetter*ScoreMxB[uLetter][j];
			}
		MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		++ptrSortOrderA;
		++ptrFreqsA;
		assert(ptrSortOrderA == &(SortOrderA[i]));
		assert(ptrFreqsA == &(FreqsA[i]));

	// First fill MCurr[j] with the letter-pair score of A[i] vs B[j]
	// for the whole row; the best predecessor score is added later.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
		const FCOUNT *FreqsAi = *ptrFreqsA;

		const unsigned *SortOrderAi = *ptrSortOrderA;
		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;
		const SCORE *ptrMCurrMax = MCurr + uLengthB;
		for (const unsigned *ptrSortOrderAi = SortOrderAi;
		  ptrSortOrderAi != ptrSortOrderAiEnd;
		  ++ptrSortOrderAi)
			{
			const unsigned uLetter = *ptrSortOrderAi;
			SCORE *NSBR_Letter = ScoreMxB[uLetter];
			const FCOUNT fcLetter = FreqsAi[uLetter];
			if (0 == fcLetter)
				break;
			SCORE *ptrNSBR = NSBR_Letter;
			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
				*ptrMCurr += fcLetter*(*ptrNSBR++);
			}

		for (unsigned j = 0; j < uLengthB; ++j)
			MCurr[j] -= g_scoreCenter;

		ptrMCurr_j = MCurr;
		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// D for column 0: either extend the delete carried in DPrev[0] or
	// open a new one from M(i-1,0); remember where the delete started.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;
		const SCORE scoreGapOpenAi = GapOpenA[i];
		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + GapOpenB[j];
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Best predecessor of M(i,j): diagonal, closed delete or closed insert.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			assert(ptrSortOrderA == &(SortOrderA[i]));
			assert(ptrFreqsA == &(FreqsA[i]));

			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update D for column j (extend vs. open from M(i-1,j)).
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + scoreGapOpenAi;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

	// Rotate row buffers (Rotate is defined elsewhere); the code below reads
	// MPrev/DPrev as the row just computed.
		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1] + GapOpenB[j];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,318 @@
#include "muscle.h"
#include "profile.h"
#include "pwpath.h"
#include "seq.h"

extern SCOREMATRIX VTML_SP;

// SUBST(i, j): substitution score for position i of A against position j
// of B, read from the row pointer table MxRowA using B's letter at j.
// Expands to names (MxRowA, seqB) that must be in scope at the point of use.
// The commented-out variant routes the lookup through Subst() instead.
// NOTE(review): no use of SUBST is visible in this part of the file.
// #define SUBST(i, j)	Subst(seqA, seqB, i, j)
#define SUBST(i, j)		MxRowA[i][seqB.GetLetter(j)]

// Substitution score for letter i of seqA against letter j of seqB:
// the VTML_SP matrix entry for the two letters plus g_scoreCenter.
// Both indexes are only checked by assert.
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
	{
	assert(i < seqA.Length());
	assert(j < seqB.Length());

	const unsigned uRow = seqA.GetLetter(i);
	const unsigned uCol = seqB.GetLetter(j);
	const SCORE *ptrMatrixRow = VTML_SP[uRow];
	return ptrMatrixRow[uCol] + g_scoreCenter;
	}

// Scratch buffers reused across calls to GlobalAlignSS.
// All arrays are (re)allocated together by AllocDPMem; uLength is the
// capacity they were allocated for, and 0 means nothing is allocated yet.
struct DP_MEMORY
	{
	unsigned uLength;

// Previous / current / spare rows of the match scores
	SCORE *MPrev, *MCurr, *MWork;

// Previous / current / spare rows of the delete scores
	SCORE *DPrev, *DCurr, *DWork;

// MxRowA[i]: substitution matrix row selected by letter i of sequence A
	SCORE **MxRowA;

// LettersB[j]: letter code of position j of sequence B
	unsigned *LettersB;

// uDeletePos[j]: row at which the delete currently tracked in column j began
	unsigned *uDeletePos;

// TraceBack[i][j]: one traceback code per DP cell
	int **TraceBack;
	};

static DP_MEMORY DPM;

// Make sure the DPM scratch buffers can hold a DP over uLengthA x uLengthB
// positions. If the current capacity is already larger than the longest
// prefix count nothing happens; otherwise all buffers are released and
// reallocated with 256 entries of slack, rounded up to the next multiple
// of 32.
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
	{
// Longest prefix count that must fit
	unsigned uNeeded = uLengthB;
	if (uLengthA > uLengthB)
		uNeeded = uLengthA;
	++uNeeded;

	if (DPM.uLength > uNeeded)
		return;

// Slack for future growth, then bump to the next multiple of 32
	unsigned uNewLength = uNeeded + 256;
	uNewLength += 32 - uNewLength%32;

// Drop the old buffers, if there are any
	const unsigned uPrevLength = DPM.uLength;
	if (0 != uPrevLength)
		{
		for (unsigned uRow = 0; uRow < uPrevLength; ++uRow)
			delete[] DPM.TraceBack[uRow];

		delete[] DPM.MPrev;
		delete[] DPM.MCurr;
		delete[] DPM.MWork;
		delete[] DPM.DPrev;
		delete[] DPM.DCurr;
		delete[] DPM.DWork;
		delete[] DPM.MxRowA;
		delete[] DPM.LettersB;
		delete[] DPM.uDeletePos;
		delete[] DPM.TraceBack;
		}

	DPM.uLength = uNewLength;

	DPM.MPrev = new SCORE[uNewLength];
	DPM.MCurr = new SCORE[uNewLength];
	DPM.MWork = new SCORE[uNewLength];

	DPM.DPrev = new SCORE[uNewLength];
	DPM.DCurr = new SCORE[uNewLength];
	DPM.DWork = new SCORE[uNewLength];
	DPM.MxRowA = new SCORE *[uNewLength];
	DPM.LettersB = new unsigned[uNewLength];
	DPM.uDeletePos = new unsigned[uNewLength];

// Square traceback table: uNewLength rows of uNewLength ints
	DPM.TraceBack = new int*[uNewLength];
	for (unsigned uRow = 0; uRow < uNewLength; ++uRow)
		DPM.TraceBack[uRow] = new int[uNewLength];
	}

// For every position of s, store in Row[] a pointer to the VTML_SP matrix
// row of the letter at that position. Letter codes of 20 or more use the
// AX_X row instead. Row must have room for s.Length() entries.
static void RowFromSeq(const Seq &s, SCORE *Row[])
	{
	const unsigned uColCount = s.Length();
	for (unsigned uPos = 0; uPos != uColCount; ++uPos)
		{
		const char cResidue = s.GetChar(uPos);
		unsigned uLetter = CharToLetter(cResidue);
		if (uLetter >= 20)
			uLetter = AX_X;
		Row[uPos] = VTML_SP[uLetter];
		}
	}

// Convert every character of s to its letter code and store it in
// Letters[]. Letter codes of 20 or more are replaced by AX_X.
// Letters must have room for s.Length() entries.
static void LettersFromSeq(const Seq &s, unsigned Letters[])
	{
	const unsigned uColCount = s.Length();
	for (unsigned uPos = 0; uPos != uColCount; ++uPos)
		{
		const char cResidue = s.GetChar(uPos);
		unsigned uLetter = CharToLetter(cResidue);
		if (uLetter >= 20)
			uLetter = AX_X;
		Letters[uPos] = uLetter;
		}
	}

// Global alignment of two sequences using the VTML_SP substitution matrix
// and a single gap-open score g_scoreGapOpen (terminal gaps are charged
// half, see the "term gaps half" lines).
//
// Works in the file-static DPM scratch buffers (AllocDPMem is called first).
// Only two rows of the match (M) and delete (D) score vectors are kept and
// rotated after each row of A; the insert score is carried in a scalar while
// scanning a row. One int per cell is stored in TraceBack: 0 unless
// overwritten, a positive value (i - delete start) when a delete wins and a
// negative value (insert start - j) when an insert wins.
//
// The path is produced by TraceBackToPath() into Path; the return value is
// the best score found for the final cell.
//
// NOTE(review): position 0 of both sequences is read unconditionally, so
// both sequences presumably must be non-empty -- confirm callers.
SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
	{
	const unsigned uLengthA = seqA.Length();
	const unsigned uLengthB = seqB.Length();
	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	SCORE **MxRowA = DPM.MxRowA;
	unsigned *LettersB = DPM.LettersB;

// Pre-compute the matrix row for each letter of A and the letter codes of B.
	RowFromSeq(seqA, MxRowA);
	LettersFromSeq(seqB, LettersB);

	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

// Zero the traceback on every call. The main loop only writes a cell when
// a delete or an insert beats the diagonal move, so without this the cells
// where the diagonal wins would keep values from an earlier alignment (or
// uninitialized memory). This used to be done in DEBUG builds only.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

// Special case for i=0
	TraceBack[0][0] = 0;
	MPrev[0] = MxRowA[0][LettersB[0]];

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		unsigned uLetterB = LettersB[j];

	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
	// First fill MCurr[j] with the substitution score of A[i] vs B[j]
	// for the whole row; the best predecessor score is added later.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));

		const SCORE *RowA = MxRowA[i];
		const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
		unsigned *ptrLettersB = LettersB;
		for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
			{
			*ptrMCurr_j = RowA[*ptrLettersB];
			++ptrLettersB;
			}

		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		ptrMCurr_j = MCurr;
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += g_scoreGapOpen/2;	// term gaps half

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

	// D for column 0: either extend the delete carried in DPrev[0] or
	// open a new one from M(i-1,0); remember where the delete started.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			SCORE INew = MPrev_j + g_scoreGapOpen;
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

		// Best predecessor of M(i,j): diagonal, delete or insert.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1;
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

		// Update D for column j (extend vs. open from M(i-1,j)).
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + g_scoreGapOpen;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

	// Rotate row buffers (Rotate is defined elsewhere); the code below reads
	// MPrev/DPrev as the row just computed.
		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2;	// term gaps half
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev - g_scoreGapOpen/2;
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
@@ -0,0 +1,390 @@
#include "muscle.h"
#include <math.h>
#include <stdio.h>	// for sprintf
#include "pwpath.h"
#include "profile.h"
#include "gapscoredimer.h"

#define	TRACE	0

static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
  const char *TBM_, const char *TBD_, const char *TBI_,
  unsigned uLengthA, unsigned uLengthB, PWPath &Path);

// Format a score for the trace listings: "%6.3g", or a right-aligned '*'
// for MINUS_INFINITY. The result points either at a string literal or at a
// static buffer that is overwritten by the next call.
static const char *LocalScoreToStr(SCORE s)
	{
	static char szBuffer[16];
	if (MINUS_INFINITY != s)
		{
		sprintf(szBuffer, "%6.3g", s);
		return szBuffer;
		}
	return "     *";
	}

#if	TRACE
// Log one DP score matrix as a table: a header row with the prefix lengths
// of B and the consensus character of each B position, then one row per
// prefix length of A with the formatted DPM() value of every cell.
static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
  unsigned uPrefixCountA, unsigned uPrefixCountB)
	{
	unsigned uPLA;
	unsigned uPLB;

// Header row
	Log("        ");
	for (uPLB = 0; uPLB != uPrefixCountB; ++uPLB)
		{
		const char cB = (0 == uPLB) ? ' ' : ConsensusChar(PB[uPLB - 1]);
		Log(" %4u:%c", uPLB, cB);
		}
	Log("\n");

// One row per prefix of A
	for (uPLA = 0; uPLA != uPrefixCountA; ++uPLA)
		{
		const char cA = (0 == uPLA) ? ' ' : ConsensusChar(PA[uPLA - 1]);
		Log("%4u:%c  ", uPLA, cA);
		for (uPLB = 0; uPLB != uPrefixCountB; ++uPLB)
			Log(" %s", LocalScoreToStr(DPM(uPLA, uPLB)));
		Log("\n");
		}
	}

// Log one traceback matrix as a table: two header rows (prefix lengths of
// B, then the consensus character of each B position), followed by one row
// per prefix length of A with the TBM() character of every cell.
static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
  unsigned uPrefixCountA, unsigned uPrefixCountB)
	{
	unsigned uPLA;
	unsigned uPLB;

// Header row 1: prefix lengths of B
	Log("        ");
	for (uPLB = 0; uPLB != uPrefixCountB; ++uPLB)
		Log("%2d", uPLB);
	Log("\n");

// Header row 2: consensus characters of B
	Log("        ");
	for (uPLB = 0; uPLB != uPrefixCountB; ++uPLB)
		{
		const char cB = (0 == uPLB) ? ' ' : ConsensusChar(PB[uPLB - 1]);
		Log(" %c", cB);
		}
	Log("\n");

// One row per prefix of A
	for (uPLA = 0; uPLA != uPrefixCountA; ++uPLA)
		{
		const char cA = (0 == uPLA) ? ' ' : ConsensusChar(PA[uPLA - 1]);
		Log("%4u:%c  ", uPLA, cA);
		for (uPLB = 0; uPLB != uPrefixCountB; ++uPLB)
			Log(" %c", TBM(uPLA, uPLB));
		Log("\n");
		}
	}
#endif // TRACE

// PPTerm is the profile position passed to the gap-score functions for the
// boundary cells of the DP (empty prefix of A or B). It is filled in once,
// during static initialization, through PPTermInitialized below.
static ProfPos PPTerm;

// Set the fields of PPTerm: not all-gaps, occupancy 1, m_LL = 1 and the
// other three pair fields (m_LG, m_GL, m_GG) = 0. Always returns true so it
// can be used as a static initializer.
static bool InitializePPTerm()
	{
	PPTerm.m_bAllGaps = false;
	PPTerm.m_fOcc = 1;

	PPTerm.m_LL = 1;
	PPTerm.m_LG = 0;
	PPTerm.m_GL = 0;
	PPTerm.m_GG = 0;

	return true;
	}

static bool PPTermInitialized = InitializePPTerm();

// Log-expectation style score of two profile positions.
// Sums count(A, letter) * score(B, letter) over the letters of PPA in sort
// order, stopping at the first letter with a zero count. A sum of exactly 0
// yields -2.5; otherwise the result is logf(sum) scaled by the product of
// the two occupancies.
static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)
	{
	SCORE Score = 0;
	unsigned uRank = 0;
	while (uRank < 20)
		{
		const unsigned uLetter = PPA.m_uSortOrder[uRank];
		const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
		if (0 == fcLetter)
			break;
		Score += fcLetter*PPB.m_AAScores[uLetter];
		++uRank;
		}

	if (0 != Score)
		{
		const SCORE logScore = logf(Score);
		return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));
		}
	return -2.5;
	}

// Sum-of-pairs style score of two profile positions.
// Sums count(A, letter) * score(B, letter) over the letters of PPA in sort
// order, stopping at the first letter with a zero count.
static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)
	{
	SCORE Score = 0;
	unsigned uRank = 0;
	while (uRank < 20)
		{
		const unsigned uLetter = PPA.m_uSortOrder[uRank];
		const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];
		if (0 == fcLetter)
			break;
		Score += fcLetter*PPB.m_AAScores[uLetter];
		++uRank;
		}
	return Score;
	}

// Score two profile positions with the function selected by the global
// g_PPScore: the LE variant for PPSCORE_LE, the PSP variant for PPSCORE_SP
// and PPSCORE_SV. Any other value is reported through Quit().
static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)
	{
	if (PPSCORE_LE == g_PPScore)
		return ScoreProfPosDimerLE(PPA, PPB);

	if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
		return ScoreProfPosDimerPSP(PPA, PPB);

	Quit("Invalid g_PPScore");
	return 0;
	}

// Global alignment dynamic programming
// This variant optimizes the profile-profile SP score under the
// dimer approximation.
//
// Aligns profile PA (uLengthA positions) with profile PB (uLengthB
// positions); both lengths must be > 0 (checked by assert only).
//
// Three full score matrices (M = letter/letter, D = letter in A vs gap in B,
// I = gap in A vs letter in B) and three matching traceback matrices are
// allocated on the heap, each with (uLengthA+1)*(uLengthB+1) cells, and
// freed before returning. Cells are accessed through the DPM/DPD/DPI and
// TBM/TBD/TBI macros, which are defined outside this part of the file
// (presumably they index the *_ arrays using uPrefixCountA -- confirm).
// Traceback cells hold the state of the best predecessor ('M', 'D' or 'I'),
// 'S' for a start cell and '?' for an impossible cell.
//
// The path is written to Path by TraceBackDimer(); its score is returned.
SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Allocate DP matrices
// NOTE(review): the product is computed in unsigned before conversion to
// size_t, so it can wrap for very long profiles.
	const size_t LM = uPrefixCountA*uPrefixCountB;
	SCORE *DPM_ = new SCORE[LM];
	SCORE *DPD_ = new SCORE[LM];
	SCORE *DPI_ = new SCORE[LM];

	char *TBM_ = new char[LM];
	char *TBD_ = new char[LM];
	char *TBI_ = new char[LM];

// Cell (0,0): empty prefixes, only the M state is a valid start.
	DPM(0, 0) = 0;
	DPD(0, 0) = MINUS_INFINITY;
	DPI(0, 0) = MINUS_INFINITY;

	TBM(0, 0) = 'S';
	TBD(0, 0) = '?';
	TBI(0, 0) = '?';

// Cell (1,0): first letter of A against nothing, a delete that starts here.
	DPM(1, 0) = MINUS_INFINITY;
	DPD(1, 0) = GapScoreMD(PA[0], PPTerm);
	DPI(1, 0) = MINUS_INFINITY;

	TBM(1, 0) = '?';
	TBD(1, 0) = 'S';
	TBI(1, 0) = '?';

// Cell (0,1): first letter of B against nothing, an insert that starts here.
	DPM(0, 1) = MINUS_INFINITY;
	DPD(0, 1) = MINUS_INFINITY;
	DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);

	TBM(0, 1) = '?';
	TBD(0, 1) = '?';
	TBI(0, 1) = 'S';

// Empty prefix of B is special case
	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
		TBM(uPrefixLengthA, 0) = '?';

	// D=LetterA+GapB
		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +
		  GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);
		TBD(uPrefixLengthA, 0) = 'D';

	// I=GapA+LetterB, impossible with empty prefix
		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
		TBI(uPrefixLengthA, 0) = '?';
		}

// Empty prefix of A is special case
	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
	// M=LetterA+LetterB, impossible with empty prefix
		DPM(0, uPrefixLengthB) = MINUS_INFINITY;
		TBM(0, uPrefixLengthB) = '?';

	// D=LetterA+GapB, impossible with empty prefix
		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
		TBD(0, uPrefixLengthB) = '?';

	// I=GapA+LetterB
		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +
		  GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);
		TBI(0, uPrefixLengthB) = 'I';
		}

// ============
// Main DP loop
// ============
// For every cell each of the three states takes the best of its three
// possible predecessor states (ties keep the earlier candidate in the
// order M, D, I) and records which one won.
	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		const ProfPos &PPB = PB[uPrefixLengthB - 1];
		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
			{
			const ProfPos &PPA = PA[uPrefixLengthA - 1];
			{
		// Match M=LetterA+LetterB
			SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);

			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);
			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);
			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);

			SCORE scoreBest = scoreMM;
			char c = 'M';
			if (scoreDM > scoreBest)
				{
				scoreBest = scoreDM;
				c = 'D';
				}
			if (scoreIM > scoreBest)
				{
				scoreBest = scoreIM;
				c = 'I';
				}

			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
			TBM(uPrefixLengthA, uPrefixLengthB) = c;
			}
			{
		// Delete D=LetterA+GapB
			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);
			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);
			SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);

			SCORE scoreBest = scoreMD;
			char c = 'M';
			if (scoreDD > scoreBest)
				{
				scoreBest = scoreDD;
				c = 'D';
				}
			if (scoreID > scoreBest)
				{
				scoreBest = scoreID;
				c = 'I';
				}

			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			TBD(uPrefixLengthA, uPrefixLengthB) = c;
			}
			{
		// Insert I=GapA+LetterB
			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);
			SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);
			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);

			SCORE scoreBest = scoreMI;
			char c = 'M';
			if (scoreDI > scoreBest)
				{
				scoreBest = scoreDI;
				c = 'D';
				}
			if (scoreII > scoreBest)
				{
				scoreBest = scoreII;
				c = 'I';
				}

			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			TBI(uPrefixLengthA, uPrefixLengthB) = c;
			}
			}
		}

#if TRACE
	Log("DPM:\n");
	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("DPD:\n");
	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("DPI:\n");
	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("TBM:\n");
	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("TBD:\n");
	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("TBI:\n");
	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

// Recover the path from the traceback matrices.
	SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,
	  uLengthA, uLengthB, Path);

#if	TRACE
	Log("GlobalAlignDimer score = %.3g\n", Score);
#endif

	delete[] DPM_;
	delete[] DPD_;
	delete[] DPI_;

	delete[] TBM_;
	delete[] TBD_;
	delete[] TBI_;

	return Score;
	}

// Walk the three traceback matrices from cell (uLengthA, uLengthB) back to
// (0, 0), prepending one edge to Path per step. The walk starts in the
// state (M, D or I) with the highest score in the final cell; ties keep the
// earlier state in that order. Returns that highest final score.
// An unexpected traceback character is reported through Quit().
static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
  const char *TBM_, const char *TBD_, const char *TBI_,
  unsigned uLengthA, unsigned uLengthB, PWPath &Path)
	{
// Kept under this exact name for the matrix accessor macros
	const unsigned uPrefixCountA = uLengthA + 1;

// Choose the starting state
	SCORE scoreMax = DPM(uLengthA, uLengthB);
	char cEdge = 'M';

	const SCORE scoreEndD = DPD(uLengthA, uLengthB);
	if (scoreEndD > scoreMax)
		{
		scoreMax = scoreEndD;
		cEdge = 'D';
		}

	const SCORE scoreEndI = DPI(uLengthA, uLengthB);
	if (scoreEndI > scoreMax)
		{
		scoreMax = scoreEndI;
		cEdge = 'I';
		}

	unsigned uPLA = uLengthA;
	unsigned uPLB = uLengthB;
	while (0 != uPLA || 0 != uPLB)
		{
		PWEdge Edge;
		Edge.cType = cEdge;
		Edge.uPrefixLengthA = uPLA;
		Edge.uPrefixLengthB = uPLB;
		Path.PrependEdge(Edge);

#if TRACE
		Log("PLA=%u PLB=%u Edge=%c\n", uPLA, uPLB, cEdge);
#endif
	// Look up the predecessor state, then step back in A and/or B
		switch (cEdge)
			{
		case 'M':
			assert(uPLA > 0 && uPLB > 0);
			cEdge = TBM(uPLA, uPLB);
			--uPLA;
			--uPLB;
			break;
		case 'D':
			assert(uPLA > 0);
			cEdge = TBD(uPLA, uPLB);
			--uPLA;
			break;
		case 'I':
			assert(uPLB > 0);
			cEdge = TBI(uPLA, uPLB);
			--uPLB;
			break;
		default:
			Quit("Invalid edge PLA=%u PLB=%u %c", uPLA, uPLB, cEdge);
			}
		}
#if	TRACE
	Path.LogMe();
#endif
	return scoreMax;
	}
@@ -0,0 +1,289 @@
#if	WIN32
#include <windows.h>
#include <share.h>
#endif

#include "muscle.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include <time.h>
#include <errno.h>

#ifndef	MAX_PATH
#define	MAX_PATH	260
#endif

// Name of the log file. While it is empty, Log() returns without writing.
static char g_strListFileName[MAX_PATH];
// When true, Log() opens the log file with mode "a" instead of "w".
static bool g_bListFileAppend = false;

// Value stored by SetSeqWeightMethod() and returned by GetSeqWeightMethod().
static SEQWEIGHT g_SeqWeight = SEQWEIGHT_Undefined;

// Store Method as the current sequence weighting method.
void SetSeqWeightMethod(SEQWEIGHT Method)
	{
	g_SeqWeight = Method;
	}

// Return the method last passed to SetSeqWeightMethod(), or
// SEQWEIGHT_Undefined if it has never been called.
SEQWEIGHT GetSeqWeightMethod()
	{
	return g_SeqWeight;
	}

// Set the log file name used by Log() and choose between append mode
// (bAppend true) and overwrite mode.
//
// The name must be shorter than MAX_PATH. Debug builds assert this; release
// builds truncate an over-long name to MAX_PATH-1 characters rather than
// writing past the end of the buffer.
void SetListFileName(const char *ptrListFileName, bool bAppend)
	{
	assert(strlen(ptrListFileName) < MAX_PATH);
	strncpy(g_strListFileName, ptrListFileName, MAX_PATH - 1);
	// strncpy does not terminate the destination when the source is too long.
	g_strListFileName[MAX_PATH - 1] = 0;
	g_bListFileAppend = bAppend;
	}

// Write a printf-style message to the log file.
//
// Does nothing while no log file name has been set. The file is opened on the
// first call that writes (mode "a" or "w" according to g_bListFileAppend) and
// kept open for later calls. If it cannot be opened, the error is reported
// with perror() and the process exits with EXIT_NotStarted.
void Log(const char szFormat[], ...)
	{
	if (0 == g_strListFileName[0])
		return;

	static FILE *f = NULL;
	if (NULL == f)
		{
		const char *mode = g_bListFileAppend ? "a" : "w";
		f = _fsopen(g_strListFileName, mode, _SH_DENYNO);
		}
	if (NULL == f)
		{
		perror(g_strListFileName);
		exit(EXIT_NotStarted);
		}

	// Format straight into the stream: no intermediate fixed-size buffer
	// that a long message could overrun.
	va_list ArgList;
	va_start(ArgList, szFormat);
	vfprintf(f, szFormat, ArgList);
	va_end(ArgList);
	// Flush after every message so the log is complete even after a crash.
	fflush(f);
	}

// Return the current local time in asctime() format without the trailing
// newline. The result points to a static buffer that the next call overwrites.
const char *GetTimeAsStr()
	{
	static char szTimeStamp[32];
	time_t tNow;
	time(&tNow);
	strcpy(szTimeStamp, asctime(localtime(&tNow)));
	// asctime() puts its newline at index 24; cut the string there.
	assert('\n' == szTimeStamp[24]);
	szTimeStamp[24] = 0;
	return szTimeStamp;
	}

// Exit immediately with error message, printf-style.
//
// The message is written to stderr and to the log file, followed by a
// "Stopped <time>" line in the log. On Windows, when a debugger is attached,
// a message box offers to break into the debugger. The process then exits
// with EXIT_FatalError. Messages longer than the local buffer are truncated.
void Quit(const char szFormat[], ...)
	{
	va_list ArgList;
	char szStr[4096];

	va_start(ArgList, szFormat);
	// Bounded formatting: a long message must not overrun szStr.
#if	defined(_MSC_VER) && _MSC_VER < 1900
	// Older Microsoft runtimes only provide the underscore-prefixed variant.
	_vsnprintf(szStr, sizeof(szStr) - 1, szFormat, ArgList);
#else
	vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
#endif
	szStr[sizeof(szStr) - 1] = 0;
	va_end(ArgList);

	fprintf(stderr, "\n*** ERROR ***  %s\n", szStr);

	Log("\n*** FATAL ERROR ***  ");
	Log("%s\n", szStr);
	Log("Stopped %s\n", GetTimeAsStr());

#ifdef WIN32
	if (IsDebuggerPresent())
		{
		int iBtn = MessageBox(NULL, szStr, "muscle", MB_ICONERROR | MB_OKCANCEL);
		if (IDCANCEL == iBtn)
			Break();
		}
#endif
	exit(EXIT_FatalError);
	}

// Report a printf-style warning on stderr and in the log file, then return.
// Messages longer than the local buffer are truncated.
void Warning(const char szFormat[], ...)
	{
	va_list ArgList;
	char szStr[4096];

	va_start(ArgList, szFormat);
	// Bounded formatting: a long message must not overrun szStr.
#if	defined(_MSC_VER) && _MSC_VER < 1900
	// Older Microsoft runtimes only provide the underscore-prefixed variant.
	_vsnprintf(szStr, sizeof(szStr) - 1, szFormat, ArgList);
#else
	vsnprintf(szStr, sizeof(szStr), szFormat, ArgList);
#endif
	szStr[sizeof(szStr) - 1] = 0;
	va_end(ArgList);

	fprintf(stderr, "\n*** WARNING *** %s\n", szStr);
	Log("\n*** WARNING ***  %s\n", szStr);
	}

// Remove leading and trailing blanks from string
// The string is edited in place: leading blanks first, then trailing ones.
void TrimBlanks(char szStr[])
	{
	TrimLeadingBlanks(szStr);
	TrimTrailingBlanks(szStr);
	}

// Remove leading space characters (' ' only, not tabs) from szStr in place.
void TrimLeadingBlanks(char szStr[])
	{
	// Count the blanks once, then shift the rest of the string a single
	// time instead of moving the whole string once per blank.
	size_t uBlanks = 0;
	while (' ' == szStr[uBlanks])
		++uBlanks;
	if (0 == uBlanks)
		return;
	// +1 so the terminating null moves along with the text.
	memmove(szStr, szStr + uBlanks, strlen(szStr + uBlanks) + 1);
	}

// Remove trailing space characters (' ' only) from szStr in place by
// overwriting each of them with a terminator.
void TrimTrailingBlanks(char szStr[])
	{
	char *pEnd = szStr + strlen(szStr);
	while (pEnd != szStr && ' ' == pEnd[-1])
		*--pEnd = 0;
	}

// Verbosity query; this implementation always reports true.
bool Verbose()
	{
	return true;
	}

// Parse pszStr as a floating-point number with atof() and return it as a
// SCORE. Like atof(), no parse errors are reported.
SCORE StrToScore(const char *pszStr)
	{
	const double dParsed = atof(pszStr);
	return (SCORE) dParsed;
	}

// Delete every space, tab, newline and carriage return from szStr, in place.
void StripWhitespace(char szStr[])
	{
	char *pOut = szStr;
	for (const char *pIn = szStr; 0 != *pIn; ++pIn)
		{
		const char c = *pIn;
		const bool bWhite = (' ' == c || '\t' == c || '\n' == c || '\r' == c);
		if (!bWhite)
			*pOut++ = c;
		}
	*pOut = 0;
	}

// Delete every '-' character from szStr, in place.
void StripGaps(char szStr[])
	{
	char *pOut = szStr;
	for (const char *pIn = szStr; 0 != *pIn; ++pIn)
		{
		if ('-' == *pIn)
			continue;
		*pOut++ = *pIn;
		}
	*pOut = 0;
	}

// Return true if Str is an optional '+' or '-' followed by one or more
// decimal digits and nothing else. An empty string, or a sign with no
// digits after it, is not a valid integer.
bool IsValidSignedInteger(const char *Str)
	{
	if ('+' == *Str || '-' == *Str)
		++Str;
	// At least one digit is required after the optional sign.
	if (0 == *Str)
		return false;
	while (char c = *Str++)
		// Cast: passing a negative char value to isdigit() is undefined.
		if (!isdigit((unsigned char) c))
			return false;
	return true;
	}

// Return true if Str is a non-empty run of decimal digits with no sign.
bool IsValidInteger(const char *Str)
	{
	if (0 == *Str)
		return false;
	while (char c = *Str++)
		// Cast: passing a negative char value to isdigit() is undefined.
		if (!isdigit((unsigned char) c))
			return false;
	return true;
	}

// Is c valid as first character in an identifier?
// That is: a letter (per isalpha) or an underscore.
bool isidentf(char c)
	{
	// Cast: passing a negative char value to isalpha() is undefined.
	return isalpha((unsigned char) c) || '_' == c;
	}

// Is c valid character in an identifier?
// That is: a letter, a decimal digit or an underscore.
bool isident(char c)
	{
	// Cast: passing a negative char value to the ctype functions is undefined.
	const unsigned char uc = (unsigned char) c;
	return isalpha(uc) || isdigit(uc) || '_' == c;
	}

// Return true if Str begins with a character accepted by isidentf() and all
// of its characters are accepted by isident(). An empty string is rejected.
bool IsValidIdentifier(const char *Str)
	{
	if (!isidentf(*Str))
		return false;
	for (const char *p = Str; 0 != *p; ++p)
		{
		if (!isident(*p))
			return false;
		}
	return true;
	}

void SetLogFile()
	{
	const char *strFileName = ValueOpt("loga");
	if (0 != strFileName)
		g_bListFileAppend = true;
	else
		strFileName = ValueOpt("log");
	if (0 == strFileName)
		return;
	strcpy(g_strListFileName, strFileName);
	}

// Get filename, stripping any extension and directory parts.
//
// szPath   Path that may use '/' or '\' as directory separator.
// szName   Receives the base name, always null-terminated.
// uBytes   Size of szName in bytes; longer names are truncated to fit.
//          Nothing is written when uBytes is zero.
//
// Only the last extension is removed ("a.b.c" gives "a.b"). A dot that occurs
// in the directory part ("./file", "dir.v2/name") is not an extension.
void NameFromPath(const char szPath[], char szName[], unsigned uBytes)
	{
	if (0 == uBytes)
		return;

	// The name begins just after the last separator of either flavour.
	const char *pstrBegin = szPath;
	for (const char *p = szPath; 0 != *p; ++p)
		if ('/' == *p || '\\' == *p)
			pstrBegin = p + 1;

	// Search for the extension dot inside the file name only, so that the
	// end of the name can never lie before its beginning.
	const char *pstrEnd = strrchr(pstrBegin, '.');
	if (0 == pstrEnd)
		pstrEnd = pstrBegin + strlen(pstrBegin);

	size_t uNameLength = (size_t) (pstrEnd - pstrBegin);
	if (uNameLength > uBytes - 1)
		uNameLength = uBytes - 1;
	memcpy(szName, pstrBegin, uNameLength);
	szName[uNameLength] = 0;
	}

// Return a strdup() copy of s. If the copy cannot be allocated, Quit() is
// called with "Out of memory".
char *strsave(const char *s)
	{
	char *ptrDuplicate = strdup(s);
	if (0 != ptrDuplicate)
		return ptrDuplicate;
	Quit("Out of memory");
	return ptrDuplicate;
	}

// Return true if c may appear in a floating-point literal: a decimal digit,
// '.', a sign, or one of the exponent letters e, E, d, D.
bool IsValidFloatChar(char c)
	{
	switch (c)
		{
	case '.':
	case '+':
	case '-':
	case 'e':
	case 'E':
	case 'd':
	case 'D':
		return true;
		}
	// Cast: passing a negative char value to isdigit() is undefined.
	return 0 != isdigit((unsigned char) c);
	}

// Assertion back end: when b is false, call Quit() with a message naming
// the file, the line and the asserted expression text msg.
void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg)
	{
	if (!b)
		Quit("%s(%d): MY_ASSERT(%s)", file, line, msg);
	}

// Running byte total maintained by MemPlus() and MemMinus().
static size_t g_MemTotal;

// Add Bytes to the running total and log one line, prefixed '+', with:
// Bytes, GetMemUseMB(), the running total in units of 10^6 bytes, and Where.
void MemPlus(size_t Bytes, char *Where)
	{
	g_MemTotal += Bytes;
	const unsigned uBytes = (unsigned) Bytes;
	const unsigned uProcessMB = (unsigned) GetMemUseMB();
	const unsigned uTotalMB = (unsigned) (g_MemTotal/1000000);
	Log("+%10u  %6u  %6u  %s\n", uBytes, uProcessMB, uTotalMB, Where);
	}

// Subtract Bytes from the running total and log one line, prefixed '-', with:
// Bytes, GetMemUseMB(), the running total in units of 10^6 bytes, and Where.
void MemMinus(size_t Bytes, char *Where)
	{
	g_MemTotal -= Bytes;
	const unsigned uBytes = (unsigned) Bytes;
	const unsigned uProcessMB = (unsigned) GetMemUseMB();
	const unsigned uTotalMB = (unsigned) (g_MemTotal/1000000);
	Log("-%10u  %6u  %6u  %s\n", uBytes, uProcessMB, uTotalMB, Where);
	}
@@ -0,0 +1,163 @@
#include "muscle.h"

#if		defined(__linux__)
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>

// One decimal megabyte (10^6 bytes).
const int ONE_MB = 1000000;
// 20 MB in bytes. Not referenced in the visible part of this file.
const int MEM_WARNING_THRESHOLD = 20*ONE_MB;

// Return a quiet NaN built from its IEEE-754 bit pattern.
double GetNAN()
	{
	// Two 32-bit words, together exactly the size of a double. With
	// "unsigned long" the array is 16 bytes on LP64 systems, and the first
	// eight bytes then decode to a tiny denormal instead of a NaN.
	// Either word order gives an all-ones exponent with a non-zero
	// mantissa, so the result is a NaN regardless of byte order.
	static const unsigned int nan[2]={0xffffffff, 0x7fffffff};
	double dNAN;
	// memcpy rather than a pointer cast: no strict-aliasing violation.
	memcpy(&dNAN, nan, sizeof(dNAN));
	return dNAN;
	}

// NaN value computed once during static initialization.
double g_dNAN = GetNAN();

// Heap check hook. It does nothing in this build: the only statement is
// commented out, and szMsg is unused.
void chkmem(const char szMsg[])
	{
	//assert(_CrtCheckMemory());
	}

// Debugger break hook. It does nothing in this build: the only statement is
// commented out.
void Break()
	{
	//DebugBreak();
	}

// Command line text: filled by SaveCmdLine(), returned by GetCmdLine().
static char szCmdLine[4096];

// Value of sbrk(0) captured during static initialization. Not read in the
// visible part of this file.
void *ptrStartBreak = sbrk(0);

// Return the text accumulated by SaveCmdLine(); empty until it is called.
const char *GetCmdLine()
	{
	return szCmdLine;
	}

// Return this process's memory use in units of 10^6 bytes, computed as the
// first number in /proc/<pid>/statm (a page count) times the page size.
//
// Returns -1 if the file cannot be opened, and 0 (after a one-time warning)
// if it cannot be read.
double GetMemUseMB()
	{
	static char statm[64];
	static int PageSize;
	if (0 == statm[0])
		{
		// First call: cache the page size and the path of the statm file.
		PageSize = sysconf(_SC_PAGESIZE);
		pid_t pid = getpid();
		sprintf(statm, "/proc/%d/statm", (int) pid);
		}

	int fd = open(statm, O_RDONLY);
	if (-1 == fd)
		return -1;
	char Buffer[64];
	int n = read(fd, Buffer, sizeof(Buffer) - 1);
	// Save errno now: close() may overwrite it before it is reported.
	const int iReadErrno = errno;
	close(fd);

	if (n <= 0)
		{
		static bool Warned = false;
		if (!Warned)
			{
			Warned = true;
			Warning("*Warning* Cannot read %s errno=%d %s",
			  statm, iReadErrno, strerror(iReadErrno));
			}
		return 0;
		}
	Buffer[n] = 0;
	// atoi() stops at the first blank, i.e. after the first field.
	int Pages = atoi(Buffer);

	return ((double) Pages * (double) PageSize)/1e6;
	}

// Append the program arguments, separated by single blanks, to szCmdLine so
// that GetCmdLine() can return them. Text that does not fit in the buffer is
// dropped rather than written past its end.
void SaveCmdLine(int argc, char *argv[])
	{
	for (int i = 0; i < argc; ++i)
		{
		// The third argument of strncat is the room left, not counting
		// the terminator, which strncat always writes.
		if (i > 0)
			strncat(szCmdLine, " ", sizeof(szCmdLine) - strlen(szCmdLine) - 1);
		strncat(szCmdLine, argv[i], sizeof(szCmdLine) - strlen(szCmdLine) - 1);
		}
	}

// Largest value of GetMemUseMB() seen by CheckMemUse() so far.
double dPeakMemUseMB = 0;

// Take a fresh sample with CheckMemUse(), then return the peak.
double GetPeakMemUseMB()
	{
	CheckMemUse();
	return dPeakMemUseMB;
	}

// Return the CPU clock rate in GHz: the value of the CPUGHZ environment
// variable parsed with atof(), or 2.5 when the variable is not set.
double GetCPUGHz()
	{
	const char *pszOverride = getenv("CPUGHZ");
	if (0 == pszOverride)
		return 2.5;
	return atof(pszOverride);
	}

// Sample the current memory use and raise dPeakMemUseMB if it is exceeded.
void CheckMemUse()
	{
	const double dCurrentMB = GetMemUseMB();
	if (dCurrentMB > dPeakMemUseMB)
		dPeakMemUseMB = dCurrentMB;
	}

// Return the machine's RAM size in MB as read from the "MemTotal:" line of
// /proc/meminfo. The first successful result is cached.
//
// If the file cannot be opened, read or parsed, a warning is issued once per
// failure kind and DEFAULT_RAM (500) is returned.
double GetRAMSizeMB()
	{
	const double DEFAULT_RAM = 500;
	static double RAMMB = 0;
	if (RAMMB != 0)
		return RAMMB;

	int fd = open("/proc/meminfo", O_RDONLY);
	if (-1 == fd)
		{
		static bool Warned = false;
		if (!Warned)
			{
			Warned = true;
			Warning("*Warning* Cannot open /proc/meminfo errno=%d %s",
			  errno, strerror(errno));
			}
		return DEFAULT_RAM;
		}
	char Buffer[1024];
	int n = read(fd, Buffer, sizeof(Buffer) - 1);
	// Save errno now: close() may overwrite it before it is reported.
	const int iReadErrno = errno;
	close(fd);

	if (n <= 0)
		{
		static bool Warned = false;
		if (!Warned)
			{
			Warned = true;
			Warning("*Warning* Cannot read /proc/meminfo errno=%d %s",
			  iReadErrno, strerror(iReadErrno));
			}
		return DEFAULT_RAM;
		}
	Buffer[n] = 0;
	char *pMem = strstr(Buffer, "MemTotal: ");
	if (0 == pMem)
		{
		static bool Warned = false;
		if (!Warned)
			{
			Warned = true;
			Warning("*Warning* 'MemTotal:' not found in /proc/meminfo");
			}
		return DEFAULT_RAM;
		}
	// Skip the 9 characters of "MemTotal:"; atof() skips the blanks that
	// follow. The value is kept in floating point because kB*1000 does not
	// fit in an int once the machine has more than about 2 GB of RAM.
	const double dKB = atof(pMem+9);
	if (dKB <= 0)
		return DEFAULT_RAM;
	// Same scaling as before: kB -> bytes (x1000) -> MB (/1e6).
	RAMMB = (dKB*1000.0)/1e6;
	return RAMMB;
	}

#endif	// defined(__linux__)
@@ -0,0 +1,92 @@
#ifdef __MACH__

#include <memory.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/gmon.h>
#include <mach/vm_param.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <mach/task_info.h>
#include <mach/task.h>
#include <mach/mach_init.h>
#include <mach/vm_statistics.h>

// Returned by GetRAMSize() when the sysctl query fails.
const double DEFAULT_RAM = 1e9;
// Returned by GetMemUseBytes() when task_info() fails.
const double DEFAULT_MEM_USE = 1e6;

// Return a quiet NaN built from its IEEE-754 bit pattern.
double GetNAN()
	{
	// Two 32-bit words, together exactly the size of a double. With
	// "unsigned long" the array is 16 bytes on LP64 systems, and the first
	// eight bytes then decode to a tiny denormal instead of a NaN.
	// Either word order gives an all-ones exponent with a non-zero
	// mantissa, so the result is a NaN regardless of byte order.
	static const unsigned int nan[2]={0xffffffff, 0x7fffffff};
	double dNAN;
	// memcpy rather than a pointer cast: no strict-aliasing violation.
	memcpy(&dNAN, nan, sizeof(dNAN));
	return dNAN;
	}

// NaN value computed once during static initialization.
double g_dNAN = GetNAN();


double GetRAMSize()
	{
	static double CACHED_RAM = 0;
	if (CACHED_RAM != 0)
		return CACHED_RAM;

	uint64_t MemPages = 0;
	size_t Len = sizeof(MemPages);
	if (sysctlbyname("hw.memsize", &MemPages, &Len, NULL, 0) < 0)
		return DEFAULT_RAM;
	return (double) MemPages;
	}

// GetRAMSize() divided by 10^6.
double GetRAMSizeMB()
	{
	return GetRAMSize()/1e6;
	}

// Largest resident size seen by GetMemUseBytes() so far.
static double g_uPeakMemUseBytes;

// Return the recorded peak; no new sample is taken.
double GetMaxMemUseBytes()
	{
	return g_uPeakMemUseBytes;
	}

// Same value as GetMaxMemUseBytes().
double GetPeakMemUseBytes()
	{
	return GetMaxMemUseBytes();
	}

double GetMemUseBytes()
	{
	task_t mytask = mach_task_self();
	struct task_basic_info ti;
	memset((void *) &ti, 0, sizeof(ti));
	mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
	kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count);
	if (ok == KERN_INVALID_ARGUMENT)
		return DEFAULT_MEM_USE;

	if (ok != KERN_SUCCESS)
		return DEFAULT_MEM_USE;

	double uBytes = (double ) ti.resident_size;
	if (uBytes > g_uPeakMemUseBytes)
		g_uPeakMemUseBytes = uBytes;
	return uBytes;
	}

// GetMemUseBytes() divided by 10^6.
double GetMemUseMB()
	{
	return GetMemUseBytes()/1e6;
	}

// Platform initialization hook; nothing to do in this build.
void OSInit()
	{
	}

#endif // __MACH__
@@ -0,0 +1,62 @@
#include "muscle.h"

#if		!defined(__linux__) && !defined(_MSC_VER) && !defined(__MACH__)

// Fallback for platforms without a dedicated implementation.
// NOTE(review): this returns 0.0, which is not a NaN, so g_dNAN compares
// equal to ordinary zeros here -- confirm that callers tolerate this.
double GetNAN()
	{
	return 0.0;
	}

// Initialized from GetNAN() during static initialization.
double g_dNAN = GetNAN();

// Heap check hook; does nothing on this platform. szMsg is unused.
void chkmem(const char szMsg[])
	{
	}

// Debugger break hook; does nothing on this platform.
void Break()
	{
	}

// Command line text accumulated by SaveCmdLine().
char szCmdLine[4096];

// Return the command line recorded by SaveCmdLine(), or the placeholder
// "muscle" when nothing has been recorded yet.
const char *GetCmdLine()
	{
	if (0 != szCmdLine[0])
		return szCmdLine;
	return "muscle";
	}

// No measurement is made on this platform; a fixed value is returned.
double GetMemUseMB()
	{
	return 100.0;
	}

// Append the program arguments, separated by single blanks, to szCmdLine.
// Text that does not fit in the buffer is dropped rather than written past
// its end.
void SaveCmdLine(int argc, char *argv[])
	{
	for (int i = 0; i < argc; ++i)
		{
		// The third argument of strncat is the room left, not counting
		// the terminator, which strncat always writes.
		if (i > 0)
			strncat(szCmdLine, " ", sizeof(szCmdLine) - strlen(szCmdLine) - 1);
		strncat(szCmdLine, argv[i], sizeof(szCmdLine) - strlen(szCmdLine) - 1);
		}
	}

// No measurement is made on this platform; a fixed value is returned.
double GetPeakMemUseMB()
	{
	return 100.0;
	}

// Fixed clock rate in GHz. Unlike the Linux and Win32 versions in this
// source, the CPUGHZ environment variable is not consulted.
double GetCPUGHz()
	{
	return 2.0;
	}

// Memory sampling hook; does nothing on this platform.
void CheckMemUse()
	{
	}

// No query is made on this platform; a fixed value is returned.
double GetRAMSizeMB()
	{
	return 500.0;
	}

#endif

@@ -0,0 +1,100 @@
#include "muscle.h"

#if	WIN32
#include <windows.h>
#include <crtdbg.h>
#include <psapi.h>
#include <float.h>
#include <stdio.h>

void DebugPrintf(const char *szFormat, ...)
	{
	va_list ArgList;
	char szStr[4096];

	va_start(ArgList, szFormat);
	vsprintf(szStr, szFormat, ArgList);

	OutputDebugString(szStr);
	}

// Return a NaN by reinterpreting a fixed bit pattern as a double.
// NOTE(review): this relies on the two array elements together being exactly
// the size of a double (i.e. a 32-bit unsigned long) -- confirm for every
// compiler this file is built with. The assert checks the result in debug
// builds only.
double GetNAN()
	{
	static unsigned long nan[2]={0xffffffff, 0x7fffffff};
	double dNAN = *( double* )nan;
	assert(_isnan(dNAN));
	return dNAN;
	}

// NaN value computed once during static initialization.
double g_dNAN = GetNAN();

// Run _CrtCheckMemory() and, if it reports a problem, stop through Quit()
// with szMsg naming the call site.
void chkmem(const char szMsg[])
	{
	if (_CrtCheckMemory())
		return;
	Quit("chkmem(%s)", szMsg);
	}

// Break into the debugger, but only when one is attached.
void Break()
	{
	if (IsDebuggerPresent())
		DebugBreak();
	}

// Return the command line as reported by GetCommandLine().
const char *GetCmdLine()
	{
	return GetCommandLine();
	}

// Largest working-set size seen by GetMemUseMB() so far, in bytes.
static unsigned uPeakMemUseBytes;

// Return MEMORYSTATUS.dwAvailPhys, as filled in by GlobalMemoryStatus(),
// divided by 10^6.
// NOTE(review): dwAvailPhys looks like available rather than total physical
// memory, unlike what the function name suggests -- confirm the intent.
double GetRAMSizeMB()
	{
	MEMORYSTATUS MS;
	GlobalMemoryStatus(&MS);
	return MS.dwAvailPhys/1e6;
	}

// Return this process's working-set size in MB, rounded to the nearest
// megabyte boundary by the +500000 offset, and update the recorded peak.
// Returns 0 if the memory counters cannot be obtained.
double GetMemUseMB()
	{
	HANDLE hProc = GetCurrentProcess();
	PROCESS_MEMORY_COUNTERS PMC;
	BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));
	assert(bOk);
	// With asserts compiled out, a failed call would leave PMC
	// uninitialized; do not read it in that case.
	if (!bOk)
		return 0.0;
	// NOTE(review): the cast truncates working sets of 4 GB or more.
	unsigned uBytes = (unsigned) PMC.WorkingSetSize;
	if (uBytes > uPeakMemUseBytes)
		uPeakMemUseBytes = uBytes;
	return (uBytes + 500000.0)/1000000.0;
	}

// Return the recorded peak in MB, using the same +500000 offset as
// GetMemUseMB(). No new sample is taken.
double GetPeakMemUseMB()
	{
	return (uPeakMemUseBytes + 500000.0)/1000000.0;
	}

// Sample memory use now; the return value of GetMemUseMB() is discarded.
void CheckMemUse()
	{
// Side-effect: sets peak usage in uPeakMemUseBytes
	GetMemUseMB();
	}

// Return the CPU clock rate in GHz: the CPUGHZ environment variable parsed
// with atof(), or 2.5 when it is not set. A value outside [0.1, 1000] is
// rejected through Quit().
double GetCPUGHz()
	{
	const char *e = getenv("CPUGHZ");
	const double dGHz = (0 != e) ? atof(e) : 2.5;
	const bool bInRange = (dGHz >= 0.1 && dGHz <= 1000.0);
	if (!bInRange)
		Quit("Invalid value '%s' for environment variable CPUGHZ", e);
	return dGHz;
	}
#endif	// WIN32
@@ -0,0 +1,499 @@
#include "muscle.h"
#include "gonnet.h"

// Expands to one brace-enclosed matrix row of 20 values, each argument
// divided by 4.0, in the residue order A C D E F G H I K L M N P Q R S T V W Y.
// The expansion ends with a comma, so consecutive ROW(...) uses need no
// separator between them.
#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ A/4.0, C/4.0, D/4.0, E/4.0, F/4.0, G/4.0, H/4.0, I/4.0, K/4.0, L/4.0, M/4.0, N/4.0, P/4.0, Q/4.0, R/4.0, S/4.0, T/4.0, V/4.0, W/4.0, Y/4.0 },

static double Gonnet80[20][20] =
	{
//        A       C       D       E       F       G       H       I       K       L
//        M       N       P       Q       R       S       T       V       W       Y
ROW(   1990,   1140,    930,   1070,    600,   1130,    850,    810,    940,    810,
        980,    900,   1080,   1020,    880,   1380,   1190,   1180,    370,    590) // A

ROW(   1140,   2780,    310,    300,    850,    630,    810,    700,    360,    690,
        850,    690,    310,    480,    640,   1090,    900,   1030,    810,    920) // C

ROW(    930,    310,   2200,   1550,    130,    980,   1070,    180,   1030,    150,
        360,   1450,    820,   1150,    800,   1100,   1000,    350,      0,    550) // D

ROW(   1070,    300,   1550,   2120,    220,    770,   1070,    510,   1280,    490,
        710,   1110,    890,   1470,   1010,   1050,    970,    730,    260,    500) // E

ROW(    600,    850,    130,    220,   2380,     90,    980,   1090,    350,   1310,
       1270,    490,    310,    540,    340,    470,    620,    930,   1400,   1730) // F

ROW(   1130,    630,    980,    770,     90,   2210,    710,    100,    740,    200,
        410,   1060,    660,    800,    810,   1080,    720,    380,    430,    300) // G

ROW(    850,    810,   1070,   1070,    980,    710,   2510,    600,   1120,    670,
        860,   1330,    790,   1380,   1140,    990,   1000,    590,    810,   1450) // H

ROW(    810,    700,    180,    510,   1090,    100,    600,   2100,    650,   1460,
       1490,    530,    490,    640,    530,    620,    960,   1650,    610,    770) // I

ROW(    940,    360,   1030,   1280,    350,    740,   1120,    650,   2090,    660,
        870,   1220,    870,   1410,   1570,   1040,   1090,    700,    350,    640) // K

ROW(    810,    690,    150,    490,   1310,    200,    670,   1460,    660,   2010,
       1550,    450,    660,    850,    660,    600,    750,   1270,    800,    890) // L

ROW(    980,    850,    360,    710,   1270,    410,    860,   1490,    870,   1550,
       2410,    620,    460,   1050,    710,    830,    990,   1250,    790,    870) // M

ROW(    900,    690,   1450,   1110,    490,   1060,   1330,    530,   1220,    450,
        620,   2210,    760,   1180,   1020,   1290,   1170,    550,    380,    850) // N

ROW(   1080,    310,    820,    890,    310,    660,    790,    490,    870,    660,
        460,    760,   2380,   1000,    790,   1100,   1040,    670,    120,    480) // P

ROW(   1020,    480,   1150,   1470,    540,    800,   1380,    640,   1410,    850,
       1050,   1180,   1000,   2190,   1350,   1090,   1060,    730,    620,    710) // Q

ROW(    880,    640,    800,   1010,    340,    810,   1140,    530,   1570,    660,
        710,   1020,    790,   1350,   2210,    970,    970,    640,    830,    740) // R

ROW(   1380,   1090,   1100,   1050,    470,   1080,    990,    620,   1040,    600,
        830,   1290,   1100,   1090,    970,   2020,   1490,    810,    520,    780) // S

ROW(   1190,    900,   1000,    970,    620,    720,   1000,    960,   1090,    750,
        990,   1170,   1040,   1060,    970,   1490,   2050,   1150,    370,    660) // T

ROW(   1180,   1030,    350,    730,    930,    380,    590,   1650,    700,   1270,
       1250,    550,    670,    730,    640,    810,   1150,   2040,    440,    770) // V

ROW(    370,    810,      0,    260,   1400,    430,    810,    610,    350,    800,
        790,    380,    120,    620,    830,    520,    370,    440,   2970,   1470) // W

ROW(    590,    920,    550,    500,   1730,    300,   1450,    770,    640,    890,
        870,    850,    480,    710,    740,    780,    660,    770,   1470,   2470) // Y
	};

static double Gonnet120[20][20] =
	{
//        A       C       D       E       F       G       H       I       K       L
//        M       N       P       Q       R       S       T       V       W       Y
ROW(   1550,    950,    780,    870,    480,    930,    700,    690,    770,    660,
        790,    760,    900,    840,    730,   1120,    980,    960,    280,    480) // A

ROW(    950,   2400,    270,    280,    700,    510,    650,    600,    320,    570,
        700,    550,    280,    400,    510,    890,    750,    850,    670,    760) // C

ROW(    780,    270,   1780,   1310,     90,    820,    890,    160,    880,    140,
        320,   1220,    680,    970,    690,    910,    830,    310,      0,    430) // D

ROW(    870,    280,   1310,   1680,    180,    650,    900,    410,   1070,    390,
        560,    950,    740,   1210,    860,    870,    810,    580,    180,    400) // E

ROW(    480,    700,     90,    180,   1980,     40,    820,    930,    290,   1110,
       1070,    380,    240,    430,    280,    380,    490,    790,   1230,   1510) // F

ROW(    930,    510,    820,    650,     40,   1860,    590,     90,    620,    140,
        310,    890,    550,    660,    660,    900,    610,    310,    300,    220) // G

ROW(    700,    650,    890,    900,    820,    590,   2060,    480,    940,    540,
        680,   1100,    650,   1130,    950,    820,    820,    490,    680,   1220) // H

ROW(    690,    600,    160,    410,    930,     90,    480,   1680,    520,   1240,
       1250,    410,    400,    530,    430,    520,    790,   1380,    500,    650) // I

ROW(    770,    320,    880,   1070,    290,    620,    940,    520,   1650,    520,
        690,   1010,    720,   1160,   1320,    860,    900,    570,    280,    520) // K

ROW(    660,    570,    140,    390,   1110,    140,    540,   1240,    520,   1620,
       1300,    350,    520,    660,    520,    490,    620,   1090,    670,    760) // L

ROW(    790,    700,    320,    560,   1070,    310,    680,   1250,    690,   1300,
       1910,    500,    400,    820,    580,    670,    800,   1060,    650,    740) // M

ROW(    760,    550,   1220,    950,    380,    890,   1100,    410,   1010,    350,
        500,   1760,    640,    970,    860,   1060,    960,    460,    280,    680) // N

ROW(    900,    280,    680,    740,    240,    550,    650,    400,    720,    520,
        400,    640,   2010,    820,    660,    910,    860,    540,     70,    370) // P

ROW(    840,    400,    970,   1210,    430,    660,   1130,    530,   1160,    660,
        820,    970,    820,   1700,   1120,    890,    870,    600,    470,    580) // Q

ROW(    730,    510,    690,    860,    280,    660,    950,    430,   1320,    520,
        580,    860,    660,   1120,   1790,    810,    800,    520,    660,    590) // R

ROW(   1120,    890,    910,    870,    380,    900,    820,    520,    860,    490,
        670,   1060,    910,    890,    810,   1560,   1220,    680,    390,    610) // S

ROW(    980,    750,    830,    810,    490,    610,    820,    790,    900,    620,
        800,    960,    860,    870,    800,   1220,   1600,    930,    290,    540) // T

ROW(    960,    850,    310,    580,    790,    310,    490,   1380,    570,   1090,
       1060,    460,    540,    600,    520,    680,    930,   1610,    370,    630) // V

ROW(    280,    670,      0,    180,   1230,    300,    680,    500,    280,    670,
        650,    280,     70,    470,    660,    390,    290,    370,   2620,   1290) // W

ROW(    480,    760,    430,    400,   1510,    220,   1220,    650,    520,    760,
        740,    680,    370,    580,    590,    610,    540,    630,   1290,   2070) // Y
	};

static SCORE Gonnet160[20][20] =
	{
//        A       C       D       E       F       G       H       I       K       L
//        M       N       P       Q       R       S       T       V       W       Y
ROW(   1240,    810,    670,    740,    400,    800,    600,    600,    660,    560,
        660,    660,    770,    710,    620,    940,    830,    790,    230,    410) // A

ROW(    810,   2130,    250,    260,    600,    440,    550,    530,    300,    490,
        590,    470,    260,    360,    430,    760,    640,    720,    570,    650) // C

ROW(    670,    250,   1480,   1120,     80,    710,    770,    160,    770,    130,
        280,   1040,    590,    840,    620,    780,    720,    290,      0,    360) // D

ROW(    740,    260,   1120,   1370,    160,    570,    770,    350,    910,    330,
        470,    830,    640,   1010,    750,    750,    700,    480,    140,    340) // E

ROW(    400,    600,     80,    160,   1690,     20,    710,    810,    250,    970,
        920,    310,    200,    370,    250,    330,    420,    700,   1100,   1340) // F

ROW(    800,    440,    710,    570,     20,   1600,    510,     80,    540,    110,
        260,    760,    480,    570,    570,    770,    540,    260,    230,    180) // G

ROW(    600,    550,    770,    770,    710,    510,   1710,    410,    800,    460,
        570,    930,    560,    950,    810,    700,    700,    430,    590,   1050) // H

ROW(    600,    530,    160,    350,    810,     80,    410,   1370,    430,   1080,
       1070,    340,    350,    460,    370,    450,    660,   1180,    440,    580) // I

ROW(    660,    300,    770,    910,    250,    540,    800,    430,   1330,    440,
        570,    860,    620,    980,   1130,    740,    760,    480,    240,    430) // K

ROW(    560,    490,    130,    330,    970,    110,    460,   1080,    440,   1350,
       1120,    300,    430,    540,    430,    420,    540,    950,    580,    670) // L

ROW(    660,    590,    280,    470,    920,    260,    570,   1070,    570,   1120,
       1540,    420,    360,    660,    490,    550,    670,    920,    560,    650) // M

ROW(    660,    470,   1040,    830,    310,    760,    930,    340,    860,    300,
        420,   1430,    560,    830,    740,    890,    810,    400,    230,    560) // N

ROW(    770,    260,    590,    640,    200,    480,    560,    350,    620,    430,
        360,    560,   1740,    700,    570,    780,    740,    460,     40,    300) // P

ROW(    710,    360,    840,   1010,    370,    570,    950,    460,    980,    540,
        660,    830,    700,   1340,    950,    760,    740,    510,    380,    490) // Q

ROW(    620,    430,    620,    750,    250,    570,    810,    370,   1130,    430,
        490,    740,    570,    950,   1490,    690,    690,    440,    540,    490) // R

ROW(    940,    760,    780,    750,    330,    770,    700,    450,    740,    420,
        550,    890,    780,    760,    690,   1220,   1010,    580,    310,    500) // S

ROW(    830,    640,    720,    700,    420,    540,    700,    660,    760,    540,
        670,    810,    740,    740,    690,   1010,   1280,    780,    240,    460) // T

ROW(    790,    720,    290,    480,    700,    260,    430,   1180,    480,    950,
        920,    400,    460,    510,    440,    580,    780,   1310,    330,    540) // V

ROW(    230,    570,      0,    140,   1100,    230,    590,    440,    240,    580,
        560,    230,     40,    380,    540,    310,    240,    330,   2360,   1160) // W

ROW(    410,    650,    360,    340,   1340,    180,   1050,    580,    430,    670,
        650,    560,    300,    490,    490,    500,    460,    540,   1160,   1780) // Y
	};

// Matrix with external linkage (not static, unlike the neighbouring tables).
// Each stored entry is the literal below divided by 4.0 by the ROW macro.
// The array is declared 21x21 but only 20 rows of 20 values are supplied;
// the remaining row and column are zero-initialized.
// NOTE(review): the extra row/column is presumably reserved for an
// additional symbol -- confirm against the code that indexes this table.
double Gonnet16[21][21] =
	{
//       A      C      D      E      F      G      H      I      K      L
//       M      N      P      Q      R      S      T      V      W      Y
ROW(   124,    81,    67,    74,    40,    80,    60,    60,    66,    56,
        66,    66,    77,    71,    62,    94,    83,    79,    23,    41) // A

ROW(    81,   213,    25,    26,    60,    44,    55,    53,    30,    49,
        59,    47,    26,    36,    43,    76,    64,    72,    57,    65) // C

ROW(    67,    25,   148,   112,     8,    71,    77,    16,    77,    13,
        28,   104,    59,    84,    62,    78,    72,    29,     0,    36) // D

ROW(    74,    26,   112,   137,    16,    57,    77,    35,    91,    33,
        47,    83,    64,   101,    75,    75,    70,    48,    14,    34) // E

ROW(    40,    60,     8,    16,   169,     2,    71,    81,    25,    97,
        92,    31,    20,    37,    25,    33,    42,    70,   110,   134) // F

ROW(    80,    44,    71,    57,     2,   160,    51,     8,    54,    11,
        26,    76,    48,    57,    57,    77,    54,    26,    23,    18) // G

ROW(    60,    55,    77,    77,    71,    51,   171,    41,    80,    46,
        57,    93,    56,    95,    81,    70,    70,    43,    59,   105) // H

ROW(    60,    53,    16,    35,    81,     8,    41,   137,    43,   108,
       107,    34,    35,    46,    37,    45,    66,   118,    44,    58) // I

ROW(    66,    30,    77,    91,    25,    54,    80,    43,   133,    44,
        57,    86,    62,    98,   113,    74,    76,    48,    24,    43) // K

ROW(    56,    49,    13,    33,    97,    11,    46,   108,    44,   135,
       112,    30,    43,    54,    43,    42,    54,    95,    58,    67) // L

ROW(    66,    59,    28,    47,    92,    26,    57,   107,    57,   112,
       154,    42,    36,    66,    49,    55,    67,    92,    56,    65) // M

ROW(    66,    47,   104,    83,    31,    76,    93,    34,    86,    30,
        42,   143,    56,    83,    74,    89,    81,    40,    23,    56) // N

ROW(    77,    26,    59,    64,    20,    48,    56,    35,    62,    43,
        36,    56,   174,    70,    57,    78,    74,    46,     4,    30) // P

ROW(    71,    36,    84,   101,    37,    57,    95,    46,    98,    54,
        66,    83,    70,   134,    95,    76,    74,    51,    38,    49) // Q

ROW(    62,    43,    62,    75,    25,    57,    81,    37,   113,    43,
        49,    74,    57,    95,   149,    69,    69,    44,    54,    49) // R

ROW(    94,    76,    78,    75,    33,    77,    70,    45,    74,    42,
        55,    89,    78,    76,    69,   122,   101,    58,    31,    50) // S

ROW(    83,    64,    72,    70,    42,    54,    70,    66,    76,    54,
        67,    81,    74,    74,    69,   101,   128,    78,    24,    46) // T

ROW(    79,    72,    29,    48,    70,    26,    43,   118,    48,    95,
        92,    40,    46,    51,    44,    58,    78,   131,    33,    54) // V

ROW(    23,    57,     0,    14,   110,    23,    59,    44,    24,    58,
        56,    23,     4,    38,    54,    31,    24,    33,   236,   116) // W

ROW(    41,    65,    36,    34,   134,    18,   105,    58,    43,    67,
        65,    56,    30,    49,    49,    50,    46,    54,   116,   178) // Y
	};

static double Gonnet250[20][20] =
	{
//        A       C       D       E       F       G       H       I       K       L
//        M       N       P       Q       R       S       T       V       W       Y
ROW(    760,    570,    490,    520,    290,    570,    440,    440,    480,    400,
        450,    490,    550,    500,    460,    630,    580,    530,    160,    300) // A

ROW(    570,   1670,    200,    220,    440,    320,    390,    410,    240,    370,
        430,    340,    210,    280,    300,    530,    470,    520,    420,    470) // C

ROW(    490,    200,    990,    790,     70,    530,    560,    140,    570,    120,
        220,    740,    450,    610,    490,    570,    520,    230,      0,    240) // D

ROW(    520,    220,    790,    880,    130,    440,    560,    250,    640,    240,
        320,    610,    470,    690,    560,    540,    510,    330,     90,    250) // E

ROW(    290,    440,     70,    130,   1220,      0,    510,    620,    190,    720,
        680,    210,    140,    260,    200,    240,    300,    530,    880,   1030) // F

ROW(    570,    320,    530,    440,      0,   1180,    380,     70,    410,     80,
        170,    560,    360,    420,    420,    560,    410,    190,    120,    120) // G

ROW(    440,    390,    560,    560,    510,    380,   1120,    300,    580,    330,
        390,    640,    410,    640,    580,    500,    490,    320,    440,    740) // H

ROW(    440,    410,    140,    250,    620,     70,    300,    920,    310,    800,
        770,    240,    260,    330,    280,    340,    460,    830,    340,    450) // I

ROW(    480,    240,    570,    640,    190,    410,    580,    310,    840,    310,
        380,    600,    460,    670,    790,    530,    530,    350,    170,    310) // K

ROW(    400,    370,    120,    240,    720,     80,    330,    800,    310,    920,
        800,    220,    290,    360,    300,    310,    390,    700,    450,    520) // L

ROW(    450,    430,    220,    320,    680,    170,    390,    770,    380,    800,
        950,    300,    280,    420,    350,    380,    460,    680,    420,    500) // M

ROW(    490,    340,    740,    610,    210,    560,    640,    240,    600,    220,
        300,    900,    430,    590,    550,    610,    570,    300,    160,    380) // N

ROW(    550,    210,    450,    470,    140,    360,    410,    260,    460,    290,
        280,    430,   1280,    500,    430,    560,    530,    340,     20,    210) // P

ROW(    500,    280,    610,    690,    260,    420,    640,    330,    670,    360,
        420,    590,    500,    790,    670,    540,    520,    370,    250,    350) // Q

ROW(    460,    300,    490,    560,    200,    420,    580,    280,    790,    300,
        350,    550,    430,    670,    990,    500,    500,    320,    360,    340) // R

ROW(    630,    530,    570,    540,    240,    560,    500,    340,    530,    310,
        380,    610,    560,    540,    500,    740,    670,    420,    190,    330) // S

ROW(    580,    470,    520,    510,    300,    410,    490,    460,    530,    390,
        460,    570,    530,    520,    500,    670,    770,    520,    170,    330) // T

ROW(    530,    520,    230,    330,    530,    190,    320,    830,    350,    700,
        680,    300,    340,    370,    320,    420,    520,    860,    260,    410) // V

ROW(    160,    420,      0,     90,    880,    120,    440,    340,    170,    450,
        420,    160,     20,    250,    360,    190,    170,    260,   1940,    930) // W

ROW(    300,    470,    240,    250,   1030,    120,    740,    450,    310,    520,
        500,    380,    210,    350,    340,    330,    330,    410,    930,   1300) // Y
	};

// Gonnet350: 20x20 table of substitution values, one ROW per amino acid.
// Rows and columns follow the letter order shown in the header comment
// below (A C D E F G H I K L M N P Q R S T V W Y); the trailing comment on
// each ROW names that row's letter. GetGonnetMatrix(350) returns this table.
// NOTE(review): ROW is a macro defined outside this view; presumably it
// wraps the 20 values of a row in braces -- confirm before editing.
static double Gonnet350[20][20] =
	{
//        A       C       D       E       F       G       H       I       K       L
//        M       N       P       Q       R       S       T       V       W       Y
ROW(    450,    390,    350,    360,    210,    400,    310,    310,    340,    280,
        310,    350,    380,    350,    330,    410,    390,    350,    110,    210) // A

ROW(    390,   1280,    160,    180,    320,    230,    270,    300,    190,    280,
        310,    240,    170,    210,    220,    360,    330,    370,    310,    340) // C

ROW(    350,    160,    640,    540,     50,    390,    400,    110,    410,    100,
        160,    500,    330,    430,    370,    400,    370,    170,      0,    170) // D

ROW(    360,    180,    540,    550,    100,    330,    390,    180,    440,    170,
        220,    440,    350,    460,    410,    380,    360,    230,     60,    180) // E

ROW(    210,    320,     50,    100,    860,      0,    360,    460,    140,    530,
        490,    150,    100,    190,    150,    170,    220,    400,    700,    770) // F

ROW(    400,    230,    390,    330,      0,    860,    280,     60,    310,     50,
        120,    400,    280,    310,    310,    400,    300,    140,     50,     80) // G

ROW(    310,    270,    400,    390,    360,    280,    680,    220,    400,    240,
        270,    430,    300,    420,    410,    350,    340,    240,    320,    500) // H

ROW(    310,    300,    110,    180,    460,     60,    220,    620,    220,    570,
        540,    170,    190,    240,    200,    240,    320,    570,    260,    340) // I

ROW(    340,    190,    410,    440,    140,    310,    400,    220,    530,    210,
        260,    420,    330,    450,    530,    370,    370,    250,    120,    210) // K

ROW(    280,    280,    100,    170,    530,     50,    240,    570,    210,    630,
        560,    160,    200,    240,    210,    220,    280,    510,    340,    400) // L

ROW(    310,    310,    160,    220,    490,    120,    270,    540,    260,    560,
        580,    210,    210,    280,    240,    260,    310,    490,    320,    370) // M

ROW(    350,    240,    500,    440,    150,    400,    430,    170,    420,    160,
        210,    550,    320,    410,    390,    410,    390,    220,    110,    250) // N

ROW(    380,    170,    330,    350,    100,    280,    300,    190,    330,    200,
        210,    320,    910,    350,    310,    390,    370,    240,     10,    150) // P

ROW(    350,    210,    430,    460,    190,    310,    420,    240,    450,    240,
        280,    410,    350,    470,    450,    370,    360,    260,    160,    240) // Q

ROW(    330,    220,    370,    410,    150,    310,    410,    200,    530,    210,
        240,    390,    310,    450,    630,    360,    350,    230,    230,    230) // R

ROW(    410,    360,    400,    380,    170,    400,    350,    240,    370,    220,
        260,    410,    390,    370,    360,    450,    430,    290,    130,    230) // S

ROW(    390,    330,    370,    360,    220,    300,    340,    320,    370,    280,
        310,    390,    370,    360,    350,    430,    460,    350,    120,    230) // T

ROW(    350,    370,    170,    230,    400,    140,    240,    570,    250,    510,
        490,    220,    240,    260,    230,    290,    350,    560,    210,    310) // V

ROW(    110,    310,      0,     60,    700,     50,    320,    260,    120,    340,
        320,    110,     10,    160,    230,    130,    120,    210,   1590,    740) // W

ROW(    210,    340,    170,    180,    770,     80,    500,    340,    210,    400,
        370,    250,    150,    240,    230,    230,    230,    310,    740,    920) // Y
	};

// Return the Gonnet table for matrix number N.
// Supported numbers are 80, 120, 250 and 350 (there is no table for 160
// here, although the gap-penalty lookups accept it). Any other value is
// reported through Quit(); the trailing return only satisfies the compiler.
const t_ROW *GetGonnetMatrix(unsigned N)
	{
	if (80 == N)
		return Gonnet80;
	if (120 == N)
		return Gonnet120;
	if (250 == N)
		return Gonnet250;
	if (350 == N)
		return Gonnet350;

	Quit("Invalid Gonnet%u", N);
	return 0;
	}

//SCORE GetGonnetGapOpen(unsigned N)
//	{
//	switch (N)
//		{
//	case 80:
//		return -639;
//	case 120:
//		return -863;
//	case 160:
//		return -611;
//	case 250:
//		return -308;
//	case 350:
//		return -158;
//		}
//	Quit("Invalid Gonnet%u", N);
//	return 0;
//	}

// Return the gap-open score paired with Gonnet matrix number N.
// Known numbers are 80, 120, 160, 250 and 350; any other value is
// reported through Quit().
SCORE GetGonnetGapOpen(unsigned N)
	{
	static const struct
		{
		unsigned uMatrix;
		int iGapOpen;
		}
	Table[] =
		{
		{ 80, -1000 },
		{ 120, -800 },
		{ 160, -700 },
		{ 250, -200 },
		{ 350, -175 }
		};
	const unsigned uEntryCount = sizeof(Table)/sizeof(Table[0]);

	for (unsigned i = 0; i < uEntryCount; ++i)
		if (Table[i].uMatrix == N)
			return Table[i].iGapOpen;

	Quit("Invalid Gonnet%u", N);
	return 0;
	}

// Return the gap-extend score paired with Gonnet matrix number N.
// Known numbers are 80, 120, 160, 250 and 350; any other value is
// reported through Quit().
SCORE GetGonnetGapExtend(unsigned N)
	{
	if (80 == N)
		return 350;
	if (120 == N)
		return 200;
	if (160 == N)
		return 175;
	if (250 == N || 350 == N)
		return 20;

	Quit("Invalid Gonnet%u", N);
	return 0;
	}

//double GonnetLookup[400][400];
//
//static bool InitGonnetLookup()
//	{
//	for (unsigned i = 0; i < 400; ++i)
//		{
//		const unsigned A1 = i/20;
//		const unsigned A2 = i%20;
//		for (unsigned j = 0; j <= i; ++j)
//			{
//			const unsigned B1 = j/20;
//			const unsigned B2 = j%20;
//			
//			const double s00 = Gonnet16[A1][B1];
//			const double s01 = Gonnet16[A1][B2];
//			const double s10 = Gonnet16[A2][B1];
//			const double s11 = Gonnet16[A2][B2];
//
//			GonnetLookup[i][j] = GonnetLookup[j][i] = (s00 + s01 + s10 + s11)/4;
//			}
//		}
//	return true;
//	}
//
//static bool bGonnetLookupInitialized = InitGonnetLookup();
@@ -0,0 +1,12 @@
#ifndef Gonnet_h
#define Gonnet_h

// One row of a 20x20 Gonnet table.
typedef double t_ROW[20];

// NOTE(review): SCORE is not declared in this header; it presumably comes
// from a header included earlier by every includer -- confirm.

// Returns the Gonnet table for matrix number N; unsupported numbers are
// reported through Quit() by the implementation.
const t_ROW *GetGonnetMatrix(unsigned N);
// Gap-open / gap-extend scores paired with Gonnet matrix number N.
SCORE GetGonnetGapOpen(unsigned N);
SCORE GetGonnetGapExtend(unsigned N);

// NOTE(review): the only definition of GonnetLookup visible in the
// implementation file is commented out, so referencing this symbol would
// presumably fail at link time -- confirm whether it is still needed.
extern double GonnetLookup[400][400];

#endif	// Gonnet_h
@@ -0,0 +1,84 @@
#include "muscle.h"
#include "msa.h"

/***
Compute Henikoff weights.
Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
J. Mol. Biol., 243(4):574-578.

Award each different residue an equal share of the weight, and then divide up
that weight equally among the sequences sharing the same residue. So if in a
position of a multiple alignment, r different residues are represented, a residue
represented in only one sequence contributes a score of 1/r to that sequence, whereas a
residue represented in s sequences contributes a score of 1/rs to each of the s
sequences. For each sequence, the contributions from each position are summed to give
a sequence weight.

See also HenikoffWeightPB.
***/

void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();

// Compute letter counts in this column
	unsigned uLetterCount[MAX_ALPHA];
	memset(uLetterCount, 0, sizeof(uLetterCount));
	unsigned uDifferentLetterCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
		if (uLetter >= 20)
			continue;
		unsigned uNewCount = uLetterCount[uLetter] + 1;
		uLetterCount[uLetter] = uNewCount;
		if (1 == uNewCount)
			++uDifferentLetterCount;
		}

// Compute weight contributions
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
		if (uLetter >= 20)
			continue;
		const unsigned uCount = uLetterCount[uLetter];
		unsigned uDenom = uCount*uDifferentLetterCount;
		if (uDenom == 0)
			continue;
		m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
		}
	}

// Fill m_Weights with normalized Henikoff position-based weights.
// Alignments of 0, 1 or 2 sequences are handled directly (nothing, 1.0,
// or 0.5 each); otherwise per-column contributions are accumulated,
// all-gap sequences are forced to zero, and the result is normalized.
void MSA::SetHenikoffWeights() const
	{
	const unsigned uColCount = GetColCount();
	const unsigned uSeqCount = GetSeqCount();

	switch (uSeqCount)
		{
	case 0:
		return;
	case 1:
		m_Weights[0] = (WEIGHT) 1.0;
		return;
	case 2:
		m_Weights[0] = (WEIGHT) 0.5;
		m_Weights[1] = (WEIGHT) 0.5;
		return;
	default:
		break;
		}

	unsigned uSeq;
	for (uSeq = 0; uSeq < uSeqCount; ++uSeq)
		m_Weights[uSeq] = 0.0;

	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		CalcHenikoffWeightsCol(uCol);

	// A sequence made only of gaps gets no weight at all.
	for (uSeq = 0; uSeq < uSeqCount; ++uSeq)
		{
		if (IsGapSeq(uSeq))
			m_Weights[uSeq] = 0.0;
		}

	Normalize(m_Weights, uSeqCount);
	}
@@ -0,0 +1,124 @@
#include "muscle.h"
#include "msa.h"

/***
Compute Henikoff weights.
Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
J. Mol. Biol., 243(4):574-578.

Award each different residue an equal share of the weight, and then divide up
that weight equally among the sequences sharing the same residue. So if in a
position of a multiple alignment, r different residues are represented, a residue
represented in only one sequence contributes a score of 1/r to that sequence, whereas a
residue represented in s sequences contributes a score of 1/rs to each of the s
sequences. For each sequence, the contributions from each position are summed to give
a sequence weight.

Here we use the variant from PSI-BLAST, which (a) treats gaps as a 21st letter,
and (b) ignores columns that are perfectly conserved.

>>> WARNING -- I SUSPECT THIS DOESN'T WORK CORRECTLY <<<
***/

// Add one column's contribution to m_Weights using the PSI-BLAST flavour
// of Henikoff weighting described in the header comment of this file:
// gaps and wildcards are pooled into an extra letter (index MAX_ALPHA) and
// perfectly conserved columns contribute nothing.
//
// Each sequence receives 1/(r*s), where r is the number of distinct letters
// in the column (the pooled gap letter included) and s is the number of
// sequences sharing that sequence's letter. Earlier revisions added only
// 1/s, dropping the 1/r factor that the documented scheme requires.
//
// NOTE(review): assumes GetLetter() returns a value below MAX_ALPHA for any
// cell that is neither a gap nor a wildcard -- confirm.
void MSA::CalcHenikoffWeightsColPB(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();

// Compute letter counts in this column, and the number of distinct letters
	unsigned uLetterCount[MAX_ALPHA+1];
	memset(uLetterCount, 0, sizeof(uLetterCount));
	unsigned uDifferentLetterCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter;
		if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
			uLetter = MAX_ALPHA;
		else
			uLetter = GetLetter(uSeqIndex, uColIndex);
		if (1 == ++(uLetterCount[uLetter]))
			++uDifferentLetterCount;
		}

// A perfectly conserved column (one distinct letter) or an empty alignment
// (none) carries no information; skip it.
	if (uDifferentLetterCount <= 1)
		return;

// Compute weight contributions
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter;
		if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
			uLetter = MAX_ALPHA;
		else
			uLetter = GetLetter(uSeqIndex, uColIndex);
	// Non-zero: this sequence itself was counted in the first pass.
		const unsigned uDenom = uLetterCount[uLetter]*uDifferentLetterCount;
		m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
		}
	}

// True when every column of the given sequence is a gap (also true for an
// alignment with zero columns).
bool MSA::IsGapSeq(unsigned uSeqIndex) const
	{
	const unsigned uColCount = GetColCount();
	unsigned uCol = 0;
	// Walk forward while gaps continue; stopping early means a non-gap was hit.
	while (uCol < uColCount && IsGap(uSeqIndex, uCol))
		++uCol;
	return uCol == uColCount;
	}

void MSA::SetUniformWeights() const
	{
	const unsigned uSeqCount = GetSeqCount();
	if (0 == uSeqCount)
		return;

	const WEIGHT w = (WEIGHT) (1.0 / uSeqCount);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		m_Weights[uSeqIndex] = w;
	}

// Fill m_Weights with normalized weights from the PSI-BLAST variant of
// Henikoff weighting (see CalcHenikoffWeightsColPB).
// Alignments of 0, 1 or 2 sequences are handled directly; otherwise the
// per-column contributions are summed, all-gap sequences are zeroed, an
// all-zero result falls back to equal weights, and the vector is normalized.
void MSA::SetHenikoffWeightsPB() const
	{
	const unsigned uColCount = GetColCount();
	const unsigned uSeqCount = GetSeqCount();

	switch (uSeqCount)
		{
	case 0:
		return;
	case 1:
		m_Weights[0] = 1.0;
		return;
	case 2:
		m_Weights[0] = (WEIGHT) 0.5;
		m_Weights[1] = (WEIGHT) 0.5;
		return;
	default:
		break;
		}

	unsigned uSeq;
	for (uSeq = 0; uSeq < uSeqCount; ++uSeq)
		m_Weights[uSeq] = 0.0;

	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		CalcHenikoffWeightsColPB(uCol);

	// A sequence made only of gaps gets no weight at all.
	for (uSeq = 0; uSeq < uSeqCount; ++uSeq)
		{
		if (IsGapSeq(uSeq))
			m_Weights[uSeq] = 0.0;
		}

	// Identical sequences make every column perfectly conserved, so every
	// column is skipped and all weights stay zero; use equal weights then
	// so that Normalize() has a non-zero sum to work with.
	if (VectorIsZero(m_Weights, uSeqCount))
		VectorSet(m_Weights, uSeqCount, 1.0);

	Normalize(m_Weights, uSeqCount);
	}
@@ -0,0 +1,136 @@
#include "muscle.h"
#include <stdio.h>
#include <ctype.h>
#include "msa.h"
#include "textfile.h"

const unsigned uCharsPerLine = 60;
const int MIN_NAME = 10;
const int MAX_NAME = 32;

extern void AssignColors(const MSA &a, int **Colors);

// Allocate a (sequence count) x (column count) matrix of ints, zero it,
// and let AssignColors() fill it in. Rows and the row-pointer array are
// allocated with new[]; the caller receives ownership of the result.
static int **MakeColors(const MSA &a)
	{
	const unsigned uRows = a.GetSeqCount();
	const unsigned uCols = a.GetColCount();

	int **Matrix = new int *[uRows];
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		int *Row = new int[uCols];
		for (unsigned uCol = 0; uCol < uCols; ++uCol)
			Row[uCol] = 0;
		Matrix[uRow] = Row;
		}

	AssignColors(a, Matrix);
	return Matrix;
	}

// Background colors as six-digit hex RGB strings.
#define	COLOR_WHITE		"FFFFFF"
#define	COLOR_GRAY		"C0C0C0"
#define	COLOR_BLACK		"000000"
#define COLOR_RED		"FF0000"
#define COLOR_GREEN		"00FF00"
#define COLOR_BLUE		"5590FF"
#define COLOR_LIGHTBLUE	"77FFFF"

// Closes the current SPAN and opens one with background color c.
#define X(c)	File.PutString("</SPAN><SPAN STYLE=\"background-color:#" c "\">");

// Emit the markup that switches the background from color code From to
// color code To. Nothing is written when the two codes are equal or when
// To is not one of the known codes 0 (white), 1 (gray), 2 (blue),
// 3 (light blue).
static void ChangeColor(TextFile &File, int From, int To)
	{
	if (From != To)
		{
		if (0 == To)
			{
			X(COLOR_WHITE)
			}
		else if (1 == To)
			{
			X(COLOR_GRAY)
			}
		else if (2 == To)
			{
			X(COLOR_BLUE)
			}
		else if (3 == To)
			{
			X(COLOR_LIGHTBLUE)
			}
		}
	}

#define COLOR_WINDOW "FFEEE0"

// Length of the leading blank-delimited word of a sequence name.
static int FirstWordLength(const char *ptrName)
	{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
	}

// Release the matrix built by MakeColors() (rows and row-pointer array
// are both allocated there with new[]).
static void FreeColors(int **Colors, unsigned uSeqCount)
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		delete[] Colors[uSeqIndex];
	delete[] Colors;
	}

// Write the alignment to File as a color-coded HTML page.
//
// Sequences are printed in blocks of uCharsPerLine columns. Each row starts
// with the first word of the sequence name, padded or truncated to a common
// width between MIN_NAME and MAX_NAME, followed by the residues. Residues
// whose color code is 0 are written in lower case, all others in upper case,
// and the background color changes whenever the color code changes.
//
// An alignment with zero columns produces the page header and footer only.
// NOTE(review): the color matrix is freed before returning; this assumes
// AssignColors() does not keep the pointer -- confirm.
void MSA::ToHTMLFile(TextFile &File) const
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	File.PutString("<HTML>\n");
	File.PutString("<BODY BGCOLOR=\"#" COLOR_WINDOW "\">\n");
	File.PutString("<PRE>");

	int **Colors = MakeColors(*this);

// Width of the name field: longest first word, clamped to [MIN_NAME, MAX_NAME]
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const int iLength = FirstWordLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
		}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

// Guard the unsigned subtraction: with zero columns (uColCount - 1) would
// wrap around and the loops below would read far outside the matrix.
	const unsigned uLineCount =
	  (0 == uColCount) ? 0 : (uColCount - 1)/uCharsPerLine + 1;
	int CurrentColor = -1;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
		{
		File.PutString("\n");
		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;
		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = FirstWordLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;
		// Blank-pad, copy the name, then cut at the common field width
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

		// Force a color change on the first residue of every row
			CurrentColor = -1;
			File.PutString("<SPAN STYLE=\"background-color:#" COLOR_WINDOW "\">");
			File.PutFormat("%s      ", Name);
			File.PutString("<SPAN STYLE=\"background-color:#FFFFFF\">");
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
				{
				const int Color = Colors[uSeqIndex][uColIndex];
				ChangeColor(File, CurrentColor, Color);
				CurrentColor = Color;
			// tolower/toupper require a value representable as unsigned char
				const unsigned char c = (unsigned char) GetChar(uSeqIndex, uColIndex);
				if (Color == 0)
					File.PutFormat("%c", tolower(c));
				else
					File.PutFormat("%c", toupper(c));
				}
			File.PutString("\n");
			}
		}

	FreeColors(Colors, uSeqCount);

	File.PutString("</SPAN>\n");
	File.PutString("</PRE>\n");
	File.PutString("</BODY>\n");
	File.PutString("</HTML>\n");
	}
@@ -0,0 +1,42 @@
#include "muscle.h"
#include "profile.h"

extern void TomHydro(ProfPos *Prof, unsigned Length);

// Apply the hydrophobicity heuristic to a profile.
// Only amino-acid alphabets are processed. When g_bTomHydro is set the work
// is delegated to TomHydro(). Otherwise columns are scanned for runs of
// hydrophobic positions; once a run reaches g_uHydrophobicRunLength, the
// gap-open and gap-close scores of the column half a window behind the
// current one are multiplied by g_dHydroFactor. Profiles no longer than
// the run length, or a run length of zero, leave the profile untouched.
void Hydro(ProfPos *Prof, unsigned uLength)
	{
	if (ALPHA_Amino != g_Alpha)
		return;

	if (g_bTomHydro)
		{
		TomHydro(Prof, uLength);
		return;
		}

	const unsigned uWindow = g_uHydrophobicRunLength;
	if (0 == uWindow || uLength <= uWindow)
		return;

	const unsigned uHalf = uWindow/2;
	const unsigned uStop = uLength - uHalf;
	unsigned uRun = 0;
	for (unsigned uCol = uHalf; uCol < uStop; ++uCol)
		{
		if (!IsHydrophobic(Prof[uCol].m_fcCounts))
			{
			// Run broken; start counting again.
			uRun = 0;
			continue;
			}

		if (++uRun < uWindow)
			continue;

		ProfPos &Target = Prof[uCol - uHalf];
		Target.m_scoreGapOpen *= (SCORE) g_dHydroFactor;
		Target.m_scoreGapClose *= (SCORE) g_dHydroFactor;
		}
	}
@@ -0,0 +1,354 @@
#include "muscle.h"
#include <math.h>

// Convert a score to a probability: 2^(Score/INTSCALE).
// Scores at or below MINUS_INFINITY map to probability zero.
PROB ScoreToProb(SCORE Score)
	{
	const bool bImpossible = (MINUS_INFINITY >= Score);
	if (bImpossible)
		return 0.0;

	const double dExponent = (double) Score/INTSCALE;
	return (PROB) pow(2.0, dExponent);
	}

//#if	0
//static const double log2e = log2(exp(1.0));
//
//double lnTolog2(double ln)
//	{
//	return ln*log2e;
//	}
//
//double log2(double x)
//	{
//	if (0 == x)
//		return MINUS_INFINITY;
//
//	static const double dInvLn2 = 1.0/log(2.0);
//// Multiply by inverse of log(2) just in case multiplication
//// is faster than division.
//	return log(x)*dInvLn2;
//	}
//#endif

//SCORE ProbToScore(PROB Prob)
//	{
//	if (0.0 == Prob)
//		return MINUS_INFINITY;
////	return (SCORE) floor(INTSCALE*log2(Prob));
//	return (SCORE) log2(Prob);
//	}

// Scale a non-negative double by INTSCALE and store it as a WEIGHT.
WEIGHT DoubleToWeight(double d)
	{
	assert(d >= 0);
	const double dScaled = INTSCALE*d;
	return (WEIGHT) dScaled;
	}

// Inverse of DoubleToWeight: divide the stored weight by INTSCALE.
double WeightToDouble(WEIGHT w)
	{
	const double dStored = (double) w;
	const double dScale = (double) INTSCALE;
	return dStored / dScale;
	}

// Scale a double by INTSCALE and store it as a SCORE.
SCORE DoubleToScore(double d)
	{
	const double dScaled = d*(double) INTSCALE;
	return (SCORE)(dScaled);
	}

// Approximate equality of two scores; defers to BTEq for the tolerance.
bool ScoreEq(SCORE s1, SCORE s2)
	{
	const bool bClose = BTEq(s1, s2);
	return bClose;
	}

// Approximate equality for BASETYPE values: true when the absolute
// difference is below 0.0001, or when the difference divided by the sum
// of the magnitudes is below 0.005.
static bool BTEq2(BASETYPE b1, BASETYPE b2)
	{
	const double dGap = fabs(b1 - b2);
	const double dMagnitude = fabs(b1) + fabs(b2);
	return dGap < 0.0001 || dGap/dMagnitude < 0.005;
	}

bool BTEq(double b1, double b2)
	{
	return BTEq2((BASETYPE) b1, (BASETYPE) b2);
	}

//const double dLn2 = log(2.0);

//// pow2(x)=2^x
//double pow2(double x)
//	{
//	if (MINUS_INFINITY == x)
//		return 0;
//	return exp(x*dLn2);
//	}

//// lp2(x) = log2(1 + 2^-x), x >= 0
//double lp2(double x)
//	{
//	return log2(1 + pow2(-x));
//	}

// SumLog(x, y) = log2(2^x + 2^y)
//SCORE SumLog(SCORE x, SCORE y)
//	{
//	return (SCORE) log2(pow2(x) + pow2(y));
//	}
//
//// SumLog(x, y, z) = log2(2^x + 2^y + 2^z)
//SCORE SumLog(SCORE x, SCORE y, SCORE z)
//	{
//	return (SCORE) log2(pow2(x) + pow2(y) + pow2(z));
//	}
//
//// SumLog(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z)
//SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z)
//	{
//	return (SCORE) log2(pow2(w) + pow2(x) + pow2(y) + pow2(z));
//	}

//SCORE lp2Fast(SCORE x)
//	{
//	assert(x >= 0);
//	const int iTableSize = 1000;
//	const double dRange = 20.0;
//	const double dScale = dRange/iTableSize;
//	static SCORE dValue[iTableSize];
//	static bool bInit = false;
//	if (!bInit)
//		{
//		for (int i = 0; i < iTableSize; ++i)
//			dValue[i] = (SCORE) lp2(i*dScale);
//		bInit = true;
//		}
//	if (x >= dRange)
//		return 0.0;
//	int i = (int) (x/dScale);
//	assert(i >= 0 && i < iTableSize);
//	SCORE dResult = dValue[i];
//	assert(BTEq(dResult, lp2(x)));
//	return dResult;
//	}
//
//// SumLog(x, y) = log2(2^x + 2^y)
//SCORE SumLogFast(SCORE x, SCORE y)
//	{
//	if (MINUS_INFINITY == x)
//		{
//		if (MINUS_INFINITY == y)
//			return MINUS_INFINITY;
//		return y;
//		}
//	else if (MINUS_INFINITY == y)
//		return x;
//
//	SCORE dResult;
//	if (x > y)
//		dResult = x + lp2Fast(x-y);
//	else
//		dResult = y + lp2Fast(y-x);
//	assert(SumLog(x, y) == dResult);
//	return dResult;
//	}
//
//SCORE SumLogFast(SCORE x, SCORE y, SCORE z)
//	{
//	SCORE dResult = SumLogFast(x, SumLogFast(y, z));
//	assert(SumLog(x, y, z) == dResult);
//	return dResult;
//	}

//SCORE SumLogFast(SCORE w, SCORE x, SCORE y, SCORE z)
//	{
//	SCORE dResult = SumLogFast(SumLogFast(w, x), SumLogFast(y, z));
//	assert(SumLog(w, x, y, z) == dResult);
//	return dResult;
//	}

// Sum of the first n elements of v, accumulated front to back.
double VecSum(const double v[], unsigned n)
	{
	double dTotal = 0.0;
	const double *ptrEnd = v + n;
	for (const double *ptr = v; ptr != ptrEnd; ++ptr)
		dTotal += *ptr;
	return dTotal;
	}

// Scale p[0..n-1] in place so that the elements sum to one.
// A zero sum is reported through Quit().
void Normalize(PROB p[], unsigned n)
	{
	PROB dTotal = 0.0;
	for (unsigned uIn = 0; uIn < n; ++uIn)
		dTotal += p[uIn];

	if (0.0 == dTotal)
		Quit("Normalize, sum=0");

	for (unsigned uOut = 0; uOut < n; ++uOut)
		p[uOut] /= dTotal;
	}

// Scale p[0..n-1] in place so that the elements sum to one; a vector whose
// sum is zero is left unchanged.
void NormalizeUnlessZero(PROB p[], unsigned n)
	{
	PROB dTotal = 0.0;
	for (unsigned uIn = 0; uIn < n; ++uIn)
		dTotal += p[uIn];

	if (0.0 != dTotal)
		{
		for (unsigned uOut = 0; uOut < n; ++uOut)
			p[uOut] /= dTotal;
		}
	}

// Scale p[0..n-1] in place so that the elements sum to dRequiredTotal.
// A zero sum is reported through Quit().
void Normalize(PROB p[], unsigned n, double dRequiredTotal)
	{
	double dTotal = 0.0;
	for (unsigned uIn = 0; uIn < n; ++uIn)
		dTotal += p[uIn];

	if (0.0 == dTotal)
		Quit("Normalize, sum=0");

	const double dScale = dRequiredTotal / dTotal;
	for (unsigned uOut = 0; uOut < n; ++uOut)
		p[uOut] *= (PROB) dScale;
	}

// True when every one of the first n values is exactly zero (also true
// for n == 0).
bool VectorIsZero(const double dValues[], unsigned n)
	{
	unsigned uIndex = 0;
	while (uIndex < n && !(dValues[uIndex] != 0.0))
		++uIndex;
	return uIndex == n;
	}

// Store d into each of the first n elements of dValues.
void VectorSet(double dValues[], unsigned n, double d)
	{
	double *ptrEnd = dValues + n;
	for (double *ptr = dValues; ptr != ptrEnd; ++ptr)
		*ptr = d;
	}

// True when every one of the first n values is exactly zero (also true
// for n == 0). Single-precision overload.
bool VectorIsZero(const float dValues[], unsigned n)
	{
	unsigned uIndex = 0;
	while (uIndex < n && !(dValues[uIndex] != 0.0))
		++uIndex;
	return uIndex == n;
	}

// Store d into each of the first n elements of dValues.
// Single-precision overload.
void VectorSet(float dValues[], unsigned n, float d)
	{
	float *ptrEnd = dValues + n;
	for (float *ptr = dValues; ptr != ptrEnd; ++ptr)
		*ptr = d;
	}

// Correlation coefficient of the first uCount elements of P and Q:
// sum(dP*dQ) / sqrt(sum(dP^2) * sum(dQ^2)), with dP and dQ the deviations
// from the respective means. Returns 0 when the numerator is exactly zero.
double Correl(const double P[], const double Q[], unsigned uCount)
	{
	unsigned i;

	// Means of both series.
	double dTotalP = 0.0;
	double dTotalQ = 0.0;
	for (i = 0; i < uCount; ++i)
		{
		dTotalP += P[i];
		dTotalQ += Q[i];
		}
	const double dMeanP = dTotalP/uCount;
	const double dMeanQ = dTotalQ/uCount;

	// Cross product and squared deviations.
	double dCross = 0.0;
	double dSquaresP = 0.0;
	double dSquaresQ = 0.0;
	for (i = 0; i < uCount; ++i)
		{
		const double dP = P[i] - dMeanP;
		const double dQ = Q[i] - dMeanQ;
		dCross += dP*dQ;
		dSquaresP += dP*dP;
		dSquaresQ += dQ*dQ;
		}

	if (0 == dCross)
		return 0;
	return dCross / sqrt(dSquaresP*dSquaresQ);
	}

// Correlation coefficient of the first uCount elements of P and Q:
// sum(dP*dQ) / sqrt(sum(dP^2) * sum(dQ^2)), with dP and dQ the deviations
// from the respective means. Returns 0 when the numerator is exactly zero.
// Single-precision overload.
float Correl(const float P[], const float Q[], unsigned uCount)
	{
	unsigned i;

	// Means of both series.
	float dTotalP = 0.0;
	float dTotalQ = 0.0;
	for (i = 0; i < uCount; ++i)
		{
		dTotalP += P[i];
		dTotalQ += Q[i];
		}
	const float dMeanP = dTotalP/uCount;
	const float dMeanQ = dTotalQ/uCount;

	// Cross product and squared deviations.
	float dCross = 0.0;
	float dSquaresP = 0.0;
	float dSquaresQ = 0.0;
	for (i = 0; i < uCount; ++i)
		{
		const float dP = P[i] - dMeanP;
		const float dQ = Q[i] - dMeanQ;
		dCross += dP*dQ;
		dSquaresP += dP*dP;
		dSquaresQ += dQ*dQ;
		}

	if (0 == dCross)
		return 0;
	return dCross / (float) sqrt(dSquaresP*dSquaresQ);
	}

// Simple (but slow, quadratic) computation of 1-based ranks that allows
// for ties: tied values all receive the average of the ranks they span.
// Correctness and simplicity are priorities over speed here.
void Rank(const float P[], float Ranks[], unsigned uCount)
	{
	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
		{
		const double dValue = P[uTarget];
		unsigned uNumberLess = 0;
		unsigned uNumberEqual = 0;
		unsigned uNumberGreater = 0;
		for (unsigned uOther = 0; uOther < uCount; ++uOther)
			{
			const double dOther = P[uOther];
			if (dOther < dValue)
				++uNumberLess;
			else if (dOther == dValue)
				++uNumberEqual;
			else
				++uNumberGreater;
			}
		// Every value is at least equal to itself.
		assert(uNumberEqual >= 1);
		assert(uNumberEqual + uNumberLess + uNumberGreater == uCount);
		Ranks[uTarget] = (float) (1 + uNumberLess + (uNumberEqual - 1)/2.0);
		}
	}

// 1-based ranks with ties for double input: tied values all receive the
// average of the ranks they span. Quadratic in uCount.
void Rank(const double P[], double Ranks[], unsigned uCount)
	{
	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
		{
		const double dValue = P[uTarget];
		unsigned uNumberLess = 0;
		unsigned uNumberEqual = 0;
		unsigned uNumberGreater = 0;
		for (unsigned uOther = 0; uOther < uCount; ++uOther)
			{
			const double dOther = P[uOther];
			if (dOther < dValue)
				++uNumberLess;
			else if (dOther == dValue)
				++uNumberEqual;
			else
				++uNumberGreater;
			}
		// Every value is at least equal to itself.
		assert(uNumberEqual >= 1);
		assert(uNumberEqual + uNumberLess + uNumberGreater == uCount);
		Ranks[uTarget] = (double) (1 + uNumberLess + (uNumberEqual - 1)/2.0);
		}
	}

// Sum of the first 20 entries of Counts, accumulated front to back.
FCOUNT SumCounts(const FCOUNT Counts[])
	{
	FCOUNT Total = 0;
	const FCOUNT *ptrEnd = Counts + 20;
	for (const FCOUNT *ptr = Counts; ptr != ptrEnd; ++ptr)
		Total += *ptr;
	return Total;
	}
@@ -0,0 +1,210 @@
// IntMath.h: Header for doing fractional math with integers for speed.

#ifndef IntMath_h
#define	IntMath_h

typedef float BASETYPE;
//typedef double BASETYPE;

// Scaling factor used to store certain floating point
// values as integers to a few significant figures.
//const int INTSCALE = 1000;
const int INTSCALE = 1;

// Type for a probability in range 0.0 to 1.0.
typedef BASETYPE PROB;

// Type for a log-odds integer score.
// Stored as log2(PROB)*INTSCALE.
//typedef int	SCORE;
typedef BASETYPE SCORE;

// Type for a weight.
// Stored as w*INTSCALE where w is in range 0.0 to 1.0.
//typedef unsigned WEIGHT;
typedef BASETYPE WEIGHT;

// Type for a fractional weighted count stored as n*WEIGHT/N
// where n=measured count (integer >= 0) and N is total for
// the distribution (e.g., n=number of residues of a given
// type in a column, N=number of residues in the column).
// Hence values in an FCOUNT variable range from 0..INTSCALE
// as an integer, representing "true" values 0.0 to 1.0.
//typedef unsigned FCOUNT;
typedef BASETYPE FCOUNT;

// Representation of -infinity. Value should
// be large and negative, but not so large
// that adding a few of them overflows.
// TODO: Multiplied by 10 to work around bug
// when aligning Bali 1ckaA in ref4, which is
// so long that B->Mmax got to -infinity, causing
// traceback to fail.
//const int MINUS_INFINITY = -10000000;
const BASETYPE MINUS_INFINITY = (BASETYPE) -1e37;
const BASETYPE PLUS_INFINITY = (BASETYPE) 1e37;

// Probability relative to a null model
typedef double RPROB;

PROB ScoreToProb(SCORE Score);
SCORE ProbToScore(PROB Prob);
SCORE DoubleToScore(double d);
WEIGHT DoubleToWeight(double d);
double WeightToDouble(WEIGHT w);
SCORE MulScoreWeight(SCORE Score, WEIGHT Weight);
bool ScoreEq(SCORE s1, SCORE s2);
bool BTEq(double b1, double b2);

// Convert a stored score back to a plain double by dividing by INTSCALE.
static double ScoreToDouble(SCORE Score)
	{
	const double dStored = (double) Score;
	const double dScale = (double) INTSCALE;
	return dStored / dScale;
	}

#if	0
// In-line assembler for Result = (x*y)/z
// Note that imul and idiv will do 64-bit arithmetic
// on 32-bit operands, so this shouldn't overflow
// Can't write this efficiently in C/C++ (would
// often overflow 32 bits).
#define MulDivAssign(Result, x, y, z)	\
	{									\
	int X = (x);						\
	int Y = (y);						\
	int Z = (z);						\
	_asm mov	eax,X					\
	_asm imul	Y						\
	_asm mov	ecx,Z					\
	_asm idiv	ecx						\
	_asm mov	Result,eax				\
	}
#else
#define MulDivAssign(Result, x, y, z)	Result = (((x)*(y))/(z))
#endif

#define	MulScoreWeight(r, s, w)		MulDivAssign(r, s, w, INTSCALE)
#define MulWeightWCount(r, wt, wc)	MulDivAssign(r, wt, wc, INTSCALE)
#define MulFCountScore(r, fc, sc)	MulDivAssign(r, fc, sc, INTSCALE)

#if	_DEBUG

// Debug-build addition of two scores. MINUS_INFINITY is absorbing: if
// either operand equals it, or the sum falls below it, the result is
// MINUS_INFINITY.
static inline SCORE Add2(SCORE a, SCORE b)
	{
	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
		return MINUS_INFINITY;

	const SCORE sum = a + b;
	return (sum < MINUS_INFINITY) ? MINUS_INFINITY : sum;
	}

// Debug-build sum of three scores, built as Add2(Add2(a, b), c).
static inline SCORE Add3(SCORE a, SCORE b, SCORE c)
	{
	const SCORE ab = Add2(a, b);
	return Add2(ab, c);
	}

// Debug-build sum of four scores, added pairwise: (a + b) + (c + d).
static inline SCORE Add4(SCORE a, SCORE b, SCORE c, SCORE d)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	return Add2(ab, cd);
	}

// Debug-build sum of five scores: Add3 of (a + b), (c + d) and e.
static inline SCORE Add5(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	return Add3(ab, cd, e);
	}

// Debug-build sum of six scores: Add3 of the pairs (a + b), (c + d), (e + f).
static inline SCORE Add6(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	const SCORE ef = Add2(e, f);
	return Add3(ab, cd, ef);
	}

// Debug-build sum of seven scores: Add4 of (a + b), (c + d), (e + f) and g.
static inline SCORE Add7(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f, SCORE g)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	const SCORE ef = Add2(e, f);
	return Add4(ab, cd, ef, g);
	}

// Debug-build product of two scores. If either operand equals
// MINUS_INFINITY the result is MINUS_INFINITY; otherwise a plain product
// (no overflow check is performed).
static inline SCORE Mul2(SCORE a, SCORE b)
	{
	const bool bInfinite = (MINUS_INFINITY == a || MINUS_INFINITY == b);
	if (bInfinite)
		return MINUS_INFINITY;
	return a*b;
	}

// Debug-build difference a - b. MINUS_INFINITY is absorbing: if either
// operand equals it, or the difference falls below it, the result is
// MINUS_INFINITY.
static inline SCORE Sub2(SCORE a, SCORE b)
	{
	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
		return MINUS_INFINITY;

	const SCORE diff = a - b;
	return (diff < MINUS_INFINITY) ? MINUS_INFINITY : diff;
	}

// Debug-build division of a score by an integer; MINUS_INFINITY stays
// MINUS_INFINITY.
static inline SCORE Div2(SCORE a, int b)
	{
	return (MINUS_INFINITY == a) ? MINUS_INFINITY : a/b;
	}

//static inline SCORE MulScoreWeight(SCORE s, WEIGHT w)
//	{
//	SCORE Prod = s*(SCORE) w;
//	assert(Prod < OVERFLOW_WARN);
//	extern void Log(const char Format[], ...);
//	if (Prod/(SCORE) w != s)
//		Log("**WARRNING MulScoreWeight Prod=%d w=%d Prod/w=%d s=%d\n",
//		  Prod,
//		  w,
//		  Prod/(SCORE) w,
//		  s);
//	assert(Prod/ (SCORE) w == s);
//	return Prod/INTSCALE;
//	}
//
//static inline WCOUNT MulWeightWCount(WEIGHT wt, WCOUNT wc)
//	{
//	return (wt*wc)/INTSCALE;
//	}

#else
#define	Add2(a, b)					((a) + (b))
#define Sub2(a, b)					((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) - (b)))
#define Div2(a, b)					((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) / (b)))
#define	Add3(a, b, c)				((a) + (b) + (c))
#define	Add4(a, b, c, d)			((a) + (b) + (c) + (d))
#define	Add5(a, b, c, d, e)			((a) + (b) + (c) + (d) + (e))
#define	Add6(a, b, c, d, e, f)		((a) + (b) + (c) + (d) + (e) + (f))
#define	Add7(a, b, c, d, e, f, g)	((a) + (b) + (c) + (d) + (e) + (f) + (g))
//#define	MulScoreWeight(s, w)		(((s)*(SCORE) (w))/INTSCALE)
#define	Mul2(a, b)					((a)*(b))
#endif

//static inline SCORE MulFCountScore(FCOUNT fc, SCORE sc)
//	{
//// Fast way to say "if (fc >= 2^15 || sc >= 2^15)":
//	if ((fc | sc) & 0xffff1000)
//		{
//		SCORE Score = ((fc+5)/10)*sc;
//		assert(Score < assert);
//		OVERFLOW_WARN(Score > MINUS_INFINITY);
//		return Score/(INTSCALE/10);
//		}
//	SCORE Score = fc*sc;
//	assert(Score < OVERFLOW_WARN);
//	assert(Score > MINUS_INFINITY);
//	return Score/INTSCALE;
//	}

#endif	// IntMath_h
@@ -0,0 +1,100 @@
#include "muscle.h"
#include "textfile.h"
#include "msa.h"
#include "profile.h"
#include "pwpath.h"
#include "tree.h"

#define TRACE	0

static void MSAFromFileName(const char *FileName, MSA &a)
	{
	TextFile File(FileName);
	a.FromFile(File);
	}

// Number the sequences of msa 0..n-1, build a tree for it with the first
// set of clustering/distance/rooting options, register that tree through
// SetMuscleTree(), and return the profile computed by ProfileFromMSA().
static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	unsigned uId = 0;
	while (uId < uSeqCount)
		{
		msa.SetSeqId(uId, uId);
		++uId;
		}

	TreeFromMSA(msa, tree, g_Cluster1, g_Distance1, g_Root1);
	SetMuscleTree(tree);

	ProfPos *Prof = ProfileFromMSA(msa);
	return Prof;
	}

// Local alignment of two input alignments (the -sw command, per the error
// message below). Loads the files named by g_pstrFileName1 and
// g_pstrFileName2, selects the alphabet, builds a profile for each input,
// aligns the two profiles with SW() and writes the combined alignment to
// g_pstrOutFileName.
void Local()
	{
	const bool bBothInputsGiven = (0 != g_pstrFileName1 && 0 != g_pstrFileName2);
	if (!bBothInputsGiven)
		Quit("Must specify both -in1 and -in2 for -sw");

	SetSeqWeightMethod(g_SeqWeight1);

	MSA msa1;
	MSA msa2;

	MSAFromFileName(g_pstrFileName1, msa1);
	MSAFromFileName(g_pstrFileName2, msa2);

	// Pick the alphabet; when guessing, only the first input is consulted.
	ALPHA Alpha = ALPHA_Undefined;
	if (SEQTYPE_Auto == g_SeqType)
		Alpha = msa1.GuessAlpha();
	else if (SEQTYPE_Protein == g_SeqType)
		Alpha = ALPHA_Amino;
	else if (SEQTYPE_DNA == g_SeqType)
		Alpha = ALPHA_DNA;
	else if (SEQTYPE_RNA == g_SeqType)
		Alpha = ALPHA_RNA;
	else
		Quit("Invalid SeqType");
	SetAlpha(Alpha);

	msa1.FixAlpha();
	msa2.FixAlpha();

	// Nucleotide input switches the profile-profile score to PPSCORE_SPN.
	const bool bNucleo = (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha);
	if (bNucleo)
		SetPPScore(PPSCORE_SPN);

	// Size the id table for the larger of the two inputs.
	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();
	unsigned uMaxSeqCount = uSeqCount2;
	if (uSeqCount1 > uSeqCount2)
		uMaxSeqCount = uSeqCount1;
	MSA::SetIdCount(uMaxSeqCount);

	unsigned uLength1 = msa1.GetColCount();
	unsigned uLength2 = msa2.GetColCount();

	Tree tree1;
	Tree tree2;

	ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
	ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);

	PWPath Path;
	SW(Prof1, uLength1, Prof2, uLength2, Path);

#if	TRACE
	Path.LogMe();
#endif

	MSA msaOut;
	AlignTwoMSAsGivenPathSW(Path, msa1, msa2, msaOut);

#if	TRACE
	msaOut.LogMe();
#endif

	TextFile fileOut(g_pstrOutFileName, true);
	msaOut.ToFile(fileOut);
	}
@@ -0,0 +1,72 @@
//@@TODO reconcile /muscle with /muscle3.6

#include "muscle.h"
#include <stdio.h>
#ifdef	WIN32
#include <windows.h>	// for SetPriorityClass()
#include <io.h>			// for isatty()
#else
#include <unistd.h>		// for isatty()
#endif

const char *MUSCLE_LONG_VERSION	= "MUSCLE v" SHORT_VERSION "."
#include "svnversion.h"
" by Robert C. Edgar";

int g_argc;
char **g_argv;

// Program entry point.
// Records argc/argv in globals, installs the new-handler, parses the
// command line and parameters, opens the log file, then:
//   - prints the version string and exits if g_bVersion is set;
//   - prints credits unless g_bQuiet is set;
//   - prints usage and exits if no command was given and stdin is a terminal;
//   - otherwise calls Run(), wrapped in a catch-all handler when
//     g_bCatchExceptions is set.
// The process always terminates through exit().
int main(int argc, char **argv)
	{
// Use #ifdef, matching the guard around the <windows.h> include at the top
// of this file, so the call below is compiled exactly when its header is.
#ifdef	WIN32
// Multi-tasking does not work well in CPU-bound
// console apps running under Win32.
// Reducing the process priority allows GUI apps
// to run responsively in parallel.
	SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
#endif
	g_argc = argc;
	g_argv = argv;

	SetNewHandler();
	SetStartTime();
	// Skip argv[0] (the program name).
	ProcessArgVect(argc - 1, argv + 1);
	SetParams();
	SetLogFile();

	//extern void TestSubFams(const char *);
	//TestSubFams(g_pstrInFileName);
	//return 0;

	if (g_bVersion)
		{
		printf("%s\n", MUSCLE_LONG_VERSION);
		exit(EXIT_SUCCESS);
		}

	if (!g_bQuiet)
		Credits();

	// isatty(0): show usage only when stdin is a terminal, not a pipe or file.
	if (MissingCommand() && isatty(0))
		{
		Usage();
		exit(EXIT_SUCCESS);
		}

	if (g_bCatchExceptions)
		{
		try
			{
			Run();
			}
		catch (...)
			{
			OnException();
			exit(EXIT_Except);
			}
		}
	else
		Run();

	exit(EXIT_Success);
	}
@@ -0,0 +1,2 @@
g++ -O3 -march=pentiumpro -mcpu=pentiumpro -funroll-loops -Winline -DNDEBUG=1 -o muscle aligngivenpath.o aligngivenpathsw.o aligntwomsas.o aligntwoprofs.o alpha.o anchors.o blosumla.o clust.o cluster.o clwwt.o cons.o diaglist.o difftrees.o difftreese.o distcalc.o distfunc.o domuscle.o dosp.o dpreglist.o edgelist.o enumopts.o enumtostr.o estring.o fasta.o fastclust.o fastdist.o fastdistjones.o fastdistkbit.o fastdistkmer.o fastdistmafft.o fastscorepath2.o finddiags.o glbalign.o glbaligndiag.o glbalignle.o glbalignsimple.o glbalignsp.o globals.o globalslinux.o globalswin32.o gonnet.o gotowt.o henikoffweight.o henikoffweightpb.o hydro.o intmath.o local.o main.o makerootmsa.o mpam200.o msa.o msa2.o msadistkimura.o msf.o objscore.o objscore2.o onexception.o options.o pam200mafft.o params.o phy.o phy2.o phy3.o phy4.o phyfromclust.o phyfromfile.o phytofile.o posgap.o profile.o profilefrommsa.o progalign.o progress.o progressivealign.o pwpath.o realigndiffs.o realigndiffse.o refine.o refinehoriz.o refinesubfams.o refinetree.o refinetreee.o refinevert.o savebest.o scorehistory.o scoremx.o seq.o seqvect.o setblosumweights.o setgscweights.o setnewhandler.o sw.o textfile.o threewaywt.o traceback.o tracebackopt.o tracebacksw.o treefrommsa.o typetostr.o upgma2.o usage.o validateids.o vtml2.o -lm -static
strip muscle
@@ -0,0 +1,231 @@
#include "muscle.h"
#include "tree.h"
#include "seqvect.h"
#include "profile.h"
#include "msa.h"
#include "pwpath.h"
#include "estring.h"

#define TRACE		0
#define VALIDATE	0

// Thread one sequence through a pairwise alignment path, producing the
// gapped version of the sequence.
//   s       sequence to expand; letters are consumed left to right
//   Path    path whose edges have type 'M', 'D' or 'I'
//   bRight  when true, 'I' and 'D' edges swap roles before being applied
//   sOut    receives the result; it is cleared and given the name of s
// For each edge, 'M' and 'I' copy the next letter of s and 'D' emits a
// gap ('-'). Any other edge type is reported through Quit().
//
// The earlier version also called PathToEstrings() here and kept two
// unused local pointers; the results were never read or released, so the
// call has been removed along with the unused sequence-length local.
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
	{
	const unsigned uEdgeCount = Path.GetEdgeCount();

	sOut.Clear();
	sOut.SetName(s.GetName());
	unsigned uPos = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cType = Edge.cType;
		if (bRight)
			{
// Mirror the edge so it is seen from the other side of the path.
			if (cType == 'I')
				cType = 'D';
			else if (cType == 'D')
				cType = 'I';
			}
		switch (cType)
			{
		case 'M':
		case 'I':
// Both edge types consume one letter of the input sequence.
			sOut.AppendChar(s[uPos++]);
			break;
		case 'D':
			sOut.AppendChar('-');
			break;
		default:
			Quit("PathSeq, invalid edge type %c", cType);
			}
		}
	}

#if	VALIDATE

// Validation-only reference implementation (compiled when VALIDATE is
// non-zero): builds the root-alignment row for one leaf by walking from
// the leaf up to the root and applying each ancestor's path in turn.
//   s               leaf sequence
//   GuideTree       guide tree that is walked towards the root
//   uLeafNodeIndex  tree node index of the leaf holding s
//   Nodes           per-node data; only m_Path of each ancestor is read
//   sRoot           receives the sequence as it appears at the root
static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
  const ProgNode Nodes[], Seq &sRoot)
	{
	sRoot.Copy(s);
	unsigned uNodeIndex = uLeafNodeIndex;
	for (;;)
		{
	  	unsigned uParent = GuideTree.GetParent(uNodeIndex);
// The root has no parent: the walk is complete.
		if (NULL_NEIGHBOR == uParent)
			break;
// As written, bRight is true when the current node is the LEFT child
// of its parent; PathSeq swaps 'I'/'D' edges when it is true.
		bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
		uNodeIndex = uParent;
		const PWPath &Path = Nodes[uNodeIndex].m_Path;
		Seq sTmp;
		PathSeq(sRoot, Path, bRight, sTmp);
		sTmp.SetId(0);
// The expanded sequence becomes the input for the next level up.
		sRoot.Copy(sTmp);
		}
	}

#endif	// VALIDATE

// Build the root-alignment row for one leaf using e-strings: the leaf's
// e-string is multiplied by the e-string stored at each ancestor on the
// way to the root, and the final product is applied to the sequence.
//   s               leaf sequence
//   GuideTree       guide tree that is walked towards the root
//   uLeafNodeIndex  tree node index of the leaf holding s
//   Nodes           per-node data; m_EstringL / m_EstringR of ancestors are read
//   sRoot           receives the result of EstringOp on s
//   Estring1/2      caller-owned scratch buffers used alternately
// Returns the final e-string, which points into Estring1 or Estring2 and
// is therefore only valid until the buffers are reused or released.
static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
  const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)
	{
	short *EstringCurr = Estring1;
	short *EstringNext = Estring2;

// Initial e-string: one entry holding the sequence length, then a 0.
// NOTE(review): the unsigned length is stored in a short, so lengths
// above the range of short would not survive the conversion -- confirm
// whether callers limit sequence length.
	const unsigned uSeqLength = s.Length();
	EstringCurr[0] = uSeqLength;
	EstringCurr[1] = 0;

	unsigned uNodeIndex = uLeafNodeIndex;
	for (;;)
		{
	  	unsigned uParent = GuideTree.GetParent(uNodeIndex);
// The root has no parent: the walk is complete.
		if (NULL_NEIGHBOR == uParent)
			break;
// As written, bRight is true when the current node is the LEFT child
// of its parent, and in that case m_EstringL is selected below.
		bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
		uNodeIndex = uParent;
// Path is not used in this function.
		const PWPath &Path = Nodes[uNodeIndex].m_Path;
		const short *EstringNode = bRight ?
		  Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;

		MulEstrings(EstringCurr, EstringNode, EstringNext);
#if	TRACE
		Log("\n");
		Log("Curr=");
		LogEstring(EstringCurr);
		Log("\n");
		Log("Node=");
		LogEstring(EstringNode);
		Log("\n");
		Log("Prod=");
		LogEstring(EstringNext);
		Log("\n");
#endif
// Swap buffers: the product becomes the current e-string and the old
// current buffer is reused for the next product.
		short *EstringTmp = EstringNext;
		EstringNext = EstringCurr;
		EstringCurr = EstringTmp;
		}
	EstringOp(EstringCurr, s, sRoot);

#if	TRACE
	Log("Root estring=");
	LogEstring(EstringCurr);
	Log("\n");
	Log("Root seq=");
	sRoot.LogMe();
#endif
	return EstringCurr;
	}

// First tree node to visit when emitting the root alignment: node 0 when
// g_bStable is set, otherwise the first node of the depth-first traversal.
static unsigned GetFirstNodeIndex(const Tree &tree)
	{
	return g_bStable ? 0 : tree.FirstDepthFirstNode();
	}

// Next leaf to visit after uPrevNodeIndex, or NULL_NEIGHBOR when there is
// none. With g_bStable set, leaves are visited in increasing node index;
// otherwise they are visited in depth-first traversal order.
static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
	{
	unsigned uNode = uPrevNodeIndex;

	if (!g_bStable)
		{
// Advance through the traversal until a leaf or the end is reached.
		do
			uNode = tree.NextDepthFirstNode(uNode);
		while (NULL_NEIGHBOR != uNode && !tree.IsLeaf(uNode));
		return uNode;
		}

// Stable order: scan forward by index for the next leaf.
	const unsigned uNodeCount = tree.GetNodeCount();
	for (++uNode; uNode < uNodeCount; ++uNode)
		if (tree.IsLeaf(uNode))
			return uNode;
	return NULL_NEIGHBOR;
	}

// Build the root alignment from the input sequences, the guide tree and
// the per-node data produced by progressive alignment.
//   v          input sequences, looked up by the id stored on each leaf
//   GuideTree  guide tree; leaves are visited in the order given by
//              GetFirstNodeIndex / GetNextNodeIndex
//   Nodes      per-node data; the root's m_Path sizes the scratch buffers
//              and each visited leaf's m_EstringL is overwritten with a
//              copy of that leaf's root e-string
//   a          receives the alignment, one row per visited leaf
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
// uInsane marks "column count not known yet"; it is fixed by the first row.
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
// Two scratch e-string buffers, each one entry longer than the number of
// edges in the root path; MakeRootSeqE alternates between them.
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

// NOTE(review): the do/while body runs at least once, so the first node
// index is used without being compared to NULL_NEIGHBOR -- confirm the
// tree always has at least one leaf here.
	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
// es points into a scratch buffer, so the node keeps its own copy.
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

// The first row determines the alignment width and sizes the output;
// later rows are only checked (in assert-enabled builds) to match it.
		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
@@ -0,0 +1,62 @@
#include "muscle.h"
#include "tree.h"
#include "profile.h"
#include "msa.h"
#include "seqvect.h"
#include "pwpath.h"

// Align a single sequence against the root profile and store the gapped
// result as one row of the output alignment.
//   s                sequence to place
//   uSeqIndex        destination row in msaOut
//   RootProf         profile at the root of the guide tree
//   uRootProfLength  number of columns in RootProf
//   msaOut           output alignment; must already be sized by the caller
static void DoSeq(Seq &s, unsigned uSeqIndex, const ProfPos *RootProf,
  unsigned uRootProfLength, MSA &msaOut)
	{
	MSA msaSeq;
	msaSeq.FromSeq(s);
	const unsigned uSeqLength = s.Length();

// One-row placeholder with as many columns as the root profile; it only
// supplies the second operand for AlignTwoMSAsGivenPath below.
	MSA msaDummy;
	msaDummy.SetSize(1, uRootProfLength);
	msaDummy.SetSeqId(0, 0);
	msaDummy.SetSeqName(0, "Dummy0");
	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
		msaDummy.SetChar(0, uColIndex, '?');

// Force the gap open/close scores of every position in the sequence
// profile to MINUS_INFINITY.
// NOTE(review): presumably this stops the aligner from inserting columns
// into the root profile -- confirm against the AlignTwoProfs scoring.
// NOTE(review): SeqProf is never released in this function; check how
// ProfileFromMSA allocates it and whether it should be freed here.
	ProfPos *SeqProf = ProfileFromMSA(msaSeq);
	for (unsigned uColIndex = 0; uColIndex < uSeqLength; ++uColIndex)
		{
		ProfPos &PP = SeqProf[uColIndex];
		PP.m_scoreGapOpen = MINUS_INFINITY;
		PP.m_scoreGapClose = MINUS_INFINITY;
		}

	ProfPos *ProfOut;
	unsigned uLengthOut;
	PWPath Path;
	AlignTwoProfs(SeqProf, uSeqLength, 1.0, RootProf, uRootProfLength, 1.0,
	  Path, &ProfOut, &uLengthOut);
// The combined profile must be exactly as long as the root profile.
// (This used to be written with '=', which assigned instead of testing.)
	assert(uLengthOut == uRootProfLength);
	delete[] ProfOut;

	MSA msaCombined;
	AlignTwoMSAsGivenPath(Path, msaSeq, msaDummy, msaCombined);

	msaCombined.LogMe();
// Row 0 of the combined alignment is the input sequence with gaps.
	msaOut.SetSeqName(uSeqIndex, s.GetName());
	msaOut.SetSeqId(uSeqIndex, s.GetId());
	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
		msaOut.SetChar(uSeqIndex, uColIndex, msaCombined.GetChar(0, uColIndex));
	}

// Root alignment by Steven Brenner's O(NL^2) proposal: every input
// sequence is aligned independently to the profile stored at the root
// node. The e-string approach (MakeRootMSA) is O(NL log N) by comparison.
//   v          input sequences; row i of the output comes from v[i]
//   GuideTree  guide tree, used only to locate the root node
//   Nodes      per-node data; the root's m_Prof and m_uLength are read
//   a          receives the alignment
void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
	const unsigned uSeqCount = v.Length();
	const unsigned uRoot = GuideTree.GetRootNodeIndex();
	const ProfPos *RootProfile = Nodes[uRoot].m_Prof;
	const unsigned uRootColCount = Nodes[uRoot].m_uLength;

	a.SetSize(uSeqCount, uRootColCount);

	unsigned uSeqIndex = 0;
	while (uSeqIndex < uSeqCount)
		{
		DoSeq(*v[uSeqIndex], uSeqIndex, RootProfile, uRootColCount, a);
		++uSeqIndex;
		}
	}
@@ -0,0 +1,38 @@
#include "muscle.h"
#include "msa.h"
#include "textfile.h"
#include "tree.h"

// Implements -maketree: read an alignment from g_pstrInFileName, build a
// tree from it and write the tree to g_pstrOutFileName.
void DoMakeTree()
	{
	const bool bHaveFiles = (g_pstrInFileName != 0 && g_pstrOutFileName != 0);
	if (!bHaveFiles)
		Quit("-maketree requires -in <msa> and -out <treefile>");

	SetStartTime();
	SetSeqWeightMethod(g_SeqWeight1);

	TextFile MSAFile(g_pstrInFileName);

	MSA msa;
	msa.FromFile(MSAFile);

	const unsigned uSeqCount = msa.GetSeqCount();
	MSA::SetIdCount(uSeqCount);

// Assign ids 0..N-1 in input order; everything downstream relies on
// these ids being propagated from here.
	unsigned uSeqIndex = 0;
	while (uSeqIndex < uSeqCount)
		{
		msa.SetSeqId(uSeqIndex, uSeqIndex);
		++uSeqIndex;
		}
	SetMuscleInputMSA(msa);

	Progress("%u sequences", uSeqCount);

	Tree tree;
	TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root2);

// Second constructor argument 'true' as in the other output files.
	TextFile TreeFile(g_pstrOutFileName, true);
	tree.ToFile(TreeFile);

	Progress("Tree created");
	}
@@ -0,0 +1,64 @@
#include "muscle.h"
#include "seqvect.h"
#include "msa.h"

/***
Methionine hack.
Most proteins start with M.
This results in odd-looking alignments with the terminal Ms aligned followed
immediately by gaps.
Hack this by treating terminal M like X.
***/

static bool *M;

// Begin the methionine hack: for amino-acid input, replace a leading
// 'M' or 'm' in each sequence by 'X' and remember, per sequence id, that
// this was done so MHackEnd can restore it.
//   v  sequences to modify in place
// Does nothing when the global alphabet is not ALPHA_Amino.
// NOTE(review): the flag array has v.Length() entries but is indexed by
// sequence id, so ids are assumed to be smaller than the sequence count
// -- confirm with the callers that assign ids.
void MHackStart(SeqVect &v)
	{
	if (ALPHA_Amino != g_Alpha)
		return;

// Release flags left over from an earlier MHackStart that was never
// matched by MHackEnd; delete[] on a null pointer is harmless.
	delete[] M;
	M = 0;

	const unsigned uSeqCount = v.Length();
	M = new bool[uSeqCount];
	memset(M, 0, uSeqCount*sizeof(bool));
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = v.GetSeq(uSeqIndex);
// Empty sequences have no first letter to inspect.
		if (0 == s.Length())
			continue;
		unsigned uId = s.GetId();
		if (s[0] == 'M' || s[0] == 'm')
			{
			M[uId] = true;
			s[0] = 'X';
			}
		}
	}

// Finish the methionine hack: for every row whose sequence id was flagged
// by MHackStart, overwrite the first non-gap character with 'M'. The flag
// array is released afterwards.
//   msa  alignment to patch in place
// Does nothing when the alphabet is not ALPHA_Amino or no flags exist.
void MHackEnd(MSA &msa)
	{
	if (ALPHA_Amino != g_Alpha || 0 == M)
		return;

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();
	for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
		{
		if (!M[msa.GetSeqId(uSeq)])
			continue;

// Locate the first column of this row that is not a gap.
		unsigned uCol = 0;
		while (uCol < uColCount && msa.IsGap(uSeq, uCol))
			++uCol;
		if (uCol < uColCount)
			msa.SetChar(uSeq, uCol, 'M');
		}

	delete[] M;
	M = 0;
	}
@@ -0,0 +1,21 @@
#!/bin/bash
CPPNames='aligngivenpath aligngivenpathsw aligntwomsas aligntwoprofs aln alpha anchors bittraceback blosum62 blosumla clust cluster clwwt color cons diaglist diffobjscore diffpaths difftrees difftreese distcalc distfunc distpwkimura domuscle dosp dpreglist drawtree edgelist enumopts enumtostr estring fasta fasta2 fastclust fastdist fastdistjones fastdistkbit fastdistkmer fastdistmafft fastdistnuc fastscorepath2 finddiags finddiagsn glbalign glbalign352 glbaligndiag glbalignle glbalignsimple glbalignsp glbalignspn glbalignss glbalndimer globals globalslinux globalsosx globalsother globalswin32 gonnet henikoffweight henikoffweightpb html hydro intmath local main makerootmsa makerootmsab maketree mhack mpam200 msa msa2 msadistkimura msf muscle muscleout nucmx nwdasimple nwdasimple2 nwdasmall nwrec nwsmall objscore objscore2 objscoreda onexception options outweights pam200mafft params phy phy2 phy3 phy4 phyfromclust phyfromfile physeq phytofile posgap ppscore profdb profile profilefrommsa progalign progress progressivealign pwpath readmx realigndiffs realigndiffse refine refinehoriz refinesubfams refinetree refinetreee refinevert refinew savebest scoredist scoregaps scorehistory scorepp seq seqvect setblosumweights setgscweights setnewhandler spfast sptest stabilize subfam subfams sw termgaps textfile threewaywt tomhydro traceback tracebackopt tracebacksw treefrommsa typetostr upgma2 usage validateids vtml2 writescorefile'
ObjNames='aligngivenpath.o aligngivenpathsw.o aligntwomsas.o aligntwoprofs.o aln.o alpha.o anchors.o bittraceback.o blosum62.o blosumla.o clust.o cluster.o clwwt.o color.o cons.o diaglist.o diffobjscore.o diffpaths.o difftrees.o difftreese.o distcalc.o distfunc.o distpwkimura.o domuscle.o dosp.o dpreglist.o drawtree.o edgelist.o enumopts.o enumtostr.o estring.o fasta.o fasta2.o fastclust.o fastdist.o fastdistjones.o fastdistkbit.o fastdistkmer.o fastdistmafft.o fastdistnuc.o fastscorepath2.o finddiags.o finddiagsn.o glbalign.o glbalign352.o glbaligndiag.o glbalignle.o glbalignsimple.o glbalignsp.o glbalignspn.o glbalignss.o glbalndimer.o globals.o globalslinux.o globalsosx.o globalsother.o globalswin32.o gonnet.o henikoffweight.o henikoffweightpb.o html.o hydro.o intmath.o local.o main.o makerootmsa.o makerootmsab.o maketree.o mhack.o mpam200.o msa.o msa2.o msadistkimura.o msf.o muscle.o muscleout.o nucmx.o nwdasimple.o nwdasimple2.o nwdasmall.o nwrec.o nwsmall.o objscore.o objscore2.o objscoreda.o onexception.o options.o outweights.o pam200mafft.o params.o phy.o phy2.o phy3.o phy4.o phyfromclust.o phyfromfile.o physeq.o phytofile.o posgap.o ppscore.o profdb.o profile.o profilefrommsa.o progalign.o progress.o progressivealign.o pwpath.o readmx.o realigndiffs.o realigndiffse.o refine.o refinehoriz.o refinesubfams.o refinetree.o refinetreee.o refinevert.o refinew.o savebest.o scoredist.o scoregaps.o scorehistory.o scorepp.o seq.o seqvect.o setblosumweights.o setgscweights.o setnewhandler.o spfast.o sptest.o stabilize.o subfam.o subfams.o sw.o termgaps.o textfile.o threewaywt.o tomhydro.o traceback.o tracebackopt.o tracebacksw.o treefrommsa.o typetostr.o upgma2.o usage.o validateids.o vtml2.o writescorefile.o'

# Remove objects and logs from any previous run so the logs describe
# this build only.
rm -f *.o muscle.make.stdout.txt muscle.make.stderr.txt

# Compile every translation unit; compiler output is appended to the logs
# and the name of the file being compiled is echoed to the terminal.
for CPPName in $CPPNames
do
  echo $CPPName >> /dev/tty
  $CXX $ENV_GCC_OPTS -c -O3 -msse2 -mfpmath=sse -D_FILE_OFFSET_BITS=64 -DNDEBUG=1 $CPPName.cpp -o $CPPName.o  >> muscle.make.stdout.txt 2>> muscle.make.stderr.txt
done

# Link statically on Linux only.
LINK_OPTS=
if [ "$(uname -s)" = Linux ] ; then
    LINK_OPTS=-static
fi

# Stop when linking fails (which also happens when any object is missing
# because its compilation failed). Without this check a binary left over
# from an earlier build would be stripped and reported as if it were new.
if ! $CXX $LINK_OPTS $ENV_LINK_OPTS -g -o muscle $ObjNames  >> muscle.make.stdout.txt 2>> muscle.make.stderr.txt
then
    tail muscle.make.stderr.txt
    echo "mk: link failed, see muscle.make.stderr.txt" >&2
    exit 1
fi
tail muscle.make.stderr.txt

strip muscle
ls -lh muscle
sum muscle
@@ -0,0 +1,107 @@
#include "muscle.h"

// Constant added by v() to every score of the centered PAM200 table.
const float PAM_200_CENTER = (float) 20.0;

// v(x): convert one table entry to float and add PAM_200_CENTER.
// The argument is not parenthesized, so it should be a plain literal.
#define v(x)	((float) x + PAM_200_CENTER)
// ROW(...): expand 20 scores, given in the column order
// A C D E F G H I K L M N P Q R S T V W Y, into one brace-enclosed row
// initializer. The trailing comma lets rows be listed back to back.
#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
	  v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },

float PAM200[32][32] =
	{
//       A       C       D       E       F       G       H       I       K       L
//       M       N       P       Q       R       S       T       V       W       Y
ROW(   388,     -0,     34,     32,   -202,    159,    -88,     89,    -55,    -67, 
        19,     86,    186,    -34,    -32,    237,    273,    171,   -326,   -239)  // A
ROW(    -0,   1170,   -248,   -315,     74,    -14,     43,   -151,   -204,   -196, 
      -132,    -49,   -142,   -215,     29,    165,     -7,    -69,    179,    313)  // C
ROW(    34,   -248,    625,    496,   -419,    148,     78,   -245,     55,   -361, 
      -255,    332,   -169,    122,    -64,     45,    -13,   -167,   -438,   -148)  // D
ROW(    32,   -315,    496,    610,   -480,    125,     25,   -245,    175,   -327, 
      -242,    166,   -141,    279,     34,    -30,    -56,   -150,   -386,   -305)  // E
ROW(  -202,     74,   -419,   -480,    888,   -407,     62,     80,   -443,    320, 
        67,   -236,   -180,   -294,   -327,    -51,   -173,     31,     -1,    584)  // F
ROW(   159,    -14,    148,    125,   -407,    662,   -114,   -216,    -34,   -324, 
      -246,     79,    -77,    -68,     97,    155,     21,    -93,    -58,   -349)  // G
ROW(   -88,     43,     78,     25,     62,   -114,    766,   -205,    144,    -92, 
      -152,    238,     66,    368,    257,     35,    -35,   -217,   -201,    468)  // H
ROW(    89,   -151,   -245,   -245,     80,   -216,   -205,    554,   -224,    288, 
       391,   -114,   -115,   -222,   -208,    -19,    162,    469,   -274,   -153)  // I
ROW(   -55,   -204,     55,    175,   -443,    -34,    144,   -224,    632,   -249, 
      -118,    186,    -86,    315,    466,      2,     19,   -227,   -216,   -264)  // K
ROW(   -67,   -196,   -361,   -327,    320,   -324,    -92,    288,   -249,    591, 
       369,   -223,     53,    -86,   -170,    -69,    -41,    239,    -66,    -29)  // L
ROW(    19,   -132,   -255,   -242,     67,   -246,   -152,    391,   -118,    369, 
       756,   -131,    -98,   -124,   -129,    -49,    129,    331,   -229,   -182)  // M
ROW(    86,    -49,    332,    166,   -236,     79,    238,   -114,    186,   -223, 
      -131,    516,    -21,     88,     73,    240,    168,   -118,   -379,     -8)  // N
ROW(   186,   -142,   -169,   -141,   -180,    -77,     66,   -115,    -86,     53, 
       -98,    -21,    736,    122,      5,    221,    139,    -75,   -373,   -226)  // P
ROW(   -34,   -215,    122,    279,   -294,    -68,    368,   -222,    315,    -86, 
      -124,     88,    122,    635,    301,    -13,    -35,   -195,   -243,    -73)  // Q
ROW(   -32,     29,    -64,     34,   -327,     97,    257,   -208,    466,   -170, 
      -129,     73,      5,    301,    606,     28,     -4,   -201,    104,   -133)  // R
ROW(   237,    165,     45,    -30,    -51,    155,     35,    -19,      2,    -69, 
       -49,    240,    221,    -13,     28,    353,    259,      8,   -213,    -55)  // S
ROW(   273,     -7,    -13,    -56,   -173,     21,    -35,    162,     19,    -41, 
       129,    168,    139,    -35,     -4,    259,    422,    143,   -343,   -190)  // T
ROW(   171,    -69,   -167,   -150,     31,    -93,   -217,    469,   -227,    239, 
       331,   -118,    -75,   -195,   -201,      8,    143,    505,   -245,   -197)  // V
ROW(  -326,    179,   -438,   -386,     -1,    -58,   -201,   -274,   -216,    -66, 
      -229,   -379,   -373,   -243,    104,   -213,   -343,   -245,   1475,     63)  // W
ROW(  -239,    313,   -148,   -305,    584,   -349,    468,   -153,   -264,    -29, 
      -182,     -8,   -226,    -73,   -133,    -55,   -190,   -197,     63,    979)  // Y
	};

// Redefine v() for the second table: plain conversion to float with no
// centering offset added.
#undef v
#define v(x)	((float) x)
// RNC(...): same layout as ROW (20 scores in the column order
// A C D E F G H I K L M N P Q R S T V W Y) but using the uncentered v().
#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
	  v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },

float PAM200NoCenter[32][32] =

	{
//       A       C       D       E       F       G       H       I       K       L
//       M       N       P       Q       R       S       T       V       W       Y
RNC(   388,     -0,     34,     32,   -202,    159,    -88,     89,    -55,    -67, 
        19,     86,    186,    -34,    -32,    237,    273,    171,   -326,   -239)  // A
RNC(    -0,   1170,   -248,   -315,     74,    -14,     43,   -151,   -204,   -196, 
      -132,    -49,   -142,   -215,     29,    165,     -7,    -69,    179,    313)  // C
RNC(    34,   -248,    625,    496,   -419,    148,     78,   -245,     55,   -361, 
      -255,    332,   -169,    122,    -64,     45,    -13,   -167,   -438,   -148)  // D
RNC(    32,   -315,    496,    610,   -480,    125,     25,   -245,    175,   -327, 
      -242,    166,   -141,    279,     34,    -30,    -56,   -150,   -386,   -305)  // E
RNC(  -202,     74,   -419,   -480,    888,   -407,     62,     80,   -443,    320, 
        67,   -236,   -180,   -294,   -327,    -51,   -173,     31,     -1,    584)  // F
RNC(   159,    -14,    148,    125,   -407,    662,   -114,   -216,    -34,   -324, 
      -246,     79,    -77,    -68,     97,    155,     21,    -93,    -58,   -349)  // G
RNC(   -88,     43,     78,     25,     62,   -114,    766,   -205,    144,    -92, 
      -152,    238,     66,    368,    257,     35,    -35,   -217,   -201,    468)  // H
RNC(    89,   -151,   -245,   -245,     80,   -216,   -205,    554,   -224,    288, 
       391,   -114,   -115,   -222,   -208,    -19,    162,    469,   -274,   -153)  // I
RNC(   -55,   -204,     55,    175,   -443,    -34,    144,   -224,    632,   -249, 
      -118,    186,    -86,    315,    466,      2,     19,   -227,   -216,   -264)  // K
RNC(   -67,   -196,   -361,   -327,    320,   -324,    -92,    288,   -249,    591, 
       369,   -223,     53,    -86,   -170,    -69,    -41,    239,    -66,    -29)  // L
RNC(    19,   -132,   -255,   -242,     67,   -246,   -152,    391,   -118,    369, 
       756,   -131,    -98,   -124,   -129,    -49,    129,    331,   -229,   -182)  // M
RNC(    86,    -49,    332,    166,   -236,     79,    238,   -114,    186,   -223, 
      -131,    516,    -21,     88,     73,    240,    168,   -118,   -379,     -8)  // N
RNC(   186,   -142,   -169,   -141,   -180,    -77,     66,   -115,    -86,     53, 
       -98,    -21,    736,    122,      5,    221,    139,    -75,   -373,   -226)  // P
RNC(   -34,   -215,    122,    279,   -294,    -68,    368,   -222,    315,    -86, 
      -124,     88,    122,    635,    301,    -13,    -35,   -195,   -243,    -73)  // Q
RNC(   -32,     29,    -64,     34,   -327,     97,    257,   -208,    466,   -170, 
      -129,     73,      5,    301,    606,     28,     -4,   -201,    104,   -133)  // R
RNC(   237,    165,     45,    -30,    -51,    155,     35,    -19,      2,    -69, 
       -49,    240,    221,    -13,     28,    353,    259,      8,   -213,    -55)  // S
RNC(   273,     -7,    -13,    -56,   -173,     21,    -35,    162,     19,    -41, 
       129,    168,    139,    -35,     -4,    259,    422,    143,   -343,   -190)  // T
RNC(   171,    -69,   -167,   -150,     31,    -93,   -217,    469,   -227,    239, 
       331,   -118,    -75,   -195,   -201,      8,    143,    505,   -245,   -197)  // V
RNC(  -326,    179,   -438,   -386,     -1,    -58,   -201,   -274,   -216,    -66, 
      -229,   -379,   -373,   -243,    104,   -213,   -343,   -245,   1475,     63)  // W
RNC(  -239,    313,   -148,   -305,    584,   -349,    468,   -153,   -264,    -29, 
      -182,     -8,   -226,    -73,   -133,    -55,   -190,   -197,     63,    979)  // Y
	};
@@ -0,0 +1,851 @@
#include "muscle.h"
#include "msa.h"
#include "textfile.h"
#include "seq.h"
#include <math.h>

const unsigned DEFAULT_SEQ_LENGTH = 500;

unsigned MSA::m_uIdCount = 0;

// Construct an empty alignment: zero sequences and columns, no buffers
// allocated, and zeroed cache bookkeeping.
MSA::MSA()
	{
// Dimensions.
	m_uColCount = 0;
	m_uSeqCount = 0;

// Cache bookkeeping.
	m_uCacheSeqLength = 0;
	m_uCacheSeqCount = 0;

// Per-sequence storage.
	m_Weights = 0;
	m_szNames = 0;
	m_szSeqs = 0;

// Id <-> row index maps.
	m_SeqIndexToId = 0;
	m_IdToSeqIndex = 0;
	}

// Destructor: releases every buffer owned by the alignment via Free().
MSA::~MSA()
	{
	Free();
	}

// Release all storage owned by the alignment and return it to the empty
// state (zero rows and columns, all buffer pointers null). Safe to call
// on an already-empty alignment. The cache size members are not touched.
void MSA::Free()
	{
// Per-row buffers first, while the row count is still known.
	unsigned uRow = 0;
	while (uRow < m_uSeqCount)
		{
		delete[] m_szSeqs[uRow];
		delete[] m_szNames[uRow];
		++uRow;
		}

// Then the arrays themselves, nulling each pointer as it is released.
	delete[] m_szSeqs;
	m_szSeqs = 0;

	delete[] m_szNames;
	m_szNames = 0;

	delete[] m_Weights;
	m_Weights = 0;

	delete[] m_IdToSeqIndex;
	m_IdToSeqIndex = 0;

	delete[] m_SeqIndexToId;
	m_SeqIndexToId = 0;

	m_uSeqCount = 0;
	m_uColCount = 0;
	}

// Discard the current contents and allocate storage for uSeqCount rows
// with room for uColCount characters each.
//   uSeqCount  number of rows
//   uColCount  capacity of each row in characters (a terminating zero is
//              stored after the last one)
// After the call the column count is 0 and grows as SetChar writes
// characters; the capacity is remembered in m_uCacheSeqLength.
void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
	{
	Free();

	m_uSeqCount = uSeqCount;
	m_uCacheSeqLength = uColCount;
	m_uColCount = 0;

// Nothing to allocate for a completely empty alignment.
	if (0 == uSeqCount && 0 == uColCount)
		return;

	m_szSeqs = new char *[uSeqCount];
	m_szNames = new char *[uSeqCount];
	m_Weights = new WEIGHT[uSeqCount];

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		m_szSeqs[uSeqIndex] = new char[uColCount+1];
		m_szNames[uSeqIndex] = 0;
// Only DEBUG builds pre-fill the weight and the row contents; otherwise
// they stay uninitialized until written.
#if	DEBUG
		m_Weights[uSeqIndex] = BTInsane;
		memset(m_szSeqs[uSeqIndex], '?', uColCount);
#endif
		m_szSeqs[uSeqIndex][uColCount] = 0;
		}

// The id maps exist only when an id count has been set on the class.
	if (m_uIdCount > 0)
		{
		m_IdToSeqIndex = new unsigned[m_uIdCount];
		m_SeqIndexToId = new unsigned[m_uSeqCount];
#if	DEBUG
		memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
		memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
#endif
		}
	}

// Write the alignment to the log in blocks of up to 50 columns. Each
// block has two ruler lines followed by one line per sequence showing
// the name, the weight (blank when it equals BTInsane), the characters
// and, when the id map exists, the sequence id.
void MSA::LogMe() const
	{
	if (0 == GetColCount())
		{
		Log("MSA empty\n");
		return;
		}

	const unsigned uColsPerLine = 50;
// Number of blocks, rounding up.
	unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
	for (unsigned n = 0; n < uLinesPerSeq; ++n)
		{
		unsigned i;
		unsigned iStart = n*uColsPerLine;
// iEnd is one past the last column of this block.
		unsigned iEnd = GetColCount();
		if (iEnd - iStart + 1 > uColsPerLine)
			iEnd = iStart + uColsPerLine;
// First ruler line: last digit of every column index.
		Log("                       ");
		for (i = iStart; i < iEnd; ++i)
			Log("%u", i%10);
		Log("\n");
// Second ruler line: the column index at every tenth column, and the
// total column count at the end of the final block.
		Log("                       ");
		for (i = iStart; i + 9 < iEnd; i += 10)
			Log("%-10u", i);
		if (n == uLinesPerSeq - 1)
			Log(" %-10u", GetColCount());
		Log("\n");
		for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
			{
			Log("%12.12s", m_szNames[uSeqIndex]);
			if (m_Weights[uSeqIndex] != BTInsane)
				Log(" (%5.3f)", m_Weights[uSeqIndex]);
			else
				Log("        ");
			Log("   ");
			for (i = iStart; i < iEnd; ++i)
				Log("%c", GetChar(uSeqIndex, i));
			if (0 != m_SeqIndexToId)
				Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
			Log("\n");
			}
		Log("\n\n");
		}
	}

// Return the character stored at row uSeqIndex, column uIndex.
// Both indexes are range-checked; a violation is reported through Quit().
char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	const bool bInRange = (uSeqIndex < m_uSeqCount && uIndex < m_uColCount);
	if (!bInRange)
		Quit("MSA::GetChar(%u/%u,%u/%u)",
		  uSeqIndex, m_uSeqCount, uIndex, m_uColCount);

	return m_szSeqs[uSeqIndex][uIndex];
	}

// Return the letter code (via CharToLetter) of the character at row
// uSeqIndex, column uIndex. Codes of 20 or above are rejected: the
// position, character and code are reported through Quit().
unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	const unsigned uLetter = CharToLetter(GetChar(uSeqIndex, uIndex));
	if (uLetter < 20)
		return uLetter;

// Fetch the offending character again for the message, guarding the
// indexes so that the report itself cannot read out of range.
	char cShown = ' ';
	if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
		cShown = m_szSeqs[uSeqIndex][uIndex];
	Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
	  uSeqIndex, m_uSeqCount, uIndex, m_uColCount, cShown, uLetter);
	return uLetter;
	}

// Return the CharToLetterEx code of the character at row uSeqIndex,
// column uIndex. Unlike GetLetter, the code is not range-checked here.
unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	return CharToLetterEx(GetChar(uSeqIndex, uIndex));
	}

// Set the name of row uSeqIndex to a private copy of szName, releasing
// any name previously stored for that row.
//   uSeqIndex  row to rename; out-of-range values are reported via Quit()
//   szName     zero-terminated name to copy
void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
	{
// The format takes three values (index, name, count); the name argument
// was previously missing, so the count was consumed as a string.
	if (uSeqIndex >= m_uSeqCount)
		Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, szName, m_uSeqCount);
	delete[] m_szNames[uSeqIndex];
// Copy the name including its terminating zero.
	int n = (int) strlen(szName) + 1;
	m_szNames[uSeqIndex] = new char[n];
	memcpy(m_szNames[uSeqIndex], szName, n);
	}

// Return the stored name of row uSeqIndex (the alignment keeps ownership).
// An out-of-range index is reported through Quit().
const char *MSA::GetSeqName(unsigned uSeqIndex) const
	{
	const bool bValid = (uSeqIndex < m_uSeqCount);
	if (!bValid)
		Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
	return m_szNames[uSeqIndex];
	}

// True when the character at row uSeqIndex, column uIndex is a gap
// character according to IsGapChar.
bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
	{
	return IsGapChar(GetChar(uSeqIndex, uIndex));
	}

// True when the character at row uSeqIndex, column uIndex is a wildcard
// according to IsWildcardChar.
bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
	{
	return IsWildcardChar(GetChar(uSeqIndex, uIndex));
	}

// Store character c at row uSeqIndex, column uIndex.
// Writing at the column equal to the current capacity enlarges every row
// by DEFAULT_SEQ_LENGTH characters; anything beyond that, or an invalid
// row, is reported through Quit(). The column count is raised so that it
// always covers the highest column written.
void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
	{
	if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
		Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);

// Capacity reached: reallocate all rows with DEFAULT_SEQ_LENGTH more
// characters, keeping existing content and filling the new part with '?'.
	if (uIndex == m_uCacheSeqLength)
		{
		const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
		for (unsigned n = 0; n < m_uSeqCount; ++n)
			{
			char *ptrNewSeq = new char[uNewCacheSeqLength+1];
			memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
			memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
			ptrNewSeq[uNewCacheSeqLength] = 0;
			delete[] m_szSeqs[n];
			m_szSeqs[n] = ptrNewSeq;
			}

// The test below then raises the column count to uIndex + 1.
		m_uColCount = uIndex;
		m_uCacheSeqLength = uNewCacheSeqLength;
		}

	if (uIndex >= m_uColCount)
		m_uColCount = uIndex + 1;
	m_szSeqs[uSeqIndex][uIndex] = c;
	}

// Extract row uSeqIndex as an ungapped, upper-case sequence.
//   uSeqIndex  row to extract (checked by assert)
//   seq        receives the letters and the row's name; cleared first
// Gap characters are skipped. Any other non-alphabetic character is
// reported through Quit().
void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
	{
	assert(uSeqIndex < m_uSeqCount);

	seq.Clear();

	for (unsigned n = 0; n < m_uColCount; ++n)
		if (!IsGap(uSeqIndex, n))
			{
			const char c = GetChar(uSeqIndex, n);
// The <ctype.h> functions require a value representable as unsigned
// char (or EOF); a negative plain char would be undefined behaviour.
			const unsigned char uc = (unsigned char) c;
			if (!isalpha(uc))
				Quit("Invalid character '%c' in sequence", c);
			seq.push_back((char) toupper(uc));
			}
	const char *ptrName = GetSeqName(uSeqIndex);
	seq.SetName(ptrName);
	}

bool MSA::HasGap() const
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		for (unsigned n = 0; n < GetColCount(); ++n)
			if (IsGap(uSeqIndex, n))
				return true;
	return false;
	}

// True when uLetter is a valid letter code, i.e. below 20 (the same
// limit GetLetter enforces).
bool MSA::IsLegalLetter(unsigned uLetter) const
	{
	return uLetter < 20;
	}

// Discard the current contents and allocate uSeqCount rows with the
// default capacity of DEFAULT_SEQ_LENGTH characters each.
void MSA::SetSeqCount(unsigned uSeqCount)
	{
// SetSize also begins by calling Free(), so this call is not strictly needed.
	Free();
	SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
	}

// Copy column uFromCol onto column uToCol in every row. Both columns must
// exist (checked by assert); copying a column onto itself does nothing.
void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
	{
	assert(uFromCol < GetColCount());
	assert(uToCol < GetColCount());

	if (uFromCol != uToCol)
		{
		unsigned uRow = 0;
		while (uRow < GetSeqCount())
			{
			SetChar(uRow, uToCol, GetChar(uRow, uFromCol));
			++uRow;
			}
		}
	}

void MSA::Copy(const MSA &msa)
	{
	Free();
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();
	SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
		const unsigned uId = msa.GetSeqId(uSeqIndex);
		SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}

// True when every row has a gap in column uColIndex. The alignment must
// contain at least one row (checked by assert).
bool MSA::IsGapColumn(unsigned uColIndex) const
	{
	assert(GetSeqCount() > 0);

	unsigned uRow = 0;
	while (uRow < GetSeqCount())
		{
		const bool bGap = IsGap(uRow, uColIndex);
		if (!bGap)
			return false;
		++uRow;
		}
	return true;
	}

// Find the first row whose name equals ptrSeqName, compared with stricmp.
//   ptrSeqName    name to search for
//   ptruSeqIndex  receives the row index when a match is found; left
//                 untouched otherwise
// Returns true when a matching row exists.
bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
	{
	unsigned uRow = 0;
	while (uRow < GetSeqCount())
		{
		if (stricmp(ptrSeqName, GetSeqName(uRow)) == 0)
			{
			*ptruSeqIndex = uRow;
			return true;
			}
		++uRow;
		}
	return false;
	}

// Remove column uColIndex from every row, shifting the columns to its
// right one position left and reducing the column count by one.
// uColIndex must be a valid column (checked by assert).
void MSA::DeleteCol(unsigned uColIndex)
	{
	assert(uColIndex < m_uColCount);
// n bytes starting at uColIndex + 1 are moved, so the last byte read is
// the one at index m_uColCount, directly after the final column.
	size_t n = m_uColCount - uColIndex;
	if (n > 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
			{
			char *ptrSeq = m_szSeqs[uSeqIndex];
// memmove because source and destination overlap.
			memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
			}
		}
	--m_uColCount;
	}

// Remove uColCount consecutive columns starting at uColIndex. The same
// index is deleted repeatedly because each removal shifts the remaining
// columns left.
void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
	{
	unsigned uRemaining = uColCount;
	while (uRemaining > 0)
		{
		DeleteCol(uColIndex);
		--uRemaining;
		}
	}

// Load the alignment from File by delegating to FromFASTAFile.
void MSA::FromFile(TextFile &File)
	{
	FromFASTAFile(File);
	}

// Weights sum to 1, WCounts sum to NIC
// Return the weight stored for row uSeqIndex (index checked by assert).
// A weight equal to the wInsane sentinel is reported through Quit().
WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);

	const WEIGHT wSeq = m_Weights[uSeqIndex];
	const bool bUnset = (wSeq == wInsane);
	if (bUnset)
		Quit("Seq weight not set");
	return wSeq;
	}

// Store weight w for row uSeqIndex (index checked by assert).
// Declared const: the write goes through the m_Weights pointer, so the
// weights can be changed on a const alignment.
void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
	{
	assert(uSeqIndex < m_uSeqCount);
	m_Weights[uSeqIndex] = w;
	}

// Rescale all sequence weights by a common factor so that they sum to
// wDesiredTotal. When the current sum is exactly zero the weights are
// left unchanged.
void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
	{
	unsigned uRow;

	WEIGHT wSum = 0;
	for (uRow = 0; uRow < m_uSeqCount; ++uRow)
		wSum += m_Weights[uRow];

	if (0 != wSum)
		{
		const WEIGHT wScale = wDesiredTotal/wSum;
		for (uRow = 0; uRow < m_uSeqCount; ++uRow)
			m_Weights[uRow] *= wScale;
		}
	}

// Placeholder: weight calculation is not implemented here, so every call
// is reported through Quit().
void MSA::CalcWeights() const
	{
	Quit("Calc weights not implemented");
	}

// Log character c followed by enough spaces to fill a field of uWidth
// characters. A width of 0 or 1 logs the character alone.
static void FmtChar(char c, unsigned uWidth)
	{
	Log("%c", c);
// Counting from 1 avoids computing uWidth - 1, which wraps around to a
// huge unsigned value when uWidth is 0.
	for (unsigned n = 1; n < uWidth; ++n)
		Log(" ");
	}

// Logs u left-justified in a field of uWidth characters, padding on the
// right with spaces. A value of zero is shown as "." instead of "0".
// Text longer than uWidth is logged in full, without padding.
static void FmtInt(unsigned u, unsigned uWidth)
	{
	static char szStr[1024];
	assert(uWidth < sizeof(szStr));
	if (0 == u)
		strcpy(szStr, ".");
	else
		sprintf(szStr, "%u", u);
	Log(szStr);
	const unsigned uLen = (unsigned) strlen(szStr);
	// Pad from the end of the text up to the field width.
	for (unsigned uPos = uLen; uPos < uWidth; ++uPos)
		Log(" ");
	}

// Logs u left-justified in a field of uWidth characters, padding on the
// right with spaces. Unlike FmtInt, zero is printed as "0".
// Text longer than uWidth is logged in full, without padding.
static void FmtInt0(unsigned u, unsigned uWidth)
	{
	static char szStr[1024];
	assert(uWidth < sizeof(szStr));
	sprintf(szStr, "%u", u);
	Log(szStr);
	const unsigned uLen = (unsigned) strlen(szStr);
	// Pad from the end of the text up to the field width.
	for (unsigned uPos = uLen; uPos < uWidth; ++uPos)
		Log(" ");
	}

// Logs n space characters.
static void FmtPad(unsigned n)
	{
	unsigned uLeft = n;
	while (uLeft != 0)
		{
		Log(" ");
		--uLeft;
		}
	}

// Rebuilds this alignment as a single row holding sequence s: sizes the
// alignment to 1 x s.Length(), copies the name, copies the id only when
// the index-to-id table exists, then copies the characters one by one.
void MSA::FromSeq(const Seq &s)
	{
	const unsigned uLength = s.Length();
	SetSize(1, uLength);
	SetSeqName(0, s.GetName());
	if (0 != m_SeqIndexToId)
		SetSeqId(0, s.GetId());
	unsigned uCol = 0;
	while (uCol < uLength)
		{
		SetChar(0, uCol, s[uCol]);
		++uCol;
		}
	}

// Counts the non-gap positions of sequence uSeqIndex in columns
// 0..uColIndex, inclusive of uColIndex itself.
unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
	{
	assert(uSeqIndex < GetSeqCount());
	assert(uColIndex < GetColCount());

	unsigned uResidues = 0;
	for (unsigned uCol = 0; uCol <= uColIndex; ++uCol)
		uResidues += IsGap(uSeqIndex, uCol) ? 0 : 1;
	return uResidues;
	}

void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
	{
	assert(uToSeqIndex < m_uSeqCount);
	const unsigned uColCount = msaFrom.GetColCount();
	assert(m_uColCount == uColCount ||
	  (0 == m_uColCount && uColCount <= m_uCacheSeqLength));

	memcpy(m_szSeqs[uToSeqIndex], msaFrom.GetSeqBuffer(uFromSeqIndex), uColCount);
	SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
	if (0 == m_uColCount)
		m_uColCount = uColCount;
	}

// Returns a read-only pointer to the character buffer of row uSeqIndex.
const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	const char *ptrRow = m_szSeqs[uSeqIndex];
	return ptrRow;
	}

// Removes sequence uSeqIndex from the alignment: frees its character
// buffer and its name, closes the gap in the row and name tables, and
// decrements the sequence count.
//
// The weight array is released and m_Weights is set to null, so any
// previously assigned weights are lost.
void MSA::DeleteSeq(unsigned uSeqIndex)
	{
	assert(uSeqIndex < m_uSeqCount);

	// Row buffers and labels are allocated as arrays (new char[n]), so
	// they must be released with the array form of delete.
	delete[] m_szSeqs[uSeqIndex];
	delete[] m_szNames[uSeqIndex];

	// Only the entries after the deleted one move down. Counting the
	// deleted entry as well would read one slot past the last live
	// pointer in each table.
	const unsigned uTailCount = m_uSeqCount - uSeqIndex - 1;
	if (uTailCount > 0)
		{
		const size_t uBytesToMove = uTailCount*sizeof(char *);
		memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
		memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
		}

	--m_uSeqCount;

	delete[] m_Weights;
	m_Weights = 0;
	}

bool MSA::IsEmptyCol(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		if (!IsGap(uSeqIndex, uColIndex))
			return false;
	return true;
	}

//void MSA::DeleteEmptyCols(bool bProgress)
//	{
//	unsigned uColCount = GetColCount();
//	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
//		{
//		if (IsEmptyCol(uColIndex))
//			{
//			if (bProgress)
//				{
//				Log("Deleting col %u of %u\n", uColIndex, uColCount);
//				printf("Deleting col %u of %u\n", uColIndex, uColCount);
//				}
//			DeleteCol(uColIndex);
//			--uColCount;
//			}
//		}
//	}

// Not implemented: reports an error through Quit. uAlignedColIndex is
// ignored. The trailing "return 0" gives the function a value on every
// path; it is only reached if Quit returns (presumably it does not --
// confirm).
unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
	{
	Quit("MSA::AlignedColIndexToColIndex not implemented");
	return 0;
	}

// Returns the sum of the stored weights of all sequences, added in
// index order. The stored values are read directly, without checking
// whether they have been set.
WEIGHT MSA::GetTotalSeqWeight() const
	{
	const unsigned uRows = GetSeqCount();
	WEIGHT wSum = 0;
	unsigned uRow = 0;
	while (uRow < uRows)
		{
		wSum += m_Weights[uRow];
		++uRow;
		}
	return wSum;
	}

bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
  unsigned uSeqIndex2)
	{
	Seq s1;
	Seq s2;

	a1.GetSeq(uSeqIndex1, s1);
	a2.GetSeq(uSeqIndex2, s2);

	s1.StripGaps();
	s2.StripGaps();

	return s1.EqIgnoreCase(s2);
	}

unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < GetSeqCount());

	const unsigned uColCount = GetColCount();
	unsigned uLength = 0;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		if (!IsGap(uSeqIndex, uColIndex))
			++uLength;
	return uLength;
	}

void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
  unsigned *ptruPosCount) const
	{
	assert(uSeqIndex1 < GetSeqCount());
	assert(uSeqIndex2 < GetSeqCount());

	unsigned uSameCount = 0;
	unsigned uPosCount = 0;
	const unsigned uColCount = GetColCount();
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		char c1 = GetChar(uSeqIndex1, uColIndex);
		if (IsGapChar(c1))
			continue;
		char c2 = GetChar(uSeqIndex2, uColIndex);
		if (IsGapChar(c2))
			continue;
		++uPosCount;
		if (c1 == c2)
			++uSameCount;
		}
	*ptruPosCount = uPosCount;
	if (uPosCount > 0)
		*ptrPWID = 100.0 * (double) uSameCount / (double) uPosCount;
	else
		*ptrPWID = 0;
	}

// Marks every sequence weight as unset by overwriting it with the
// BTInsane sentinel.
void MSA::UnWeight()
	{
	unsigned uRow = 0;
	while (uRow < GetSeqCount())
		{
		m_Weights[uRow] = BTInsane;
		++uRow;
		}
	}

unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
	{
	assert(uColIndex < GetColCount());

	unsigned Counts[MAX_ALPHA];
	memset(Counts, 0, sizeof(Counts));
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
			continue;
		const unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		++(Counts[uLetter]);
		}
	unsigned uUniqueCount = 0;
	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
		if (Counts[uLetter] > 0)
			++uUniqueCount;
	return uUniqueCount;
	}

// Returns the occupancy of column uColIndex: the fraction of sequences
// that are not a gap in that column.
// The sequence count is used as the divisor without a zero check.
double MSA::GetOcc(unsigned uColIndex) const
	{
	unsigned uGaps = 0;
	unsigned uRow = 0;
	while (uRow < GetSeqCount())
		{
		if (IsGap(uRow, uColIndex))
			++uGaps;
		++uRow;
		}
	const unsigned uRows = GetSeqCount();
	const double dOccupied = (double) (uRows - uGaps);
	return dOccupied / (double) uRows;
	}

// Writes the alignment to File in the format selected by the global
// output flags. The flags are tested in the order MSF, Aln, HTML,
// Phylip sequential, Phylip interleaved; the first one set wins and
// FASTA is used when none is set. Afterwards, if a score file name has
// been given, WriteScoreFile is called for this alignment.
void MSA::ToFile(TextFile &File) const
	{
	if (g_bMSF)
		{
		ToMSFFile(File);
		}
	else if (g_bAln)
		{
		ToAlnFile(File);
		}
	else if (g_bHTML)
		{
		ToHTMLFile(File);
		}
	else if (g_bPHYS)
		{
		ToPhySequentialFile(File);
		}
	else if (g_bPHYI)
		{
		ToPhyInterleavedFile(File);
		}
	else
		{
		ToFASTAFile(File);
		}

	if (g_pstrScoreFileName != 0)
		WriteScoreFile(*this);
	}

bool MSA::ColumnHasGap(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		if (IsGap(uSeqIndex, uColIndex))
			return true;
	return false;
	}

// Sets the number of ids. The first call (while the count is still zero)
// stores uIdCount. Later calls never change the stored value: they are
// accepted silently when uIdCount does not exceed it, and report an
// error through Quit otherwise.
void MSA::SetIdCount(unsigned uIdCount)
	{
	if (0 == m_uIdCount)
		{
		m_uIdCount = uIdCount;
		return;
		}

	// Count already established; it may not grow.
	if (uIdCount > m_uIdCount)
		Quit("MSA::SetIdCount: cannot increase count");
	}

// Associates id uId with sequence uSeqIndex, recording the pair in both
// lookup tables. The tables are created on first use, sized by the id
// count and the sequence count, and filled with 0xff bytes so that
// unassigned entries read as all-ones.
void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
	{
	assert(uSeqIndex < m_uSeqCount);
	assert(uId < m_uIdCount);
	if (0 == m_SeqIndexToId)
		{
		if (0 == m_uIdCount)
			Quit("MSA::SetSeqId, SetIdCount has not been called");

		m_IdToSeqIndex = new unsigned[m_uIdCount];
		m_SeqIndexToId = new unsigned[m_uSeqCount];

		const size_t uIdBytes = m_uIdCount*sizeof(unsigned);
		const size_t uSeqBytes = m_uSeqCount*sizeof(unsigned);
		memset(m_IdToSeqIndex, 0xff, uIdBytes);
		memset(m_SeqIndexToId, 0xff, uSeqBytes);
		}
	m_SeqIndexToId[uSeqIndex] = uId;
	m_IdToSeqIndex[uId] = uSeqIndex;
	}

// Returns the sequence index recorded for id uId in the id-to-index
// table. The id range, the presence of the table and the range of the
// result are checked by assert only.
unsigned MSA::GetSeqIndex(unsigned uId) const
	{
	assert(uId < m_uIdCount);
	assert(0 != m_IdToSeqIndex);
	const unsigned uRow = m_IdToSeqIndex[uId];
	assert(uRow < m_uSeqCount);
	return uRow;
	}

// Searches the index-to-id table linearly for id uId. On the first match
// the sequence index is stored in *ptruIndex and true is returned;
// otherwise *ptruIndex is left untouched and the result is false.
// The table pointer is used without a null check.
bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
	{
	unsigned uRow = 0;
	while (uRow < m_uSeqCount)
		{
		if (m_SeqIndexToId[uRow] == uId)
			{
			*ptruIndex = uRow;
			return true;
			}
		++uRow;
		}
	return false;
	}

// Returns the id recorded for sequence uSeqIndex in the index-to-id
// table. The index range and the range of the result are checked by
// assert only.
unsigned MSA::GetSeqId(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	const unsigned uStoredId = m_SeqIndexToId[uSeqIndex];
	assert(uStoredId < m_uIdCount);
	return uStoredId;
	}

// Reports whether sequence weights have been assigned. Only the first
// weight is inspected: the answer is true when it differs from the
// BTInsane sentinel. Returns false when there is no weight array at all.
bool MSA::WeightsSet() const
	{
	// DeleteSeq releases the weight array and leaves the pointer null;
	// treat that state as "not set" instead of dereferencing it.
	if (0 == m_Weights)
		return false;
	return BTInsane != m_Weights[0];
	}

// Builds msaOut from the rows of msaIn whose ids are listed in Ids.
// msaOut is sized to uIdCount rows by msaIn's column count; output row k
// receives the id, name and characters of the input row that has id
// Ids[k].
void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
  MSA &msaOut)
	{
	const unsigned uCols = msaIn.GetColCount();
	msaOut.SetSize(uIdCount, uCols);

	unsigned uRowOut = 0;
	while (uRowOut < uIdCount)
		{
		const unsigned uId = Ids[uRowOut];
		const unsigned uRowIn = msaIn.GetSeqIndex(uId);
		const char *ptrLabel = msaIn.GetSeqName(uRowIn);

		msaOut.SetSeqId(uRowOut, uId);
		msaOut.SetSeqName(uRowOut, ptrLabel);

		for (unsigned uCol = 0; uCol < uCols; ++uCol)
			msaOut.SetChar(uRowOut, uCol, msaIn.GetChar(uRowIn, uCol));

		++uRowOut;
		}
	}

// Caller must allocate ptrSeq and ptrLabel as new char[n].
// Appends a row to the alignment, storing the ptrSeq and ptrLabel
// pointers themselves (no copy is made). When the tables are full they
// are first grown by four slots through ExpandCache.
//
// NOTE(review): ExpandCache allocates a character buffer for every new
// slot, and the pointer in the slot used here is overwritten without
// being freed -- looks like those pre-allocated buffers are leaked;
// confirm.
void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
	{
	if (m_uSeqCount > m_uCacheSeqCount)
		Quit("Internal error MSA::AppendSeq");
	if (m_uCacheSeqCount == m_uSeqCount)
		ExpandCache(m_uSeqCount + 4, uSeqLength);

	const unsigned uSlot = m_uSeqCount++;
	m_szSeqs[uSlot] = ptrSeq;
	m_szNames[uSlot] = ptrLabel;
	}

void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
	{
	if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
		Quit("Internal error MSA::ExpandCache");

	if (m_uSeqCount > 0 && uColCount != m_uColCount)
		Quit("Internal error MSA::ExpandCache, ColCount changed");

	char **NewSeqs = new char *[uSeqCount];
	char **NewNames = new char *[uSeqCount];
	WEIGHT *NewWeights = new WEIGHT[uSeqCount];

	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		{
		NewSeqs[uSeqIndex] = m_szSeqs[uSeqIndex];
		NewNames[uSeqIndex] = m_szNames[uSeqIndex];
		NewWeights[uSeqIndex] = m_Weights[uSeqIndex];
		}

	for (unsigned uSeqIndex = m_uSeqCount; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		char *Seq = new char[uColCount];
		NewSeqs[uSeqIndex] = Seq;
#if	DEBUG
		memset(Seq, '?', uColCount);
#endif
		}

	delete[] m_szSeqs;
	delete[] m_szNames;
	delete[] m_Weights;

	m_szSeqs = NewSeqs;
	m_szNames = NewNames;
	m_Weights = NewWeights;

	m_uCacheSeqCount = uSeqCount;
	m_uCacheSeqLength = uColCount;
	m_uColCount = uColCount;
	}

// Replaces every character that is neither a residue nor a gap with the
// wildcard character. Each replacement is recorded through
// InvalidLetterWarning; the warning state is cleared before the scan and
// ReportInvalidLetters is called after it.
void MSA::FixAlpha()
	{
	ClearInvalidLetterWarning();
	for (unsigned uRow = 0; uRow < m_uSeqCount; ++uRow)
		{
		for (unsigned uCol = 0; uCol < m_uColCount; ++uCol)
			{
			const char cOld = GetChar(uRow, uCol);
			if (IsResidueChar(cOld) || IsGapChar(cOld))
				continue;

			const char cWild = GetWildcardChar();
			InvalidLetterWarning(cOld, cWild);
			SetChar(uRow, uCol, cWild);
			}
		}
	ReportInvalidLetters();
	}

// Guesses the alphabet of the alignment from a sample of its letters.
// Returns ALPHA_RNA or ALPHA_DNA when enough of the sample belongs to
// that alphabet (RNA is tested first), otherwise ALPHA_Amino. An
// alignment with no sequences, no columns, or only gaps yields
// ALPHA_Amino.
ALPHA MSA::GuessAlpha() const
	{
// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
// letters belong to the nucleotide alphabet, guess nucleo.
// Otherwise amino.
	const unsigned CHAR_COUNT = 100;
	const unsigned MIN_NUCLEO_PCT = 95;

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();
	// With no rows or no columns there is nothing to sample. The column
	// check also protects the i/uColCount and i%uColCount arithmetic
	// below from dividing by zero.
	if (0 == uSeqCount || 0 == uColCount)
		return ALPHA_Amino;

	unsigned uDNACount = 0;
	unsigned uRNACount = 0;
	unsigned uTotal = 0;
	// i is a flat position over the alignment in row-major order.
	unsigned i = 0;
	for (;;)
		{
		unsigned uSeqIndex = i/uColCount;
		if (uSeqIndex >= uSeqCount)
			break;
		unsigned uColIndex = i%uColCount;
		++i;
		char c = GetChar(uSeqIndex, uColIndex);
		if (IsGapChar(c))
			continue;
		// A letter may count towards both alphabets.
		if (IsDNA(c))
			++uDNACount;
		if (IsRNA(c))
			++uRNACount;
		++uTotal;
		if (uTotal >= CHAR_COUNT)
			break;
		}
	if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
		return ALPHA_RNA;
	if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
		return ALPHA_DNA;
	return ALPHA_Amino;
	}