Patch compilation of binaries
Former-commit-id: 688670c339643a282bdeabafafff3b451be83cb6 Former-commit-id: 60d3e42d2af73515fea50d9c97dc4eacda9c8abb
This commit is contained in:
30
src/muscle/Makefile
Executable file
30
src/muscle/Makefile
Executable file
@@ -0,0 +1,30 @@
|
||||
#---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for the muscle sub-tree (sub-directories are
#        listed in DIRS)
#
# @history:
# @+ <Gloup> : Apr 97 : Created
# @+ <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
include ../../config/auto.conf

DIRS = muscle3.8.31

include ../../config/targets/propagate.targ

include ../../config/targets/help.targ

all::
	$(MAKE) ACTION=$@ _action

# The previous recipe ran `$(MAKE) -C lxpack portclean`, but DIRS names no
# lxpack directory here, so `make clean` could not succeed.  Hand `clean`
# to _action the same way `all` does.
# NOTE(review): assumes _action (from propagate.targ) runs $(ACTION) in
# each directory of $(DIRS) -- confirm against propagate.targ.
clean::
	$(MAKE) ACTION=$@ _action
|
||||
30
src/muscle/muscle3.8.31/Makefile
Executable file
30
src/muscle/muscle3.8.31/Makefile
Executable file
@@ -0,0 +1,30 @@
|
||||
#---------------------------------------------------------------
# $Id: $
# ---------------------------------------------------------------
# @file: Makefile
# @desc: makefile for the muscle3.8.31 directory (sub-directories
#        are listed in DIRS)
#
# @history:
# @+ <Gloup> : Apr 97 : Created
# @+ <Gloup> : Mar 02 : Updated for LXxware
#
# @note: should be processed with gnu compatible make
# @note: helixware_compatible
#
# @end:
# ---------------------------------------------------------------
#
include ../../../config/auto.conf

DIRS = src

include ../../../config/targets/propagate.targ

include ../../../config/targets/help.targ

all::
	$(MAKE) ACTION=$@ _action

# The previous recipe ran `$(MAKE) -C lxpack portclean`, but DIRS names no
# lxpack directory here, so `make clean` could not succeed.  Hand `clean`
# to _action the same way `all` does.
# NOTE(review): assumes _action (from propagate.targ) runs $(ACTION) in
# each directory of $(DIRS) -- confirm against propagate.targ.
clean::
	$(MAKE) ACTION=$@ _action
|
||||
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
11
src/muscle/muscle3.8.31/src/Makefile
Normal file
@@ -0,0 +1,11 @@
|
||||
# Builds the muscle binary by running the ./mk script in this directory
# and copies the result into $(BINDIR).
include ../../../../config/auto.conf

# These targets name actions, not files.
.PHONY: all install clean

all: muscle install

# CXX is handed to ./mk through the environment.  Quoting keeps a
# multi-word $(CXX) (compiler plus options) in a single variable.
muscle:
	chmod +x ./mk
	CXX="$(CXX)" ./mk

# Depends on muscle so that a parallel make cannot copy before the build.
install: muscle
	cp muscle $(BINDIR)

# NOTE(review): assumes ./mk leaves only `muscle` and *.o files behind --
# extend if it produces other artefacts.
clean:
	rm -f muscle *.o
|
||||
|
||||
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
27
src/muscle/muscle3.8.31/src/README.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
MUSCLE v3.0 source code README
|
||||
------------------------------
|
||||
|
||||
http://www.drive5.com/muscle
|
||||
|
||||
This version of MUSCLE was built and tested on two platforms:
|
||||
Windows XP and Red Hat Linux 8.0.
|
||||
|
||||
On Windows, I used Microsoft Visual C++ .Net, which I find
|
||||
to be the best C++ compile / edit / test environment I've
|
||||
tried on any platform. The Microsoft project file is
|
||||
muscle.vcproj.
|
||||
|
||||
The Linux make file is Makefile. This is a very simple-minded
|
||||
make file (because I am a Linux development novice), so should
|
||||
be easy to understand. By default, it uses shared libraries,
|
||||
but I found this to give problems when copying between
|
||||
different Linux versions. The fix was to use the linker
|
||||
flag -lm -static (commented out), which gives a much bigger
|
||||
but more portable binary. The posted binary was linked with
|
||||
static libraries.
|
||||
|
||||
The source code was not written to be maintained by anyone
|
||||
but me, so the usual apologies and caveats apply.
|
||||
|
||||
Bob Edgar,
|
||||
January 2004
|
||||
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
802
src/muscle/muscle3.8.31/src/aligngivenpath.cpp
Normal file
@@ -0,0 +1,802 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void LogPP(const ProfPos &PP)
|
||||
{
|
||||
Log("ResidueGroup %u\n", PP.m_uResidueGroup);
|
||||
Log("AllGaps %d\n", PP.m_bAllGaps);
|
||||
Log("Occ %.3g\n", PP.m_fOcc);
|
||||
Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
|
||||
Log("Freqs ");
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
if (PP.m_fcCounts[i] > 0)
|
||||
Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
|
||||
Log("\n");
|
||||
}
|
||||
|
||||
static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
|
||||
{
|
||||
const ProfPos &PPA = PA[i];
|
||||
const ProfPos &PPB = PB[i];
|
||||
#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||
#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
|
||||
eq(bAllGaps)
|
||||
eq(uResidueGroup)
|
||||
|
||||
be(LL)
|
||||
be(LG)
|
||||
be(GL)
|
||||
be(GG)
|
||||
be(fOcc)
|
||||
be(scoreGapOpen)
|
||||
be(scoreGapClose)
|
||||
|
||||
for (unsigned j = 0; j < 20; ++j)
|
||||
{
|
||||
#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
|
||||
#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
|
||||
bej(fcCounts[j]);
|
||||
// eqj(uSortOrder[j]) // may differ due to ties, don't check?
|
||||
bej(AAScores[j])
|
||||
#undef eqj
|
||||
#undef bej
|
||||
}
|
||||
#undef eq
|
||||
#undef be
|
||||
}
|
||||
|
||||
void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB)
|
||||
{
|
||||
if (uLengthA != uLengthB)
|
||||
Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
|
||||
for (unsigned i = 0; i < uLengthB; ++i)
|
||||
AssertProfPosEq(PA, PB, i);
|
||||
}
|
||||
|
||||
#if DEBUG
// Debug-only consistency check of the transition frequencies of a profile.
// In non-DEBUG builds ValidateProf expands to nothing (see #else below).
static void ValidateProf(const ProfPos *Prof, unsigned uLength)
{
	for (unsigned uPos = 0; uPos < uLength; ++uPos)
	{
		const ProfPos &Cur = Prof[uPos];

		// LL, LG, GL and GG of one position must sum to one.
		const FCOUNT fcAll = Cur.m_LL + Cur.m_LG + Cur.m_GL + Cur.m_GG;
		assert(BTEq(fcAll, 1.0));

		// LL + GL of the previous position must agree with LL + LG of
		// this one.
		if (uPos > 0)
		{
			const ProfPos &Prev = Prof[uPos - 1];
			const FCOUNT fcPrevSide = Prev.m_LL + Prev.m_GL;
			const FCOUNT fcThisSide = Cur.m_LL + Cur.m_LG;
			assert(BTEq(fcPrevSide, fcThisSide));
		}

		// The same relation, checked against the following position.
		if (uPos + 1 < uLength)
		{
			const ProfPos &Next = Prof[uPos + 1];
			const FCOUNT fcThisSide = Cur.m_LL + Cur.m_GL;
			const FCOUNT fcNextSide = Next.m_LL + Next.m_LG;
			assert(BTEq(fcThisSide, fcNextSide));
		}
	}
}
#else
#define ValidateProf(Prof, Length) /* empty */
#endif
|
||||
|
||||
// Fill in the derived members of position uPos of Prof from its letter
// counts and transition frequencies: sort order, residue group, occupancy,
// position-specific gap open/close scores and per-letter scores.
static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
{
	ProfPos &Col = Prof[uPos];

	SortCounts(Col.m_fcCounts, Col.m_uSortOrder);
	Col.m_uResidueGroup = ResidueGroupFromFCounts(Col.m_fcCounts);

	// "Occupancy"
	Col.m_fOcc = Col.m_LL + Col.m_GL;

	// A gap opens in this position when the previous position holds a
	// letter and this one a gap, i.e. exactly the LG frequency here.
	const FCOUNT fcOpen = Col.m_LG;

	// A gap closes in this position when this position holds a gap and
	// the next one a letter, i.e. the GL frequency of the next position.
	// The last position has no successor and uses GG + LG instead.
	FCOUNT fcClose;
	const bool bIsLast = !(uPos + 1 < uLength);
	if (bIsLast)
		fcClose = Col.m_GG + Col.m_LG;
	else
		fcClose = Prof[uPos + 1].m_GL;

	Col.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
	Col.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
#if DOUBLE_AFFINE
	Col.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
	Col.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
#endif

	// Score of each letter against this position: the count-weighted sum
	// of its substitution-matrix row.
	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
	{
		SCORE scoreLetter = 0;
		for (unsigned uOther = 0; uOther < g_AlphaSize; ++uOther)
			scoreLetter += Col.m_fcCounts[uOther]*(*g_ptrScoreMatrix)[uLetter][uOther];
		Col.m_AAScores[uLetter] = scoreLetter;
	}
}
|
||||
|
||||
// Run ScoresFromFreqsPos on every position of the profile, first to last.
void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
{
	unsigned uPos = 0;
	while (uPos < uLength)
	{
		ScoresFromFreqsPos(Prof, uLength, uPos);
		++uPos;
	}
}
|
||||
|
||||
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexA;
|
||||
}
|
||||
|
||||
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexB;
|
||||
}
|
||||
|
||||
static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
const unsigned uLengthA = msaA.GetColCount();
|
||||
const unsigned uLengthB = msaB.GetColCount();
|
||||
|
||||
unsigned uNewColCount = uColCountA;
|
||||
if (uColCountB > uNewColCount)
|
||||
uNewColCount = uColCountB;
|
||||
|
||||
for (unsigned n = 0; n < uColCountA; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
for (unsigned n = 0; n < uColCountB; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
uColIndexCombined += uNewColCount;
|
||||
uColIndexA += uColCountA;
|
||||
uColIndexB += uColCountB;
|
||||
}
|
||||
|
||||
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexCombined;
|
||||
}
|
||||
|
||||
void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||
MSA &msaCombined)
|
||||
{
|
||||
msaCombined.Clear();
|
||||
|
||||
#if TRACE
|
||||
Log("FastAlignProfiles\n");
|
||||
Log("Template A:\n");
|
||||
msaA.LogMe();
|
||||
Log("Template B:\n");
|
||||
msaB.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uColCountA = msaA.GetColCount();
|
||||
const unsigned uColCountB = msaB.GetColCount();
|
||||
|
||||
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||
|
||||
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||
|
||||
// Copy sequence names into combined MSA
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||
}
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexCombined = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB);
|
||||
#endif
|
||||
const char cType = Edge.cType;
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
unsigned uColCountA = 0;
|
||||
if (uPrefixLengthA > 0)
|
||||
{
|
||||
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||
const unsigned uTplColIndexA = uNodeIndexA;
|
||||
if (uTplColIndexA > uColIndexA)
|
||||
uColCountA = uTplColIndexA - uColIndexA;
|
||||
}
|
||||
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
unsigned uColCountB = 0;
|
||||
if (uPrefixLengthB > 0)
|
||||
{
|
||||
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||
const unsigned uTplColIndexB = uNodeIndexB;
|
||||
if (uTplColIndexB > uColIndexB)
|
||||
uColCountB = uTplColIndexB - uColIndexB;
|
||||
}
|
||||
|
||||
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||
assert(uColCountA == 0);
|
||||
assert(uColCountB == 0);
|
||||
AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
assert(uColIndexB == uColB);
|
||||
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||
msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexB == uColB);
|
||||
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||
|
||||
// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
|
||||
assert(uInsertColCountA == 0);
|
||||
assert(uInsertColCountB == 0);
|
||||
AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
assert(msaCombined.GetColCount() == uEdgeCount);
|
||||
}
|
||||
|
||||
// Stand-in profile position used in this file wherever a prefix length of
// zero leaves no real column to read: LL = 1, occupancy = 1, everything
// else zero.
// NOTE(review): the member names below are taken from the per-line
// comments of the initialiser; the order must match the declaration of
// ProfPos in profile.h -- confirm there.
static const ProfPos PPStart =
{
	false,	// m_bAllGaps
	{ 0 },	// m_uSortOrder, every element zero
	{ 0 },	// m_fcCounts, every element zero
	1.0,	// m_LL
	0.0,	// m_LG
	0.0,	// m_GL
	0.0,	// m_GG
	{ 0 },	// per-letter scores (presumably m_AAScores), every element zero
	0,	// m_uResidueGroup
	1.0,	// m_fOcc
	0.0,	// m_fcStartOcc
	0.0,	// m_fcEndOcc
	0.0,	// m_scoreGapOpen
	0.0,	// m_scoreGapClose
};
|
||||
|
||||
// MM
|
||||
// Ai–1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// Bj–1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsMM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
|
||||
}
|
||||
|
||||
// MD
|
||||
// Ai–1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// Bj (-)
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
static void SetGapsMD(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
|
||||
PPO.m_GL = wA*PPA.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// DD
|
||||
// Ai–1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// (-) (-)
|
||||
// - - ?? GG
|
||||
static void SetGapsDD(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL;
|
||||
PPO.m_GG = wA*PPA.m_GG + wB;
|
||||
}
|
||||
|
||||
// MI
|
||||
// Ai (-) Out
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
|
||||
// Bj–1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsMI(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
|
||||
PPO.m_GL = wB*PPB.m_GL;
|
||||
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||
}
|
||||
|
||||
// DM
|
||||
// Ai–1 Ai Out
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
//
|
||||
// (-) Bj
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
static void SetGapsDM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wA*PPA.m_LL;
|
||||
PPO.m_LG = wA*PPA.m_LG;
|
||||
PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
|
||||
PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// IM
|
||||
// (-) Ai Out
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
|
||||
// Bj–1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsIM(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG;
|
||||
PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
|
||||
PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
|
||||
}
|
||||
|
||||
// ID
|
||||
// (-) Ai Out
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
|
||||
// Bj (-)
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
static void SetGapsID(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = 0;
|
||||
PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||
PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// DI
|
||||
// Ai (-) Out
|
||||
// X - ?L LG
|
||||
// - - ?G GG
|
||||
|
||||
// (-) Bj
|
||||
// - X ?L GL
|
||||
// - - ?G GG
|
||||
static void SetGapsDI(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = 0;
|
||||
PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
|
||||
PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
|
||||
PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
|
||||
}
|
||||
|
||||
// II
|
||||
// (-) (-) Out
|
||||
// - - ?? GG
|
||||
|
||||
// Bj–1 Bj
|
||||
// X X LL LL
|
||||
// X - LG LG
|
||||
// - X GL GL
|
||||
// - - GG GG
|
||||
static void SetGapsII(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
PPO.m_LL = wB*PPB.m_LL;
|
||||
PPO.m_LG = wB*PPB.m_LG;
|
||||
PPO.m_GL = wB*PPB.m_GL;
|
||||
PPO.m_GG = wB*PPB.m_GG + wA;
|
||||
}
|
||||
|
||||
static void SetFreqs(
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos *POut, unsigned uColIndexOut)
|
||||
{
|
||||
const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
|
||||
const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
|
||||
ProfPos &PPO = POut[uColIndexOut];
|
||||
|
||||
if (g_bNormalizeCounts)
|
||||
{
|
||||
const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
|
||||
const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
|
||||
FCOUNT fTotal = 0;
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
{
|
||||
const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
|
||||
PPO.m_fcCounts[i] = f;
|
||||
fTotal += f;
|
||||
}
|
||||
if (fTotal > 0)
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
PPO.m_fcCounts[i] /= fTotal;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned i = 0; i < 20; ++i)
|
||||
PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
|
||||
}
|
||||
}
|
||||
|
||||
void AlignTwoProfsGivenPath(const PWPath &Path,
|
||||
const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
|
||||
const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
|
||||
ProfPos **ptrPOut, unsigned *ptruLengthOut)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
|
||||
Path.LogMe();
|
||||
#endif
|
||||
assert(BTEq(wA + wB, 1.0));
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexOut = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
ProfPos *POut = new ProfPos[uEdgeCount];
|
||||
char cPrevType = 'M';
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
const char cType = Edge.cType;
|
||||
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB,
|
||||
uColIndexA,
|
||||
uColIndexB);
|
||||
#endif
|
||||
|
||||
POut[uColIndexOut].m_bAllGaps = false;
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsIM(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, 0,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMD(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDD(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsID(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexA;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
SetFreqs(
|
||||
PA, uPrefixLengthA, 0,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
switch (cPrevType)
|
||||
{
|
||||
case 'M':
|
||||
SetGapsMI(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'D':
|
||||
SetGapsDI(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
case 'I':
|
||||
SetGapsII(
|
||||
PA, uPrefixLengthA, wA,
|
||||
PB, uPrefixLengthB, wB,
|
||||
POut, uColIndexOut);
|
||||
break;
|
||||
default:
|
||||
Quit("Bad cPrevType");
|
||||
}
|
||||
++uColIndexB;
|
||||
++uColIndexOut;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
cPrevType = cType;
|
||||
}
|
||||
assert(uColIndexOut == uEdgeCount);
|
||||
|
||||
ProfScoresFromFreqs(POut, uEdgeCount);
|
||||
ValidateProf(POut, uEdgeCount);
|
||||
|
||||
*ptrPOut = POut;
|
||||
*ptruLengthOut = uEdgeCount;
|
||||
|
||||
#if TRACE
|
||||
Log("AlignTwoProfsGivenPath:\n");
|
||||
ListProfile(POut, uEdgeCount, 0);
|
||||
#endif
|
||||
}
|
||||
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
237
src/muscle/muscle3.8.31/src/aligngivenpathsw.cpp
Normal file
@@ -0,0 +1,237 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexA;
|
||||
}
|
||||
|
||||
static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
|
||||
unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
|
||||
unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexCombined;
|
||||
++uColIndexB;
|
||||
}
|
||||
|
||||
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
|
||||
const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
|
||||
unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
const unsigned uLengthA = msaA.GetColCount();
|
||||
const unsigned uLengthB = msaB.GetColCount();
|
||||
|
||||
unsigned uNewColCount = uColCountA;
|
||||
if (uColCountB > uNewColCount)
|
||||
uNewColCount = uColCountB;
|
||||
|
||||
for (unsigned n = 0; n < uColCountA; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountA; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
for (unsigned n = 0; n < uColCountB; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
|
||||
c = UnalignChar(c);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
|
||||
}
|
||||
}
|
||||
for (unsigned n = uColCountB; n < uNewColCount; ++n)
|
||||
{
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
|
||||
}
|
||||
|
||||
uColIndexCombined += uNewColCount;
|
||||
uColIndexA += uColCountA;
|
||||
uColIndexB += uColCountB;
|
||||
}
|
||||
|
||||
static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
|
||||
unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
|
||||
MSA &msaCombined, unsigned &uColIndexCombined)
|
||||
{
|
||||
#if TRACE
|
||||
Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
|
||||
uColIndexA, uColIndexB, uColIndexCombined);
|
||||
#endif
|
||||
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
char c = msaA.GetChar(uSeqIndexA, uColIndexA);
|
||||
msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
char c = msaB.GetChar(uSeqIndexB, uColIndexB);
|
||||
msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
|
||||
}
|
||||
|
||||
++uColIndexA;
|
||||
++uColIndexB;
|
||||
++uColIndexCombined;
|
||||
}
|
||||
|
||||
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
|
||||
MSA &msaCombined)
|
||||
{
|
||||
msaCombined.Clear();
|
||||
|
||||
#if TRACE
|
||||
Log("AlignTwoMSAsGivenPathSW\n");
|
||||
Log("Template A:\n");
|
||||
msaA.LogMe();
|
||||
Log("Template B:\n");
|
||||
msaB.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uColCountA = msaA.GetColCount();
|
||||
const unsigned uColCountB = msaB.GetColCount();
|
||||
|
||||
const unsigned uSeqCountA = msaA.GetSeqCount();
|
||||
const unsigned uSeqCountB = msaB.GetSeqCount();
|
||||
|
||||
msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
|
||||
|
||||
// Copy sequence names into combined MSA
|
||||
for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
|
||||
msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
|
||||
{
|
||||
msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
|
||||
msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
|
||||
}
|
||||
|
||||
unsigned uColIndexA = 0;
|
||||
unsigned uColIndexB = 0;
|
||||
unsigned uColIndexCombined = 0;
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
#if TRACE
|
||||
Log("\nEdge %u %c%u.%u\n",
|
||||
uEdgeIndex,
|
||||
Edge.cType,
|
||||
Edge.uPrefixLengthA,
|
||||
Edge.uPrefixLengthB);
|
||||
#endif
|
||||
const char cType = Edge.cType;
|
||||
const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
|
||||
unsigned uColCountA = 0;
|
||||
if (uPrefixLengthA > 0)
|
||||
{
|
||||
const unsigned uNodeIndexA = uPrefixLengthA - 1;
|
||||
const unsigned uTplColIndexA = uNodeIndexA;
|
||||
if (uTplColIndexA > uColIndexA)
|
||||
uColCountA = uTplColIndexA - uColIndexA;
|
||||
}
|
||||
|
||||
const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
|
||||
unsigned uColCountB = 0;
|
||||
if (uPrefixLengthB > 0)
|
||||
{
|
||||
const unsigned uNodeIndexB = uPrefixLengthB - 1;
|
||||
const unsigned uTplColIndexB = uNodeIndexB;
|
||||
if (uTplColIndexB > uColIndexB)
|
||||
uColCountB = uTplColIndexB - uColIndexB;
|
||||
}
|
||||
|
||||
AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
|
||||
uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
assert(uColIndexB == uColB);
|
||||
AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
|
||||
msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
assert(uPrefixLengthA > 0);
|
||||
const unsigned uColA = uPrefixLengthA - 1;
|
||||
assert(uColIndexA == uColA);
|
||||
AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
assert(uPrefixLengthB > 0);
|
||||
const unsigned uColB = uPrefixLengthB - 1;
|
||||
assert(uColIndexB == uColB);
|
||||
AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
unsigned uInsertColCountA = uColCountA - uColIndexA;
|
||||
unsigned uInsertColCountB = uColCountB - uColIndexB;
|
||||
|
||||
AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
|
||||
uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
|
||||
}
|
||||
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
41
src/muscle/muscle3.8.31/src/aligntwomsas.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
#include "textfile.h"
|
||||
#include "timing.h"
|
||||
|
||||
// Aligns two multiple alignments profile-to-profile.
//
// Profiles are built from msa1 and msa2 with ProfileFromMSA, aligned with
// GlobalAlign (which fills Path), and the combined alignment is written to
// msaOut by AlignTwoMSAsGivenPath. The profiles are released before
// returning.
//
//   bLockLeft   set the gap-open score of the first position of each
//               profile to MINUS_INFINITY
//   bLockRight  set the gap-close score of the last position of each
//               profile to MINUS_INFINITY
//
// Returns the score reported by GlobalAlign.
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
  bool bLockLeft, bool bLockRight)
	{
	const unsigned uLengthA = msa1.GetColCount();
	const unsigned uLengthB = msa2.GetColCount();

	ProfPos *PA = ProfileFromMSA(msa1);
	ProfPos *PB = ProfileFromMSA(msa2);

	// An empty profile has no first/last position to lock; indexing it
	// (in particular [uLength-1] with uLength == 0) would write out of
	// bounds.
	if (bLockLeft)
		{
		if (uLengthA > 0)
			PA[0].m_scoreGapOpen = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[0].m_scoreGapOpen = MINUS_INFINITY;
		}

	if (bLockRight)
		{
		if (uLengthA > 0)
			PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
		if (uLengthB > 0)
			PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
		}

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);

	delete[] PA;
	delete[] PB;

	return Score;
	}
|
||||
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
31
src/muscle/muscle3.8.31/src/aligntwoprofs.cpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
SCORE GlobalAlign4(ProfPos *PA, unsigned uLengthA, ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
|
||||
// Aligns profile PA (uLengthA positions, weight wA) with profile PB
// (uLengthB positions, weight wB).
//
// GlobalAlign fills Path; AlignTwoProfsGivenPath then produces the merged
// profile in *ptrPout with its length in *ptruLengthOut, weighting the two
// inputs by wA/(wA + wB) and wB/(wA + wB). When built with HYDRO and the
// alphabet is amino acid, Hydro() is applied to the merged profile.
//
// Returns the score reported by GlobalAlign.
SCORE AlignTwoProfs(
  const ProfPos *PA, unsigned uLengthA, WEIGHT wA,
  const ProfPos *PB, unsigned uLengthB, WEIGHT wB,
  PWPath &Path, ProfPos **ptrPout, unsigned *ptruLengthOut)
	{
	assert(uLengthA < 100000);
	assert(uLengthB < 100000);

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	// Each profile is passed together with its own length (previously
	// uLengthB was passed for PA as well).
	AlignTwoProfsGivenPath(Path, PA, uLengthA, wA/(wA + wB), PB, uLengthB, wB/(wA + wB),
	  ptrPout, ptruLengthOut);

#if	HYDRO
	if (ALPHA_Amino == g_Alpha)
		Hydro(*ptrPout, *ptruLengthOut);
#endif
	return Score;
	}
|
||||
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
170
src/muscle/muscle3.8.31/src/aln.cpp
Normal file
@@ -0,0 +1,170 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
|
||||
const unsigned uCharsPerLine = 60;
|
||||
const int MIN_NAME = 10;
|
||||
const int MAX_NAME = 32;
|
||||
|
||||
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex);
|
||||
|
||||
// Length of the first word of a sequence label: the number of characters
// before the first blank, or the whole label if it contains no blank.
static int GetAlnNameLength(const char *ptrName)
	{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
	}

// Writes the alignment to File in CLUSTAL (.aln) layout.
//
// A header line is written first ("CLUSTAL W (1.81) ..." when
// g_bClwStrict is set, otherwise a MUSCLE banner followed by an empty
// line). The alignment is then written in blocks of uCharsPerLine columns:
// one line per sequence (first word of the label padded or truncated to a
// common width between MIN_NAME and MAX_NAME, then the upper-cased
// residues) followed by a line of consensus symbols from
// GetAlnConsensusChar.
//
// An alignment with no columns produces the header only.
void MSA::ToAlnFile(TextFile &File) const
	{
	if (g_bClwStrict)
		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
	else
		{
		File.PutString("MUSCLE ("
		  SHORT_VERSION ")"
		  " multiple sequence alignment\n");
		File.PutString("\n");
		}

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	// With zero columns the block arithmetic below (uColCount - 1) would
	// wrap around in unsigned arithmetic.
	if (0 == uColCount)
		return;

	// Common label width: longest first word, clamped to [MIN_NAME, MAX_NAME].
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const int iLength = GetAlnNameLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
		}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

	const unsigned uLineCount = (uColCount - 1)/uCharsPerLine + 1;
	char Name[MAX_NAME+1];
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
		{
		File.PutString("\n");
		const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;

		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = GetAlnNameLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;

			// Blank-pad, copy the label, cut at the common width.
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

			File.PutFormat("%s ", Name);
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
				{
				const char c = GetChar(uSeqIndex, uColIndex);
				// <ctype.h> functions require a value representable as
				// unsigned char (or EOF).
				File.PutFormat("%c", toupper((unsigned char) c));
				}
			File.PutString("\n");
			}

		// Consensus line, indented by a blank label.
		memset(Name, ' ', MAX_NAME);
		Name[iLongestNameLength] = 0;
		File.PutFormat("%s ", Name);
		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
		  ++uColIndex)
			{
			const char c = GetAlnConsensusChar(*this, uColIndex);
			File.PutChar(c);
			}
		File.PutString("\n");
		}
	}
|
||||
|
||||
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
|
||||
{
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned BitMap = 0;
|
||||
unsigned Count = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
|
||||
assert(uLetter < 32);
|
||||
unsigned Bit = (1 << uLetter);
|
||||
if (!(BitMap & Bit))
|
||||
++Count;
|
||||
BitMap |= Bit;
|
||||
}
|
||||
|
||||
// '*' indicates positions which have a single, fully conserved residue
|
||||
if (1 == Count)
|
||||
return '*';
|
||||
|
||||
if (ALPHA_Amino != g_Alpha)
|
||||
return ' ';
|
||||
|
||||
#define B(a) (1 << AX_##a)
|
||||
#define S2(a, b) S(B(a) | B(b))
|
||||
#define S3(a, b, c) S(B(a) | B(b) | B(c))
|
||||
#define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d))
|
||||
#define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';
|
||||
|
||||
#define W3(a, b, c) W(B(a) | B(b) | B(c))
|
||||
#define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d))
|
||||
#define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e))
|
||||
#define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
|
||||
#define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';
|
||||
|
||||
// ':' indicates that one of the following 'strong'
|
||||
// groups is fully conserved
|
||||
// STA
|
||||
// NEQK
|
||||
// NHQK
|
||||
// NDEQ
|
||||
// QHRK
|
||||
// MILV
|
||||
// MILF
|
||||
// HY
|
||||
// FYW
|
||||
//
|
||||
S3(S, T, A)
|
||||
S4(N, E, Q, K)
|
||||
S4(N, H, Q, K)
|
||||
S4(N, D, E, Q)
|
||||
S4(M, I, L, V)
|
||||
S4(M, I, L, F)
|
||||
S2(H, Y)
|
||||
S3(F, Y, W)
|
||||
|
||||
// '.' indicates that one of the following 'weaker'
|
||||
// groups is fully conserved
|
||||
// CSA
|
||||
// ATV
|
||||
// SAG
|
||||
// STNK
|
||||
// STPA
|
||||
// SGND
|
||||
// SNDEQK
|
||||
// NDEQHK
|
||||
// NEQHRK
|
||||
// FVLIM
|
||||
// HFY
|
||||
W3(C, S, A)
|
||||
W3(A, T, V)
|
||||
W3(S, A, G)
|
||||
W4(S, T, N, K)
|
||||
W4(S, T, P, A)
|
||||
W4(S, G, N, D)
|
||||
W6(S, N, D, E, Q, K)
|
||||
W6(N, W, Q, H, R, K)
|
||||
W5(F, V, L, I, M)
|
||||
W3(H, F, Y)
|
||||
|
||||
return ' ';
|
||||
}
|
||||
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
283
src/muscle/muscle3.8.31/src/alpha.cpp
Normal file
@@ -0,0 +1,283 @@
|
||||
#include "muscle.h"
|
||||
#include <ctype.h>
|
||||
|
||||
/***
|
||||
From Bioperl docs:
|
||||
Extended DNA / RNA alphabet
|
||||
------------------------------------------
|
||||
Symbol Meaning Nucleic Acid
|
||||
------------------------------------------
|
||||
A A Adenine
|
||||
C C Cytosine
|
||||
G G Guanine
|
||||
T T Thymine
|
||||
U U Uracil
|
||||
M A or C
|
||||
R A or G
|
||||
W A or T
|
||||
S C or G
|
||||
Y C or T
|
||||
K G or T
|
||||
V A or C or G
|
||||
H A or C or T
|
||||
D A or G or T
|
||||
B C or G or T
|
||||
X G or A or T or C
|
||||
N G or A or T or C
|
||||
|
||||
IUPAC-IUB SYMBOLS FOR NUCLEOTIDE NOMENCLATURE:
|
||||
Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030.
|
||||
***/
|
||||
|
||||
unsigned g_CharToLetter[MAX_CHAR];
|
||||
unsigned g_CharToLetterEx[MAX_CHAR];
|
||||
|
||||
char g_LetterToChar[MAX_ALPHA];
|
||||
char g_LetterExToChar[MAX_ALPHA_EX];
|
||||
|
||||
char g_UnalignChar[MAX_CHAR];
|
||||
char g_AlignChar[MAX_CHAR];
|
||||
|
||||
bool g_IsWildcardChar[MAX_CHAR];
|
||||
bool g_IsResidueChar[MAX_CHAR];
|
||||
|
||||
ALPHA g_Alpha = ALPHA_Undefined;
|
||||
unsigned g_AlphaSize = 0;
|
||||
|
||||
// Res(c, Letter): registers character c, in both upper and lower case, as
// an ordinary residue with letter code Letter. Both cases map to Letter in
// g_CharToLetter and g_CharToLetterEx; Letter maps back to the upper-case
// character; both cases are flagged in g_IsResidueChar; AlignChar yields
// the upper-case and UnalignChar the lower-case form.
// Expands to a braced block, so it is used without a trailing semicolon.
#define Res(c, Letter) \
	{ \
	const unsigned char ucUp = (unsigned char) toupper(c); \
	const unsigned char ucLo = (unsigned char) tolower(c); \
	g_CharToLetter[ucUp] = (Letter); \
	g_CharToLetterEx[ucUp] = (Letter); \
	g_IsResidueChar[ucUp] = true; \
	g_AlignChar[ucUp] = ucUp; \
	g_UnalignChar[ucUp] = ucLo; \
	g_CharToLetter[ucLo] = (Letter); \
	g_CharToLetterEx[ucLo] = (Letter); \
	g_IsResidueChar[ucLo] = true; \
	g_AlignChar[ucLo] = ucUp; \
	g_UnalignChar[ucLo] = ucLo; \
	g_LetterToChar[(Letter)] = ucUp; \
	g_LetterExToChar[(Letter)] = ucUp; \
	}
|
||||
|
||||
// Wild(c, Letter): registers character c, in both upper and lower case, as
// a wildcard with extended letter code Letter. Unlike Res, only the
// extended tables (g_CharToLetterEx / g_LetterExToChar) are filled and both
// cases are additionally flagged in g_IsWildcardChar.
// Expands to a braced block, so it is used without a trailing semicolon.
#define Wild(c, Letter) \
	{ \
	const unsigned char ucUp = (unsigned char) toupper(c); \
	const unsigned char ucLo = (unsigned char) tolower(c); \
	g_CharToLetterEx[ucUp] = (Letter); \
	g_IsResidueChar[ucUp] = true; \
	g_IsWildcardChar[ucUp] = true; \
	g_AlignChar[ucUp] = ucUp; \
	g_UnalignChar[ucUp] = ucLo; \
	g_CharToLetterEx[ucLo] = (Letter); \
	g_IsResidueChar[ucLo] = true; \
	g_IsWildcardChar[ucLo] = true; \
	g_AlignChar[ucLo] = ucUp; \
	g_UnalignChar[ucLo] = ucLo; \
	g_LetterExToChar[(Letter)] = ucUp; \
	}
|
||||
|
||||
// Number of ordinary (non-wildcard) letters in the given alphabet:
// 20 for amino acids, 4 for DNA and RNA. Any other value is reported
// through Quit().
static unsigned GetAlphaSize(ALPHA Alpha)
	{
	if (ALPHA_Amino == Alpha)
		return 20;

	if (ALPHA_RNA == Alpha || ALPHA_DNA == Alpha)
		return 4;

	Quit("Invalid Alpha=%d", Alpha);
	return 0;
	}
|
||||
|
||||
static void InitArrays()
|
||||
{
|
||||
memset(g_CharToLetter, 0xff, sizeof(g_CharToLetter));
|
||||
memset(g_CharToLetterEx, 0xff, sizeof(g_CharToLetterEx));
|
||||
|
||||
memset(g_LetterToChar, '?', sizeof(g_LetterToChar));
|
||||
memset(g_LetterExToChar, '?', sizeof(g_LetterExToChar));
|
||||
|
||||
memset(g_AlignChar, '?', sizeof(g_UnalignChar));
|
||||
memset(g_UnalignChar, '?', sizeof(g_UnalignChar));
|
||||
|
||||
memset(g_IsWildcardChar, 0, sizeof(g_IsWildcardChar));
|
||||
}
|
||||
|
||||
static void SetGapChar(char c)
|
||||
{
|
||||
unsigned char u = (unsigned char) c;
|
||||
|
||||
g_CharToLetterEx[u] = AX_GAP;
|
||||
g_LetterExToChar[AX_GAP] = u;
|
||||
g_AlignChar[u] = u;
|
||||
g_UnalignChar[u] = u;
|
||||
}
|
||||
|
||||
static void SetAlphaDNA()
|
||||
{
|
||||
Res('A', NX_A)
|
||||
Res('C', NX_C)
|
||||
Res('G', NX_G)
|
||||
Res('T', NX_T)
|
||||
Wild('M', NX_M)
|
||||
Wild('R', NX_R)
|
||||
Wild('W', NX_W)
|
||||
Wild('S', NX_S)
|
||||
Wild('Y', NX_Y)
|
||||
Wild('K', NX_K)
|
||||
Wild('V', NX_V)
|
||||
Wild('H', NX_H)
|
||||
Wild('D', NX_D)
|
||||
Wild('B', NX_B)
|
||||
Wild('X', NX_X)
|
||||
Wild('N', NX_N)
|
||||
}
|
||||
|
||||
static void SetAlphaRNA()
|
||||
{
|
||||
Res('A', NX_A)
|
||||
Res('C', NX_C)
|
||||
Res('G', NX_G)
|
||||
Res('U', NX_U)
|
||||
Res('T', NX_T)
|
||||
Wild('M', NX_M)
|
||||
Wild('R', NX_R)
|
||||
Wild('W', NX_W)
|
||||
Wild('S', NX_S)
|
||||
Wild('Y', NX_Y)
|
||||
Wild('K', NX_K)
|
||||
Wild('V', NX_V)
|
||||
Wild('H', NX_H)
|
||||
Wild('D', NX_D)
|
||||
Wild('B', NX_B)
|
||||
Wild('X', NX_X)
|
||||
Wild('N', NX_N)
|
||||
}
|
||||
|
||||
static void SetAlphaAmino()
|
||||
{
|
||||
Res('A', AX_A)
|
||||
Res('C', AX_C)
|
||||
Res('D', AX_D)
|
||||
Res('E', AX_E)
|
||||
Res('F', AX_F)
|
||||
Res('G', AX_G)
|
||||
Res('H', AX_H)
|
||||
Res('I', AX_I)
|
||||
Res('K', AX_K)
|
||||
Res('L', AX_L)
|
||||
Res('M', AX_M)
|
||||
Res('N', AX_N)
|
||||
Res('P', AX_P)
|
||||
Res('Q', AX_Q)
|
||||
Res('R', AX_R)
|
||||
Res('S', AX_S)
|
||||
Res('T', AX_T)
|
||||
Res('V', AX_V)
|
||||
Res('W', AX_W)
|
||||
Res('Y', AX_Y)
|
||||
|
||||
Wild('B', AX_B)
|
||||
Wild('X', AX_X)
|
||||
Wild('Z', AX_Z)
|
||||
}
|
||||
|
||||
// Selects the active alphabet. All lookup tables are reset, '.' and '-'
// are registered as gap characters (in that order), the tables for Alpha
// are installed, and g_AlphaSize / g_Alpha are updated. With g_bVerbose set
// the chosen alphabet is logged. An unknown Alpha is reported via Quit().
void SetAlpha(ALPHA Alpha)
	{
	InitArrays();

	SetGapChar('.');
	SetGapChar('-');

	if (ALPHA_Amino == Alpha)
		SetAlphaAmino();
	else if (ALPHA_DNA == Alpha)
		{
		// NOTE(review): the DNA case has always continued into the RNA
		// set-up (no break between the two switch cases), so DNA input also
		// gets 'U' registered as a residue. Kept as is; confirm whether
		// this is intended before separating the two.
		SetAlphaDNA();
		SetAlphaRNA();
		}
	else if (ALPHA_RNA == Alpha)
		SetAlphaRNA();
	else
		Quit("Invalid Alpha=%d", Alpha);

	g_AlphaSize = GetAlphaSize(Alpha);
	g_Alpha = Alpha;

	if (g_bVerbose)
		Log("Alphabet %s\n", ALPHAToStr(g_Alpha));
	}
|
||||
|
||||
char GetWildcardChar()
|
||||
{
|
||||
switch (g_Alpha)
|
||||
{
|
||||
case ALPHA_Amino:
|
||||
return 'X';
|
||||
|
||||
case ALPHA_DNA:
|
||||
case ALPHA_RNA:
|
||||
return 'N';
|
||||
|
||||
default:
|
||||
Quit("Invalid Alpha=%d", g_Alpha);
|
||||
}
|
||||
return '?';
|
||||
}
|
||||
|
||||
// True if c is one of the letters A C G T U R Y N in either case.
bool IsNucleo(char c)
	{
	// strchr also matches the terminating NUL of the search string, so
	// '\0' has to be excluded explicitly.
	return 0 != c && strchr("ACGTURYNacgturyn", c) != 0;
	}
|
||||
|
||||
// True if c is one of the letters A G C T N in either case.
bool IsDNA(char c)
	{
	// strchr also matches the terminating NUL of the search string, so
	// '\0' has to be excluded explicitly.
	return 0 != c && strchr("AGCTNagctn", c) != 0;
	}
|
||||
|
||||
// True if c is one of the letters A G C U N in either case.
bool IsRNA(char c)
	{
	// strchr also matches the terminating NUL of the search string, so
	// '\0' has to be excluded explicitly.
	return 0 != c && strchr("AGCUNagcun", c) != 0;
	}
|
||||
|
||||
// InvalidLetters[b] is non-zero once byte value b has been reported through
// InvalidLetterWarning; InvalidLetterCount is the number of such reports.
static char InvalidLetters[256];
static int InvalidLetterCount = 0;

// Forgets all invalid letters recorded so far. The counter is reset
// together with the flags; otherwise ReportInvalidLetters would still emit
// a warning (with an empty letter list) after a clear.
void ClearInvalidLetterWarning()
	{
	memset(InvalidLetters, 0, sizeof(InvalidLetters));
	InvalidLetterCount = 0;
	}
|
||||
|
||||
void InvalidLetterWarning(char c, char w)
|
||||
{
|
||||
InvalidLetters[(unsigned char) c] = 1;
|
||||
++InvalidLetterCount;
|
||||
}
|
||||
|
||||
void ReportInvalidLetters()
|
||||
{
|
||||
if (0 == InvalidLetterCount)
|
||||
return;
|
||||
|
||||
char Str[257];
|
||||
memset(Str, 0, 257);
|
||||
|
||||
int n = 0;
|
||||
for (int i = 0; i < 256; ++i)
|
||||
{
|
||||
if (InvalidLetters[i])
|
||||
Str[n++] = (char) i;
|
||||
}
|
||||
Warning("Assuming %s (see -seqtype option), invalid letters found: %s",
|
||||
ALPHAToStr(g_Alpha), Str);
|
||||
}
|
||||
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
106
src/muscle/muscle3.8.31/src/alpha.h
Normal file
@@ -0,0 +1,106 @@
|
||||
#ifndef alpha_h
|
||||
#define alpha_h
|
||||
|
||||
bool StrHasAmino(const char *Str);
|
||||
bool StrHasGap(const char *Str);
|
||||
void ClearInvalidLetterWarning();
|
||||
void InvalidLetterWarning(char c, char w);
|
||||
void ReportInvalidLetters();
|
||||
|
||||
extern unsigned g_CharToLetter[];
|
||||
extern unsigned g_CharToLetterEx[];
|
||||
|
||||
extern char g_LetterToChar[];
|
||||
extern char g_LetterExToChar[];
|
||||
|
||||
extern char g_UnalignChar[];
|
||||
extern char g_AlignChar[];
|
||||
|
||||
extern bool g_IsWildcardChar[];
|
||||
extern bool g_IsResidueChar[];
|
||||
|
||||
#define CharToLetter(c) (g_CharToLetter[(unsigned char) (c)])
|
||||
#define CharToLetterEx(c) (g_CharToLetterEx[(unsigned char) (c)])
|
||||
|
||||
#define LetterToChar(u) (g_LetterToChar[u])
|
||||
#define LetterExToChar(u) (g_LetterExToChar[u])
|
||||
|
||||
#define IsResidueChar(c) (g_IsResidueChar[(unsigned char) (c)])
|
||||
#define IsGapChar(c) ('-' == (c) || '.' == (c))
|
||||
#define IsWildcardChar(c) (g_IsWildcardChar[(unsigned char) (c)])
|
||||
|
||||
#define AlignChar(c) (g_AlignChar[(unsigned char) (c)])
|
||||
#define UnalignChar(c) (g_UnalignChar[(unsigned char) (c)])
|
||||
|
||||
// AX=Amino alphabet with eXtensions (B, Z and X)
|
||||
enum AX
	{
	// The 20 ordinary amino acid letters, codes 0..19
	AX_A = 0,
	AX_C,
	AX_D,
	AX_E,
	AX_F,
	AX_G,
	AX_H,
	AX_I,
	AX_K,
	AX_L,
	AX_M,
	AX_N,
	AX_P,
	AX_Q,
	AX_R,
	AX_S,
	AX_T,
	AX_V,
	AX_W,
	AX_Y,

	// Extensions
	AX_X,	// Any
	AX_B,	// D or N
	AX_Z,	// E or Q

	// Gap; last enumerator (AX_COUNT is derived from it)
	AX_GAP,
	};
|
||||
const unsigned AX_COUNT = AX_GAP + 1;
|
||||
|
||||
// NX=Nucleotide alphabet with extensions
|
||||
enum NX
	{
	// Ordinary nucleotide letters; U shares T's code
	NX_A = 0,
	NX_C,
	NX_G,
	NX_T,
	NX_U = NX_T,

	// Ambiguity codes, numbering continues after NX_T
	NX_M,	// AC
	NX_R,	// AG
	NX_W,	// AT
	NX_S,	// CG
	NX_Y,	// CT
	NX_K,	// GT
	NX_V,	// ACG
	NX_H,	// ACT
	NX_D,	// AGT
	NX_B,	// CGT
	NX_X,	// GATC
	NX_N,	// GATC

	// Gap; last enumerator (NX_COUNT is derived from it)
	NX_GAP
	};
|
||||
const unsigned NX_COUNT = NX_GAP + 1;
|
||||
|
||||
const unsigned MAX_ALPHA = 20;
|
||||
const unsigned MAX_ALPHA_EX = AX_COUNT;
|
||||
const unsigned MAX_CHAR = 256;
|
||||
|
||||
extern ALPHA g_Alpha;
|
||||
extern unsigned g_AlphaSize;
|
||||
|
||||
void SetAlpha(ALPHA Alpha);
|
||||
char GetWildcardChar();
|
||||
bool IsNucleo(char c);
|
||||
bool IsDNA(char c);
|
||||
bool IsRNA(char c);
|
||||
|
||||
#endif // alpha_h
|
||||
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
218
src/muscle/muscle3.8.31/src/anchors.cpp
Normal file
@@ -0,0 +1,218 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
|
||||
SCORE SmoothScore[], double dCeil)
|
||||
{
|
||||
#define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))
|
||||
|
||||
if (1 != uWindowLength%2)
|
||||
Quit("WindowSmooth=%u must be odd", uWindowLength);
|
||||
|
||||
if (uCount <= uWindowLength)
|
||||
{
|
||||
for (unsigned i = 0; i < uCount; ++i)
|
||||
SmoothScore[i] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
const unsigned w2 = uWindowLength/2;
|
||||
for (unsigned i = 0; i < w2; ++i)
|
||||
{
|
||||
SmoothScore[i] = 0;
|
||||
SmoothScore[uCount - i - 1] = 0;
|
||||
}
|
||||
|
||||
SCORE scoreWindowTotal = 0;
|
||||
for (unsigned i = 0; i < uWindowLength; ++i)
|
||||
{
|
||||
scoreWindowTotal += Ceil(Score[i]);
|
||||
}
|
||||
|
||||
for (unsigned i = w2; ; ++i)
|
||||
{
|
||||
SmoothScore[i] = scoreWindowTotal/uWindowLength;
|
||||
if (i == uCount - w2 - 1)
|
||||
break;
|
||||
|
||||
scoreWindowTotal -= Ceil(Score[i - w2]);
|
||||
scoreWindowTotal += Ceil(Score[i + w2 + 1]);
|
||||
}
|
||||
#undef Ceil
|
||||
}
|
||||
|
||||
// Find columns that score above the given threshold.
|
||||
// A range of scores is defined between the average
|
||||
// and the maximum. The threshold is a fraction 0.0 .. 1.0
|
||||
// within that range, where 0.0 is the average score
|
||||
// and 1.0 is the maximum score.
|
||||
// "Grade" is by analogy with grading on a curve.
|
||||
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
|
||||
double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
|
||||
{
|
||||
SCORE scoreTotal = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
scoreTotal += Score[uIndex];
|
||||
const SCORE scoreAvg = scoreTotal / uCount;
|
||||
|
||||
SCORE scoreMax = MINUS_INFINITY;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
if (Score[uIndex] > scoreMax)
|
||||
scoreMax = Score[uIndex];
|
||||
|
||||
unsigned uBestColCount = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
|
||||
{
|
||||
const SCORE s = Score[uIndex];
|
||||
const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
|
||||
if (dHeight >= dThreshold)
|
||||
{
|
||||
BestCols[uBestColCount] = uIndex;
|
||||
++uBestColCount;
|
||||
}
|
||||
}
|
||||
*ptruBestColCount = uBestColCount;
|
||||
}
|
||||
|
||||
// Best col only if all following criteria satisfied:
|
||||
// (1) Score >= min
|
||||
// (2) Smoothed score >= min
|
||||
// (3) No gaps.
|
||||
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
|
||||
const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
|
||||
unsigned BestCols[], unsigned *ptruBestColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
|
||||
unsigned uBestColCount = 0;
|
||||
for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
|
||||
{
|
||||
if (Score[uIndex] < dMinScore)
|
||||
continue;
|
||||
if (SmoothScore[uIndex] < dMinSmoothScore)
|
||||
continue;
|
||||
if (msa.ColumnHasGap(uIndex))
|
||||
continue;
|
||||
BestCols[uBestColCount] = uIndex;
|
||||
++uBestColCount;
|
||||
}
|
||||
*ptruBestColCount = uBestColCount;
|
||||
}
|
||||
|
||||
static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
|
||||
unsigned BestCols[], unsigned uBestColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
const unsigned uSeqCount = msa.GetSeqCount();
|
||||
|
||||
Log("Col ");
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
Log("%u", uSeqIndex%10);
|
||||
Log(" ");
|
||||
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
Log("%3u ", uColIndex);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
Log("%c", msa.GetChar(uSeqIndex, uColIndex));
|
||||
|
||||
Log(" %10.3f", Score[uColIndex]);
|
||||
Log(" %10.3f", SmoothScore[uColIndex]);
|
||||
|
||||
for (unsigned i = 0; i < uBestColCount; ++i)
|
||||
if (BestCols[i] == uColIndex)
|
||||
Log(" <-- Best");
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// If two best columns are found within a window, choose
|
||||
// the highest-scoring. If more than two, choose the one
|
||||
// closest to the center of the window.
|
||||
// Thins BestCols (column indexes in increasing order) down to one anchor
// per window of uWindowLength columns.
//
// Starting from each not-yet-consumed best column, the following best
// columns less than uWindowLength away form its window:
//   - no follower:        the column itself becomes the anchor
//   - exactly one:        the higher-scoring of the two (the follower on a tie)
//   - more than one:      the column closest to the window center
//                         (first column + uWindowLength/2); on equal
//                         distance the leftmost wins
// All columns of the window are then skipped. Anchors are written to
// AnchorCols and their number to *ptruAnchorColCount.
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
  unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	unsigned uAnchorColCount = 0;
	for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
		{
		const unsigned uBestColIndex = BestCols[n];

		// Number of best columns after n that fall inside the window.
		unsigned uCountWithinWindow = 0;
		for (unsigned i = n + 1; i < uBestColCount; ++i)
			{
			const unsigned uBestColIndex2 = BestCols[i];
			if (uBestColIndex2 - uBestColIndex >= uWindowLength)
				break;
			++uCountWithinWindow;
			}

		unsigned uAnchorCol = uBestColIndex;
		if (1 == uCountWithinWindow)
			{
			const unsigned uBestColIndex2 = BestCols[n+1];
			if (Scores[uBestColIndex] > Scores[uBestColIndex2])
				uAnchorCol = uBestColIndex;
			else
				uAnchorCol = uBestColIndex2;
			}
		else if (uCountWithinWindow > 1)
			{
			const unsigned uWindowCenter = uBestColIndex + uWindowLength/2;

			// Every candidate lies within the window, so its distance to
			// the center is below uWindowLength. Previously the distance
			// was taken from the window start and the first and last
			// candidates were never examined.
			unsigned uClosestDist = uWindowLength;
			for (unsigned i = n; i <= n + uCountWithinWindow; ++i)
				{
				const unsigned uColIndex = BestCols[i];
				const unsigned uDist = (uColIndex > uWindowCenter) ?
				  uColIndex - uWindowCenter : uWindowCenter - uColIndex;
				if (uDist < uClosestDist)
					{
					uAnchorCol = uColIndex;
					uClosestDist = uDist;
					}
				}
			}
		AnchorCols[uAnchorColCount] = uAnchorCol;
		++uAnchorColCount;

		// Skip the whole window.
		n += uCountWithinWindow + 1;
		}
	*ptruAnchorColCount = uAnchorColCount;
	}
|
||||
|
||||
void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
|
||||
unsigned *ptruAnchorColCount)
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
if (uColCount < 16)
|
||||
{
|
||||
*ptruAnchorColCount = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
SCORE *MatchScore = new SCORE[uColCount];
|
||||
SCORE *SmoothScore = new SCORE[uColCount];
|
||||
unsigned *BestCols = new unsigned[uColCount];
|
||||
|
||||
GetLetterScores(msa, MatchScore);
|
||||
WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
|
||||
g_dSmoothScoreCeil);
|
||||
|
||||
unsigned uBestColCount;
|
||||
FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
|
||||
BestCols, &uBestColCount);
|
||||
|
||||
#if TRACE
|
||||
ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
|
||||
#endif
|
||||
|
||||
MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
|
||||
ptruAnchorColCount);
|
||||
|
||||
delete[] MatchScore;
|
||||
delete[] SmoothScore;
|
||||
delete[] BestCols;
|
||||
}
|
||||
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
206
src/muscle/muscle3.8.31/src/bittraceback.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
// Maps edge type 'E' to 'D' and 'J' to 'I'; every other character is
// returned unchanged.
static char XlatEdgeType(char c)
	{
	switch (c)
		{
	case 'E':
		return 'D';
	case 'J':
		return 'I';
	default:
		return c;
		}
	}
|
||||
|
||||
static const char *BitsToStr(char Bits)
|
||||
{
|
||||
static char Str[] = "xM xD xI";
|
||||
|
||||
switch (Bits & BIT_xM)
|
||||
{
|
||||
case BIT_MM:
|
||||
Str[0] = 'M';
|
||||
break;
|
||||
case BIT_DM:
|
||||
Str[0] = 'D';
|
||||
break;
|
||||
case BIT_IM:
|
||||
Str[0] = 'I';
|
||||
break;
|
||||
}
|
||||
|
||||
switch (Bits & BIT_xD)
|
||||
{
|
||||
case BIT_MD:
|
||||
Str[3] = 'M';
|
||||
break;
|
||||
case BIT_DD:
|
||||
Str[3] = 'D';
|
||||
break;
|
||||
}
|
||||
|
||||
switch (Bits & BIT_xI)
|
||||
{
|
||||
case BIT_MI:
|
||||
Str[6] = 'M';
|
||||
break;
|
||||
case BIT_II:
|
||||
Str[6] = 'I';
|
||||
break;
|
||||
}
|
||||
|
||||
return Str;
|
||||
}
|
||||
|
||||
static inline char XChar(char Bits, char cType)
|
||||
{
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
switch (Bits & BIT_xM)
|
||||
{
|
||||
case BIT_MM:
|
||||
return 'M';
|
||||
case BIT_DM:
|
||||
return 'D';
|
||||
case BIT_IM:
|
||||
return 'I';
|
||||
#if DOUBLE_AFFINE
|
||||
case BIT_EM:
|
||||
return 'E';
|
||||
case BIT_JM:
|
||||
return 'J';
|
||||
#endif
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'D':
|
||||
{
|
||||
switch (Bits & BIT_xD)
|
||||
{
|
||||
case BIT_MD:
|
||||
return 'M';
|
||||
case BIT_DD:
|
||||
return 'D';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'I':
|
||||
{
|
||||
switch (Bits & BIT_xI)
|
||||
{
|
||||
case BIT_MI:
|
||||
return 'M';
|
||||
case BIT_II:
|
||||
return 'I';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
#if DOUBLE_AFFINE
|
||||
case 'E':
|
||||
{
|
||||
switch (Bits & BIT_xE)
|
||||
{
|
||||
case BIT_ME:
|
||||
return 'M';
|
||||
case BIT_EE:
|
||||
return 'E';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
case 'J':
|
||||
{
|
||||
switch (Bits & BIT_xJ)
|
||||
{
|
||||
case BIT_MJ:
|
||||
return 'M';
|
||||
case BIT_JJ:
|
||||
return 'J';
|
||||
}
|
||||
Quit("Huh!?");
|
||||
return '?';
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
Quit("Huh?");
|
||||
return '?';
|
||||
}
|
||||
}
|
||||
|
||||
void BitTraceBack(char **TraceBack, unsigned uLengthA, unsigned uLengthB,
|
||||
char LastEdge, PWPath &Path)
|
||||
{
|
||||
#if TRACE
|
||||
Log("BitTraceBack\n");
|
||||
#endif
|
||||
Path.Clear();
|
||||
|
||||
PWEdge Edge;
|
||||
Edge.uPrefixLengthA = uLengthA;
|
||||
Edge.uPrefixLengthB = uLengthB;
|
||||
char Bits = TraceBack[uLengthA][uLengthB];
|
||||
Edge.cType = LastEdge;
|
||||
for (;;)
|
||||
{
|
||||
#if TRACE
|
||||
Log("Prepend %c%d.%d\n", Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB);
|
||||
#endif
|
||||
char cSave = Edge.cType;
|
||||
Edge.cType = XlatEdgeType(cSave);
|
||||
Path.PrependEdge(Edge);
|
||||
Edge.cType = cSave;
|
||||
|
||||
unsigned PLA = Edge.uPrefixLengthA;
|
||||
unsigned PLB = Edge.uPrefixLengthB;
|
||||
char Bits = TraceBack[PLA][PLB];
|
||||
char NextEdgeType = XChar(Bits, Edge.cType);
|
||||
#if TRACE
|
||||
Log("XChar(%s, %c) = %c\n", BitsToStr(Bits), Edge.cType, NextEdgeType);
|
||||
#endif
|
||||
switch (Edge.cType)
|
||||
{
|
||||
case 'M':
|
||||
{
|
||||
if (Edge.uPrefixLengthA == 0)
|
||||
Quit("BitTraceBack MA=0");
|
||||
if (Edge.uPrefixLengthB == 0)
|
||||
Quit("BitTraceBack MA=0");
|
||||
--(Edge.uPrefixLengthA);
|
||||
--(Edge.uPrefixLengthB);
|
||||
break;
|
||||
}
|
||||
case 'D':
|
||||
case 'E':
|
||||
{
|
||||
if (Edge.uPrefixLengthA == 0)
|
||||
Quit("BitTraceBack DA=0");
|
||||
--(Edge.uPrefixLengthA);
|
||||
break;
|
||||
}
|
||||
case 'I':
|
||||
case 'J':
|
||||
{
|
||||
if (Edge.uPrefixLengthB == 0)
|
||||
Quit("BitTraceBack IB=0");
|
||||
--(Edge.uPrefixLengthB);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
Quit("BitTraceBack: Invalid edge %c", Edge);
|
||||
}
|
||||
|
||||
if (0 == Edge.uPrefixLengthA && 0 == Edge.uPrefixLengthB)
|
||||
break;
|
||||
|
||||
Edge.cType = NextEdgeType;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Path.LogMe();
|
||||
#endif
|
||||
}
|
||||
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
28
src/muscle/muscle3.8.31/src/blosum62.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include "muscle.h"
|
||||
|
||||
// BLOSUM62 scores as a 20 x 20 integer table. Rows and columns both follow
// the letter order A C D E F G H I K L M N P Q R S T V W Y.
int BLOSUM62[20][20] =
{
	//  A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y
	{   4,  0, -2, -1, -2,  0, -2, -1, -1, -1, -1, -2, -1, -1, -1,  1,  0,  0, -3, -2 },	// A
	{   0,  9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2 },	// C
	{  -2, -3,  6,  2, -3, -1, -1, -3, -1, -4, -3,  1, -1,  0, -2,  0, -1, -3, -4, -3 },	// D
	{  -1, -4,  2,  5, -3, -2,  0, -3,  1, -3, -2,  0, -1,  2,  0,  0, -1, -2, -3, -2 },	// E
	{  -2, -2, -3, -3,  6, -3, -1,  0, -3,  0,  0, -3, -4, -3, -3, -2, -2, -1,  1,  3 },	// F
	{   0, -3, -1, -2, -3,  6, -2, -4, -2, -4, -3,  0, -2, -2, -2,  0, -2, -3, -2, -3 },	// G
	{  -2, -3, -1,  0, -1, -2,  8, -3, -1, -3, -2,  1, -2,  0,  0, -1, -2, -3, -2,  2 },	// H
	{  -1, -1, -3, -3,  0, -4, -3,  4, -3,  2,  1, -3, -3, -3, -3, -2, -1,  3, -3, -1 },	// I
	{  -1, -3, -1,  1, -3, -2, -1, -3,  5, -2, -1,  0, -1,  1,  2,  0, -1, -2, -3, -2 },	// K
	{  -1, -1, -4, -3,  0, -4, -3,  2, -2,  4,  2, -3, -3, -2, -2, -2, -1,  1, -2, -1 },	// L
	{  -1, -1, -3, -2,  0, -3, -2,  1, -1,  2,  5, -2, -2,  0, -1, -1, -1,  1, -1, -1 },	// M
	{  -2, -3,  1,  0, -3,  0,  1, -3,  0, -3, -2,  6, -2,  0,  0,  1,  0, -3, -4, -2 },	// N
	{  -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2,  7, -1, -2, -1, -1, -2, -4, -3 },	// P
	{  -1, -3,  0,  2, -3, -2,  0, -3,  1, -2,  0,  0, -1,  5,  1,  0, -1, -2, -2, -1 },	// Q
	{  -1, -3, -2,  0, -3, -2,  0, -3,  2, -2, -1,  0, -2,  1,  5, -1, -1, -3, -3, -2 },	// R
	{   1, -1,  0,  0, -2,  0, -1, -2,  0, -2, -1,  1, -1,  0, -1,  4,  1, -2, -3, -2 },	// S
	{   0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1,  0, -1, -1, -1,  1,  5,  0, -2, -2 },	// T
	{   0, -1, -3, -2, -1, -3, -3,  3, -2,  1,  1, -3, -2, -2, -3, -2,  0,  4, -3, -1 },	// V
	{  -3, -2, -4, -3,  1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11,  2 },	// W
	{  -2, -2, -3, -2,  3, -3,  2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1,  2,  7 },	// Y
};

// NOTE(review): presumably the expected score of the table above -- confirm.
double BLOSUM62_Expected = -0.5209;
|
||||
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
118
src/muscle/muscle3.8.31/src/blosumla.cpp
Normal file
@@ -0,0 +1,118 @@
|
||||
#include "muscle.h"
|
||||
|
||||
#define GAPVAL 0.3
|
||||
#define GAPGAPVAL 5.0
|
||||
|
||||
// Blosum62 log-average factor matrix, 20 x 20.
//
// v(x) casts one literal to float. S_ROW builds one row; its first two
// arguments (row number and row letter) are labels only and are not used in
// the expansion. Columns follow the same letter order as the rows:
// A C D E F G H I K L M N P Q R S T V W Y.
#define v(x)	((float) x)
#define S_ROW(n, c, A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), v(M), v(N), v(P), v(Q), \
	v(R), v(S), v(T), v(V), v(W), v(Y) },

static float Blosum62LA[20][20] =
{
//                   A           C           D           E           F
//                   G           H           I           K           L
//                   M           N           P           Q           R
//                   S           T           V           W           Y
S_ROW( 0, 'A',  3.9029401,  0.8679881,  0.5446049,  0.7412640,  0.4648942,
                1.0568696,  0.5693654,  0.6324813,  0.7753898,  0.6019460,
                0.7231498,  0.5883077,  0.7541214,  0.7568035,  0.6126988,
                1.4721037,  0.9844022,  0.9364584,  0.4165484,  0.5426125)

S_ROW( 1, 'C',  0.8679881, 19.5765802,  0.3014542,  0.2859347,  0.4389910,
                0.4203886,  0.3550472,  0.6534589,  0.3491296,  0.6422760,
                0.6113537,  0.3978026,  0.3795628,  0.3657796,  0.3089379,
                0.7384148,  0.7405530,  0.7558448,  0.4499807,  0.4342013)

S_ROW( 2, 'D',  0.5446049,  0.3014542,  7.3979253,  1.6878109,  0.2989696,
                0.6343015,  0.6785593,  0.3390155,  0.7840905,  0.2866128,
                0.3464547,  1.5538520,  0.5987177,  0.8970811,  0.5732000,
                0.9135051,  0.6947898,  0.3365004,  0.2321050,  0.3456829)

S_ROW( 3, 'E',  0.7412640,  0.2859347,  1.6878109,  5.4695276,  0.3307441,
                0.4812675,  0.9600400,  0.3305223,  1.3082782,  0.3728734,
                0.5003421,  0.9112983,  0.6792027,  1.9017376,  0.9607983,
                0.9503570,  0.7414260,  0.4289431,  0.3743021,  0.4964664)

S_ROW( 4, 'F',  0.4648942,  0.4389910,  0.2989696,  0.3307441,  8.1287983,
                0.3406407,  0.6519893,  0.9457698,  0.3440433,  1.1545978,
                1.0043715,  0.3542882,  0.2874440,  0.3339729,  0.3807263,
                0.4399736,  0.4816930,  0.7450894,  1.3743775,  2.7693817)

S_ROW( 5, 'G',  1.0568696,  0.4203886,  0.6343015,  0.4812675,  0.3406407,
                6.8763075,  0.4929663,  0.2750096,  0.5888716,  0.2845039,
                0.3954865,  0.8637114,  0.4773858,  0.5386498,  0.4499840,
                0.9035965,  0.5792712,  0.3369551,  0.4216898,  0.3487141)

S_ROW( 6, 'H',  0.5693654,  0.3550472,  0.6785593,  0.9600400,  0.6519893,
                0.4929663, 13.5060070,  0.3262878,  0.7788884,  0.3806759,
                0.5841316,  1.2220028,  0.4728797,  1.1679835,  0.9170473,
                0.7367319,  0.5575021,  0.3394474,  0.4440859,  1.7979036)

S_ROW( 7, 'I',  0.6324813,  0.6534589,  0.3390155,  0.3305223,  0.9457698,
                0.2750096,  0.3262878,  3.9979299,  0.3963730,  1.6944349,
                1.4777449,  0.3279345,  0.3846629,  0.3829375,  0.3547509,
                0.4431634,  0.7798163,  2.4175121,  0.4088732,  0.6303898)

S_ROW( 8, 'K',  0.7753898,  0.3491296,  0.7840905,  1.3082782,  0.3440433,
                0.5888716,  0.7788884,  0.3963730,  4.7643359,  0.4282702,
                0.6253033,  0.9398419,  0.7037741,  1.5543233,  2.0768092,
                0.9319192,  0.7929060,  0.4565429,  0.3589319,  0.5321784)

S_ROW( 9, 'L',  0.6019460,  0.6422760,  0.2866128,  0.3728734,  1.1545978,
                0.2845039,  0.3806759,  1.6944349,  0.4282702,  3.7966214,
                1.9942957,  0.3100430,  0.3711219,  0.4773261,  0.4739194,
                0.4288939,  0.6603292,  1.3142355,  0.5680359,  0.6920589)

S_ROW(10, 'M',  0.7231498,  0.6113537,  0.3464547,  0.5003421,  1.0043715,
                0.3954865,  0.5841316,  1.4777449,  0.6253033,  1.9942957,
                6.4814549,  0.4745299,  0.4238960,  0.8642486,  0.6226249,
                0.5985578,  0.7938018,  1.2689365,  0.6103022,  0.7083636)

S_ROW(11, 'N',  0.5883077,  0.3978026,  1.5538520,  0.9112983,  0.3542882,
                0.8637114,  1.2220028,  0.3279345,  0.9398419,  0.3100430,
                0.4745299,  7.0940964,  0.4999337,  1.0005835,  0.8586298,
                1.2315289,  0.9841525,  0.3690340,  0.2777841,  0.4860309)

S_ROW(12, 'P',  0.7541214,  0.3795628,  0.5987177,  0.6792027,  0.2874440,
                0.4773858,  0.4728797,  0.3846629,  0.7037741,  0.3711219,
                0.4238960,  0.4999337, 12.8375452,  0.6412803,  0.4815348,
                0.7555033,  0.6888962,  0.4430825,  0.2818321,  0.3635216)

S_ROW(13, 'Q',  0.7568035,  0.3657796,  0.8970811,  1.9017376,  0.3339729,
                0.5386498,  1.1679835,  0.3829375,  1.5543233,  0.4773261,
                0.8642486,  1.0005835,  0.6412803,  6.2444210,  1.4057958,
                0.9655559,  0.7913219,  0.4667781,  0.5093584,  0.6110951)

S_ROW(14, 'R',  0.6126988,  0.3089379,  0.5732000,  0.9607983,  0.3807263,
                0.4499840,  0.9170473,  0.3547509,  2.0768092,  0.4739194,
                0.6226249,  0.8586298,  0.4815348,  1.4057958,  6.6655769,
                0.7671661,  0.6777544,  0.4200721,  0.3951049,  0.5559652)

S_ROW(15, 'S',  1.4721037,  0.7384148,  0.9135051,  0.9503570,  0.4399736,
                0.9035965,  0.7367319,  0.4431634,  0.9319192,  0.4288939,
                0.5985578,  1.2315289,  0.7555033,  0.9655559,  0.7671661,
                3.8428476,  1.6139205,  0.5652240,  0.3853031,  0.5575206)

S_ROW(16, 'T',  0.9844022,  0.7405530,  0.6947898,  0.7414260,  0.4816930,
                0.5792712,  0.5575021,  0.7798163,  0.7929060,  0.6603292,
                0.7938018,  0.9841525,  0.6888962,  0.7913219,  0.6777544,
                1.6139205,  4.8321048,  0.9809432,  0.4309317,  0.5731577)

S_ROW(17, 'V',  0.9364584,  0.7558448,  0.3365004,  0.4289431,  0.7450894,
                0.3369551,  0.3394474,  2.4175121,  0.4565429,  1.3142355,
                1.2689365,  0.3690340,  0.4430825,  0.4667781,  0.4200721,
                0.5652240,  0.9809432,  3.6921553,  0.3744576,  0.6580390)

S_ROW(18, 'W',  0.4165484,  0.4499807,  0.2321050,  0.3743021,  1.3743775,
                0.4216898,  0.4440859,  0.4088732,  0.3589319,  0.5680359,
                0.6103022,  0.2777841,  0.2818321,  0.5093584,  0.3951049,
                0.3853031,  0.4309317,  0.3744576, 38.1077830,  2.1098056)

S_ROW(19, 'Y',  0.5426125,  0.4342013,  0.3456829,  0.4964664,  2.7693817,
                0.3487141,  1.7979036,  0.6303898,  0.5321784,  0.6920589,
                0.7083636,  0.4860309,  0.3635216,  0.6110951,  0.5559652,
                0.5575206,  0.5731577,  0.6580390,  2.1098056,  9.8322054)
};
|
||||
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
666
src/muscle/muscle3.8.31/src/clust.cpp
Normal file
@@ -0,0 +1,666 @@
|
||||
#include "muscle.h"
|
||||
#include "clust.h"
|
||||
#include "clustset.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
// Construct an empty clustering: no nodes, no distance matrix, no cluster
// list and no data set attached.
//
// Every pointer that the destructor hands to delete[] is set to null here,
// including m_ClusterIndexToNodeIndex, so destroying an object on which
// Create() was never called does not delete an uninitialised pointer.
// m_CentroidStyle is left unset; Create() assigns it.
Clust::Clust()
{
	m_Nodes = 0;
	m_dDist = 0;
	m_ClusterIndexToNodeIndex = 0;
	m_ptrClusterList = 0;
	m_ptrSet = 0;

	m_uNodeCount = 0;
	m_uLeafCount = 0;
	m_uClusterCount = 0;
	m_uTriangularMatrixSize = 0;

	m_JoinStyle = JOIN_Undefined;
}
|
||||
|
||||
// Release the three arrays owned by this object: the node array, the
// distance matrix and the cluster-index table.
Clust::~Clust()
{
	delete[] m_Nodes;						// node array
	delete[] m_dDist;						// distance matrix
	delete[] m_ClusterIndexToNodeIndex;		// cluster-index table
}
|
||||
|
||||
// Build the complete cluster tree for the leaves of Set.
//
//	Set		source of leaf count and of pairwise leaf distances; its
//			address is stored in m_ptrSet.
//	Method	selects the join style and the linkage style; an unknown
//			value is reported through Quit().
//
// Steps: size the distance matrix, allocate 2*leaves - 1 nodes, put every
// leaf on the cluster list, fill in all leaf-to-leaf distances, then call
// CreateCluster() once per internal node.
void Clust::Create(ClustSet &Set, CLUSTER Method)
{
	m_ptrSet = &Set;
	SetLeafCount(Set.GetLeafCount());

	// Map the requested method to a join rule and a linkage rule.
	switch (Method)
	{
	case CLUSTER_UPGMA:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Avg;
		break;

	case CLUSTER_UPGMAMax:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Max;
		break;

	case CLUSTER_UPGMAMin:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Min;
		break;

	case CLUSTER_UPGMB:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Biased;
		break;

	case CLUSTER_NeighborJoining:
		m_JoinStyle = JOIN_NeighborJoining;
		m_CentroidStyle = LINKAGE_NeighborJoining;
		break;

	default:
		Quit("Clust::Create, invalid method %d", Method);
	}

	if (m_uLeafCount <= 1)
		Quit("Clust::Create: no leaves");

	m_uNodeCount = 2*m_uLeafCount - 1;
	m_Nodes = new ClustNode[m_uNodeCount];
	m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];

	// Leaves come first in the node array; each starts as its own cluster.
	m_ptrClusterList = 0;
	for (unsigned n = 0; n < m_uNodeCount; ++n)
	{
		ClustNode &Node = m_Nodes[n];
		Node.m_uIndex = n;
		if (n >= m_uLeafCount)
		{
			// Internal node: empty until JoinNodes() fills it.
			Node.m_uSize = 0;
			continue;
		}
		Node.m_uSize = 1;
		Node.m_uLeafIndexes = new unsigned[1];
		Node.m_uLeafIndexes[0] = n;
		AddToClusterList(n);
	}

	// Compute initial distance matrix between leaves
	SetProgressDesc("Build dist matrix");
	const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
	unsigned uPairsDone = 0;
	for (unsigned i = 0; i < m_uLeafCount; ++i)
	{
		for (unsigned j = 0; j < i; ++j)
		{
			const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
			SetDist(i, j, dDist);
			// Report progress once every 10000 pairs.
			if (0 == uPairsDone%10000)
				Progress(uPairsDone, uPairCount);
			++uPairsDone;
		}
	}
	ProgressStepsDone();

	// Call CreateCluster once for each internal node in the tree
	SetProgressDesc("Build guide tree");
	m_uClusterCount = m_uLeafCount;
	const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
	for (unsigned uStep = 1; uStep <= uInternalNodeCount; ++uStep)
	{
		Progress(uStep, uInternalNodeCount);
		CreateCluster();
	}
	ProgressStepsDone();
}
|
||||
|
||||
void Clust::CreateCluster()
|
||||
{
|
||||
unsigned uLeftNodeIndex;
|
||||
unsigned uRightNodeIndex;
|
||||
float dLeftLength;
|
||||
float dRightLength;
|
||||
ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);
|
||||
|
||||
const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;
|
||||
|
||||
JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
|
||||
uNewNodeIndex);
|
||||
|
||||
#if TRACE
|
||||
Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
|
||||
uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
|
||||
#endif
|
||||
|
||||
// Compute distances to other clusters
|
||||
--m_uClusterCount;
|
||||
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||
{
|
||||
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||
continue;
|
||||
|
||||
if (uNewNodeIndex == uNodeIndex)
|
||||
continue;
|
||||
|
||||
const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
|
||||
SetDist(uNewNodeIndex, uNodeIndex, dDist);
|
||||
}
|
||||
|
||||
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
|
||||
uNodeIndex = GetNextCluster(uNodeIndex))
|
||||
{
|
||||
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
|
||||
continue;
|
||||
|
||||
if (uNewNodeIndex == uNodeIndex)
|
||||
continue;
|
||||
|
||||
#if REDLACK
|
||||
const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
|
||||
InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
switch (m_JoinStyle)
|
||||
{
|
||||
case JOIN_NearestNeighbor:
|
||||
ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||
ptrdRightLength);
|
||||
return;
|
||||
case JOIN_NeighborJoining:
|
||||
ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
|
||||
ptrdRightLength);
|
||||
return;
|
||||
}
|
||||
Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
|
||||
}
|
||||
|
||||
void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
|
||||
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
|
||||
unsigned uMinLeftNodeIndex;
|
||||
unsigned uMinRightNodeIndex;
|
||||
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||
|
||||
float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||
|
||||
const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
|
||||
const float dRightHeight = GetHeight(uMinRightNodeIndex);
|
||||
|
||||
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||
*ptruRightIndex = uMinRightNodeIndex;
|
||||
*ptrdLeftLength = dMinDist/2 - dLeftHeight;
|
||||
*ptrdRightLength = dMinDist/2 - dRightHeight;
|
||||
}
|
||||
|
||||
void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
|
||||
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
|
||||
//unsigned uMinLeftNodeIndex = uInsane;
|
||||
//unsigned uMinRightNodeIndex = uInsane;
|
||||
//float dMinD = PLUS_INFINITY;
|
||||
//for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||
// {
|
||||
// const float ri = Calc_r(i);
|
||||
// for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
|
||||
// {
|
||||
// const float rj = Calc_r(j);
|
||||
// const float dij = GetDist(i, j);
|
||||
// const float Dij = dij - (ri + rj);
|
||||
// if (Dij < dMinD)
|
||||
// {
|
||||
// dMinD = Dij;
|
||||
// uMinLeftNodeIndex = i;
|
||||
// uMinRightNodeIndex = j;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
unsigned uMinLeftNodeIndex;
|
||||
unsigned uMinRightNodeIndex;
|
||||
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
|
||||
|
||||
const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
|
||||
const float rL = Calc_r(uMinLeftNodeIndex);
|
||||
const float rR = Calc_r(uMinRightNodeIndex);
|
||||
|
||||
const float dLeftLength = (dDistLR + rL - rR)/2;
|
||||
const float dRightLength = (dDistLR - rL + rR)/2;
|
||||
|
||||
*ptruLeftIndex = uMinLeftNodeIndex;
|
||||
*ptruRightIndex = uMinRightNodeIndex;
|
||||
*ptrdLeftLength = dLeftLength;
|
||||
*ptrdRightLength = dRightLength;
|
||||
}
|
||||
|
||||
void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
|
||||
float dRightLength, unsigned uNodeIndex)
|
||||
{
|
||||
ClustNode &Parent = m_Nodes[uNodeIndex];
|
||||
ClustNode &Left = m_Nodes[uLeftIndex];
|
||||
ClustNode &Right = m_Nodes[uRightIndex];
|
||||
|
||||
Left.m_dLength = dLeftLength;
|
||||
Right.m_dLength = dRightLength;
|
||||
|
||||
Parent.m_ptrLeft = &Left;
|
||||
Parent.m_ptrRight = &Right;
|
||||
|
||||
Left.m_ptrParent = &Parent;
|
||||
Right.m_ptrParent = &Parent;
|
||||
|
||||
const unsigned uLeftSize = Left.m_uSize;
|
||||
const unsigned uRightSize = Right.m_uSize;
|
||||
const unsigned uParentSize = uLeftSize + uRightSize;
|
||||
Parent.m_uSize = uParentSize;
|
||||
|
||||
assert(0 == Parent.m_uLeafIndexes);
|
||||
Parent.m_uLeafIndexes = new unsigned[uParentSize];
|
||||
|
||||
const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
|
||||
const unsigned uRightBytes = uRightSize*sizeof(unsigned);
|
||||
memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
|
||||
memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);
|
||||
|
||||
DeleteFromClusterList(uLeftIndex);
|
||||
DeleteFromClusterList(uRightIndex);
|
||||
AddToClusterList(uNodeIndex);
|
||||
}
|
||||
|
||||
float Clust::Calc_r(unsigned uNodeIndex) const
|
||||
{
|
||||
const unsigned uClusterCount = GetClusterCount();
|
||||
if (2 == uClusterCount)
|
||||
return 0;
|
||||
|
||||
float dSum = 0;
|
||||
for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
|
||||
{
|
||||
if (i == uNodeIndex)
|
||||
continue;
|
||||
dSum += GetDist(uNodeIndex, i);
|
||||
}
|
||||
return dSum/(uClusterCount - 2);
|
||||
}
|
||||
|
||||
float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
switch (m_CentroidStyle)
|
||||
{
|
||||
case LINKAGE_Avg:
|
||||
return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Min:
|
||||
return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Max:
|
||||
return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_Biased:
|
||||
return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
|
||||
|
||||
case LINKAGE_NeighborJoining:
|
||||
return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
|
||||
}
|
||||
Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
|
||||
return (float) g_dNAN;
|
||||
}
|
||||
|
||||
float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL < dDistR ? dDistL : dDistR);
|
||||
}
|
||||
|
||||
float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL > dDistR ? dDistL : dDistR);
|
||||
}
|
||||
|
||||
float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
return (dDistL + dDistR)/2;
|
||||
}
|
||||
|
||||
float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
const float dDist = (dDistL + dDistR - dDistLR)/2;
|
||||
return dDist;
|
||||
}
|
||||
|
||||
// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
|
||||
float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
|
||||
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
|
||||
|
||||
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
|
||||
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
|
||||
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
|
||||
const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
|
||||
const float dSumDistLR = dDistL + dDistR;
|
||||
const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
|
||||
return dDist;
|
||||
}
|
||||
|
||||
unsigned Clust::GetClusterCount() const
|
||||
{
|
||||
return m_uClusterCount;
|
||||
}
|
||||
|
||||
void Clust::LogMe() const
|
||||
{
|
||||
Log("Clust %u leaves, %u nodes, %u clusters.\n",
|
||||
m_uLeafCount, m_uNodeCount, m_uClusterCount);
|
||||
|
||||
Log("Distance matrix\n");
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
Log(" %7u", i);
|
||||
Log("\n");
|
||||
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
Log(" ------");
|
||||
Log("\n");
|
||||
|
||||
for (unsigned i = 0; i < uNodeCount - 1; ++i)
|
||||
{
|
||||
Log("%4u: ", i);
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
Log(" %7.2g", GetDist(i, j));
|
||||
Log("\n");
|
||||
}
|
||||
|
||||
Log("\n");
|
||||
Log("Node Size Prnt Left Rght Length Name\n");
|
||||
Log("---- ---- ---- ---- ---- ------ ----\n");
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
const ClustNode &Node = m_Nodes[uNodeIndex];
|
||||
Log("%4u %4u", uNodeIndex, Node.m_uSize);
|
||||
if (0 != Node.m_ptrParent)
|
||||
Log(" %4u", Node.m_ptrParent->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (0 != Node.m_ptrLeft)
|
||||
Log(" %4u", Node.m_ptrLeft->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (0 != Node.m_ptrRight)
|
||||
Log(" %4u", Node.m_ptrRight->m_uIndex);
|
||||
else
|
||||
Log(" ");
|
||||
|
||||
if (uNodeIndex != m_uNodeCount - 1)
|
||||
Log(" %7.3g", Node.m_dLength);
|
||||
if (IsLeaf(uNodeIndex))
|
||||
{
|
||||
const char *ptrName = GetNodeName(uNodeIndex);
|
||||
if (0 != ptrName)
|
||||
Log(" %s", ptrName);
|
||||
}
|
||||
if (GetRootNodeIndex() == uNodeIndex)
|
||||
Log(" [ROOT]");
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Bounds-checked access to a node. An index at or beyond m_uNodeCount is
// reported through Quit().
const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
{
	const bool bInRange = (uNodeIndex < m_uNodeCount);
	if (!bInRange)
		Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
	return m_Nodes[uNodeIndex];
}
|
||||
|
||||
bool Clust::IsLeaf(unsigned uNodeIndex) const
|
||||
{
|
||||
return uNodeIndex < m_uLeafCount;
|
||||
}
|
||||
|
||||
unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
return Node.m_uSize;
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
if (0 == Node.m_ptrLeft)
|
||||
Quit("Clust::GetLeftIndex: leaf");
|
||||
return Node.m_ptrLeft->m_uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
if (0 == Node.m_ptrRight)
|
||||
Quit("Clust::GetRightIndex: leaf");
|
||||
return Node.m_ptrRight->m_uIndex;
|
||||
}
|
||||
|
||||
float Clust::GetLength(unsigned uNodeIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
return Node.m_dLength;
|
||||
}
|
||||
|
||||
void Clust::SetLeafCount(unsigned uLeafCount)
|
||||
{
|
||||
if (uLeafCount <= 1)
|
||||
Quit("Clust::SetLeafCount(%u)", uLeafCount);
|
||||
|
||||
m_uLeafCount = uLeafCount;
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
|
||||
// Triangular matrix size excluding diagonal (all zeros in our case).
|
||||
m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
|
||||
m_dDist = new float[m_uTriangularMatrixSize];
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeafCount() const
|
||||
{
|
||||
return m_uLeafCount;
|
||||
}
|
||||
|
||||
unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
const unsigned uNodeCount = GetNodeCount();
|
||||
if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
|
||||
Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
|
||||
unsigned v;
|
||||
if (uIndex1 >= uIndex2)
|
||||
v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
|
||||
else
|
||||
v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
|
||||
assert(v < m_uTriangularMatrixSize);
|
||||
return v;
|
||||
}
|
||||
|
||||
float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||
return m_dDist[v];
|
||||
}
|
||||
|
||||
void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||
{
|
||||
unsigned v = VectorIndex(uIndex1, uIndex2);
|
||||
m_dDist[v] = dDist;
|
||||
}
|
||||
|
||||
float Clust::GetHeight(unsigned uNodeIndex) const
|
||||
{
|
||||
if (IsLeaf(uNodeIndex))
|
||||
return 0;
|
||||
|
||||
const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
|
||||
const unsigned uRightIndex = GetRightIndex(uNodeIndex);
|
||||
const float dLeftLength = GetLength(uLeftIndex);
|
||||
const float dRightLength = GetLength(uRightIndex);
|
||||
const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
|
||||
const float dRightHeight = dRightLength + GetHeight(uRightIndex);
|
||||
return (dLeftHeight + dRightHeight)/2;
|
||||
}
|
||||
|
||||
const char *Clust::GetNodeName(unsigned uNodeIndex) const
|
||||
{
|
||||
if (!IsLeaf(uNodeIndex))
|
||||
Quit("Clust::GetNodeName, is not leaf");
|
||||
return m_ptrSet->GetLeafName(uNodeIndex);
|
||||
}
|
||||
|
||||
unsigned Clust::GetNodeId(unsigned uNodeIndex) const
|
||||
{
|
||||
if (uNodeIndex >= GetLeafCount())
|
||||
return 0;
|
||||
return m_ptrSet->GetLeafId(uNodeIndex);
|
||||
}
|
||||
|
||||
unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
|
||||
{
|
||||
const ClustNode &Node = GetNode(uNodeIndex);
|
||||
const unsigned uLeafCount = Node.m_uSize;
|
||||
if (uLeafIndex >= uLeafCount)
|
||||
Quit("Clust::GetLeaf, invalid index");
|
||||
const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex];
|
||||
if (uIndex >= m_uNodeCount)
|
||||
Quit("Clust::GetLeaf, index out of range");
|
||||
return uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetFirstCluster() const
|
||||
{
|
||||
if (0 == m_ptrClusterList)
|
||||
return uInsane;
|
||||
return m_ptrClusterList->m_uIndex;
|
||||
}
|
||||
|
||||
unsigned Clust::GetNextCluster(unsigned uIndex) const
|
||||
{
|
||||
ClustNode *ptrNode = &m_Nodes[uIndex];
|
||||
if (0 == ptrNode->m_ptrNextCluster)
|
||||
return uInsane;
|
||||
return ptrNode->m_ptrNextCluster->m_uIndex;
|
||||
}
|
||||
|
||||
void Clust::DeleteFromClusterList(unsigned uNodeIndex)
|
||||
{
|
||||
assert(uNodeIndex < m_uNodeCount);
|
||||
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||
ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
|
||||
ClustNode *ptrNext = ptrNode->m_ptrNextCluster;
|
||||
|
||||
if (0 != ptrNext)
|
||||
ptrNext->m_ptrPrevCluster = ptrPrev;
|
||||
if (0 == ptrPrev)
|
||||
{
|
||||
assert(m_ptrClusterList == ptrNode);
|
||||
m_ptrClusterList = ptrNext;
|
||||
}
|
||||
else
|
||||
ptrPrev->m_ptrNextCluster = ptrNext;
|
||||
|
||||
ptrNode->m_ptrNextCluster = 0;
|
||||
ptrNode->m_ptrPrevCluster = 0;
|
||||
}
|
||||
|
||||
void Clust::AddToClusterList(unsigned uNodeIndex)
|
||||
{
|
||||
assert(uNodeIndex < m_uNodeCount);
|
||||
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
|
||||
|
||||
if (0 != m_ptrClusterList)
|
||||
m_ptrClusterList->m_ptrPrevCluster = ptrNode;
|
||||
|
||||
ptrNode->m_ptrNextCluster = m_ptrClusterList;
|
||||
ptrNode->m_ptrPrevCluster = 0;
|
||||
|
||||
m_ptrClusterList = ptrNode;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
switch (m_JoinStyle)
|
||||
{
|
||||
case JOIN_NearestNeighbor:
|
||||
return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
|
||||
|
||||
case JOIN_NeighborJoining:
|
||||
return ComputeMetricNeighborJoining(uIndex1, uIndex2);
|
||||
}
|
||||
Quit("Clust::ComputeMetric");
|
||||
return 0;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
|
||||
{
|
||||
float ri = Calc_r(i);
|
||||
float rj = Calc_r(j);
|
||||
float dij = GetDist(i, j);
|
||||
float dMetric = dij - (ri + rj);
|
||||
return (float) dMetric;
|
||||
}
|
||||
|
||||
float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
|
||||
{
|
||||
return (float) GetDist(i, j);
|
||||
}
|
||||
|
||||
float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||
{
|
||||
unsigned uMinLeftNodeIndex = uInsane;
|
||||
unsigned uMinRightNodeIndex = uInsane;
|
||||
float dMinMetric = PLUS_INFINITY;
|
||||
for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane;
|
||||
uLeftNodeIndex = GetNextCluster(uLeftNodeIndex))
|
||||
{
|
||||
for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex);
|
||||
uRightNodeIndex != uInsane;
|
||||
uRightNodeIndex = GetNextCluster(uRightNodeIndex))
|
||||
{
|
||||
float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex);
|
||||
if (dMetric < dMinMetric)
|
||||
{
|
||||
dMinMetric = dMetric;
|
||||
uMinLeftNodeIndex = uLeftNodeIndex;
|
||||
uMinRightNodeIndex = uRightNodeIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
*ptruIndex1 = uMinLeftNodeIndex;
|
||||
*ptruIndex2 = uMinRightNodeIndex;
|
||||
return dMinMetric;
|
||||
}
|
||||
|
||||
float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
|
||||
{
|
||||
return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
|
||||
}
|
||||
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
148
src/muscle/muscle3.8.31/src/clust.h
Normal file
@@ -0,0 +1,148 @@
|
||||
#ifndef Clust_h
|
||||
#define Clust_h
|
||||
|
||||
class Clust;
|
||||
class ClustNode;
|
||||
class ClustSet;
|
||||
class Phylip;
|
||||
class SortedNode;
|
||||
|
||||
// Fixed index value 0xfff0.
// NOTE(review): presumably the "nil" marker for the RB* routines declared in
// Clust -- no use of it is visible in this header; confirm against clust.cpp.
const unsigned RB_NIL = ((unsigned) 0xfff0);
|
||||
|
||||
class ClustNode
|
||||
{
|
||||
public:
|
||||
ClustNode()
|
||||
{
|
||||
m_uIndex = uInsane;
|
||||
m_uSize = uInsane;
|
||||
m_dLength = (float) dInsane;
|
||||
m_ptrLeft = 0;
|
||||
m_ptrRight = 0;
|
||||
m_ptrParent = 0;
|
||||
m_ptrNextCluster = 0;
|
||||
m_ptrPrevCluster = 0;
|
||||
m_uLeafIndexes = 0;
|
||||
}
|
||||
~ClustNode()
|
||||
{
|
||||
delete[] m_uLeafIndexes;
|
||||
}
|
||||
unsigned m_uIndex;
|
||||
unsigned m_uSize;
|
||||
float m_dLength;
|
||||
ClustNode *m_ptrLeft;
|
||||
ClustNode *m_ptrRight;
|
||||
ClustNode *m_ptrParent;
|
||||
ClustNode *m_ptrNextCluster;
|
||||
ClustNode *m_ptrPrevCluster;
|
||||
unsigned *m_uLeafIndexes;
|
||||
};
|
||||
|
||||
class Clust
|
||||
{
|
||||
public:
|
||||
Clust();
|
||||
virtual ~Clust();
|
||||
|
||||
void Create(ClustSet &Set, CLUSTER Method);
|
||||
|
||||
unsigned GetLeafCount() const;
|
||||
|
||||
unsigned GetClusterCount() const;
|
||||
unsigned GetClusterSize(unsigned uNodeIndex) const;
|
||||
unsigned GetLeaf(unsigned uClusterIndex, unsigned uLeafIndex) const;
|
||||
|
||||
unsigned GetNodeCount() const { return 2*m_uLeafCount - 1; }
|
||||
const ClustNode &GetRoot() const { return m_Nodes[GetRootNodeIndex()]; }
|
||||
unsigned GetRootNodeIndex() const { return m_uNodeCount - 1; }
|
||||
|
||||
const ClustNode &GetNode(unsigned uNodeIndex) const;
|
||||
bool IsLeaf(unsigned uNodeIndex) const;
|
||||
unsigned GetLeftIndex(unsigned uNodeIndex) const;
|
||||
unsigned GetRightIndex(unsigned uNodeIndex) const;
|
||||
float GetLength(unsigned uNodeIndex) const;
|
||||
float GetHeight(unsigned uNodeIndex) const;
|
||||
const char *GetNodeName(unsigned uNodeIndex) const;
|
||||
unsigned GetNodeId(unsigned uNodeIndex) const;
|
||||
|
||||
JOIN GetJoinStyle() const { return m_JoinStyle; }
|
||||
LINKAGE GetCentroidStyle() const { return m_CentroidStyle; }
|
||||
|
||||
void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);
|
||||
float GetDist(unsigned uIndex1, unsigned uIndex2) const;
|
||||
|
||||
void ToPhylip(Phylip &tree);
|
||||
|
||||
void LogMe() const;
|
||||
|
||||
//private:
|
||||
void SetLeafCount(unsigned uLeafCount);
|
||||
|
||||
void CreateCluster();
|
||||
void JoinNodes(unsigned uLeftNodeIndex, unsigned uRightNodeIndex,
|
||||
float dLeftLength, float dRightLength, unsigned uNewNodeIndex);
|
||||
|
||||
void ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
void ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
void ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
|
||||
float *ptrdLeftLength, float *ptrdRightLength);
|
||||
|
||||
float ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex);
|
||||
float ComputeDistNeighborJoining(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||
float ComputeDistMAFFT(unsigned uNewNewIndex, unsigned uNodeIndex);
|
||||
|
||||
float Calc_r(unsigned uNodeIndex) const;
|
||||
|
||||
unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
|
||||
|
||||
unsigned GetFirstCluster() const;
|
||||
unsigned GetNextCluster(unsigned uNodeIndex) const;
|
||||
|
||||
float ComputeMetric(unsigned uIndex1, unsigned uIndex2) const;
|
||||
float ComputeMetricNearestNeighbor(unsigned i, unsigned j) const;
|
||||
float ComputeMetricNeighborJoining(unsigned i, unsigned j) const;
|
||||
|
||||
void InitMetric(unsigned uMaxNodeIndex);
|
||||
void InsertMetric(unsigned uIndex1, unsigned uIndex2, float dMetric);
|
||||
float GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||
float GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const;
|
||||
void DeleteMetric(unsigned uIndex);
|
||||
void DeleteMetric(unsigned uIndex1, unsigned uIndex2);
|
||||
void ListMetric() const;
|
||||
|
||||
void DeleteFromClusterList(unsigned uNodeIndex);
|
||||
void AddToClusterList(unsigned uNodeIndex);
|
||||
|
||||
void RBDelete(unsigned RBNode);
|
||||
unsigned RBInsert(unsigned i, unsigned j, float fMetric);
|
||||
|
||||
unsigned RBNext(unsigned RBNode) const;
|
||||
unsigned RBPrev(unsigned RBNode) const;
|
||||
unsigned RBMin(unsigned RBNode) const;
|
||||
unsigned RBMax(unsigned RBNode) const;
|
||||
|
||||
void ValidateRB(const char szMsg[] = 0) const;
|
||||
void ValidateRBNode(unsigned Node, const char szMsg[]) const;
|
||||
|
||||
//private:
|
||||
JOIN m_JoinStyle;
|
||||
LINKAGE m_CentroidStyle;
|
||||
ClustNode *m_Nodes;
|
||||
unsigned *m_ClusterIndexToNodeIndex;
|
||||
unsigned *m_NodeIndexToClusterIndex;
|
||||
unsigned m_uLeafCount;
|
||||
unsigned m_uNodeCount;
|
||||
unsigned m_uClusterCount;
|
||||
unsigned m_uTriangularMatrixSize;
|
||||
float *m_dDist;
|
||||
ClustSet *m_ptrSet;
|
||||
ClustNode *m_ptrClusterList;
|
||||
};
|
||||
|
||||
#endif // Clust_h
|
||||
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
339
src/muscle/muscle3.8.31/src/cluster.cpp
Normal file
@@ -0,0 +1,339 @@
|
||||
#include "muscle.h"
|
||||
#include "cluster.h"
|
||||
#include "distfunc.h"
|
||||
|
||||
// Smaller of two floats; returns d2 when the values are equal or unordered.
static inline float Min(float d1, float d2)
{
    if (d1 < d2)
        return d1;
    return d2;
}
|
||||
|
||||
// Larger of two floats; returns d2 when the values are equal or unordered.
static inline float Max(float d1, float d2)
{
    if (d1 > d2)
        return d1;
    return d2;
}
|
||||
|
||||
// Arithmetic mean of two floats: the sum is halved in double precision and
// the result narrowed back to float.
static inline float Mean(float d1, float d2)
{
    const double dHalfSum = (d1 + d2)/2.0;
    return (float) dHalfSum;
}
|
||||
|
||||
#if _DEBUG
// Debug-only consistency check of the disjoint list.
//
// Walks the list from m_ptrDisjoints and verifies that prev/next links agree
// with each other, that only the head has a null prev link and that the list
// is no longer than m_uNodeCount (i.e. has no loop). Finally checks that the
// list length equals the number of parentless nodes among the first
// uNodeCount nodes. Any violation is logged and reported through Quit().
void ClusterTree::Validate(unsigned uNodeCount)
{
    unsigned uListLength = 0;

    ClusterNode *ptrCur = m_ptrDisjoints;
    while (0 != ptrCur)
    {
        ClusterNode *ptrBefore = ptrCur->GetPrevDisjoint();
        ClusterNode *ptrAfter = ptrCur->GetNextDisjoint();

        if (0 == ptrBefore)
        {
            // Only the list head may lack a predecessor.
            if (ptrCur != m_ptrDisjoints)
            {
                Log("[%u]->prev = 0 but != m_ptrDisjoints=%d\n",
                  ptrCur->GetIndex(),
                  m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
                ptrCur->LogMe();
                Quit("ClusterTree::Validate()");
            }
        }
        else if (ptrBefore->GetNextDisjoint() != ptrCur)
        {
            Log("Prev->This mismatch, prev=\n");
            ptrBefore->LogMe();
            Log("This=\n");
            ptrCur->LogMe();
            Quit("ClusterTree::Validate()");
        }

        if (0 != ptrAfter && ptrAfter->GetPrevDisjoint() != ptrCur)
        {
            Log("Next->This mismatch, next=\n");
            ptrAfter->LogMe();
            Log("This=\n");
            ptrCur->LogMe();
            Quit("ClusterTree::Validate()");
        }

        ++uListLength;
        if (uListLength > m_uNodeCount)
            Quit("Loop in disjoint list");

        ptrCur = ptrCur->GetNextDisjoint();
    }

    // Every disjoint cluster is a node without a parent, and vice versa.
    unsigned uOrphanCount = 0;
    for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
    {
        if (0 == m_Nodes[uNode].GetParent())
            ++uOrphanCount;
    }

    if (uListLength != uOrphanCount)
        Quit("Disjoints = %u Parentless = %u\n", uListLength,
          uOrphanCount);
}
#else // !_DEBUG
#define Validate(uNodeCount) // empty
#endif
|
||||
|
||||
void ClusterNode::LogMe() const
|
||||
{
|
||||
unsigned uClusterSize = GetClusterSize();
|
||||
Log("[%02u] w=%5.3f CW=%5.3f LBW=%5.3f RBW=%5.3f LWT=%5.3f RWT=%5.3f L=%02d R=%02d P=%02d NxDj=%02d PvDj=%02d Sz=%02d {",
|
||||
m_uIndex,
|
||||
m_dWeight,
|
||||
GetClusterWeight(),
|
||||
GetLeftBranchWeight(),
|
||||
GetRightBranchWeight(),
|
||||
GetLeftWeight(),
|
||||
GetRightWeight(),
|
||||
m_ptrLeft ? m_ptrLeft->GetIndex() : 0xffffffff,
|
||||
m_ptrRight ? m_ptrRight->GetIndex() : 0xffffffff,
|
||||
m_ptrParent ? m_ptrParent->GetIndex() : 0xffffffff,
|
||||
m_ptrNextDisjoint ? m_ptrNextDisjoint->GetIndex() : 0xffffffff,
|
||||
m_ptrPrevDisjoint ? m_ptrPrevDisjoint->GetIndex() : 0xffffffff,
|
||||
uClusterSize);
|
||||
for (unsigned i = 0; i < uClusterSize; ++i)
|
||||
Log(" %u", GetClusterLeaf(i)->GetIndex());
|
||||
Log(" }\n");
|
||||
}
|
||||
|
||||
// How many leaves in the sub-tree under this node?
|
||||
unsigned ClusterNode::GetClusterSize() const
|
||||
{
|
||||
unsigned uLeafCount = 0;
|
||||
|
||||
if (0 == m_ptrLeft && 0 == m_ptrRight)
|
||||
return 1;
|
||||
|
||||
if (0 != m_ptrLeft)
|
||||
uLeafCount += m_ptrLeft->GetClusterSize();
|
||||
if (0 != m_ptrRight)
|
||||
uLeafCount += m_ptrRight->GetClusterSize();
|
||||
assert(uLeafCount > 0);
|
||||
return uLeafCount;
|
||||
}
|
||||
|
||||
double ClusterNode::GetClusterWeight() const
|
||||
{
|
||||
double dWeight = 0.0;
|
||||
if (0 != m_ptrLeft)
|
||||
dWeight += m_ptrLeft->GetClusterWeight();
|
||||
if (0 != m_ptrRight)
|
||||
dWeight += m_ptrRight->GetClusterWeight();
|
||||
return dWeight + GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetLeftBranchWeight() const
|
||||
{
|
||||
const ClusterNode *ptrLeft = GetLeft();
|
||||
if (0 == ptrLeft)
|
||||
return 0.0;
|
||||
|
||||
return GetWeight() - ptrLeft->GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetRightBranchWeight() const
|
||||
{
|
||||
const ClusterNode *ptrRight = GetRight();
|
||||
if (0 == ptrRight)
|
||||
return 0.0;
|
||||
|
||||
return GetWeight() - ptrRight->GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetRightWeight() const
|
||||
{
|
||||
const ClusterNode *ptrRight = GetRight();
|
||||
if (0 == ptrRight)
|
||||
return 0.0;
|
||||
return ptrRight->GetClusterWeight() + GetWeight();
|
||||
}
|
||||
|
||||
double ClusterNode::GetLeftWeight() const
|
||||
{
|
||||
const ClusterNode *ptrLeft = GetLeft();
|
||||
if (0 == ptrLeft)
|
||||
return 0.0;
|
||||
return ptrLeft->GetClusterWeight() + GetWeight();
|
||||
}
|
||||
|
||||
// Return n'th leaf in the sub-tree under this node.
|
||||
const ClusterNode *ClusterNode::GetClusterLeaf(unsigned uLeafIndex) const
|
||||
{
|
||||
if (0 != m_ptrLeft)
|
||||
{
|
||||
if (0 == m_ptrRight)
|
||||
return this;
|
||||
|
||||
unsigned uLeftLeafCount = m_ptrLeft->GetClusterSize();
|
||||
|
||||
if (uLeafIndex < uLeftLeafCount)
|
||||
return m_ptrLeft->GetClusterLeaf(uLeafIndex);
|
||||
|
||||
assert(uLeafIndex >= uLeftLeafCount);
|
||||
return m_ptrRight->GetClusterLeaf(uLeafIndex - uLeftLeafCount);
|
||||
}
|
||||
if (0 == m_ptrRight)
|
||||
return this;
|
||||
return m_ptrRight->GetClusterLeaf(uLeafIndex);
|
||||
}
|
||||
|
||||
// Unlink ptrNode from the doubly-linked list of disjoint clusters.
void ClusterTree::DeleteFromDisjoints(ClusterNode *ptrNode)
{
    ClusterNode *ptrBefore = ptrNode->GetPrevDisjoint();
    ClusterNode *ptrAfter = ptrNode->GetNextDisjoint();

    // Bypass the node in the forward direction; the head moves on if the
    // node was first in the list.
    if (0 == ptrBefore)
        m_ptrDisjoints = ptrAfter;
    else
        ptrBefore->SetNextDisjoint(ptrAfter);

    // Bypass the node in the backward direction.
    if (0 != ptrAfter)
        ptrAfter->SetPrevDisjoint(ptrBefore);

#if _DEBUG
    // Clearing the removed node's own links is not needed by the algorithm,
    // but it is clearer and is what Validate() expects.
    ptrNode->SetPrevDisjoint(0);
    ptrNode->SetNextDisjoint(0);
#endif
}
|
||||
|
||||
// Insert ptrNode at the front of the doubly-linked list of disjoint clusters.
void ClusterTree::AddToDisjoints(ClusterNode *ptrNode)
{
    ClusterNode *ptrOldHead = m_ptrDisjoints;

    ptrNode->SetNextDisjoint(ptrOldHead);
    ptrNode->SetPrevDisjoint(0);

    if (0 != ptrOldHead)
        ptrOldHead->SetPrevDisjoint(ptrNode);

    m_ptrDisjoints = ptrNode;
}
|
||||
|
||||
// Construct an empty tree: no nodes, no disjoint list.
// m_uLeafCount is zeroed as well; it used to be left uninitialized until
// Create() was called.
ClusterTree::ClusterTree() :
    m_ptrDisjoints(0),
    m_Nodes(0),
    m_uNodeCount(0),
    m_uLeafCount(0)
{
}
|
||||
|
||||
// Release the node array allocated by Create() (a null pointer is fine).
ClusterTree::~ClusterTree()
{
    delete[] m_Nodes;
}
|
||||
|
||||
void ClusterTree::LogMe() const
|
||||
{
|
||||
Log("Disjoints=%d\n", m_ptrDisjoints ? m_ptrDisjoints->GetIndex() : 0xffffffff);
|
||||
for (unsigned i = 0; i < m_uNodeCount; ++i)
|
||||
{
|
||||
m_Nodes[i].LogMe();
|
||||
}
|
||||
}
|
||||
|
||||
// The root is the last node in the node array.
ClusterNode *ClusterTree::GetRoot() const
{
    const unsigned uRootIndex = m_uNodeCount - 1;
    return m_Nodes + uRootIndex;
}
|
||||
|
||||
// This is the UPGMA algorithm as described in Durbin et al. p166.
|
||||
void ClusterTree::Create(const DistFunc &Dist)
|
||||
{
|
||||
unsigned i;
|
||||
m_uLeafCount = Dist.GetCount();
|
||||
m_uNodeCount = 2*m_uLeafCount - 1;
|
||||
|
||||
delete[] m_Nodes;
|
||||
m_Nodes = new ClusterNode[m_uNodeCount];
|
||||
|
||||
for (i = 0; i < m_uNodeCount; ++i)
|
||||
m_Nodes[i].SetIndex(i);
|
||||
|
||||
for (i = 0; i < m_uLeafCount - 1; ++i)
|
||||
m_Nodes[i].SetNextDisjoint(&m_Nodes[i+1]);
|
||||
|
||||
for (i = 1; i < m_uLeafCount; ++i)
|
||||
m_Nodes[i].SetPrevDisjoint(&m_Nodes[i-1]);
|
||||
|
||||
m_ptrDisjoints = &m_Nodes[0];
|
||||
|
||||
// Log("Initial state\n");
|
||||
// LogMe();
|
||||
// Log("\n");
|
||||
|
||||
DistFunc ClusterDist;
|
||||
ClusterDist.SetCount(m_uNodeCount);
|
||||
double dMaxDist = 0.0;
|
||||
for (i = 0; i < m_uLeafCount; ++i)
|
||||
for (unsigned j = 0; j < m_uLeafCount; ++j)
|
||||
{
|
||||
float dDist = Dist.GetDist(i, j);
|
||||
ClusterDist.SetDist(i, j, dDist);
|
||||
}
|
||||
|
||||
Validate(m_uLeafCount);
|
||||
|
||||
// Iteration. N-1 joins needed to create a binary tree from N leaves.
|
||||
for (unsigned uJoinIndex = m_uLeafCount; uJoinIndex < m_uNodeCount;
|
||||
++uJoinIndex)
|
||||
{
|
||||
// Find closest pair of clusters
|
||||
unsigned uIndexClosest1;
|
||||
unsigned uIndexClosest2;
|
||||
bool bFound = false;
|
||||
double dDistClosest = 9e99;
|
||||
for (ClusterNode *ptrNode1 = m_ptrDisjoints; ptrNode1;
|
||||
ptrNode1 = ptrNode1->GetNextDisjoint())
|
||||
{
|
||||
for (ClusterNode *ptrNode2 = ptrNode1->GetNextDisjoint(); ptrNode2;
|
||||
ptrNode2 = ptrNode2->GetNextDisjoint())
|
||||
{
|
||||
unsigned i1 = ptrNode1->GetIndex();
|
||||
unsigned i2 = ptrNode2->GetIndex();
|
||||
double dDist = ClusterDist.GetDist(i1, i2);
|
||||
if (dDist < dDistClosest)
|
||||
{
|
||||
bFound = true;
|
||||
dDistClosest = dDist;
|
||||
uIndexClosest1 = i1;
|
||||
uIndexClosest2 = i2;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(bFound);
|
||||
|
||||
ClusterNode &Join = m_Nodes[uJoinIndex];
|
||||
ClusterNode &Child1 = m_Nodes[uIndexClosest1];
|
||||
ClusterNode &Child2 = m_Nodes[uIndexClosest2];
|
||||
|
||||
Join.SetLeft(&Child1);
|
||||
Join.SetRight(&Child2);
|
||||
Join.SetWeight(dDistClosest);
|
||||
|
||||
Child1.SetParent(&Join);
|
||||
Child2.SetParent(&Join);
|
||||
|
||||
DeleteFromDisjoints(&Child1);
|
||||
DeleteFromDisjoints(&Child2);
|
||||
AddToDisjoints(&Join);
|
||||
|
||||
// Log("After join %d %d\n", uIndexClosest1, uIndexClosest2);
|
||||
// LogMe();
|
||||
|
||||
// Calculate distance of every remaining disjoint cluster to the
|
||||
// new cluster created by the join
|
||||
for (ClusterNode *ptrNode = m_ptrDisjoints; ptrNode;
|
||||
ptrNode = ptrNode->GetNextDisjoint())
|
||||
{
|
||||
unsigned uNodeIndex = ptrNode->GetIndex();
|
||||
float dDist1 = ClusterDist.GetDist(uNodeIndex, uIndexClosest1);
|
||||
float dDist2 = ClusterDist.GetDist(uNodeIndex, uIndexClosest2);
|
||||
float dDist = Min(dDist1, dDist2);
|
||||
ClusterDist.SetDist(uJoinIndex, uNodeIndex, dDist);
|
||||
}
|
||||
Validate(uJoinIndex+1);
|
||||
}
|
||||
GetRoot()->GetClusterWeight();
|
||||
// LogMe();
|
||||
}
|
||||
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
86
src/muscle/muscle3.8.31/src/cluster.h
Normal file
@@ -0,0 +1,86 @@
|
||||
class DistFunc;
|
||||
|
||||
// One node of a ClusterTree.
//
// Read access is public; the setters and the disjoint-list accessors are
// protected and are used by ClusterTree, which is a friend.
class ClusterNode
{
    friend class ClusterTree;

public:
    // A new node has zero weights, index 0 and no links.
    ClusterNode() :
        m_dWeight(0.0),
        m_dWeight2(0.0),
        m_uIndex(0),
        m_ptrLeft(0),
        m_ptrRight(0),
        m_ptrParent(0),
        m_ptrNextDisjoint(0),
        m_ptrPrevDisjoint(0)
    {
    }

    ~ClusterNode()
    {
    }

public:
    unsigned GetIndex() const
    {
        return m_uIndex;
    }

    ClusterNode *GetLeft() const
    {
        return m_ptrLeft;
    }

    ClusterNode *GetRight() const
    {
        return m_ptrRight;
    }

    ClusterNode *GetParent() const
    {
        return m_ptrParent;
    }

    double GetWeight() const
    {
        return m_dWeight;
    }

    // Queries over the sub-tree under this node (defined in cluster.cpp).
    const ClusterNode *GetClusterLeaf(unsigned uLeafIndex) const;
    unsigned GetClusterSize() const;
    double GetClusterWeight() const;
    double GetLeftBranchWeight() const;
    double GetRightBranchWeight() const;
    double GetLeftWeight() const;
    double GetRightWeight() const;

    void LogMe() const;

    // Second weight value; only stored and returned by this class.
    double GetWeight2() const
    {
        return m_dWeight2;
    }

    void SetWeight2(double dWeight2)
    {
        m_dWeight2 = dWeight2;
    }

protected:
    void SetIndex(unsigned uIndex)
    {
        m_uIndex = uIndex;
    }

    void SetWeight(double dWeight)
    {
        m_dWeight = dWeight;
    }

    void SetLeft(ClusterNode *ptrLeft)
    {
        m_ptrLeft = ptrLeft;
    }

    void SetRight(ClusterNode *ptrRight)
    {
        m_ptrRight = ptrRight;
    }

    void SetParent(ClusterNode *ptrParent)
    {
        m_ptrParent = ptrParent;
    }

    // Links of the doubly-linked list of disjoint clusters.
    void SetNextDisjoint(ClusterNode *ptrNode)
    {
        m_ptrNextDisjoint = ptrNode;
    }

    void SetPrevDisjoint(ClusterNode *ptrNode)
    {
        m_ptrPrevDisjoint = ptrNode;
    }

    ClusterNode *GetNextDisjoint()
    {
        return m_ptrNextDisjoint;
    }

    ClusterNode *GetPrevDisjoint()
    {
        return m_ptrPrevDisjoint;
    }

private:
    double m_dWeight;
    double m_dWeight2;
    unsigned m_uIndex;
    ClusterNode *m_ptrLeft;
    ClusterNode *m_ptrRight;
    ClusterNode *m_ptrParent;
    ClusterNode *m_ptrNextDisjoint;
    ClusterNode *m_ptrPrevDisjoint;
};
|
||||
|
||||
class ClusterTree
|
||||
{
|
||||
public:
|
||||
ClusterTree();
|
||||
virtual ~ClusterTree();
|
||||
|
||||
void Create(const DistFunc &DF);
|
||||
|
||||
ClusterNode *GetRoot() const;
|
||||
void LogMe() const;
|
||||
|
||||
protected:
|
||||
void Join(ClusterNode *ptrNode1, ClusterNode *ptrNode2,
|
||||
ClusterNode *ptrJoin);
|
||||
void AddToDisjoints(ClusterNode *ptrNode);
|
||||
void DeleteFromDisjoints(ClusterNode *ptrNode);
|
||||
void Validate(unsigned uNodeCount);
|
||||
|
||||
private:
|
||||
ClusterNode *m_ptrDisjoints;
|
||||
ClusterNode *m_Nodes;
|
||||
unsigned m_uNodeCount;
|
||||
unsigned m_uLeafCount;
|
||||
};
|
||||
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
21
src/muscle/muscle3.8.31/src/clustset.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef ClustSet_h
|
||||
#define ClustSet_h
|
||||
|
||||
enum JOIN;
|
||||
enum LINKAGE;
|
||||
class Clust;

// Abstract source of leaves and distances for Clust.
//
// Implementations supply the number of leaves, a name and an id for each
// leaf, the distance between two nodes and a JoinNodes hook that reports
// left/right lengths through its output pointers.
class ClustSet
{
public:
    // Virtual so that an implementation can safely be destroyed through a
    // ClustSet pointer.
    virtual ~ClustSet() {}

    virtual unsigned GetLeafCount() = 0;
    virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
      unsigned uNodeIndex2) = 0;
    virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
      unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
      double *ptrdLeftLength, double *ptrdRightLength) = 0;
    virtual const char *GetLeafName(unsigned uNodeIndex) = 0;
    virtual unsigned GetLeafId(unsigned uNodeIndex) = 0;
};
|
||||
|
||||
#endif // ClustSet_h
|
||||
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
48
src/muscle/muscle3.8.31/src/clustsetdf.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef ClustSetDF_h
|
||||
#define ClustSetDF_h
|
||||
|
||||
class MSA;
|
||||
class Clust;
|
||||
|
||||
#include "clustset.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
|
||||
class ClustSetDF : public ClustSet
|
||||
{
|
||||
public:
|
||||
ClustSetDF(const DistFunc &DF) :
|
||||
m_ptrDF(&DF)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual unsigned GetLeafCount()
|
||||
{
|
||||
return m_ptrDF->GetCount();
|
||||
}
|
||||
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrDF->GetName(uNodeIndex);
|
||||
}
|
||||
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrDF->GetId(uNodeIndex);
|
||||
}
|
||||
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||
double *ptrdLeftLength, double *ptrdRightLength)
|
||||
{
|
||||
Quit("ClustSetDF::JoinNodes, should never be called");
|
||||
}
|
||||
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||
unsigned uNodeIndex2)
|
||||
{
|
||||
return m_ptrDF->GetDist(uNodeIndex1, uNodeIndex2);
|
||||
}
|
||||
|
||||
private:
|
||||
const DistFunc *m_ptrDF;
|
||||
};
|
||||
|
||||
#endif // ClustSetDF_h
|
||||
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
55
src/muscle/muscle3.8.31/src/clustsetmsa.h
Normal file
@@ -0,0 +1,55 @@
|
||||
#ifndef ClustSetMSA_h
|
||||
#define ClustSetMSA_h
|
||||
|
||||
class MSA;
|
||||
class Clust;
|
||||
|
||||
#include "clustset.h"
|
||||
#include "msadist.h"
|
||||
|
||||
// Distance matrix based set.
|
||||
// Computes distances between leaves, never between
|
||||
// joined clusters (leaves this to distance matrix method).
|
||||
class ClustSetMSA : public ClustSet
|
||||
{
|
||||
public:
|
||||
ClustSetMSA(const MSA &msa, MSADist &MD) :
|
||||
m_ptrMSA(&msa),
|
||||
m_ptrMSADist(&MD)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
virtual unsigned GetLeafCount()
|
||||
{
|
||||
return m_ptrMSA->GetSeqCount();
|
||||
}
|
||||
virtual const char *GetLeafName(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrMSA->GetSeqName(uNodeIndex);
|
||||
}
|
||||
virtual unsigned GetLeafId(unsigned uNodeIndex)
|
||||
{
|
||||
return m_ptrMSA->GetSeqId(uNodeIndex);
|
||||
}
|
||||
virtual void JoinNodes(const Clust &C, unsigned uLeftNodeIndex,
|
||||
unsigned uRightNodeIndex, unsigned uJoinedNodeIndex,
|
||||
double *ptrdLeftLength, double *ptrdRightLength)
|
||||
{
|
||||
Quit("ClustSetMSA::JoinNodes, should never be called");
|
||||
}
|
||||
virtual double ComputeDist(const Clust &C, unsigned uNodeIndex1,
|
||||
unsigned uNodeIndex2)
|
||||
{
|
||||
return m_ptrMSADist->ComputeDist(*m_ptrMSA, uNodeIndex1, uNodeIndex2);
|
||||
}
|
||||
|
||||
public:
|
||||
const MSA &GetMSA();
|
||||
|
||||
private:
|
||||
const MSA *m_ptrMSA;
|
||||
MSADist *m_ptrMSADist;
|
||||
};
|
||||
|
||||
#endif // ClustSetMSA_h
|
||||
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
190
src/muscle/muscle3.8.31/src/clwwt.cpp
Normal file
@@ -0,0 +1,190 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
Compute weights by the CLUSTALW method.
Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
see also CLUSTALW paper.

Weights are computed from the edge lengths of a rooted tree.

Define the strength of an edge to be its length divided by the number
of leaves under that edge. The weight of a sequence is then the sum
of edge strengths on the path from the root to the leaf.

Example (x is the root; numbers are edge lengths).

          0.2
       --------- A        0.1
     x                 --------- B        0.7
       ------------- y                 --------- C
            0.3        ------------- z
                            0.4        --------- D
                                          0.8

Edge    Length  Leaves  Strength
----    ------  ------  --------
xy      0.3     3       0.1
xA      0.2     1       0.2
yz      0.4     2       0.2
yB      0.1     1       0.1
zC      0.7     1       0.7
zD      0.8     1       0.8

Leaf    Path        Strengths           Weight
----    ----        ---------           ------
A       xA          0.2                 0.2
B       xy-yB       0.1 + 0.1           0.2
C       xy-yz-zC    0.1 + 0.2 + 0.7     1.0
D       xy-yz-zD    0.1 + 0.2 + 0.8     1.1

***/
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
|
||||
unsigned LeavesUnderNode[])
|
||||
{
|
||||
if (tree.IsLeaf(uNodeIndex))
|
||||
{
|
||||
LeavesUnderNode[uNodeIndex] = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const unsigned uLeft = tree.GetLeft(uNodeIndex);
|
||||
const unsigned uRight = tree.GetRight(uNodeIndex);
|
||||
const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
|
||||
const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
|
||||
const unsigned uCount = uRightCount + uLeftCount;
|
||||
LeavesUnderNode[uNodeIndex] = uCount;
|
||||
return uCount;
|
||||
}
|
||||
|
||||
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("CalcClustalWWeights\n");
|
||||
tree.LogMe();
|
||||
#endif
|
||||
|
||||
const unsigned uLeafCount = tree.GetLeafCount();
|
||||
if (0 == uLeafCount)
|
||||
return;
|
||||
else if (1 == uLeafCount)
|
||||
{
|
||||
Weights[0] = (WEIGHT) 1.0;
|
||||
return;
|
||||
}
|
||||
else if (2 == uLeafCount)
|
||||
{
|
||||
Weights[0] = (WEIGHT) 0.5;
|
||||
Weights[1] = (WEIGHT) 0.5;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!tree.IsRooted())
|
||||
Quit("CalcClustalWWeights requires rooted tree");
|
||||
|
||||
const unsigned uNodeCount = tree.GetNodeCount();
|
||||
unsigned *LeavesUnderNode = new unsigned[uNodeCount];
|
||||
memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
|
||||
|
||||
const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
|
||||
unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
|
||||
if (uLeavesUnderRoot != uLeafCount)
|
||||
Quit("WeightsFromTreee: Internal error, root count %u %u",
|
||||
uLeavesUnderRoot, uLeafCount);
|
||||
|
||||
#if TRACE
|
||||
Log("Node Leaves Length Strength\n");
|
||||
Log("---- ------ -------- --------\n");
|
||||
// 1234 123456 12345678 12345678
|
||||
#endif
|
||||
|
||||
double *Strengths = new double[uNodeCount];
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (tree.IsRoot(uNodeIndex))
|
||||
{
|
||||
Strengths[uNodeIndex] = 0.0;
|
||||
continue;
|
||||
}
|
||||
const unsigned uParent = tree.GetParent(uNodeIndex);
|
||||
const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
|
||||
const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
|
||||
const double dStrength = dLength / (double) uLeaves;
|
||||
Strengths[uNodeIndex] = dStrength;
|
||||
#if TRACE
|
||||
Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("\n");
|
||||
Log(" Seq Path..Weight\n");
|
||||
Log("-------------------- ------------\n");
|
||||
#endif
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||
#if TRACE
|
||||
Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
|
||||
#endif
|
||||
if (!tree.IsLeaf(uLeafNodeIndex))
|
||||
Quit("CalcClustalWWeights: leaf");
|
||||
|
||||
double dWeight = 0;
|
||||
unsigned uNode = uLeafNodeIndex;
|
||||
while (!tree.IsRoot(uNode))
|
||||
{
|
||||
dWeight += Strengths[uNode];
|
||||
uNode = tree.GetParent(uNode);
|
||||
#if TRACE
|
||||
Log("->%u(%g)", uNode, Strengths[uNode]);
|
||||
#endif
|
||||
}
|
||||
if (dWeight < 0.0001)
|
||||
{
|
||||
#if TRACE
|
||||
Log("zero->one");
|
||||
#endif
|
||||
dWeight = 1.0;
|
||||
}
|
||||
Weights[n] = (WEIGHT) dWeight;
|
||||
#if TRACE
|
||||
Log(" = %g\n", dWeight);
|
||||
#endif
|
||||
}
|
||||
|
||||
delete[] Strengths;
|
||||
delete[] LeavesUnderNode;
|
||||
|
||||
Normalize(Weights, uLeafCount);
|
||||
}
|
||||
|
||||
void MSA::SetClustalWWeights(const Tree &tree)
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
const unsigned uLeafCount = tree.GetLeafCount();
|
||||
|
||||
WEIGHT *Weights = new WEIGHT[uSeqCount];
|
||||
|
||||
CalcClustalWWeights(tree, Weights);
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const WEIGHT w = Weights[n];
|
||||
const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
|
||||
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||
const unsigned uSeqIndex = GetSeqIndex(uId);
|
||||
#if DEBUG
|
||||
if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
|
||||
Quit("MSA::SetClustalWWeights: names don't match");
|
||||
#endif
|
||||
SetSeqWeight(uSeqIndex, w);
|
||||
}
|
||||
NormalizeWeights((WEIGHT) 1.0);
|
||||
|
||||
delete[] Weights;
|
||||
}
|
||||
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
189
src/muscle/muscle3.8.31/src/color.cpp
Normal file
@@ -0,0 +1,189 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
|
||||
// 23 x 23 substitution score table. Rows and columns follow the letter order
// A B C D E F G H I K L M N P Q R S T V W X Y Z (the alphabet without J, O
// and U); each row below is labelled with its letter.
static int Blosum62[23][23] =
{
    //  A   B   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   X   Y   Z
    { +4, -2, +0, -2, -1, -2, +0, -2, -1, -1, -1, -1, -2, -1, -1, -1, +1, +0, +0, -3, -1, -2, -1 }, // A
    { -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2 }, // B
    { +0, -3, +9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -1, -2, -4 }, // C
    { -2, +6, -3, +6, +2, -3, -1, -1, -3, -1, -4, -3, +1, -1, +0, -2, +0, -1, -3, -4, -1, -3, +2 }, // D
    { -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5 }, // E

    { -2, -3, -2, -3, -3, +6, -3, -1, +0, -3, +0, +0, -3, -4, -3, -3, -2, -2, -1, +1, -1, +3, -3 }, // F
    { +0, -1, -3, -1, -2, -3, +6, -2, -4, -2, -4, -3, +0, -2, -2, -2, +0, -2, -3, -2, -1, -3, -2 }, // G
    { -2, -1, -3, -1, +0, -1, -2, +8, -3, -1, -3, -2, +1, -2, +0, +0, -1, -2, -3, -2, -1, +2, +0 }, // H
    { -1, -3, -1, -3, -3, +0, -4, -3, +4, -3, +2, +1, -3, -3, -3, -3, -2, -1, +3, -3, -1, -1, -3 }, // I
    { -1, -1, -3, -1, +1, -3, -2, -1, -3, +5, -2, -1, +0, -1, +1, +2, +0, -1, -2, -3, -1, -2, +1 }, // K

    { -1, -4, -1, -4, -3, +0, -4, -3, +2, -2, +4, +2, -3, -3, -2, -2, -2, -1, +1, -2, -1, -1, -3 }, // L
    { -1, -3, -1, -3, -2, +0, -3, -2, +1, -1, +2, +5, -2, -2, +0, -1, -1, -1, +1, -1, -1, -1, -2 }, // M
    { -2, +1, -3, +1, +0, -3, +0, +1, -3, +0, -3, -2, +6, -2, +0, +0, +1, +0, -3, -4, -1, -2, +0 }, // N
    { -1, -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, +7, -1, -2, -1, -1, -2, -4, -1, -3, -1 }, // P
    { -1, +0, -3, +0, +2, -3, -2, +0, -3, +1, -2, +0, +0, -1, +5, +1, +0, -1, -2, -2, -1, -1, +2 }, // Q

    { -1, -2, -3, -2, +0, -3, -2, +0, -3, +2, -2, -1, +0, -2, +1, +5, -1, -1, -3, -3, -1, -2, +0 }, // R
    { +1, +0, -1, +0, +0, -2, +0, -1, -2, +0, -2, -1, +1, -1, +0, -1, +4, +1, -2, -3, -1, -2, +0 }, // S
    { +0, -1, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, +0, -1, -1, -1, +1, +5, +0, -2, -1, -2, -1 }, // T
    { +0, -3, -1, -3, -2, -1, -3, -3, +3, -2, +1, +1, -3, -2, -2, -3, -2, +0, +4, -3, -1, -1, -2 }, // V
    { -3, -4, -2, -4, -3, +1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3,+11, -1, +2, -3 }, // W

    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, // X
    { -2, -3, -2, -3, -2, +3, -3, +2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, +2, -1, +7, -2 }, // Y
    { -1, +2, -4, +2, +5, -3, -2, +0, -3, +1, -3, -2, +0, -1, +2, +0, +0, -1, -2, -3, -1, -2, +5 }, // Z
};
|
||||
|
||||
// Row/column index into Blosum62 for each letter A..Z.
// J and O have no row (-1); U shares index 17 with T.
static int toi_tab[26] =
{
    0,  // A
    1,  // B
    2,  // C
    3,  // D
    4,  // E
    5,  // F
    6,  // G
    7,  // H
    8,  // I
    -1, // J
    9,  // K
    10, // L
    11, // M
    12, // N
    -1, // O
    13, // P
    14, // Q
    15, // R
    16, // S
    17, // T
    17, // U
    18, // V
    19, // W
    20, // X
    21, // Y
    22, // Z
};

// Map a residue letter (either case) to its Blosum62 index.
// Returns -1 for J and O and for any character that is not a letter A..Z.
static int toi(char c)
{
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); a plain char may be negative.
    const int iUpper = toupper((unsigned char) c);

    // Anything outside A..Z would index toi_tab out of bounds.
    if (iUpper < 'A' || iUpper > 'Z')
        return -1;

    return toi_tab[iUpper - 'A'];
}
|
||||
|
||||
static int BlosumScore(char c1, char c2)
|
||||
{
|
||||
int i1 = toi(c1);
|
||||
int i2 = toi(c2);
|
||||
return Blosum62[i1][i2];
|
||||
}
|
||||
|
||||
/***
|
||||
Consider a column with 5 As and 3 Bs.
|
||||
There are:
|
||||
5x4 pairs of As.
|
||||
3x2 pairs of Bs.
|
||||
5x3x2 AB pairs
|
||||
8x7 = 5x4 + 3x2 + 5x3x2 pairs of letters
|
||||
***/
|
||||
static double BlosumScoreCol(const MSA &a, unsigned uColIndex)
|
||||
{
|
||||
int iCounts[23];
|
||||
memset(iCounts, 0, sizeof(iCounts));
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned uCharCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
continue;
|
||||
int iChar = toi(c);
|
||||
++iCounts[iChar];
|
||||
++uCharCount;
|
||||
}
|
||||
if (uCharCount < 2)
|
||||
return -9;
|
||||
int iTotalScore = 0;
|
||||
for (int i1 = 0; i1 < 23; ++i1)
|
||||
{
|
||||
int iCounts1 = iCounts[i1];
|
||||
iTotalScore += iCounts1*(iCounts1 - 1)*Blosum62[i1][i1];
|
||||
for (int i2 = i1 + 1; i2 < 23; ++i2)
|
||||
iTotalScore += iCounts[i2]*iCounts1*2*Blosum62[i1][i2];
|
||||
}
|
||||
int iPairCount = uCharCount*(uCharCount - 1);
|
||||
return (double) iTotalScore / (double) iPairCount;
|
||||
}
|
||||
|
||||
/***
|
||||
Consider a column with 5 As and 3 Bs.
|
||||
A residue of type Q scores:
|
||||
5xAQ + 3xBQ
|
||||
***/
|
||||
static void AssignColorsCol(const MSA &a, unsigned uColIndex, int **Colors)
|
||||
{
|
||||
int iCounts[23];
|
||||
memset(iCounts, 0, sizeof(iCounts));
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
unsigned uCharCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
continue;
|
||||
int iChar = toi(c);
|
||||
++iCounts[iChar];
|
||||
++uCharCount;
|
||||
}
|
||||
int iMostConservedType = -1;
|
||||
int iMostConservedCount = -1;
|
||||
for (unsigned i = 0; i < 23; ++i)
|
||||
{
|
||||
if (iCounts[i] > iMostConservedCount)
|
||||
{
|
||||
iMostConservedType = i;
|
||||
iMostConservedCount = iCounts[i];
|
||||
}
|
||||
}
|
||||
|
||||
double dColScore = BlosumScoreCol(a, uColIndex);
|
||||
int c;
|
||||
if (dColScore >= 3.0)
|
||||
c = 3;
|
||||
//else if (dColScore >= 1.0)
|
||||
// c = 2;
|
||||
else if (dColScore >= 0.2)
|
||||
c = 1;
|
||||
else
|
||||
c = 0;
|
||||
|
||||
int Color[23];
|
||||
for (unsigned uLetter = 0; uLetter < 23; ++uLetter)
|
||||
{
|
||||
double dScore = Blosum62[uLetter][iMostConservedType];
|
||||
if (dScore >= dColScore)
|
||||
Color[uLetter] = c;
|
||||
else
|
||||
Color[uLetter] = 0;
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char c = a.GetChar(uSeqIndex, uColIndex);
|
||||
if (IsGapChar(c))
|
||||
{
|
||||
Colors[uSeqIndex][uColIndex] = 0;
|
||||
continue;
|
||||
}
|
||||
int iLetter = toi(c);
|
||||
if (iLetter >= 0 && iLetter < 23)
|
||||
Colors[uSeqIndex][uColIndex] = Color[iLetter];
|
||||
else
|
||||
Colors[uSeqIndex][uColIndex] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AssignColors(const MSA &a, int **Colors)
|
||||
{
|
||||
const unsigned uColCount = a.GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
AssignColorsCol(a, uColIndex, Colors);
|
||||
}
|
||||
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
118
src/muscle/muscle3.8.31/src/cons.cpp
Normal file
@@ -0,0 +1,118 @@
|
||||
/***
|
||||
Conservation value for a column in an MSA is defined as the number
|
||||
of times the most common letter appears divided by the number of
|
||||
sequences.
|
||||
***/
|
||||
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include <math.h>
|
||||
|
||||
double MSA::GetAvgCons() const
|
||||
{
|
||||
assert(GetSeqCount() > 0);
|
||||
double dSum = 0;
|
||||
unsigned uNonGapColCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)
|
||||
{
|
||||
if (!IsGapColumn(uColIndex))
|
||||
{
|
||||
dSum += GetCons(uColIndex);
|
||||
++uNonGapColCount;
|
||||
}
|
||||
}
|
||||
assert(uNonGapColCount > 0);
|
||||
double dAvg = dSum / uNonGapColCount;
|
||||
assert(dAvg > 0 && dAvg <= 1);
|
||||
return dAvg;
|
||||
}
|
||||
|
||||
double MSA::GetCons(unsigned uColIndex) const
|
||||
{
|
||||
unsigned Counts[MAX_ALPHA];
|
||||
for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
|
||||
Counts[uLetter] = 0;
|
||||
|
||||
unsigned uMaxCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex, uColIndex))
|
||||
continue;
|
||||
char c = GetChar(uSeqIndex, uColIndex);
|
||||
c = toupper(c);
|
||||
if ('X' == c || 'B' == c || 'Z' == c)
|
||||
continue;
|
||||
unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||
unsigned uCount = Counts[uLetter] + 1;
|
||||
if (uCount > uMaxCount)
|
||||
uMaxCount = uCount;
|
||||
Counts[uLetter] = uCount;
|
||||
}
|
||||
|
||||
// Cons is undefined for all-gap column
|
||||
if (0 == uMaxCount)
|
||||
{
|
||||
// assert(false);
|
||||
return 1;
|
||||
}
|
||||
|
||||
double dCons = (double) uMaxCount / (double) GetSeqCount();
|
||||
assert(dCons > 0 && dCons <= 1);
|
||||
return dCons;
|
||||
}
|
||||
|
||||
// Percent identity of a pair of sequences.
|
||||
// Positions with one or both gapped are ignored.
|
||||
double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
unsigned uPosCount = 0;
|
||||
unsigned uSameCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
const char c1 = GetChar(uSeqIndex1, uColIndex);
|
||||
const char c2 = GetChar(uSeqIndex2, uColIndex);
|
||||
if (IsGapChar(c1) || IsGapChar(c2))
|
||||
continue;
|
||||
if (c1 == c2)
|
||||
++uSameCount;
|
||||
++uPosCount;
|
||||
}
|
||||
if (0 == uPosCount)
|
||||
return 0;
|
||||
return (double) uSameCount / (double) uPosCount;
|
||||
}
|
||||
|
||||
// Percent group identity of a pair of sequences.
|
||||
// Positions with one or both gapped are ignored.
|
||||
double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
|
||||
unsigned uSeqIndex2) const
|
||||
{
|
||||
extern unsigned ResidueGroup[];
|
||||
|
||||
const unsigned uColCount = GetColCount();
|
||||
unsigned uPosCount = 0;
|
||||
unsigned uSameCount = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex1, uColIndex))
|
||||
continue;
|
||||
if (IsGap(uSeqIndex2, uColIndex))
|
||||
continue;
|
||||
if (IsWildcard(uSeqIndex1, uColIndex))
|
||||
continue;
|
||||
if (IsWildcard(uSeqIndex2, uColIndex))
|
||||
continue;
|
||||
|
||||
const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
|
||||
const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
|
||||
const unsigned uGroup1 = ResidueGroup[uLetter1];
|
||||
const unsigned uGroup2 = ResidueGroup[uLetter2];
|
||||
if (uGroup1 == uGroup2)
|
||||
++uSameCount;
|
||||
++uPosCount;
|
||||
}
|
||||
if (0 == uPosCount)
|
||||
return 0;
|
||||
return (double) uSameCount / (double) uPosCount;
|
||||
}
|
||||
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
378
src/muscle/muscle3.8.31/src/diaglist.cpp
Normal file
@@ -0,0 +1,378 @@
|
||||
#include "muscle.h"
|
||||
#include "diaglist.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
// Larger / smaller of two values. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define MAX(a, b)	(((a) > (b)) ? (a) : (b))
#define MIN(a, b)	(((a) < (b)) ? (a) : (b))
|
||||
|
||||
void DiagList::Add(const Diag &d)
|
||||
{
|
||||
if (m_uCount == MAX_DIAGS)
|
||||
Quit("DiagList::Add, overflow %u", m_uCount);
|
||||
m_Diags[m_uCount] = d;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
void DiagList::Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength)
|
||||
{
|
||||
Diag d;
|
||||
d.m_uStartPosA = uStartPosA;
|
||||
d.m_uStartPosB = uStartPosB;
|
||||
d.m_uLength = uLength;
|
||||
Add(d);
|
||||
}
|
||||
|
||||
// Returns the diagonal stored at uIndex. Calls Quit() when uIndex is not
// below the current count.
const Diag &DiagList::Get(unsigned uIndex) const
{
    if (!(uIndex < m_uCount))
        Quit("DiagList::Get(%u), count=%u", uIndex, m_uCount);
    return m_Diags[uIndex];
}
|
||||
|
||||
void DiagList::LogMe() const
|
||||
{
|
||||
Log("DiagList::LogMe, count=%u\n", m_uCount);
|
||||
Log(" n StartA StartB Length\n");
|
||||
Log("--- ------ ------ ------\n");
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
const Diag &d = m_Diags[n];
|
||||
Log("%3u %6u %6u %6u\n",
|
||||
n, d.m_uStartPosA, d.m_uStartPosB, d.m_uLength);
|
||||
}
|
||||
}
|
||||
|
||||
void DiagList::FromPath(const PWPath &Path)
|
||||
{
|
||||
Clear();
|
||||
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
unsigned uLength = 0;
|
||||
unsigned uStartPosA;
|
||||
unsigned uStartPosB;
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
|
||||
// Typical cases
|
||||
if (Edge.cType == 'M')
|
||||
{
|
||||
if (0 == uLength)
|
||||
{
|
||||
uStartPosA = Edge.uPrefixLengthA - 1;
|
||||
uStartPosB = Edge.uPrefixLengthB - 1;
|
||||
}
|
||||
++uLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
Add(uStartPosA, uStartPosB, uLength);
|
||||
uLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for last edge
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
Add(uStartPosA, uStartPosB, uLength);
|
||||
}
|
||||
|
||||
bool DiagList::NonZeroIntersection(const Diag &d) const
|
||||
{
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
const Diag &d2 = m_Diags[n];
|
||||
if (DiagOverlap(d, d2) > 0)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// DiagOverlap returns the length of the overlapping
|
||||
// section of the two diagonals along the diagonals
|
||||
// themselves; in other words, the length of
|
||||
// the intersection of the two sets of cells in
|
||||
// the matrix.
|
||||
unsigned DiagOverlap(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
// Determine where the diagonals intersect the A
|
||||
// axis (extending them if required). If they
|
||||
// intersect at different points, they do not
|
||||
// overlap. Coordinates on a diagonal are
|
||||
// given by B = A + c where c is the value of
|
||||
// A at the intersection with the A axis.
|
||||
// Hence, c = B - A for any point on the diagonal.
|
||||
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||
if (c1 != c2)
|
||||
return 0;
|
||||
|
||||
assert(DiagOverlapA(d1, d2) == DiagOverlapB(d1, d2));
|
||||
return DiagOverlapA(d1, d2);
|
||||
}
|
||||
|
||||
// DiagOverlapA returns the length of the overlapping
|
||||
// section of the projection of the two diagonals onto
|
||||
// the A axis.
|
||||
unsigned DiagOverlapA(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
unsigned uMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||
unsigned uMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||
d2.m_uStartPosA + d2.m_uLength - 1);
|
||||
|
||||
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||
if (iLength < 0)
|
||||
return 0;
|
||||
return (unsigned) iLength;
|
||||
}
|
||||
|
||||
// DiagOverlapB returns the length of the overlapping
|
||||
// section of the projection of the two diagonals onto
|
||||
// the B axis.
|
||||
unsigned DiagOverlapB(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
unsigned uMaxStart = MAX(d1.m_uStartPosB, d2.m_uStartPosB);
|
||||
unsigned uMinEnd = MIN(d1.m_uStartPosB + d1.m_uLength - 1,
|
||||
d2.m_uStartPosB + d2.m_uLength - 1);
|
||||
|
||||
int iLength = (int) uMinEnd - (int) uMaxStart + 1;
|
||||
if (iLength < 0)
|
||||
return 0;
|
||||
return (unsigned) iLength;
|
||||
}
|
||||
|
||||
// Returns true if the two diagonals can be on the
|
||||
// same path through the DP matrix. If DiagCompatible
|
||||
// returns false, they cannot be in the same path
|
||||
// and hence "contradict" each other.
|
||||
bool DiagCompatible(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
if (DiagOverlap(d1, d2) > 0)
|
||||
return true;
|
||||
return 0 == DiagOverlapA(d1, d2) && 0 == DiagOverlapB(d1, d2);
|
||||
}
|
||||
|
||||
// Returns the length of the "break" between two diagonals.
|
||||
unsigned DiagBreak(const Diag &d1, const Diag &d2)
|
||||
{
|
||||
int c1 = (int) d1.m_uStartPosB - (int) d1.m_uStartPosA;
|
||||
int c2 = (int) d2.m_uStartPosB - (int) d2.m_uStartPosA;
|
||||
if (c1 != c2)
|
||||
return 0;
|
||||
|
||||
int iMaxStart = MAX(d1.m_uStartPosA, d2.m_uStartPosA);
|
||||
int iMinEnd = MIN(d1.m_uStartPosA + d1.m_uLength - 1,
|
||||
d2.m_uStartPosA + d1.m_uLength - 1);
|
||||
int iBreak = iMaxStart - iMinEnd - 1;
|
||||
if (iBreak < 0)
|
||||
return 0;
|
||||
return (unsigned) iBreak;
|
||||
}
|
||||
|
||||
// Merge diagonals that are continuations of each other with
|
||||
// short breaks of up to length g_uMaxDiagBreak.
|
||||
// In a sorted list of diagonals, we only have to check
|
||||
// consecutive entries.
|
||||
// Intended to merge consecutive diagonals separated by a break of at most
// g_uMaxDiagBreak into one. The function is currently switched off: it
// returns immediately and leaves DL untouched; everything after the first
// statement is unreachable and kept for reference.
void MergeDiags(DiagList &DL)
{
    return;

#if DEBUG
    if (!DL.IsSorted())
        Quit("MergeDiags: !IsSorted");
#endif

    // TODO: Fix this!
    // Breaks must be with no offset (no gaps)
    const unsigned uDiagCount = DL.GetCount();
    if (uDiagCount < 2)
        return;

    DiagList Merged;
    Diag Accum;

    // ptrCurrent is either an entry of DL or the running merge in Accum.
    const Diag *ptrCurrent = &DL.Get(0);
    for (unsigned i = 1; i < uDiagCount; ++i)
    {
        const Diag *ptrNext = &DL.Get(i);
        const unsigned uGap = DiagBreak(*ptrCurrent, *ptrNext);
        if (uGap > g_uMaxDiagBreak)
        {
            Merged.Add(*ptrCurrent);
            ptrCurrent = ptrNext;
            continue;
        }

        Accum.m_uStartPosA = ptrCurrent->m_uStartPosA;
        Accum.m_uStartPosB = ptrCurrent->m_uStartPosB;
        Accum.m_uLength = ptrCurrent->m_uLength + ptrNext->m_uLength
          + uGap;
        ptrCurrent = &Accum;
    }
    Merged.Add(*ptrCurrent);
    DL.Copy(Merged);
}
|
||||
|
||||
void DiagList::DeleteIncompatible()
|
||||
{
|
||||
assert(IsSorted());
|
||||
|
||||
if (m_uCount < 2)
|
||||
return;
|
||||
|
||||
bool *bFlagForDeletion = new bool[m_uCount];
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
bFlagForDeletion[i] = false;
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const Diag &di = m_Diags[i];
|
||||
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||
{
|
||||
const Diag &dj = m_Diags[j];
|
||||
|
||||
// Verify sorted correctly
|
||||
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||
|
||||
// If two diagonals are incompatible and
|
||||
// one is is much longer than the other,
|
||||
// keep the longer one.
|
||||
if (!DiagCompatible(di, dj))
|
||||
{
|
||||
if (di.m_uLength > dj.m_uLength*4)
|
||||
bFlagForDeletion[j] = true;
|
||||
else if (dj.m_uLength > di.m_uLength*4)
|
||||
bFlagForDeletion[i] = true;
|
||||
else
|
||||
{
|
||||
bFlagForDeletion[i] = true;
|
||||
bFlagForDeletion[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const Diag &di = m_Diags[i];
|
||||
if (bFlagForDeletion[i])
|
||||
continue;
|
||||
|
||||
for (unsigned j = i + 1; j < m_uCount; ++j)
|
||||
{
|
||||
const Diag &dj = m_Diags[j];
|
||||
if (bFlagForDeletion[j])
|
||||
continue;
|
||||
|
||||
// Verify sorted correctly
|
||||
assert(di.m_uStartPosA <= dj.m_uStartPosA);
|
||||
|
||||
// If sort order in B different from sorted order in A,
|
||||
// either diags are incompatible or we detected a repeat
|
||||
// or permutation.
|
||||
if (di.m_uStartPosB >= dj.m_uStartPosB || !DiagCompatible(di, dj))
|
||||
{
|
||||
bFlagForDeletion[i] = true;
|
||||
bFlagForDeletion[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned uNewCount = 0;
|
||||
Diag *NewDiags = new Diag[m_uCount];
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
if (bFlagForDeletion[i])
|
||||
continue;
|
||||
|
||||
const Diag &d = m_Diags[i];
|
||||
NewDiags[uNewCount] = d;
|
||||
++uNewCount;
|
||||
}
|
||||
memcpy(m_Diags, NewDiags, uNewCount*sizeof(Diag));
|
||||
m_uCount = uNewCount;
|
||||
delete[] NewDiags;
|
||||
}
|
||||
|
||||
void DiagList::Copy(const DiagList &DL)
|
||||
{
|
||||
Clear();
|
||||
unsigned uCount = DL.GetCount();
|
||||
for (unsigned i = 0; i < uCount; ++i)
|
||||
Add(DL.Get(i));
|
||||
}
|
||||
|
||||
// Check if sorted in increasing order of m_uStartPosA
|
||||
bool DiagList::IsSorted() const
|
||||
{
|
||||
return true;
|
||||
unsigned uCount = GetCount();
|
||||
for (unsigned i = 1; i < uCount; ++i)
|
||||
if (m_Diags[i-1].m_uStartPosA > m_Diags[i].m_uStartPosA)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sort in increasing order of m_uStartPosA
|
||||
// Dumb bubble sort, but don't care about speed
|
||||
// because don't get long lists.
|
||||
void DiagList::Sort()
|
||||
{
|
||||
if (m_uCount < 2)
|
||||
return;
|
||||
|
||||
bool bContinue = true;
|
||||
while (bContinue)
|
||||
{
|
||||
bContinue = false;
|
||||
for (unsigned i = 0; i < m_uCount - 1; ++i)
|
||||
{
|
||||
if (m_Diags[i].m_uStartPosA > m_Diags[i+1].m_uStartPosA)
|
||||
{
|
||||
Diag Tmp = m_Diags[i];
|
||||
m_Diags[i] = m_Diags[i+1];
|
||||
m_Diags[i+1] = Tmp;
|
||||
bContinue = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//void TestDiag()
|
||||
// {
|
||||
// Diag d1;
|
||||
// Diag d2;
|
||||
// Diag d3;
|
||||
//
|
||||
// d1.m_uStartPosA = 0;
|
||||
// d1.m_uStartPosB = 1;
|
||||
// d1.m_uLength = 32;
|
||||
//
|
||||
// d2.m_uStartPosA = 55;
|
||||
// d2.m_uStartPosB = 70;
|
||||
// d2.m_uLength = 36;
|
||||
//
|
||||
// d3.m_uStartPosA = 102;
|
||||
// d3.m_uStartPosB = 122;
|
||||
// d3.m_uLength = 50;
|
||||
//
|
||||
// DiagList DL;
|
||||
// DL.Add(d1);
|
||||
// DL.Add(d2);
|
||||
// DL.Add(d3);
|
||||
//
|
||||
// Log("Before DeleteIncompatible:\n");
|
||||
// DL.LogMe();
|
||||
// DL.DeleteIncompatible();
|
||||
//
|
||||
// Log("After DeleteIncompatible:\n");
|
||||
// DL.LogMe();
|
||||
//
|
||||
// MergeDiags(DL);
|
||||
// Log("After Merge:\n");
|
||||
// DL.LogMe();
|
||||
//
|
||||
// DPRegionList RL;
|
||||
// DiagListToDPRegionList(DL, RL, 200, 200);
|
||||
// RL.LogMe();
|
||||
// }
|
||||
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
89
src/muscle/muscle3.8.31/src/diaglist.h
Normal file
@@ -0,0 +1,89 @@
|
||||
#ifndef diaglist_h
|
||||
#define diaglist_h
|
||||
|
||||
// All bits set, i.e. the largest unsigned value.
// NOTE(review): presumably a "no value" marker; not referenced in this header.
const unsigned EMPTY = ~0u;

// Capacity of the fixed-size array inside DiagList.
const unsigned MAX_DIAGS = 1024;
|
||||
|
||||
// A diagonal: a run of m_uLength consecutive cells (A+k, B+k) starting at
// zero-based positions (m_uStartPosA, m_uStartPosB).
struct Diag
{
    unsigned m_uStartPosA;	// first position on the A axis
    unsigned m_uStartPosB;	// first position on the B axis
    unsigned m_uLength;		// number of cells in the run
};
|
||||
|
||||
// A rectangle given by a start position and a length on each axis.
// NOTE(review): not used by any code visible next to this header;
// confirm the position convention with its users.
struct Rect
{
    unsigned m_uStartPosA;	// first position on the A axis
    unsigned m_uStartPosB;	// first position on the B axis
    unsigned m_uLengthA;	// extent along the A axis
    unsigned m_uLengthB;	// extent along the B axis
};
|
||||
|
||||
class DiagList
|
||||
{
|
||||
public:
|
||||
DiagList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DiagList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void FromPath(const PWPath &Path);
|
||||
void Add(const Diag &d);
|
||||
void Add(unsigned uStartPosA, unsigned uStartPosB, unsigned uLength);
|
||||
void DeleteIncompatible();
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
const Diag &Get(unsigned uIndex) const;
|
||||
|
||||
// Operations
|
||||
void Sort();
|
||||
void Copy(const DiagList &DL);
|
||||
|
||||
// Query
|
||||
// returns true iff given diagonal is included in the list
|
||||
// in whole or in part.
|
||||
bool NonZeroIntersection(const Diag &d) const;
|
||||
bool IsSorted() const;
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
Diag m_Diags[MAX_DIAGS];
|
||||
};
|
||||
|
||||
unsigned DiagOverlap(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagOverlapA(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagOverlapB(const Diag &d1, const Diag &d2);
|
||||
unsigned DiagBreak(const Diag &d1, const Diag &d2);
|
||||
bool DiagCompatible(const Diag &d1, const Diag &d2);
|
||||
void CheckDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, const MSA &msaA, const MSA &msaB, const PWPath &Path);
|
||||
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL);
|
||||
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL);
|
||||
void MergeDiags(DiagList &DL);
|
||||
|
||||
#endif // diaglist_h
|
||||
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
162
src/muscle/muscle3.8.31/src/diffobjscore.cpp
Normal file
@@ -0,0 +1,162 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
#include "profile.h"
|
||||
|
||||
#define TRACE 0
|
||||
#define COMPARE_3_52 0
|
||||
#define BRUTE_LETTERS 0
|
||||
|
||||
// Weighted sum-of-pairs letter score of one column:
//   sum over sequence pairs i < j of  w_i * w_j * Mx[letter_i][letter_j]
// computed from per-letter weight totals instead of enumerating pairs.
// Sequences whose letter index is >= g_AlphaSize are ignored.
// Returns 0 when the sequence weights sum to zero or less.
// NOTE(review): Freqs has 20 slots but is indexed up to g_AlphaSize - 1;
// this assumes g_AlphaSize <= 20 -- confirm.
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
{
    SCOREMATRIX &Mx = *g_ptrScoreMatrix;
    const unsigned uSeqCount = msa.GetSeqCount();

#if BRUTE_LETTERS
    SCORE BruteScore = 0;
    for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
    {
        unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
        if (uLetter1 >= g_AlphaSize)
            continue;
        WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
        for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
        {
            unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
            if (uLetter2 >= g_AlphaSize)
                continue;
            WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
            BruteScore += w1*w2*Mx[uLetter1][uLetter2];
        }
    }
#endif

    // Total weight of all sequences; nothing to score if it is not positive.
    double N = 0;
    for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
    {
        WEIGHT w = msa.GetSeqWeight(uSeq);
        N += w;
    }
    if (N <= 0)
        return 0;

    // Per-letter weight totals. While collecting them, subtract each
    // sequence's pairing with itself, which the double sum below includes.
    FCOUNT Freqs[20];
    memset(Freqs, 0, sizeof(Freqs));
    SCORE Score = 0;
    for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
    {
        unsigned uLetter = msa.GetLetterEx(uSeq, uColIndex);
        if (uLetter >= g_AlphaSize)
            continue;
        WEIGHT w = msa.GetSeqWeight(uSeq);
        Freqs[uLetter] += w;
        Score -= w*w*Mx[uLetter][uLetter];
    }

    // Sum over all ordered letter pairs, using the upper triangle twice.
    for (unsigned uRow = 0; uRow < g_AlphaSize; ++uRow)
    {
        const FCOUNT f1 = Freqs[uRow];
        Score += f1*f1*Mx[uRow][uRow];
        for (unsigned uCol = uRow + 1; uCol < g_AlphaSize; ++uCol)
        {
            const FCOUNT f2 = Freqs[uCol];
            Score += 2*f1*f2*Mx[uRow][uCol];
        }
    }

    // Ordered pairs were counted; halve to get unordered pairs.
    Score /= 2;
#if BRUTE_LETTERS
    assert(BTEq(BruteScore, Score));
#endif
    return Score;
}
|
||||
|
||||
// Sum of ScoreColLetters() over the columns listed in Edges[0..uEdgeCount-1].
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
{
    const unsigned uSeqCount = msa.GetSeqCount();
    const unsigned uColCount = msa.GetColCount();

    // Letters
    SCORE Total = 0;
    unsigned uEdge = 0;
    while (uEdge < uEdgeCount)
    {
        const unsigned uCol = Edges[uEdge];
        assert(uCol < uColCount);
        Total += ScoreColLetters(msa, uCol);
        ++uEdge;
    }
    return Total;
}
|
||||
|
||||
void GetLetterScores(const MSA &msa, SCORE Scores[])
|
||||
{
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
|
||||
}
|
||||
|
||||
// Returns the objective score of msa2 minus that of msa1, evaluated only
// on the listed columns (Edges1 for msa1, Edges2 for msa2): the difference
// in letter scores plus the difference in gap scores. Path1 and Path2 are
// not used by the computation.
SCORE DiffObjScore(
  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1,
  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
{
#if TRACE
    {
        Log("============DiffObjScore===========\n");
        Log("msa1:\n");
        msa1.LogMe();
        Log("\n");
        Log("Cols1: ");
        for (unsigned i = 0; i < uEdgeCount1; ++i)
            Log(" %u", Edges1[i]);
        Log("\n\n");
        Log("msa2:\n");
        msa2.LogMe();
        Log("Cols2: ");
        for (unsigned i = 0; i < uEdgeCount2; ++i)
            Log(" %u", Edges2[i]);
        Log("\n\n");
    }
#endif

#if COMPARE_3_52
    // Reference values from the full sum-of-pairs scorer, for comparison.
    extern SCORE g_SPScoreLetters;
    extern SCORE g_SPScoreGaps;
    SCORE SP1 = ObjScoreSP(msa1);
    SCORE SPLetters1 = g_SPScoreLetters;
    SCORE SPGaps1 = g_SPScoreGaps;

    SCORE SP2 = ObjScoreSP(msa2);
    SCORE SPLetters2 = g_SPScoreLetters;
    SCORE SPGaps2 = g_SPScoreGaps;
    SCORE SPDiffLetters = SPLetters2 - SPLetters1;
    SCORE SPDiffGaps = SPGaps2 - SPGaps1;
    SCORE SPDiff = SPDiffLetters + SPDiffGaps;
#endif

    // Letter contributions first, then gap contributions, msa1 before msa2.
    const SCORE LetterScore1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
    const SCORE LetterScore2 = ScoreLetters(msa2, Edges2, uEdgeCount2);

    const SCORE GapScore1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
    const SCORE GapScore2 = ScoreGaps(msa2, Edges2, uEdgeCount2);

    const SCORE LetterDelta = LetterScore2 - LetterScore1;
    const SCORE GapDelta = GapScore2 - GapScore1;
    const SCORE TotalDelta = LetterDelta + GapDelta;

#if COMPARE_3_52
    Log("ObjScoreSP Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
      SPLetters1, SPLetters2, SPDiffLetters);

    Log("DiffObjScore Letters1=%.4g Letters2=%.4g DiffLetters=%.4g\n",
      LetterScore1, LetterScore2, LetterDelta);

    Log("ObjScoreSP Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
      SPGaps1, SPGaps2, SPDiffGaps);

    Log("DiffObjScore Gaps1=%.4g Gaps2=%.4g DiffGaps=%.4g\n",
      GapScore1, GapScore2, GapDelta);

    Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, TotalDelta);
#endif

    return TotalDelta;
}
|
||||
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
114
src/muscle/muscle3.8.31/src/diffpaths.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
void DiffPaths(const PWPath &p1, const PWPath &p2, unsigned Edges1[],
|
||||
unsigned *ptruDiffCount1, unsigned Edges2[], unsigned *ptruDiffCount2)
|
||||
{
|
||||
#if TRACE
|
||||
Log("DiffPaths\n");
|
||||
Log("p1=");
|
||||
p1.LogMe();
|
||||
Log("p2=");
|
||||
p2.LogMe();
|
||||
#endif
|
||||
const unsigned uEdgeCount1 = p1.GetEdgeCount();
|
||||
const unsigned uEdgeCount2 = p2.GetEdgeCount();
|
||||
|
||||
unsigned uDiffCount1 = 0;
|
||||
unsigned uDiffCount2 = 0;
|
||||
unsigned uEdgeIndex1 = 0;
|
||||
unsigned uEdgeIndex2 = 0;
|
||||
const PWEdge *Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||
const PWEdge *Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||
for (;;)
|
||||
{
|
||||
unsigned uEdgeIndexTop1 = uEdgeIndex1;
|
||||
unsigned uEdgeIndexTop2 = uEdgeIndex2;
|
||||
Edge1 = &p1.GetEdge(uEdgeIndex1);
|
||||
Edge2 = &p2.GetEdge(uEdgeIndex2);
|
||||
#if TRACE
|
||||
Log("e1[%u] PLA%u PLB%u %c, e2[%u] PLA%u PLB %u %c DC1=%u DC2=%u\n",
|
||||
uEdgeIndex1, Edge1->uPrefixLengthA, Edge1->uPrefixLengthB, Edge1->cType,
|
||||
uEdgeIndex2, Edge2->uPrefixLengthA, Edge2->uPrefixLengthB, Edge2->cType,
|
||||
uDiffCount1, uDiffCount2);
|
||||
#endif
|
||||
if (Edge1->uPrefixLengthA == Edge2->uPrefixLengthA &&
|
||||
Edge1->uPrefixLengthB == Edge2->uPrefixLengthB)
|
||||
{
|
||||
if (!Edge1->Equal(*Edge2))
|
||||
{
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1;
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2;
|
||||
}
|
||||
++uEdgeIndex1;
|
||||
++uEdgeIndex2;
|
||||
}
|
||||
|
||||
else if (Edge2->uPrefixLengthA < Edge1->uPrefixLengthA ||
|
||||
Edge2->uPrefixLengthB < Edge1->uPrefixLengthB)
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||
|
||||
else if (Edge1->uPrefixLengthA < Edge2->uPrefixLengthA ||
|
||||
Edge1->uPrefixLengthB < Edge2->uPrefixLengthB)
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||
|
||||
if (uEdgeCount1 == uEdgeIndex1)
|
||||
{
|
||||
while (uEdgeIndex2 < uEdgeCount2)
|
||||
Edges2[uDiffCount2++] = uEdgeIndex2++;
|
||||
goto Done;
|
||||
}
|
||||
if (uEdgeCount2 == uEdgeIndex2)
|
||||
{
|
||||
while (uEdgeIndex1 < uEdgeCount1)
|
||||
Edges1[uDiffCount1++] = uEdgeIndex1++;
|
||||
goto Done;
|
||||
}
|
||||
if (uEdgeIndex1 == uEdgeIndexTop1 && uEdgeIndex2 == uEdgeIndexTop2)
|
||||
Quit("DiffPaths stuck");
|
||||
}
|
||||
Done:;
|
||||
#if TRACE
|
||||
Log("DiffCount1=%u (%u %u)\n", uDiffCount1, uEdgeCount1, uEdgeCount2);
|
||||
Log("Diffs1=");
|
||||
for (unsigned i = 0; i < uDiffCount1; ++i)
|
||||
{
|
||||
const PWEdge e = p1.GetEdge(Edges1[i]);
|
||||
Log(" %u=%c%u.%u", Edges1[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||
}
|
||||
Log("\n");
|
||||
Log("DiffCount2=%u\n", uDiffCount2);
|
||||
Log("Diffs2=");
|
||||
for (unsigned i = 0; i < uDiffCount2; ++i)
|
||||
{
|
||||
const PWEdge e = p2.GetEdge(Edges2[i]);
|
||||
Log(" %u=%c%u.%u", Edges2[i], e.cType, e.uPrefixLengthA, e.uPrefixLengthB);
|
||||
}
|
||||
Log("\n");
|
||||
#endif
|
||||
*ptruDiffCount1 = uDiffCount1;
|
||||
*ptruDiffCount2 = uDiffCount2;
|
||||
}
|
||||
|
||||
void TestDiffPaths()
|
||||
{
|
||||
PWPath p1;
|
||||
PWPath p2;
|
||||
|
||||
p1.AppendEdge('M', 1, 1);
|
||||
p1.AppendEdge('M', 2, 2);
|
||||
p1.AppendEdge('M', 3, 3);
|
||||
|
||||
p2.AppendEdge('M', 1, 1);
|
||||
p2.AppendEdge('D', 2, 1);
|
||||
p2.AppendEdge('I', 2, 2);
|
||||
p2.AppendEdge('M', 3, 3);
|
||||
|
||||
unsigned Edges1[64];
|
||||
unsigned Edges2[64];
|
||||
unsigned uDiffCount1;
|
||||
unsigned uDiffCount2;
|
||||
DiffPaths(p1, p2, Edges1, &uDiffCount1, Edges2, &uDiffCount2);
|
||||
}
|
||||
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
381
src/muscle/muscle3.8.31/src/difftrees.cpp
Normal file
@@ -0,0 +1,381 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
/***
|
||||
Algorithm to compare two trees, X and Y.
|
||||
|
||||
A node x in X and node y in Y are defined to be
|
||||
similar iff the set of leaves in the subtree under
|
||||
x is identical to the set of leaves under y.
|
||||
|
||||
A node is defined to be dissimilar iff it is not
|
||||
similar to any node in the other tree.
|
||||
|
||||
Nodes x and y are defined to be married iff every
|
||||
node in the subtree under x is similar to a node
|
||||
in the subtree under y. Married nodes are considered
|
||||
to be equal. The subtrees under two married nodes can
|
||||
at most differ by exchanges of left and right branches,
|
||||
which we do not consider to be significant here.
|
||||
|
||||
A node is defined to be a bachelor iff it is not
|
||||
married. If a node is a bachelor, then it has a
|
||||
dissimilar node in its subtree, and it follows
|
||||
immediately from the definition of marriage that its
|
||||
parent is also a bachelor. Hence all nodes on the path
|
||||
from a bachelor node to the root are bachelors.
|
||||
|
||||
We assume the trees have the same set of leaves, so
|
||||
every leaf is trivially both similar and married to
|
||||
the same leaf in the opposite tree. Bachelor nodes
|
||||
are therefore always internal (i.e., non-leaf) nodes.
|
||||
|
||||
A node is defined to be a diff iff (a) it is married
|
||||
and (b) its parent is a bachelor. The subtree under
|
||||
a diff is maximally similar to the other tree. (In
|
||||
other words, you cannot extend the subtree without
|
||||
adding a bachelor).
|
||||
|
||||
The set of diffs is the subset of the two trees that
|
||||
we consider to be identical.
|
||||
|
||||
Example:
|
||||
|
||||
-----A
|
||||
-----k
|
||||
----j -----B
|
||||
--i -----C
|
||||
------D
|
||||
|
||||
|
||||
-----A
|
||||
-----p
|
||||
----n -----B
|
||||
--m -----D
|
||||
------C
|
||||
|
||||
|
||||
The following pairs of internal nodes are similar.
|
||||
|
||||
Nodes Set of leaves
|
||||
----- -------------
|
||||
k,p A,B
|
||||
i,m A,B,C,D
|
||||
|
||||
Bachelors in the first tree are i and j, bachelors
|
||||
in the second tree are m and n.
|
||||
|
||||
Nodes k and p are married, but i and m are not (because j
|
||||
and n are bachelors). The diffs are C, D and k.
|
||||
|
||||
The set of bachelor nodes can be viewed as the internal
|
||||
nodes of a tree, the leaves of which are diffs. (To see
|
||||
that there can't be disjoint subtrees, note that the path
|
||||
from a diff to a root is all bachelor nodes, so there is
|
||||
always a path between two diffs that goes through the root).
|
||||
We call this tree the "diffs tree".
|
||||
|
||||
There is a simple O(N) algorithm to build the diffs tree.
|
||||
To achieve O(N) we avoid traversing a given subtree multiple
|
||||
times and also avoid comparing lists of leaves.
|
||||
|
||||
We visit nodes in depth-first order (i.e., a node is visited
|
||||
before its parent).
|
||||
|
||||
If either child of a node is a bachelor, we flag it as
|
||||
a bachelor.
|
||||
|
||||
If both children of the node we are visiting are married,
|
||||
we check whether the spouses of those children have the
|
||||
same parent in the other tree. If the parents are different,
|
||||
the current node is a bachelor. If they have the same parent,
|
||||
then the node we are visiting is the spouse of that parent.
|
||||
We assign this newly identified married couple a unique integer
|
||||
id. The id of a node is in one-to-one correspondence with the
|
||||
set of leaves in its subtree. Two nodes have the same set of
|
||||
leaves iff they have the same id. Bachelor nodes do not get
|
||||
an id.
|
||||
***/
|
||||
|
||||
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
|
||||
const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
|
||||
unsigned IdToDiffsLeafNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
|
||||
uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
|
||||
#endif
|
||||
if (bIsDiff[uTreeNodeIndex])
|
||||
{
|
||||
unsigned uLeafCount = tree.GetLeafCount();
|
||||
unsigned *Leaves = new unsigned[uLeafCount];
|
||||
GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
const unsigned uLeafNodeIndex = Leaves[n];
|
||||
const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
|
||||
if (uId >= tree.GetLeafCount())
|
||||
Quit("BuildDiffs, id out of range");
|
||||
IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
|
||||
#if TRACE
|
||||
Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
|
||||
#endif
|
||||
}
|
||||
delete[] Leaves;
|
||||
return;
|
||||
}
|
||||
|
||||
if (tree.IsLeaf(uTreeNodeIndex))
|
||||
Quit("BuildDiffs: should never reach leaf");
|
||||
|
||||
const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
|
||||
const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);
|
||||
|
||||
const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
|
||||
const unsigned uDiffsRight = uDiffsLeft + 1;
|
||||
|
||||
BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
|
||||
BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
|
||||
}
|
||||
|
||||
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
|
||||
unsigned IdToDiffsLeafNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("Tree1:\n");
|
||||
Tree1.LogMe();
|
||||
Log("\n");
|
||||
Log("Tree2:\n");
|
||||
Tree2.LogMe();
|
||||
#endif
|
||||
|
||||
if (!Tree1.IsRooted() || !Tree2.IsRooted())
|
||||
Quit("DiffTrees: requires rooted trees");
|
||||
|
||||
const unsigned uNodeCount = Tree1.GetNodeCount();
|
||||
const unsigned uNodeCount2 = Tree2.GetNodeCount();
|
||||
|
||||
const unsigned uLeafCount = Tree1.GetLeafCount();
|
||||
const unsigned uLeafCount2 = Tree2.GetLeafCount();
|
||||
assert(uLeafCount == uLeafCount2);
|
||||
|
||||
if (uNodeCount != uNodeCount2)
|
||||
Quit("DiffTrees: different node counts");
|
||||
|
||||
// Allocate tables so we can convert tree node index to
|
||||
// and from the unique id with a O(1) lookup.
|
||||
unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
|
||||
unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];
|
||||
|
||||
bool *bIsBachelor1 = new bool[uNodeCount];
|
||||
bool *bIsDiff1 = new bool[uNodeCount];
|
||||
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
NodeIndexToId1[uNodeIndex] = uNodeCount;
|
||||
bIsBachelor1[uNodeIndex] = false;
|
||||
bIsDiff1[uNodeIndex] = false;
|
||||
|
||||
// Use uNodeCount as value meaning "not set".
|
||||
IdToNodeIndex2[uNodeIndex] = uNodeCount;
|
||||
}
|
||||
|
||||
// Initialize node index <-> id lookup tables
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (Tree1.IsLeaf(uNodeIndex))
|
||||
{
|
||||
const unsigned uId = Tree1.GetLeafId(uNodeIndex);
|
||||
if (uId >= uNodeCount)
|
||||
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||
NodeIndexToId1[uNodeIndex] = uId;
|
||||
}
|
||||
|
||||
if (Tree2.IsLeaf(uNodeIndex))
|
||||
{
|
||||
const unsigned uId = Tree2.GetLeafId(uNodeIndex);
|
||||
if (uId >= uNodeCount)
|
||||
Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
|
||||
IdToNodeIndex2[uId] = uNodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Validity check. This verifies that the ids
|
||||
// pre-assigned to the leaves in Tree1 are unique
|
||||
// (note that the id<N check above does not rule
|
||||
// out two leaves having duplicate ids).
|
||||
for (unsigned uId = 0; uId < uLeafCount; ++uId)
|
||||
{
|
||||
unsigned uNodeIndex2 = IdToNodeIndex2[uId];
|
||||
if (uNodeCount == uNodeIndex2)
|
||||
Quit("DiffTrees, check 2");
|
||||
}
|
||||
|
||||
// Ids assigned to internal nodes are N, N+1 ...
|
||||
// An internal node id uniquely identifies a set
|
||||
// of two or more leaves.
|
||||
unsigned uInternalNodeId = uLeafCount;
|
||||
|
||||
// Depth-first traversal of tree.
|
||||
// The order guarantees that a node is visited before
|
||||
// its parent is visited.
|
||||
for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
|
||||
NULL_NEIGHBOR != uNodeIndex1;
|
||||
uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
|
||||
{
|
||||
#if TRACE
|
||||
Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
|
||||
uNodeIndex1,
|
||||
Tree1.IsLeaf(uNodeIndex1),
|
||||
bIsBachelor1[uNodeIndex1]);
|
||||
#endif
|
||||
|
||||
// Leaves are trivial; nothing to do.
|
||||
if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
|
||||
continue;
|
||||
|
||||
// If either child is a bachelor, flag
|
||||
// this node as a bachelor and continue.
|
||||
unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
|
||||
if (bIsBachelor1[uLeft1])
|
||||
{
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
|
||||
if (bIsBachelor1[uRight1])
|
||||
{
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Both children are married.
|
||||
// Married nodes are guaranteed to have an id.
|
||||
unsigned uIdLeft = NodeIndexToId1[uLeft1];
|
||||
unsigned uIdRight = NodeIndexToId1[uRight1];
|
||||
|
||||
if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
|
||||
Quit("DiffTrees, check 5");
|
||||
|
||||
// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
|
||||
unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
|
||||
unsigned uRight2 = IdToNodeIndex2[uIdRight];
|
||||
|
||||
if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
|
||||
Quit("DiffTrees, check 6");
|
||||
|
||||
// If the spouses of uLeft1 and uRight1 have the same
|
||||
// parent, then this parent is the spouse of uNodeIndex1.
|
||||
// Otherwise, uNodeIndex1 is a diff.
|
||||
unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
|
||||
unsigned uParentRight2 = Tree2.GetParent(uRight2);
|
||||
|
||||
#if TRACE
|
||||
Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
|
||||
uLeft1,
|
||||
uRight1,
|
||||
uLeft2,
|
||||
uRight2,
|
||||
uParentLeft2,
|
||||
uParentRight2);
|
||||
#endif
|
||||
|
||||
if (uParentLeft2 == uParentRight2)
|
||||
{
|
||||
NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
|
||||
IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
|
||||
++uInternalNodeId;
|
||||
}
|
||||
else
|
||||
bIsBachelor1[uNodeIndex1] = true;
|
||||
}
|
||||
|
||||
unsigned uDiffCount = 0;
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (bIsBachelor1[uNodeIndex])
|
||||
continue;
|
||||
if (Tree1.IsRoot(uNodeIndex))
|
||||
{
|
||||
// Special case: if no bachelors, consider the
|
||||
// root a diff.
|
||||
if (!bIsBachelor1[uNodeIndex])
|
||||
bIsDiff1[uNodeIndex] = true;
|
||||
continue;
|
||||
}
|
||||
const unsigned uParent = Tree1.GetParent(uNodeIndex);
|
||||
if (bIsBachelor1[uParent])
|
||||
{
|
||||
bIsDiff1[uNodeIndex] = true;
|
||||
++uDiffCount;
|
||||
}
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("Tree1:\n");
|
||||
Log("Node Id Bach Diff Name\n");
|
||||
Log("---- ---- ---- ---- ----\n");
|
||||
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||
{
|
||||
Log("%4u %4u %d %d",
|
||||
n,
|
||||
NodeIndexToId1[n],
|
||||
bIsBachelor1[n],
|
||||
bIsDiff1[n]);
|
||||
if (Tree1.IsLeaf(n))
|
||||
Log(" %s", Tree1.GetLeafName(n));
|
||||
Log("\n");
|
||||
}
|
||||
Log("\n");
|
||||
Log("Tree2:\n");
|
||||
Log("Node Id Name\n");
|
||||
Log("---- ---- ----\n");
|
||||
for (unsigned n = 0; n < uNodeCount; ++n)
|
||||
{
|
||||
Log("%4u ", n);
|
||||
if (Tree2.IsLeaf(n))
|
||||
Log(" %s", Tree2.GetLeafName(n));
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
Diffs.CreateRooted();
|
||||
const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
|
||||
const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
IdToDiffsLeafNodeIndex[n] = uNodeCount;
|
||||
|
||||
BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
|
||||
IdToDiffsLeafNodeIndex);
|
||||
|
||||
#if TRACE
|
||||
Log("\n");
|
||||
Log("Diffs:\n");
|
||||
Diffs.LogMe();
|
||||
Log("\n");
|
||||
Log("IdToDiffsLeafNodeIndex:");
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
{
|
||||
if (n%16 == 0)
|
||||
Log("\n");
|
||||
else
|
||||
Log(" ");
|
||||
Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
|
||||
}
|
||||
Log("\n");
|
||||
#endif
|
||||
|
||||
for (unsigned n = 0; n < uLeafCount; ++n)
|
||||
if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
|
||||
Quit("TreeDiffs check 7");
|
||||
|
||||
delete[] NodeIndexToId1;
|
||||
delete[] IdToNodeIndex2;
|
||||
|
||||
delete[] bIsBachelor1;
|
||||
delete[] bIsDiff1;
|
||||
}
|
||||
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
235
src/muscle/muscle3.8.31/src/difftreese.cpp
Normal file
@@ -0,0 +1,235 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
/***
|
||||
Algorithm to compare two trees, X and Y.
|
||||
|
||||
A node x in X and node y in Y are defined to be
|
||||
similar iff the set of leaves in the subtree under
|
||||
x is identical to the set of leaves under y.
|
||||
|
||||
A node is defined to be changed iff it is not
|
||||
similar to any node in the other tree.
|
||||
|
||||
Nodes x and y are defined to be married iff every
|
||||
node in the subtree under x is similar to a node
|
||||
in the subtree under y. Married nodes are considered
|
||||
to be equal. The subtrees under two married nodes can
|
||||
at most differ by exchanges of left and right branches,
|
||||
which we do not consider to be significant here.
|
||||
|
||||
A node is changed iff it is not married. If a node is
|
||||
changed, then it has a dissimilar node in its subtree,
|
||||
and it follows immediately from the definition of marriage
|
||||
that its parent is also changed. Hence all nodes on the
|
||||
path from a changed node to the root are changed.
|
||||
|
||||
We assume the trees have the same set of leaves, so
|
||||
every leaf is trivially both similar and married to
|
||||
the same leaf in the opposite tree. Changed nodes
|
||||
are therefore always internal (i.e., non-leaf) nodes.
|
||||
|
||||
Example:
|
||||
|
||||
-----A
|
||||
-----k
|
||||
----j -----B
|
||||
--i -----C
|
||||
------D
|
||||
|
||||
|
||||
-----A
|
||||
-----p
|
||||
----n -----B
|
||||
--m -----D
|
||||
------C
|
||||
|
||||
|
||||
The following pairs of internal nodes are similar.
|
||||
|
||||
Nodes Set of leaves
|
||||
----- -------------
|
||||
k,p A,B
|
||||
i,m A,B,C,D
|
||||
|
||||
Changed nodes in the first tree are i and j, changed nodes
|
||||
in the second tree are m and n.
|
||||
|
||||
Node k and p are married, but i and m are not (because j
|
||||
and n are changed). The diffs are C, D and k.
|
||||
|
||||
To achieve O(N) we avoid traversing a given subtree multiple
|
||||
times and also avoid comparing lists of leaves.
|
||||
|
||||
We visit nodes in depth-first order (i.e., a node is visited
|
||||
before its parent).
|
||||
|
||||
If either child of a node is changed, we flag it as changed.
|
||||
|
||||
If both children of the node we are visiting are married,
|
||||
we check whether the spouses of those children have the
|
||||
same parent in the other tree. If the parents are different,
|
||||
the current node is changed. If they have the same parent,
|
||||
then the node we are visiting is the spouse of that parent.
|
||||
We assign this newly identified married couple a unique integer
|
||||
id. The id of a node is in one-to-one correspondence with the
|
||||
set of leaves in its subtree. Two nodes have the same set of
|
||||
leaves iff they have the same id. Changed nodes do not get
|
||||
an id.
|
||||
***/
|
||||
|
||||
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
|
||||
unsigned NewNodeIndexToOldNodeIndex[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("DiffTreesE NewTree:\n");
|
||||
NewTree.LogMe();
|
||||
Log("\n");
|
||||
Log("OldTree:\n");
|
||||
OldTree.LogMe();
|
||||
#endif
|
||||
|
||||
if (!NewTree.IsRooted() || !OldTree.IsRooted())
|
||||
Quit("DiffTrees: requires rooted trees");
|
||||
|
||||
const unsigned uNodeCount = NewTree.GetNodeCount();
|
||||
const unsigned uOldNodeCount = OldTree.GetNodeCount();
|
||||
const unsigned uLeafCount = NewTree.GetLeafCount();
|
||||
const unsigned uOldLeafCount = OldTree.GetLeafCount();
|
||||
if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
|
||||
Quit("DiffTreesE: different node counts");
|
||||
|
||||
{
|
||||
unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
|
||||
for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
|
||||
{
|
||||
if (OldTree.IsLeaf(uOldNodeIndex))
|
||||
{
|
||||
unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
|
||||
IdToOldNodeIndex[Id] = uOldNodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize NewNodeIndexToOldNodeIndex[]
|
||||
// All internal nodes are marked as changed, but may be updated later.
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
{
|
||||
unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
|
||||
assert(uId < uLeafCount);
|
||||
|
||||
unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
|
||||
assert(uOldNodeIndex < uNodeCount);
|
||||
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
|
||||
}
|
||||
else
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||
}
|
||||
delete[] IdToOldNodeIndex;
|
||||
}
|
||||
|
||||
// Depth-first traversal of tree.
|
||||
// The order guarantees that a node is visited before
|
||||
// its parent is visited.
|
||||
for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
|
||||
NULL_NEIGHBOR != uNewNodeIndex;
|
||||
uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
|
||||
{
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
continue;
|
||||
|
||||
// If either child is changed, flag this node as changed and continue.
|
||||
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||
unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||
if (NODE_CHANGED == uOldLeft)
|
||||
{
|
||||
NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||
unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
|
||||
{
|
||||
NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
|
||||
unsigned uOldParentRight = OldTree.GetParent(uOldRight);
|
||||
if (uOldParentLeft == uOldParentRight)
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
|
||||
else
|
||||
NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("NewToOld ");
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
Log(" [%3u]=", uNewNodeIndex);
|
||||
if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
|
||||
Log(" X");
|
||||
else
|
||||
Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
|
||||
if ((uNewNodeIndex+1)%8 == 0)
|
||||
Log("\n ");
|
||||
}
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
|
||||
{
|
||||
unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
|
||||
if (NewTree.IsLeaf(uNewNodeIndex))
|
||||
{
|
||||
if (uOld >= uNodeCount)
|
||||
{
|
||||
Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
|
||||
uNewNodeIndex, uOld, uNodeCount);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
|
||||
unsigned uIdOld = OldTree.GetLeafId(uOld);
|
||||
if (uIdNew != uIdOld)
|
||||
{
|
||||
Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
|
||||
uNewNodeIndex, uOld, uIdNew, uIdOld);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (NODE_CHANGED == uOld)
|
||||
continue;
|
||||
|
||||
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
|
||||
unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
|
||||
|
||||
unsigned uOldLeft = OldTree.GetLeft(uOld);
|
||||
unsigned uOldRight = OldTree.GetRight(uOld);
|
||||
|
||||
unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
|
||||
unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];
|
||||
|
||||
bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
|
||||
bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
|
||||
if (!bSameNotRotated && !bSameRotated)
|
||||
{
|
||||
Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
|
||||
Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
|
||||
Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
|
||||
Quit("Diff check failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
89
src/muscle/muscle3.8.31/src/distcalc.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "distcalc.h"
|
||||
#include "msa.h"
|
||||
|
||||
void DistCalcDF::Init(const DistFunc &DF)
|
||||
{
|
||||
m_ptrDF = &DF;
|
||||
}
|
||||
|
||||
void DistCalcDF::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||
{
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
Dist[j] = m_ptrDF->GetDist(i, j);
|
||||
}
|
||||
|
||||
unsigned DistCalcDF::GetCount() const
|
||||
{
|
||||
return m_ptrDF->GetCount();
|
||||
}
|
||||
|
||||
unsigned DistCalcDF::GetId(unsigned i) const
|
||||
{
|
||||
return m_ptrDF->GetId(i);
|
||||
}
|
||||
|
||||
const char *DistCalcDF::GetName(unsigned i) const
|
||||
{
|
||||
return m_ptrDF->GetName(i);
|
||||
}
|
||||
|
||||
void DistCalcMSA::Init(const MSA &msa, DISTANCE Distance)
|
||||
{
|
||||
m_ptrMSA = &msa;
|
||||
m_Distance = Distance;
|
||||
}
|
||||
|
||||
void DistCalcMSA::CalcDistRange(unsigned i, dist_t Dist[]) const
|
||||
{
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
{
|
||||
switch (m_Distance)
|
||||
{
|
||||
case DISTANCE_PctIdKimura:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
Dist[j] = (float) KimuraDist(PctId);
|
||||
break;
|
||||
}
|
||||
case DISTANCE_PctIdLog:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
Dist[j] = (float) PctIdToMAFFTDist(PctId);
|
||||
break;
|
||||
}
|
||||
case DISTANCE_ScoreDist:
|
||||
{
|
||||
double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2);
|
||||
Dist[j] = (float) GetScoreDist(*m_ptrMSA, i, j);
|
||||
continue;
|
||||
}
|
||||
case DISTANCE_Edit:
|
||||
{
|
||||
const float PctId = (float) m_ptrMSA->GetPctIdentityPair(i, j);
|
||||
if (PctId > 1.0)
|
||||
Quit("Internal error, DISTANCE_Edit, pct id=%.3g", PctId);
|
||||
Dist[j] = (float) 1.0 - PctId;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
Quit("DistCalcMSA: Invalid DISTANCE_%u", m_Distance);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned DistCalcMSA::GetCount() const
|
||||
{
|
||||
return m_ptrMSA->GetSeqCount();
|
||||
}
|
||||
|
||||
unsigned DistCalcMSA::GetId(unsigned i) const
|
||||
{
|
||||
return m_ptrMSA->GetSeqId(i);
|
||||
}
|
||||
|
||||
const char *DistCalcMSA::GetName(unsigned i) const
|
||||
{
|
||||
return m_ptrMSA->GetSeqName(i);
|
||||
}
|
||||
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
45
src/muscle/muscle3.8.31/src/distcalc.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef DistCalc_h
|
||||
#define DistCalc_h
|
||||
|
||||
typedef float dist_t;
|
||||
const dist_t BIG_DIST = (dist_t) 1e29;
|
||||
|
||||
class DistFunc;
|
||||
|
||||
class DistCalc
|
||||
{
|
||||
public:
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const = 0;
|
||||
virtual unsigned GetCount() const = 0;
|
||||
virtual unsigned GetId(unsigned i) const = 0;
|
||||
virtual const char *GetName(unsigned i) const = 0;
|
||||
};
|
||||
|
||||
class DistCalcDF : public DistCalc
|
||||
{
|
||||
public:
|
||||
void Init(const DistFunc &DF);
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||
virtual unsigned GetCount() const;
|
||||
virtual unsigned GetId(unsigned i) const;
|
||||
virtual const char *GetName(unsigned i) const;
|
||||
|
||||
private:
|
||||
const DistFunc *m_ptrDF;
|
||||
};
|
||||
|
||||
class DistCalcMSA : public DistCalc
|
||||
{
|
||||
public:
|
||||
void Init(const MSA &msa, DISTANCE Distance);
|
||||
virtual void CalcDistRange(unsigned i, dist_t Dist[]) const;
|
||||
virtual unsigned GetCount() const;
|
||||
virtual unsigned GetId(unsigned i) const;
|
||||
virtual const char *GetName(unsigned i) const;
|
||||
|
||||
private:
|
||||
const MSA *m_ptrMSA;
|
||||
DISTANCE m_Distance;
|
||||
};
|
||||
|
||||
#endif // DistCalc_h
|
||||
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
113
src/muscle/muscle3.8.31/src/distfunc.cpp
Normal file
@@ -0,0 +1,113 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include <assert.h>
|
||||
|
||||
// Construct an empty table: zero items, nothing allocated.
DistFunc::DistFunc() :
  m_uCount(0),
  m_uCacheCount(0),
  m_Dists(0),
  m_Names(0),
  m_Ids(0)
	{
	}
|
||||
|
||||
// Release the name strings and the three tables.
DistFunc::~DistFunc()
	{
	if (0 != m_Names)
		{
		// The name table holds m_uCacheCount slots (its size when it
		// was allocated), which can exceed m_uCount after SetCount()
		// shrinks the table. Walk every slot so names stored before a
		// shrink are released too; unused slots are null and free()
		// ignores them.
		for (unsigned i = 0; i < m_uCacheCount; ++i)
			free(m_Names[i]);
		}
	delete[] m_Dists;
	delete[] m_Names;
	delete[] m_Ids;
	}
|
||||
|
||||
float DistFunc::GetDist(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
return m_Dists[VectorIndex(uIndex1, uIndex2)];
|
||||
}
|
||||
|
||||
unsigned DistFunc::GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
void DistFunc::SetCount(unsigned uCount)
|
||||
{
|
||||
m_uCount = uCount;
|
||||
if (uCount <= m_uCacheCount)
|
||||
return;
|
||||
delete[] m_Dists;
|
||||
m_Dists = new float[VectorLength()];
|
||||
m_Names = new char *[m_uCount];
|
||||
m_Ids = new unsigned[m_uCount];
|
||||
m_uCacheCount = uCount;
|
||||
|
||||
memset(m_Names, 0, m_uCount*sizeof(char *));
|
||||
memset(m_Ids, 0xff, m_uCount*sizeof(unsigned));
|
||||
memset(m_Dists, 0, VectorLength()*sizeof(float));
|
||||
}
|
||||
|
||||
void DistFunc::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
|
||||
{
|
||||
m_Dists[VectorIndex(uIndex1, uIndex2)] = dDist;
|
||||
m_Dists[VectorIndex(uIndex2, uIndex1)] = dDist;
|
||||
}
|
||||
|
||||
unsigned DistFunc::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
|
||||
{
|
||||
assert(uIndex1 < m_uCount && uIndex2 < m_uCount);
|
||||
return uIndex1*m_uCount + uIndex2;
|
||||
}
|
||||
|
||||
unsigned DistFunc::VectorLength() const
|
||||
{
|
||||
return m_uCount*m_uCount;
|
||||
}
|
||||
|
||||
void DistFunc::SetName(unsigned uIndex, const char szName[])
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
m_Names[uIndex] = strsave(szName);
|
||||
}
|
||||
|
||||
void DistFunc::SetId(unsigned uIndex, unsigned uId)
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
m_Ids[uIndex] = uId;
|
||||
}
|
||||
|
||||
const char *DistFunc::GetName(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_Names[uIndex];
|
||||
}
|
||||
|
||||
unsigned DistFunc::GetId(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_Ids[uIndex];
|
||||
}
|
||||
|
||||
void DistFunc::LogMe() const
|
||||
{
|
||||
Log("DistFunc::LogMe count=%u\n", m_uCount);
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
Log(" %7u", i);
|
||||
Log("\n");
|
||||
|
||||
Log(" ");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
Log(" %7.7s", m_Names[i] ? m_Names[i] : "");
|
||||
Log("\n");
|
||||
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
Log("%4u %10.10s : ", i, m_Names[i] ? m_Names[i] : "");
|
||||
for (unsigned j = 0; j <= i; ++j)
|
||||
Log(" %7.4g", GetDist(i, j));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
36
src/muscle/muscle3.8.31/src/distfunc.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef DistFunc_h
|
||||
#define DistFunc_h
|
||||
|
||||
// Table of pairwise distances between m_uCount items, each of which can
// also carry a name and an id. Distances are kept in a flat array of
// m_uCount*m_uCount floats.
class DistFunc
	{
public:
	DistFunc();
	virtual ~DistFunc();

	// Sizing and writing distances.
	virtual void SetCount(unsigned uCount);
	virtual void SetDist(unsigned uIndex1, unsigned uIndex2, float dDist);

	// Per-item labels.
	void SetName(unsigned uIndex, const char szName[]);
	void SetId(unsigned uIndex, unsigned uId);
	const char *GetName(unsigned uIndex) const;
	unsigned GetId(unsigned uIndex) const;

	// Reading.
	virtual float GetDist(unsigned uIndex1, unsigned uIndex2) const;
	virtual unsigned GetCount() const;

	// Dump the table to the log.
	void LogMe() const;

protected:
	// Position of a pair in, and total length of, the flat array.
	unsigned VectorIndex(unsigned uIndex1, unsigned uIndex2) const;
	unsigned VectorLength() const;

private:
	unsigned m_uCount;		// current number of items
	unsigned m_uCacheCount;	// number of items the tables were allocated for
	float *m_Dists;
	char **m_Names;
	unsigned *m_Ids;
	};
|
||||
|
||||
#endif // DistFunc_h
|
||||
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
45
src/muscle/muscle3.8.31/src/distpwkimura.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
#include "seqvect.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
void DistPWKimura(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
|
||||
SetSeqWeightMethod(SEQWEIGHT_Henikoff);
|
||||
|
||||
const unsigned uSeqCount = v.Length();
|
||||
DF.SetCount(uSeqCount);
|
||||
|
||||
const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
|
||||
unsigned uCount = 0;
|
||||
SetProgressDesc("PWKimura distance");
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
const Seq &s1 = v.GetSeq(uSeqIndex1);
|
||||
MSA msa1;
|
||||
msa1.FromSeq(s1);
|
||||
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
|
||||
{
|
||||
if (0 == uCount%20)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
const Seq &s2 = v.GetSeq(uSeqIndex2);
|
||||
MSA msa2;
|
||||
msa2.FromSeq(s2);
|
||||
|
||||
PWPath Path;
|
||||
MSA msaOut;
|
||||
AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);
|
||||
|
||||
double dPctId = msaOut.GetPctIdentityPair(0, 1);
|
||||
float f = (float) KimuraDist(dPctId);
|
||||
|
||||
DF.SetDist(uSeqIndex1, uSeqIndex2, f);
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
SetSeqWeightMethod(SeqWeightSave);
|
||||
}
|
||||
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
299
src/muscle/muscle3.8.31/src/domuscle.cpp
Normal file
@@ -0,0 +1,299 @@
|
||||
#include "muscle.h"
|
||||
#include "textfile.h"
|
||||
#include "seqvect.h"
|
||||
#include "distfunc.h"
|
||||
#include "msa.h"
|
||||
#include "tree.h"
|
||||
#include "profile.h"
|
||||
#include "timing.h"
|
||||
|
||||
// Warning text that DoMuscle() writes to stderr when a user tree file is
// given and g_bUseTreeNoWarn is not set.
static char g_strUseTreeWarning[] =
|
||||
"\n******** WARNING ****************\n"
|
||||
"\nYou specified the -usetree option.\n"
|
||||
"Note that a good evolutionary tree may NOT be a good\n"
|
||||
"guide tree for multiple alignment. For more details,\n"
|
||||
"please refer to the user guide. To disable this\n"
|
||||
"warning, use -usetree_nowarn <treefilename>.\n\n";
|
||||
|
||||
void DoMuscle()
|
||||
{
|
||||
SetOutputFileName(g_pstrOutFileName);
|
||||
SetInputFileName(g_pstrInFileName);
|
||||
|
||||
SetMaxIters(g_uMaxIters);
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
|
||||
TextFile fileIn(g_pstrInFileName);
|
||||
SeqVect v;
|
||||
v.FromFASTAFile(fileIn);
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
if (0 == uSeqCount)
|
||||
Quit("No sequences in input file");
|
||||
|
||||
ALPHA Alpha = ALPHA_Undefined;
|
||||
switch (g_SeqType)
|
||||
{
|
||||
case SEQTYPE_Auto:
|
||||
Alpha = v.GuessAlpha();
|
||||
break;
|
||||
|
||||
case SEQTYPE_Protein:
|
||||
Alpha = ALPHA_Amino;
|
||||
break;
|
||||
|
||||
case SEQTYPE_DNA:
|
||||
Alpha = ALPHA_DNA;
|
||||
break;
|
||||
|
||||
case SEQTYPE_RNA:
|
||||
Alpha = ALPHA_RNA;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid seq type");
|
||||
}
|
||||
SetAlpha(Alpha);
|
||||
v.FixAlpha();
|
||||
|
||||
PTR_SCOREMATRIX UserMatrix = 0;
|
||||
if (0 != g_pstrMatrixFileName)
|
||||
{
|
||||
const char *FileName = g_pstrMatrixFileName;
|
||||
const char *Path = getenv("MUSCLE_MXPATH");
|
||||
if (Path != 0)
|
||||
{
|
||||
size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
|
||||
char *NewFileName = new char[n];
|
||||
sprintf(NewFileName, "%s/%s", Path, FileName);
|
||||
FileName = NewFileName;
|
||||
}
|
||||
TextFile File(FileName);
|
||||
UserMatrix = ReadMx(File);
|
||||
g_Alpha = ALPHA_Amino;
|
||||
g_PPScore = PPSCORE_SP;
|
||||
}
|
||||
|
||||
SetPPScore();
|
||||
|
||||
if (0 != UserMatrix)
|
||||
g_ptrScoreMatrix = UserMatrix;
|
||||
|
||||
unsigned uMaxL = 0;
|
||||
unsigned uTotL = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned L = v.GetSeq(uSeqIndex).Length();
|
||||
uTotL += L;
|
||||
if (L > uMaxL)
|
||||
uMaxL = L;
|
||||
}
|
||||
|
||||
SetIter(1);
|
||||
g_bDiags = g_bDiags1;
|
||||
SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
|
||||
|
||||
SetMuscleSeqVect(v);
|
||||
|
||||
MSA::SetIdCount(uSeqCount);
|
||||
|
||||
// Initialize sequence ids.
|
||||
// From this point on, ids must somehow propogate from here.
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
v.SetSeqId(uSeqIndex, uSeqIndex);
|
||||
|
||||
if (0 == uSeqCount)
|
||||
Quit("Input file '%s' has no sequences", g_pstrInFileName);
|
||||
if (1 == uSeqCount)
|
||||
{
|
||||
TextFile fileOut(g_pstrOutFileName, true);
|
||||
v.ToFile(fileOut);
|
||||
return;
|
||||
}
|
||||
|
||||
if (uSeqCount > 1)
|
||||
MHackStart(v);
|
||||
|
||||
// First iteration
|
||||
Tree GuideTree;
|
||||
if (0 != g_pstrUseTreeFileName)
|
||||
{
|
||||
// Discourage users...
|
||||
if (!g_bUseTreeNoWarn)
|
||||
fprintf(stderr, "%s", g_strUseTreeWarning);
|
||||
|
||||
// Read tree from file
|
||||
TextFile TreeFile(g_pstrUseTreeFileName);
|
||||
GuideTree.FromFile(TreeFile);
|
||||
|
||||
// Make sure tree is rooted
|
||||
if (!GuideTree.IsRooted())
|
||||
Quit("User tree must be rooted");
|
||||
|
||||
if (GuideTree.GetLeafCount() != uSeqCount)
|
||||
Quit("User tree does not match input sequences");
|
||||
|
||||
const unsigned uNodeCount = GuideTree.GetNodeCount();
|
||||
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
|
||||
{
|
||||
if (!GuideTree.IsLeaf(uNodeIndex))
|
||||
continue;
|
||||
const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
|
||||
unsigned uSeqIndex;
|
||||
bool SeqFound = v.FindName(LeafName, &uSeqIndex);
|
||||
if (!SeqFound)
|
||||
Quit("Label %s in tree does not match sequences", LeafName);
|
||||
unsigned uId = v.GetSeqIdFromName(LeafName);
|
||||
GuideTree.SetLeafId(uNodeIndex, uId);
|
||||
}
|
||||
}
|
||||
else
|
||||
TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
|
||||
g_pstrDistMxFileName1);
|
||||
|
||||
const char *Tree1 = ValueOpt("Tree1");
|
||||
if (0 != Tree1)
|
||||
{
|
||||
TextFile f(Tree1, true);
|
||||
GuideTree.ToFile(f);
|
||||
if (g_bClusterOnly)
|
||||
return;
|
||||
}
|
||||
|
||||
SetMuscleTree(GuideTree);
|
||||
ValidateMuscleIds(GuideTree);
|
||||
|
||||
MSA msa;
|
||||
ProgNode *ProgNodes = 0;
|
||||
if (g_bLow)
|
||||
ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
|
||||
else
|
||||
ProgressiveAlign(v, GuideTree, msa);
|
||||
SetCurrentAlignment(msa);
|
||||
|
||||
if (0 != g_pstrComputeWeightsFileName)
|
||||
{
|
||||
extern void OutWeights(const char *FileName, const MSA &msa);
|
||||
SetMSAWeightsMuscle(msa);
|
||||
OutWeights(g_pstrComputeWeightsFileName, msa);
|
||||
return;
|
||||
}
|
||||
|
||||
ValidateMuscleIds(msa);
|
||||
|
||||
if (1 == g_uMaxIters || 2 == uSeqCount)
|
||||
{
|
||||
//TextFile fileOut(g_pstrOutFileName, true);
|
||||
//MHackEnd(msa);
|
||||
//msa.ToFile(fileOut);
|
||||
MuscleOutput(msa);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 == g_pstrUseTreeFileName)
|
||||
{
|
||||
g_bDiags = g_bDiags2;
|
||||
SetIter(2);
|
||||
|
||||
if (g_bLow)
|
||||
{
|
||||
if (0 != g_uMaxTreeRefineIters)
|
||||
RefineTreeE(msa, v, GuideTree, ProgNodes);
|
||||
}
|
||||
else
|
||||
RefineTree(msa, GuideTree);
|
||||
|
||||
const char *Tree2 = ValueOpt("Tree2");
|
||||
if (0 != Tree2)
|
||||
{
|
||||
TextFile f(Tree2, true);
|
||||
GuideTree.ToFile(f);
|
||||
}
|
||||
}
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight2);
|
||||
SetMuscleTree(GuideTree);
|
||||
|
||||
if (g_bAnchors)
|
||||
RefineVert(msa, GuideTree, g_uMaxIters - 2);
|
||||
else
|
||||
RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);
|
||||
|
||||
#if 0
|
||||
// Refining by subfamilies is disabled as it didn't give better
|
||||
// results. I tried doing this before and after RefineHoriz.
|
||||
// Should get back to this as it seems like this should work.
|
||||
RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
|
||||
#endif
|
||||
|
||||
ValidateMuscleIds(msa);
|
||||
ValidateMuscleIds(GuideTree);
|
||||
|
||||
//TextFile fileOut(g_pstrOutFileName, true);
|
||||
//MHackEnd(msa);
|
||||
//msa.ToFile(fileOut);
|
||||
MuscleOutput(msa);
|
||||
}
|
||||
|
||||
void Run()
|
||||
{
|
||||
SetStartTime();
|
||||
Log("Started %s\n", GetTimeAsStr());
|
||||
for (int i = 0; i < g_argc; ++i)
|
||||
Log("%s ", g_argv[i]);
|
||||
Log("\n");
|
||||
|
||||
#if TIMING
|
||||
TICKS t1 = GetClockTicks();
|
||||
#endif
|
||||
if (g_bRefine)
|
||||
Refine();
|
||||
else if (g_bRefineW)
|
||||
{
|
||||
extern void DoRefineW();
|
||||
DoRefineW();
|
||||
}
|
||||
else if (g_bProfDB)
|
||||
ProfDB();
|
||||
else if (g_bSW)
|
||||
Local();
|
||||
else if (0 != g_pstrSPFileName)
|
||||
DoSP();
|
||||
else if (g_bProfile)
|
||||
Profile();
|
||||
else if (g_bPPScore)
|
||||
PPScore();
|
||||
else if (g_bPAS)
|
||||
ProgAlignSubFams();
|
||||
else if (g_bMakeTree)
|
||||
{
|
||||
extern void DoMakeTree();
|
||||
DoMakeTree();
|
||||
}
|
||||
else
|
||||
DoMuscle();
|
||||
|
||||
#if TIMING
|
||||
extern TICKS g_ticksDP;
|
||||
extern TICKS g_ticksObjScore;
|
||||
TICKS t2 = GetClockTicks();
|
||||
TICKS TotalTicks = t2 - t1;
|
||||
TICKS ticksOther = TotalTicks - g_ticksDP - g_ticksObjScore;
|
||||
double dSecs = TicksToSecs(TotalTicks);
|
||||
double PctDP = (double) g_ticksDP*100.0/(double) TotalTicks;
|
||||
double PctOS = (double) g_ticksObjScore*100.0/(double) TotalTicks;
|
||||
double PctOther = (double) ticksOther*100.0/(double) TotalTicks;
|
||||
Log(" Ticks Secs Pct\n");
|
||||
Log(" ============ ======= =====\n");
|
||||
Log("DP %12ld %7.2f %5.1f%%\n",
|
||||
(long) g_ticksDP, TicksToSecs(g_ticksDP), PctDP);
|
||||
Log("OS %12ld %7.2f %5.1f%%\n",
|
||||
(long) g_ticksObjScore, TicksToSecs(g_ticksObjScore), PctOS);
|
||||
Log("Other %12ld %7.2f %5.1f%%\n",
|
||||
(long) ticksOther, TicksToSecs(ticksOther), PctOther);
|
||||
Log("Total %12ld %7.2f 100.0%%\n", (long) TotalTicks, dSecs);
|
||||
#endif
|
||||
|
||||
ListDiagSavings();
|
||||
Log("Finished %s\n", GetTimeAsStr());
|
||||
}
|
||||
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
60
src/muscle/muscle3.8.31/src/dosp.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#include "muscle.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "objscore.h"
|
||||
#include "tree.h"
|
||||
#include "profile.h"
|
||||
|
||||
void DoSP()
|
||||
{
|
||||
TextFile f(g_pstrSPFileName);
|
||||
|
||||
MSA a;
|
||||
a.FromFile(f);
|
||||
|
||||
ALPHA Alpha = ALPHA_Undefined;
|
||||
switch (g_SeqType)
|
||||
{
|
||||
case SEQTYPE_Auto:
|
||||
Alpha = a.GuessAlpha();
|
||||
break;
|
||||
|
||||
case SEQTYPE_Protein:
|
||||
Alpha = ALPHA_Amino;
|
||||
break;
|
||||
|
||||
case SEQTYPE_DNA:
|
||||
Alpha = ALPHA_DNA;
|
||||
break;
|
||||
|
||||
case SEQTYPE_RNA:
|
||||
Alpha = ALPHA_RNA;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid SeqType");
|
||||
}
|
||||
SetAlpha(Alpha);
|
||||
a.FixAlpha();
|
||||
|
||||
SetPPScore();
|
||||
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
if (0 == uSeqCount)
|
||||
Quit("No sequences in input file %s", g_pstrSPFileName);
|
||||
|
||||
MSA::SetIdCount(uSeqCount);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
a.SetSeqId(uSeqIndex, uSeqIndex);
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
Tree tree;
|
||||
TreeFromMSA(a, tree, g_Cluster2, g_Distance2, g_Root2);
|
||||
SetMuscleTree(tree);
|
||||
SetMSAWeightsMuscle((MSA &) a);
|
||||
|
||||
SCORE SP = ObjScoreSP(a);
|
||||
|
||||
Log("File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||
fprintf(stderr, "File=%s;SP=%.4g\n", g_pstrSPFileName, SP);
|
||||
}
|
||||
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
73
src/muscle/muscle3.8.31/src/dpregionlist.h
Normal file
@@ -0,0 +1,73 @@
|
||||
#ifndef DPRegionList_h
|
||||
#define DPRegionList_h
|
||||
|
||||
#include "diaglist.h"
|
||||
|
||||
enum DPREGIONTYPE
|
||||
{
|
||||
DPREGIONTYPE_Unknown,
|
||||
DPREGIONTYPE_Diag,
|
||||
DPREGIONTYPE_Rect
|
||||
};
|
||||
|
||||
struct DPRegion
|
||||
{
|
||||
DPREGIONTYPE m_Type;
|
||||
union
|
||||
{
|
||||
Diag m_Diag;
|
||||
Rect m_Rect;
|
||||
};
|
||||
};
|
||||
|
||||
const unsigned MAX_DPREGIONS = 1024;
|
||||
|
||||
class DPRegionList
|
||||
{
|
||||
public:
|
||||
DPRegionList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DPRegionList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void Add(const DPRegion &r);
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
const DPRegion &Get(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_DPRegions[uIndex];
|
||||
}
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||
};
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB);
|
||||
|
||||
#endif // DPRegionList_h
|
||||
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
108
src/muscle/muscle3.8.31/src/dpreglist.cpp
Normal file
@@ -0,0 +1,108 @@
|
||||
#include "muscle.h"
|
||||
#include "dpreglist.h"
|
||||
|
||||
unsigned DPRegionList::GetDPArea() const
|
||||
{
|
||||
unsigned uArea = 0;
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const DPRegion &r = m_DPRegions[i];
|
||||
if (DPREGIONTYPE_Rect == r.m_Type)
|
||||
uArea += r.m_Rect.m_uLengthA*r.m_Rect.m_uLengthB;
|
||||
}
|
||||
return uArea;
|
||||
}
|
||||
|
||||
void DPRegionList::Add(const DPRegion &r)
|
||||
{
|
||||
if (m_uCount == MAX_DPREGIONS)
|
||||
Quit("DPRegionList::Add, overflow %d", m_uCount);
|
||||
m_DPRegions[m_uCount] = r;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
void DPRegionList::LogMe() const
|
||||
{
|
||||
Log("DPRegionList::LogMe, count=%u\n", m_uCount);
|
||||
Log("Region Type StartA StartB EndA EndB\n");
|
||||
Log("------ ---- ------ ------ ---- ----\n");
|
||||
for (unsigned i = 0; i < m_uCount; ++i)
|
||||
{
|
||||
const DPRegion &r = m_DPRegions[i];
|
||||
Log("%6u ", i);
|
||||
if (DPREGIONTYPE_Diag == r.m_Type)
|
||||
Log("Diag %6u %6u %6u %6u\n",
|
||||
r.m_Diag.m_uStartPosA,
|
||||
r.m_Diag.m_uStartPosB,
|
||||
r.m_Diag.m_uStartPosA + r.m_Diag.m_uLength - 1,
|
||||
r.m_Diag.m_uStartPosB + r.m_Diag.m_uLength - 1);
|
||||
else if (DPREGIONTYPE_Rect == r.m_Type)
|
||||
Log("Rect %6u %6u %6u %6u\n",
|
||||
r.m_Rect.m_uStartPosA,
|
||||
r.m_Rect.m_uStartPosB,
|
||||
r.m_Rect.m_uStartPosA + r.m_Rect.m_uLengthA - 1,
|
||||
r.m_Rect.m_uStartPosB + r.m_Rect.m_uLengthB - 1);
|
||||
else
|
||||
Log(" *** ERROR *** Type=%u\n", r.m_Type);
|
||||
}
|
||||
}
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
if (g_uDiagMargin > g_uMinDiagLength/2)
|
||||
Quit("Invalid parameters, diagmargin=%d must be <= 2*diaglength=%d",
|
||||
g_uDiagMargin, g_uMinDiagLength);
|
||||
|
||||
unsigned uStartPosA = 0;
|
||||
unsigned uStartPosB = 0;
|
||||
const unsigned uDiagCount = DL.GetCount();
|
||||
DPRegion r;
|
||||
for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex)
|
||||
{
|
||||
const Diag &d = DL.Get(uDiagIndex);
|
||||
assert(d.m_uLength >= g_uMinDiagLength);
|
||||
const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1;
|
||||
const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1;
|
||||
const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin;
|
||||
const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin;
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Rect;
|
||||
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||
|
||||
assert(uStartVertexA + 1 >= uStartPosA);
|
||||
assert(uStartVertexB + 1 >= uStartPosB);
|
||||
r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA;
|
||||
r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB;
|
||||
RL.Add(r);
|
||||
|
||||
if (uEndVertexA > uStartVertexA + 1)
|
||||
{
|
||||
const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1;
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Diag;
|
||||
r.m_Diag.m_uStartPosA = uStartVertexA + 1;
|
||||
r.m_Diag.m_uStartPosB = uStartVertexB + 1;
|
||||
assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB);
|
||||
r.m_Diag.m_uLength = uEndVertexA - uStartVertexA - 1;
|
||||
RL.Add(r);
|
||||
}
|
||||
|
||||
uStartPosA = uEndVertexA;
|
||||
uStartPosB = uEndVertexB;
|
||||
}
|
||||
|
||||
assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin);
|
||||
assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin);
|
||||
|
||||
r.m_Type = DPREGIONTYPE_Rect;
|
||||
r.m_Rect.m_uStartPosA = uStartPosA;
|
||||
r.m_Rect.m_uStartPosB = uStartPosB;
|
||||
|
||||
assert(uLengthA >= uStartPosA);
|
||||
assert(uLengthB >= uStartPosB);
|
||||
r.m_Rect.m_uLengthA = uLengthA - uStartPosA;
|
||||
r.m_Rect.m_uLengthB = uLengthB - uStartPosB;
|
||||
RL.Add(r);
|
||||
}
|
||||
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
76
src/muscle/muscle3.8.31/src/dpreglist.h
Normal file
@@ -0,0 +1,76 @@
|
||||
#ifndef dpreglist_h
|
||||
#define dpreglist_h
|
||||
|
||||
#include "diaglist.h"
|
||||
|
||||
enum DPREGIONTYPE
|
||||
{
|
||||
DPREGIONTYPE_Unknown,
|
||||
DPREGIONTYPE_Diag,
|
||||
DPREGIONTYPE_Rect
|
||||
};
|
||||
|
||||
struct DPRegion
|
||||
{
|
||||
DPREGIONTYPE m_Type;
|
||||
union
|
||||
{
|
||||
Diag m_Diag;
|
||||
Rect m_Rect;
|
||||
};
|
||||
};
|
||||
|
||||
const unsigned MAX_DPREGIONS = 1024;
|
||||
|
||||
class DPRegionList
|
||||
{
|
||||
public:
|
||||
DPRegionList()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
~DPRegionList()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
|
||||
public:
|
||||
// Creation
|
||||
void Clear()
|
||||
{
|
||||
Free();
|
||||
}
|
||||
void Add(const DPRegion &r);
|
||||
|
||||
// Accessors
|
||||
unsigned GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
const DPRegion &Get(unsigned uIndex) const
|
||||
{
|
||||
assert(uIndex < m_uCount);
|
||||
return m_DPRegions[uIndex];
|
||||
}
|
||||
|
||||
unsigned GetDPArea() const;
|
||||
|
||||
// Diagnostics
|
||||
void LogMe() const;
|
||||
|
||||
private:
|
||||
void Free()
|
||||
{
|
||||
m_uCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned m_uCount;
|
||||
DPRegion m_DPRegions[MAX_DPREGIONS];
|
||||
};
|
||||
|
||||
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL,
|
||||
unsigned uLengthA, unsigned uLengthB);
|
||||
|
||||
#endif // dpreglist_h
|
||||
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
41
src/muscle/muscle3.8.31/src/drawtree.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
|
||||
/***
|
||||
Simple tree drawing algorithm.
|
||||
|
||||
y coordinate of node is index in depth-first traversal.
|
||||
x coordinate is distance from root.
|
||||
***/
|
||||
|
||||
static unsigned DistFromRoot(const Tree &tree, unsigned uNodeIndex)
|
||||
{
|
||||
const unsigned uRoot = tree.GetRootNodeIndex();
|
||||
unsigned uDist = 0;
|
||||
while (uNodeIndex != uRoot)
|
||||
{
|
||||
++uDist;
|
||||
uNodeIndex = tree.GetParent(uNodeIndex);
|
||||
}
|
||||
return uDist;
|
||||
}
|
||||
|
||||
static void DrawNode(const Tree &tree, unsigned uNodeIndex)
|
||||
{
|
||||
if (!tree.IsLeaf(uNodeIndex))
|
||||
DrawNode(tree, tree.GetLeft(uNodeIndex));
|
||||
|
||||
unsigned uDist = DistFromRoot(tree, uNodeIndex);
|
||||
for (unsigned i = 0; i < 5*uDist; ++i)
|
||||
Log(" ");
|
||||
Log("%d\n", uNodeIndex);
|
||||
|
||||
if (!tree.IsLeaf(uNodeIndex))
|
||||
DrawNode(tree, tree.GetRight(uNodeIndex));
|
||||
}
|
||||
|
||||
void DrawTree(const Tree &tree)
|
||||
{
|
||||
unsigned uRoot = tree.GetRootNodeIndex();
|
||||
DrawNode(tree, uRoot);
|
||||
}
|
||||
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
88
src/muscle/muscle3.8.31/src/edgelist.cpp
Normal file
@@ -0,0 +1,88 @@
|
||||
#include "muscle.h"
|
||||
#include "edgelist.h"
|
||||
|
||||
// Creates an empty list with no storage allocated.
EdgeList::EdgeList()
    : m_uCount(0),
      m_uCacheSize(0),
      m_uNode1(0),
      m_uNode2(0)
{
}
|
||||
|
||||
// Releases both node arrays through the common reset path.
EdgeList::~EdgeList()
{
    this->Clear();
}
|
||||
|
||||
void EdgeList::Clear()
|
||||
{
|
||||
delete[] m_uNode1;
|
||||
delete[] m_uNode2;
|
||||
m_uNode1 = 0;
|
||||
m_uNode2 = 0;
|
||||
m_uCount = 0;
|
||||
m_uCacheSize = 0;
|
||||
}
|
||||
|
||||
void EdgeList::Add(unsigned uNode1, unsigned uNode2)
|
||||
{
|
||||
if (m_uCount <= m_uCacheSize)
|
||||
Expand();
|
||||
m_uNode1[m_uCount] = uNode1;
|
||||
m_uNode2[m_uCount] = uNode2;
|
||||
++m_uCount;
|
||||
}
|
||||
|
||||
unsigned EdgeList::GetCount() const
|
||||
{
|
||||
return m_uCount;
|
||||
}
|
||||
|
||||
void EdgeList::GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const
|
||||
{
|
||||
if (uIndex > m_uCount)
|
||||
Quit("EdgeList::GetEdge(%u) count=%u", uIndex, m_uCount);
|
||||
*ptruNode1 = m_uNode1[uIndex];
|
||||
*ptruNode2 = m_uNode2[uIndex];
|
||||
}
|
||||
|
||||
void EdgeList::Copy(const EdgeList &rhs)
|
||||
{
|
||||
Clear();
|
||||
const unsigned uCount = rhs.GetCount();
|
||||
for (unsigned n = 0; n < uCount; ++n)
|
||||
{
|
||||
unsigned uNode1;
|
||||
unsigned uNode2;
|
||||
rhs.GetEdge(n, &uNode1, &uNode2);
|
||||
Add(uNode1, uNode2);
|
||||
}
|
||||
}
|
||||
|
||||
void EdgeList::Expand()
|
||||
{
|
||||
unsigned uNewCacheSize = m_uCacheSize + 512;
|
||||
unsigned *NewNode1 = new unsigned[uNewCacheSize];
|
||||
unsigned *NewNode2 = new unsigned[uNewCacheSize];
|
||||
if (m_uCount > 0)
|
||||
{
|
||||
memcpy(NewNode1, m_uNode1, m_uCount*sizeof(unsigned));
|
||||
memcpy(NewNode2, m_uNode2, m_uCount*sizeof(unsigned));
|
||||
}
|
||||
delete[] m_uNode1;
|
||||
delete[] m_uNode2;
|
||||
m_uNode1 = NewNode1;
|
||||
m_uNode2 = NewNode2;
|
||||
m_uCacheSize = uNewCacheSize;
|
||||
}
|
||||
|
||||
void EdgeList::LogMe() const
|
||||
{
|
||||
for (unsigned n = 0; n < m_uCount; ++n)
|
||||
{
|
||||
if (n > 0)
|
||||
Log(" ");
|
||||
Log("%u->%u", m_uNode1[n], m_uNode2[n]);
|
||||
}
|
||||
Log("\n");
|
||||
}
|
||||
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
28
src/muscle/muscle3.8.31/src/edgelist.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef EdgeList_h
|
||||
#define EdgeList_h
|
||||
|
||||
// Growable list of (node1, node2) pairs held in two parallel heap arrays.
//
// NOTE(review): the class owns raw arrays and has a destructor, but no copy
// constructor or assignment operator is declared here; presumably instances
// are only duplicated through Copy() -- confirm against callers.
class EdgeList
{
public:
    EdgeList();
    virtual ~EdgeList();

    void Clear();
    void Add(unsigned uNode1, unsigned uNode2);
    unsigned GetCount() const;
    void GetEdge(unsigned uIndex, unsigned *ptruNode1, unsigned *ptruNode2) const;
    void Copy(const EdgeList &rhs);
    void LogMe() const;

private:
    void Expand();

    unsigned m_uCount;      // edges stored
    unsigned m_uCacheSize;  // allocated entries in each array
    unsigned *m_uNode1;     // first node of each edge
    unsigned *m_uNode2;     // second node of each edge
};
|
||||
|
||||
#endif // EdgeList_h
|
||||
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
8
src/muscle/muscle3.8.31/src/enumopts.cpp
Normal file
@@ -0,0 +1,8 @@
|
||||
#include "muscle.h"
|
||||
#include "enumopts.h"
|
||||
|
||||
// Expand enums.h into one EnumOpt table per enum type XXX, named XXX_Opts:
// a { "Name", XXX_Name } entry per value, ended by a { 0, 0 } sentinel.
#define s(t)	EnumOpt t##_Opts[] = {
#define c(t, x)	{ #x, t##_##x },
#define e(t)	{ 0, 0 } };

#include "enums.h"
|
||||
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
16
src/muscle/muscle3.8.31/src/enumopts.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef enumopts_h
|
||||
#define enumopts_h
|
||||
|
||||
// One entry of an option table: the option's spelling and the enum value it
// maps to.
struct EnumOpt
{
    const char *pstrOpt;
    int iValue;
};

// Expand enums.h into "extern EnumOpt XXX_Opts[];" for each enum type XXX;
// the per-value and end markers expand to nothing here.
#define s(t)	extern EnumOpt t##_Opts[];
#define c(t, x)
#define e(t)
#include "enums.h"
|
||||
|
||||
|
||||
#endif // enumopts_h
|
||||
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
98
src/muscle/muscle3.8.31/src/enums.h
Normal file
@@ -0,0 +1,98 @@
|
||||
// enums.h
|
||||
// Define enum types.
|
||||
// Exploit macro hacks to avoid lots of repetitive typing.
|
||||
// Generally I am opposed to macro hacks because of the
|
||||
// highly obscure code that results, but in this case it
|
||||
// makes maintenance much easier and less error-prone.
|
||||
// The idea is that this file can be included in different
|
||||
// places with different definitions of s (Start), c (Case)
|
||||
// and e (End). See types.h.
|
||||
|
||||
s(ALPHA)
|
||||
c(ALPHA, Amino)
|
||||
c(ALPHA, DNA)
|
||||
c(ALPHA, RNA)
|
||||
e(ALPHA)
|
||||
|
||||
s(SEQTYPE)
|
||||
c(SEQTYPE, Protein)
|
||||
c(SEQTYPE, DNA)
|
||||
c(SEQTYPE, RNA)
|
||||
c(SEQTYPE, Auto)
|
||||
e(SEQTYPE)
|
||||
|
||||
s(ROOT)
|
||||
c(ROOT, Pseudo)
|
||||
c(ROOT, MidLongestSpan)
|
||||
c(ROOT, MinAvgLeafDist)
|
||||
e(ROOT)
|
||||
|
||||
s(CLUSTER)
|
||||
c(CLUSTER, UPGMA)
|
||||
c(CLUSTER, UPGMAMax)
|
||||
c(CLUSTER, UPGMAMin)
|
||||
c(CLUSTER, UPGMB)
|
||||
c(CLUSTER, NeighborJoining)
|
||||
e(CLUSTER)
|
||||
|
||||
s(JOIN)
|
||||
c(JOIN, NearestNeighbor)
|
||||
c(JOIN, NeighborJoining)
|
||||
e(JOIN)
|
||||
|
||||
s(LINKAGE)
|
||||
c(LINKAGE, Min)
|
||||
c(LINKAGE, Avg)
|
||||
c(LINKAGE, Max)
|
||||
c(LINKAGE, NeighborJoining)
|
||||
c(LINKAGE, Biased)
|
||||
e(LINKAGE)
|
||||
|
||||
s(DISTANCE)
|
||||
c(DISTANCE, Kmer6_6)
|
||||
c(DISTANCE, Kmer20_3)
|
||||
c(DISTANCE, Kmer20_4)
|
||||
c(DISTANCE, Kbit20_3)
|
||||
c(DISTANCE, Kmer4_6)
|
||||
c(DISTANCE, PctIdKimura)
|
||||
c(DISTANCE, PctIdLog)
|
||||
c(DISTANCE, PWKimura)
|
||||
c(DISTANCE, PWScoreDist)
|
||||
c(DISTANCE, ScoreDist)
|
||||
c(DISTANCE, Edit)
|
||||
e(DISTANCE)
|
||||
|
||||
s(PPSCORE)
|
||||
c(PPSCORE, LE)
|
||||
c(PPSCORE, SP)
|
||||
c(PPSCORE, SV)
|
||||
c(PPSCORE, SPN)
|
||||
e(PPSCORE)
|
||||
|
||||
s(SEQWEIGHT)
|
||||
c(SEQWEIGHT, None)
|
||||
c(SEQWEIGHT, Henikoff)
|
||||
c(SEQWEIGHT, HenikoffPB)
|
||||
c(SEQWEIGHT, GSC)
|
||||
c(SEQWEIGHT, ClustalW)
|
||||
c(SEQWEIGHT, ThreeWay)
|
||||
e(SEQWEIGHT)
|
||||
|
||||
s(OBJSCORE)
|
||||
c(OBJSCORE, SP) // Sum of Pairs of sequences
|
||||
c(OBJSCORE, DP) // Dynamic Programming score
|
||||
c(OBJSCORE, XP) // Cross Pairs = sum of pairs between two MSAs
|
||||
c(OBJSCORE, PS) // sum of Prof-Seq score for all seqs in MSA
|
||||
c(OBJSCORE, SPF) // sum of pairs, fast approximation
|
||||
c(OBJSCORE, SPM) // sp if <= 100 seqs, spf otherwise
|
||||
e(OBJSCORE)
|
||||
|
||||
s(TERMGAPS)
|
||||
c(TERMGAPS, Full)
|
||||
c(TERMGAPS, Half)
|
||||
c(TERMGAPS, Ext)
|
||||
e(TERMGAPS)
|
||||
|
||||
#undef s
|
||||
#undef c
|
||||
#undef e
|
||||
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
16
src/muscle/muscle3.8.31/src/enumtostr.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// Shared scratch buffer for values that have no name; the pointer returned
// for such a value is overwritten by the next such call.
static char szMsg[64];

// Define XXXToStr(XXX x) functions for each enum type XXX.
// Known values return their name; XXX_Undefined returns "Undefined";
// anything else is formatted as "XXX_<number>" into szMsg.
#define s(t) \
	const char *t##ToStr(t x) \
		{ \
		switch (x) \
			{ \
		case t##_Undefined: \
			return "Undefined";
#define c(t, x) \
		case t##_##x: \
			return #x;
#define e(t) \
			} \
		sprintf(szMsg, #t "_%d", x); \
		return szMsg; \
		}
#include "enums.h"

// Define StrToXXX(const char *Str) functions for each enum type XXX.
// Names are matched with stricmp; an unknown name calls Quit.
#define s(t) \
	t StrTo##t(const char *Str) \
		{ \
		if (0) \
			;
#define c(t, x) \
		else if (0 == stricmp(#x, Str)) \
			return t##_##x;
#define e(t) \
		Quit("Invalid value %s for type %s", Str, #t); \
		return t##_Undefined; \
		}
#include "enums.h"
|
||||
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
689
src/muscle/muscle3.8.31/src/estring.cpp
Normal file
@@ -0,0 +1,689 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
#include "estring.h"
|
||||
#include "seq.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
An "estring" is an edit string that operates on a sequence.
|
||||
An estring is represented as a vector of integers.
|
||||
It is interpreted in order of increasing suffix.
|
||||
A positive value n means copy n letters.
|
||||
A negative value -n means insert n indels.
|
||||
Zero marks the end of the vector.
|
||||
Consecutive entries must have opposite sign, i.e. the
|
||||
shortest possible representation must be used.
|
||||
|
||||
A "tpair" is a traceback path for a pairwise alignment
|
||||
represented as two estrings, one for each sequence.
|
||||
***/
|
||||
|
||||
#define c2(c,d) (((unsigned char) c) << 8 | (unsigned char) d)
|
||||
|
||||
// Number of entries in a zero-terminated estring, not counting the
// terminating zero.
unsigned LengthEstring(const short es[])
{
    unsigned uEntries = 0;
    for (const short *p = es; 0 != *p; ++p)
    {
        ++uEntries;
    }
    return uEntries;
}
|
||||
|
||||
short *EstringNewCopy(const short es[])
|
||||
{
|
||||
unsigned n = LengthEstring(es) + 1;
|
||||
short *esNew = new short[n];
|
||||
memcpy(esNew, es, n*sizeof(short));
|
||||
return esNew;
|
||||
}
|
||||
|
||||
void LogEstring(const short es[])
|
||||
{
|
||||
Log("<");
|
||||
for (unsigned i = 0; es[i] != 0; ++i)
|
||||
{
|
||||
if (i > 0)
|
||||
Log(" ");
|
||||
Log("%d", es[i]);
|
||||
}
|
||||
Log(">");
|
||||
}
|
||||
|
||||
// True if the two zero-terminated estrings have identical entries.
static bool EstringsEq(const short es1[], const short es2[])
{
    unsigned i = 0;
    while (es1[i] == es2[i])
    {
        // Equal so far; reaching the shared terminator means a full match.
        if (0 == es1[i])
            return true;
        ++i;
    }
    return false;
}
|
||||
|
||||
// Totals the positive entries (letters copied) and the magnitudes of the
// negative entries (indels inserted) of an estring.
static void EstringCounts(const short es[], unsigned *ptruSymbols,
  unsigned *ptruIndels)
{
    unsigned uLetters = 0;
    unsigned uGaps = 0;
    for (const short *p = es; 0 != *p; ++p)
    {
        const short n = *p;
        // n is non-zero here, so it is either a letter run or a gap run.
        if (n > 0)
            uLetters += n;
        else
            uGaps += -n;
    }
    *ptruSymbols = uLetters;
    *ptruIndels = uGaps;
}
|
||||
|
||||
static char *EstringOp(const short es[], const char s[])
|
||||
{
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert((unsigned) strlen(s) == uSymbols);
|
||||
char *sout = new char[uSymbols + uIndels + 1];
|
||||
char *psout = sout;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
*psout++ = *s++;
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
*psout++ = '-';
|
||||
}
|
||||
assert(0 == *s);
|
||||
*psout = 0;
|
||||
return sout;
|
||||
}
|
||||
|
||||
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
|
||||
{
|
||||
#if DEBUG
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert(sIn.Length() == uSymbols);
|
||||
#endif
|
||||
sOut.Clear();
|
||||
sOut.SetName(sIn.GetName());
|
||||
int p = 0;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
char c = sIn[p++];
|
||||
sOut.push_back(c);
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
sOut.push_back('-');
|
||||
}
|
||||
}
|
||||
|
||||
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
|
||||
{
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(es, &uSymbols, &uIndels);
|
||||
assert(sIn.Length() == uSymbols);
|
||||
|
||||
unsigned uColCount = uSymbols + uIndels;
|
||||
|
||||
a.Clear();
|
||||
a.SetSize(1, uColCount);
|
||||
|
||||
a.SetSeqName(0, sIn.GetName());
|
||||
a.SetSeqId(0, sIn.GetId());
|
||||
|
||||
unsigned p = 0;
|
||||
unsigned uColIndex = 0;
|
||||
for (;;)
|
||||
{
|
||||
int n = *es++;
|
||||
if (0 == n)
|
||||
break;
|
||||
if (n > 0)
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
char c = sIn[p++];
|
||||
a.SetChar(0, uColIndex++, c);
|
||||
}
|
||||
else
|
||||
for (int i = 0; i < -n; ++i)
|
||||
a.SetChar(0, uColIndex++, '-');
|
||||
}
|
||||
assert(uColIndex == uColCount);
|
||||
return uColCount;
|
||||
}
|
||||
|
||||
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
|
||||
{
|
||||
// First pass to determine size of estrings esA and esB
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
if (0 == uEdgeCount)
|
||||
{
|
||||
short *esA = new short[1];
|
||||
short *esB = new short[1];
|
||||
esA[0] = 0;
|
||||
esB[0] = 0;
|
||||
*ptresA = esA;
|
||||
*ptresB = esB;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned iLengthA = 1;
|
||||
unsigned iLengthB = 1;
|
||||
const char cFirstEdgeType = Path.GetEdge(0).cType;
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('D', 'D'):
|
||||
case c2('I', 'I'):
|
||||
break;
|
||||
|
||||
case c2('D', 'M'):
|
||||
case c2('M', 'D'):
|
||||
++iLengthB;
|
||||
break;
|
||||
|
||||
case c2('I', 'M'):
|
||||
case c2('M', 'I'):
|
||||
++iLengthA;
|
||||
break;
|
||||
|
||||
case c2('I', 'D'):
|
||||
case c2('D', 'I'):
|
||||
++iLengthB;
|
||||
++iLengthA;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
|
||||
// Pass2 for seq A
|
||||
{
|
||||
short *esA = new short[iLengthA+1];
|
||||
unsigned iA = 0;
|
||||
switch (Path.GetEdge(0).cType)
|
||||
{
|
||||
case 'M':
|
||||
case 'D':
|
||||
esA[0] = 1;
|
||||
break;
|
||||
|
||||
case 'I':
|
||||
esA[0] = -1;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('D', 'D'):
|
||||
case c2('D', 'M'):
|
||||
case c2('M', 'D'):
|
||||
++(esA[iA]);
|
||||
break;
|
||||
|
||||
case c2('I', 'D'):
|
||||
case c2('I', 'M'):
|
||||
++iA;
|
||||
esA[iA] = 1;
|
||||
break;
|
||||
|
||||
case c2('M', 'I'):
|
||||
case c2('D', 'I'):
|
||||
++iA;
|
||||
esA[iA] = -1;
|
||||
break;
|
||||
|
||||
case c2('I', 'I'):
|
||||
--(esA[iA]);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
assert(iA == iLengthA - 1);
|
||||
esA[iLengthA] = 0;
|
||||
*ptresA = esA;
|
||||
}
|
||||
|
||||
{
|
||||
// Pass2 for seq B
|
||||
short *esB = new short[iLengthB+1];
|
||||
unsigned iB = 0;
|
||||
switch (Path.GetEdge(0).cType)
|
||||
{
|
||||
case 'M':
|
||||
case 'I':
|
||||
esB[0] = 1;
|
||||
break;
|
||||
|
||||
case 'D':
|
||||
esB[0] = -1;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
char cPrevEdgeType = cFirstEdgeType;
|
||||
for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cEdgeType = Edge.cType;
|
||||
|
||||
switch (c2(cPrevEdgeType, cEdgeType))
|
||||
{
|
||||
case c2('M', 'M'):
|
||||
case c2('I', 'I'):
|
||||
case c2('I', 'M'):
|
||||
case c2('M', 'I'):
|
||||
++(esB[iB]);
|
||||
break;
|
||||
|
||||
case c2('D', 'I'):
|
||||
case c2('D', 'M'):
|
||||
++iB;
|
||||
esB[iB] = 1;
|
||||
break;
|
||||
|
||||
case c2('M', 'D'):
|
||||
case c2('I', 'D'):
|
||||
++iB;
|
||||
esB[iB] = -1;
|
||||
break;
|
||||
|
||||
case c2('D', 'D'):
|
||||
--(esB[iB]);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
cPrevEdgeType = cEdgeType;
|
||||
}
|
||||
assert(iB == iLengthB - 1);
|
||||
esB[iLengthB] = 0;
|
||||
*ptresB = esB;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1);
|
||||
unsigned uSymbols;
|
||||
unsigned uIndels;
|
||||
EstringCounts(*ptresA, &uSymbols, &uIndels);
|
||||
assert(uSymbols == LastEdge.uPrefixLengthA);
|
||||
assert(uSymbols + uIndels == uEdgeCount);
|
||||
|
||||
EstringCounts(*ptresB, &uSymbols, &uIndels);
|
||||
assert(uSymbols == LastEdge.uPrefixLengthB);
|
||||
assert(uSymbols + uIndels == uEdgeCount);
|
||||
|
||||
PWPath TmpPath;
|
||||
EstringsToPath(*ptresA, *ptresB, TmpPath);
|
||||
TmpPath.AssertEqual(Path);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
|
||||
{
|
||||
Path.Clear();
|
||||
unsigned iA = 0;
|
||||
unsigned iB = 0;
|
||||
int nA = esA[iA++];
|
||||
int nB = esB[iB++];
|
||||
unsigned uPrefixLengthA = 0;
|
||||
unsigned uPrefixLengthB = 0;
|
||||
for (;;)
|
||||
{
|
||||
char cType;
|
||||
if (nA > 0)
|
||||
{
|
||||
if (nB > 0)
|
||||
{
|
||||
cType = 'M';
|
||||
--nA;
|
||||
--nB;
|
||||
}
|
||||
else if (nB < 0)
|
||||
{
|
||||
cType = 'D';
|
||||
--nA;
|
||||
++nB;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
}
|
||||
else if (nA < 0)
|
||||
{
|
||||
if (nB > 0)
|
||||
{
|
||||
cType = 'I';
|
||||
++nA;
|
||||
--nB;
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
}
|
||||
else
|
||||
assert(false);
|
||||
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
++uPrefixLengthA;
|
||||
++uPrefixLengthB;
|
||||
break;
|
||||
case 'D':
|
||||
++uPrefixLengthA;
|
||||
break;
|
||||
case 'I':
|
||||
++uPrefixLengthB;
|
||||
break;
|
||||
}
|
||||
|
||||
PWEdge Edge;
|
||||
Edge.cType = cType;
|
||||
Edge.uPrefixLengthA = uPrefixLengthA;
|
||||
Edge.uPrefixLengthB = uPrefixLengthB;
|
||||
Path.AppendEdge(Edge);
|
||||
|
||||
if (nA == 0)
|
||||
{
|
||||
if (0 == esA[iA])
|
||||
{
|
||||
assert(0 == esB[iB]);
|
||||
break;
|
||||
}
|
||||
nA = esA[iA++];
|
||||
}
|
||||
if (nB == 0)
|
||||
nB = esB[iB++];
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
Multiply two estrings to make a third estring.
|
||||
The product of two estrings e1*e2 is defined to be
|
||||
the estring that produces the same result as applying
|
||||
e1 then e2. Multiplication is not commutative. In fact,
|
||||
the reversed order is undefined unless both estrings
|
||||
consist of a single, identical, positive entry.
|
||||
A primary motivation for using estrings is that
|
||||
multiplication is very fast, reducing the time
|
||||
needed to construct the root alignment.
|
||||
|
||||
Example
|
||||
|
||||
<-1,3>(XXX) = -XXX
|
||||
<2,-1,2>(-XXX) = -X-XX
|
||||
|
||||
Therefore,
|
||||
|
||||
<-1,3>*<2,-1,2> = <-1,1,-1,2>
|
||||
***/
|
||||
|
||||
static bool CanMultiplyEstrings(const short es1[], const short es2[])
|
||||
{
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
return uSymbols1 + uIndels1 == uSymbols2;
|
||||
}
|
||||
|
||||
// Appends a gap run n (callers pass a negative count) to the partially built
// estring esp. ip is the index of the last entry written so far, or -1 when
// esp is still empty. If the last entry is already a gap run (negative) the
// new run is merged into it; otherwise a new entry is started and ip advances.
static inline void AppendGaps(short esp[], int &ip, int n)
{
	const bool bMergeIntoLast = (ip != -1) && (esp[ip] < 0);
	if (bMergeIntoLast)
	{
		esp[ip] += n;
		return;
	}
	++ip;
	esp[ip] = n;
}
|
||||
|
||||
// Appends a symbol run n (callers pass a positive count) to the partially
// built estring esp. ip is the index of the last entry written so far, or -1
// when esp is still empty. If the last entry is already a symbol run
// (positive) the new run is merged into it; otherwise a new entry is started
// and ip advances.
static inline void AppendSymbols(short esp[], int &ip, int n)
{
	const bool bMergeIntoLast = (ip != -1) && (esp[ip] > 0);
	if (bMergeIntoLast)
	{
		esp[ip] += n;
		return;
	}
	++ip;
	esp[ip] = n;
}
|
||||
|
||||
void MulEstrings(const short es1[], const short es2[], short esp[])
|
||||
{
|
||||
assert(CanMultiplyEstrings(es1, es2));
|
||||
|
||||
unsigned i1 = 0;
|
||||
int ip = -1;
|
||||
int n1 = es1[i1++];
|
||||
for (unsigned i2 = 0; ; ++i2)
|
||||
{
|
||||
int n2 = es2[i2];
|
||||
if (0 == n2)
|
||||
break;
|
||||
if (n2 > 0)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (n1 < 0)
|
||||
{
|
||||
if (n2 > -n1)
|
||||
{
|
||||
AppendGaps(esp, ip, n1);
|
||||
n2 += n1;
|
||||
n1 = es1[i1++];
|
||||
}
|
||||
else if (n2 == -n1)
|
||||
{
|
||||
AppendGaps(esp, ip, n1);
|
||||
n1 = es1[i1++];
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < -n1);
|
||||
AppendGaps(esp, ip, -n2);
|
||||
n1 += n2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n1 > 0);
|
||||
if (n2 > n1)
|
||||
{
|
||||
AppendSymbols(esp, ip, n1);
|
||||
n2 -= n1;
|
||||
n1 = es1[i1++];
|
||||
}
|
||||
else if (n2 == n1)
|
||||
{
|
||||
AppendSymbols(esp, ip, n1);
|
||||
n1 = es1[i1++];
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < n1);
|
||||
AppendSymbols(esp, ip, n2);
|
||||
n1 -= n2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(n2 < 0);
|
||||
AppendGaps(esp, ip, n2);
|
||||
}
|
||||
}
|
||||
esp[++ip] = 0;
|
||||
|
||||
#if DEBUG
|
||||
{
|
||||
int MaxLen = (int) (LengthEstring(es1) + LengthEstring(es2) + 1);
|
||||
assert(ip < MaxLen);
|
||||
if (ip >= 2)
|
||||
for (int i = 0; i < ip - 2; ++i)
|
||||
{
|
||||
if (!(esp[i] > 0 && esp[i+1] < 0 || esp[i] < 0 && esp[i+1] > 0))
|
||||
{
|
||||
Log("Bad result of MulEstring: ");
|
||||
LogEstring(esp);
|
||||
Quit("Assert failed (alternating signs)");
|
||||
}
|
||||
}
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uSymbolsp;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
unsigned uIndelsp;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
EstringCounts(esp, &uSymbolsp, &uIndelsp);
|
||||
if (uSymbols1 + uIndels1 != uSymbols2)
|
||||
{
|
||||
Log("Bad result of MulEstring: ");
|
||||
LogEstring(esp);
|
||||
Quit("Assert failed (counts1 %u %u %u)",
|
||||
uSymbols1, uIndels1, uSymbols2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test(const short es1[], const short es2[], const short esa[])
|
||||
{
|
||||
unsigned uSymbols1;
|
||||
unsigned uSymbols2;
|
||||
unsigned uIndels1;
|
||||
unsigned uIndels2;
|
||||
EstringCounts(es1, &uSymbols1, &uIndels1);
|
||||
EstringCounts(es2, &uSymbols2, &uIndels2);
|
||||
|
||||
char s[4096];
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols1] = 0;
|
||||
|
||||
char *s1 = EstringOp(es1, s);
|
||||
char *s12 = EstringOp(es2, s1);
|
||||
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols2] = 0;
|
||||
char *s2 = EstringOp(es2, s);
|
||||
|
||||
Log("%s * %s = %s\n", s1, s2, s12);
|
||||
|
||||
LogEstring(es1);
|
||||
Log(" * ");
|
||||
LogEstring(es2);
|
||||
Log(" = ");
|
||||
LogEstring(esa);
|
||||
Log("\n");
|
||||
|
||||
short esp[4096];
|
||||
MulEstrings(es1, es2, esp);
|
||||
LogEstring(esp);
|
||||
if (!EstringsEq(esp, esa))
|
||||
Log(" *ERROR* ");
|
||||
Log("\n");
|
||||
|
||||
memset(s, 'X', sizeof(s));
|
||||
s[uSymbols1] = 0;
|
||||
char *sp = EstringOp(esp, s);
|
||||
Log("%s\n", sp);
|
||||
Log("\n==========\n\n");
|
||||
}
|
||||
|
||||
void TestEstrings()
|
||||
{
|
||||
SetListFileName("c:\\tmp\\muscle.log", false);
|
||||
//{
|
||||
//short es1[] = { -1, 1, -1, 0 };
|
||||
//short es2[] = { 1, -1, 2, 0 };
|
||||
//short esa[] = { -2, 1, -1, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 2, -1, 2, 0 };
|
||||
//short es2[] = { 1, -1, 3, -1, 1, 0 };
|
||||
//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { -1, 3, 0 };
|
||||
//short es2[] = { 2, -1, 2, 0 };
|
||||
//short esa[] = { -1, 1, -1, 2, 0 };
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { -1, 1, -1, 1, 0};
|
||||
//short es2[] = { 4, 0 };
|
||||
//short esa[] = { -1, 1, -1, 1, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 1, -1, 1, -1, 0};
|
||||
//short es2[] = { 4, 0 };
|
||||
//short esa[] = { 1, -1, 1, -1, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
//{
|
||||
//short es1[] = { 1, -1, 1, -1, 0};
|
||||
//short es2[] = { -1, 4, -1, 0 };
|
||||
//short esa[] = { -1, 1, -1, 1, -2, 0};
|
||||
//test(es1, es2, esa);
|
||||
//}
|
||||
{
|
||||
short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
|
||||
short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
|
||||
short esa[] = { 0 };
|
||||
test(es1, es2, esa);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
13
src/muscle/muscle3.8.31/src/estring.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef pathsum_h
|
||||
#define pathsum_h
|
||||
|
||||
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB);
|
||||
void EstringsToPath(const short esA[], const short esB[], PWPath &Path);
|
||||
void MulEstrings(const short es1[], const short es2[], short esp[]);
|
||||
void EstringOp(const short es[], const Seq &sIn, Seq &sOut);
|
||||
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a);
|
||||
void LogEstring(const short es[]);
|
||||
unsigned LengthEstring(const short es[]);
|
||||
short *EstringNewCopy(const short es[]);
|
||||
|
||||
#endif // pathsum_h
|
||||
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
56
src/muscle/muscle3.8.31/src/fasta.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
|
||||
const unsigned FASTA_BLOCK = 60;
|
||||
|
||||
// Replaces the contents of this alignment with the sequences read from a
// FASTA file. Records are read one at a time with GetFastaSeq (gaps are kept:
// DeleteGaps is false) and appended in file order until GetFastaSeq returns 0.
void MSA::FromFASTAFile(TextFile &File)
{
	Clear();

	FILE *f = File.GetStdioFile();

	for (;;)
	{
		char *Label = 0;
		unsigned uSeqLength = 0;
		char *SeqData = GetFastaSeq(f, &uSeqLength, &Label, false);
		if (0 == SeqData)
			break;
		// NOTE(review): SeqData and Label are heap buffers handed to
		// AppendSeq; confirm whether AppendSeq takes ownership of both.
		AppendSeq(SeqData, uSeqLength, Label);
	}
}
|
||||
|
||||
// Writes the alignment in FASTA format: for each sequence a ">name" line
// followed by its columns wrapped at FASTA_BLOCK characters per line.
// An alignment with at least one column is expected (asserted). When asserts
// are compiled out, a zero-column alignment now produces label lines only,
// where the previous line-count arithmetic wrapped around and read out of
// range.
void MSA::ToFASTAFile(TextFile &File) const
{
	const unsigned uColCount = GetColCount();
	assert(uColCount > 0);
	const unsigned uSeqCount = GetSeqCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
	{
		File.PutString(">");
		File.PutString(GetSeqName(uSeqIndex));
		File.PutString("\n");

		for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		{
			File.PutChar(GetChar(uSeqIndex, uCol));

			// End the line after every full block and after the last column.
			const unsigned uWritten = uCol + 1;
			if (0 == uWritten%FASTA_BLOCK || uWritten == uColCount)
				File.PutChar('\n');
		}
	}
}
|
||||
114
src/muscle/muscle3.8.31/src/fasta2.cpp
Normal file
114
src/muscle/muscle3.8.31/src/fasta2.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
|
||||
//const int BUFFER_BYTES = 16*1024;
|
||||
const int BUFFER_BYTES = 128;
|
||||
const int CR = '\r';
|
||||
const int NL = '\n';
|
||||
|
||||
// Appends one character to the growable buffer described by the caller's
// local variables Buffer (char *), BufferLength (unsigned, capacity) and
// Pos (unsigned, next write index). When the buffer is full it is replaced
// by a new[] allocation BUFFER_BYTES larger. The expansion is a bare brace
// block, so call sites write ADD(x) with no trailing semicolon.
// memcpy is skipped while Buffer is still null (first growth), and the
// argument is parenthesised so any expression can be passed.
#define ADD(c)	\
	{	\
	if (Pos >= BufferLength)	\
		{	\
		const unsigned NewBufferLength = BufferLength + BUFFER_BYTES;	\
		char *NewBuffer = new char[NewBufferLength];	\
		if (0 != Buffer)	\
			memcpy(NewBuffer, Buffer, BufferLength);	\
		delete[] Buffer;	\
		Buffer = NewBuffer;	\
		BufferLength = NewBufferLength;	\
		}	\
	Buffer[Pos++] = (char) (c);	\
	}
|
||||
|
||||
// Get next sequence from file.
|
||||
char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps)
|
||||
{
|
||||
unsigned BufferLength = 0;
|
||||
unsigned Pos = 0;
|
||||
char *Buffer = 0;
|
||||
|
||||
int c = fgetc(f);
|
||||
if (EOF == c)
|
||||
return 0;
|
||||
if ('>' != c)
|
||||
Quit("Invalid file format, expected '>' to start FASTA label");
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int c = fgetc(f);
|
||||
if (EOF == c)
|
||||
Quit("End-of-file or input error in FASTA label");
|
||||
|
||||
// NL or CR terminates label
|
||||
if (NL == c || CR == c)
|
||||
break;
|
||||
|
||||
// All other characters added to label
|
||||
ADD(c)
|
||||
}
|
||||
|
||||
// Nul-terminate label
|
||||
ADD(0)
|
||||
*ptrLabel = Buffer;
|
||||
|
||||
BufferLength = 0;
|
||||
Pos = 0;
|
||||
Buffer = 0;
|
||||
int PreviousChar = NL;
|
||||
for (;;)
|
||||
{
|
||||
int c = fgetc(f);
|
||||
if (EOF == c)
|
||||
{
|
||||
if (feof(f))
|
||||
break;
|
||||
else if (ferror(f))
|
||||
Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s",
|
||||
errno, strerror(errno));
|
||||
else
|
||||
Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s",
|
||||
errno, strerror(errno));
|
||||
}
|
||||
|
||||
if ('>' == c)
|
||||
{
|
||||
if (NL == PreviousChar || CR == PreviousChar)
|
||||
{
|
||||
ungetc(c, f);
|
||||
break;
|
||||
}
|
||||
else
|
||||
Quit("Unexpected '>' in FASTA sequence data");
|
||||
}
|
||||
else if (isspace(c))
|
||||
;
|
||||
else if (IsGapChar(c))
|
||||
{
|
||||
if (!DeleteGaps)
|
||||
ADD(c)
|
||||
}
|
||||
else if (isalpha(c))
|
||||
{
|
||||
c = toupper(c);
|
||||
ADD(c)
|
||||
}
|
||||
else if (isprint(c))
|
||||
{
|
||||
Warning("Invalid character '%c' in FASTA sequence data, ignored", c);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c);
|
||||
continue;
|
||||
}
|
||||
PreviousChar = c;
|
||||
}
|
||||
|
||||
if (0 == Pos)
|
||||
return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps);
|
||||
|
||||
*ptrSeqLength = Pos;
|
||||
return Buffer;
|
||||
}
|
||||
77
src/muscle/muscle3.8.31/src/fastclust.cpp
Normal file
77
src/muscle/muscle3.8.31/src/fastclust.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
#include "muscle.h"
|
||||
#include "seqvect.h"
|
||||
#include "distfunc.h"
|
||||
#include "clust.h"
|
||||
#include "clustsetdf.h"
|
||||
#include "tree.h"
|
||||
#include "clust.h"
|
||||
#include "distcalc.h"
|
||||
#include <math.h>
|
||||
|
||||
static void TreeFromSeqVect_NJ(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
|
||||
{
|
||||
ClustSetDF CSD(DF);
|
||||
|
||||
Clust C;
|
||||
C.Create(CSD, Cluster);
|
||||
|
||||
tree.FromClust(C);
|
||||
}
|
||||
|
||||
static void TreeFromSeqVect_UPGMA(const DistFunc &DF, CLUSTER Cluster, Tree &tree)
|
||||
{
|
||||
LINKAGE Linkage = LINKAGE_Undefined;
|
||||
switch (Cluster)
|
||||
{
|
||||
case CLUSTER_UPGMA:
|
||||
Linkage = LINKAGE_Avg;
|
||||
break;
|
||||
case CLUSTER_UPGMAMin:
|
||||
Linkage = LINKAGE_Min;
|
||||
break;
|
||||
case CLUSTER_UPGMAMax:
|
||||
Linkage = LINKAGE_Max;
|
||||
break;
|
||||
case CLUSTER_UPGMB:
|
||||
Linkage = LINKAGE_Biased;
|
||||
break;
|
||||
default:
|
||||
Quit("TreeFromSeqVect_UPGMA, CLUSTER_%u not supported", Cluster);
|
||||
}
|
||||
|
||||
DistCalcDF DC;
|
||||
DC.Init(DF);
|
||||
UPGMA2(DC, tree, Linkage);
|
||||
}
|
||||
|
||||
static void SaveDF(const SeqVect &v, DistFunc &d, const char *FileName)
|
||||
{
|
||||
FILE *f = fopen(FileName, "w");
|
||||
if (f == 0)
|
||||
Quit("Cannot create %s", FileName);
|
||||
|
||||
unsigned n = v.GetSeqCount();
|
||||
fprintf(f, "%u\n", n);
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
{
|
||||
fprintf(f, "%10.10s ", v.GetSeqName(i));
|
||||
for (unsigned j = 0; j < i; ++j)
|
||||
fprintf(f, " %9g", d.GetDist(i, j));
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void TreeFromSeqVect(const SeqVect &v, Tree &tree, CLUSTER Cluster,
|
||||
DISTANCE Distance, ROOT Root, const char *SaveFileName)
|
||||
{
|
||||
DistFunc DF;
|
||||
DistUnaligned(v, Distance, DF);
|
||||
if (SaveFileName != 0)
|
||||
SaveDF(v, DF, SaveFileName);
|
||||
if (CLUSTER_NeighborJoining == Cluster)
|
||||
TreeFromSeqVect_NJ(DF, Cluster, tree);
|
||||
else
|
||||
TreeFromSeqVect_UPGMA(DF, Cluster, tree);
|
||||
FixRoot(tree, Root);
|
||||
}
|
||||
56
src/muscle/muscle3.8.31/src/fastdist.cpp
Normal file
56
src/muscle/muscle3.8.31/src/fastdist.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "seqvect.h"
|
||||
|
||||
void DistPWScoreDist(const SeqVect &v, DistFunc &DF);
|
||||
|
||||
void DistUnaligned(const SeqVect &v, DISTANCE DistMethod, DistFunc &DF)
|
||||
{
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
switch (DistMethod)
|
||||
{
|
||||
case DISTANCE_Kmer6_6:
|
||||
DistKmer6_6(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_Kmer20_3:
|
||||
DistKmer20_3(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_Kmer20_4:
|
||||
FastDistKmer(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_Kbit20_3:
|
||||
DistKbit20_3(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_Kmer4_6:
|
||||
DistKmer4_6(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_PWKimura:
|
||||
DistPWKimura(v, DF);
|
||||
break;
|
||||
|
||||
case DISTANCE_PWScoreDist:
|
||||
DistPWScoreDist(v, DF);
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("DistUnaligned, unsupported distance method %d", DistMethod);
|
||||
}
|
||||
|
||||
// const char **SeqNames = (const char **) malloc(uSeqCount*sizeof(char *));
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
const Seq &s = *(v[uSeqIndex]);
|
||||
|
||||
const char *ptrName = s.GetName();
|
||||
unsigned uId = s.GetId();
|
||||
|
||||
DF.SetName(uSeqIndex, ptrName);
|
||||
DF.SetId(uSeqIndex, uId);
|
||||
}
|
||||
}
|
||||
206
src/muscle/muscle3.8.31/src/fastdistjones.cpp
Normal file
206
src/muscle/muscle3.8.31/src/fastdistjones.cpp
Normal file
@@ -0,0 +1,206 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "seqvect.h"
|
||||
#include <math.h>
|
||||
|
||||
const unsigned TRIPLE_COUNT = 20*20*20;
|
||||
|
||||
// Occurrence statistics for one 3-letter word (triple).
struct TripleCount
{
	// Number of sequences that contain this triple at least once.
	unsigned m_uSeqCount;

	// Per-sequence tally: m_Counts[s] is how many times the triple
	// was found in sequence s.
	unsigned short *m_Counts;
};
|
||||
static TripleCount *TripleCounts;
|
||||
|
||||
// WARNING: Sequences MUST be stripped of gaps and upper case!
|
||||
void DistKmer20_3(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
DF.SetCount(uSeqCount);
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
}
|
||||
|
||||
const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);
|
||||
TripleCounts = (TripleCount *) malloc(uTripleArrayBytes);
|
||||
if (0 == TripleCounts)
|
||||
Quit("Not enough memory (TripleCounts)");
|
||||
memset(TripleCounts, 0, uTripleArrayBytes);
|
||||
|
||||
for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
|
||||
{
|
||||
TripleCount &tc = *(TripleCounts + uWord);
|
||||
const unsigned uBytes = uSeqCount*sizeof(short);
|
||||
tc.m_Counts = (unsigned short *) malloc(uBytes);
|
||||
memset(tc.m_Counts, 0, uBytes);
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
Seq &s = *(v[uSeqIndex]);
|
||||
const unsigned uSeqLength = s.Length();
|
||||
for (unsigned uPos = 0; uPos < uSeqLength - 2; ++uPos)
|
||||
{
|
||||
const unsigned uLetter1 = CharToLetterEx(s[uPos]);
|
||||
if (uLetter1 >= 20)
|
||||
continue;
|
||||
const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);
|
||||
if (uLetter2 >= 20)
|
||||
continue;
|
||||
const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);
|
||||
if (uLetter3 >= 20)
|
||||
continue;
|
||||
|
||||
const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;
|
||||
assert(uWord < TRIPLE_COUNT);
|
||||
|
||||
TripleCount &tc = *(TripleCounts + uWord);
|
||||
const unsigned uOldCount = tc.m_Counts[uSeqIndex];
|
||||
if (0 == uOldCount)
|
||||
++(tc.m_uSeqCount);
|
||||
|
||||
++(tc.m_Counts[uSeqIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("TripleCounts\n");
|
||||
unsigned uGrandTotal = 0;
|
||||
for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
|
||||
{
|
||||
const TripleCount &tc = *(TripleCounts + uWord);
|
||||
if (0 == tc.m_uSeqCount)
|
||||
continue;
|
||||
|
||||
const unsigned uLetter3 = uWord/(20*20);
|
||||
const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;
|
||||
const unsigned uLetter1 = uWord%20;
|
||||
Log("Word %6u %c%c%c %6u",
|
||||
uWord,
|
||||
LetterToCharAmino(uLetter1),
|
||||
LetterToCharAmino(uLetter2),
|
||||
LetterToCharAmino(uLetter3),
|
||||
tc.m_uSeqCount);
|
||||
|
||||
unsigned uSeqCountWithThisWord = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
const unsigned uCount = tc.m_Counts[uSeqIndex];
|
||||
if (uCount > 0)
|
||||
{
|
||||
++uSeqCountWithThisWord;
|
||||
Log(" %u=%u", uSeqIndex, uCount);
|
||||
uGrandTotal += uCount;
|
||||
}
|
||||
}
|
||||
if (uSeqCountWithThisWord != tc.m_uSeqCount)
|
||||
Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);
|
||||
Log("\n");
|
||||
}
|
||||
|
||||
unsigned uTotalBySeqLength = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
Seq &s = *(v[uSeqIndex]);
|
||||
const unsigned uSeqLength = s.Length();
|
||||
uTotalBySeqLength += uSeqLength - 2;
|
||||
}
|
||||
if (uGrandTotal != uTotalBySeqLength)
|
||||
Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);
|
||||
}
|
||||
#endif
|
||||
|
||||
const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
|
||||
unsigned short *SeqList = (unsigned short *) malloc(uSeqListBytes);
|
||||
|
||||
for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
|
||||
{
|
||||
const TripleCount &tc = *(TripleCounts + uWord);
|
||||
if (0 == tc.m_uSeqCount)
|
||||
continue;
|
||||
|
||||
unsigned uSeqCountFound = 0;
|
||||
memset(SeqList, 0, uSeqListBytes);
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
if (tc.m_Counts[uSeqIndex] > 0)
|
||||
{
|
||||
SeqList[uSeqCountFound] = uSeqIndex;
|
||||
++uSeqCountFound;
|
||||
if (uSeqCountFound == tc.m_uSeqCount)
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(uSeqCountFound == tc.m_uSeqCount);
|
||||
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)
|
||||
{
|
||||
const unsigned uSeqIndex1 = SeqList[uSeq1];
|
||||
const unsigned uCount1 = tc.m_Counts[uSeqIndex1];
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
{
|
||||
const unsigned uSeqIndex2 = SeqList[uSeq2];
|
||||
const unsigned uCount2 = tc.m_Counts[uSeqIndex2];
|
||||
const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;
|
||||
const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);
|
||||
DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[] SeqList;
|
||||
free(TripleCounts);
|
||||
|
||||
unsigned uDone = 0;
|
||||
const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq1, 0.0);
|
||||
|
||||
const Seq &s1 = *(v[uSeq1]);
|
||||
const unsigned uLength1 = s1.Length();
|
||||
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
{
|
||||
const Seq &s2 = *(v[uSeq2]);
|
||||
const unsigned uLength2 = s2.Length();
|
||||
unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;
|
||||
if (uMinLength < 3)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq2, 1.0);
|
||||
continue;
|
||||
}
|
||||
|
||||
const double dTripleCount = DF.GetDist(uSeq1, uSeq2);
|
||||
if (dTripleCount == 0)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq2, 1.0);
|
||||
continue;
|
||||
}
|
||||
double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);
|
||||
//double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));
|
||||
//if (dEstimatedPairwiseIdentity > 1)
|
||||
// dEstimatedPairwiseIdentity = 1;
|
||||
// DF.SetDist(uSeq1, uSeq2, (float) (1.0 - dEstimatedPairwiseIdentity));
|
||||
DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("%s - %s Triplet count = %g Lengths %u, %u Estimated pwid = %g\n",
|
||||
s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,
|
||||
dEstimatedPairwiseIdentity);
|
||||
}
|
||||
#endif
|
||||
if (uDone%1000 == 0)
|
||||
Progress(uDone, uTotal);
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
}
|
||||
109
src/muscle/muscle3.8.31/src/fastdistkbit.cpp
Normal file
109
src/muscle/muscle3.8.31/src/fastdistkbit.cpp
Normal file
@@ -0,0 +1,109 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "seqvect.h"
|
||||
#include <math.h>
|
||||
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
static void SetKmerBitVector(const Seq &s, byte Bits[])
|
||||
{
|
||||
const unsigned uLength = s.Length();
|
||||
const unsigned k = 3; // kmer length
|
||||
unsigned i = 0;
|
||||
unsigned c = 0;
|
||||
unsigned h = 0;
|
||||
for (unsigned j = 0; j < k - 1; ++j)
|
||||
{
|
||||
unsigned x = CharToLetterEx(s[i++]);
|
||||
if (x <= AX_Y)
|
||||
c = c*20 + x;
|
||||
else
|
||||
{
|
||||
c = 0;
|
||||
h = j + 1;
|
||||
}
|
||||
}
|
||||
for ( ; i < uLength; ++i)
|
||||
{
|
||||
unsigned x = CharToLetterEx(s[i++]);
|
||||
if (x <= AX_Y)
|
||||
c = (c*20 + x)%8000;
|
||||
else
|
||||
{
|
||||
c = 0;
|
||||
h = i + k;
|
||||
}
|
||||
if (i >= h)
|
||||
{
|
||||
unsigned ByteOffset = c/8;
|
||||
unsigned BitOffset = c%8;
|
||||
Bits[ByteOffset] |= (1 << BitOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned CommonBitCount(const byte Bits1[], const byte Bits2[])
|
||||
{
|
||||
const byte * const p1end = Bits1 + 1000;
|
||||
const byte *p2 = Bits2;
|
||||
|
||||
unsigned uCount = 0;
|
||||
for (const byte *p1 = Bits1; p1 != p1end; ++p1)
|
||||
{
|
||||
// Here is a cute trick for efficiently counting the
|
||||
// bits common between two bytes by combining them into
|
||||
// a single word.
|
||||
unsigned b = *p1 | (*p2 << 8);
|
||||
while (b != 0)
|
||||
{
|
||||
if (b & 0x101)
|
||||
++uCount;
|
||||
b >>= 1;
|
||||
}
|
||||
++p2;
|
||||
}
|
||||
return uCount;
|
||||
}
|
||||
|
||||
void DistKbit20_3(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
const unsigned uSeqCount = v.Length();
|
||||
DF.SetCount(uSeqCount);
|
||||
|
||||
// There are 20^3 = 8,000 distinct kmers in the 20-letter alphabet.
|
||||
// For each sequence, we create a bit vector of length 8,000, i.e.
|
||||
// 1,000 bytes, having one bit per kmer. The bit is set to 1 if the
|
||||
// kmer is present in the sequence.
|
||||
const unsigned uBytes = uSeqCount*1000;
|
||||
byte *BitVector = new byte[uBytes];
|
||||
memset(BitVector, 0, uBytes);
|
||||
|
||||
SetProgressDesc("K-bit distance matrix");
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
SetKmerBitVector(*v[uSeqIndex], BitVector + uSeqIndex*1000);
|
||||
|
||||
unsigned uDone = 0;
|
||||
const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
|
||||
{
|
||||
const byte *Bits1 = BitVector + uSeqIndex1*1000;
|
||||
const unsigned uLength1 = v[uSeqIndex1]->Length();
|
||||
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
|
||||
{
|
||||
const byte *Bits2 = BitVector + uSeqIndex2*1000;
|
||||
const unsigned uLength2 = v[uSeqIndex2]->Length();
|
||||
const float fCount = (float) CommonBitCount(Bits1, Bits2);
|
||||
|
||||
// Distance measure = K / min(L1, L2)
|
||||
// K is number of distinct kmers that are found in both sequences
|
||||
const float fDist = fCount / MIN(uLength1, uLength2);
|
||||
DF.SetDist(uSeqIndex1, uSeqIndex2, fDist);
|
||||
if (uDone%10000 == 0)
|
||||
Progress(uDone, uTotal);
|
||||
++uDone;
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
delete[] BitVector;
|
||||
}
|
||||
247
src/muscle/muscle3.8.31/src/fastdistkmer.cpp
Normal file
247
src/muscle/muscle3.8.31/src/fastdistkmer.cpp
Normal file
@@ -0,0 +1,247 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "seqvect.h"
|
||||
#include "seq.h"
|
||||
#include "distfunc.h"
|
||||
#include <math.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
/***
|
||||
Some candidate alphabets considered because they
|
||||
have high correlations and small table sizes.
|
||||
Correlation coefficient is between k-mer distance
|
||||
and %id D measured from a CLUSTALW alignment.
|
||||
Table size is N^k where N is size of alphabet.
|
||||
A is standard (uncompressed) amino alphabet.
|
||||
|
||||
Correlation
|
||||
Alpha N k Table Size all 25-50%
|
||||
----- -- - ---------- ---- ------
|
||||
A 20 3 8,000 0.943 0.575
|
||||
A 20 4 160,000 0.962 0.685 <<
|
||||
LiA 14 4 38,416 0.966 0.645
|
||||
SEB 14 4 38,416 0.964 0.634
|
||||
LiA 13 4 28,561 0.965 0.640
|
||||
LiA 12 4 20,736 0.963 0.620
|
||||
LiA 10 5 100,000 0.964 0.652
|
||||
|
||||
We select A with k=4 because it has the best
|
||||
correlations. The only drawback is a large table
|
||||
size, but space is readily available and the only
|
||||
additional time cost is in resetting the table to
|
||||
zero, which can be done quickly with memset or by
|
||||
keeping a list of the k-mers that were found (should
|
||||
test to see which is faster, and may vary by compiler
|
||||
and processor type). It also has the minor advantage
|
||||
that we don't need to convert the alphabet.
|
||||
|
||||
Fractional identity d is estimated as follows.
|
||||
|
||||
F = fractional k-mer count
|
||||
if F is 0: F = 0.01
|
||||
Y = log(0.02 + F)
|
||||
Y = -4.1 + 4.12*d, i.e. d = (Y + 4.1)/4.12
|
||||
|
||||
The constant 0.02 was chosen to make the relationship
|
||||
between Y and D linear. The constants -4.1 and 4.12
|
||||
were chosen to fit a straight line to the scatterplot
|
||||
of Y vs D.
|
||||
***/
|
||||
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
|
||||
const unsigned K = 4;
|
||||
const unsigned N = 20;
|
||||
const unsigned N_2 = 20*20;
|
||||
const unsigned N_3 = 20*20*20;
|
||||
const unsigned N_4 = 20*20*20*20;
|
||||
|
||||
const unsigned TABLE_SIZE = N_4;
|
||||
|
||||
// For debug output
|
||||
const char *KmerToStr(unsigned Kmer)
|
||||
{
|
||||
static char s[5];
|
||||
|
||||
unsigned c3 = (Kmer/N_3)%N;
|
||||
unsigned c2 = (Kmer/N_2)%N;
|
||||
unsigned c1 = (Kmer/N)%N;
|
||||
unsigned c0 = Kmer%N;
|
||||
|
||||
s[0] = LetterToChar(c3);
|
||||
s[1] = LetterToChar(c2);
|
||||
s[2] = LetterToChar(c1);
|
||||
s[3] = LetterToChar(c0);
|
||||
return s;
|
||||
}
|
||||
|
||||
void CountKmers(const byte s[], unsigned uSeqLength, byte KmerCounts[])
|
||||
{
|
||||
#if TRACE
|
||||
Log("CountKmers\n");
|
||||
#endif
|
||||
memset(KmerCounts, 0, TABLE_SIZE*sizeof(byte));
|
||||
|
||||
const byte *ptrKmerStart = s;
|
||||
const byte *ptrKmerEnd = s + 4;
|
||||
const byte *ptrSeqEnd = s + uSeqLength;
|
||||
|
||||
unsigned c3 = s[0]*N_3;
|
||||
unsigned c2 = s[1]*N_2;
|
||||
unsigned c1 = s[2]*N;
|
||||
unsigned c0 = s[3];
|
||||
|
||||
unsigned Kmer = c3 + c2 + c1 + c0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
assert(Kmer < TABLE_SIZE);
|
||||
|
||||
#if TRACE
|
||||
Log("Kmer=%d=%s\n", Kmer, KmerToStr(Kmer));
|
||||
#endif
|
||||
++(KmerCounts[Kmer]);
|
||||
|
||||
if (ptrKmerEnd == ptrSeqEnd)
|
||||
break;
|
||||
|
||||
// Compute k-mer as function of previous k-mer:
|
||||
// 1. Subtract first letter from previous k-mer.
|
||||
// 2. Multiply by N.
|
||||
// 3. Add next letter.
|
||||
c3 = (*ptrKmerStart++) * N_3;
|
||||
Kmer = (Kmer - c3)*N;
|
||||
Kmer += *ptrKmerEnd++;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned CommonKmerCount(const byte Seq[], unsigned uSeqLength,
|
||||
const byte KmerCounts1[], const byte Seq2[], unsigned uSeqLength2)
|
||||
{
|
||||
byte KmerCounts2[TABLE_SIZE];
|
||||
CountKmers(Seq2, uSeqLength2, KmerCounts2);
|
||||
|
||||
const byte *ptrKmerStart = Seq;
|
||||
const byte *ptrKmerEnd = Seq + 4;
|
||||
const byte *ptrSeqEnd = Seq + uSeqLength;
|
||||
|
||||
unsigned c3 = Seq[0]*N_3;
|
||||
unsigned c2 = Seq[1]*N_2;
|
||||
unsigned c1 = Seq[2]*N;
|
||||
unsigned c0 = Seq[3];
|
||||
|
||||
unsigned Kmer = c3 + c2 + c1 + c0;
|
||||
|
||||
unsigned uCommonCount = 0;
|
||||
for (;;)
|
||||
{
|
||||
assert(Kmer < TABLE_SIZE);
|
||||
|
||||
const byte Count1 = KmerCounts1[Kmer];
|
||||
const byte Count2 = KmerCounts2[Kmer];
|
||||
|
||||
uCommonCount += MIN(Count1, Count2);
|
||||
|
||||
// Hack so we don't double-count
|
||||
KmerCounts2[Kmer] = 0;
|
||||
|
||||
if (ptrKmerEnd == ptrSeqEnd)
|
||||
break;
|
||||
|
||||
// Compute k-mer as function of previous k-mer:
|
||||
// 1. Subtract first letter from previous k-mer.
|
||||
// 2. Multiply by N.
|
||||
// 3. Add next letter.
|
||||
c3 = (*ptrKmerStart++) * N_3;
|
||||
Kmer = (Kmer - c3)*N;
|
||||
Kmer += *ptrKmerEnd++;
|
||||
}
|
||||
return uCommonCount;
|
||||
}
|
||||
|
||||
static void SeqToLetters(const Seq &s, byte Letters[])
|
||||
{
|
||||
const unsigned uSeqLength = s.Length();
|
||||
for (unsigned uCol = 0; uCol < uSeqLength; ++uCol)
|
||||
{
|
||||
char c = s.GetChar(uCol);
|
||||
// Ugly hack. My k-mer counting code isn't wild-card
|
||||
// aware. Arbitrarily replace wildcards by a specific
|
||||
// amino acid.
|
||||
if (IsWildcardChar(c))
|
||||
c = 'A';
|
||||
*Letters++ = CharToLetter(c);
|
||||
}
|
||||
}
|
||||
|
||||
void FastDistKmer(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
byte KmerCounts[TABLE_SIZE];
|
||||
|
||||
const unsigned uSeqCount = v.GetSeqCount();
|
||||
|
||||
DF.SetCount(uSeqCount);
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
|
||||
// Initialize distance matrix to zero
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
}
|
||||
|
||||
unsigned uMaxLength = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
const Seq &s = v.GetSeq(uSeqIndex);
|
||||
unsigned uSeqLength = s.Length();
|
||||
if (uSeqLength > uMaxLength)
|
||||
uMaxLength = uSeqLength;
|
||||
}
|
||||
if (0 == uMaxLength)
|
||||
return;
|
||||
|
||||
byte *Seq1Letters = new byte[uMaxLength];
|
||||
byte *Seq2Letters = new byte[uMaxLength];
|
||||
|
||||
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount - 1; ++uSeqIndex1)
|
||||
{
|
||||
const Seq &s1 = v.GetSeq(uSeqIndex1);
|
||||
const unsigned uSeqLength1 = s1.Length();
|
||||
|
||||
SeqToLetters(s1, Seq1Letters);
|
||||
CountKmers(Seq1Letters, uSeqLength1, KmerCounts);
|
||||
|
||||
for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount;
|
||||
++uSeqIndex2)
|
||||
{
|
||||
const Seq &s2 = v.GetSeq(uSeqIndex2);
|
||||
const unsigned uSeqLength2 = s2.Length();
|
||||
|
||||
SeqToLetters(s2, Seq2Letters);
|
||||
|
||||
unsigned uCommonKmerCount = CommonKmerCount(Seq1Letters, uSeqLength1,
|
||||
KmerCounts, Seq2Letters, uSeqLength2);
|
||||
|
||||
unsigned uMinLength = MIN(uSeqLength1, uSeqLength2);
|
||||
double F = (double) uCommonKmerCount / (uMinLength - K + 1);
|
||||
if (0.0 == F)
|
||||
F = 0.01;
|
||||
double Y = log(0.02 + F);
|
||||
double EstimatedPctId = Y/4.12 + 0.995;
|
||||
double KD = KimuraDist(EstimatedPctId);
|
||||
// DF.SetDist(uSeqIndex1, uSeqIndex2, (float) KD);
|
||||
DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (1 - F));
|
||||
#if TRACE
|
||||
Log("CommonCount=%u, MinLength=%u, F=%6.4f Y=%6.4f, %%id=%6.4f, KimuraDist=%8.4f\n",
|
||||
uCommonKmerCount, uMinLength, F, Y, EstimatedPctId, KD);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
delete[] Seq1Letters;
|
||||
delete[] Seq2Letters;
|
||||
}
|
||||
290
src/muscle/muscle3.8.31/src/fastdistmafft.cpp
Normal file
290
src/muscle/muscle3.8.31/src/fastdistmafft.cpp
Normal file
@@ -0,0 +1,290 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "seqvect.h"
|
||||
#include <math.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
|
||||
static unsigned char Count1[TUPLE_COUNT];
|
||||
static unsigned char Count2[TUPLE_COUNT];
|
||||
|
||||
// Amino acid groups according to MAFFT (sextet5)
|
||||
// 0 = A G P S T
|
||||
// 1 = I L M V
|
||||
// 2 = N D Q E B Z
|
||||
// 3 = R H K
|
||||
// 4 = F W Y
|
||||
// 5 = C
|
||||
// 6 = X . - U
|
||||
unsigned ResidueGroup[] =
|
||||
{
|
||||
0, // AX_A,
|
||||
5, // AX_C,
|
||||
2, // AX_D,
|
||||
2, // AX_E,
|
||||
4, // AX_F,
|
||||
0, // AX_G,
|
||||
3, // AX_H,
|
||||
1, // AX_I,
|
||||
3, // AX_K,
|
||||
1, // AX_L,
|
||||
1, // AX_M,
|
||||
2, // AX_N,
|
||||
0, // AX_P,
|
||||
2, // AX_Q,
|
||||
3, // AX_R,
|
||||
0, // AX_S,
|
||||
0, // AX_T,
|
||||
1, // AX_V,
|
||||
4, // AX_W,
|
||||
4, // AX_Y,
|
||||
|
||||
2, // AX_B, // D or N
|
||||
2, // AX_Z, // E or Q
|
||||
0, // AX_X, // Unknown // ******** TODO *************
|
||||
// This isn't the correct way of avoiding group 6
|
||||
0 // AX_GAP, // ******** TODO ******************
|
||||
};
|
||||
unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
|
||||
|
||||
// Format tuple index t as six base-6 digits, most significant digit first.
// The result lives in a static buffer that is overwritten by the next call.
static char *TupleToStr(int t)
{
    // Static storage is zero-initialised and only s[0..5] are ever written,
    // so s[6] always holds the terminating NUL.
    static char s[7];

    // Peel digits off from least to most significant, filling the buffer
    // from right to left.
    int iRest = t;
    for (int iPos = 5; iPos >= 0; --iPos)
    {
        s[iPos] = '0' + iRest%6;
        iRest /= 6;
    }
    return s;
}
|
||||
|
||||
static unsigned GetTuple(const unsigned uLetters[], unsigned n)
|
||||
{
|
||||
assert(uLetters[n] < uResidueGroupCount);
|
||||
assert(uLetters[n+1] < uResidueGroupCount);
|
||||
assert(uLetters[n+2] < uResidueGroupCount);
|
||||
assert(uLetters[n+3] < uResidueGroupCount);
|
||||
assert(uLetters[n+4] < uResidueGroupCount);
|
||||
assert(uLetters[n+5] < uResidueGroupCount);
|
||||
|
||||
unsigned u1 = ResidueGroup[uLetters[n]];
|
||||
unsigned u2 = ResidueGroup[uLetters[n+1]];
|
||||
unsigned u3 = ResidueGroup[uLetters[n+2]];
|
||||
unsigned u4 = ResidueGroup[uLetters[n+3]];
|
||||
unsigned u5 = ResidueGroup[uLetters[n+4]];
|
||||
unsigned u6 = ResidueGroup[uLetters[n+5]];
|
||||
|
||||
return u6 + u5*6 + u4*6*6 + u3*6*6*6 + u2*6*6*6*6 + u1*6*6*6*6*6;
|
||||
}
|
||||
|
||||
static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
|
||||
{
|
||||
memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
|
||||
for (unsigned n = 0; n < uTupleCount; ++n)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(L, n);
|
||||
++(Count[uTuple]);
|
||||
}
|
||||
}
|
||||
|
||||
static void ListCount(const unsigned char Count[])
|
||||
{
|
||||
for (unsigned n = 0; n < TUPLE_COUNT; ++n)
|
||||
{
|
||||
if (0 == Count[n])
|
||||
continue;
|
||||
Log("%s %u\n", TupleToStr(n), Count[n]);
|
||||
}
|
||||
}
|
||||
|
||||
void DistKmer6_6(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
DF.SetCount(uSeqCount);
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
|
||||
// Initialize distance matrix to zero
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
}
|
||||
|
||||
// Convert to letters
|
||||
unsigned **Letters = new unsigned *[uSeqCount];
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
Seq &s = *(v[uSeqIndex]);
|
||||
const unsigned uSeqLength = s.Length();
|
||||
unsigned *L = new unsigned[uSeqLength];
|
||||
Letters[uSeqIndex] = L;
|
||||
for (unsigned n = 0; n < uSeqLength; ++n)
|
||||
{
|
||||
char c = s[n];
|
||||
L[n] = CharToLetterEx(c);
|
||||
assert(L[n] < uResidueGroupCount);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
|
||||
for (unsigned n = 0; n < uSeqCount; ++n)
|
||||
{
|
||||
uCommonTupleCount[n] = new unsigned[uSeqCount];
|
||||
memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
|
||||
}
|
||||
|
||||
const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
|
||||
unsigned uCount = 0;
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
Seq &seq1 = *(v[uSeq1]);
|
||||
const unsigned uSeqLength1 = seq1.Length();
|
||||
if (uSeqLength1 < 5)
|
||||
continue;
|
||||
|
||||
const unsigned uTupleCount = uSeqLength1 - 5;
|
||||
const unsigned *L = Letters[uSeq1];
|
||||
CountTuples(L, uTupleCount, Count1);
|
||||
#if TRACE
|
||||
{
|
||||
Log("Seq1=%d\n", uSeq1);
|
||||
Log("Groups:\n");
|
||||
for (unsigned n = 0; n < uSeqLength1; ++n)
|
||||
Log("%u", ResidueGroup[L[n]]);
|
||||
Log("\n");
|
||||
|
||||
Log("Tuples:\n");
|
||||
ListCount(Count1);
|
||||
}
|
||||
#endif
|
||||
|
||||
SetProgressDesc("K-mer dist pass 1");
|
||||
for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
|
||||
{
|
||||
if (0 == uCount%500)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
Seq &seq2 = *(v[uSeq2]);
|
||||
const unsigned uSeqLength2 = seq2.Length();
|
||||
if (uSeqLength2 < 5)
|
||||
{
|
||||
if (uSeq1 == uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
else
|
||||
DF.SetDist(uSeq1, uSeq2, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// First pass through seq 2 to count tuples
|
||||
const unsigned uTupleCount = uSeqLength2 - 5;
|
||||
const unsigned *L = Letters[uSeq2];
|
||||
CountTuples(L, uTupleCount, Count2);
|
||||
#if TRACE
|
||||
Log("Seq2=%d Counts=\n", uSeq2);
|
||||
ListCount(Count2);
|
||||
#endif
|
||||
|
||||
// Second pass to accumulate sum of shared tuples
|
||||
// MAFFT defines this as the sum over unique tuples
|
||||
// in seq2 of the minimum of the number of tuples found
|
||||
// in the two sequences.
|
||||
unsigned uSum = 0;
|
||||
for (unsigned n = 0; n < uTupleCount; ++n)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(L, n);
|
||||
uSum += MIN(Count1[uTuple], Count2[uTuple]);
|
||||
|
||||
// This is a hack to make sure each unique tuple counted only once.
|
||||
Count2[uTuple] = 0;
|
||||
}
|
||||
#if TRACE
|
||||
{
|
||||
Seq &s1 = *(v[uSeq1]);
|
||||
Seq &s2 = *(v[uSeq2]);
|
||||
const char *pName1 = s1.GetName();
|
||||
const char *pName2 = s2.GetName();
|
||||
Log("Common count %s(%d) - %s(%d) =%u\n",
|
||||
pName1, uSeq1, pName2, uSeq2, uSum);
|
||||
}
|
||||
#endif
|
||||
uCommonTupleCount[uSeq1][uSeq2] = uSum;
|
||||
uCommonTupleCount[uSeq2][uSeq1] = uSum;
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
uCount = 0;
|
||||
SetProgressDesc("K-mer dist pass 2");
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
Seq &s1 = *(v[uSeq1]);
|
||||
const char *pName1 = s1.GetName();
|
||||
|
||||
double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
|
||||
if (0 == dCommonTupleCount11)
|
||||
dCommonTupleCount11 = 1;
|
||||
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
{
|
||||
if (0 == uCount%500)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
|
||||
double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
|
||||
if (0 == dCommonTupleCount22)
|
||||
dCommonTupleCount22 = 1;
|
||||
|
||||
const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
|
||||
/dCommonTupleCount11;
|
||||
const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
|
||||
/dCommonTupleCount22;
|
||||
|
||||
// dMinDist is the value used for tree-building in MAFFT
|
||||
const double dMinDist = MIN(dDist1, dDist2);
|
||||
DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
|
||||
|
||||
//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
|
||||
//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
|
||||
// **** TODO **** why does this make score slightly worse??
|
||||
//const double dKimuraDist = KimuraDist(dEstimatedPctId);
|
||||
//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
for (unsigned n = 0; n < uSeqCount; ++n)
|
||||
delete[] uCommonTupleCount[n];
|
||||
delete[] uCommonTupleCount;
|
||||
delete[] Letters;
|
||||
}
|
||||
|
||||
// Convert a fractional identity to a distance: -log(dPctId), with the
// identity clamped from below at 0.05 so the result stays bounded.
double PctIdToMAFFTDist(double dPctId)
{
    const double dClamped = (dPctId < 0.05) ? 0.05 : dPctId;
    return -log(dClamped);
}
|
||||
|
||||
double PctIdToHeightMAFFT(double dPctId)
|
||||
{
|
||||
return PctIdToMAFFTDist(dPctId);
|
||||
}
|
||||
265
src/muscle/muscle3.8.31/src/fastdistnuc.cpp
Normal file
265
src/muscle/muscle3.8.31/src/fastdistnuc.cpp
Normal file
@@ -0,0 +1,265 @@
|
||||
#include "muscle.h"
|
||||
#include "distfunc.h"
|
||||
#include "seqvect.h"
|
||||
#include <math.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
const unsigned TUPLE_COUNT = 6*6*6*6*6*6;
|
||||
static unsigned char Count1[TUPLE_COUNT];
|
||||
static unsigned char Count2[TUPLE_COUNT];
|
||||
|
||||
// Nucleotide groups according to MAFFT (sextet5)
|
||||
// 0 = A
|
||||
// 1 = C
|
||||
// 2 = G
|
||||
// 3 = T
|
||||
// 4 = other
|
||||
|
||||
static unsigned ResidueGroup[] =
|
||||
{
|
||||
0, // NX_A,
|
||||
1, // NX_C,
|
||||
2, // NX_G,
|
||||
3, // NX_T/U
|
||||
4, // NX_N,
|
||||
4, // NX_R,
|
||||
4, // NX_Y,
|
||||
4, // NX_GAP
|
||||
};
|
||||
static unsigned uResidueGroupCount = sizeof(ResidueGroup)/sizeof(ResidueGroup[0]);
|
||||
|
||||
// Format tuple index t as six base-6 digits, most significant digit first.
// The result lives in a static buffer that is overwritten by the next call.
static char *TupleToStr(int t)
{
    // Static storage is zero-initialised and only s[0..5] are ever written,
    // so s[6] always holds the terminating NUL.
    static char s[7];

    // Peel digits off from least to most significant, filling the buffer
    // from right to left.
    int iRest = t;
    for (int iPos = 5; iPos >= 0; --iPos)
    {
        s[iPos] = '0' + iRest%6;
        iRest /= 6;
    }
    return s;
}
|
||||
|
||||
static unsigned GetTuple(const unsigned uLetters[], unsigned n)
|
||||
{
|
||||
assert(uLetters[n] < uResidueGroupCount);
|
||||
assert(uLetters[n+1] < uResidueGroupCount);
|
||||
assert(uLetters[n+2] < uResidueGroupCount);
|
||||
assert(uLetters[n+3] < uResidueGroupCount);
|
||||
assert(uLetters[n+4] < uResidueGroupCount);
|
||||
assert(uLetters[n+5] < uResidueGroupCount);
|
||||
|
||||
unsigned u1 = ResidueGroup[uLetters[n]];
|
||||
unsigned u2 = ResidueGroup[uLetters[n+1]];
|
||||
unsigned u3 = ResidueGroup[uLetters[n+2]];
|
||||
unsigned u4 = ResidueGroup[uLetters[n+3]];
|
||||
unsigned u5 = ResidueGroup[uLetters[n+4]];
|
||||
unsigned u6 = ResidueGroup[uLetters[n+5]];
|
||||
|
||||
return u6 + u5*6 + u4*6*6 + u3*6*6*6 + u2*6*6*6*6 + u1*6*6*6*6*6;
|
||||
}
|
||||
|
||||
static void CountTuples(const unsigned L[], unsigned uTupleCount, unsigned char Count[])
|
||||
{
|
||||
memset(Count, 0, TUPLE_COUNT*sizeof(unsigned char));
|
||||
for (unsigned n = 0; n < uTupleCount; ++n)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(L, n);
|
||||
++(Count[uTuple]);
|
||||
}
|
||||
}
|
||||
|
||||
static void ListCount(const unsigned char Count[])
|
||||
{
|
||||
for (unsigned n = 0; n < TUPLE_COUNT; ++n)
|
||||
{
|
||||
if (0 == Count[n])
|
||||
continue;
|
||||
Log("%s %u\n", TupleToStr(n), Count[n]);
|
||||
}
|
||||
}
|
||||
|
||||
void DistKmer4_6(const SeqVect &v, DistFunc &DF)
|
||||
{
|
||||
if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
|
||||
Quit("DistKmer4_6 requires nucleo alphabet");
|
||||
|
||||
const unsigned uSeqCount = v.Length();
|
||||
|
||||
DF.SetCount(uSeqCount);
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
|
||||
// Initialize distance matrix to zero
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
}
|
||||
|
||||
// Convert to letters
|
||||
unsigned **Letters = new unsigned *[uSeqCount];
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
Seq &s = *(v[uSeqIndex]);
|
||||
const unsigned uSeqLength = s.Length();
|
||||
unsigned *L = new unsigned[uSeqLength];
|
||||
Letters[uSeqIndex] = L;
|
||||
for (unsigned n = 0; n < uSeqLength; ++n)
|
||||
{
|
||||
char c = s[n];
|
||||
L[n] = CharToLetterEx(c);
|
||||
if (L[n] >= 4)
|
||||
L[n] = 4;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned **uCommonTupleCount = new unsigned *[uSeqCount];
|
||||
for (unsigned n = 0; n < uSeqCount; ++n)
|
||||
{
|
||||
uCommonTupleCount[n] = new unsigned[uSeqCount];
|
||||
memset(uCommonTupleCount[n], 0, uSeqCount*sizeof(unsigned));
|
||||
}
|
||||
|
||||
const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
|
||||
unsigned uCount = 0;
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
Seq &seq1 = *(v[uSeq1]);
|
||||
const unsigned uSeqLength1 = seq1.Length();
|
||||
if (uSeqLength1 < 5)
|
||||
continue;
|
||||
|
||||
const unsigned uTupleCount = uSeqLength1 - 5;
|
||||
const unsigned *L = Letters[uSeq1];
|
||||
CountTuples(L, uTupleCount, Count1);
|
||||
#if TRACE
|
||||
{
|
||||
Log("Seq1=%d\n", uSeq1);
|
||||
Log("Groups:\n");
|
||||
for (unsigned n = 0; n < uSeqLength1; ++n)
|
||||
Log("%u", ResidueGroup[L[n]]);
|
||||
Log("\n");
|
||||
|
||||
Log("Tuples:\n");
|
||||
ListCount(Count1);
|
||||
}
|
||||
#endif
|
||||
|
||||
SetProgressDesc("K-mer dist pass 1");
|
||||
for (unsigned uSeq2 = 0; uSeq2 <= uSeq1; ++uSeq2)
|
||||
{
|
||||
if (0 == uCount%500)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
Seq &seq2 = *(v[uSeq2]);
|
||||
const unsigned uSeqLength2 = seq2.Length();
|
||||
if (uSeqLength2 < 5)
|
||||
{
|
||||
if (uSeq1 == uSeq2)
|
||||
DF.SetDist(uSeq1, uSeq2, 0);
|
||||
else
|
||||
DF.SetDist(uSeq1, uSeq2, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// First pass through seq 2 to count tuples
|
||||
const unsigned uTupleCount = uSeqLength2 - 5;
|
||||
const unsigned *L = Letters[uSeq2];
|
||||
CountTuples(L, uTupleCount, Count2);
|
||||
#if TRACE
|
||||
Log("Seq2=%d Counts=\n", uSeq2);
|
||||
ListCount(Count2);
|
||||
#endif
|
||||
|
||||
// Second pass to accumulate sum of shared tuples
|
||||
// MAFFT defines this as the sum over unique tuples
|
||||
// in seq2 of the minimum of the number of tuples found
|
||||
// in the two sequences.
|
||||
unsigned uSum = 0;
|
||||
for (unsigned n = 0; n < uTupleCount; ++n)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(L, n);
|
||||
uSum += MIN(Count1[uTuple], Count2[uTuple]);
|
||||
|
||||
// This is a hack to make sure each unique tuple counted only once.
|
||||
Count2[uTuple] = 0;
|
||||
}
|
||||
#if TRACE
|
||||
{
|
||||
Seq &s1 = *(v[uSeq1]);
|
||||
Seq &s2 = *(v[uSeq2]);
|
||||
const char *pName1 = s1.GetName();
|
||||
const char *pName2 = s2.GetName();
|
||||
Log("Common count %s(%d) - %s(%d) =%u\n",
|
||||
pName1, uSeq1, pName2, uSeq2, uSum);
|
||||
}
|
||||
#endif
|
||||
uCommonTupleCount[uSeq1][uSeq2] = uSum;
|
||||
uCommonTupleCount[uSeq2][uSeq1] = uSum;
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
uCount = 0;
|
||||
SetProgressDesc("K-mer dist pass 2");
|
||||
for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
|
||||
{
|
||||
Seq &s1 = *(v[uSeq1]);
|
||||
const char *pName1 = s1.GetName();
|
||||
|
||||
double dCommonTupleCount11 = uCommonTupleCount[uSeq1][uSeq1];
|
||||
if (0 == dCommonTupleCount11)
|
||||
dCommonTupleCount11 = 1;
|
||||
|
||||
DF.SetDist(uSeq1, uSeq1, 0);
|
||||
for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
|
||||
{
|
||||
if (0 == uCount%500)
|
||||
Progress(uCount, uPairCount);
|
||||
++uCount;
|
||||
|
||||
double dCommonTupleCount22 = uCommonTupleCount[uSeq2][uSeq2];
|
||||
if (0 == dCommonTupleCount22)
|
||||
dCommonTupleCount22 = 1;
|
||||
|
||||
const double dDist1 = 3.0*(dCommonTupleCount11 - uCommonTupleCount[uSeq1][uSeq2])
|
||||
/dCommonTupleCount11;
|
||||
const double dDist2 = 3.0*(dCommonTupleCount22 - uCommonTupleCount[uSeq1][uSeq2])
|
||||
/dCommonTupleCount22;
|
||||
|
||||
// dMinDist is the value used for tree-building in MAFFT
|
||||
const double dMinDist = MIN(dDist1, dDist2);
|
||||
DF.SetDist(uSeq1, uSeq2, (float) dMinDist);
|
||||
|
||||
//const double dEstimatedPctId = TupleDistToEstimatedPctId(dMinDist);
|
||||
//g_dfPwId.SetDist(uSeq1, uSeq2, dEstimatedPctId);
|
||||
// **** TODO **** why does this make score slightly worse??
|
||||
//const double dKimuraDist = KimuraDist(dEstimatedPctId);
|
||||
//DF.SetDist(uSeq1, uSeq2, dKimuraDist);
|
||||
}
|
||||
}
|
||||
ProgressStepsDone();
|
||||
|
||||
for (unsigned n = 0; n < uSeqCount; ++n)
|
||||
{
|
||||
delete[] uCommonTupleCount[n];
|
||||
delete[] Letters[n];
|
||||
}
|
||||
delete[] uCommonTupleCount;
|
||||
delete[] Letters;
|
||||
}
|
||||
165
src/muscle/muscle3.8.31/src/fastscorepath2.cpp
Normal file
165
src/muscle/muscle3.8.31/src/fastscorepath2.cpp
Normal file
@@ -0,0 +1,165 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
// Walk Path edge by edge, log a table with the match and gap score of each
// edge, and return the accumulated total.
//
// PA/PB are the two profiles and uLengthA/uLengthB their lengths. Each edge
// carries a type ('M', 'D' or 'I') plus the prefix lengths reached in A and
// B. 'S' is used as the type preceding the first edge. A final gap-close
// term is added when the path ends in 'D' or 'I'. Inconsistent paths
// (zero prefix length, D directly followed by I or vice versa, 'U' edges,
// unknown types) are reported through Quit.
SCORE FastScorePath2(const ProfPos *PA, unsigned uLengthA,
  const ProfPos *PB, unsigned uLengthB, const PWPath &Path)
{
    const unsigned uEdgeCount = Path.GetEdgeCount();
    Log("Edge SS PLA PLB Match Gap Total\n");
    Log("---- -- --- --- ----- --- -----\n");

    char cType = 'S';
    SCORE scoreTotal = 0;
    for (unsigned uEdgeIndex = 0; uEdgeIndex != uEdgeCount; ++uEdgeIndex)
    {
        const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
        const char cPrevType = cType;
        cType = Edge.cType;
        const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
        const unsigned uPrefixLengthB = Edge.uPrefixLengthB;

        // Flags record which columns of the log line carry a value.
        bool bHasGap = false;
        bool bHasMatch = false;
        SCORE scoreGap = 0;
        SCORE scoreMatch = 0;

        if ('M' == cType)
        {
            if (0 == uPrefixLengthA || 0 == uPrefixLengthB)
                Quit("FastScorePath2, M zero length");

            bHasMatch = true;
            scoreMatch = ScoreProfPos2(PA[uPrefixLengthA - 1],
              PB[uPrefixLengthB - 1]);

            // A match directly after a gap pays the gap-close score of the
            // position just before the matched one.
            if ('D' == cPrevType)
            {
                bHasGap = true;
                assert(uPrefixLengthA > 1);
                scoreGap = PA[uPrefixLengthA-2].m_scoreGapClose;
            }
            else if ('I' == cPrevType)
            {
                bHasGap = true;
                assert(uPrefixLengthB > 1);
                scoreGap = PB[uPrefixLengthB-2].m_scoreGapClose;
            }
        }
        else if ('D' == cType)
        {
            if (0 == uPrefixLengthA)
                Quit("FastScorePath2, D zero length");

            const ProfPos &PPA = PA[uPrefixLengthA - 1];
            bHasGap = true;
            if ('S' == cPrevType || 'M' == cPrevType)
                scoreGap = PPA.m_scoreGapOpen;
            else if ('D' == cPrevType)
                // scoreGap = g_scoreGapExtend;
                scoreGap = 0;
            else if ('I' == cPrevType)
                Quit("FastScorePath2 DI");
        }
        else if ('I' == cType)
        {
            if (0 == uPrefixLengthB)
                Quit("FastScorePath2, I zero length");

            const ProfPos &PPB = PB[uPrefixLengthB - 1];
            bHasGap = true;
            if ('S' == cPrevType || 'M' == cPrevType)
                scoreGap = PPB.m_scoreGapOpen;
            else if ('I' == cPrevType)
                // scoreGap = g_scoreGapExtend;
                scoreGap = 0;
            else if ('D' == cPrevType)
                Quit("FastScorePath2 DI");
        }
        else
        {
            if ('U' == cType)
                Quit("FastScorePath2 U");
            Quit("FastScorePath2: invalid type %c", cType);
        }

        Log("%4u %c%c %4u %4u ", uEdgeIndex, cPrevType, cType,
          uPrefixLengthA, uPrefixLengthB);
        if (bHasMatch)
            Log("%7.1f ", scoreMatch);
        else
            Log(" ");
        if (bHasGap)
            Log("%7.1f ", scoreGap);
        else
            Log(" ");
        SCORE scoreEdge = scoreMatch + scoreGap;
        scoreTotal += scoreEdge;
        Log("%7.1f %7.1f", scoreEdge, scoreTotal);
        Log("\n");
    }

    // Closing term, chosen by the type of the last edge ('S' when the path
    // has no edges).
    SCORE scoreGap = 0;
//  if (!g_bTermGapsHalf)
    if ('D' == cType)
        scoreGap = PA[uLengthA - 1].m_scoreGapClose;
    else if ('I' == cType)
        scoreGap = PB[uLengthB - 1].m_scoreGapClose;
    else if ('U' == cType)
        Quit("Unaligned regions not supported");
    else if ('M' != cType && 'S' != cType)
        Quit("Invalid type %c", cType);

    Log(" %cE %4u %4u %7.1f\n", cType, uLengthA, uLengthB, scoreGap);
    scoreTotal += scoreGap;

    Log("Total = %g\n", scoreTotal);
    return scoreTotal;
}
|
||||
161
src/muscle/muscle3.8.31/src/finddiags.cpp
Normal file
161
src/muscle/muscle3.8.31/src/finddiags.cpp
Normal file
@@ -0,0 +1,161 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "diaglist.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
const unsigned KTUP = 5;
|
||||
const unsigned KTUPS = 6*6*6*6*6;
|
||||
static unsigned TuplePos[KTUPS];
|
||||
|
||||
// Format tuple index t as five base-6 digits, most significant digit first.
// The result lives in a static buffer that is overwritten by the next call.
static char *TupleToStr(int t)
{
    // Static storage is zero-initialised and only s[0..4] are ever written,
    // so the digits are always followed by a NUL.
    static char s[7];

    // Peel digits off from least to most significant, filling the buffer
    // from right to left.
    int iRest = t;
    for (int iPos = 4; iPos >= 0; --iPos)
    {
        s[iPos] = '0' + iRest%6;
        iRest /= 6;
    }
    return s;
}
|
||||
|
||||
static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
|
||||
{
|
||||
const unsigned t0 = PP[uPos].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == t0)
|
||||
return EMPTY;
|
||||
|
||||
const unsigned t1 = PP[uPos+1].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == t1)
|
||||
return EMPTY;
|
||||
|
||||
const unsigned t2 = PP[uPos+2].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == t2)
|
||||
return EMPTY;
|
||||
|
||||
const unsigned t3 = PP[uPos+3].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == t3)
|
||||
return EMPTY;
|
||||
|
||||
const unsigned t4 = PP[uPos+4].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == t4)
|
||||
return EMPTY;
|
||||
|
||||
return t0 + t1*6 + t2*6*6 + t3*6*6*6 + t4*6*6*6*6;
|
||||
}
|
||||
|
||||
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL)
|
||||
{
|
||||
if (ALPHA_Amino != g_Alpha)
|
||||
Quit("FindDiags: requires amino acid alphabet");
|
||||
|
||||
DL.Clear();
|
||||
|
||||
if (uLengthX < 12 || uLengthY < 12)
|
||||
return;
|
||||
|
||||
// Set A to shorter profile, B to longer
|
||||
const ProfPos *PA;
|
||||
const ProfPos *PB;
|
||||
unsigned uLengthA;
|
||||
unsigned uLengthB;
|
||||
bool bSwap;
|
||||
if (uLengthX < uLengthY)
|
||||
{
|
||||
bSwap = false;
|
||||
PA = PX;
|
||||
PB = PY;
|
||||
uLengthA = uLengthX;
|
||||
uLengthB = uLengthY;
|
||||
}
|
||||
else
|
||||
{
|
||||
bSwap = true;
|
||||
PA = PY;
|
||||
PB = PX;
|
||||
uLengthA = uLengthY;
|
||||
uLengthB = uLengthX;
|
||||
}
|
||||
|
||||
// Build tuple map for the longer profile, B
|
||||
if (uLengthB < KTUP)
|
||||
Quit("FindDiags: profile too short");
|
||||
|
||||
memset(TuplePos, EMPTY, sizeof(TuplePos));
|
||||
|
||||
for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(PB, uPos);
|
||||
if (EMPTY == uTuple)
|
||||
continue;
|
||||
TuplePos[uTuple] = uPos;
|
||||
}
|
||||
|
||||
// Find matches
|
||||
for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(PA, uPosA);
|
||||
if (EMPTY == uTuple)
|
||||
continue;
|
||||
const unsigned uPosB = TuplePos[uTuple];
|
||||
if (EMPTY == uPosB)
|
||||
continue;
|
||||
|
||||
// This tuple is found in both profiles
|
||||
unsigned uStartPosA = uPosA;
|
||||
unsigned uStartPosB = uPosB;
|
||||
|
||||
// Try to extend the match forwards
|
||||
unsigned uEndPosA = uPosA + KTUP - 1;
|
||||
unsigned uEndPosB = uPosB + KTUP - 1;
|
||||
for (;;)
|
||||
{
|
||||
if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
|
||||
break;
|
||||
const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
|
||||
break;
|
||||
const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
|
||||
break;
|
||||
if (uAAGroupA != uAAGroupB)
|
||||
break;
|
||||
++uEndPosA;
|
||||
++uEndPosB;
|
||||
}
|
||||
uPosA = uEndPosA;
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
|
||||
for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
|
||||
Log("%c", 'A' + PA[n].m_uResidueGroup);
|
||||
Log("\n");
|
||||
Log(" B %4u-%4u ", uStartPosB, uEndPosB);
|
||||
for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
|
||||
Log("%c", 'A' + PB[n].m_uResidueGroup);
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
const unsigned uLength = uEndPosA - uStartPosA + 1;
|
||||
assert(uEndPosB - uStartPosB + 1 == uLength);
|
||||
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
{
|
||||
if (bSwap)
|
||||
DL.Add(uStartPosB, uStartPosA, uLength);
|
||||
else
|
||||
DL.Add(uStartPosA, uStartPosB, uLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
152
src/muscle/muscle3.8.31/src/finddiagsn.cpp
Normal file
152
src/muscle/muscle3.8.31/src/finddiagsn.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "diaglist.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
#define pow4(i) (1 << (2*i)) // 4^i = 2^(2*i)
|
||||
const unsigned K = 7;
|
||||
const unsigned KTUPS = pow4(K);
|
||||
static unsigned TuplePos[KTUPS];
|
||||
|
||||
static char *TupleToStr(int t)
|
||||
{
|
||||
static char s[K];
|
||||
|
||||
for (int i = 0; i < K; ++i)
|
||||
{
|
||||
unsigned Letter = (t/(pow4(i)))%4;
|
||||
assert(Letter >= 0 && Letter < 4);
|
||||
s[K-i-1] = LetterToChar(Letter);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
|
||||
{
|
||||
unsigned t = 0;
|
||||
|
||||
for (unsigned i = 0; i < K; ++i)
|
||||
{
|
||||
const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == uLetter)
|
||||
return EMPTY;
|
||||
t = t*4 + uLetter;
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
|
||||
unsigned uLengthY, DiagList &DL)
|
||||
{
|
||||
if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
|
||||
Quit("FindDiagsNuc: requires nucleo alphabet");
|
||||
|
||||
DL.Clear();
|
||||
|
||||
// 16 is arbitrary slop, no principled reason for this.
|
||||
if (uLengthX < K + 16 || uLengthY < K + 16)
|
||||
return;
|
||||
|
||||
// Set A to shorter profile, B to longer
|
||||
const ProfPos *PA;
|
||||
const ProfPos *PB;
|
||||
unsigned uLengthA;
|
||||
unsigned uLengthB;
|
||||
bool bSwap;
|
||||
if (uLengthX < uLengthY)
|
||||
{
|
||||
bSwap = false;
|
||||
PA = PX;
|
||||
PB = PY;
|
||||
uLengthA = uLengthX;
|
||||
uLengthB = uLengthY;
|
||||
}
|
||||
else
|
||||
{
|
||||
bSwap = true;
|
||||
PA = PY;
|
||||
PB = PX;
|
||||
uLengthA = uLengthY;
|
||||
uLengthB = uLengthX;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
|
||||
#endif
|
||||
|
||||
// Build tuple map for the longer profile, B
|
||||
if (uLengthB < K)
|
||||
Quit("FindDiags: profile too short");
|
||||
|
||||
memset(TuplePos, EMPTY, sizeof(TuplePos));
|
||||
|
||||
for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(PB, uPos);
|
||||
if (EMPTY == uTuple)
|
||||
continue;
|
||||
TuplePos[uTuple] = uPos;
|
||||
}
|
||||
|
||||
// Find matches
|
||||
for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
|
||||
{
|
||||
const unsigned uTuple = GetTuple(PA, uPosA);
|
||||
if (EMPTY == uTuple)
|
||||
continue;
|
||||
const unsigned uPosB = TuplePos[uTuple];
|
||||
if (EMPTY == uPosB)
|
||||
continue;
|
||||
|
||||
// This tuple is found in both profiles
|
||||
unsigned uStartPosA = uPosA;
|
||||
unsigned uStartPosB = uPosB;
|
||||
|
||||
// Try to extend the match forwards
|
||||
unsigned uEndPosA = uPosA + K - 1;
|
||||
unsigned uEndPosB = uPosB + K - 1;
|
||||
for (;;)
|
||||
{
|
||||
if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
|
||||
break;
|
||||
const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
|
||||
break;
|
||||
const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
|
||||
if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
|
||||
break;
|
||||
if (uAAGroupA != uAAGroupB)
|
||||
break;
|
||||
++uEndPosA;
|
||||
++uEndPosB;
|
||||
}
|
||||
uPosA = uEndPosA;
|
||||
|
||||
#if TRACE
|
||||
{
|
||||
Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
|
||||
for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
|
||||
Log("%c", LetterToChar(PA[n].m_uResidueGroup));
|
||||
Log("\n");
|
||||
Log(" B %4u-%4u ", uStartPosB, uEndPosB);
|
||||
for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
|
||||
Log("%c", LetterToChar(PB[n].m_uResidueGroup));
|
||||
Log("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
const unsigned uLength = uEndPosA - uStartPosA + 1;
|
||||
assert(uEndPosB - uStartPosB + 1 == uLength);
|
||||
|
||||
if (uLength >= g_uMinDiagLength)
|
||||
{
|
||||
if (bSwap)
|
||||
DL.Add(uStartPosB, uStartPosA, uLength);
|
||||
else
|
||||
DL.Add(uStartPosA, uStartPosB, uLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
69
src/muscle/muscle3.8.31/src/gapscoredimer.h
Normal file
69
src/muscle/muscle3.8.31/src/gapscoredimer.h
Normal file
@@ -0,0 +1,69 @@
|
||||
// source code generated by dimer.py
|
||||
|
||||
// Generated by dimer.py; regenerate rather than editing the terms by hand.
// Returns the open, extend and ambiguous gap scores, each weighted by a sum
// of products of the m_LL/m_LG/m_GL/m_GG fields of the two positions.
// NOTE(review): presumably the M-to-M transition case -- confirm against
// dimer.py.
static SCORE GapScoreMM(const ProfPos &PPA, const ProfPos &PPB)
{
    const ProfPos &a = PPA;
    const ProfPos &b = PPB;
    return
      g_scoreGapOpen*(a.m_LL*b.m_LG + a.m_LG*b.m_LL + a.m_LG*b.m_GL + a.m_GL*b.m_LG) +
      g_scoreGapExtend*(a.m_LL*b.m_GG + a.m_GG*b.m_LL) +
      g_scoreGapAmbig*(a.m_GL*b.m_GG + a.m_GG*b.m_GL);
}
|
||||
|
||||
// Generated by dimer.py; regenerate rather than editing the terms by hand.
// Returns the open, extend and ambiguous gap scores, each weighted by a sum
// of products of the m_LL/m_LG/m_GL/m_GG fields of the two positions.
// NOTE(review): presumably the M-to-D transition case -- confirm against
// dimer.py.
static SCORE GapScoreMD(const ProfPos &PPA, const ProfPos &PPB)
{
    const ProfPos &a = PPA;
    const ProfPos &b = PPB;
    return
      g_scoreGapOpen*(a.m_LL*b.m_LL + a.m_LL*b.m_GL + a.m_GL*b.m_LL + a.m_GL*b.m_GL) +
      g_scoreGapExtend*(a.m_LL*b.m_LG + a.m_LL*b.m_GG) +
      g_scoreGapAmbig*(a.m_GL*b.m_LG + a.m_GL*b.m_GG);
}
|
||||
|
||||
// Generated by dimer.py; regenerate rather than editing the terms by hand.
// Returns the open, extend and ambiguous gap scores, each weighted by a sum
// of products of the m_LL/m_LG/m_GL/m_GG fields of the two positions.
// NOTE(review): presumably the M-to-I transition case -- confirm against
// dimer.py.
static SCORE GapScoreMI(const ProfPos &PPA, const ProfPos &PPB)
{
    const ProfPos &a = PPA;
    const ProfPos &b = PPB;
    return
      g_scoreGapOpen*(a.m_LL*b.m_LL + a.m_LL*b.m_GL + a.m_GL*b.m_LL + a.m_GL*b.m_GL) +
      g_scoreGapExtend*(a.m_LG*b.m_LL + a.m_GG*b.m_LL) +
      g_scoreGapAmbig*(a.m_LG*b.m_GL + a.m_GG*b.m_GL);
}
|
||||
|
||||
// Gap score for the "DM" dimer case (generated by dimer.py).
// Returns open, extend and ambiguous gap penalties, each weighted by the
// sum of PPA/PPB field products listed on its line.
// NOTE(review): "DM" presumably means delete followed by match -- confirm.
static SCORE GapScoreDM(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapOpen*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL) +
|
||||
g_scoreGapExtend*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG) +
|
||||
g_scoreGapAmbig*(PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
|
||||
}
|
||||
|
||||
// Gap score for the "DD" dimer case (generated by dimer.py).
// Only extend and ambiguous penalties contribute; there is no
// g_scoreGapOpen term in this case.
// NOTE(review): "DD" presumably means delete followed by delete -- confirm.
static SCORE GapScoreDD(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GL + PPA.m_LL*PPB.m_GG) +
|
||||
g_scoreGapAmbig*(PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GL + PPA.m_GL*PPB.m_GG);
|
||||
}
|
||||
|
||||
// Gap score for the "DI" dimer case (generated by dimer.py).
// Only open and ambiguous penalties contribute; there is no
// g_scoreGapExtend term in this case.
// NOTE(review): "DI" presumably means delete followed by insert -- confirm.
static SCORE GapScoreDI(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
|
||||
g_scoreGapAmbig*(PPA.m_LG*PPB.m_LL + PPA.m_LG*PPB.m_GL + PPA.m_GG*PPB.m_LL + PPA.m_GG*PPB.m_GL);
|
||||
}
|
||||
|
||||
// Gap score for the "IM" dimer case (generated by dimer.py).
// Returns open, extend and ambiguous gap penalties, each weighted by the
// sum of PPA/PPB field products listed on its line.
// NOTE(review): "IM" presumably means insert followed by match -- confirm.
static SCORE GapScoreIM(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapOpen*(PPA.m_LL*PPB.m_LG + PPA.m_GL*PPB.m_LG) +
|
||||
g_scoreGapExtend*(PPA.m_LG*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
|
||||
g_scoreGapAmbig*(PPA.m_LL*PPB.m_GG + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GG + PPA.m_GG*PPB.m_GL);
|
||||
}
|
||||
|
||||
// Gap score for the "ID" dimer case (generated by dimer.py).
// Only open and ambiguous penalties contribute; there is no
// g_scoreGapExtend term in this case.
// NOTE(review): "ID" presumably means insert followed by delete -- confirm.
static SCORE GapScoreID(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapOpen*(PPA.m_LL*PPB.m_LL + PPA.m_LL*PPB.m_GL + PPA.m_GL*PPB.m_LL + PPA.m_GL*PPB.m_GL) +
|
||||
g_scoreGapAmbig*(PPA.m_LL*PPB.m_LG + PPA.m_LL*PPB.m_GG + PPA.m_GL*PPB.m_LG + PPA.m_GL*PPB.m_GG);
|
||||
}
|
||||
|
||||
// Gap score for the "II" dimer case (generated by dimer.py).
// Only extend and ambiguous penalties contribute; there is no
// g_scoreGapOpen term in this case.
// NOTE(review): "II" presumably means insert followed by insert -- confirm.
static SCORE GapScoreII(const ProfPos &PPA, const ProfPos &PPB)
|
||||
{
|
||||
return
|
||||
g_scoreGapExtend*(PPA.m_LL*PPB.m_LL + PPA.m_LG*PPB.m_LL + PPA.m_GL*PPB.m_LL + PPA.m_GG*PPB.m_LL) +
|
||||
g_scoreGapAmbig*(PPA.m_LL*PPB.m_GL + PPA.m_LG*PPB.m_GL + PPA.m_GL*PPB.m_GL + PPA.m_GG*PPB.m_GL);
|
||||
}
|
||||
32
src/muscle/muscle3.8.31/src/gatest.cpp
Normal file
32
src/muscle/muscle3.8.31/src/gatest.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
#include "timing.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
|
||||
// Top-level profile-profile global alignment for this test build.
// Chooses the diagonal-accelerated aligner when the global g_bDiags flag is
// set and the plain aligner otherwise; the chosen aligner's score is returned
// and Path is filled in by it.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    return g_bDiags
      ? GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path)
      : GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);
}
|
||||
|
||||
// Aligns two profiles without the diagonal shortcut, selecting the DP
// implementation from the global g_PPScore setting:
//   PPSCORE_LE -> GlobalAlignLA
//   PPSCORE_SP -> GlobalAlignNS
//   PPSCORE_SV -> GlobalAlignSimple
// Any other setting performs no alignment and returns 0.
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    if (PPSCORE_LE == g_PPScore)
        return GlobalAlignLA(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SP == g_PPScore)
        return GlobalAlignNS(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SV == g_PPScore)
        return GlobalAlignSimple(PA, uLengthA, PB, uLengthB, Path);

    return 0;
}
|
||||
165
src/muscle/muscle3.8.31/src/glbalign.cpp
Normal file
165
src/muscle/muscle3.8.31/src/glbalign.cpp
Normal file
@@ -0,0 +1,165 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
#include "timing.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
|
||||
#if !VER_3_52
|
||||
|
||||
#define COMPARE_SIMPLE 0
|
||||
|
||||
#if TIMING
|
||||
TICKS g_ticksDP = 0;
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
extern bool g_bKeepSimpleDP;
|
||||
SCORE NWSmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
SCORE NWDASmall(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
SCORE NWDASimple2(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path);
|
||||
|
||||
// In this build configuration the "no diagonals" entry point simply forwards
// to GlobalAlign and hands back its score unchanged.
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    const SCORE scoreAligned = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);
    return scoreAligned;
}
|
||||
|
||||
#if COMPARE_SIMPLE
|
||||
|
||||
// Self-checking variant of GlobalAlign (compiled only when COMPARE_SIMPLE is
// non-zero). Runs GlobalAlignSimple and NWSmall on the same input; if the two
// paths differ, both are logged and Quit("Paths differ") is called.
// Returns the NWSmall score; Path receives the NWSmall path.
// With TIMING enabled, elapsed clock ticks are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if TIMING
    const TICKS ticksBegin = GetClockTicks();
#endif
    g_bKeepSimpleDP = true;

    // Reference alignment from the simple implementation.
    PWPath ReferencePath;
    GlobalAlignSimple(PA, uLengthA, PB, uLengthB, ReferencePath);

    // Alignment under test.
    const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);

    const bool bPathsAgree = Path.Equal(ReferencePath);
    if (!bPathsAgree)
    {
        Log("Simple:\n");
        ReferencePath.LogMe();
        Log("Small:\n");
        Path.LogMe();
        Quit("Paths differ");
    }

#if TIMING
    const TICKS ticksEnd = GetClockTicks();
    g_ticksDP += (ticksEnd - ticksBegin);
#endif
    return scoreSmall;
}
|
||||
|
||||
#else // COMPARE_SIMPLE
|
||||
|
||||
// Default GlobalAlign for this build: delegates the whole alignment to
// NWSmall and returns its score. With TIMING enabled, the clock ticks spent
// in the call are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if TIMING
    const TICKS ticksBegin = GetClockTicks();
#endif

    const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);

#if TIMING
    const TICKS ticksEnd = GetClockTicks();
    g_ticksDP += (ticksEnd - ticksBegin);
#endif
    return scoreSmall;
}
|
||||
|
||||
#endif
|
||||
|
||||
#else // 1
|
||||
|
||||
// Replaces Path with uLengthB edges of type 'I': edge k (1-based) has
// uPrefixLengthA == 0 and uPrefixLengthB == k.
static void AllInserts(PWPath &Path, unsigned uLengthB)
{
    Path.Clear();

    PWEdge InsertEdge;
    InsertEdge.cType = 'I';
    InsertEdge.uPrefixLengthA = 0;

    for (unsigned uPrefixB = 1; uPrefixB <= uLengthB; uPrefixB++)
    {
        InsertEdge.uPrefixLengthB = uPrefixB;
        Path.AppendEdge(InsertEdge);
    }
}
|
||||
|
||||
// Replaces Path with uLengthA edges of type 'D': edge k (1-based) has
// uPrefixLengthA == k and uPrefixLengthB == 0.
static void AllDeletes(PWPath &Path, unsigned uLengthA)
{
    Path.Clear();

    PWEdge DeleteEdge;
    DeleteEdge.cType = 'D';
    DeleteEdge.uPrefixLengthB = 0;

    for (unsigned uPrefixA = 1; uPrefixA <= uLengthA; uPrefixA++)
    {
        DeleteEdge.uPrefixLengthA = uPrefixA;
        Path.AppendEdge(DeleteEdge);
    }
}
|
||||
|
||||
// GlobalAlign variant for the "#else // 1" branch of this file.
// An empty profile A yields an all-insert path and an empty profile B an
// all-delete path, both with score 0; these early exits return before the
// TIMING accumulation below. Otherwise dispatches on g_bDiags to
// GlobalAlignDiags or GlobalAlignNoDiags and returns that score.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if TIMING
    const TICKS ticksBegin = GetClockTicks();
#endif

    if (0 == uLengthA)
    {
        AllInserts(Path, uLengthB);
        return 0;
    }
    if (0 == uLengthB)
    {
        AllDeletes(Path, uLengthA);
        return 0;
    }

    const SCORE scoreAligned = g_bDiags
      ? GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path)
      : GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);

#if TIMING
    const TICKS ticksEnd = GetClockTicks();
    g_ticksDP += (ticksEnd - ticksBegin);
#endif
    return scoreAligned;
}
|
||||
|
||||
// Aligns two profiles without the diagonal shortcut.
// g_bDimer takes precedence and selects GlobalAlignDimer; otherwise the
// implementation is chosen from g_PPScore:
//   PPSCORE_LE              -> GlobalAlignLE
//   PPSCORE_SP, PPSCORE_SV  -> GlobalAlignSP
//   PPSCORE_SPN             -> GlobalAlignSPN
// Any other value is reported through Quit().
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    if (g_bDimer)
        return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_LE == g_PPScore)
        return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
        return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SPN == g_PPScore)
        return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);

    Quit("Invalid PP score (GlobalAlignNoDiags)");
    return 0;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // !VER_3_52
|
||||
55
src/muscle/muscle3.8.31/src/glbalign352.cpp
Normal file
55
src/muscle/muscle3.8.31/src/glbalign352.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#include "muscle.h"
|
||||
#include "pwpath.h"
|
||||
#include "timing.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
|
||||
#if VER_3_52
|
||||
|
||||
#if TIMING
|
||||
TICKS g_ticksDP = 0;
|
||||
#endif
|
||||
|
||||
// GlobalAlign for the VER_3_52 build: dispatches on g_bDiags to
// GlobalAlignDiags or GlobalAlignNoDiags and returns that score.
// With TIMING enabled, the clock ticks spent are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if TIMING
    const TICKS ticksBegin = GetClockTicks();
#endif

    const SCORE scoreAligned = g_bDiags
      ? GlobalAlignDiags(PA, uLengthA, PB, uLengthB, Path)
      : GlobalAlignNoDiags(PA, uLengthA, PB, uLengthB, Path);

#if TIMING
    const TICKS ticksEnd = GetClockTicks();
    g_ticksDP += (ticksEnd - ticksBegin);
#endif
    return scoreAligned;
}
|
||||
|
||||
// VER_3_52 build: aligns two profiles without the diagonal shortcut.
// g_bDimer takes precedence and selects GlobalAlignDimer; otherwise the
// implementation is chosen from g_PPScore:
//   PPSCORE_LE              -> GlobalAlignLE
//   PPSCORE_SP, PPSCORE_SV  -> GlobalAlignSP
//   PPSCORE_SPN             -> GlobalAlignSPN
// Any other value is reported through Quit().
SCORE GlobalAlignNoDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    if (g_bDimer)
        return GlobalAlignDimer(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_LE == g_PPScore)
        return GlobalAlignLE(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
        return GlobalAlignSP(PA, uLengthA, PB, uLengthB, Path);

    if (PPSCORE_SPN == g_PPScore)
        return GlobalAlignSPN(PA, uLengthA, PB, uLengthB, Path);

    Quit("Invalid PP score (GlobalAlignNoDiags)");
    return 0;
}
|
||||
|
||||
#endif // VER_3_52
|
||||
172
src/muscle/muscle3.8.31/src/glbaligndiag.cpp
Normal file
172
src/muscle/muscle3.8.31/src/glbaligndiag.cpp
Normal file
@@ -0,0 +1,172 @@
|
||||
#include "muscle.h"
|
||||
#include "dpreglist.h"
|
||||
#include "diaglist.h"
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
#include "timing.h"
|
||||
|
||||
#define TRACE 0
|
||||
#define TRACE_PATH 0
|
||||
#define LIST_DIAGS 0
|
||||
|
||||
static double g_dDPAreaWithoutDiags = 0.0;
|
||||
static double g_dDPAreaWithDiags = 0.0;
|
||||
|
||||
// Shifts every edge of Path in place: uOffsetA is added to uPrefixLengthA and
// uOffsetB to uPrefixLengthB.
static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
{
    const unsigned uCount = Path.GetEdgeCount();
    for (unsigned n = 0; n < uCount; ++n)
    {
        // The edge is obtained through a const reference, so constness is
        // deliberately cast away to write the shifted values straight back
        // into the path (a hack that bypasses the PWPath interface).
        const PWEdge &ReadOnlyEdge = Path.GetEdge(n);
        PWEdge *ptrEdge = const_cast<PWEdge *>(&ReadOnlyEdge);

        ptrEdge->uPrefixLengthA += uOffsetA;
        ptrEdge->uPrefixLengthB += uOffsetB;
    }
}
|
||||
|
||||
static void DiagToPath(const Diag &d, PWPath &Path)
|
||||
{
|
||||
Path.Clear();
|
||||
const unsigned uLength = d.m_uLength;
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
PWEdge Edge;
|
||||
Edge.cType = 'M';
|
||||
Edge.uPrefixLengthA = d.m_uStartPosA + i + 1;
|
||||
Edge.uPrefixLengthB = d.m_uStartPosB + i + 1;
|
||||
Path.AppendEdge(Edge);
|
||||
}
|
||||
}
|
||||
|
||||
// Appends every edge of RegPath, in order, to the end of Path.
// The edge count is read once before the copy starts.
static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
{
    const unsigned uCount = RegPath.GetEdgeCount();
    unsigned n = 0;
    while (n < uCount)
    {
        Path.AppendEdge(RegPath.GetEdge(n));
        ++n;
    }
}
|
||||
|
||||
// Global alignment accelerated by diagonals.
//
// Steps:
//   1. Find candidate diagonals (FindDiags for amino acid alphabets,
//      FindDiagsNuc for DNA/RNA; any other alphabet is reported via Quit).
//   2. Sort them, drop incompatible ones and merge the rest.
//   3. Convert the diagonal list into a list of DP regions.
//   4. For each region, build a sub-path (straight run of matches for a
//      diagonal region, GlobalAlignNoDiags for a rectangular region, shifted
//      to full-profile coordinates by OffsetPath) and append it to Path.
//
// Parameters:
//   PA, uLengthA  profile A and its length.
//   PB, uLengthB  profile B and its length.
//   Path          receives the region paths. Edges are appended; this
//                 function does not clear Path first.
//
// Returns: always 0 (no alignment score is computed here).
//
// Side effects: accumulates DP-area statistics in g_dDPAreaWithoutDiags and
// g_dDPAreaWithDiags. The areas are computed in double precision so that the
// product of two lengths cannot wrap around in unsigned arithmetic.
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if LIST_DIAGS
    TICKS t1 = GetClockTicks();
#endif

    DiagList DL;

    if (ALPHA_Amino == g_Alpha)
        FindDiags(PA, uLengthA, PB, uLengthB, DL);
    else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
        FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL);
    else
        Quit("GlobalAlignDiags: bad alpha");

#if TRACE
    Log("GlobalAlignDiags, diag list:\n");
    DL.LogMe();
#endif

    DL.Sort();
    DL.DeleteIncompatible();

#if TRACE
    Log("After DeleteIncompatible:\n");
    DL.LogMe();
#endif

    MergeDiags(DL);

#if TRACE
    Log("After MergeDiags:\n");
    DL.LogMe();
#endif

    DPRegionList RL;
    DiagListToDPRegionList(DL, RL, uLengthA, uLengthB);

#if TRACE
    Log("RegionList:\n");
    RL.LogMe();
#endif

#if LIST_DIAGS
    {
    TICKS t2 = GetClockTicks();
    unsigned uArea = RL.GetDPArea();
    Log("ticks=%ld\n", (long) (t2 - t1));
    Log("area=%u\n", uArea);
    }
#endif

    // Convert before multiplying: unsigned*unsigned wraps modulo 2^N.
    g_dDPAreaWithoutDiags += (double) uLengthA*(double) uLengthB;

    double dDPAreaWithDiags = 0.0;
    const unsigned uRegionCount = RL.GetCount();
    for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex)
    {
        const DPRegion &r = RL.Get(uRegionIndex);

        PWPath RegPath;
        if (DPREGIONTYPE_Diag == r.m_Type)
        {
            DiagToPath(r.m_Diag, RegPath);
#if TRACE_PATH
            Log("DiagToPath, path=\n");
            RegPath.LogMe();
#endif
        }
        else if (DPREGIONTYPE_Rect == r.m_Type)
        {
            const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA;
            const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB;
            const unsigned uRegLengthA = r.m_Rect.m_uLengthA;
            const unsigned uRegLengthB = r.m_Rect.m_uLengthB;
            const ProfPos *RegPA = PA + uRegStartPosA;
            const ProfPos *RegPB = PB + uRegStartPosB;

            // Same overflow concern as above for large rectangles.
            dDPAreaWithDiags += (double) uRegLengthA*(double) uRegLengthB;
            GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath);
#if TRACE_PATH
            Log("GlobalAlignNoDiags RegPath=\n");
            RegPath.LogMe();
#endif
            // The sub-alignment is relative to the rectangle's origin;
            // translate it into full-profile coordinates.
            OffsetPath(RegPath, uRegStartPosA, uRegStartPosB);
#if TRACE_PATH
            Log("After offset path, RegPath=\n");
            RegPath.LogMe();
#endif
        }
        else
            Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type);

        AppendRegPath(Path, RegPath);
#if TRACE_PATH
        Log("After AppendPath, path=");
        Path.LogMe();
#endif
    }

#if TRACE
    {
    double dDPAreaWithoutDiags = (double) uLengthA*(double) uLengthB;
    Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n",
      dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0);
    }
#endif
    g_dDPAreaWithDiags += dDPAreaWithDiags;
    return 0;
}
|
||||
|
||||
void ListDiagSavings()
|
||||
{
|
||||
if (!g_bVerbose || !g_bDiags)
|
||||
return;
|
||||
double dAreaSaved = g_dDPAreaWithoutDiags - g_dDPAreaWithDiags;
|
||||
double dPct = dAreaSaved*100.0/g_dDPAreaWithoutDiags;
|
||||
Log("DP area saved by diagonals %-4.1f%%\n", dPct);
|
||||
}
|
||||
432
src/muscle/muscle3.8.31/src/glbalignla.cpp
Normal file
432
src/muscle/muscle3.8.31/src/glbalignla.cpp
Normal file
@@ -0,0 +1,432 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define OCC 1
|
||||
|
||||
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength;
|
||||
SCORE *GapOpenA;
|
||||
SCORE *GapOpenB;
|
||||
SCORE *GapCloseA;
|
||||
SCORE *GapCloseB;
|
||||
SCORE *MPrev;
|
||||
SCORE *MCurr;
|
||||
SCORE *MWork;
|
||||
SCORE *DPrev;
|
||||
SCORE *DCurr;
|
||||
SCORE *DWork;
|
||||
SCORE **ScoreMxB;
|
||||
#if OCC
|
||||
FCOUNT *OccA;
|
||||
FCOUNT *OccB;
|
||||
#endif
|
||||
unsigned **SortOrderA;
|
||||
unsigned *uDeletePos;
|
||||
FCOUNT **FreqsA;
|
||||
int **TraceBack;
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 20; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
#if OCC
|
||||
delete[] DPM.OccA;
|
||||
delete[] DPM.OccB;
|
||||
#endif
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.GapOpenA = new SCORE[uLength];
|
||||
DPM.GapOpenB = new SCORE[uLength];
|
||||
DPM.GapCloseA = new SCORE[uLength];
|
||||
DPM.GapCloseB = new SCORE[uLength];
|
||||
#if OCC
|
||||
DPM.OccA = new FCOUNT[uLength];
|
||||
DPM.OccB = new FCOUNT[uLength];
|
||||
#endif
|
||||
|
||||
DPM.SortOrderA = new unsigned*[uLength];
|
||||
DPM.FreqsA = new FCOUNT*[uLength];
|
||||
DPM.ScoreMxB = new SCORE*[20];
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
DPM.ScoreMxB[uLetter] = new SCORE[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
DPM.SortOrderA[i] = new unsigned[20];
|
||||
DPM.FreqsA[i] = new FCOUNT[20];
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
}
|
||||
|
||||
// Global profile-profile alignment using the shared DPM work buffers.
//
// Outline, as visible in the body:
//   1. AllocDPMem sizes the buffers; per-position data of PA and PB is
//      copied into flat arrays (gap open/close, occupancy, sort order,
//      counts, and ScoreMxB[letter][j] = PB[j].m_AAScores[letter]).
//   2. Row i=0 of the M and D vectors is initialised, then rows
//      i=1..uLengthA-1 are computed, rotating the Prev/Curr/Work buffers
//      after each row and recording moves in TraceBack.
//   3. The final cell TraceBack[uLengthA][uLengthB] is filled in and
//      TraceBackToPath converts the table into Path.
//
// Match scores are (logf(sum) - g_scoreCenter), scaled by the occupancies of
// both positions when OCC is non-zero; a sum of exactly 0 is scored as -2.5.
//
// Returns the best score found for the final cell (scoreMax).
//
// NOTE(review): TraceBack entries appear to encode moves as signed offsets
// (0 = match, >0 = delete length, <0 = insert length) -- confirm against
// TraceBackToPath, which is defined elsewhere.
SCORE GlobalAlignLA(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path)
|
||||
{
|
||||
const unsigned uPrefixCountA = uLengthA + 1;
|
||||
const unsigned uPrefixCountB = uLengthB + 1;
|
||||
|
||||
AllocDPMem(uLengthA, uLengthB);
|
||||
|
||||
// Local aliases for the shared DPM buffers.
SCORE *GapOpenA = DPM.GapOpenA;
|
||||
SCORE *GapOpenB = DPM.GapOpenB;
|
||||
SCORE *GapCloseA = DPM.GapCloseA;
|
||||
SCORE *GapCloseB = DPM.GapCloseB;
|
||||
|
||||
unsigned **SortOrderA = DPM.SortOrderA;
|
||||
FCOUNT **FreqsA = DPM.FreqsA;
|
||||
SCORE **ScoreMxB = DPM.ScoreMxB;
|
||||
SCORE *MPrev = DPM.MPrev;
|
||||
SCORE *MCurr = DPM.MCurr;
|
||||
SCORE *MWork = DPM.MWork;
|
||||
|
||||
SCORE *DPrev = DPM.DPrev;
|
||||
SCORE *DCurr = DPM.DCurr;
|
||||
SCORE *DWork = DPM.DWork;
|
||||
|
||||
#if OCC
|
||||
FCOUNT *OccA = DPM.OccA;
|
||||
FCOUNT *OccB = DPM.OccB;
|
||||
#endif
|
||||
|
||||
unsigned *uDeletePos = DPM.uDeletePos;
|
||||
|
||||
int **TraceBack = DPM.TraceBack;
|
||||
|
||||
// Copy profile A's per-position data into the flat work arrays.
for (unsigned i = 0; i < uLengthA; ++i)
|
||||
{
|
||||
GapOpenA[i] = PA[i].m_scoreGapOpen;
|
||||
GapCloseA[i] = PA[i].m_scoreGapClose;
|
||||
#if OCC
|
||||
OccA[i] = PA[i].m_fOcc;
|
||||
#endif
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
{
|
||||
SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
|
||||
FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
|
||||
}
|
||||
}
|
||||
|
||||
// Copy profile B's gap scores and occupancies.
for (unsigned j = 0; j < uLengthB; ++j)
|
||||
{
|
||||
GapOpenB[j] = PB[j].m_scoreGapOpen;
|
||||
GapCloseB[j] = PB[j].m_scoreGapClose;
|
||||
#if OCC
|
||||
OccB[j] = PB[j].m_fOcc;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Transpose B's per-position letter scores into one row per letter.
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
{
|
||||
for (unsigned j = 0; j < uLengthB; ++j)
|
||||
ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
|
||||
}
|
||||
|
||||
// Zero the traceback cells for every (prefix of A, prefix of B) pair.
for (unsigned i = 0; i < uPrefixCountA; ++i)
|
||||
memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
|
||||
|
||||
// Special case for i=0
|
||||
unsigned **ptrSortOrderA = SortOrderA;
|
||||
FCOUNT **ptrFreqsA = FreqsA;
|
||||
assert(ptrSortOrderA == &(SortOrderA[0]));
|
||||
assert(ptrFreqsA == &(FreqsA[0]));
|
||||
TraceBack[0][0] = 0;
|
||||
|
||||
// Sum count*score over A[0]'s letters in sort order, stopping at the
// first letter whose count is zero.
SCORE scoreSum = 0;
|
||||
unsigned *ptrSortOrderAi = SortOrderA[0];
|
||||
const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
|
||||
FCOUNT *ptrFreqsAi = FreqsA[0];
|
||||
for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
|
||||
{
|
||||
const unsigned uLetter = *ptrSortOrderAi;
|
||||
const FCOUNT fcLetter = ptrFreqsAi[uLetter];
|
||||
if (0 == fcLetter)
|
||||
break;
|
||||
scoreSum += fcLetter*ScoreMxB[uLetter][0];
|
||||
}
|
||||
if (0 == scoreSum)
|
||||
MPrev[0] = -2.5;
|
||||
else
|
||||
{
|
||||
#if OCC
|
||||
MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
|
||||
#else
|
||||
MPrev[0] = (logf(scoreSum) - g_scoreCenter);
|
||||
#endif
|
||||
}
|
||||
|
||||
// D(0,0) is -infinity (requires I->D).
|
||||
DPrev[0] = MINUS_INFINITY;
|
||||
|
||||
for (unsigned j = 1; j < uLengthB; ++j)
|
||||
{
|
||||
// Only way to get M(0, j) looks like this:
|
||||
// A ----X
|
||||
// B XXXXX
|
||||
// 0 j
|
||||
// So gap-open at j=0, gap-close at j-1.
|
||||
SCORE scoreSum = 0;
|
||||
unsigned *ptrSortOrderAi = SortOrderA[0];
|
||||
const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
|
||||
FCOUNT *ptrFreqsAi = FreqsA[0];
|
||||
for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
|
||||
{
|
||||
const unsigned uLetter = *ptrSortOrderAi;
|
||||
const FCOUNT fcLetter = ptrFreqsAi[uLetter];
|
||||
if (0 == fcLetter)
|
||||
break;
|
||||
scoreSum += fcLetter*ScoreMxB[uLetter][j];
|
||||
}
|
||||
if (0 == scoreSum)
|
||||
MPrev[j] = -2.5;
|
||||
else
|
||||
{
|
||||
#if OCC
|
||||
MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
|
||||
GapOpenB[0] + GapCloseB[j-1];
|
||||
#else
|
||||
MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
|
||||
GapOpenB[0] + GapCloseB[j-1];
|
||||
#endif
|
||||
}
|
||||
TraceBack[0][j] = -(int) j;
|
||||
|
||||
// Assume no D->I transitions, then can't be a delete if only
|
||||
// one letter from A.
|
||||
DPrev[j] = MINUS_INFINITY;
|
||||
}
|
||||
|
||||
SCORE IPrev_j_1;
|
||||
// Main DP: one pass per position of A after the first.
for (unsigned i = 1; i < uLengthA; ++i)
|
||||
{
|
||||
++ptrSortOrderA;
|
||||
++ptrFreqsA;
|
||||
assert(ptrSortOrderA == &(SortOrderA[i]));
|
||||
assert(ptrFreqsA == &(FreqsA[i]));
|
||||
|
||||
SCORE *ptrMCurr_j = MCurr;
|
||||
memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
|
||||
const FCOUNT *FreqsAi = *ptrFreqsA;
|
||||
|
||||
const unsigned *SortOrderAi = *ptrSortOrderA;
|
||||
const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
|
||||
const SCORE *ptrMCurrMax = MCurr + uLengthB;
|
||||
// Accumulate count*score into MCurr[0..uLengthB-1], one letter of A[i]
// at a time, stopping at the first letter whose count is zero.
for (const unsigned *ptrSortOrderAi = SortOrderAi;
|
||||
ptrSortOrderAi != ptrSortOrderAiEnd;
|
||||
++ptrSortOrderAi)
|
||||
{
|
||||
const unsigned uLetter = *ptrSortOrderAi;
|
||||
SCORE *NSBR_Letter = ScoreMxB[uLetter];
|
||||
const FCOUNT fcLetter = FreqsAi[uLetter];
|
||||
if (0 == fcLetter)
|
||||
break;
|
||||
SCORE *ptrNSBR = NSBR_Letter;
|
||||
for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
|
||||
*ptrMCurr += fcLetter*(*ptrNSBR++);
|
||||
}
|
||||
|
||||
#if OCC
|
||||
const FCOUNT OccAi = OccA[i];
|
||||
#endif
|
||||
// Turn the raw sums into match scores (a sum of exactly 0 becomes -2.5).
for (unsigned j = 0; j < uLengthB; ++j)
|
||||
{
|
||||
if (MCurr[j] == 0)
|
||||
MCurr[j] = -2.5;
|
||||
else
|
||||
#if OCC
|
||||
MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
|
||||
#else
|
||||
MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
|
||||
#endif
|
||||
}
|
||||
|
||||
ptrMCurr_j = MCurr;
|
||||
unsigned *ptrDeletePos = uDeletePos;
|
||||
|
||||
// Special case for j=0
|
||||
// Only way to get M(i, 0) looks like this:
|
||||
// 0 i
|
||||
// A XXXXX
|
||||
// B ----X
|
||||
// So gap-open at i=0, gap-close at i-1.
|
||||
assert(ptrMCurr_j == &(MCurr[0]));
|
||||
*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
|
||||
|
||||
++ptrMCurr_j;
|
||||
|
||||
int *ptrTraceBack_ij = TraceBack[i];
|
||||
*ptrTraceBack_ij++ = (int) i;
|
||||
|
||||
SCORE *ptrMPrev_j = MPrev;
|
||||
SCORE *ptrDPrev = DPrev;
|
||||
SCORE d = *ptrDPrev;
|
||||
SCORE DNew = *ptrMPrev_j + GapOpenA[i];
|
||||
if (DNew > d)
|
||||
{
|
||||
d = DNew;
|
||||
*ptrDeletePos = i;
|
||||
}
|
||||
|
||||
SCORE *ptrDCurr = DCurr;
|
||||
|
||||
assert(ptrDCurr == &(DCurr[0]));
|
||||
*ptrDCurr = d;
|
||||
|
||||
// Can't have an insert if no letters from B
|
||||
IPrev_j_1 = MINUS_INFINITY;
|
||||
|
||||
// NOTE(review): declared without an initializer; it is assigned only when
// INew > IPrev_j_1 and read only when scoreI > scoreMax.
unsigned uInsertPos;
|
||||
const SCORE scoreGapOpenAi = GapOpenA[i];
|
||||
const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
|
||||
|
||||
for (unsigned j = 1; j < uLengthB; ++j)
|
||||
{
|
||||
// Here, MPrev_j is preserved from previous
|
||||
// iteration so with current i,j is M[i-1][j-1]
|
||||
SCORE MPrev_j = *ptrMPrev_j;
|
||||
SCORE INew = MPrev_j + GapOpenB[j];
|
||||
if (INew > IPrev_j_1)
|
||||
{
|
||||
IPrev_j_1 = INew;
|
||||
uInsertPos = j;
|
||||
}
|
||||
|
||||
// Best predecessor so far is the diagonal (match) move; the traceback
// cell keeps its zeroed value unless D or I beats it below.
SCORE scoreMax = MPrev_j;
|
||||
|
||||
assert(ptrDPrev == &(DPrev[j-1]));
|
||||
SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
|
||||
if (scoreD > scoreMax)
|
||||
{
|
||||
scoreMax = scoreD;
|
||||
assert(ptrDeletePos == &(uDeletePos[j-1]));
|
||||
*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
|
||||
assert(*ptrTraceBack_ij > 0);
|
||||
}
|
||||
++ptrDeletePos;
|
||||
|
||||
SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
|
||||
if (scoreI > scoreMax)
|
||||
{
|
||||
scoreMax = scoreI;
|
||||
*ptrTraceBack_ij = (int) uInsertPos - (int) j;
|
||||
assert(*ptrTraceBack_ij < 0);
|
||||
}
|
||||
|
||||
assert(ptrSortOrderA == &(SortOrderA[i]));
|
||||
assert(ptrFreqsA == &(FreqsA[i]));
|
||||
|
||||
*ptrMCurr_j += scoreMax;
|
||||
assert(ptrMCurr_j == &(MCurr[j]));
|
||||
++ptrMCurr_j;
|
||||
|
||||
// Update the D vector and remembered delete start for column j.
MPrev_j = *(++ptrMPrev_j);
|
||||
assert(ptrDPrev == &(DPrev[j]));
|
||||
SCORE d = *ptrDPrev;
|
||||
SCORE DNew = MPrev_j + scoreGapOpenAi;
|
||||
if (DNew > d)
|
||||
{
|
||||
d = DNew;
|
||||
assert(ptrDeletePos == &uDeletePos[j]);
|
||||
*ptrDeletePos = i;
|
||||
}
|
||||
assert(ptrDCurr + 1 == &(DCurr[j]));
|
||||
*(++ptrDCurr) = d;
|
||||
|
||||
++ptrTraceBack_ij;
|
||||
}
|
||||
|
||||
// NOTE(review): Rotate is defined elsewhere; the later reads of MPrev/DPrev
// suggest it makes the just-computed Curr row the new Prev row -- confirm.
Rotate(MPrev, MCurr, MWork);
|
||||
Rotate(DPrev, DCurr, DWork);
|
||||
}
|
||||
|
||||
// Special case for i=uLengthA
|
||||
SCORE IPrev = MINUS_INFINITY;
|
||||
|
||||
// NOTE(review): also declared without an initializer; it stays unassigned if
// the loop below never finds INew > IPrev (e.g. when uLengthB < 2).
unsigned uInsertPos;
|
||||
|
||||
for (unsigned j = 1; j < uLengthB; ++j)
|
||||
{
|
||||
SCORE INew = MPrev[j-1] + GapOpenB[j];
|
||||
if (INew > IPrev)
|
||||
{
|
||||
uInsertPos = j;
|
||||
IPrev = INew;
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for i=uLengthA, j=uLengthB
|
||||
SCORE scoreMax = MPrev[uLengthB-1];
|
||||
int iTraceBack = 0;
|
||||
|
||||
SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
|
||||
if (scoreD > scoreMax)
|
||||
{
|
||||
scoreMax = scoreD;
|
||||
iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
|
||||
}
|
||||
|
||||
SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
|
||||
if (scoreI > scoreMax)
|
||||
{
|
||||
scoreMax = scoreI;
|
||||
iTraceBack = (int) uInsertPos - (int) uLengthB;
|
||||
}
|
||||
|
||||
TraceBack[uLengthA][uLengthB] = iTraceBack;
|
||||
|
||||
// Convert the filled traceback table into the output path.
TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
|
||||
|
||||
return scoreMax;
|
||||
}
|
||||
435
src/muscle/muscle3.8.31/src/glbalignle.cpp
Normal file
435
src/muscle/muscle3.8.31/src/glbalignle.cpp
Normal file
@@ -0,0 +1,435 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
#define OCC 1
|
||||
|
||||
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength;
|
||||
SCORE *GapOpenA;
|
||||
SCORE *GapOpenB;
|
||||
SCORE *GapCloseA;
|
||||
SCORE *GapCloseB;
|
||||
SCORE *MPrev;
|
||||
SCORE *MCurr;
|
||||
SCORE *MWork;
|
||||
SCORE *DPrev;
|
||||
SCORE *DCurr;
|
||||
SCORE *DWork;
|
||||
SCORE **ScoreMxB;
|
||||
#if OCC
|
||||
FCOUNT *OccA;
|
||||
FCOUNT *OccB;
|
||||
#endif
|
||||
unsigned **SortOrderA;
|
||||
unsigned *uDeletePos;
|
||||
FCOUNT **FreqsA;
|
||||
int **TraceBack;
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 20; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
#if OCC
|
||||
delete[] DPM.OccA;
|
||||
delete[] DPM.OccB;
|
||||
#endif
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.GapOpenA = new SCORE[uLength];
|
||||
DPM.GapOpenB = new SCORE[uLength];
|
||||
DPM.GapCloseA = new SCORE[uLength];
|
||||
DPM.GapCloseB = new SCORE[uLength];
|
||||
#if OCC
|
||||
DPM.OccA = new FCOUNT[uLength];
|
||||
DPM.OccB = new FCOUNT[uLength];
|
||||
#endif
|
||||
|
||||
DPM.SortOrderA = new unsigned*[uLength];
|
||||
DPM.FreqsA = new FCOUNT*[uLength];
|
||||
DPM.ScoreMxB = new SCORE*[20];
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
DPM.ScoreMxB[uLetter] = new SCORE[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
DPM.SortOrderA[i] = new unsigned[20];
|
||||
DPM.FreqsA[i] = new FCOUNT[20];
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
}
|
||||
|
||||
// Global alignment of profile PA (uLengthA columns) against profile PB
// (uLengthB columns) by dynamic programming.
//
// The match score of a column pair is log(sum over letters of
// freqA[letter]*scoreB[letter]) - g_scoreCenter (optionally weighted by
// the occupancies when OCC is enabled); a zero sum is scored as -2.5.
// Only two rows of the M (match) and D (delete) matrices are kept and
// rotated; the insert state is tracked with running scalars. A full
// trace-back matrix is filled in and converted to Path by
// TraceBackToPath.
//
// Trace-back encoding written here: 0 is the initial value (left in place
// when neither the delete nor the insert score beats the diagonal score),
// a positive value is i - (row where the delete was opened), a negative
// value is (column where the insert was opened) - j.
//
// Both lengths must be non-zero. Returns the best alignment score.
SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    // uLengthA-1 and uLengthB-1 are used as indexes below; a zero length
    // would wrap around.
    assert(uLengthB > 0 && uLengthA > 0);

    SetTermGaps(PA, uLengthA);
    SetTermGaps(PB, uLengthB);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    // Local aliases for the shared work buffers.
    SCORE *GapOpenA = DPM.GapOpenA;
    SCORE *GapOpenB = DPM.GapOpenB;
    SCORE *GapCloseA = DPM.GapCloseA;
    SCORE *GapCloseB = DPM.GapCloseB;

    unsigned **SortOrderA = DPM.SortOrderA;
    FCOUNT **FreqsA = DPM.FreqsA;
    SCORE **ScoreMxB = DPM.ScoreMxB;
    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;

#if OCC
    FCOUNT *OccA = DPM.OccA;
    FCOUNT *OccB = DPM.OccB;
#endif

    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

    // Copy the per-column data of A into flat arrays.
    for (unsigned i = 0; i < uLengthA; ++i)
    {
        GapOpenA[i] = PA[i].m_scoreGapOpen;
        GapCloseA[i] = PA[i].m_scoreGapClose;
#if OCC
        OccA[i] = PA[i].m_fOcc;
#endif

        for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
        {
            SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
            FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
        }
    }

    // Copy the per-column gap data of B.
    for (unsigned j = 0; j < uLengthB; ++j)
    {
        GapOpenB[j] = PB[j].m_scoreGapOpen;
        GapCloseB[j] = PB[j].m_scoreGapClose;
#if OCC
        OccB[j] = PB[j].m_fOcc;
#endif
    }

    // Transpose B's letter scores so that each letter has a contiguous
    // row indexed by column of B.
    for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
    {
        for (unsigned j = 0; j < uLengthB; ++j)
            ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
    }

    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

    // Special case for i=0
    unsigned **ptrSortOrderA = SortOrderA;
    FCOUNT **ptrFreqsA = FreqsA;
    assert(ptrSortOrderA == &(SortOrderA[0]));
    assert(ptrFreqsA == &(FreqsA[0]));
    TraceBack[0][0] = 0;

    SCORE scoreSum = 0;
    unsigned *ptrSortOrderAi = SortOrderA[0];
    const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
    FCOUNT *ptrFreqsAi = FreqsA[0];
    for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
    {
        const unsigned uLetter = *ptrSortOrderAi;
        const FCOUNT fcLetter = ptrFreqsAi[uLetter];
        // Letters are visited in sort order; stop at the first zero count.
        if (0 == fcLetter)
            break;
        scoreSum += fcLetter*ScoreMxB[uLetter][0];
    }
    if (0 == scoreSum)
        MPrev[0] = -2.5;
    else
    {
#if OCC
        MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
#else
        MPrev[0] = (logf(scoreSum) - g_scoreCenter);
#endif
    }

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        SCORE scoreSum = 0;
        unsigned *ptrSortOrderAi = SortOrderA[0];
        const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
        FCOUNT *ptrFreqsAi = FreqsA[0];
        for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            const FCOUNT fcLetter = ptrFreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            scoreSum += fcLetter*ScoreMxB[uLetter][j];
        }
        if (0 == scoreSum)
            MPrev[j] = -2.5;
        else
        {
#if OCC
            MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
              GapOpenB[0] + GapCloseB[j-1];
#else
            MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
              GapOpenB[0] + GapCloseB[j-1];
#endif
        }
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        ++ptrSortOrderA;
        ++ptrFreqsA;
        assert(ptrSortOrderA == &(SortOrderA[i]));
        assert(ptrFreqsA == &(FreqsA[i]));

        // First pass over the row: accumulate the raw letter-pair sums
        // for every column j into MCurr.
        SCORE *ptrMCurr_j = MCurr;
        memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
        const FCOUNT *FreqsAi = *ptrFreqsA;

        const unsigned *SortOrderAi = *ptrSortOrderA;
        const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
        const SCORE *ptrMCurrMax = MCurr + uLengthB;
        for (const unsigned *ptrSortOrderAi = SortOrderAi;
          ptrSortOrderAi != ptrSortOrderAiEnd;
          ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            SCORE *NSBR_Letter = ScoreMxB[uLetter];
            const FCOUNT fcLetter = FreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            SCORE *ptrNSBR = NSBR_Letter;
            for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
                *ptrMCurr += fcLetter*(*ptrNSBR++);
        }

#if OCC
        const FCOUNT OccAi = OccA[i];
#endif
        // Second pass: turn the sums into log scores.
        for (unsigned j = 0; j < uLengthB; ++j)
        {
            if (MCurr[j] == 0)
                MCurr[j] = -2.5;
            else
#if OCC
                MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
#else
                MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
#endif
        }

        ptrMCurr_j = MCurr;
        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + GapOpenA[i];
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        unsigned uInsertPos = 0;
        const SCORE scoreGapOpenAi = GapOpenA[i];
        const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + GapOpenB[j];
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            assert(ptrSortOrderA == &(SortOrderA[i]));
            assert(ptrFreqsA == &(FreqsA[i]));

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            // Advance to M[i-1][j] and update the delete state for
            // column j.
            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + scoreGapOpenAi;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        // The row just computed becomes the "previous" row of the next
        // iteration (Rotate is defined elsewhere).
        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    // Initialized so that it is never read indeterminate when the loop
    // below does not execute (uLengthB == 1) or never improves on IPrev.
    unsigned uInsertPos = 0;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1] + GapOpenB[j];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
|
||||
374
src/muscle/muscle3.8.31/src/glbalignns.cpp
Normal file
374
src/muscle/muscle3.8.31/src/glbalignns.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength;
|
||||
SCORE *GapOpenA;
|
||||
SCORE *GapOpenB;
|
||||
SCORE *GapCloseA;
|
||||
SCORE *GapCloseB;
|
||||
SCORE *MPrev;
|
||||
SCORE *MCurr;
|
||||
SCORE *MWork;
|
||||
SCORE *DPrev;
|
||||
SCORE *DCurr;
|
||||
SCORE *DWork;
|
||||
SCORE **ScoreMxB;
|
||||
unsigned **SortOrderA;
|
||||
unsigned *uDeletePos;
|
||||
FCOUNT **FreqsA;
|
||||
int **TraceBack;
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 20; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.GapOpenA = new SCORE[uLength];
|
||||
DPM.GapOpenB = new SCORE[uLength];
|
||||
DPM.GapCloseA = new SCORE[uLength];
|
||||
DPM.GapCloseB = new SCORE[uLength];
|
||||
|
||||
DPM.SortOrderA = new unsigned*[uLength];
|
||||
DPM.FreqsA = new FCOUNT*[uLength];
|
||||
DPM.ScoreMxB = new SCORE*[20];
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
DPM.ScoreMxB[uLetter] = new SCORE[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
DPM.SortOrderA[i] = new unsigned[20];
|
||||
DPM.FreqsA[i] = new FCOUNT[20];
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
}
|
||||
|
||||
// Global alignment of profile PA (uLengthA columns) against profile PB
// (uLengthB columns) by dynamic programming.
//
// The match score of a column pair is (sum over letters of
// freqA[letter]*scoreB[letter]) - g_scoreCenter. Only two rows of the
// M (match) and D (delete) matrices are kept and rotated; the insert
// state is tracked with running scalars. A full trace-back matrix is
// filled in and converted to Path by TraceBackToPath.
//
// Trace-back encoding written here: 0 is the initial value (left in place
// when neither the delete nor the insert score beats the diagonal score),
// a positive value is i - (row where the delete was opened), a negative
// value is (column where the insert was opened) - j.
//
// Both lengths must be non-zero. Returns the best alignment score.
SCORE GlobalAlignNS(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    // uLengthA-1 and uLengthB-1 are used as indexes below; a zero length
    // would wrap around.
    assert(uLengthB > 0 && uLengthA > 0);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    // Local aliases for the shared work buffers.
    SCORE *GapOpenA = DPM.GapOpenA;
    SCORE *GapOpenB = DPM.GapOpenB;
    SCORE *GapCloseA = DPM.GapCloseA;
    SCORE *GapCloseB = DPM.GapCloseB;

    unsigned **SortOrderA = DPM.SortOrderA;
    FCOUNT **FreqsA = DPM.FreqsA;
    SCORE **ScoreMxB = DPM.ScoreMxB;
    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;
    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

    // Copy the per-column data of A into flat arrays.
    for (unsigned i = 0; i < uLengthA; ++i)
    {
        GapOpenA[i] = PA[i].m_scoreGapOpen;
        GapCloseA[i] = PA[i].m_scoreGapClose;

        for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
        {
            SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
            FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
        }
    }

    // Copy the per-column gap data of B.
    for (unsigned j = 0; j < uLengthB; ++j)
    {
        GapOpenB[j] = PB[j].m_scoreGapOpen;
        GapCloseB[j] = PB[j].m_scoreGapClose;
    }

    // Transpose B's letter scores so that each letter has a contiguous
    // row indexed by column of B.
    for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
    {
        for (unsigned j = 0; j < uLengthB; ++j)
            ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
    }

    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

    // Special case for i=0
    unsigned **ptrSortOrderA = SortOrderA;
    FCOUNT **ptrFreqsA = FreqsA;
    assert(ptrSortOrderA == &(SortOrderA[0]));
    assert(ptrFreqsA == &(FreqsA[0]));
    TraceBack[0][0] = 0;

    SCORE scoreSum = 0;
    unsigned *ptrSortOrderAi = SortOrderA[0];
    const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
    FCOUNT *ptrFreqsAi = FreqsA[0];
    for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
    {
        const unsigned uLetter = *ptrSortOrderAi;
        const FCOUNT fcLetter = ptrFreqsAi[uLetter];
        // Letters are visited in sort order; stop at the first zero count.
        if (0 == fcLetter)
            break;
        scoreSum += fcLetter*ScoreMxB[uLetter][0];
    }
    MPrev[0] = scoreSum - g_scoreCenter;

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        SCORE scoreSum = 0;
        unsigned *ptrSortOrderAi = SortOrderA[0];
        const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
        FCOUNT *ptrFreqsAi = FreqsA[0];
        for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            const FCOUNT fcLetter = ptrFreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            scoreSum += fcLetter*ScoreMxB[uLetter][j];
        }
        MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        ++ptrSortOrderA;
        ++ptrFreqsA;
        assert(ptrSortOrderA == &(SortOrderA[i]));
        assert(ptrFreqsA == &(FreqsA[i]));

        // First pass over the row: accumulate the letter-pair sums for
        // every column j into MCurr.
        SCORE *ptrMCurr_j = MCurr;
        memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
        const FCOUNT *FreqsAi = *ptrFreqsA;

        const unsigned *SortOrderAi = *ptrSortOrderA;
        const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
        const SCORE *ptrMCurrMax = MCurr + uLengthB;
        for (const unsigned *ptrSortOrderAi = SortOrderAi;
          ptrSortOrderAi != ptrSortOrderAiEnd;
          ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            SCORE *NSBR_Letter = ScoreMxB[uLetter];
            const FCOUNT fcLetter = FreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            SCORE *ptrNSBR = NSBR_Letter;
            for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
                *ptrMCurr += fcLetter*(*ptrNSBR++);
        }

        for (unsigned j = 0; j < uLengthB; ++j)
            MCurr[j] -= g_scoreCenter;

        ptrMCurr_j = MCurr;
        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + GapOpenA[i];
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        // Initialized so that it is never read indeterminate if no INew
        // ever exceeds IPrev_j_1.
        unsigned uInsertPos = 0;
        const SCORE scoreGapOpenAi = GapOpenA[i];
        const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + GapOpenB[j];
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            assert(ptrSortOrderA == &(SortOrderA[i]));
            assert(ptrFreqsA == &(FreqsA[i]));

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            // Advance to M[i-1][j] and update the delete state for
            // column j.
            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + scoreGapOpenAi;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        // The row just computed becomes the "previous" row of the next
        // iteration (Rotate is defined elsewhere).
        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    // Initialized so that it is never read indeterminate when the loop
    // below does not execute (uLengthB == 1) or never improves on IPrev.
    unsigned uInsertPos = 0;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1] + GapOpenB[j];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
|
||||
368
src/muscle/muscle3.8.31/src/glbalignsimple.cpp
Normal file
368
src/muscle/muscle3.8.31/src/glbalignsimple.cpp
Normal file
@@ -0,0 +1,368 @@
|
||||
#include "muscle.h"
|
||||
#include <math.h>
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
#if 1 // SINGLE_AFFINE
|
||||
|
||||
extern bool g_bKeepSimpleDP;
|
||||
extern SCORE *g_DPM;
|
||||
extern SCORE *g_DPD;
|
||||
extern SCORE *g_DPI;
|
||||
extern char *g_TBM;
|
||||
extern char *g_TBD;
|
||||
extern char *g_TBI;
|
||||
|
||||
// Format a score for the debug listings.
//
// Scores below -100000 are shown as a fixed placeholder string; any other
// score is formatted with "%6.1f" into a static buffer. The returned
// pointer is only valid until the next call (and the function is not
// reentrant) because that buffer is shared.
static const char *LocalScoreToStr(SCORE s)
{
    static char str[16];
    if (s < -100000)
        return " *";
    // Bounded write: a very large positive score needs far more than
    // sizeof(str) characters and would overrun the buffer with sprintf.
    // Such values are truncated instead.
    snprintf(str, sizeof(str), "%6.1f", s);
    return str;
}
|
||||
|
||||
static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
|
||||
unsigned uPrefixCountA, unsigned uPrefixCountB)
|
||||
{
|
||||
Log(" ");
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthB > 0)
|
||||
c = ConsensusChar(PB[uPrefixLengthB - 1]);
|
||||
Log(" %4u:%c", uPrefixLengthB, c);
|
||||
}
|
||||
Log("\n");
|
||||
for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthA > 0)
|
||||
c = ConsensusChar(PA[uPrefixLengthA - 1]);
|
||||
Log("%4u:%c ", uPrefixLengthA, c);
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
Log(" %6c", TBM(uPrefixLengthA, uPrefixLengthB));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
|
||||
unsigned uPrefixCountA, unsigned uPrefixCountB)
|
||||
{
|
||||
Log(" ");
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthB > 0)
|
||||
c = ConsensusChar(PB[uPrefixLengthB - 1]);
|
||||
Log(" %4u:%c", uPrefixLengthB, c);
|
||||
}
|
||||
Log("\n");
|
||||
for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthA > 0)
|
||||
c = ConsensusChar(PA[uPrefixLengthA - 1]);
|
||||
Log("%4u:%c ", uPrefixLengthA, c);
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Straightforward (full-matrix) global alignment of profile PA
// (uLengthA columns) against profile PB (uLengthB columns).
//
// Three score matrices are filled for every pair of prefix lengths:
// M (last edge is letter/letter), D (letter of A against a gap) and
// I (gap against a letter of B), each with its own trace-back matrix
// recording which state the best score came from. A fourth matrix, DPL,
// stores the raw letter/letter score of each cell and is only used for
// the TRACE listings. The matrices are accessed through the DPM(), DPD(),
// DPI(), DPL(), TBM(), TBD() and TBI() accessors defined elsewhere, which
// is why the local names DPM_, ..., TBI_ and uPrefixCountA must be kept.
//
// The path is rebuilt into Path from the end of the alignment backwards.
// When g_bKeepSimpleDP is set, the M/D/I score and trace-back matrices
// are handed over to the g_* globals (the receiver then owns them);
// otherwise they are freed. Both lengths must be non-zero.
//
// Returns the best alignment score.
SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    assert(uLengthB > 0 && uLengthA > 0);

    SetTermGaps(PA, uLengthA);
    SetTermGaps(PB, uLengthB);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    // Allocate DP matrices
    // Widen before multiplying so the cell count cannot wrap in
    // unsigned arithmetic.
    const size_t LM = (size_t) uPrefixCountA*uPrefixCountB;
    SCORE *DPL_ = new SCORE[LM];
    SCORE *DPM_ = new SCORE[LM];
    SCORE *DPD_ = new SCORE[LM];
    SCORE *DPI_ = new SCORE[LM];

    char *TBM_ = new char[LM];
    char *TBD_ = new char[LM];
    char *TBI_ = new char[LM];

    memset(TBM_, '?', LM);
    memset(TBD_, '?', LM);
    memset(TBI_, '?', LM);

    DPM(0, 0) = 0;
    DPD(0, 0) = MINUS_INFINITY;
    DPI(0, 0) = MINUS_INFINITY;

    DPM(1, 0) = MINUS_INFINITY;
    DPD(1, 0) = PA[0].m_scoreGapOpen;
    TBD(1, 0) = 'D';
    DPI(1, 0) = MINUS_INFINITY;

    DPM(0, 1) = MINUS_INFINITY;
    DPD(0, 1) = MINUS_INFINITY;
    DPI(0, 1) = PB[0].m_scoreGapOpen;
    TBI(0, 1) = 'I';

    // Empty prefix of B is special case
    for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
    {
        // M=LetterA+LetterB, impossible with empty prefix
        DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

        // D=LetterA+GapB
        DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;
        TBD(uPrefixLengthA, 0) = 'D';

        // I=GapA+LetterB, impossible with empty prefix
        DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
    }

    // Empty prefix of A is special case
    for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
    {
        // M=LetterA+LetterB, impossible with empty prefix
        DPM(0, uPrefixLengthB) = MINUS_INFINITY;

        // D=LetterA+GapB, impossible with empty prefix
        DPD(0, uPrefixLengthB) = MINUS_INFINITY;

        // I=GapA+LetterB
        DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;
        TBI(0, uPrefixLengthB) = 'I';
    }

    // Special case to agree with NWFast, no D-I transitions so...
    DPD(uLengthA, 0) = MINUS_INFINITY;
    //  DPI(0, uLengthB) = MINUS_INFINITY;

    // ============
    // Main DP loop
    // ============
    SCORE scoreGapCloseB = MINUS_INFINITY;
    for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
    {
        const ProfPos &PPB = PB[uPrefixLengthB - 1];

        SCORE scoreGapCloseA = MINUS_INFINITY;
        for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
        {
            const ProfPos &PPA = PA[uPrefixLengthA - 1];

            {
                // Match M=LetterA+LetterB
                SCORE scoreLL = ScoreProfPos2(PPA, PPB);
                DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;

                SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
                SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
                SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;

                // Ties are resolved in the order M, D, I.
                SCORE scoreBest;
                if (scoreMM >= scoreDM && scoreMM >= scoreIM)
                {
                    scoreBest = scoreMM;
                    TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
                }
                else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
                {
                    scoreBest = scoreDM;
                    TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
                }
                else
                {
                    assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
                    scoreBest = scoreIM;
                    TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
                }
                DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
            }

            {
                // Delete D=LetterA+GapB
                SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
                  PA[uPrefixLengthA-1].m_scoreGapOpen;
                SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;

                SCORE scoreBest;
                if (scoreMD >= scoreDD)
                {
                    scoreBest = scoreMD;
                    TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
                }
                else
                {
                    assert(scoreDD >= scoreMD);
                    scoreBest = scoreDD;
                    TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
                }
                DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
            }

            // Insert I=GapA+LetterB
            {
                SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
                  PB[uPrefixLengthB - 1].m_scoreGapOpen;
                SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;

                SCORE scoreBest;
                if (scoreMI >= scoreII)
                {
                    scoreBest = scoreMI;
                    TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
                }
                else
                {
                    assert(scoreII > scoreMI);
                    scoreBest = scoreII;
                    TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
                }
                DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
            }

            // Gap-close score applied when leaving D at the next
            // prefix length of A.
            scoreGapCloseA = PPA.m_scoreGapClose;
        }
        scoreGapCloseB = PPB.m_scoreGapClose;
    }

#if TRACE
    Log("\n");
    Log("Simple DPL:\n");
    ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple DPM:\n");
    ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple DPD:\n");
    ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple DPI:\n");
    ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple TBM:\n");
    ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple TBD:\n");
    ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("\n");
    Log("Simple TBI:\n");
    ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

    // Trace-back
    // ==========
    Path.Clear();

    // Find last edge
    SCORE M = DPM(uLengthA, uLengthB);
    SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
    SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
    char cEdgeType = '?';

    SCORE BestScore = MINUS_INFINITY;
    if (M >= D && M >= I)
    {
        cEdgeType = 'M';
        BestScore = M;
    }
    else if (D >= M && D >= I)
    {
        cEdgeType = 'D';
        BestScore = D;
    }
    else
    {
        assert(I >= M && I >= D);
        cEdgeType = 'I';
        BestScore = I;
    }

#if TRACE
    Log("Simple: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n", M, D, I, cEdgeType);
#endif

    // Walk back from (uLengthA, uLengthB) to (0, 0), prepending one edge
    // per step and following the trace-back matrix of the current state.
    unsigned PLA = uLengthA;
    unsigned PLB = uLengthB;
    for (;;)
    {
        PWEdge Edge;
        Edge.cType = cEdgeType;
        Edge.uPrefixLengthA = PLA;
        Edge.uPrefixLengthB = PLB;
#if TRACE
        Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
#endif
        Path.PrependEdge(Edge);

        switch (cEdgeType)
        {
        case 'M':
            assert(PLA > 0);
            assert(PLB > 0);
            cEdgeType = TBM(PLA, PLB);
            --PLA;
            --PLB;
            break;

        case 'D':
            assert(PLA > 0);
            cEdgeType = TBD(PLA, PLB);
            --PLA;
            break;

        case 'I':
            assert(PLB > 0);
            cEdgeType = TBI(PLA, PLB);
            --PLB;
            break;

        default:
            Quit("Invalid edge %c", cEdgeType);
        }
        if (0 == PLA && 0 == PLB)
            break;
    }
    Path.Validate();

    //  SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);

#if TRACE
    SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
    Path.LogMe();
    Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
#endif

    // DPL_ is never handed to a global, so it must be released on both
    // paths; it was previously leaked on every call.
    delete[] DPL_;

    if (g_bKeepSimpleDP)
    {
        // Ownership of the remaining matrices passes to the globals.
        g_DPM = DPM_;
        g_DPD = DPD_;
        g_DPI = DPI_;

        g_TBM = TBM_;
        g_TBD = TBD_;
        g_TBI = TBI_;
    }
    else
    {
        delete[] DPM_;
        delete[] DPD_;
        delete[] DPI_;

        delete[] TBM_;
        delete[] TBD_;
        delete[] TBI_;
    }

    return BestScore;
}
|
||||
|
||||
#endif // SINGLE_AFFINE
|
||||
374
src/muscle/muscle3.8.31/src/glbalignsp.cpp
Normal file
374
src/muscle/muscle3.8.31/src/glbalignsp.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength;
|
||||
SCORE *GapOpenA;
|
||||
SCORE *GapOpenB;
|
||||
SCORE *GapCloseA;
|
||||
SCORE *GapCloseB;
|
||||
SCORE *MPrev;
|
||||
SCORE *MCurr;
|
||||
SCORE *MWork;
|
||||
SCORE *DPrev;
|
||||
SCORE *DCurr;
|
||||
SCORE *DWork;
|
||||
SCORE **ScoreMxB;
|
||||
unsigned **SortOrderA;
|
||||
unsigned *uDeletePos;
|
||||
FCOUNT **FreqsA;
|
||||
int **TraceBack;
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 20; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.GapOpenA = new SCORE[uLength];
|
||||
DPM.GapOpenB = new SCORE[uLength];
|
||||
DPM.GapCloseA = new SCORE[uLength];
|
||||
DPM.GapCloseB = new SCORE[uLength];
|
||||
|
||||
DPM.SortOrderA = new unsigned*[uLength];
|
||||
DPM.FreqsA = new FCOUNT*[uLength];
|
||||
DPM.ScoreMxB = new SCORE*[20];
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
|
||||
DPM.ScoreMxB[uLetter] = new SCORE[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
DPM.SortOrderA[i] = new unsigned[20];
|
||||
DPM.FreqsA[i] = new FCOUNT[20];
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
}
|
||||
|
||||
// Global profile-profile alignment by dynamic programming over a 20-letter
// alphabet.
//
// PA, uLengthA: first profile and its number of positions.
// PB, uLengthB: second profile and its number of positions.
// Path: output, filled in by TraceBackToPath() from the trace-back matrix.
// Returns the best score found for the final cell.
//
// Both profiles must be non-empty: the code reads position 0 and position
// uLength-1 of each. All scratch buffers live in the file-level DPM and are
// (re)allocated by AllocDPMem().
SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    // uLength-1 below would wrap around for an empty profile.
    assert(uLengthA > 0 && uLengthB > 0);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    SCORE *GapOpenA = DPM.GapOpenA;
    SCORE *GapOpenB = DPM.GapOpenB;
    SCORE *GapCloseA = DPM.GapCloseA;
    SCORE *GapCloseB = DPM.GapCloseB;

    unsigned **SortOrderA = DPM.SortOrderA;
    FCOUNT **FreqsA = DPM.FreqsA;
    SCORE **ScoreMxB = DPM.ScoreMxB;
    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;
    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

    // Copy the per-position data of A into flat work arrays.
    for (unsigned i = 0; i < uLengthA; ++i)
    {
        GapOpenA[i] = PA[i].m_scoreGapOpen;
        GapCloseA[i] = PA[i].m_scoreGapClose;

        for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
        {
            SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
            FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
        }
    }

    for (unsigned j = 0; j < uLengthB; ++j)
    {
        GapOpenB[j] = PB[j].m_scoreGapOpen;
        GapCloseB[j] = PB[j].m_scoreGapClose;
    }

    // Transpose B's scores so each letter has a contiguous row over j.
    for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
    {
        for (unsigned j = 0; j < uLengthB; ++j)
            ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
    }

    // Cells that the loops below do not write must read as 0.
    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

    // Special case for i=0
    unsigned **ptrSortOrderA = SortOrderA;
    FCOUNT **ptrFreqsA = FreqsA;
    assert(ptrSortOrderA == &(SortOrderA[0]));
    assert(ptrFreqsA == &(FreqsA[0]));
    TraceBack[0][0] = 0;

    SCORE scoreSum = 0;
    unsigned *ptrSortOrderAi = SortOrderA[0];
    const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
    FCOUNT *ptrFreqsAi = FreqsA[0];
    for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
    {
        const unsigned uLetter = *ptrSortOrderAi;
        const FCOUNT fcLetter = ptrFreqsAi[uLetter];
        // Stop at the first zero count in sort order.
        if (0 == fcLetter)
            break;
        scoreSum += fcLetter*ScoreMxB[uLetter][0];
    }
    MPrev[0] = scoreSum - g_scoreCenter;

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        SCORE scoreSum = 0;
        unsigned *ptrSortOrderAi = SortOrderA[0];
        const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
        FCOUNT *ptrFreqsAi = FreqsA[0];
        for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            const FCOUNT fcLetter = ptrFreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            scoreSum += fcLetter*ScoreMxB[uLetter][j];
        }
        MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        ++ptrSortOrderA;
        ++ptrFreqsA;
        assert(ptrSortOrderA == &(SortOrderA[i]));
        assert(ptrFreqsA == &(FreqsA[i]));

        // Accumulate the match scores of A[i] against every B[j] into MCurr.
        SCORE *ptrMCurr_j = MCurr;
        memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
        const FCOUNT *FreqsAi = *ptrFreqsA;

        const unsigned *SortOrderAi = *ptrSortOrderA;
        const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
        const SCORE *ptrMCurrMax = MCurr + uLengthB;
        for (const unsigned *ptrSortOrderAi = SortOrderAi;
          ptrSortOrderAi != ptrSortOrderAiEnd;
          ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            SCORE *NSBR_Letter = ScoreMxB[uLetter];
            const FCOUNT fcLetter = FreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            SCORE *ptrNSBR = NSBR_Letter;
            for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
                *ptrMCurr += fcLetter*(*ptrNSBR++);
        }

        for (unsigned j = 0; j < uLengthB; ++j)
            MCurr[j] -= g_scoreCenter;

        ptrMCurr_j = MCurr;
        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + GapOpenA[i];
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        // Initialized so it is never read indeterminate if no INew ever
        // exceeds MINUS_INFINITY.
        unsigned uInsertPos = 0;
        const SCORE scoreGapOpenAi = GapOpenA[i];
        const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + GapOpenB[j];
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            assert(ptrSortOrderA == &(SortOrderA[i]));
            assert(ptrFreqsA == &(FreqsA[i]));

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + scoreGapOpenAi;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    unsigned uInsertPos = 0;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1] + GapOpenB[j];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
|
||||
409
src/muscle/muscle3.8.31/src/glbalignspn.cpp
Normal file
409
src/muscle/muscle3.8.31/src/glbalignspn.cpp
Normal file
@@ -0,0 +1,409 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
// Scratch buffers for GlobalAlignSPN, kept in the file-level DPM instance and
// sized by AllocDPMem. The alphabet dimension is 4 in this file.
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength; // element count the arrays below were allocated with; 0 = nothing allocated
|
||||
SCORE *GapOpenA; // [i] copy of PA[i].m_scoreGapOpen
|
||||
SCORE *GapOpenB; // [j] copy of PB[j].m_scoreGapOpen
|
||||
SCORE *GapCloseA; // [i] copy of PA[i].m_scoreGapClose
|
||||
SCORE *GapCloseB; // [j] copy of PB[j].m_scoreGapClose
|
||||
SCORE *MPrev; // M row for i-1; MPrev/MCurr/MWork are cycled with Rotate() after each row
|
||||
SCORE *MCurr; // M row being computed for the current i
|
||||
SCORE *MWork; // third buffer taking part in the Rotate() cycle
|
||||
SCORE *DPrev; // D row for i-1; DPrev/DCurr/DWork are cycled with Rotate() after each row
|
||||
SCORE *DCurr; // D row being computed for the current i
|
||||
SCORE *DWork; // third buffer taking part in the Rotate() cycle
|
||||
SCORE **ScoreMxB; // [letter][j] = PB[j].m_AAScores[letter]; 4 rows of uLength
|
||||
unsigned **SortOrderA; // [i][n] copy of PA[i].m_uSortOrder[n]; uLength rows of 4
|
||||
unsigned *uDeletePos; // [j] value of i stored whenever a new D score replaces the old one in column j
|
||||
FCOUNT **FreqsA; // [i][letter] copy of PA[i].m_fcCounts[letter]; uLength rows of 4
|
||||
int **TraceBack; // uLength x uLength matrix handed to TraceBackToPath()
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
void FreeDPMemSPN()
|
||||
{
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (0 == uOldLength)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 4; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
}
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
{
|
||||
delete[] DPM.TraceBack[i];
|
||||
delete[] DPM.FreqsA[i];
|
||||
delete[] DPM.SortOrderA[i];
|
||||
}
|
||||
for (unsigned n = 0; n < 4; ++n)
|
||||
delete[] DPM.ScoreMxB[n];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.GapOpenA;
|
||||
delete[] DPM.GapOpenB;
|
||||
delete[] DPM.GapCloseA;
|
||||
delete[] DPM.GapCloseB;
|
||||
delete[] DPM.SortOrderA;
|
||||
delete[] DPM.FreqsA;
|
||||
delete[] DPM.ScoreMxB;
|
||||
delete[] DPM.TraceBack;
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.GapOpenA = new SCORE[uLength];
|
||||
DPM.GapOpenB = new SCORE[uLength];
|
||||
DPM.GapCloseA = new SCORE[uLength];
|
||||
DPM.GapCloseB = new SCORE[uLength];
|
||||
|
||||
DPM.SortOrderA = new unsigned*[uLength];
|
||||
DPM.FreqsA = new FCOUNT*[uLength];
|
||||
DPM.ScoreMxB = new SCORE*[4];
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
|
||||
DPM.ScoreMxB[uLetter] = new SCORE[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
DPM.SortOrderA[i] = new unsigned[4];
|
||||
DPM.FreqsA[i] = new FCOUNT[4];
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
}
|
||||
|
||||
// Global profile-profile alignment by dynamic programming over a 4-letter
// (nucleotide) alphabet.
//
// PA, uLengthA: first profile and its number of positions.
// PB, uLengthB: second profile and its number of positions.
// Path: output, filled in by TraceBackToPath() from the trace-back matrix.
// Returns the best score found for the final cell.
//
// Calls Quit() unless g_Alpha is ALPHA_DNA or ALPHA_RNA. Both profiles must
// be non-empty: the code reads position 0 and position uLength-1 of each.
// All scratch buffers live in the file-level DPM and are (re)allocated by
// AllocDPMem().
SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    // Accept either nucleotide alphabet. The previous test
    // (DNA != alpha || RNA == alpha) rejected RNA.
    if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
        Quit("GlobalAlignSPN: must be nucleo");

    // uLength-1 below would wrap around for an empty profile.
    assert(uLengthA > 0 && uLengthB > 0);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    SCORE *GapOpenA = DPM.GapOpenA;
    SCORE *GapOpenB = DPM.GapOpenB;
    SCORE *GapCloseA = DPM.GapCloseA;
    SCORE *GapCloseB = DPM.GapCloseB;

    unsigned **SortOrderA = DPM.SortOrderA;
    FCOUNT **FreqsA = DPM.FreqsA;
    SCORE **ScoreMxB = DPM.ScoreMxB;
    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;
    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

    // Copy the per-position data of A into flat work arrays.
    for (unsigned i = 0; i < uLengthA; ++i)
    {
        GapOpenA[i] = PA[i].m_scoreGapOpen;
        GapCloseA[i] = PA[i].m_scoreGapClose;

        for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
        {
            SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
            FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
        }
    }

    for (unsigned j = 0; j < uLengthB; ++j)
    {
        GapOpenB[j] = PB[j].m_scoreGapOpen;
        GapCloseB[j] = PB[j].m_scoreGapClose;
    }

    // Transpose B's scores so each letter has a contiguous row over j.
    for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
    {
        for (unsigned j = 0; j < uLengthB; ++j)
            ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
    }

    // Cells that the loops below do not write must read as 0.
    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

    // Special case for i=0
    unsigned **ptrSortOrderA = SortOrderA;
    FCOUNT **ptrFreqsA = FreqsA;
    assert(ptrSortOrderA == &(SortOrderA[0]));
    assert(ptrFreqsA == &(FreqsA[0]));
    TraceBack[0][0] = 0;

    SCORE scoreSum = 0;
    unsigned *ptrSortOrderAi = SortOrderA[0];
    const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
    FCOUNT *ptrFreqsAi = FreqsA[0];
    for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
    {
        const unsigned uLetter = *ptrSortOrderAi;
        const FCOUNT fcLetter = ptrFreqsAi[uLetter];
        // Stop at the first zero count in sort order.
        if (0 == fcLetter)
            break;
        scoreSum += fcLetter*ScoreMxB[uLetter][0];
    }
    MPrev[0] = scoreSum - g_scoreCenter;

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        SCORE scoreSum = 0;
        unsigned *ptrSortOrderAi = SortOrderA[0];
        const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;
        FCOUNT *ptrFreqsAi = FreqsA[0];
        for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            const FCOUNT fcLetter = ptrFreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            scoreSum += fcLetter*ScoreMxB[uLetter][j];
        }
        MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        ++ptrSortOrderA;
        ++ptrFreqsA;
        assert(ptrSortOrderA == &(SortOrderA[i]));
        assert(ptrFreqsA == &(FreqsA[i]));

        // Accumulate the match scores of A[i] against every B[j] into MCurr.
        SCORE *ptrMCurr_j = MCurr;
        memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
        const FCOUNT *FreqsAi = *ptrFreqsA;

        const unsigned *SortOrderAi = *ptrSortOrderA;
        const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;
        const SCORE *ptrMCurrMax = MCurr + uLengthB;
        for (const unsigned *ptrSortOrderAi = SortOrderAi;
          ptrSortOrderAi != ptrSortOrderAiEnd;
          ++ptrSortOrderAi)
        {
            const unsigned uLetter = *ptrSortOrderAi;
            SCORE *NSBR_Letter = ScoreMxB[uLetter];
            const FCOUNT fcLetter = FreqsAi[uLetter];
            if (0 == fcLetter)
                break;
            SCORE *ptrNSBR = NSBR_Letter;
            for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
                *ptrMCurr += fcLetter*(*ptrNSBR++);
        }

        for (unsigned j = 0; j < uLengthB; ++j)
            MCurr[j] -= g_scoreCenter;

        ptrMCurr_j = MCurr;
        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + GapOpenA[i];
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        // Initialized so it is never read indeterminate if no INew ever
        // exceeds MINUS_INFINITY.
        unsigned uInsertPos = 0;
        const SCORE scoreGapOpenAi = GapOpenA[i];
        const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];

        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + GapOpenB[j];
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            assert(ptrSortOrderA == &(SortOrderA[i]));
            assert(ptrFreqsA == &(FreqsA[i]));

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + scoreGapOpenAi;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    unsigned uInsertPos = 0;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1] + GapOpenB[j];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
|
||||
318
src/muscle/muscle3.8.31/src/glbalignss.cpp
Normal file
318
src/muscle/muscle3.8.31/src/glbalignss.cpp
Normal file
@@ -0,0 +1,318 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
#include "seq.h"
|
||||
|
||||
extern SCOREMATRIX VTML_SP;
|
||||
|
||||
// #define SUBST(i, j) Subst(seqA, seqB, i, j)
|
||||
#define SUBST(i, j) MxRowA[i][seqB.GetLetter(j)]
|
||||
|
||||
// Score for pairing letter i of seqA with letter j of seqB: the VTML_SP
// entry for the two letters plus g_scoreCenter. Both indexes must lie inside
// their sequence (checked by assert).
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
{
    assert(i < seqA.Length());
    assert(j < seqB.Length());

    const unsigned uRow = seqA.GetLetter(i);
    const unsigned uCol = seqB.GetLetter(j);
    const SCORE scoreCentered = VTML_SP[uRow][uCol] + g_scoreCenter;
    return scoreCentered;
}
|
||||
|
||||
// Scratch buffers for GlobalAlignSS, kept in the file-level DPM instance and
// sized by AllocDPMem.
struct DP_MEMORY
|
||||
{
|
||||
unsigned uLength; // element count the arrays below were allocated with; 0 = nothing allocated
|
||||
SCORE *MPrev; // M row for i-1; MPrev/MCurr/MWork are cycled with Rotate() after each row
|
||||
SCORE *MCurr; // M row being computed for the current i
|
||||
SCORE *MWork; // third buffer taking part in the Rotate() cycle
|
||||
SCORE *DPrev; // D row for i-1; DPrev/DCurr/DWork are cycled with Rotate() after each row
|
||||
SCORE *DCurr; // D row being computed for the current i
|
||||
SCORE *DWork; // third buffer taking part in the Rotate() cycle
|
||||
SCORE **MxRowA; // [i] VTML_SP row chosen for position i of seqA (set by RowFromSeq); rows are not owned
|
||||
unsigned *LettersB; // [j] letter index for position j of seqB (set by LettersFromSeq)
|
||||
unsigned *uDeletePos; // [j] value of i stored whenever a new D score replaces the old one in column j
|
||||
int **TraceBack; // uLength x uLength matrix handed to TraceBackToPath()
|
||||
};
|
||||
|
||||
static struct DP_MEMORY DPM;
|
||||
|
||||
static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
|
||||
{
|
||||
// Max prefix length
|
||||
unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
|
||||
if (uLength < DPM.uLength)
|
||||
return;
|
||||
|
||||
// Add 256 to allow for future expansion and
|
||||
// round up to next multiple of 32.
|
||||
uLength += 256;
|
||||
uLength += 32 - uLength%32;
|
||||
|
||||
const unsigned uOldLength = DPM.uLength;
|
||||
if (uOldLength > 0)
|
||||
{
|
||||
for (unsigned i = 0; i < uOldLength; ++i)
|
||||
delete[] DPM.TraceBack[i];
|
||||
|
||||
delete[] DPM.MPrev;
|
||||
delete[] DPM.MCurr;
|
||||
delete[] DPM.MWork;
|
||||
delete[] DPM.DPrev;
|
||||
delete[] DPM.DCurr;
|
||||
delete[] DPM.DWork;
|
||||
delete[] DPM.MxRowA;
|
||||
delete[] DPM.LettersB;
|
||||
delete[] DPM.uDeletePos;
|
||||
delete[] DPM.TraceBack;
|
||||
}
|
||||
|
||||
DPM.uLength = uLength;
|
||||
|
||||
DPM.MPrev = new SCORE[uLength];
|
||||
DPM.MCurr = new SCORE[uLength];
|
||||
DPM.MWork = new SCORE[uLength];
|
||||
|
||||
DPM.DPrev = new SCORE[uLength];
|
||||
DPM.DCurr = new SCORE[uLength];
|
||||
DPM.DWork = new SCORE[uLength];
|
||||
DPM.MxRowA = new SCORE *[uLength];
|
||||
DPM.LettersB = new unsigned[uLength];
|
||||
DPM.uDeletePos = new unsigned[uLength];
|
||||
|
||||
DPM.TraceBack = new int*[uLength];
|
||||
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
DPM.TraceBack[i] = new int[uLength];
|
||||
}
|
||||
|
||||
static void RowFromSeq(const Seq &s, SCORE *Row[])
|
||||
{
|
||||
const unsigned uLength = s.Length();
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
char c = s.GetChar(i);
|
||||
unsigned uLetter = CharToLetter(c);
|
||||
if (uLetter < 20)
|
||||
Row[i] = VTML_SP[uLetter];
|
||||
else
|
||||
Row[i] = VTML_SP[AX_X];
|
||||
}
|
||||
}
|
||||
|
||||
static void LettersFromSeq(const Seq &s, unsigned Letters[])
|
||||
{
|
||||
const unsigned uLength = s.Length();
|
||||
for (unsigned i = 0; i < uLength; ++i)
|
||||
{
|
||||
char c = s.GetChar(i);
|
||||
unsigned uLetter = CharToLetter(c);
|
||||
if (uLetter < 20)
|
||||
Letters[i] = uLetter;
|
||||
else
|
||||
Letters[i] = AX_X;
|
||||
}
|
||||
}
|
||||
|
||||
// Global sequence-sequence alignment by dynamic programming, scoring letter
// pairs with VTML_SP and gaps with g_scoreGapOpen (halved where the code is
// marked "term gaps half").
//
// seqA, seqB: the two sequences; both must be non-empty because position 0
//             and position Length()-1 of each are read.
// Path: output, filled in by TraceBackToPath() from the trace-back matrix.
// Returns the best score found for the final cell.
//
// All scratch buffers live in the file-level DPM and are (re)allocated by
// AllocDPMem().
SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
{
    const unsigned uLengthA = seqA.Length();
    const unsigned uLengthB = seqB.Length();

    // uLength-1 below would wrap around for an empty sequence.
    assert(uLengthA > 0 && uLengthB > 0);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;
    SCORE **MxRowA = DPM.MxRowA;
    unsigned *LettersB = DPM.LettersB;

    RowFromSeq(seqA, MxRowA);
    LettersFromSeq(seqB, LettersB);

    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

    // Must run in every build, not only under DEBUG: the main loop writes a
    // trace-back cell only when a gap wins, so every other cell has to read
    // as 0. The buffers come uninitialized from new[] and are reused across
    // calls.
    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));

    // Special case for i=0
    TraceBack[0][0] = 0;
    MPrev[0] = MxRowA[0][LettersB[0]];

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        unsigned uLetterB = LettersB[j];

        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        // Load the substitution scores of A[i] against every B[j] into
        // MCurr. Every element in [0, uLengthB) is assigned, so no prior
        // clearing is needed.
        SCORE *ptrMCurr_j = MCurr;
        const SCORE *RowA = MxRowA[i];
        const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
        unsigned *ptrLettersB = LettersB;
        for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
        {
            *ptrMCurr_j = RowA[*ptrLettersB];
            ++ptrLettersB;
        }

        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        ptrMCurr_j = MCurr;
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += g_scoreGapOpen/2; // term gaps half

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        // Initialized so it is never read indeterminate if no INew ever
        // exceeds MINUS_INFINITY.
        unsigned uInsertPos = 0;

        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + g_scoreGapOpen;
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1;
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + g_scoreGapOpen;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    unsigned uInsertPos = 0;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2; // term gaps half
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev - g_scoreGapOpen/2;
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
|
||||
390
src/muscle/muscle3.8.31/src/glbalndimer.cpp
Normal file
390
src/muscle/muscle3.8.31/src/glbalndimer.cpp
Normal file
@@ -0,0 +1,390 @@
|
||||
#include "muscle.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h> // for sprintf
|
||||
#include "pwpath.h"
|
||||
#include "profile.h"
|
||||
#include "gapscoredimer.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
|
||||
const char *TBM_, const char *TBD_, const char *TBI_,
|
||||
unsigned uLengthA, unsigned uLengthB, PWPath &Path);
|
||||
|
||||
// Formats a score for trace output. MINUS_INFINITY is shown as an asterisk;
// any other value is printed with "%6.3g".
//
// The result points either to a string literal or to a function-local static
// buffer that the next call overwrites, so copy it if it must outlive the
// next call.
static const char *LocalScoreToStr(SCORE s)
{
    static char str[16];
    if (MINUS_INFINITY == s)
        return " *";
    // Bounded write: output can never run past the end of str.
    snprintf(str, sizeof(str), "%6.3g", s);
    return str;
}
|
||||
|
||||
#if TRACE
|
||||
static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,
|
||||
unsigned uPrefixCountA, unsigned uPrefixCountB)
|
||||
{
|
||||
Log(" ");
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthB > 0)
|
||||
c = ConsensusChar(PB[uPrefixLengthB - 1]);
|
||||
Log(" %4u:%c", uPrefixLengthB, c);
|
||||
}
|
||||
Log("\n");
|
||||
for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthA > 0)
|
||||
c = ConsensusChar(PA[uPrefixLengthA - 1]);
|
||||
Log("%4u:%c ", uPrefixLengthA, c);
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,
|
||||
unsigned uPrefixCountA, unsigned uPrefixCountB)
|
||||
{
|
||||
Log(" ");
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
Log("%2d", uPrefixLengthB);
|
||||
Log("\n");
|
||||
Log(" ");
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthB > 0)
|
||||
c = ConsensusChar(PB[uPrefixLengthB - 1]);
|
||||
Log(" %c", c);
|
||||
}
|
||||
Log("\n");
|
||||
for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uPrefixLengthA > 0)
|
||||
c = ConsensusChar(PA[uPrefixLengthA - 1]);
|
||||
Log("%4u:%c ", uPrefixLengthA, c);
|
||||
for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));
|
||||
Log("\n");
|
||||
}
|
||||
}
|
||||
#endif // TRACE
|
||||
|
||||
static ProfPos PPTerm;
|
||||
static bool InitializePPTerm()
|
||||
{
|
||||
PPTerm.m_bAllGaps = false;
|
||||
PPTerm.m_LL = 1;
|
||||
PPTerm.m_LG = 0;
|
||||
PPTerm.m_GL = 0;
|
||||
PPTerm.m_GG = 0;
|
||||
PPTerm.m_fOcc = 1;
|
||||
return true;
|
||||
}
|
||||
static bool PPTermInitialized = InitializePPTerm();
|
||||
|
||||
// Log-expectation style score for a pair of profile positions.
// Sums fcCounts(A)[letter] * AAScores(B)[letter] over the letters of A,
// visited in PPA.m_uSortOrder order and stopping at the first letter
// whose count is zero. A zero sum yields the fixed score -2.5; otherwise
// the result is log(sum) scaled by the product of both occupancies.
static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)
{
    SCORE scoreSum = 0;
    unsigned uRank = 0;
    while (uRank < 20)
    {
        const unsigned uLetter = PPA.m_uSortOrder[uRank];
        ++uRank;
        const FCOUNT fcCount = PPA.m_fcCounts[uLetter];
        if (0 == fcCount)
            break;
        scoreSum += fcCount*PPB.m_AAScores[uLetter];
    }

    // log(0) is undefined, so a zero sum maps to a fixed penalty.
    if (0 == scoreSum)
        return -2.5;

    const SCORE scoreLog = logf(scoreSum);
    return (SCORE) (scoreLog*(PPA.m_fOcc * PPB.m_fOcc));
}
|
||||
|
||||
// Profile sum-of-pairs style score for a pair of profile positions.
// Returns the sum of fcCounts(A)[letter] * AAScores(B)[letter] over the
// letters of A, visited in PPA.m_uSortOrder order and stopping at the
// first letter whose count is zero.
static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)
{
    SCORE scoreSum = 0;
    unsigned uRank = 0;
    while (uRank < 20)
    {
        const unsigned uLetter = PPA.m_uSortOrder[uRank];
        ++uRank;
        const FCOUNT fcCount = PPA.m_fcCounts[uLetter];
        if (0 == fcCount)
            break;
        scoreSum += fcCount*PPB.m_AAScores[uLetter];
    }
    return scoreSum;
}
|
||||
|
||||
// Dispatches to the position-pair scoring routine selected by the global
// g_PPScore: PPSCORE_LE uses the log-expectation variant, PPSCORE_SP and
// PPSCORE_SV use the PSP variant. Any other value is reported via Quit.
static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)
{
    if (PPSCORE_LE == g_PPScore)
        return ScoreProfPosDimerLE(PPA, PPB);

    if (PPSCORE_SP == g_PPScore || PPSCORE_SV == g_PPScore)
        return ScoreProfPosDimerPSP(PPA, PPB);

    Quit("Invalid g_PPScore");
    return 0;
}
|
||||
|
||||
// Global alignment dynamic programming
|
||||
// This variant optimizes the profile-profile SP score under the
|
||||
// dimer approximation.
|
||||
SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
|
||||
unsigned uLengthB, PWPath &Path)
|
||||
{
|
||||
assert(uLengthB > 0 && uLengthA > 0);
|
||||
|
||||
const unsigned uPrefixCountA = uLengthA + 1;
|
||||
const unsigned uPrefixCountB = uLengthB + 1;
|
||||
|
||||
// Allocate DP matrices
|
||||
const size_t LM = uPrefixCountA*uPrefixCountB;
|
||||
SCORE *DPM_ = new SCORE[LM];
|
||||
SCORE *DPD_ = new SCORE[LM];
|
||||
SCORE *DPI_ = new SCORE[LM];
|
||||
|
||||
char *TBM_ = new char[LM];
|
||||
char *TBD_ = new char[LM];
|
||||
char *TBI_ = new char[LM];
|
||||
|
||||
DPM(0, 0) = 0;
|
||||
DPD(0, 0) = MINUS_INFINITY;
|
||||
DPI(0, 0) = MINUS_INFINITY;
|
||||
|
||||
TBM(0, 0) = 'S';
|
||||
TBD(0, 0) = '?';
|
||||
TBI(0, 0) = '?';
|
||||
|
||||
DPM(1, 0) = MINUS_INFINITY;
|
||||
DPD(1, 0) = GapScoreMD(PA[0], PPTerm);
|
||||
DPI(1, 0) = MINUS_INFINITY;
|
||||
|
||||
TBM(1, 0) = '?';
|
||||
TBD(1, 0) = 'S';
|
||||
TBI(1, 0) = '?';
|
||||
|
||||
DPM(0, 1) = MINUS_INFINITY;
|
||||
DPD(0, 1) = MINUS_INFINITY;
|
||||
DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);
|
||||
|
||||
TBM(0, 1) = '?';
|
||||
TBD(0, 1) = '?';
|
||||
TBI(0, 1) = 'S';
|
||||
|
||||
// Empty prefix of B is special case
|
||||
for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
// M=LetterA+LetterB, impossible with empty prefix
|
||||
DPM(uPrefixLengthA, 0) = MINUS_INFINITY;
|
||||
TBM(uPrefixLengthA, 0) = '?';
|
||||
|
||||
// D=LetterA+GapB
|
||||
DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +
|
||||
GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);
|
||||
TBD(uPrefixLengthA, 0) = 'D';
|
||||
|
||||
// I=GapA+LetterB, impossible with empty prefix
|
||||
DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
|
||||
TBI(uPrefixLengthA, 0) = '?';
|
||||
}
|
||||
|
||||
// Empty prefix of A is special case
|
||||
for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
// M=LetterA+LetterB, impossible with empty prefix
|
||||
DPM(0, uPrefixLengthB) = MINUS_INFINITY;
|
||||
TBM(0, uPrefixLengthB) = '?';
|
||||
|
||||
// D=LetterA+GapB, impossible with empty prefix
|
||||
DPD(0, uPrefixLengthB) = MINUS_INFINITY;
|
||||
TBD(0, uPrefixLengthB) = '?';
|
||||
|
||||
// I=GapA+LetterB
|
||||
DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +
|
||||
GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);
|
||||
TBI(0, uPrefixLengthB) = 'I';
|
||||
}
|
||||
|
||||
// ============
|
||||
// Main DP loop
|
||||
// ============
|
||||
for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
|
||||
{
|
||||
const ProfPos &PPB = PB[uPrefixLengthB - 1];
|
||||
for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
|
||||
{
|
||||
const ProfPos &PPA = PA[uPrefixLengthA - 1];
|
||||
{
|
||||
// Match M=LetterA+LetterB
|
||||
SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);
|
||||
|
||||
SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);
|
||||
SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);
|
||||
SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);
|
||||
|
||||
SCORE scoreBest = scoreMM;
|
||||
char c = 'M';
|
||||
if (scoreDM > scoreBest)
|
||||
{
|
||||
scoreBest = scoreDM;
|
||||
c = 'D';
|
||||
}
|
||||
if (scoreIM > scoreBest)
|
||||
{
|
||||
scoreBest = scoreIM;
|
||||
c = 'I';
|
||||
}
|
||||
|
||||
DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
|
||||
TBM(uPrefixLengthA, uPrefixLengthB) = c;
|
||||
}
|
||||
{
|
||||
// Delete D=LetterA+GapB
|
||||
SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);
|
||||
SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);
|
||||
SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);
|
||||
|
||||
SCORE scoreBest = scoreMD;
|
||||
char c = 'M';
|
||||
if (scoreDD > scoreBest)
|
||||
{
|
||||
scoreBest = scoreDD;
|
||||
c = 'D';
|
||||
}
|
||||
if (scoreID > scoreBest)
|
||||
{
|
||||
scoreBest = scoreID;
|
||||
c = 'I';
|
||||
}
|
||||
|
||||
DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
|
||||
TBD(uPrefixLengthA, uPrefixLengthB) = c;
|
||||
}
|
||||
{
|
||||
// Insert I=GapA+LetterB
|
||||
SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);
|
||||
SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);
|
||||
SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);
|
||||
|
||||
SCORE scoreBest = scoreMI;
|
||||
char c = 'M';
|
||||
if (scoreDI > scoreBest)
|
||||
{
|
||||
scoreBest = scoreDI;
|
||||
c = 'D';
|
||||
}
|
||||
if (scoreII > scoreBest)
|
||||
{
|
||||
scoreBest = scoreII;
|
||||
c = 'I';
|
||||
}
|
||||
|
||||
DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
|
||||
TBI(uPrefixLengthA, uPrefixLengthB) = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
Log("DPM:\n");
|
||||
ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
Log("DPD:\n");
|
||||
ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
Log("DPI:\n");
|
||||
ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
Log("TBM:\n");
|
||||
ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
Log("TBD:\n");
|
||||
ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
Log("TBI:\n");
|
||||
ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
|
||||
#endif
|
||||
|
||||
SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,
|
||||
uLengthA, uLengthB, Path);
|
||||
|
||||
#if TRACE
|
||||
Log("GlobalAlignDimer score = %.3g\n", Score);
|
||||
#endif
|
||||
|
||||
delete[] DPM_;
|
||||
delete[] DPD_;
|
||||
delete[] DPI_;
|
||||
|
||||
delete[] TBM_;
|
||||
delete[] TBD_;
|
||||
delete[] TBI_;
|
||||
|
||||
return Score;
|
||||
}
|
||||
|
||||
// Reconstructs the alignment path from the filled DP and trace-back
// matrices. The walk starts at the full-length cell in whichever of the
// M/D/I states has the highest score (ties favour M, then D) and follows
// the stored trace-back characters until both prefix lengths reach zero,
// prepending one edge per step to Path. Returns the starting (best) score.
// An unexpected trace-back character is reported through Quit.
static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,
  const char *TBM_, const char *TBD_, const char *TBI_,
  unsigned uLengthA, unsigned uLengthB, PWPath &Path)
{
    // Not used directly here; presumably consumed by the matrix accessor
    // macros, which are defined outside this block.
    const unsigned uPrefixCountA = uLengthA + 1;

    // Choose the best final state.
    SCORE scoreMax = DPM(uLengthA, uLengthB);
    char cEdge = 'M';
    if (DPD(uLengthA, uLengthB) > scoreMax)
    {
        cEdge = 'D';
        scoreMax = DPD(uLengthA, uLengthB);
    }
    if (DPI(uLengthA, uLengthB) > scoreMax)
    {
        cEdge = 'I';
        scoreMax = DPI(uLengthA, uLengthB);
    }

    unsigned uPrefixLengthA = uLengthA;
    unsigned uPrefixLengthB = uLengthB;
    while (0 != uPrefixLengthA || 0 != uPrefixLengthB)
    {
        PWEdge Edge;
        Edge.cType = cEdge;
        Edge.uPrefixLengthA = uPrefixLengthA;
        Edge.uPrefixLengthB = uPrefixLengthB;
        Path.PrependEdge(Edge);

#if TRACE
        Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);
#endif
        if ('M' == cEdge)
        {
            // Match consumes one position from each profile.
            assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);
            cEdge = TBM(uPrefixLengthA, uPrefixLengthB);
            --uPrefixLengthA;
            --uPrefixLengthB;
        }
        else if ('D' == cEdge)
        {
            // Delete consumes one position from A only.
            assert(uPrefixLengthA > 0);
            cEdge = TBD(uPrefixLengthA, uPrefixLengthB);
            --uPrefixLengthA;
        }
        else if ('I' == cEdge)
        {
            // Insert consumes one position from B only.
            assert(uPrefixLengthB > 0);
            cEdge = TBI(uPrefixLengthA, uPrefixLengthB);
            --uPrefixLengthB;
        }
        else
            Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);
    }
#if TRACE
    Path.LogMe();
#endif
    return scoreMax;
}
|
||||
289
src/muscle/muscle3.8.31/src/globals.cpp
Normal file
289
src/muscle/muscle3.8.31/src/globals.cpp
Normal file
@@ -0,0 +1,289 @@
|
||||
#if WIN32
|
||||
#include <windows.h>
|
||||
#include <share.h>
|
||||
#endif
|
||||
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifndef MAX_PATH
|
||||
#define MAX_PATH 260
|
||||
#endif
|
||||
|
||||
static char g_strListFileName[MAX_PATH];
|
||||
static bool g_bListFileAppend = false;
|
||||
|
||||
static SEQWEIGHT g_SeqWeight = SEQWEIGHT_Undefined;
|
||||
|
||||
// Stores the sequence weighting method in the file-scope g_SeqWeight.
void SetSeqWeightMethod(SEQWEIGHT Method)
{
    g_SeqWeight = Method;
}
|
||||
|
||||
// Returns the sequence weighting method last stored in g_SeqWeight
// (SEQWEIGHT_Undefined until SetSeqWeightMethod has been called).
SEQWEIGHT GetSeqWeightMethod()
{
    return g_SeqWeight;
}
|
||||
|
||||
void SetListFileName(const char *ptrListFileName, bool bAppend)
|
||||
{
|
||||
assert(strlen(ptrListFileName) < MAX_PATH);
|
||||
strcpy(g_strListFileName, ptrListFileName);
|
||||
g_bListFileAppend = bAppend;
|
||||
}
|
||||
|
||||
void Log(const char szFormat[], ...)
|
||||
{
|
||||
if (0 == g_strListFileName[0])
|
||||
return;
|
||||
|
||||
static FILE *f = NULL;
|
||||
const char *mode;
|
||||
if (g_bListFileAppend)
|
||||
mode = "a";
|
||||
else
|
||||
mode = "w";
|
||||
if (NULL == f)
|
||||
f = _fsopen(g_strListFileName, mode, _SH_DENYNO);
|
||||
if (NULL == f)
|
||||
{
|
||||
perror(g_strListFileName);
|
||||
exit(EXIT_NotStarted);
|
||||
}
|
||||
|
||||
char szStr[4096];
|
||||
va_list ArgList;
|
||||
va_start(ArgList, szFormat);
|
||||
vsprintf(szStr, szFormat, ArgList);
|
||||
fprintf(f, "%s", szStr);
|
||||
fflush(f);
|
||||
}
|
||||
|
||||
// Returns the current local time in asctime() format without the
// trailing newline. The result lives in a static buffer that is
// overwritten by the next call.
const char *GetTimeAsStr()
{
    static char szStr[32];

    time_t tNow;
    time(&tNow);
    strcpy(szStr, asctime(localtime(&tNow)));

    // asctime() output has its newline at index 24; cut it off.
    assert('\n' == szStr[24]);
    szStr[24] = 0;
    return szStr;
}
|
||||
|
||||
// Exit immediately with error message, printf-style.
|
||||
void Quit(const char szFormat[], ...)
|
||||
{
|
||||
va_list ArgList;
|
||||
char szStr[4096];
|
||||
|
||||
va_start(ArgList, szFormat);
|
||||
vsprintf(szStr, szFormat, ArgList);
|
||||
|
||||
fprintf(stderr, "\n*** ERROR *** %s\n", szStr);
|
||||
|
||||
Log("\n*** FATAL ERROR *** ");
|
||||
Log("%s\n", szStr);
|
||||
Log("Stopped %s\n", GetTimeAsStr());
|
||||
|
||||
#ifdef WIN32
|
||||
if (IsDebuggerPresent())
|
||||
{
|
||||
int iBtn = MessageBox(NULL, szStr, "muscle", MB_ICONERROR | MB_OKCANCEL);
|
||||
if (IDCANCEL == iBtn)
|
||||
Break();
|
||||
}
|
||||
#endif
|
||||
exit(EXIT_FatalError);
|
||||
}
|
||||
|
||||
void Warning(const char szFormat[], ...)
|
||||
{
|
||||
va_list ArgList;
|
||||
char szStr[4096];
|
||||
|
||||
va_start(ArgList, szFormat);
|
||||
vsprintf(szStr, szFormat, ArgList);
|
||||
|
||||
fprintf(stderr, "\n*** WARNING *** %s\n", szStr);
|
||||
Log("\n*** WARNING *** %s\n", szStr);
|
||||
}
|
||||
|
||||
// Remove leading and trailing blanks from string
|
||||
void TrimBlanks(char szStr[])
|
||||
{
|
||||
TrimLeadingBlanks(szStr);
|
||||
TrimTrailingBlanks(szStr);
|
||||
}
|
||||
|
||||
// Removes leading space characters from szStr, in place.
// The blanks are counted first and the remainder (including the
// terminating NUL) is shifted down with a single memmove, instead of
// shifting the whole string once per blank.
void TrimLeadingBlanks(char szStr[])
{
    size_t uBlanks = 0;
    while (' ' == szStr[uBlanks])
        ++uBlanks;

    if (uBlanks > 0)
        memmove(szStr, szStr + uBlanks, strlen(szStr + uBlanks) + 1);
}
|
||||
|
||||
// Removes trailing space characters from szStr, in place, by
// overwriting each of them with a NUL from the end backwards.
void TrimTrailingBlanks(char szStr[])
{
    size_t uLength = strlen(szStr);
    for (; uLength > 0 && ' ' == szStr[uLength - 1]; --uLength)
        szStr[uLength - 1] = 0;
}
|
||||
|
||||
// Verbosity query; this implementation unconditionally reports true.
bool Verbose()
{
    return true;
}
|
||||
|
||||
// Parses a decimal number with atof() and narrows it to SCORE.
// Text that atof() cannot parse yields 0.
SCORE StrToScore(const char *pszStr)
{
    const double dValue = atof(pszStr);
    return (SCORE) dValue;
}
|
||||
|
||||
// Deletes every space, tab, newline and carriage return from szStr,
// compacting the remaining characters in place.
void StripWhitespace(char szStr[])
{
    char *pOut = szStr;
    for (const char *pIn = szStr; 0 != *pIn; ++pIn)
    {
        switch (*pIn)
        {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            // Whitespace: drop it.
            break;
        default:
            *pOut++ = *pIn;
            break;
        }
    }
    *pOut = 0;
}
|
||||
|
||||
// Deletes every '-' character from szStr, compacting the remaining
// characters in place.
void StripGaps(char szStr[])
{
    char *pOut = szStr;
    for (const char *pIn = szStr; 0 != *pIn; ++pIn)
    {
        if ('-' == *pIn)
            continue;
        *pOut++ = *pIn;
    }
    *pOut = 0;
}
|
||||
|
||||
// Returns true when Str is an optional '+' or '-' followed by one or
// more decimal digits and nothing else.
// A sign with no digits ("+" or "-") is rejected; the empty string is
// rejected. Characters are converted to unsigned char before isdigit()
// because passing a negative char value to it is undefined.
bool IsValidSignedInteger(const char *Str)
{
    if ('+' == *Str || '-' == *Str)
        ++Str;

    // At least one digit is required after the optional sign.
    if (0 == *Str)
        return false;

    for (; 0 != *Str; ++Str)
        if (!isdigit((unsigned char) *Str))
            return false;
    return true;
}
|
||||
|
||||
// Returns true when Str consists of one or more decimal digits and
// nothing else (no sign, no whitespace).
// Characters are converted to unsigned char before isdigit() because
// passing a negative char value to it is undefined.
bool IsValidInteger(const char *Str)
{
    if (0 == *Str)
        return false;

    for (; 0 != *Str; ++Str)
        if (!isdigit((unsigned char) *Str))
            return false;
    return true;
}
|
||||
|
||||
// Is c valid as first character in an identifier?
// True for letters (per isalpha) and underscore. The character is
// converted to unsigned char first because passing a negative char
// value to isalpha() is undefined.
bool isidentf(char c)
{
    return '_' == c || 0 != isalpha((unsigned char) c);
}
|
||||
|
||||
// Is c valid character in an identifier?
// True for letters, decimal digits and underscore. The character is
// converted to unsigned char first because passing a negative char
// value to isalpha()/isdigit() is undefined.
bool isident(char c)
{
    const unsigned char uc = (unsigned char) c;
    return '_' == c || 0 != isalpha(uc) || 0 != isdigit(uc);
}
|
||||
|
||||
bool IsValidIdentifier(const char *Str)
|
||||
{
|
||||
if (!isidentf(Str[0]))
|
||||
return false;
|
||||
while (char c = *Str++)
|
||||
if (!isident(c))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void SetLogFile()
|
||||
{
|
||||
const char *strFileName = ValueOpt("loga");
|
||||
if (0 != strFileName)
|
||||
g_bListFileAppend = true;
|
||||
else
|
||||
strFileName = ValueOpt("log");
|
||||
if (0 == strFileName)
|
||||
return;
|
||||
strcpy(g_strListFileName, strFileName);
|
||||
}
|
||||
|
||||
// Get filename, stripping any extension and directory parts.
//
// szPath  path using '/' and/or '\\' as separators
// szName  receives the NUL-terminated base name
// uBytes  size of szName in bytes; nothing is written when it is 0 and
//         the name is truncated to uBytes-1 characters when too long
//
// The extension is everything from the last '.' of the final path
// component. A '.' that appears only in a directory component (for
// example "dir.d/file") is ignored; previously it produced a negative
// length and an out-of-range copy.
void NameFromPath(const char szPath[], char szName[], unsigned uBytes)
{
    if (0 == uBytes)
        return;

    const char *pstrBegin = szPath;     // start of final path component
    const char *pstrLastDot = 0;        // last '.' within that component
    const char *pstrEnd = szPath;       // ends up at the terminating NUL
    for (; 0 != *pstrEnd; ++pstrEnd)
    {
        if ('/' == *pstrEnd || '\\' == *pstrEnd)
        {
            // New component: dots seen so far belong to a directory.
            pstrBegin = pstrEnd + 1;
            pstrLastDot = 0;
        }
        else if ('.' == *pstrEnd)
            pstrLastDot = pstrEnd;
    }
    if (0 != pstrLastDot)
        pstrEnd = pstrLastDot;

    size_t uNameLength = (size_t) (pstrEnd - pstrBegin);
    if (uNameLength > uBytes - 1)
        uNameLength = uBytes - 1;
    memcpy(szName, pstrBegin, uNameLength);
    szName[uNameLength] = 0;
}
|
||||
|
||||
char *strsave(const char *s)
|
||||
{
|
||||
char *ptrCopy = strdup(s);
|
||||
if (0 == ptrCopy)
|
||||
Quit("Out of memory");
|
||||
return ptrCopy;
|
||||
}
|
||||
|
||||
// Returns true when c may appear in a floating-point literal: a decimal
// digit, '.', a sign, or an exponent marker ('e', 'E', 'd', 'D').
// The redundant second test for '.' is gone, and the character is
// converted to unsigned char before isdigit() because passing a negative
// char value to it is undefined.
bool IsValidFloatChar(char c)
{
    switch (c)
    {
    case '.':
    case '+':
    case '-':
    case 'e':
    case 'E':
    case 'd':
    case 'D':
        return true;
    default:
        return 0 != isdigit((unsigned char) c);
    }
}
|
||||
|
||||
void Call_MY_ASSERT(const char *file, int line, bool b, const char *msg)
|
||||
{
|
||||
if (b)
|
||||
return;
|
||||
Quit("%s(%d): MY_ASSERT(%s)", file, line, msg);
|
||||
}
|
||||
|
||||
static size_t g_MemTotal;
|
||||
|
||||
void MemPlus(size_t Bytes, char *Where)
|
||||
{
|
||||
g_MemTotal += Bytes;
|
||||
Log("+%10u %6u %6u %s\n",
|
||||
(unsigned) Bytes,
|
||||
(unsigned) GetMemUseMB(),
|
||||
(unsigned) (g_MemTotal/1000000),
|
||||
Where);
|
||||
}
|
||||
|
||||
void MemMinus(size_t Bytes, char *Where)
|
||||
{
|
||||
g_MemTotal -= Bytes;
|
||||
Log("-%10u %6u %6u %s\n",
|
||||
(unsigned) Bytes,
|
||||
(unsigned) GetMemUseMB(),
|
||||
(unsigned) (g_MemTotal/1000000),
|
||||
Where);
|
||||
}
|
||||
163
src/muscle/muscle3.8.31/src/globalslinux.cpp
Normal file
163
src/muscle/muscle3.8.31/src/globalslinux.cpp
Normal file
@@ -0,0 +1,163 @@
|
||||
#include "muscle.h"
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
const int ONE_MB = 1000000;
|
||||
const int MEM_WARNING_THRESHOLD = 20*ONE_MB;
|
||||
|
||||
// Returns a quiet NaN built from an explicit bit pattern.
// The pattern is held in two 32-bit words and copied into the double
// with memcpy. The previous version used `unsigned long` words and a
// pointer cast; where unsigned long is 64 bits wide the cast read only
// the first word and produced a tiny denormal number instead of a NaN.
// Either word order gives an all-ones exponent with a non-zero mantissa.
double GetNAN()
{
    static const unsigned int uWords[2] = { 0xffffffff, 0x7fffffff };
    double dNAN = 0;
    if (sizeof(uWords) == sizeof(dNAN))
        memcpy(&dNAN, uWords, sizeof(dNAN));
    return dNAN;
}
|
||||
|
||||
double g_dNAN = GetNAN();
|
||||
|
||||
// Heap-check hook. This build performs no check (a _CrtCheckMemory
// assertion was left disabled in the original), so the message is unused.
void chkmem(const char szMsg[])
{
    (void) szMsg;
}
|
||||
|
||||
// Debugger-break hook. This build does nothing (a DebugBreak call was
// left disabled in the original).
void Break()
{
}
|
||||
|
||||
static char szCmdLine[4096];
|
||||
|
||||
void *ptrStartBreak = sbrk(0);
|
||||
|
||||
const char *GetCmdLine()
|
||||
{
|
||||
return szCmdLine;
|
||||
}
|
||||
|
||||
double GetMemUseMB()
|
||||
{
|
||||
static char statm[64];
|
||||
static int PageSize;
|
||||
if (0 == statm[0])
|
||||
{
|
||||
PageSize = sysconf(_SC_PAGESIZE);
|
||||
pid_t pid = getpid();
|
||||
sprintf(statm, "/proc/%d/statm", (int) pid);
|
||||
}
|
||||
|
||||
int fd = open(statm, O_RDONLY);
|
||||
if (-1 == fd)
|
||||
return -1;
|
||||
char Buffer[64];
|
||||
int n = read(fd, Buffer, sizeof(Buffer) - 1);
|
||||
close(fd);
|
||||
fd = -1;
|
||||
|
||||
if (n <= 0)
|
||||
{
|
||||
static bool Warned = false;
|
||||
if (!Warned)
|
||||
{
|
||||
Warned = true;
|
||||
Warning("*Warning* Cannot read %s errno=%d %s",
|
||||
statm, errno, strerror(errno));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
Buffer[n] = 0;
|
||||
int Pages = atoi(Buffer);
|
||||
|
||||
return ((double) Pages * (double) PageSize)/1e6;
|
||||
}
|
||||
|
||||
void SaveCmdLine(int argc, char *argv[])
|
||||
{
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
if (i > 0)
|
||||
strcat(szCmdLine, " ");
|
||||
strcat(szCmdLine, argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
double dPeakMemUseMB = 0;
|
||||
|
||||
double GetPeakMemUseMB()
|
||||
{
|
||||
CheckMemUse();
|
||||
return dPeakMemUseMB;
|
||||
}
|
||||
|
||||
// Returns the CPU clock speed in GHz: the value of the CPUGHZ
// environment variable parsed with atof(), or 2.5 when it is not set.
// The value is not range-checked here.
double GetCPUGHz()
{
    const char *e = getenv("CPUGHZ");
    if (0 == e)
        return 2.5;
    return atof(e);
}
|
||||
|
||||
void CheckMemUse()
|
||||
{
|
||||
double dMB = GetMemUseMB();
|
||||
if (dMB > dPeakMemUseMB)
|
||||
dPeakMemUseMB = dMB;
|
||||
}
|
||||
|
||||
double GetRAMSizeMB()
|
||||
{
|
||||
const double DEFAULT_RAM = 500;
|
||||
static double RAMMB = 0;
|
||||
if (RAMMB != 0)
|
||||
return RAMMB;
|
||||
|
||||
int fd = open("/proc/meminfo", O_RDONLY);
|
||||
if (-1 == fd)
|
||||
{
|
||||
static bool Warned = false;
|
||||
if (!Warned)
|
||||
{
|
||||
Warned = true;
|
||||
Warning("*Warning* Cannot open /proc/meminfo errno=%d %s",
|
||||
errno, strerror(errno));
|
||||
}
|
||||
return DEFAULT_RAM;
|
||||
}
|
||||
char Buffer[1024];
|
||||
int n = read(fd, Buffer, sizeof(Buffer) - 1);
|
||||
close(fd);
|
||||
fd = -1;
|
||||
|
||||
if (n <= 0)
|
||||
{
|
||||
static bool Warned = false;
|
||||
if (!Warned)
|
||||
{
|
||||
Warned = true;
|
||||
Warning("*Warning* Cannot read /proc/meminfo errno=%d %s",
|
||||
errno, strerror(errno));
|
||||
}
|
||||
return DEFAULT_RAM;
|
||||
}
|
||||
Buffer[n] = 0;
|
||||
char *pMem = strstr(Buffer, "MemTotal: ");
|
||||
if (0 == pMem)
|
||||
{
|
||||
static bool Warned = false;
|
||||
if (!Warned)
|
||||
{
|
||||
Warned = true;
|
||||
Warning("*Warning* 'MemTotal:' not found in /proc/meminfo");
|
||||
}
|
||||
return DEFAULT_RAM;
|
||||
}
|
||||
int Bytes = atoi(pMem+9)*1000;
|
||||
return ((double) Bytes)/1e6;
|
||||
}
|
||||
|
||||
#endif // !WIN32
|
||||
92
src/muscle/muscle3.8.31/src/globalsosx.cpp
Normal file
92
src/muscle/muscle3.8.31/src/globalsosx.cpp
Normal file
@@ -0,0 +1,92 @@
|
||||
#ifdef __MACH__
|
||||
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/gmon.h>
|
||||
#include <mach/vm_param.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/icmp6.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#include <sys/proc.h>
|
||||
#include <mach/task_info.h>
|
||||
#include <mach/task.h>
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/vm_statistics.h>
|
||||
|
||||
const double DEFAULT_RAM = 1e9;
|
||||
const double DEFAULT_MEM_USE = 1e6;
|
||||
|
||||
// Returns a quiet NaN built from an explicit bit pattern.
// The pattern is held in two 32-bit words and copied into the double
// with memcpy. The previous version used `unsigned long` words and a
// pointer cast; where unsigned long is 64 bits wide the cast read only
// the first word and produced a tiny denormal number instead of a NaN.
// Either word order gives an all-ones exponent with a non-zero mantissa.
double GetNAN()
{
    static const unsigned int uWords[2] = { 0xffffffff, 0x7fffffff };
    double dNAN = 0;
    if (sizeof(uWords) == sizeof(dNAN))
        memcpy(&dNAN, uWords, sizeof(dNAN));
    return dNAN;
}
|
||||
|
||||
double g_dNAN = GetNAN();
|
||||
|
||||
|
||||
double GetRAMSize()
|
||||
{
|
||||
static double CACHED_RAM = 0;
|
||||
if (CACHED_RAM != 0)
|
||||
return CACHED_RAM;
|
||||
|
||||
uint64_t MemPages = 0;
|
||||
size_t Len = sizeof(MemPages);
|
||||
if (sysctlbyname("hw.memsize", &MemPages, &Len, NULL, 0) < 0)
|
||||
return DEFAULT_RAM;
|
||||
return (double) MemPages;
|
||||
}
|
||||
|
||||
double GetRAMSizeMB()
|
||||
{
|
||||
return GetRAMSize()/1e6;
|
||||
}
|
||||
|
||||
static double g_uPeakMemUseBytes;
|
||||
|
||||
double GetMaxMemUseBytes()
|
||||
{
|
||||
return g_uPeakMemUseBytes;
|
||||
}
|
||||
|
||||
double GetPeakMemUseBytes()
|
||||
{
|
||||
return GetMaxMemUseBytes();
|
||||
}
|
||||
|
||||
double GetMemUseBytes()
|
||||
{
|
||||
task_t mytask = mach_task_self();
|
||||
struct task_basic_info ti;
|
||||
memset((void *) &ti, 0, sizeof(ti));
|
||||
mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
|
||||
kern_return_t ok = task_info(mytask, TASK_BASIC_INFO, (task_info_t) &ti, &count);
|
||||
if (ok == KERN_INVALID_ARGUMENT)
|
||||
return DEFAULT_MEM_USE;
|
||||
|
||||
if (ok != KERN_SUCCESS)
|
||||
return DEFAULT_MEM_USE;
|
||||
|
||||
double uBytes = (double ) ti.resident_size;
|
||||
if (uBytes > g_uPeakMemUseBytes)
|
||||
g_uPeakMemUseBytes = uBytes;
|
||||
return uBytes;
|
||||
}
|
||||
|
||||
double GetMemUseMB()
|
||||
{
|
||||
return GetMemUseBytes()/1e6;
|
||||
}
|
||||
|
||||
// Platform initialization hook; nothing to do in this build.
void OSInit()
{
}
|
||||
|
||||
#endif // __MACH__
|
||||
62
src/muscle/muscle3.8.31/src/globalsother.cpp
Normal file
62
src/muscle/muscle3.8.31/src/globalsother.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
#include "muscle.h"
|
||||
|
||||
#if !defined(__linux__) && !defined(_MSC_VER) && !defined(__MACH__)
|
||||
|
||||
// Fallback for platforms without a dedicated implementation: returns
// 0.0 rather than an actual NaN.
double GetNAN()
{
    const double dValue = 0.0;
    return dValue;
}
|
||||
|
||||
double g_dNAN = GetNAN();
|
||||
|
||||
// Heap-check hook; no check is performed in this build.
void chkmem(const char szMsg[])
{
    (void) szMsg;
}
|
||||
|
||||
// Debugger-break hook; does nothing in this build.
void Break()
{
}
|
||||
|
||||
char szCmdLine[4096];
|
||||
|
||||
// Returns the fixed string "muscle".
// NOTE(review): the text collected by SaveCmdLine() in szCmdLine is not
// returned here -- confirm whether that is intended.
const char *GetCmdLine()
{
    static const char *pszName = "muscle";
    return pszName;
}
|
||||
|
||||
// Fallback: memory use is not measured here; a fixed 100.0 is reported.
double GetMemUseMB()
{
    const double dFixedMB = 100.0;
    return dFixedMB;
}
|
||||
|
||||
void SaveCmdLine(int argc, char *argv[])
|
||||
{
|
||||
for (int i = 0; i < argc; ++i)
|
||||
{
|
||||
if (i > 0)
|
||||
strcat(szCmdLine, " ");
|
||||
strcat(szCmdLine, argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: peak memory use is not tracked here; a fixed 100.0 is reported.
double GetPeakMemUseMB()
{
    const double dFixedMB = 100.0;
    return dFixedMB;
}
|
||||
|
||||
// Fallback: the CPU clock speed is not queried here; a fixed 2.0 GHz is
// reported.
double GetCPUGHz()
{
    const double dFixedGHz = 2.0;
    return dFixedGHz;
}
|
||||
|
||||
// Memory-use sampling hook; does nothing in this build.
void CheckMemUse()
{
}
|
||||
|
||||
// Fallback: the RAM size is not queried here; a fixed 500.0 is reported.
double GetRAMSizeMB()
{
    const double dFixedMB = 500.0;
    return dFixedMB;
}
|
||||
|
||||
#endif
|
||||
|
||||
100
src/muscle/muscle3.8.31/src/globalswin32.cpp
Normal file
100
src/muscle/muscle3.8.31/src/globalswin32.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
#include "muscle.h"
|
||||
|
||||
#if WIN32
|
||||
#include <windows.h>
|
||||
#include <crtdbg.h>
|
||||
#include <psapi.h>
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void DebugPrintf(const char *szFormat, ...)
|
||||
{
|
||||
va_list ArgList;
|
||||
char szStr[4096];
|
||||
|
||||
va_start(ArgList, szFormat);
|
||||
vsprintf(szStr, szFormat, ArgList);
|
||||
|
||||
OutputDebugString(szStr);
|
||||
}
|
||||
|
||||
double GetNAN()
|
||||
{
|
||||
static unsigned long nan[2]={0xffffffff, 0x7fffffff};
|
||||
double dNAN = *( double* )nan;
|
||||
assert(_isnan(dNAN));
|
||||
return dNAN;
|
||||
}
|
||||
|
||||
double g_dNAN = GetNAN();
|
||||
|
||||
void chkmem(const char szMsg[])
|
||||
{
|
||||
if (!_CrtCheckMemory())
|
||||
Quit("chkmem(%s)", szMsg);
|
||||
}
|
||||
|
||||
void Break()
|
||||
{
|
||||
if (IsDebuggerPresent())
|
||||
DebugBreak();
|
||||
}
|
||||
|
||||
const char *GetCmdLine()
|
||||
{
|
||||
return GetCommandLine();
|
||||
}
|
||||
|
||||
static unsigned uPeakMemUseBytes;
|
||||
|
||||
double GetRAMSizeMB()
|
||||
{
|
||||
MEMORYSTATUS MS;
|
||||
GlobalMemoryStatus(&MS);
|
||||
return MS.dwAvailPhys/1e6;
|
||||
}
|
||||
|
||||
double GetMemUseMB()
|
||||
{
|
||||
HANDLE hProc = GetCurrentProcess();
|
||||
PROCESS_MEMORY_COUNTERS PMC;
|
||||
BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC));
|
||||
assert(bOk);
|
||||
//printf("GetMemUseMB()\n");
|
||||
//printf("%12u PageFaultCount\n", (unsigned) PMC.PageFaultCount);
|
||||
//printf("%12u PagefileUsage\n", (unsigned) PMC.PagefileUsage);
|
||||
//printf("%12u PeakPagefileUsage\n", (unsigned) PMC.PeakPagefileUsage);
|
||||
//printf("%12u WorkingSetSize\n", (unsigned) PMC.WorkingSetSize);
|
||||
//printf("%12u PeakWorkingSetSize\n", (unsigned) PMC.PeakWorkingSetSize);
|
||||
//printf("%12u QuotaPagedPoolUsage\n", (unsigned) PMC.QuotaPagedPoolUsage);
|
||||
//printf("%12u QuotaPeakPagedPoolUsage\n", (unsigned) PMC.QuotaPeakPagedPoolUsage);
|
||||
//printf("%12u QuotaNonPagedPoolUsage\n", (unsigned) PMC.QuotaNonPagedPoolUsage);
|
||||
//printf("%12u QuotaPeakNonPagedPoolUsage\n", (unsigned) PMC.QuotaPeakNonPagedPoolUsage);
|
||||
unsigned uBytes = (unsigned) PMC.WorkingSetSize;
|
||||
if (uBytes > uPeakMemUseBytes)
|
||||
uPeakMemUseBytes = uBytes;
|
||||
return (uBytes + 500000.0)/1000000.0;
|
||||
}
|
||||
|
||||
double GetPeakMemUseMB()
|
||||
{
|
||||
return (uPeakMemUseBytes + 500000.0)/1000000.0;
|
||||
}
|
||||
|
||||
void CheckMemUse()
|
||||
{
|
||||
// Side-effect: sets peak usage in uPeakMemUseBytes
|
||||
GetMemUseMB();
|
||||
}
|
||||
|
||||
double GetCPUGHz()
|
||||
{
|
||||
double dGHz = 2.5;
|
||||
const char *e = getenv("CPUGHZ");
|
||||
if (0 != e)
|
||||
dGHz = atof(e);
|
||||
if (dGHz < 0.1 || dGHz > 1000.0)
|
||||
Quit("Invalid value '%s' for environment variable CPUGHZ", e);
|
||||
return dGHz;
|
||||
}
|
||||
#endif // WIN32
|
||||
499
src/muscle/muscle3.8.31/src/gonnet.cpp
Normal file
499
src/muscle/muscle3.8.31/src/gonnet.cpp
Normal file
@@ -0,0 +1,499 @@
|
||||
#include "muscle.h"
|
||||
#include "gonnet.h"
|
||||
|
||||
// Expands to one brace-enclosed 20-element row initializer followed by a
// comma, with every argument divided by 4.0. Arguments are named after
// the amino-acid column they fill. Each argument is parenthesized so an
// expression such as `a+b` is divided as a whole.
#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ (A)/4.0, (C)/4.0, (D)/4.0, (E)/4.0, (F)/4.0, (G)/4.0, (H)/4.0, (I)/4.0, (K)/4.0, (L)/4.0, (M)/4.0, (N)/4.0, (P)/4.0, (Q)/4.0, (R)/4.0, (S)/4.0, (T)/4.0, (V)/4.0, (W)/4.0, (Y)/4.0 },
|
||||
|
||||
static double Gonnet80[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 1990, 1140, 930, 1070, 600, 1130, 850, 810, 940, 810,
|
||||
980, 900, 1080, 1020, 880, 1380, 1190, 1180, 370, 590) // A
|
||||
|
||||
ROW( 1140, 2780, 310, 300, 850, 630, 810, 700, 360, 690,
|
||||
850, 690, 310, 480, 640, 1090, 900, 1030, 810, 920) // C
|
||||
|
||||
ROW( 930, 310, 2200, 1550, 130, 980, 1070, 180, 1030, 150,
|
||||
360, 1450, 820, 1150, 800, 1100, 1000, 350, 0, 550) // D
|
||||
|
||||
ROW( 1070, 300, 1550, 2120, 220, 770, 1070, 510, 1280, 490,
|
||||
710, 1110, 890, 1470, 1010, 1050, 970, 730, 260, 500) // E
|
||||
|
||||
ROW( 600, 850, 130, 220, 2380, 90, 980, 1090, 350, 1310,
|
||||
1270, 490, 310, 540, 340, 470, 620, 930, 1400, 1730) // F
|
||||
|
||||
ROW( 1130, 630, 980, 770, 90, 2210, 710, 100, 740, 200,
|
||||
410, 1060, 660, 800, 810, 1080, 720, 380, 430, 300) // G
|
||||
|
||||
ROW( 850, 810, 1070, 1070, 980, 710, 2510, 600, 1120, 670,
|
||||
860, 1330, 790, 1380, 1140, 990, 1000, 590, 810, 1450) // H
|
||||
|
||||
ROW( 810, 700, 180, 510, 1090, 100, 600, 2100, 650, 1460,
|
||||
1490, 530, 490, 640, 530, 620, 960, 1650, 610, 770) // I
|
||||
|
||||
ROW( 940, 360, 1030, 1280, 350, 740, 1120, 650, 2090, 660,
|
||||
870, 1220, 870, 1410, 1570, 1040, 1090, 700, 350, 640) // K
|
||||
|
||||
ROW( 810, 690, 150, 490, 1310, 200, 670, 1460, 660, 2010,
|
||||
1550, 450, 660, 850, 660, 600, 750, 1270, 800, 890) // L
|
||||
|
||||
ROW( 980, 850, 360, 710, 1270, 410, 860, 1490, 870, 1550,
|
||||
2410, 620, 460, 1050, 710, 830, 990, 1250, 790, 870) // M
|
||||
|
||||
ROW( 900, 690, 1450, 1110, 490, 1060, 1330, 530, 1220, 450,
|
||||
620, 2210, 760, 1180, 1020, 1290, 1170, 550, 380, 850) // N
|
||||
|
||||
ROW( 1080, 310, 820, 890, 310, 660, 790, 490, 870, 660,
|
||||
460, 760, 2380, 1000, 790, 1100, 1040, 670, 120, 480) // P
|
||||
|
||||
ROW( 1020, 480, 1150, 1470, 540, 800, 1380, 640, 1410, 850,
|
||||
1050, 1180, 1000, 2190, 1350, 1090, 1060, 730, 620, 710) // Q
|
||||
|
||||
ROW( 880, 640, 800, 1010, 340, 810, 1140, 530, 1570, 660,
|
||||
710, 1020, 790, 1350, 2210, 970, 970, 640, 830, 740) // R
|
||||
|
||||
ROW( 1380, 1090, 1100, 1050, 470, 1080, 990, 620, 1040, 600,
|
||||
830, 1290, 1100, 1090, 970, 2020, 1490, 810, 520, 780) // S
|
||||
|
||||
ROW( 1190, 900, 1000, 970, 620, 720, 1000, 960, 1090, 750,
|
||||
990, 1170, 1040, 1060, 970, 1490, 2050, 1150, 370, 660) // T
|
||||
|
||||
ROW( 1180, 1030, 350, 730, 930, 380, 590, 1650, 700, 1270,
|
||||
1250, 550, 670, 730, 640, 810, 1150, 2040, 440, 770) // V
|
||||
|
||||
ROW( 370, 810, 0, 260, 1400, 430, 810, 610, 350, 800,
|
||||
790, 380, 120, 620, 830, 520, 370, 440, 2970, 1470) // W
|
||||
|
||||
ROW( 590, 920, 550, 500, 1730, 300, 1450, 770, 640, 890,
|
||||
870, 850, 480, 710, 740, 780, 660, 770, 1470, 2470) // Y
|
||||
};
|
||||
|
||||
static double Gonnet120[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 1550, 950, 780, 870, 480, 930, 700, 690, 770, 660,
|
||||
790, 760, 900, 840, 730, 1120, 980, 960, 280, 480) // A
|
||||
|
||||
ROW( 950, 2400, 270, 280, 700, 510, 650, 600, 320, 570,
|
||||
700, 550, 280, 400, 510, 890, 750, 850, 670, 760) // C
|
||||
|
||||
ROW( 780, 270, 1780, 1310, 90, 820, 890, 160, 880, 140,
|
||||
320, 1220, 680, 970, 690, 910, 830, 310, 0, 430) // D
|
||||
|
||||
ROW( 870, 280, 1310, 1680, 180, 650, 900, 410, 1070, 390,
|
||||
560, 950, 740, 1210, 860, 870, 810, 580, 180, 400) // E
|
||||
|
||||
ROW( 480, 700, 90, 180, 1980, 40, 820, 930, 290, 1110,
|
||||
1070, 380, 240, 430, 280, 380, 490, 790, 1230, 1510) // F
|
||||
|
||||
ROW( 930, 510, 820, 650, 40, 1860, 590, 90, 620, 140,
|
||||
310, 890, 550, 660, 660, 900, 610, 310, 300, 220) // G
|
||||
|
||||
ROW( 700, 650, 890, 900, 820, 590, 2060, 480, 940, 540,
|
||||
680, 1100, 650, 1130, 950, 820, 820, 490, 680, 1220) // H
|
||||
|
||||
ROW( 690, 600, 160, 410, 930, 90, 480, 1680, 520, 1240,
|
||||
1250, 410, 400, 530, 430, 520, 790, 1380, 500, 650) // I
|
||||
|
||||
ROW( 770, 320, 880, 1070, 290, 620, 940, 520, 1650, 520,
|
||||
690, 1010, 720, 1160, 1320, 860, 900, 570, 280, 520) // K
|
||||
|
||||
ROW( 660, 570, 140, 390, 1110, 140, 540, 1240, 520, 1620,
|
||||
1300, 350, 520, 660, 520, 490, 620, 1090, 670, 760) // L
|
||||
|
||||
ROW( 790, 700, 320, 560, 1070, 310, 680, 1250, 690, 1300,
|
||||
1910, 500, 400, 820, 580, 670, 800, 1060, 650, 740) // M
|
||||
|
||||
ROW( 760, 550, 1220, 950, 380, 890, 1100, 410, 1010, 350,
|
||||
500, 1760, 640, 970, 860, 1060, 960, 460, 280, 680) // N
|
||||
|
||||
ROW( 900, 280, 680, 740, 240, 550, 650, 400, 720, 520,
|
||||
400, 640, 2010, 820, 660, 910, 860, 540, 70, 370) // P
|
||||
|
||||
ROW( 840, 400, 970, 1210, 430, 660, 1130, 530, 1160, 660,
|
||||
820, 970, 820, 1700, 1120, 890, 870, 600, 470, 580) // Q
|
||||
|
||||
ROW( 730, 510, 690, 860, 280, 660, 950, 430, 1320, 520,
|
||||
580, 860, 660, 1120, 1790, 810, 800, 520, 660, 590) // R
|
||||
|
||||
ROW( 1120, 890, 910, 870, 380, 900, 820, 520, 860, 490,
|
||||
670, 1060, 910, 890, 810, 1560, 1220, 680, 390, 610) // S
|
||||
|
||||
ROW( 980, 750, 830, 810, 490, 610, 820, 790, 900, 620,
|
||||
800, 960, 860, 870, 800, 1220, 1600, 930, 290, 540) // T
|
||||
|
||||
ROW( 960, 850, 310, 580, 790, 310, 490, 1380, 570, 1090,
|
||||
1060, 460, 540, 600, 520, 680, 930, 1610, 370, 630) // V
|
||||
|
||||
ROW( 280, 670, 0, 180, 1230, 300, 680, 500, 280, 670,
|
||||
650, 280, 70, 470, 660, 390, 290, 370, 2620, 1290) // W
|
||||
|
||||
ROW( 480, 760, 430, 400, 1510, 220, 1220, 650, 520, 760,
|
||||
740, 680, 370, 580, 590, 610, 540, 630, 1290, 2070) // Y
|
||||
};
|
||||
|
||||
static SCORE Gonnet160[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 1240, 810, 670, 740, 400, 800, 600, 600, 660, 560,
|
||||
660, 660, 770, 710, 620, 940, 830, 790, 230, 410) // A
|
||||
|
||||
ROW( 810, 2130, 250, 260, 600, 440, 550, 530, 300, 490,
|
||||
590, 470, 260, 360, 430, 760, 640, 720, 570, 650) // C
|
||||
|
||||
ROW( 670, 250, 1480, 1120, 80, 710, 770, 160, 770, 130,
|
||||
280, 1040, 590, 840, 620, 780, 720, 290, 0, 360) // D
|
||||
|
||||
ROW( 740, 260, 1120, 1370, 160, 570, 770, 350, 910, 330,
|
||||
470, 830, 640, 1010, 750, 750, 700, 480, 140, 340) // E
|
||||
|
||||
ROW( 400, 600, 80, 160, 1690, 20, 710, 810, 250, 970,
|
||||
920, 310, 200, 370, 250, 330, 420, 700, 1100, 1340) // F
|
||||
|
||||
ROW( 800, 440, 710, 570, 20, 1600, 510, 80, 540, 110,
|
||||
260, 760, 480, 570, 570, 770, 540, 260, 230, 180) // G
|
||||
|
||||
ROW( 600, 550, 770, 770, 710, 510, 1710, 410, 800, 460,
|
||||
570, 930, 560, 950, 810, 700, 700, 430, 590, 1050) // H
|
||||
|
||||
ROW( 600, 530, 160, 350, 810, 80, 410, 1370, 430, 1080,
|
||||
1070, 340, 350, 460, 370, 450, 660, 1180, 440, 580) // I
|
||||
|
||||
ROW( 660, 300, 770, 910, 250, 540, 800, 430, 1330, 440,
|
||||
570, 860, 620, 980, 1130, 740, 760, 480, 240, 430) // K
|
||||
|
||||
ROW( 560, 490, 130, 330, 970, 110, 460, 1080, 440, 1350,
|
||||
1120, 300, 430, 540, 430, 420, 540, 950, 580, 670) // L
|
||||
|
||||
ROW( 660, 590, 280, 470, 920, 260, 570, 1070, 570, 1120,
|
||||
1540, 420, 360, 660, 490, 550, 670, 920, 560, 650) // M
|
||||
|
||||
ROW( 660, 470, 1040, 830, 310, 760, 930, 340, 860, 300,
|
||||
420, 1430, 560, 830, 740, 890, 810, 400, 230, 560) // N
|
||||
|
||||
ROW( 770, 260, 590, 640, 200, 480, 560, 350, 620, 430,
|
||||
360, 560, 1740, 700, 570, 780, 740, 460, 40, 300) // P
|
||||
|
||||
ROW( 710, 360, 840, 1010, 370, 570, 950, 460, 980, 540,
|
||||
660, 830, 700, 1340, 950, 760, 740, 510, 380, 490) // Q
|
||||
|
||||
ROW( 620, 430, 620, 750, 250, 570, 810, 370, 1130, 430,
|
||||
490, 740, 570, 950, 1490, 690, 690, 440, 540, 490) // R
|
||||
|
||||
ROW( 940, 760, 780, 750, 330, 770, 700, 450, 740, 420,
|
||||
550, 890, 780, 760, 690, 1220, 1010, 580, 310, 500) // S
|
||||
|
||||
ROW( 830, 640, 720, 700, 420, 540, 700, 660, 760, 540,
|
||||
670, 810, 740, 740, 690, 1010, 1280, 780, 240, 460) // T
|
||||
|
||||
ROW( 790, 720, 290, 480, 700, 260, 430, 1180, 480, 950,
|
||||
920, 400, 460, 510, 440, 580, 780, 1310, 330, 540) // V
|
||||
|
||||
ROW( 230, 570, 0, 140, 1100, 230, 590, 440, 240, 580,
|
||||
560, 230, 40, 380, 540, 310, 240, 330, 2360, 1160) // W
|
||||
|
||||
ROW( 410, 650, 360, 340, 1340, 180, 1050, 580, 430, 670,
|
||||
650, 560, 300, 490, 490, 500, 460, 540, 1160, 1780) // Y
|
||||
};
|
||||
|
||||
double Gonnet16[21][21] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 124, 81, 67, 74, 40, 80, 60, 60, 66, 56,
|
||||
66, 66, 77, 71, 62, 94, 83, 79, 23, 41) // A
|
||||
|
||||
ROW( 81, 213, 25, 26, 60, 44, 55, 53, 30, 49,
|
||||
59, 47, 26, 36, 43, 76, 64, 72, 57, 65) // C
|
||||
|
||||
ROW( 67, 25, 148, 112, 8, 71, 77, 16, 77, 13,
|
||||
28, 104, 59, 84, 62, 78, 72, 29, 0, 36) // D
|
||||
|
||||
ROW( 74, 26, 112, 137, 16, 57, 77, 35, 91, 33,
|
||||
47, 83, 64, 101, 75, 75, 70, 48, 14, 34) // E
|
||||
|
||||
ROW( 40, 60, 8, 16, 169, 2, 71, 81, 25, 97,
|
||||
92, 31, 20, 37, 25, 33, 42, 70, 110, 134) // F
|
||||
|
||||
ROW( 80, 44, 71, 57, 2, 160, 51, 8, 54, 11,
|
||||
26, 76, 48, 57, 57, 77, 54, 26, 23, 18) // G
|
||||
|
||||
ROW( 60, 55, 77, 77, 71, 51, 171, 41, 80, 46,
|
||||
57, 93, 56, 95, 81, 70, 70, 43, 59, 105) // H
|
||||
|
||||
ROW( 60, 53, 16, 35, 81, 8, 41, 137, 43, 108,
|
||||
107, 34, 35, 46, 37, 45, 66, 118, 44, 58) // I
|
||||
|
||||
ROW( 66, 30, 77, 91, 25, 54, 80, 43, 133, 44,
|
||||
57, 86, 62, 98, 113, 74, 76, 48, 24, 43) // K
|
||||
|
||||
ROW( 56, 49, 13, 33, 97, 11, 46, 108, 44, 135,
|
||||
112, 30, 43, 54, 43, 42, 54, 95, 58, 67) // L
|
||||
|
||||
ROW( 66, 59, 28, 47, 92, 26, 57, 107, 57, 112,
|
||||
154, 42, 36, 66, 49, 55, 67, 92, 56, 65) // M
|
||||
|
||||
ROW( 66, 47, 104, 83, 31, 76, 93, 34, 86, 30,
|
||||
42, 143, 56, 83, 74, 89, 81, 40, 23, 56) // N
|
||||
|
||||
ROW( 77, 26, 59, 64, 20, 48, 56, 35, 62, 43,
|
||||
36, 56, 174, 70, 57, 78, 74, 46, 4, 30) // P
|
||||
|
||||
ROW( 71, 36, 84, 101, 37, 57, 95, 46, 98, 54,
|
||||
66, 83, 70, 134, 95, 76, 74, 51, 38, 49) // Q
|
||||
|
||||
ROW( 62, 43, 62, 75, 25, 57, 81, 37, 113, 43,
|
||||
49, 74, 57, 95, 149, 69, 69, 44, 54, 49) // R
|
||||
|
||||
ROW( 94, 76, 78, 75, 33, 77, 70, 45, 74, 42,
|
||||
55, 89, 78, 76, 69, 122, 101, 58, 31, 50) // S
|
||||
|
||||
ROW( 83, 64, 72, 70, 42, 54, 70, 66, 76, 54,
|
||||
67, 81, 74, 74, 69, 101, 128, 78, 24, 46) // T
|
||||
|
||||
ROW( 79, 72, 29, 48, 70, 26, 43, 118, 48, 95,
|
||||
92, 40, 46, 51, 44, 58, 78, 131, 33, 54) // V
|
||||
|
||||
ROW( 23, 57, 0, 14, 110, 23, 59, 44, 24, 58,
|
||||
56, 23, 4, 38, 54, 31, 24, 33, 236, 116) // W
|
||||
|
||||
ROW( 41, 65, 36, 34, 134, 18, 105, 58, 43, 67,
|
||||
65, 56, 30, 49, 49, 50, 46, 54, 116, 178) // Y
|
||||
};
|
||||
|
||||
static double Gonnet250[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 760, 570, 490, 520, 290, 570, 440, 440, 480, 400,
|
||||
450, 490, 550, 500, 460, 630, 580, 530, 160, 300) // A
|
||||
|
||||
ROW( 570, 1670, 200, 220, 440, 320, 390, 410, 240, 370,
|
||||
430, 340, 210, 280, 300, 530, 470, 520, 420, 470) // C
|
||||
|
||||
ROW( 490, 200, 990, 790, 70, 530, 560, 140, 570, 120,
|
||||
220, 740, 450, 610, 490, 570, 520, 230, 0, 240) // D
|
||||
|
||||
ROW( 520, 220, 790, 880, 130, 440, 560, 250, 640, 240,
|
||||
320, 610, 470, 690, 560, 540, 510, 330, 90, 250) // E
|
||||
|
||||
ROW( 290, 440, 70, 130, 1220, 0, 510, 620, 190, 720,
|
||||
680, 210, 140, 260, 200, 240, 300, 530, 880, 1030) // F
|
||||
|
||||
ROW( 570, 320, 530, 440, 0, 1180, 380, 70, 410, 80,
|
||||
170, 560, 360, 420, 420, 560, 410, 190, 120, 120) // G
|
||||
|
||||
ROW( 440, 390, 560, 560, 510, 380, 1120, 300, 580, 330,
|
||||
390, 640, 410, 640, 580, 500, 490, 320, 440, 740) // H
|
||||
|
||||
ROW( 440, 410, 140, 250, 620, 70, 300, 920, 310, 800,
|
||||
770, 240, 260, 330, 280, 340, 460, 830, 340, 450) // I
|
||||
|
||||
ROW( 480, 240, 570, 640, 190, 410, 580, 310, 840, 310,
|
||||
380, 600, 460, 670, 790, 530, 530, 350, 170, 310) // K
|
||||
|
||||
ROW( 400, 370, 120, 240, 720, 80, 330, 800, 310, 920,
|
||||
800, 220, 290, 360, 300, 310, 390, 700, 450, 520) // L
|
||||
|
||||
ROW( 450, 430, 220, 320, 680, 170, 390, 770, 380, 800,
|
||||
950, 300, 280, 420, 350, 380, 460, 680, 420, 500) // M
|
||||
|
||||
ROW( 490, 340, 740, 610, 210, 560, 640, 240, 600, 220,
|
||||
300, 900, 430, 590, 550, 610, 570, 300, 160, 380) // N
|
||||
|
||||
ROW( 550, 210, 450, 470, 140, 360, 410, 260, 460, 290,
|
||||
280, 430, 1280, 500, 430, 560, 530, 340, 20, 210) // P
|
||||
|
||||
ROW( 500, 280, 610, 690, 260, 420, 640, 330, 670, 360,
|
||||
420, 590, 500, 790, 670, 540, 520, 370, 250, 350) // Q
|
||||
|
||||
ROW( 460, 300, 490, 560, 200, 420, 580, 280, 790, 300,
|
||||
350, 550, 430, 670, 990, 500, 500, 320, 360, 340) // R
|
||||
|
||||
ROW( 630, 530, 570, 540, 240, 560, 500, 340, 530, 310,
|
||||
380, 610, 560, 540, 500, 740, 670, 420, 190, 330) // S
|
||||
|
||||
ROW( 580, 470, 520, 510, 300, 410, 490, 460, 530, 390,
|
||||
460, 570, 530, 520, 500, 670, 770, 520, 170, 330) // T
|
||||
|
||||
ROW( 530, 520, 230, 330, 530, 190, 320, 830, 350, 700,
|
||||
680, 300, 340, 370, 320, 420, 520, 860, 260, 410) // V
|
||||
|
||||
ROW( 160, 420, 0, 90, 880, 120, 440, 340, 170, 450,
|
||||
420, 160, 20, 250, 360, 190, 170, 260, 1940, 930) // W
|
||||
|
||||
ROW( 300, 470, 240, 250, 1030, 120, 740, 450, 310, 520,
|
||||
500, 380, 210, 350, 340, 330, 330, 410, 930, 1300) // Y
|
||||
};
|
||||
|
||||
static double Gonnet350[20][20] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 450, 390, 350, 360, 210, 400, 310, 310, 340, 280,
|
||||
310, 350, 380, 350, 330, 410, 390, 350, 110, 210) // A
|
||||
|
||||
ROW( 390, 1280, 160, 180, 320, 230, 270, 300, 190, 280,
|
||||
310, 240, 170, 210, 220, 360, 330, 370, 310, 340) // C
|
||||
|
||||
ROW( 350, 160, 640, 540, 50, 390, 400, 110, 410, 100,
|
||||
160, 500, 330, 430, 370, 400, 370, 170, 0, 170) // D
|
||||
|
||||
ROW( 360, 180, 540, 550, 100, 330, 390, 180, 440, 170,
|
||||
220, 440, 350, 460, 410, 380, 360, 230, 60, 180) // E
|
||||
|
||||
ROW( 210, 320, 50, 100, 860, 0, 360, 460, 140, 530,
|
||||
490, 150, 100, 190, 150, 170, 220, 400, 700, 770) // F
|
||||
|
||||
ROW( 400, 230, 390, 330, 0, 860, 280, 60, 310, 50,
|
||||
120, 400, 280, 310, 310, 400, 300, 140, 50, 80) // G
|
||||
|
||||
ROW( 310, 270, 400, 390, 360, 280, 680, 220, 400, 240,
|
||||
270, 430, 300, 420, 410, 350, 340, 240, 320, 500) // H
|
||||
|
||||
ROW( 310, 300, 110, 180, 460, 60, 220, 620, 220, 570,
|
||||
540, 170, 190, 240, 200, 240, 320, 570, 260, 340) // I
|
||||
|
||||
ROW( 340, 190, 410, 440, 140, 310, 400, 220, 530, 210,
|
||||
260, 420, 330, 450, 530, 370, 370, 250, 120, 210) // K
|
||||
|
||||
ROW( 280, 280, 100, 170, 530, 50, 240, 570, 210, 630,
|
||||
560, 160, 200, 240, 210, 220, 280, 510, 340, 400) // L
|
||||
|
||||
ROW( 310, 310, 160, 220, 490, 120, 270, 540, 260, 560,
|
||||
580, 210, 210, 280, 240, 260, 310, 490, 320, 370) // M
|
||||
|
||||
ROW( 350, 240, 500, 440, 150, 400, 430, 170, 420, 160,
|
||||
210, 550, 320, 410, 390, 410, 390, 220, 110, 250) // N
|
||||
|
||||
ROW( 380, 170, 330, 350, 100, 280, 300, 190, 330, 200,
|
||||
210, 320, 910, 350, 310, 390, 370, 240, 10, 150) // P
|
||||
|
||||
ROW( 350, 210, 430, 460, 190, 310, 420, 240, 450, 240,
|
||||
280, 410, 350, 470, 450, 370, 360, 260, 160, 240) // Q
|
||||
|
||||
ROW( 330, 220, 370, 410, 150, 310, 410, 200, 530, 210,
|
||||
240, 390, 310, 450, 630, 360, 350, 230, 230, 230) // R
|
||||
|
||||
ROW( 410, 360, 400, 380, 170, 400, 350, 240, 370, 220,
|
||||
260, 410, 390, 370, 360, 450, 430, 290, 130, 230) // S
|
||||
|
||||
ROW( 390, 330, 370, 360, 220, 300, 340, 320, 370, 280,
|
||||
310, 390, 370, 360, 350, 430, 460, 350, 120, 230) // T
|
||||
|
||||
ROW( 350, 370, 170, 230, 400, 140, 240, 570, 250, 510,
|
||||
490, 220, 240, 260, 230, 290, 350, 560, 210, 310) // V
|
||||
|
||||
ROW( 110, 310, 0, 60, 700, 50, 320, 260, 120, 340,
|
||||
320, 110, 10, 160, 230, 130, 120, 210, 1590, 740) // W
|
||||
|
||||
ROW( 210, 340, 170, 180, 770, 80, 500, 340, 210, 400,
|
||||
370, 250, 150, 240, 230, 230, 230, 310, 740, 920) // Y
|
||||
};
|
||||
|
||||
// Return the 20x20 table registered under the numeric id N.
// Ids 80, 120, 250 and 350 are served. The Gonnet16 and Gonnet160 tables
// defined in this file are not reachable from here (their cases were
// disabled in the original switch). Any other id is reported through
// Quit(); the null return only matters if Quit() returns to the caller.
const t_ROW *GetGonnetMatrix(unsigned N)
{
	const t_ROW *ptrMatrix = 0;
	if (80 == N)
		ptrMatrix = Gonnet80;
	else if (120 == N)
		ptrMatrix = Gonnet120;
	else if (250 == N)
		ptrMatrix = Gonnet250;
	else if (350 == N)
		ptrMatrix = Gonnet350;

	if (0 == ptrMatrix)
		Quit("Invalid Gonnet%u", N);
	return ptrMatrix;
}
|
||||
|
||||
//SCORE GetGonnetGapOpen(unsigned N)
|
||||
// {
|
||||
// switch (N)
|
||||
// {
|
||||
// case 80:
|
||||
// return -639;
|
||||
// case 120:
|
||||
// return -863;
|
||||
// case 160:
|
||||
// return -611;
|
||||
// case 250:
|
||||
// return -308;
|
||||
// case 350:
|
||||
// return -158;
|
||||
// }
|
||||
// Quit("Invalid Gonnet%u", N);
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
// Return the gap-open value paired with table id N.
// Known ids are 80, 120, 160, 250 and 350; any other id is reported
// through Quit() and, should Quit() return, 0 is handed back.
SCORE GetGonnetGapOpen(unsigned N)
{
	static const struct
	{
		unsigned uId;
		int iValue;
	} GapOpen[] =
	{
		{ 80, -1000 },
		{ 120, -800 },
		{ 160, -700 },
		{ 250, -200 },
		{ 350, -175 }
	};
	const unsigned uEntryCount = sizeof(GapOpen)/sizeof(GapOpen[0]);

	for (unsigned uEntry = 0; uEntry < uEntryCount; ++uEntry)
		if (GapOpen[uEntry].uId == N)
			return (SCORE) GapOpen[uEntry].iValue;

	Quit("Invalid Gonnet%u", N);
	return 0;
}
|
||||
|
||||
// Return the gap-extend value paired with table id N.
// Known ids are 80, 120, 160, 250 and 350; any other id is reported
// through Quit() and, should Quit() return, 0 is handed back.
// NOTE(review): these values are positive while the gap-open values in
// GetGonnetGapOpen are negative -- confirm the sign convention with callers.
SCORE GetGonnetGapExtend(unsigned N)
{
	static const struct
	{
		unsigned uId;
		int iValue;
	} GapExtend[] =
	{
		{ 80, 350 },
		{ 120, 200 },
		{ 160, 175 },
		{ 250, 20 },
		{ 350, 20 }
	};
	const unsigned uEntryCount = sizeof(GapExtend)/sizeof(GapExtend[0]);

	for (unsigned uEntry = 0; uEntry < uEntryCount; ++uEntry)
		if (GapExtend[uEntry].uId == N)
			return (SCORE) GapExtend[uEntry].iValue;

	Quit("Invalid Gonnet%u", N);
	return 0;
}
|
||||
|
||||
//double GonnetLookup[400][400];
|
||||
//
|
||||
//static bool InitGonnetLookup()
|
||||
// {
|
||||
// for (unsigned i = 0; i < 400; ++i)
|
||||
// {
|
||||
// const unsigned A1 = i/20;
|
||||
// const unsigned A2 = i%20;
|
||||
// for (unsigned j = 0; j <= i; ++j)
|
||||
// {
|
||||
// const unsigned B1 = j/20;
|
||||
// const unsigned B2 = j%20;
|
||||
//
|
||||
// const double s00 = Gonnet16[A1][B1];
|
||||
// const double s01 = Gonnet16[A1][B2];
|
||||
// const double s10 = Gonnet16[A2][B1];
|
||||
// const double s11 = Gonnet16[A2][B2];
|
||||
//
|
||||
// GonnetLookup[i][j] = GonnetLookup[j][i] = (s00 + s01 + s10 + s11)/4;
|
||||
// }
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
//static bool bGonnetLookupInitialized = InitGonnetLookup();
|
||||
12
src/muscle/muscle3.8.31/src/gonnet.h
Normal file
12
src/muscle/muscle3.8.31/src/gonnet.h
Normal file
@@ -0,0 +1,12 @@
|
||||
// Public interface to the Gonnet tables defined in gonnet.cpp.
// NOTE(review): SCORE is used below but this header includes nothing, so it
// presumably relies on the includer having pulled in its definition first.
#ifndef Gonnet_h
|
||||
#define Gonnet_h
|
||||
|
||||
// One row of a table: 20 doubles.
typedef double t_ROW[20];
|
||||
|
||||
// Table for id N. The implementation serves 80, 120, 250 and 350 and calls
// Quit() for any other id.
const t_ROW *GetGonnetMatrix(unsigned N);
|
||||
// Gap-open value for id N. The implementation knows 80, 120, 160, 250 and
// 350 and calls Quit() for any other id.
SCORE GetGonnetGapOpen(unsigned N);
|
||||
// Gap-extend value for id N; same set of ids as GetGonnetGapOpen.
SCORE GetGonnetGapExtend(unsigned N);
|
||||
|
||||
// NOTE(review): the only definition of GonnetLookup visible in gonnet.cpp is
// commented out -- confirm it is defined elsewhere before referencing it.
extern double GonnetLookup[400][400];
|
||||
|
||||
#endif // Gonnet_h
|
||||
0
src/muscle/muscle3.8.31/src/gotowt.cpp
Normal file
0
src/muscle/muscle3.8.31/src/gotowt.cpp
Normal file
84
src/muscle/muscle3.8.31/src/henikoffweight.cpp
Normal file
84
src/muscle/muscle3.8.31/src/henikoffweight.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
Compute Henikoff weights.
|
||||
Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
|
||||
J. Mol. Biol., 243(4):574-578.
|
||||
|
||||
Award each different residue an equal share of the weight, and then divide up
|
||||
that weight equally among the sequences sharing the same residue. So if in a
|
||||
position of a multiple alignment, r different residues are represented, a residue
|
||||
represented in only one sequence contributes a score of 1/r to that sequence, whereas a
|
||||
residue represented in s sequences contributes a score of 1/rs to each of the s
|
||||
sequences. For each sequence, the contributions from each position are summed to give
|
||||
a sequence weight.
|
||||
|
||||
See also HenikoffWeightPB.
|
||||
***/
|
||||
|
||||
void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
|
||||
// Compute letter counts in this column
|
||||
unsigned uLetterCount[MAX_ALPHA];
|
||||
memset(uLetterCount, 0, sizeof(uLetterCount));
|
||||
unsigned uDifferentLetterCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
|
||||
if (uLetter >= 20)
|
||||
continue;
|
||||
unsigned uNewCount = uLetterCount[uLetter] + 1;
|
||||
uLetterCount[uLetter] = uNewCount;
|
||||
if (1 == uNewCount)
|
||||
++uDifferentLetterCount;
|
||||
}
|
||||
|
||||
// Compute weight contributions
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
|
||||
if (uLetter >= 20)
|
||||
continue;
|
||||
const unsigned uCount = uLetterCount[uLetter];
|
||||
unsigned uDenom = uCount*uDifferentLetterCount;
|
||||
if (uDenom == 0)
|
||||
continue;
|
||||
m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
|
||||
}
|
||||
}
|
||||
|
||||
void MSA::SetHenikoffWeights() const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
else if (1 == uSeqCount)
|
||||
{
|
||||
m_Weights[0] = (WEIGHT) 1.0;
|
||||
return;
|
||||
}
|
||||
else if (2 == uSeqCount)
|
||||
{
|
||||
m_Weights[0] = (WEIGHT) 0.5;
|
||||
m_Weights[1] = (WEIGHT) 0.5;
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
m_Weights[uSeqIndex] = 0.0;
|
||||
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
CalcHenikoffWeightsCol(uColIndex);
|
||||
|
||||
// Set all-gap seqs weight to 0
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
if (IsGapSeq(uSeqIndex))
|
||||
m_Weights[uSeqIndex] = 0.0;
|
||||
|
||||
Normalize(m_Weights, uSeqCount);
|
||||
}
|
||||
124
src/muscle/muscle3.8.31/src/henikoffweightpb.cpp
Normal file
124
src/muscle/muscle3.8.31/src/henikoffweightpb.cpp
Normal file
@@ -0,0 +1,124 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
Compute Henikoff weights.
|
||||
Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
|
||||
J. Mol. Biol., 243(4):574-578.
|
||||
|
||||
Award each different residue an equal share of the weight, and then divide up
|
||||
that weight equally among the sequences sharing the same residue. So if in a
|
||||
position of a multiple alignment, r different residues are represented, a residue
|
||||
represented in only one sequence contributes a score of 1/r to that sequence, whereas a
|
||||
residue represented in s sequences contributes a score of 1/rs to each of the s
|
||||
sequences. For each sequence, the contributions from each position are summed to give
|
||||
a sequence weight.
|
||||
|
||||
Here we use the variant from PSI-BLAST, which (a) treats gaps as a 21st letter,
|
||||
and (b) ignores columns that are perfectly conserved.
|
||||
|
||||
>>> WARNING -- I SUSPECT THIS DOESN'T WORK CORRECTLY <<<
|
||||
***/
|
||||
|
||||
void MSA::CalcHenikoffWeightsColPB(unsigned uColIndex) const
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
|
||||
// Compute letter counts in this column
|
||||
unsigned uLetterCount[MAX_ALPHA+1];
|
||||
memset(uLetterCount, 0, (MAX_ALPHA+1)*sizeof(unsigned));
|
||||
unsigned uLetter;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
|
||||
uLetter = MAX_ALPHA;
|
||||
else
|
||||
uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||
++(uLetterCount[uLetter]);
|
||||
}
|
||||
|
||||
// Check for special case of perfect conservation
|
||||
for (unsigned uLetter = 0; uLetter < MAX_ALPHA+1; ++uLetter)
|
||||
{
|
||||
unsigned uCount = uLetterCount[uLetter];
|
||||
if (uCount > 0)
|
||||
{
|
||||
// Perfectly conserved?
|
||||
if (uCount == uSeqCount)
|
||||
return;
|
||||
else
|
||||
// If count > 0 but less than nr. sequences, can't be conserved
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute weight contributions
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
unsigned uLetter;
|
||||
if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
|
||||
uLetter = MAX_ALPHA;
|
||||
else
|
||||
uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||
const unsigned uCount = uLetterCount[uLetter];
|
||||
m_Weights[uSeqIndex] += (WEIGHT) (1.0/uCount);
|
||||
}
|
||||
}
|
||||
|
||||
bool MSA::IsGapSeq(unsigned uSeqIndex) const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
if (!IsGap(uSeqIndex, uColIndex))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void MSA::SetUniformWeights() const
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
|
||||
const WEIGHT w = (WEIGHT) (1.0 / uSeqCount);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
m_Weights[uSeqIndex] = w;
|
||||
}
|
||||
|
||||
void MSA::SetHenikoffWeightsPB() const
|
||||
{
|
||||
const unsigned uColCount = GetColCount();
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
|
||||
if (0 == uSeqCount)
|
||||
return;
|
||||
else if (1 == uSeqCount)
|
||||
{
|
||||
m_Weights[0] = 1.0;
|
||||
return;
|
||||
}
|
||||
else if (2 == uSeqCount)
|
||||
{
|
||||
m_Weights[0] = (WEIGHT) 0.5;
|
||||
m_Weights[1] = (WEIGHT) 0.5;
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
m_Weights[uSeqIndex] = 0.0;
|
||||
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
CalcHenikoffWeightsColPB(uColIndex);
|
||||
|
||||
// Set all-gap seqs weight to 0
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
if (IsGapSeq(uSeqIndex))
|
||||
m_Weights[uSeqIndex] = 0.0;
|
||||
|
||||
// Check for special case of identical sequences, which will cause all
|
||||
// columns to be skipped becasue they're perfectly conserved.
|
||||
if (VectorIsZero(m_Weights, uSeqCount))
|
||||
VectorSet(m_Weights, uSeqCount, 1.0);
|
||||
|
||||
Normalize(m_Weights, uSeqCount);
|
||||
}
|
||||
136
src/muscle/muscle3.8.31/src/html.cpp
Normal file
136
src/muscle/muscle3.8.31/src/html.cpp
Normal file
@@ -0,0 +1,136 @@
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
|
||||
// Number of alignment columns MSA::ToHTMLFile writes per output line.
const unsigned uCharsPerLine = 60;
|
||||
// Lower and upper clamp, in characters, for the name column written by
// MSA::ToHTMLFile.
const int MIN_NAME = 10;
|
||||
const int MAX_NAME = 32;
|
||||
|
||||
// Defined elsewhere; called by MakeColors with a zero-filled grid of
// GetSeqCount() rows by GetColCount() columns.
extern void AssignColors(const MSA &a, int **Colors);
|
||||
|
||||
static int **MakeColors(const MSA &a)
|
||||
{
|
||||
const unsigned uSeqCount = a.GetSeqCount();
|
||||
const unsigned uColCount = a.GetColCount();
|
||||
|
||||
int **Colors = new int *[uSeqCount];
|
||||
for (unsigned i = 0; i < uSeqCount; ++i)
|
||||
{
|
||||
Colors[i] = new int[uColCount];
|
||||
memset(Colors[i], 0, uColCount*sizeof(int));
|
||||
}
|
||||
AssignColors(a, Colors);
|
||||
return Colors;
|
||||
}
|
||||
|
||||
#define COLOR_WHITE "FFFFFF"
#define COLOR_GRAY "C0C0C0"
#define COLOR_BLACK "000000"
#define COLOR_RED "FF0000"
#define COLOR_GREEN "00FF00"
#define COLOR_BLUE "5590FF"
#define COLOR_LIGHTBLUE "77FFFF"

// Switch the background colour of the HTML being written.
// Nothing is written when From equals To. Otherwise colour codes 0..3 close
// the current SPAN and open a new one (0 white, 1 gray, 2 blue,
// 3 light blue); any other code writes nothing.
static void ChangeColor(TextFile &File, int From, int To)
{
	static const char *const SpanForColor[] =
	{
		"</SPAN><SPAN STYLE=\"background-color:#" COLOR_WHITE "\">",
		"</SPAN><SPAN STYLE=\"background-color:#" COLOR_GRAY "\">",
		"</SPAN><SPAN STYLE=\"background-color:#" COLOR_BLUE "\">",
		"</SPAN><SPAN STYLE=\"background-color:#" COLOR_LIGHTBLUE "\">"
	};

	if (From != To && To >= 0 && To <= 3)
		File.PutString(SpanForColor[To]);
}
|
||||
|
||||
#define COLOR_WINDOW "FFEEE0"
|
||||
|
||||
// Length of the first word of a sequence label: the number of characters
// before the first blank, or the whole label when it contains no blank.
static int FirstWordLength(const char *ptrName)
{
	const char *ptrBlank = strchr(ptrName, ' ');
	if (0 != ptrBlank)
		return (int) (ptrBlank - ptrName);
	return (int) strlen(ptrName);
}

// Write the alignment as an HTML page to File.
// The alignment is emitted in blocks of uCharsPerLine columns. Each row
// starts with the first word of the sequence name, padded or truncated to a
// common width between MIN_NAME and MAX_NAME, followed by the residues with
// a background colour taken from the grid built by MakeColors(). Residues
// with colour 0 are written in lower case, all others in upper case.
// An alignment with zero columns produces only the page wrapper.
void MSA::ToHTMLFile(TextFile &File) const
{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	File.PutString("<HTML>\n");
	File.PutString("<BODY BGCOLOR=\"#" COLOR_WINDOW "\">\n");
	File.PutString("<PRE>");

	int **Colors = MakeColors(*this);

	// Common width of the name column, clamped to [MIN_NAME, MAX_NAME].
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
	{
		const int iLength = FirstWordLength(GetSeqName(uSeqIndex));
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
	}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

	// With zero columns "uColCount - 1" would wrap around, so that case is
	// mapped to zero blocks explicitly.
	const unsigned uLineCount =
	  (0 == uColCount) ? 0 : (uColCount - 1)/uCharsPerLine + 1;
	int CurrentColor = -1;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
	{
		File.PutString("\n");
		const unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;

		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
			// Blank-pad the first word of the label to the common width.
			const char *ptrName = GetSeqName(uSeqIndex);
			int iLength = FirstWordLength(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

			// Force ChangeColor() to emit a tag for the first residue.
			CurrentColor = -1;
			File.PutString("<SPAN STYLE=\"background-color:#" COLOR_WINDOW "\">");
			File.PutFormat("%s ", Name);
			File.PutString("<SPAN STYLE=\"background-color:#FFFFFF\">");
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
			{
				const int Color = Colors[uSeqIndex][uColIndex];
				ChangeColor(File, CurrentColor, Color);
				CurrentColor = Color;
				// <ctype.h> functions need an unsigned char value.
				const unsigned char c =
				  (unsigned char) GetChar(uSeqIndex, uColIndex);
				if (Color == 0)
					File.PutFormat("%c", tolower(c));
				else
					File.PutFormat("%c", toupper(c));
			}
			File.PutString("\n");
		}
	}

	// Release the colour grid allocated by MakeColors().
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		delete[] Colors[uSeqIndex];
	delete[] Colors;

	File.PutString("</SPAN>\n");
	File.PutString("</PRE>\n");
	File.PutString("</BODY>\n");
	File.PutString("</HTML>\n");
}
|
||||
42
src/muscle/muscle3.8.31/src/hydro.cpp
Normal file
42
src/muscle/muscle3.8.31/src/hydro.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
#include "muscle.h"
|
||||
#include "profile.h"
|
||||
|
||||
extern void TomHydro(ProfPos *Prof, unsigned Length);
|
||||
|
||||
// Apply hydrophobicity heuristic to a profile
|
||||
void Hydro(ProfPos *Prof, unsigned uLength)
|
||||
{
|
||||
if (ALPHA_Amino != g_Alpha)
|
||||
return;
|
||||
|
||||
if (g_bTomHydro)
|
||||
{
|
||||
TomHydro(Prof, uLength);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 == g_uHydrophobicRunLength)
|
||||
return;
|
||||
|
||||
if (uLength <= g_uHydrophobicRunLength)
|
||||
return;
|
||||
|
||||
unsigned uRunLength = 0;
|
||||
unsigned L2 = g_uHydrophobicRunLength/2;
|
||||
for (unsigned uColIndex = L2; uColIndex < uLength - L2; ++uColIndex)
|
||||
{
|
||||
ProfPos &PP = Prof[uColIndex];
|
||||
bool bHydro = IsHydrophobic(PP.m_fcCounts);
|
||||
if (bHydro)
|
||||
{
|
||||
++uRunLength;
|
||||
if (uRunLength >= g_uHydrophobicRunLength)
|
||||
{
|
||||
Prof[uColIndex-L2].m_scoreGapOpen *= (SCORE) g_dHydroFactor;
|
||||
Prof[uColIndex-L2].m_scoreGapClose *= (SCORE) g_dHydroFactor;
|
||||
}
|
||||
}
|
||||
else
|
||||
uRunLength = 0;
|
||||
}
|
||||
}
|
||||
354
src/muscle/muscle3.8.31/src/intmath.cpp
Normal file
354
src/muscle/muscle3.8.31/src/intmath.cpp
Normal file
@@ -0,0 +1,354 @@
|
||||
#include "muscle.h"
|
||||
#include <math.h>
|
||||
|
||||
// Convert a score to a probability: 2^(Score/INTSCALE).
// Scores at or below MINUS_INFINITY map to probability 0.
PROB ScoreToProb(SCORE Score)
{
	const bool bIsMinusInfinity = (MINUS_INFINITY >= Score);
	return bIsMinusInfinity ?
	  (PROB) 0.0 : (PROB) pow(2.0, (double) Score/INTSCALE);
}
|
||||
|
||||
//#if 0
|
||||
//static const double log2e = log2(exp(1.0));
|
||||
//
|
||||
//double lnTolog2(double ln)
|
||||
// {
|
||||
// return ln*log2e;
|
||||
// }
|
||||
//
|
||||
//double log2(double x)
|
||||
// {
|
||||
// if (0 == x)
|
||||
// return MINUS_INFINITY;
|
||||
//
|
||||
// static const double dInvLn2 = 1.0/log(2.0);
|
||||
//// Multiply by inverse of log(2) just in case multiplication
|
||||
//// is faster than division.
|
||||
// return log(x)*dInvLn2;
|
||||
// }
|
||||
//#endif
|
||||
|
||||
//SCORE ProbToScore(PROB Prob)
|
||||
// {
|
||||
// if (0.0 == Prob)
|
||||
// return MINUS_INFINITY;
|
||||
//// return (SCORE) floor(INTSCALE*log2(Prob));
|
||||
// return (SCORE) log2(Prob);
|
||||
// }
|
||||
|
||||
// Scale a non-negative double by INTSCALE and convert it to WEIGHT.
// A negative argument trips the assertion.
WEIGHT DoubleToWeight(double d)
{
	assert(d >= 0);
	const double dScaled = INTSCALE*d;
	return (WEIGHT) dScaled;
}
|
||||
|
||||
// Inverse of DoubleToWeight: divide the weight by INTSCALE, in double
// precision.
double WeightToDouble(WEIGHT w)
{
	const double dWeight = (double) w;
	const double dScale = (double) INTSCALE;
	return dWeight / dScale;
}
|
||||
|
||||
// Scale a double by INTSCALE and convert the product to SCORE.
SCORE DoubleToScore(double d)
{
	const double dScaled = d*(double) INTSCALE;
	return (SCORE) dScaled;
}
|
||||
|
||||
// Approximate equality of two scores; defers entirely to BTEq().
bool ScoreEq(SCORE s1, SCORE s2)
{
	const bool bClose = BTEq(s1, s2);
	return bClose;
}
|
||||
|
||||
// Approximate equality of two BASETYPE values.
// They compare equal when they differ by less than 0.0001 in absolute
// terms, or when the difference is less than 0.5% of |b1| + |b2|.
static bool BTEq2(BASETYPE b1, BASETYPE b2)
{
	const double dGap = fabs(b1 - b2);
	if (!(dGap < 0.0001))
	{
		// Not close in absolute terms; fall back to the relative test.
		const double dMagnitude = fabs(b1) + fabs(b2);
		return dGap/dMagnitude < 0.005;
	}
	return true;
}
|
||||
|
||||
bool BTEq(double b1, double b2)
|
||||
{
|
||||
return BTEq2((BASETYPE) b1, (BASETYPE) b2);
|
||||
}
|
||||
|
||||
//const double dLn2 = log(2.0);
|
||||
|
||||
//// pow2(x)=2^x
|
||||
//double pow2(double x)
|
||||
// {
|
||||
// if (MINUS_INFINITY == x)
|
||||
// return 0;
|
||||
// return exp(x*dLn2);
|
||||
// }
|
||||
|
||||
//// lp2(x) = log2(1 + 2^-x), x >= 0
|
||||
//double lp2(double x)
|
||||
// {
|
||||
// return log2(1 + pow2(-x));
|
||||
// }
|
||||
|
||||
// SumLog(x, y) = log2(2^x + 2^y)
|
||||
//SCORE SumLog(SCORE x, SCORE y)
|
||||
// {
|
||||
// return (SCORE) log2(pow2(x) + pow2(y));
|
||||
// }
|
||||
//
|
||||
//// SumLog(x, y, z) = log2(2^x + 2^y + 2^z)
|
||||
//SCORE SumLog(SCORE x, SCORE y, SCORE z)
|
||||
// {
|
||||
// return (SCORE) log2(pow2(x) + pow2(y) + pow2(z));
|
||||
// }
|
||||
//
|
||||
//// SumLog(w, x, y, z) = log2(2^w + 2^x + 2^y + 2^z)
|
||||
//SCORE SumLog(SCORE w, SCORE x, SCORE y, SCORE z)
|
||||
// {
|
||||
// return (SCORE) log2(pow2(w) + pow2(x) + pow2(y) + pow2(z));
|
||||
// }
|
||||
|
||||
//SCORE lp2Fast(SCORE x)
|
||||
// {
|
||||
// assert(x >= 0);
|
||||
// const int iTableSize = 1000;
|
||||
// const double dRange = 20.0;
|
||||
// const double dScale = dRange/iTableSize;
|
||||
// static SCORE dValue[iTableSize];
|
||||
// static bool bInit = false;
|
||||
// if (!bInit)
|
||||
// {
|
||||
// for (int i = 0; i < iTableSize; ++i)
|
||||
// dValue[i] = (SCORE) lp2(i*dScale);
|
||||
// bInit = true;
|
||||
// }
|
||||
// if (x >= dRange)
|
||||
// return 0.0;
|
||||
// int i = (int) (x/dScale);
|
||||
// assert(i >= 0 && i < iTableSize);
|
||||
// SCORE dResult = dValue[i];
|
||||
// assert(BTEq(dResult, lp2(x)));
|
||||
// return dResult;
|
||||
// }
|
||||
//
|
||||
//// SumLog(x, y) = log2(2^x + 2^y)
|
||||
//SCORE SumLogFast(SCORE x, SCORE y)
|
||||
// {
|
||||
// if (MINUS_INFINITY == x)
|
||||
// {
|
||||
// if (MINUS_INFINITY == y)
|
||||
// return MINUS_INFINITY;
|
||||
// return y;
|
||||
// }
|
||||
// else if (MINUS_INFINITY == y)
|
||||
// return x;
|
||||
//
|
||||
// SCORE dResult;
|
||||
// if (x > y)
|
||||
// dResult = x + lp2Fast(x-y);
|
||||
// else
|
||||
// dResult = y + lp2Fast(y-x);
|
||||
// assert(SumLog(x, y) == dResult);
|
||||
// return dResult;
|
||||
// }
|
||||
//
|
||||
//SCORE SumLogFast(SCORE x, SCORE y, SCORE z)
|
||||
// {
|
||||
// SCORE dResult = SumLogFast(x, SumLogFast(y, z));
|
||||
// assert(SumLog(x, y, z) == dResult);
|
||||
// return dResult;
|
||||
// }
|
||||
|
||||
//SCORE SumLogFast(SCORE w, SCORE x, SCORE y, SCORE z)
|
||||
// {
|
||||
// SCORE dResult = SumLogFast(SumLogFast(w, x), SumLogFast(y, z));
|
||||
// assert(SumLog(w, x, y, z) == dResult);
|
||||
// return dResult;
|
||||
// }
|
||||
|
||||
// Returns the sum of the first n entries of v, accumulated in index order.
// Returns 0.0 when n is 0.
double VecSum(const double v[], unsigned n)
	{
	double dTotal = 0.0;
	unsigned uIndex = 0;
	while (uIndex < n)
		{
		dTotal += v[uIndex];
		++uIndex;
		}
	return dTotal;
	}
|
||||
|
||||
// Scales p[0..n-1] in place so that the entries sum to 1.
// Calls Quit if the entries sum to exactly zero.
void Normalize(PROB p[], unsigned n)
	{
	PROB dTotal = 0.0;
	for (unsigned uAdd = 0; uAdd < n; ++uAdd)
		dTotal += p[uAdd];

	if (0.0 == dTotal)
		Quit("Normalize, sum=0");

	for (unsigned uDiv = 0; uDiv < n; ++uDiv)
		p[uDiv] /= dTotal;
	}
|
||||
|
||||
// Like Normalize(p, n), but leaves the vector untouched (instead of
// quitting) when its entries sum to exactly zero.
void NormalizeUnlessZero(PROB p[], unsigned n)
	{
	PROB dTotal = 0.0;
	for (unsigned uAdd = 0; uAdd < n; ++uAdd)
		dTotal += p[uAdd];

	if (0.0 != dTotal)
		{
		for (unsigned uDiv = 0; uDiv < n; ++uDiv)
			p[uDiv] /= dTotal;
		}
	}
|
||||
|
||||
// Scales p[0..n-1] in place so that the entries sum to dRequiredTotal.
// The sum is accumulated in double precision; Quit is called if it is
// exactly zero.
void Normalize(PROB p[], unsigned n, double dRequiredTotal)
	{
	double dTotal = 0.0;
	for (unsigned uAdd = 0; uAdd < n; ++uAdd)
		dTotal += p[uAdd];

	if (0.0 == dTotal)
		Quit("Normalize, sum=0");

	// The scale factor is narrowed to PROB once; every entry is multiplied
	// by that same narrowed value.
	const double dFactor = dRequiredTotal / dTotal;
	const PROB Scale = (PROB) dFactor;
	for (unsigned uMul = 0; uMul < n; ++uMul)
		p[uMul] *= Scale;
	}
|
||||
|
||||
// Returns true when every one of the first n entries compares equal to 0.0
// (trivially true for n == 0).
bool VectorIsZero(const double dValues[], unsigned n)
	{
	unsigned uIndex = 0;
	// Advance past zero entries; stop at the first entry that is != 0.0.
	while (uIndex < n && !(dValues[uIndex] != 0.0))
		++uIndex;
	return uIndex == n;
	}
|
||||
|
||||
// Assigns d to each of the first n entries of dValues.
void VectorSet(double dValues[], unsigned n, double d)
	{
	unsigned uIndex = 0;
	while (uIndex < n)
		{
		dValues[uIndex] = d;
		++uIndex;
		}
	}
|
||||
|
||||
// Single-precision overload: true when every one of the first n entries
// compares equal to 0.0 (trivially true for n == 0).
bool VectorIsZero(const float dValues[], unsigned n)
	{
	unsigned uIndex = 0;
	// Advance past zero entries; stop at the first entry that is != 0.0.
	while (uIndex < n && !(dValues[uIndex] != 0.0))
		++uIndex;
	return uIndex == n;
	}
|
||||
|
||||
// Single-precision overload: assigns d to each of the first n entries.
void VectorSet(float dValues[], unsigned n, float d)
	{
	unsigned uIndex = 0;
	while (uIndex < n)
		{
		dValues[uIndex] = d;
		++uIndex;
		}
	}
|
||||
|
||||
// Correlation coefficient of P[0..uCount-1] and Q[0..uCount-1]:
//   sum(dP*dQ) / sqrt(sum(dP*dP) * sum(dQ*dQ))
// where dP and dQ are deviations from the respective means.
// Returns 0 when the cross-product sum is exactly zero (this also covers
// uCount == 0, where neither loop runs).
double Correl(const double P[], const double Q[], unsigned uCount)
	{
	unsigned uIndex;

	// Pass 1: means.
	double dTotalP = 0.0;
	double dTotalQ = 0.0;
	for (uIndex = 0; uIndex < uCount; ++uIndex)
		{
		dTotalP += P[uIndex];
		dTotalQ += Q[uIndex];
		}
	const double dMeanP = dTotalP/uCount;
	const double dMeanQ = dTotalQ/uCount;

	// Pass 2: sums of products of deviations.
	double dSumPQ = 0.0;
	double dSumPP = 0.0;
	double dSumQQ = 0.0;
	for (uIndex = 0; uIndex < uCount; ++uIndex)
		{
		const double dDevP = P[uIndex] - dMeanP;
		const double dDevQ = Q[uIndex] - dMeanQ;
		dSumPQ += dDevP*dDevQ;
		dSumPP += dDevP*dDevP;
		dSumQQ += dDevQ*dDevQ;
		}

	if (0 == dSumPQ)
		return 0;
	return dSumPQ / sqrt(dSumPP*dSumQQ);
	}
|
||||
|
||||
// Single-precision overload of Correl: same formula, with all sums
// accumulated in float.
// Returns 0 when the cross-product sum is exactly zero (this also covers
// uCount == 0, where neither loop runs).
float Correl(const float P[], const float Q[], unsigned uCount)
	{
	unsigned uIndex;

	// Pass 1: means.
	float dTotalP = 0.0;
	float dTotalQ = 0.0;
	for (uIndex = 0; uIndex < uCount; ++uIndex)
		{
		dTotalP += P[uIndex];
		dTotalQ += Q[uIndex];
		}
	const float dMeanP = dTotalP/uCount;
	const float dMeanQ = dTotalQ/uCount;

	// Pass 2: sums of products of deviations.
	float dSumPQ = 0.0;
	float dSumPP = 0.0;
	float dSumQQ = 0.0;
	for (uIndex = 0; uIndex < uCount; ++uIndex)
		{
		const float dDevP = P[uIndex] - dMeanP;
		const float dDevQ = Q[uIndex] - dMeanQ;
		dSumPQ += dDevP*dDevQ;
		dSumPP += dDevP*dDevP;
		dSumQQ += dDevQ*dDevQ;
		}

	if (0 == dSumPQ)
		return 0;
	return dSumPQ / (float) sqrt(dSumPP*dSumQQ);
	}
|
||||
|
||||
// Simple (but slow) function to compute Pearson ranks
|
||||
// that allows for ties. Correctness and simplicity
|
||||
// are priorities over speed here.
|
||||
// Writes the 1-based rank of each P[n] into Ranks[n]. Tied values all
// receive the same rank: 1 + (number of smaller values) + (ties - 1)/2.
// Quadratic in uCount by design (every value is compared with every value).
void Rank(const float P[], float Ranks[], unsigned uCount)
	{
	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
		{
		const double dTarget = P[uTarget];
		unsigned uLess = 0;
		unsigned uEqual = 0;
		unsigned uGreater = 0;
		for (unsigned uOther = 0; uOther < uCount; ++uOther)
			{
			const double dOther = P[uOther];
			if (dOther < dTarget)
				++uLess;
			else if (dOther == dTarget)
				++uEqual;
			else
				++uGreater;
			}
		// The value always ties with itself.
		assert(uEqual >= 1);
		assert(uEqual + uLess + uGreater == uCount);
		Ranks[uTarget] = (float) (1 + uLess + (uEqual - 1)/2.0);
		}
	}
|
||||
|
||||
// Double-precision overload: writes the 1-based rank of each P[n] into
// Ranks[n]. Tied values all receive the same rank:
// 1 + (number of smaller values) + (ties - 1)/2.
// Quadratic in uCount by design (every value is compared with every value).
void Rank(const double P[], double Ranks[], unsigned uCount)
	{
	for (unsigned uTarget = 0; uTarget < uCount; ++uTarget)
		{
		const double dTarget = P[uTarget];
		unsigned uLess = 0;
		unsigned uEqual = 0;
		unsigned uGreater = 0;
		for (unsigned uOther = 0; uOther < uCount; ++uOther)
			{
			const double dOther = P[uOther];
			if (dOther < dTarget)
				++uLess;
			else if (dOther == dTarget)
				++uEqual;
			else
				++uGreater;
			}
		// The value always ties with itself.
		assert(uEqual >= 1);
		assert(uEqual + uLess + uGreater == uCount);
		Ranks[uTarget] = (double) (1 + uLess + (uEqual - 1)/2.0);
		}
	}
|
||||
|
||||
// Returns the sum of Counts[0..19].
// NOTE(review): the fixed length 20 is presumably the amino-acid alphabet
// size -- confirm against callers.
FCOUNT SumCounts(const FCOUNT Counts[])
	{
	FCOUNT Total = 0;
	int iLetter = 0;
	while (iLetter < 20)
		{
		Total += Counts[iLetter];
		++iLetter;
		}
	return Total;
	}
|
||||
210
src/muscle/muscle3.8.31/src/intmath.h
Normal file
210
src/muscle/muscle3.8.31/src/intmath.h
Normal file
@@ -0,0 +1,210 @@
|
||||
// IntMath.h: Header for doing fractional math with integers for speed.
|
||||
|
||||
#ifndef IntMath_h
|
||||
#define IntMath_h
|
||||
|
||||
typedef float BASETYPE;
|
||||
//typedef double BASETYPE;
|
||||
|
||||
// Scaling factor used to store certain floating point
|
||||
// values as integers to a few significant figures.
|
||||
//const int INTSCALE = 1000;
|
||||
const int INTSCALE = 1;
|
||||
|
||||
// Type for a probability in range 0.0 to 1.0.
|
||||
typedef BASETYPE PROB;
|
||||
|
||||
// Type for a log-odds integer score.
|
||||
// Stored as log2(PROB)*INTSCALE.
|
||||
//typedef int SCORE;
|
||||
typedef BASETYPE SCORE;
|
||||
|
||||
// Type for a weight.
|
||||
// Stored as w*INTSCALE where w is in range 0.0 to 1.0.
|
||||
//typedef unsigned WEIGHT;
|
||||
typedef BASETYPE WEIGHT;
|
||||
|
||||
// Type for a fractional weighted count stored as n*WEIGHT/N
|
||||
// where n=measured count (integer >= 0) and N is total for
|
||||
// the distribution (e.g., n=number of residues of a given
|
||||
// type in a column, N=number of residues in the column).
|
||||
// Hence values in an FCOUNT variable range from 0..INTSCALE
|
||||
// as an integer, representing "true" values 0.0 to 1.0.
|
||||
//typedef unsigned FCOUNT;
|
||||
typedef BASETYPE FCOUNT;
|
||||
|
||||
// Representation of -infinity. Value should
|
||||
// be large and negative, but not so large
|
||||
// that adding a few of them overflows.
|
||||
// TODO: Multiplied by 10 to work around bug
|
||||
// when aligning Bali 1ckaA in ref4, which is
|
||||
// so long that B->Mmax got to -infinity, causing
|
||||
// traceback to fail.
|
||||
//const int MINUS_INFINITY = -10000000;
|
||||
const BASETYPE MINUS_INFINITY = (BASETYPE) -1e37;
|
||||
const BASETYPE PLUS_INFINITY = (BASETYPE) 1e37;
|
||||
|
||||
// Probability relative to a null model
|
||||
typedef double RPROB;
|
||||
|
||||
// Prototypes for the conversion helpers between double values and the
// PROB / SCORE / WEIGHT typedefs, and for the tolerant equality tests.
PROB ScoreToProb(SCORE Score);
|
||||
SCORE ProbToScore(PROB Prob);
|
||||
SCORE DoubleToScore(double d);
|
||||
WEIGHT DoubleToWeight(double d);
|
||||
double WeightToDouble(WEIGHT w);
|
||||
// NOTE(review): a three-argument macro named MulScoreWeight is #defined
// further down in this header, so this two-argument prototype looks
// stale -- confirm whether any function definition still matches it.
SCORE MulScoreWeight(SCORE Score, WEIGHT Weight);
|
||||
bool ScoreEq(SCORE s1, SCORE s2);
|
||||
bool BTEq(double b1, double b2);
|
||||
|
||||
// Converts a SCORE to a double by dividing it by INTSCALE.
// Declared `static inline`, like the other helpers defined in this header,
// so that translation units which include the header without calling it do
// not each get an unused static function.
static inline double ScoreToDouble(SCORE Score)
	{
	return (double) Score / (double) INTSCALE;
	}
|
||||
|
||||
// MulDivAssign(Result, x, y, z) assigns (x*y)/z to Result.
// The first branch below is disabled by "#if 0"; the plain C expression in
// the #else branch is the definition that is actually compiled.
#if 0
|
||||
// In-line assembler for Result = (x*y)/z
|
||||
// Note that imul and idiv will do 64-bit arithmetic
|
||||
// on 32-bit operands, so this shouldn't overflow
|
||||
// Can't write this efficiently in C/C++ (would
|
||||
// often overflow 32 bits).
|
||||
#define MulDivAssign(Result, x, y, z) \
|
||||
{ \
|
||||
int X = (x); \
|
||||
int Y = (y); \
|
||||
int Z = (z); \
|
||||
_asm mov eax,X \
|
||||
_asm imul Y \
|
||||
_asm mov ecx,Z \
|
||||
_asm idiv ecx \
|
||||
_asm mov Result,eax \
|
||||
}
|
||||
#else
|
||||
// Expands to a bare assignment expression (no trailing semicolon, no
// enclosing braces); each of x, y and z is expanded exactly once.
#define MulDivAssign(Result, x, y, z) Result = (((x)*(y))/(z))
|
||||
#endif
|
||||
|
||||
// The three wrappers below all store (first operand * second operand) /
// INTSCALE into r; they differ only in name.
#define MulScoreWeight(r, s, w) MulDivAssign(r, s, w, INTSCALE)
|
||||
#define MulWeightWCount(r, wt, wc) MulDivAssign(r, wt, wc, INTSCALE)
|
||||
#define MulFCountScore(r, fc, sc) MulDivAssign(r, fc, sc, INTSCALE)
|
||||
|
||||
#if _DEBUG
|
||||
|
||||
// Checked addition of two scores.
// MINUS_INFINITY in either operand yields MINUS_INFINITY, and a sum that
// falls below MINUS_INFINITY is clamped to it.
static inline SCORE Add2(SCORE a, SCORE b)
	{
	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
		return MINUS_INFINITY;
	const SCORE Total = a + b;
	return (Total < MINUS_INFINITY) ? MINUS_INFINITY : Total;
	}
|
||||
|
||||
// Checked sum of three scores, built as Add2(Add2(a, b), c).
static inline SCORE Add3(SCORE a, SCORE b, SCORE c)
	{
	const SCORE ab = Add2(a, b);
	return Add2(ab, c);
	}
|
||||
|
||||
// Checked sum of four scores, built pairwise: Add2(a+b, c+d).
static inline SCORE Add4(SCORE a, SCORE b, SCORE c, SCORE d)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	return Add2(ab, cd);
	}
|
||||
|
||||
// Checked sum of five scores: Add3(a+b, c+d, e).
static inline SCORE Add5(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	return Add3(ab, cd, e);
	}
|
||||
|
||||
// Checked sum of six scores: Add3(a+b, c+d, e+f).
static inline SCORE Add6(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	const SCORE ef = Add2(e, f);
	return Add3(ab, cd, ef);
	}
|
||||
|
||||
// Checked sum of seven scores: Add4(a+b, c+d, e+f, g).
static inline SCORE Add7(SCORE a, SCORE b, SCORE c, SCORE d, SCORE e, SCORE f, SCORE g)
	{
	const SCORE ab = Add2(a, b);
	const SCORE cd = Add2(c, d);
	const SCORE ef = Add2(e, f);
	return Add4(ab, cd, ef, g);
	}
|
||||
|
||||
// Product of two scores; MINUS_INFINITY in either operand yields
// MINUS_INFINITY. The product itself is not range-checked.
static inline SCORE Mul2(SCORE a, SCORE b)
	{
	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
		return MINUS_INFINITY;
	return a*b;
	}
|
||||
|
||||
// Checked subtraction a - b.
// MINUS_INFINITY in either operand yields MINUS_INFINITY, and a difference
// that falls below MINUS_INFINITY is clamped to it.
static inline SCORE Sub2(SCORE a, SCORE b)
	{
	if (MINUS_INFINITY == a || MINUS_INFINITY == b)
		return MINUS_INFINITY;
	const SCORE Delta = a - b;
	return (Delta < MINUS_INFINITY) ? MINUS_INFINITY : Delta;
	}
|
||||
|
||||
// Divides a score by an integer; MINUS_INFINITY is passed through unchanged.
static inline SCORE Div2(SCORE a, int b)
	{
	return (MINUS_INFINITY == a) ? MINUS_INFINITY : a/b;
	}
|
||||
|
||||
//static inline SCORE MulScoreWeight(SCORE s, WEIGHT w)
|
||||
// {
|
||||
// SCORE Prod = s*(SCORE) w;
|
||||
// assert(Prod < OVERFLOW_WARN);
|
||||
// extern void Log(const char Format[], ...);
|
||||
// if (Prod/(SCORE) w != s)
|
||||
// Log("**WARRNING MulScoreWeight Prod=%d w=%d Prod/w=%d s=%d\n",
|
||||
// Prod,
|
||||
// w,
|
||||
// Prod/(SCORE) w,
|
||||
// s);
|
||||
// assert(Prod/ (SCORE) w == s);
|
||||
// return Prod/INTSCALE;
|
||||
// }
|
||||
//
|
||||
//static inline WCOUNT MulWeightWCount(WEIGHT wt, WCOUNT wc)
|
||||
// {
|
||||
// return (wt*wc)/INTSCALE;
|
||||
// }
|
||||
|
||||
#else
|
||||
// Macro forms of the score arithmetic helpers, used when _DEBUG is not set.
// The Add* and Mul2 macros are plain arithmetic: unlike the inline
// functions in the _DEBUG branch they do not special-case MINUS_INFINITY
// and do not clamp the result.
#define Add2(a, b) ((a) + (b))
|
||||
// Sub2 and Div2 yield MINUS_INFINITY when the first operand equals it.
// Note that (a) is expanded twice, so an argument with side effects is
// evaluated twice.
#define Sub2(a, b) ((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) - (b)))
|
||||
#define Div2(a, b) ((MINUS_INFINITY == (a)) ? MINUS_INFINITY : ((a) / (b)))
|
||||
#define Add3(a, b, c) ((a) + (b) + (c))
|
||||
#define Add4(a, b, c, d) ((a) + (b) + (c) + (d))
|
||||
#define Add5(a, b, c, d, e) ((a) + (b) + (c) + (d) + (e))
|
||||
#define Add6(a, b, c, d, e, f) ((a) + (b) + (c) + (d) + (e) + (f))
|
||||
#define Add7(a, b, c, d, e, f, g) ((a) + (b) + (c) + (d) + (e) + (f) + (g))
|
||||
//#define MulScoreWeight(s, w) (((s)*(SCORE) (w))/INTSCALE)
|
||||
#define Mul2(a, b) ((a)*(b))
|
||||
#endif
|
||||
|
||||
//static inline SCORE MulFCountScore(FCOUNT fc, SCORE sc)
|
||||
// {
|
||||
//// Fast way to say "if (fc >= 2^15 || sc >= 2^15)":
|
||||
// if ((fc | sc) & 0xffff1000)
|
||||
// {
|
||||
// SCORE Score = ((fc+5)/10)*sc;
|
||||
// assert(Score < assert);
|
||||
// OVERFLOW_WARN(Score > MINUS_INFINITY);
|
||||
// return Score/(INTSCALE/10);
|
||||
// }
|
||||
// SCORE Score = fc*sc;
|
||||
// assert(Score < OVERFLOW_WARN);
|
||||
// assert(Score > MINUS_INFINITY);
|
||||
// return Score/INTSCALE;
|
||||
// }
|
||||
|
||||
#endif // IntMath_h
|
||||
100
src/muscle/muscle3.8.31/src/local.cpp
Normal file
100
src/muscle/muscle3.8.31/src/local.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
#include "muscle.h"
|
||||
#include "textfile.h"
|
||||
#include "msa.h"
|
||||
#include "profile.h"
|
||||
#include "pwpath.h"
|
||||
#include "tree.h"
|
||||
|
||||
#define TRACE 0
|
||||
|
||||
static void MSAFromFileName(const char *FileName, MSA &a)
|
||||
{
|
||||
TextFile File(FileName);
|
||||
a.FromFile(File);
|
||||
}
|
||||
|
||||
// Prepares `msa` for profile construction and returns its profile:
//   1. assigns each sequence an id equal to its index,
//   2. builds `tree` from the alignment using the g_Cluster1 / g_Distance1 /
//      g_Root1 settings and registers it with SetMuscleTree,
//   3. returns ProfileFromMSA(msa).
static ProfPos *ProfileFromMSALocal(MSA &msa, Tree &tree)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	unsigned uSeqIndex = 0;
	while (uSeqIndex < uSeqCount)
		{
		msa.SetSeqId(uSeqIndex, uSeqIndex);
		++uSeqIndex;
		}

	TreeFromMSA(msa, tree, g_Cluster1, g_Distance1, g_Root1);
	SetMuscleTree(tree);

	ProfPos *Prof = ProfileFromMSA(msa);
	return Prof;
	}
|
||||
|
||||
void Local()
|
||||
{
|
||||
if (0 == g_pstrFileName1 || 0 == g_pstrFileName2)
|
||||
Quit("Must specify both -in1 and -in2 for -sw");
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
|
||||
MSA msa1;
|
||||
MSA msa2;
|
||||
|
||||
MSAFromFileName(g_pstrFileName1, msa1);
|
||||
MSAFromFileName(g_pstrFileName2, msa2);
|
||||
|
||||
ALPHA Alpha = ALPHA_Undefined;
|
||||
switch (g_SeqType)
|
||||
{
|
||||
case SEQTYPE_Auto:
|
||||
Alpha = msa1.GuessAlpha();
|
||||
break;
|
||||
|
||||
case SEQTYPE_Protein:
|
||||
Alpha = ALPHA_Amino;
|
||||
break;
|
||||
|
||||
case SEQTYPE_DNA:
|
||||
Alpha = ALPHA_DNA;
|
||||
break;
|
||||
|
||||
case SEQTYPE_RNA:
|
||||
Alpha = ALPHA_RNA;
|
||||
break;
|
||||
|
||||
default:
|
||||
Quit("Invalid SeqType");
|
||||
}
|
||||
SetAlpha(Alpha);
|
||||
|
||||
msa1.FixAlpha();
|
||||
msa2.FixAlpha();
|
||||
|
||||
if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
|
||||
SetPPScore(PPSCORE_SPN);
|
||||
|
||||
const unsigned uSeqCount1 = msa1.GetSeqCount();
|
||||
const unsigned uSeqCount2 = msa2.GetSeqCount();
|
||||
const unsigned uMaxSeqCount = (uSeqCount1 > uSeqCount2 ? uSeqCount1 : uSeqCount2);
|
||||
MSA::SetIdCount(uMaxSeqCount);
|
||||
|
||||
unsigned uLength1 = msa1.GetColCount();
|
||||
unsigned uLength2 = msa2.GetColCount();
|
||||
|
||||
Tree tree1;
|
||||
Tree tree2;
|
||||
|
||||
ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
|
||||
ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);
|
||||
|
||||
PWPath Path;
|
||||
SW(Prof1, uLength1, Prof2, uLength2, Path);
|
||||
|
||||
#if TRACE
|
||||
Path.LogMe();
|
||||
#endif
|
||||
|
||||
MSA msaOut;
|
||||
AlignTwoMSAsGivenPathSW(Path, msa1, msa2, msaOut);
|
||||
|
||||
#if TRACE
|
||||
msaOut.LogMe();
|
||||
#endif
|
||||
|
||||
TextFile fileOut(g_pstrOutFileName, true);
|
||||
msaOut.ToFile(fileOut);
|
||||
}
|
||||
72
src/muscle/muscle3.8.31/src/main.cpp
Normal file
72
src/muscle/muscle3.8.31/src/main.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
//@@TODO reconcile /muscle with /muscle3.6
|
||||
|
||||
#include "muscle.h"
|
||||
#include <stdio.h>
|
||||
#ifdef WIN32
|
||||
#include <windows.h> // for SetPriorityClass()
|
||||
#include <io.h> // for isatty()
|
||||
#else
|
||||
#include <unistd.h> // for isatty()
|
||||
#endif
|
||||
|
||||
// Version banner assembled at compile time from adjacent string literals.
// The #include sits between the literal pieces, so svnversion.h is
// presumably expected to expand to a string literal -- confirm against that
// file.
const char *MUSCLE_LONG_VERSION = "MUSCLE v" SHORT_VERSION "."
|
||||
#include "svnversion.h"
|
||||
" by Robert C. Edgar";
|
||||
|
||||
// Copies of main()'s argc / argv, stored at the start of main().
int g_argc;
|
||||
char **g_argv;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if WIN32
|
||||
// Multi-tasking does not work well in CPU-bound
|
||||
// console apps running under Win32.
|
||||
// Reducing the process priority allows GUI apps
|
||||
// to run responsively in parallel.
|
||||
SetPriorityClass(GetCurrentProcess(), BELOW_NORMAL_PRIORITY_CLASS);
|
||||
#endif
|
||||
g_argc = argc;
|
||||
g_argv = argv;
|
||||
|
||||
SetNewHandler();
|
||||
SetStartTime();
|
||||
ProcessArgVect(argc - 1, argv + 1);
|
||||
SetParams();
|
||||
SetLogFile();
|
||||
|
||||
//extern void TestSubFams(const char *);
|
||||
//TestSubFams(g_pstrInFileName);
|
||||
//return 0;
|
||||
|
||||
if (g_bVersion)
|
||||
{
|
||||
printf("%s\n", MUSCLE_LONG_VERSION);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!g_bQuiet)
|
||||
Credits();
|
||||
|
||||
if (MissingCommand() && isatty(0))
|
||||
{
|
||||
Usage();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (g_bCatchExceptions)
|
||||
{
|
||||
try
|
||||
{
|
||||
Run();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
OnException();
|
||||
exit(EXIT_Except);
|
||||
}
|
||||
}
|
||||
else
|
||||
Run();
|
||||
|
||||
exit(EXIT_Success);
|
||||
}
|
||||
0
src/muscle/muscle3.8.31/src/make.err
Normal file
0
src/muscle/muscle3.8.31/src/make.err
Normal file
2
src/muscle/muscle3.8.31/src/make.out
Normal file
2
src/muscle/muscle3.8.31/src/make.out
Normal file
@@ -0,0 +1,2 @@
|
||||
g++ -O3 -march=pentiumpro -mcpu=pentiumpro -funroll-loops -Winline -DNDEBUG=1 -o muscle aligngivenpath.o aligngivenpathsw.o aligntwomsas.o aligntwoprofs.o alpha.o anchors.o blosumla.o clust.o cluster.o clwwt.o cons.o diaglist.o difftrees.o difftreese.o distcalc.o distfunc.o domuscle.o dosp.o dpreglist.o edgelist.o enumopts.o enumtostr.o estring.o fasta.o fastclust.o fastdist.o fastdistjones.o fastdistkbit.o fastdistkmer.o fastdistmafft.o fastscorepath2.o finddiags.o glbalign.o glbaligndiag.o glbalignle.o glbalignsimple.o glbalignsp.o globals.o globalslinux.o globalswin32.o gonnet.o gotowt.o henikoffweight.o henikoffweightpb.o hydro.o intmath.o local.o main.o makerootmsa.o mpam200.o msa.o msa2.o msadistkimura.o msf.o objscore.o objscore2.o onexception.o options.o pam200mafft.o params.o phy.o phy2.o phy3.o phy4.o phyfromclust.o phyfromfile.o phytofile.o posgap.o profile.o profilefrommsa.o progalign.o progress.o progressivealign.o pwpath.o realigndiffs.o realigndiffse.o refine.o refinehoriz.o refinesubfams.o refinetree.o refinetreee.o refinevert.o savebest.o scorehistory.o scoremx.o seq.o seqvect.o setblosumweights.o setgscweights.o setnewhandler.o sw.o textfile.o threewaywt.o traceback.o tracebackopt.o tracebacksw.o treefrommsa.o typetostr.o upgma2.o usage.o validateids.o vtml2.o -lm -static
|
||||
strip muscle
|
||||
231
src/muscle/muscle3.8.31/src/makerootmsa.cpp
Normal file
231
src/muscle/muscle3.8.31/src/makerootmsa.cpp
Normal file
@@ -0,0 +1,231 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
#include "seqvect.h"
|
||||
#include "profile.h"
|
||||
#include "msa.h"
|
||||
#include "pwpath.h"
|
||||
#include "estring.h"
|
||||
|
||||
#define TRACE 0
|
||||
#define VALIDATE 0
|
||||
|
||||
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
|
||||
{
|
||||
short *esA;
|
||||
short *esB;
|
||||
PathToEstrings(Path, &esA, &esB);
|
||||
|
||||
const unsigned uSeqLength = s.Length();
|
||||
const unsigned uEdgeCount = Path.GetEdgeCount();
|
||||
|
||||
sOut.Clear();
|
||||
sOut.SetName(s.GetName());
|
||||
unsigned uPos = 0;
|
||||
for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
|
||||
{
|
||||
const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
|
||||
char cType = Edge.cType;
|
||||
if (bRight)
|
||||
{
|
||||
if (cType == 'I')
|
||||
cType = 'D';
|
||||
else if (cType == 'D')
|
||||
cType = 'I';
|
||||
}
|
||||
switch (cType)
|
||||
{
|
||||
case 'M':
|
||||
sOut.AppendChar(s[uPos++]);
|
||||
break;
|
||||
case 'D':
|
||||
sOut.AppendChar('-');
|
||||
break;
|
||||
case 'I':
|
||||
sOut.AppendChar(s[uPos++]);
|
||||
break;
|
||||
default:
|
||||
Quit("PathSeq, invalid edge type %c", cType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if VALIDATE
|
||||
|
||||
static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
|
||||
const ProgNode Nodes[], Seq &sRoot)
|
||||
{
|
||||
sRoot.Copy(s);
|
||||
unsigned uNodeIndex = uLeafNodeIndex;
|
||||
for (;;)
|
||||
{
|
||||
unsigned uParent = GuideTree.GetParent(uNodeIndex);
|
||||
if (NULL_NEIGHBOR == uParent)
|
||||
break;
|
||||
bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
|
||||
uNodeIndex = uParent;
|
||||
const PWPath &Path = Nodes[uNodeIndex].m_Path;
|
||||
Seq sTmp;
|
||||
PathSeq(sRoot, Path, bRight, sTmp);
|
||||
sTmp.SetId(0);
|
||||
sRoot.Copy(sTmp);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // VALIDATE
|
||||
|
||||
static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
|
||||
const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)
|
||||
{
|
||||
short *EstringCurr = Estring1;
|
||||
short *EstringNext = Estring2;
|
||||
|
||||
const unsigned uSeqLength = s.Length();
|
||||
EstringCurr[0] = uSeqLength;
|
||||
EstringCurr[1] = 0;
|
||||
|
||||
unsigned uNodeIndex = uLeafNodeIndex;
|
||||
for (;;)
|
||||
{
|
||||
unsigned uParent = GuideTree.GetParent(uNodeIndex);
|
||||
if (NULL_NEIGHBOR == uParent)
|
||||
break;
|
||||
bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
|
||||
uNodeIndex = uParent;
|
||||
const PWPath &Path = Nodes[uNodeIndex].m_Path;
|
||||
const short *EstringNode = bRight ?
|
||||
Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;
|
||||
|
||||
MulEstrings(EstringCurr, EstringNode, EstringNext);
|
||||
#if TRACE
|
||||
Log("\n");
|
||||
Log("Curr=");
|
||||
LogEstring(EstringCurr);
|
||||
Log("\n");
|
||||
Log("Node=");
|
||||
LogEstring(EstringNode);
|
||||
Log("\n");
|
||||
Log("Prod=");
|
||||
LogEstring(EstringNext);
|
||||
Log("\n");
|
||||
#endif
|
||||
short *EstringTmp = EstringNext;
|
||||
EstringNext = EstringCurr;
|
||||
EstringCurr = EstringTmp;
|
||||
}
|
||||
EstringOp(EstringCurr, s, sRoot);
|
||||
|
||||
#if TRACE
|
||||
Log("Root estring=");
|
||||
LogEstring(EstringCurr);
|
||||
Log("\n");
|
||||
Log("Root seq=");
|
||||
sRoot.LogMe();
|
||||
#endif
|
||||
return EstringCurr;
|
||||
}
|
||||
|
||||
static unsigned GetFirstNodeIndex(const Tree &tree)
|
||||
{
|
||||
if (g_bStable)
|
||||
return 0;
|
||||
return tree.FirstDepthFirstNode();
|
||||
}
|
||||
|
||||
static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
|
||||
{
|
||||
if (g_bStable)
|
||||
{
|
||||
const unsigned uNodeCount = tree.GetNodeCount();
|
||||
unsigned uNodeIndex = uPrevNodeIndex;
|
||||
for (;;)
|
||||
{
|
||||
++uNodeIndex;
|
||||
if (uNodeIndex >= uNodeCount)
|
||||
return NULL_NEIGHBOR;
|
||||
if (tree.IsLeaf(uNodeIndex))
|
||||
return uNodeIndex;
|
||||
}
|
||||
}
|
||||
unsigned uNodeIndex = uPrevNodeIndex;
|
||||
for (;;)
|
||||
{
|
||||
uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);
|
||||
if (NULL_NEIGHBOR == uNodeIndex || tree.IsLeaf(uNodeIndex))
|
||||
return uNodeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
|
||||
MSA &a)
|
||||
{
|
||||
#if TRACE
|
||||
Log("MakeRootMSA Tree=");
|
||||
GuideTree.LogMe();
|
||||
#endif
|
||||
const unsigned uSeqCount = v.GetSeqCount();
|
||||
unsigned uColCount = uInsane;
|
||||
unsigned uSeqIndex = 0;
|
||||
const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
|
||||
const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
|
||||
const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
|
||||
const unsigned uRootColCount = RootPath.GetEdgeCount();
|
||||
const unsigned uEstringSize = uRootColCount + 1;
|
||||
short *Estring1 = new short[uEstringSize];
|
||||
short *Estring2 = new short[uEstringSize];
|
||||
SetProgressDesc("Root alignment");
|
||||
|
||||
unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
|
||||
do
|
||||
{
|
||||
Progress(uSeqIndex, uSeqCount);
|
||||
|
||||
unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
|
||||
const Seq &s = *(v[uId]);
|
||||
|
||||
Seq sRootE;
|
||||
short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
|
||||
Estring1, Estring2);
|
||||
Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);
|
||||
|
||||
#if VALIDATE
|
||||
Seq sRoot;
|
||||
MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
|
||||
if (!sRoot.Eq(sRootE))
|
||||
{
|
||||
Log("sRoot=");
|
||||
sRoot.LogMe();
|
||||
Log("sRootE=");
|
||||
sRootE.LogMe();
|
||||
Quit("Root seqs differ");
|
||||
}
|
||||
#if TRACE
|
||||
Log("MakeRootSeq=\n");
|
||||
sRoot.LogMe();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (uInsane == uColCount)
|
||||
{
|
||||
uColCount = sRootE.Length();
|
||||
a.SetSize(uSeqCount, uColCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(uColCount == sRootE.Length());
|
||||
}
|
||||
a.SetSeqName(uSeqIndex, s.GetName());
|
||||
a.SetSeqId(uSeqIndex, uId);
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
|
||||
++uSeqIndex;
|
||||
|
||||
uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
|
||||
}
|
||||
while (NULL_NEIGHBOR != uTreeNodeIndex);
|
||||
|
||||
delete[] Estring1;
|
||||
delete[] Estring2;
|
||||
|
||||
ProgressStepsDone();
|
||||
assert(uSeqIndex == uSeqCount);
|
||||
}
|
||||
62
src/muscle/muscle3.8.31/src/makerootmsab.cpp
Normal file
62
src/muscle/muscle3.8.31/src/makerootmsab.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
#include "muscle.h"
|
||||
#include "tree.h"
|
||||
#include "profile.h"
|
||||
#include "msa.h"
|
||||
#include "seqvect.h"
|
||||
#include "pwpath.h"
|
||||
|
||||
// Aligns the single sequence `s` to the root profile and stores the aligned
// row as row uSeqIndex of msaOut.
//   s               sequence to place
//   uSeqIndex       destination row in msaOut
//   RootProf        root profile, uRootProfLength positions long
//   msaOut          output alignment; only row uSeqIndex is written
// The sequence's own profile has gap-open and gap-close scores set to
// MINUS_INFINITY at every position before alignment. The aligned row is
// recovered by combining the one-sequence MSA with a dummy one-row MSA of
// '?' characters along the alignment path.
static void DoSeq(Seq &s, unsigned uSeqIndex, const ProfPos *RootProf,
  unsigned uRootProfLength, MSA &msaOut)
	{
	MSA msaSeq;
	msaSeq.FromSeq(s);
	const unsigned uSeqLength = s.Length();

	// Dummy single-row alignment with the same column count as the root
	// profile.
	MSA msaDummy;
	msaDummy.SetSize(1, uRootProfLength);
	msaDummy.SetSeqId(0, 0);
	msaDummy.SetSeqName(0, "Dummy0");
	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
		msaDummy.SetChar(0, uColIndex, '?');

	// NOTE(review): SeqProf is never released in this function -- confirm
	// ownership of the array returned by ProfileFromMSA.
	ProfPos *SeqProf = ProfileFromMSA(msaSeq);
	for (unsigned uColIndex = 0; uColIndex < uSeqLength; ++uColIndex)
		{
		ProfPos &PP = SeqProf[uColIndex];
		PP.m_scoreGapOpen = MINUS_INFINITY;
		PP.m_scoreGapClose = MINUS_INFINITY;
		}

	ProfPos *ProfOut;
	unsigned uLengthOut;
	PWPath Path;
	AlignTwoProfs(SeqProf, uSeqLength, 1.0, RootProf, uRootProfLength, 1.0,
	  Path, &ProfOut, &uLengthOut);
	// Sanity check: the output profile must be as long as the root profile.
	// This has to be a comparison; an assignment here would overwrite
	// uLengthOut and test only whether uRootProfLength is non-zero.
	assert(uLengthOut == uRootProfLength);
	delete[] ProfOut;

	MSA msaCombined;
	AlignTwoMSAsGivenPath(Path, msaSeq, msaDummy, msaCombined);

	msaCombined.LogMe();
	msaOut.SetSeqName(uSeqIndex, s.GetName());
	msaOut.SetSeqId(uSeqIndex, s.GetId());
	// Row 0 of the combined alignment is the aligned input sequence.
	for (unsigned uColIndex = 0; uColIndex < uRootProfLength; ++uColIndex)
		msaOut.SetChar(uSeqIndex, uColIndex, msaCombined.GetChar(0, uColIndex));
	}
|
||||
|
||||
// Steven Brenner's O(NL^2) proposal for creating a root alignment
|
||||
// Align each sequence to the profile at the root.
|
||||
// Compare the e-string solution, which is O(NL log N).
|
||||
// Builds the root alignment `a` by aligning every sequence in `v`
// independently to the profile stored at the root node of GuideTree.
// `a` is sized to (number of sequences) x (root profile length) and filled
// one row per sequence by DoSeq.
void MakeRootMSABrenner(SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
	const unsigned uSeqCount = v.Length();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const ProgNode &RootNode = Nodes[uRootNodeIndex];
	const ProfPos *RootProfile = RootNode.m_Prof;
	const unsigned uRootColCount = RootNode.m_uLength;
	a.SetSize(uSeqCount, uRootColCount);

	unsigned uSeqIndex = 0;
	while (uSeqIndex < uSeqCount)
		{
		DoSeq(*v[uSeqIndex], uSeqIndex, RootProfile, uRootColCount, a);
		++uSeqIndex;
		}
	}
|
||||
38
src/muscle/muscle3.8.31/src/maketree.cpp
Normal file
38
src/muscle/muscle3.8.31/src/maketree.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
#include "tree.h"
|
||||
|
||||
void DoMakeTree()
|
||||
{
|
||||
if (g_pstrInFileName == 0 || g_pstrOutFileName == 0)
|
||||
Quit("-maketree requires -in <msa> and -out <treefile>");
|
||||
|
||||
SetStartTime();
|
||||
|
||||
SetSeqWeightMethod(g_SeqWeight1);
|
||||
|
||||
TextFile MSAFile(g_pstrInFileName);
|
||||
|
||||
MSA msa;
|
||||
msa.FromFile(MSAFile);
|
||||
|
||||
unsigned uSeqCount = msa.GetSeqCount();
|
||||
MSA::SetIdCount(uSeqCount);
|
||||
|
||||
// Initialize sequence ids.
|
||||
// From this point on, ids must somehow propogate from here.
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
msa.SetSeqId(uSeqIndex, uSeqIndex);
|
||||
SetMuscleInputMSA(msa);
|
||||
|
||||
Progress("%u sequences", uSeqCount);
|
||||
|
||||
Tree tree;
|
||||
TreeFromMSA(msa, tree, g_Cluster2, g_Distance2, g_Root2);
|
||||
|
||||
TextFile TreeFile(g_pstrOutFileName, true);
|
||||
tree.ToFile(TreeFile);
|
||||
|
||||
Progress("Tree created");
|
||||
}
|
||||
64
src/muscle/muscle3.8.31/src/mhack.cpp
Normal file
64
src/muscle/muscle3.8.31/src/mhack.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#include "muscle.h"
|
||||
#include "seqvect.h"
|
||||
#include "msa.h"
|
||||
|
||||
/***
|
||||
Methionine hack.
|
||||
Most proteins start with M.
|
||||
This results in odd-looking alignments with the terminal Ms aligned followed
|
||||
immediately by gaps.
|
||||
Hack this by treating terminal M like X.
|
||||
***/
|
||||
|
||||
static bool *M;
|
||||
|
||||
// First half of the methionine hack (amino-acid alphabet only).
// For every non-empty sequence in `v` whose first letter is 'M' or 'm', the
// sequence id is flagged in the file-level table M and the letter is
// replaced by 'X'. MHackEnd later restores the letter and frees the table.
void MHackStart(SeqVect &v)
	{
	if (ALPHA_Amino != g_Alpha)
		return;

	// Release a table left over from an earlier MHackStart that was not
	// followed by MHackEnd, so that repeated calls do not leak it.
	delete[] M;
	M = 0;

	const unsigned uSeqCount = v.Length();
	M = new bool[uSeqCount];
	memset(M, 0, uSeqCount*sizeof(bool));
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = v.GetSeq(uSeqIndex);
		if (0 == s.Length())
			continue;
		if (s[0] != 'M' && s[0] != 'm')
			continue;

		// The table is indexed by sequence id, not by position in v, and
		// has uSeqCount entries; refuse ids that would write outside it.
		const unsigned uId = s.GetId();
		if (uId >= uSeqCount)
			Quit("MHackStart, sequence id %u out of range (%u sequences)",
			  uId, uSeqCount);
		M[uId] = true;
		s[0] = 'X';
		}
	}
|
||||
|
||||
// Second half of the methionine hack (amino-acid alphabet only).
// For every row of `msa` whose sequence id is flagged in the table M, the
// first non-gap character is set to 'M'. The table is then freed and the
// pointer reset. Does nothing when no table exists.
void MHackEnd(MSA &msa)
	{
	if (ALPHA_Amino != g_Alpha || 0 == M)
		return;

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const unsigned uId = msa.GetSeqId(uSeqIndex);
		if (!M[uId])
			continue;

		// Find the first non-gap column, if the row has one.
		unsigned uColIndex = 0;
		while (uColIndex < uColCount && msa.IsGap(uSeqIndex, uColIndex))
			++uColIndex;
		if (uColIndex < uColCount)
			msa.SetChar(uSeqIndex, uColIndex, 'M');
		}

	delete[] M;
	M = 0;
	}
|
||||
21
src/muscle/muscle3.8.31/src/mk
Executable file
21
src/muscle/muscle3.8.31/src/mk
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
# Build the muscle executable: compile each translation unit, then link.
#
# Environment:
#   CXX            C++ compiler driver (defaults to g++ when unset)
#   ENV_GCC_OPTS   extra compile options
#   ENV_LINK_OPTS  extra link options
#
# Compiler output is appended to muscle.make.stdout.txt / muscle.make.stderr.txt.

CPPNames='aligngivenpath aligngivenpathsw aligntwomsas aligntwoprofs aln alpha anchors bittraceback blosum62 blosumla clust cluster clwwt color cons diaglist diffobjscore diffpaths difftrees difftreese distcalc distfunc distpwkimura domuscle dosp dpreglist drawtree edgelist enumopts enumtostr estring fasta fasta2 fastclust fastdist fastdistjones fastdistkbit fastdistkmer fastdistmafft fastdistnuc fastscorepath2 finddiags finddiagsn glbalign glbalign352 glbaligndiag glbalignle glbalignsimple glbalignsp glbalignspn glbalignss glbalndimer globals globalslinux globalsosx globalsother globalswin32 gonnet henikoffweight henikoffweightpb html hydro intmath local main makerootmsa makerootmsab maketree mhack mpam200 msa msa2 msadistkimura msf muscle muscleout nucmx nwdasimple nwdasimple2 nwdasmall nwrec nwsmall objscore objscore2 objscoreda onexception options outweights pam200mafft params phy phy2 phy3 phy4 phyfromclust phyfromfile physeq phytofile posgap ppscore profdb profile profilefrommsa progalign progress progressivealign pwpath readmx realigndiffs realigndiffse refine refinehoriz refinesubfams refinetree refinetreee refinevert refinew savebest scoredist scoregaps scorehistory scorepp seq seqvect setblosumweights setgscweights setnewhandler spfast sptest stabilize subfam subfams sw termgaps textfile threewaywt tomhydro traceback tracebackopt tracebacksw treefrommsa typetostr upgma2 usage validateids vtml2 writescorefile'

# Derive the object list from the source list so the two cannot drift apart.
ObjNames=
for CPPName in $CPPNames
do
	ObjNames="$ObjNames $CPPName.o"
done

# Fall back to g++ when the caller did not export a compiler.
: "${CXX:=g++}"

# Progress goes to the terminal when there is one; otherwise (cron, CI,
# make run without a controlling tty) fall back to stderr instead of
# failing on every redirection to /dev/tty.
if { : >> /dev/tty ; } 2> /dev/null ; then
	ProgressDev=/dev/tty
else
	ProgressDev=/dev/stderr
fi

rm -f *.o muscle.make.stdout.txt muscle.make.stderr.txt
for CPPName in $CPPNames
do
	echo $CPPName >> $ProgressDev
	$CXX $ENV_GCC_OPTS -c -O3 -msse2 -mfpmath=sse -D_FILE_OFFSET_BITS=64 -DNDEBUG=1 $CPPName.cpp -o $CPPName.o >> muscle.make.stdout.txt 2>> muscle.make.stderr.txt
done

# Static linking is only requested on Linux.
LINK_OPTS=
if [ "$(uname -s)" = Linux ] ; then
	LINK_OPTS=-static
fi
$CXX $LINK_OPTS $ENV_LINK_OPTS -g -o muscle $ObjNames >> muscle.make.stdout.txt 2>> muscle.make.stderr.txt
tail muscle.make.stderr.txt

# Stop with a clear status when no binary was produced, instead of running
# strip/ls/sum on a missing file.
if [ ! -f muscle ] ; then
	echo "mk: muscle was not built, see muscle.make.stderr.txt" >&2
	exit 1
fi

strip muscle
ls -lh muscle
sum muscle
|
||||
107
src/muscle/muscle3.8.31/src/mpam200.cpp
Normal file
107
src/muscle/muscle3.8.31/src/mpam200.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
#include "muscle.h"
|
||||
|
||||
// Offset added to every entry of the centered PAM200 table.
const float PAM_200_CENTER = (float) 20.0;

// v(x): one table entry, converted to float and shifted by PAM_200_CENTER.
// The argument is parenthesized so the cast applies to the whole expression,
// not only to its first token.
#define v(x)	((float) (x) + PAM_200_CENTER)

// ROW(...): one 20-entry table row followed by a comma, in the column order
// A C D E F G H I K L M N P Q R S T V W Y.
#define ROW(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
	v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
|
||||
|
||||
float PAM200[32][32] =
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
ROW( 388, -0, 34, 32, -202, 159, -88, 89, -55, -67,
|
||||
19, 86, 186, -34, -32, 237, 273, 171, -326, -239) // A
|
||||
ROW( -0, 1170, -248, -315, 74, -14, 43, -151, -204, -196,
|
||||
-132, -49, -142, -215, 29, 165, -7, -69, 179, 313) // C
|
||||
ROW( 34, -248, 625, 496, -419, 148, 78, -245, 55, -361,
|
||||
-255, 332, -169, 122, -64, 45, -13, -167, -438, -148) // D
|
||||
ROW( 32, -315, 496, 610, -480, 125, 25, -245, 175, -327,
|
||||
-242, 166, -141, 279, 34, -30, -56, -150, -386, -305) // E
|
||||
ROW( -202, 74, -419, -480, 888, -407, 62, 80, -443, 320,
|
||||
67, -236, -180, -294, -327, -51, -173, 31, -1, 584) // F
|
||||
ROW( 159, -14, 148, 125, -407, 662, -114, -216, -34, -324,
|
||||
-246, 79, -77, -68, 97, 155, 21, -93, -58, -349) // G
|
||||
ROW( -88, 43, 78, 25, 62, -114, 766, -205, 144, -92,
|
||||
-152, 238, 66, 368, 257, 35, -35, -217, -201, 468) // H
|
||||
ROW( 89, -151, -245, -245, 80, -216, -205, 554, -224, 288,
|
||||
391, -114, -115, -222, -208, -19, 162, 469, -274, -153) // I
|
||||
ROW( -55, -204, 55, 175, -443, -34, 144, -224, 632, -249,
|
||||
-118, 186, -86, 315, 466, 2, 19, -227, -216, -264) // K
|
||||
ROW( -67, -196, -361, -327, 320, -324, -92, 288, -249, 591,
|
||||
369, -223, 53, -86, -170, -69, -41, 239, -66, -29) // L
|
||||
ROW( 19, -132, -255, -242, 67, -246, -152, 391, -118, 369,
|
||||
756, -131, -98, -124, -129, -49, 129, 331, -229, -182) // M
|
||||
ROW( 86, -49, 332, 166, -236, 79, 238, -114, 186, -223,
|
||||
-131, 516, -21, 88, 73, 240, 168, -118, -379, -8) // N
|
||||
ROW( 186, -142, -169, -141, -180, -77, 66, -115, -86, 53,
|
||||
-98, -21, 736, 122, 5, 221, 139, -75, -373, -226) // P
|
||||
ROW( -34, -215, 122, 279, -294, -68, 368, -222, 315, -86,
|
||||
-124, 88, 122, 635, 301, -13, -35, -195, -243, -73) // Q
|
||||
ROW( -32, 29, -64, 34, -327, 97, 257, -208, 466, -170,
|
||||
-129, 73, 5, 301, 606, 28, -4, -201, 104, -133) // R
|
||||
ROW( 237, 165, 45, -30, -51, 155, 35, -19, 2, -69,
|
||||
-49, 240, 221, -13, 28, 353, 259, 8, -213, -55) // S
|
||||
ROW( 273, -7, -13, -56, -173, 21, -35, 162, 19, -41,
|
||||
129, 168, 139, -35, -4, 259, 422, 143, -343, -190) // T
|
||||
ROW( 171, -69, -167, -150, 31, -93, -217, 469, -227, 239,
|
||||
331, -118, -75, -195, -201, 8, 143, 505, -245, -197) // V
|
||||
ROW( -326, 179, -438, -386, -1, -58, -201, -274, -216, -66,
|
||||
-229, -379, -373, -243, 104, -213, -343, -245, 1475, 63) // W
|
||||
ROW( -239, 313, -148, -305, 584, -349, 468, -153, -264, -29,
|
||||
-182, -8, -226, -73, -133, -55, -190, -197, 63, 979) // Y
|
||||
};
|
||||
|
||||
// Redefine v() for the un-centered table: plain conversion to float, no offset.
// The argument is parenthesized so the cast applies to the whole expression.
#undef v
#define v(x)	((float) (x))

// RNC(...): one 20-entry row of the un-centered table followed by a comma,
// in the column order A C D E F G H I K L M N P Q R S T V W Y.
#define RNC(A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y) \
	{ v(A), v(C), v(D), v(E), v(F), v(G), v(H), v(I), v(K), v(L), \
	v(M), v(N), v(P), v(Q), v(R), v(S), v(T), v(V), v(W), v(Y) },
|
||||
|
||||
float PAM200NoCenter[32][32] =
|
||||
|
||||
{
|
||||
// A C D E F G H I K L
|
||||
// M N P Q R S T V W Y
|
||||
RNC( 388, -0, 34, 32, -202, 159, -88, 89, -55, -67,
|
||||
19, 86, 186, -34, -32, 237, 273, 171, -326, -239) // A
|
||||
RNC( -0, 1170, -248, -315, 74, -14, 43, -151, -204, -196,
|
||||
-132, -49, -142, -215, 29, 165, -7, -69, 179, 313) // C
|
||||
RNC( 34, -248, 625, 496, -419, 148, 78, -245, 55, -361,
|
||||
-255, 332, -169, 122, -64, 45, -13, -167, -438, -148) // D
|
||||
RNC( 32, -315, 496, 610, -480, 125, 25, -245, 175, -327,
|
||||
-242, 166, -141, 279, 34, -30, -56, -150, -386, -305) // E
|
||||
RNC( -202, 74, -419, -480, 888, -407, 62, 80, -443, 320,
|
||||
67, -236, -180, -294, -327, -51, -173, 31, -1, 584) // F
|
||||
RNC( 159, -14, 148, 125, -407, 662, -114, -216, -34, -324,
|
||||
-246, 79, -77, -68, 97, 155, 21, -93, -58, -349) // G
|
||||
RNC( -88, 43, 78, 25, 62, -114, 766, -205, 144, -92,
|
||||
-152, 238, 66, 368, 257, 35, -35, -217, -201, 468) // H
|
||||
RNC( 89, -151, -245, -245, 80, -216, -205, 554, -224, 288,
|
||||
391, -114, -115, -222, -208, -19, 162, 469, -274, -153) // I
|
||||
RNC( -55, -204, 55, 175, -443, -34, 144, -224, 632, -249,
|
||||
-118, 186, -86, 315, 466, 2, 19, -227, -216, -264) // K
|
||||
RNC( -67, -196, -361, -327, 320, -324, -92, 288, -249, 591,
|
||||
369, -223, 53, -86, -170, -69, -41, 239, -66, -29) // L
|
||||
RNC( 19, -132, -255, -242, 67, -246, -152, 391, -118, 369,
|
||||
756, -131, -98, -124, -129, -49, 129, 331, -229, -182) // M
|
||||
RNC( 86, -49, 332, 166, -236, 79, 238, -114, 186, -223,
|
||||
-131, 516, -21, 88, 73, 240, 168, -118, -379, -8) // N
|
||||
RNC( 186, -142, -169, -141, -180, -77, 66, -115, -86, 53,
|
||||
-98, -21, 736, 122, 5, 221, 139, -75, -373, -226) // P
|
||||
RNC( -34, -215, 122, 279, -294, -68, 368, -222, 315, -86,
|
||||
-124, 88, 122, 635, 301, -13, -35, -195, -243, -73) // Q
|
||||
RNC( -32, 29, -64, 34, -327, 97, 257, -208, 466, -170,
|
||||
-129, 73, 5, 301, 606, 28, -4, -201, 104, -133) // R
|
||||
RNC( 237, 165, 45, -30, -51, 155, 35, -19, 2, -69,
|
||||
-49, 240, 221, -13, 28, 353, 259, 8, -213, -55) // S
|
||||
RNC( 273, -7, -13, -56, -173, 21, -35, 162, 19, -41,
|
||||
129, 168, 139, -35, -4, 259, 422, 143, -343, -190) // T
|
||||
RNC( 171, -69, -167, -150, 31, -93, -217, 469, -227, 239,
|
||||
331, -118, -75, -195, -201, 8, 143, 505, -245, -197) // V
|
||||
RNC( -326, 179, -438, -386, -1, -58, -201, -274, -216, -66,
|
||||
-229, -379, -373, -243, 104, -213, -343, -245, 1475, 63) // W
|
||||
RNC( -239, 313, -148, -305, 584, -349, 468, -153, -264, -29,
|
||||
-182, -8, -226, -73, -133, -55, -190, -197, 63, 979) // Y
|
||||
};
|
||||
851
src/muscle/muscle3.8.31/src/msa.cpp
Normal file
851
src/muscle/muscle3.8.31/src/msa.cpp
Normal file
@@ -0,0 +1,851 @@
|
||||
#include "muscle.h"
|
||||
#include "msa.h"
|
||||
#include "textfile.h"
|
||||
#include "seq.h"
|
||||
#include <math.h>
|
||||
|
||||
// Column capacity used by SetSeqCount, and the amount by which SetChar grows
// every row buffer when a write lands exactly at the current capacity.
const unsigned DEFAULT_SEQ_LENGTH = 500;
|
||||
|
||||
// Shared by all MSA objects: number of sequence ids, set through SetIdCount.
// Sizes the id -> row lookup table of every MSA.
unsigned MSA::m_uIdCount = 0;
|
||||
|
||||
// Construct an empty alignment: no rows, no columns, no buffers, no id maps.
MSA::MSA()
	{
	// Storage
	m_szSeqs = 0;
	m_szNames = 0;
	m_Weights = 0;

	// Id <-> row lookup tables
	m_IdToSeqIndex = 0;
	m_SeqIndexToId = 0;

	// Logical and allocated dimensions
	m_uSeqCount = 0;
	m_uColCount = 0;
	m_uCacheSeqCount = 0;
	m_uCacheSeqLength = 0;
	}
|
||||
|
||||
// Destructor: all owned memory is released by Free().
MSA::~MSA()
	{
	Free();
	}
|
||||
|
||||
void MSA::Free()
|
||||
{
|
||||
for (unsigned n = 0; n < m_uSeqCount; ++n)
|
||||
{
|
||||
delete[] m_szSeqs[n];
|
||||
delete[] m_szNames[n];
|
||||
}
|
||||
|
||||
delete[] m_szSeqs;
|
||||
delete[] m_szNames;
|
||||
delete[] m_Weights;
|
||||
delete[] m_IdToSeqIndex;
|
||||
delete[] m_SeqIndexToId;
|
||||
|
||||
m_uSeqCount = 0;
|
||||
m_uColCount = 0;
|
||||
|
||||
m_szSeqs = 0;
|
||||
m_szNames = 0;
|
||||
m_Weights = 0;
|
||||
|
||||
m_IdToSeqIndex = 0;
|
||||
m_SeqIndexToId = 0;
|
||||
}
|
||||
|
||||
void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
|
||||
{
|
||||
Free();
|
||||
|
||||
m_uSeqCount = uSeqCount;
|
||||
m_uCacheSeqLength = uColCount;
|
||||
m_uColCount = 0;
|
||||
|
||||
if (0 == uSeqCount && 0 == uColCount)
|
||||
return;
|
||||
|
||||
m_szSeqs = new char *[uSeqCount];
|
||||
m_szNames = new char *[uSeqCount];
|
||||
m_Weights = new WEIGHT[uSeqCount];
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
m_szSeqs[uSeqIndex] = new char[uColCount+1];
|
||||
m_szNames[uSeqIndex] = 0;
|
||||
#if DEBUG
|
||||
m_Weights[uSeqIndex] = BTInsane;
|
||||
memset(m_szSeqs[uSeqIndex], '?', uColCount);
|
||||
#endif
|
||||
m_szSeqs[uSeqIndex][uColCount] = 0;
|
||||
}
|
||||
|
||||
if (m_uIdCount > 0)
|
||||
{
|
||||
m_IdToSeqIndex = new unsigned[m_uIdCount];
|
||||
m_SeqIndexToId = new unsigned[m_uSeqCount];
|
||||
#if DEBUG
|
||||
memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
|
||||
memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void MSA::LogMe() const
|
||||
{
|
||||
if (0 == GetColCount())
|
||||
{
|
||||
Log("MSA empty\n");
|
||||
return;
|
||||
}
|
||||
|
||||
const unsigned uColsPerLine = 50;
|
||||
unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
|
||||
for (unsigned n = 0; n < uLinesPerSeq; ++n)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned iStart = n*uColsPerLine;
|
||||
unsigned iEnd = GetColCount();
|
||||
if (iEnd - iStart + 1 > uColsPerLine)
|
||||
iEnd = iStart + uColsPerLine;
|
||||
Log(" ");
|
||||
for (i = iStart; i < iEnd; ++i)
|
||||
Log("%u", i%10);
|
||||
Log("\n");
|
||||
Log(" ");
|
||||
for (i = iStart; i + 9 < iEnd; i += 10)
|
||||
Log("%-10u", i);
|
||||
if (n == uLinesPerSeq - 1)
|
||||
Log(" %-10u", GetColCount());
|
||||
Log("\n");
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
Log("%12.12s", m_szNames[uSeqIndex]);
|
||||
if (m_Weights[uSeqIndex] != BTInsane)
|
||||
Log(" (%5.3f)", m_Weights[uSeqIndex]);
|
||||
else
|
||||
Log(" ");
|
||||
Log(" ");
|
||||
for (i = iStart; i < iEnd; ++i)
|
||||
Log("%c", GetChar(uSeqIndex, i));
|
||||
if (0 != m_SeqIndexToId)
|
||||
Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
|
||||
Log("\n");
|
||||
}
|
||||
Log("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
|
||||
{
|
||||
// TODO: Performance cost?
|
||||
if (uSeqIndex >= m_uSeqCount || uIndex >= m_uColCount)
|
||||
Quit("MSA::GetChar(%u/%u,%u/%u)",
|
||||
uSeqIndex, m_uSeqCount, uIndex, m_uColCount);
|
||||
|
||||
char c = m_szSeqs[uSeqIndex][uIndex];
|
||||
// assert(IsLegalChar(c));
|
||||
return c;
|
||||
}
|
||||
|
||||
unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
|
||||
{
|
||||
// TODO: Performance cost?
|
||||
char c = GetChar(uSeqIndex, uIndex);
|
||||
unsigned uLetter = CharToLetter(c);
|
||||
if (uLetter >= 20)
|
||||
{
|
||||
char c = ' ';
|
||||
if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
|
||||
c = m_szSeqs[uSeqIndex][uIndex];
|
||||
Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
|
||||
uSeqIndex, m_uSeqCount, uIndex, m_uColCount, c, uLetter);
|
||||
}
|
||||
return uLetter;
|
||||
}
|
||||
|
||||
unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
|
||||
{
|
||||
// TODO: Performance cost?
|
||||
char c = GetChar(uSeqIndex, uIndex);
|
||||
unsigned uLetter = CharToLetterEx(c);
|
||||
return uLetter;
|
||||
}
|
||||
|
||||
void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
|
||||
{
|
||||
if (uSeqIndex >= m_uSeqCount)
|
||||
Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, m_uSeqCount);
|
||||
delete[] m_szNames[uSeqIndex];
|
||||
int n = (int) strlen(szName) + 1;
|
||||
m_szNames[uSeqIndex] = new char[n];
|
||||
memcpy(m_szNames[uSeqIndex], szName, n);
|
||||
}
|
||||
|
||||
const char *MSA::GetSeqName(unsigned uSeqIndex) const
|
||||
{
|
||||
if (uSeqIndex >= m_uSeqCount)
|
||||
Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
|
||||
return m_szNames[uSeqIndex];
|
||||
}
|
||||
|
||||
bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
|
||||
{
|
||||
char c = GetChar(uSeqIndex, uIndex);
|
||||
return IsGapChar(c);
|
||||
}
|
||||
|
||||
bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
|
||||
{
|
||||
char c = GetChar(uSeqIndex, uIndex);
|
||||
return IsWildcardChar(c);
|
||||
}
|
||||
|
||||
void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
|
||||
{
|
||||
if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
|
||||
Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);
|
||||
|
||||
if (uIndex == m_uCacheSeqLength)
|
||||
{
|
||||
const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
|
||||
for (unsigned n = 0; n < m_uSeqCount; ++n)
|
||||
{
|
||||
char *ptrNewSeq = new char[uNewCacheSeqLength+1];
|
||||
memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
|
||||
memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
|
||||
ptrNewSeq[uNewCacheSeqLength] = 0;
|
||||
delete[] m_szSeqs[n];
|
||||
m_szSeqs[n] = ptrNewSeq;
|
||||
}
|
||||
|
||||
m_uColCount = uIndex;
|
||||
m_uCacheSeqLength = uNewCacheSeqLength;
|
||||
}
|
||||
|
||||
if (uIndex >= m_uColCount)
|
||||
m_uColCount = uIndex + 1;
|
||||
m_szSeqs[uSeqIndex][uIndex] = c;
|
||||
}
|
||||
|
||||
void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
|
||||
seq.Clear();
|
||||
|
||||
for (unsigned n = 0; n < m_uColCount; ++n)
|
||||
if (!IsGap(uSeqIndex, n))
|
||||
{
|
||||
char c = GetChar(uSeqIndex, n);
|
||||
if (!isalpha(c))
|
||||
Quit("Invalid character '%c' in sequence", c);
|
||||
c = toupper(c);
|
||||
seq.push_back(c);
|
||||
}
|
||||
const char *ptrName = GetSeqName(uSeqIndex);
|
||||
seq.SetName(ptrName);
|
||||
}
|
||||
|
||||
bool MSA::HasGap() const
|
||||
{
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
for (unsigned n = 0; n < GetColCount(); ++n)
|
||||
if (IsGap(uSeqIndex, n))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MSA::IsLegalLetter(unsigned uLetter) const
|
||||
{
|
||||
return uLetter < 20;
|
||||
}
|
||||
|
||||
void MSA::SetSeqCount(unsigned uSeqCount)
|
||||
{
|
||||
Free();
|
||||
SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
|
||||
}
|
||||
|
||||
void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
|
||||
{
|
||||
assert(uFromCol < GetColCount());
|
||||
assert(uToCol < GetColCount());
|
||||
if (uFromCol == uToCol)
|
||||
return;
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
const char c = GetChar(uSeqIndex, uFromCol);
|
||||
SetChar(uSeqIndex, uToCol, c);
|
||||
}
|
||||
}
|
||||
|
||||
void MSA::Copy(const MSA &msa)
|
||||
{
|
||||
Free();
|
||||
const unsigned uSeqCount = msa.GetSeqCount();
|
||||
const unsigned uColCount = msa.GetColCount();
|
||||
SetSize(uSeqCount, uColCount);
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
|
||||
const unsigned uId = msa.GetSeqId(uSeqIndex);
|
||||
SetSeqId(uSeqIndex, uId);
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
const char c = msa.GetChar(uSeqIndex, uColIndex);
|
||||
SetChar(uSeqIndex, uColIndex, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool MSA::IsGapColumn(unsigned uColIndex) const
|
||||
{
|
||||
assert(GetSeqCount() > 0);
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
if (!IsGap(uSeqIndex, uColIndex))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
|
||||
{
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
if (0 == stricmp(ptrSeqName, GetSeqName(uSeqIndex)))
|
||||
{
|
||||
*ptruSeqIndex = uSeqIndex;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MSA::DeleteCol(unsigned uColIndex)
|
||||
{
|
||||
assert(uColIndex < m_uColCount);
|
||||
size_t n = m_uColCount - uColIndex;
|
||||
if (n > 0)
|
||||
{
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
{
|
||||
char *ptrSeq = m_szSeqs[uSeqIndex];
|
||||
memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
|
||||
}
|
||||
}
|
||||
--m_uColCount;
|
||||
}
|
||||
|
||||
void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
|
||||
{
|
||||
for (unsigned n = 0; n < uColCount; ++n)
|
||||
DeleteCol(uColIndex);
|
||||
}
|
||||
|
||||
// Read the alignment from File; delegates to FromFASTAFile.
void MSA::FromFile(TextFile &File)
	{
	FromFASTAFile(File);
	}
|
||||
|
||||
// Weights sum to 1, WCounts sum to NIC
|
||||
// Return the weight of row uSeqIndex; calls Quit if it equals wInsane.
WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	const WEIGHT wSeq = m_Weights[uSeqIndex];
	if (wInsane == wSeq)
		Quit("Seq weight not set");
	return wSeq;
	}
|
||||
|
||||
void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
m_Weights[uSeqIndex] = w;
|
||||
}
|
||||
|
||||
// Rescale all row weights so that they sum to wDesiredTotal.
// Leaves the weights unchanged when their current sum is zero.
void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
	{
	WEIGHT wSum = 0;
	unsigned uRow;
	for (uRow = 0; uRow < m_uSeqCount; ++uRow)
		wSum += m_Weights[uRow];

	if (wSum == 0)
		return;

	const WEIGHT wScale = wDesiredTotal/wSum;
	for (uRow = 0; uRow < m_uSeqCount; ++uRow)
		m_Weights[uRow] *= wScale;
	}
|
||||
|
||||
void MSA::CalcWeights() const
|
||||
{
|
||||
Quit("Calc weights not implemented");
|
||||
}
|
||||
|
||||
static void FmtChar(char c, unsigned uWidth)
|
||||
{
|
||||
Log("%c", c);
|
||||
for (unsigned n = 0; n < uWidth - 1; ++n)
|
||||
Log(" ");
|
||||
}
|
||||
|
||||
static void FmtInt(unsigned u, unsigned uWidth)
|
||||
{
|
||||
static char szStr[1024];
|
||||
assert(uWidth < sizeof(szStr));
|
||||
if (u > 0)
|
||||
sprintf(szStr, "%u", u);
|
||||
else
|
||||
strcpy(szStr, ".");
|
||||
Log(szStr);
|
||||
unsigned n = (unsigned) strlen(szStr);
|
||||
if (n < uWidth)
|
||||
for (unsigned i = 0; i < uWidth - n; ++i)
|
||||
Log(" ");
|
||||
}
|
||||
|
||||
static void FmtInt0(unsigned u, unsigned uWidth)
|
||||
{
|
||||
static char szStr[1024];
|
||||
assert(uWidth < sizeof(szStr));
|
||||
sprintf(szStr, "%u", u);
|
||||
Log(szStr);
|
||||
unsigned n = (unsigned) strlen(szStr);
|
||||
if (n < uWidth)
|
||||
for (unsigned i = 0; i < uWidth - n; ++i)
|
||||
Log(" ");
|
||||
}
|
||||
|
||||
static void FmtPad(unsigned n)
|
||||
{
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
Log(" ");
|
||||
}
|
||||
|
||||
void MSA::FromSeq(const Seq &s)
|
||||
{
|
||||
unsigned uSeqLength = s.Length();
|
||||
SetSize(1, uSeqLength);
|
||||
SetSeqName(0, s.GetName());
|
||||
if (0 != m_SeqIndexToId)
|
||||
SetSeqId(0, s.GetId());
|
||||
for (unsigned n = 0; n < uSeqLength; ++n)
|
||||
SetChar(0, n, s[n]);
|
||||
}
|
||||
|
||||
unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
|
||||
{
|
||||
assert(uSeqIndex < GetSeqCount());
|
||||
assert(uColIndex < GetColCount());
|
||||
|
||||
unsigned uCol = 0;
|
||||
for (unsigned n = 0; n <= uColIndex; ++n)
|
||||
if (!IsGap(uSeqIndex, n))
|
||||
++uCol;
|
||||
return uCol;
|
||||
}
|
||||
|
||||
void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
|
||||
{
|
||||
assert(uToSeqIndex < m_uSeqCount);
|
||||
const unsigned uColCount = msaFrom.GetColCount();
|
||||
assert(m_uColCount == uColCount ||
|
||||
(0 == m_uColCount && uColCount <= m_uCacheSeqLength));
|
||||
|
||||
memcpy(m_szSeqs[uToSeqIndex], msaFrom.GetSeqBuffer(uFromSeqIndex), uColCount);
|
||||
SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
|
||||
if (0 == m_uColCount)
|
||||
m_uColCount = uColCount;
|
||||
}
|
||||
|
||||
const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
return m_szSeqs[uSeqIndex];
|
||||
}
|
||||
|
||||
void MSA::DeleteSeq(unsigned uSeqIndex)
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
|
||||
delete m_szSeqs[uSeqIndex];
|
||||
delete m_szNames[uSeqIndex];
|
||||
|
||||
const unsigned uBytesToMove = (m_uSeqCount - uSeqIndex)*sizeof(char *);
|
||||
if (uBytesToMove > 0)
|
||||
{
|
||||
memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
|
||||
memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
|
||||
}
|
||||
|
||||
--m_uSeqCount;
|
||||
|
||||
delete[] m_Weights;
|
||||
m_Weights = 0;
|
||||
}
|
||||
|
||||
bool MSA::IsEmptyCol(unsigned uColIndex) const
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
if (!IsGap(uSeqIndex, uColIndex))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
//void MSA::DeleteEmptyCols(bool bProgress)
|
||||
// {
|
||||
// unsigned uColCount = GetColCount();
|
||||
// for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
// {
|
||||
// if (IsEmptyCol(uColIndex))
|
||||
// {
|
||||
// if (bProgress)
|
||||
// {
|
||||
// Log("Deleting col %u of %u\n", uColIndex, uColCount);
|
||||
// printf("Deleting col %u of %u\n", uColIndex, uColCount);
|
||||
// }
|
||||
// DeleteCol(uColIndex);
|
||||
// --uColCount;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
|
||||
{
|
||||
Quit("MSA::AlignedColIndexToColIndex not implemented");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the sum of the weights of all rows.
WEIGHT MSA::GetTotalSeqWeight() const
	{
	const unsigned uRows = GetSeqCount();
	WEIGHT wSum = 0;
	for (unsigned uRow = 0; uRow != uRows; ++uRow)
		wSum += m_Weights[uRow];
	return wSum;
	}
|
||||
|
||||
bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
|
||||
unsigned uSeqIndex2)
|
||||
{
|
||||
Seq s1;
|
||||
Seq s2;
|
||||
|
||||
a1.GetSeq(uSeqIndex1, s1);
|
||||
a2.GetSeq(uSeqIndex2, s2);
|
||||
|
||||
s1.StripGaps();
|
||||
s2.StripGaps();
|
||||
|
||||
return s1.EqIgnoreCase(s2);
|
||||
}
|
||||
|
||||
unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
|
||||
{
|
||||
assert(uSeqIndex < GetSeqCount());
|
||||
|
||||
const unsigned uColCount = GetColCount();
|
||||
unsigned uLength = 0;
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
if (!IsGap(uSeqIndex, uColIndex))
|
||||
++uLength;
|
||||
return uLength;
|
||||
}
|
||||
|
||||
void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
|
||||
unsigned *ptruPosCount) const
|
||||
{
|
||||
assert(uSeqIndex1 < GetSeqCount());
|
||||
assert(uSeqIndex2 < GetSeqCount());
|
||||
|
||||
unsigned uSameCount = 0;
|
||||
unsigned uPosCount = 0;
|
||||
const unsigned uColCount = GetColCount();
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
char c1 = GetChar(uSeqIndex1, uColIndex);
|
||||
if (IsGapChar(c1))
|
||||
continue;
|
||||
char c2 = GetChar(uSeqIndex2, uColIndex);
|
||||
if (IsGapChar(c2))
|
||||
continue;
|
||||
++uPosCount;
|
||||
if (c1 == c2)
|
||||
++uSameCount;
|
||||
}
|
||||
*ptruPosCount = uPosCount;
|
||||
if (uPosCount > 0)
|
||||
*ptrPWID = 100.0 * (double) uSameCount / (double) uPosCount;
|
||||
else
|
||||
*ptrPWID = 0;
|
||||
}
|
||||
|
||||
void MSA::UnWeight()
|
||||
{
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
m_Weights[uSeqIndex] = BTInsane;
|
||||
}
|
||||
|
||||
unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
|
||||
{
|
||||
assert(uColIndex < GetColCount());
|
||||
|
||||
unsigned Counts[MAX_ALPHA];
|
||||
memset(Counts, 0, sizeof(Counts));
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
|
||||
continue;
|
||||
const unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
|
||||
++(Counts[uLetter]);
|
||||
}
|
||||
unsigned uUniqueCount = 0;
|
||||
for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
|
||||
if (Counts[uLetter] > 0)
|
||||
++uUniqueCount;
|
||||
return uUniqueCount;
|
||||
}
|
||||
|
||||
double MSA::GetOcc(unsigned uColIndex) const
|
||||
{
|
||||
unsigned uGapCount = 0;
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
|
||||
if (IsGap(uSeqIndex, uColIndex))
|
||||
++uGapCount;
|
||||
unsigned uSeqCount = GetSeqCount();
|
||||
return (double) (uSeqCount - uGapCount) / (double) uSeqCount;
|
||||
}
|
||||
|
||||
// Write the alignment to File in the format chosen by the global output
// flags, tested in the order MSF, Aln, HTML, Phylip sequential, Phylip
// interleaved; FASTA is used when none is set.
// Afterwards, if a score file name was given, the score file is written too.
void MSA::ToFile(TextFile &File) const
	{
	if (g_bMSF)
		{
		ToMSFFile(File);
		}
	else if (g_bAln)
		{
		ToAlnFile(File);
		}
	else if (g_bHTML)
		{
		ToHTMLFile(File);
		}
	else if (g_bPHYS)
		{
		ToPhySequentialFile(File);
		}
	else if (g_bPHYI)
		{
		ToPhyInterleavedFile(File);
		}
	else
		{
		ToFASTAFile(File);
		}

	if (g_pstrScoreFileName != 0)
		WriteScoreFile(*this);
	}
|
||||
|
||||
bool MSA::ColumnHasGap(unsigned uColIndex) const
|
||||
{
|
||||
const unsigned uSeqCount = GetSeqCount();
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
if (IsGap(uSeqIndex, uColIndex))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void MSA::SetIdCount(unsigned uIdCount)
|
||||
{
|
||||
//if (m_uIdCount != 0)
|
||||
// Quit("MSA::SetIdCount: may only be called once");
|
||||
|
||||
if (m_uIdCount > 0)
|
||||
{
|
||||
if (uIdCount > m_uIdCount)
|
||||
Quit("MSA::SetIdCount: cannot increase count");
|
||||
return;
|
||||
}
|
||||
m_uIdCount = uIdCount;
|
||||
}
|
||||
|
||||
void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
assert(uId < m_uIdCount);
|
||||
if (0 == m_SeqIndexToId)
|
||||
{
|
||||
if (0 == m_uIdCount)
|
||||
Quit("MSA::SetSeqId, SetIdCount has not been called");
|
||||
m_IdToSeqIndex = new unsigned[m_uIdCount];
|
||||
m_SeqIndexToId = new unsigned[m_uSeqCount];
|
||||
|
||||
memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
|
||||
memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
|
||||
}
|
||||
m_SeqIndexToId[uSeqIndex] = uId;
|
||||
m_IdToSeqIndex[uId] = uSeqIndex;
|
||||
}
|
||||
|
||||
unsigned MSA::GetSeqIndex(unsigned uId) const
|
||||
{
|
||||
assert(uId < m_uIdCount);
|
||||
assert(0 != m_IdToSeqIndex);
|
||||
unsigned uSeqIndex = m_IdToSeqIndex[uId];
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
return uSeqIndex;
|
||||
}
|
||||
|
||||
bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
|
||||
{
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
if (uId == m_SeqIndexToId[uSeqIndex])
|
||||
{
|
||||
*ptruIndex = uSeqIndex;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned MSA::GetSeqId(unsigned uSeqIndex) const
|
||||
{
|
||||
assert(uSeqIndex < m_uSeqCount);
|
||||
unsigned uId = m_SeqIndexToId[uSeqIndex];
|
||||
assert(uId < m_uIdCount);
|
||||
return uId;
|
||||
}
|
||||
|
||||
bool MSA::WeightsSet() const
|
||||
{
|
||||
return BTInsane != m_Weights[0];
|
||||
}
|
||||
|
||||
void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
|
||||
MSA &msaOut)
|
||||
{
|
||||
const unsigned uColCount = msaIn.GetColCount();
|
||||
msaOut.SetSize(uIdCount, uColCount);
|
||||
for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uIdCount; ++uSeqIndexOut)
|
||||
{
|
||||
const unsigned uId = Ids[uSeqIndexOut];
|
||||
|
||||
const unsigned uSeqIndexIn = msaIn.GetSeqIndex(uId);
|
||||
const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
|
||||
|
||||
msaOut.SetSeqId(uSeqIndexOut, uId);
|
||||
msaOut.SetSeqName(uSeqIndexOut, ptrName);
|
||||
|
||||
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
|
||||
{
|
||||
const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
|
||||
msaOut.SetChar(uSeqIndexOut, uColIndex, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Caller must allocate ptrSeq and ptrLabel as new char[n].
|
||||
void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
|
||||
{
|
||||
if (m_uSeqCount > m_uCacheSeqCount)
|
||||
Quit("Internal error MSA::AppendSeq");
|
||||
if (m_uSeqCount == m_uCacheSeqCount)
|
||||
ExpandCache(m_uSeqCount + 4, uSeqLength);
|
||||
m_szSeqs[m_uSeqCount] = ptrSeq;
|
||||
m_szNames[m_uSeqCount] = ptrLabel;
|
||||
++m_uSeqCount;
|
||||
}
|
||||
|
||||
void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
|
||||
{
|
||||
if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
|
||||
Quit("Internal error MSA::ExpandCache");
|
||||
|
||||
if (m_uSeqCount > 0 && uColCount != m_uColCount)
|
||||
Quit("Internal error MSA::ExpandCache, ColCount changed");
|
||||
|
||||
char **NewSeqs = new char *[uSeqCount];
|
||||
char **NewNames = new char *[uSeqCount];
|
||||
WEIGHT *NewWeights = new WEIGHT[uSeqCount];
|
||||
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
NewSeqs[uSeqIndex] = m_szSeqs[uSeqIndex];
|
||||
NewNames[uSeqIndex] = m_szNames[uSeqIndex];
|
||||
NewWeights[uSeqIndex] = m_Weights[uSeqIndex];
|
||||
}
|
||||
|
||||
for (unsigned uSeqIndex = m_uSeqCount; uSeqIndex < uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
char *Seq = new char[uColCount];
|
||||
NewSeqs[uSeqIndex] = Seq;
|
||||
#if DEBUG
|
||||
memset(Seq, '?', uColCount);
|
||||
#endif
|
||||
}
|
||||
|
||||
delete[] m_szSeqs;
|
||||
delete[] m_szNames;
|
||||
delete[] m_Weights;
|
||||
|
||||
m_szSeqs = NewSeqs;
|
||||
m_szNames = NewNames;
|
||||
m_Weights = NewWeights;
|
||||
|
||||
m_uCacheSeqCount = uSeqCount;
|
||||
m_uCacheSeqLength = uColCount;
|
||||
m_uColCount = uColCount;
|
||||
}
|
||||
|
||||
void MSA::FixAlpha()
|
||||
{
|
||||
ClearInvalidLetterWarning();
|
||||
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
|
||||
{
|
||||
for (unsigned uColIndex = 0; uColIndex < m_uColCount; ++uColIndex)
|
||||
{
|
||||
char c = GetChar(uSeqIndex, uColIndex);
|
||||
if (!IsResidueChar(c) && !IsGapChar(c))
|
||||
{
|
||||
char w = GetWildcardChar();
|
||||
// Warning("Invalid letter '%c', replaced by '%c'", c, w);
|
||||
InvalidLetterWarning(c, w);
|
||||
SetChar(uSeqIndex, uColIndex, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
ReportInvalidLetters();
|
||||
}
|
||||
|
||||
// Guess the alphabet of the alignment from a sample of its letters.
// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
// letters belong to the nucleotide alphabet, guess nucleo.
// Otherwise amino.
// Cells are visited row by row (all columns of sequence 0, then sequence
// 1, ...). RNA is tested before DNA, so a sample that satisfies both
// thresholds is reported as RNA. An alignment with no sequences or no
// columns yields ALPHA_Amino.
ALPHA MSA::GuessAlpha() const
{
    const unsigned CHAR_COUNT = 100;
    const unsigned MIN_NUCLEO_PCT = 95;

    const unsigned uSeqCount = GetSeqCount();
    const unsigned uColCount = GetColCount();
    // The column-count check prevents a division by zero in the
    // index arithmetic below when sequences exist but have no columns.
    if (0 == uSeqCount || 0 == uColCount)
        return ALPHA_Amino;

    unsigned uDNACount = 0;
    unsigned uRNACount = 0;
    unsigned uTotal = 0;
    unsigned i = 0;
    for (;;)
    {
        // i is a flat cell index; split it into (sequence, column).
        unsigned uSeqIndex = i/uColCount;
        if (uSeqIndex >= uSeqCount)
            break;
        unsigned uColIndex = i%uColCount;
        ++i;
        char c = GetChar(uSeqIndex, uColIndex);
        if (IsGapChar(c))
            continue;
        // The two counters are independent: a letter may count as both.
        if (IsDNA(c))
            ++uDNACount;
        if (IsRNA(c))
            ++uRNACount;
        ++uTotal;
        if (uTotal >= CHAR_COUNT)
            break;
    }

    // Percentages use integer division, so they are rounded down.
    if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
        return ALPHA_RNA;
    if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
        return ALPHA_DNA;
    return ALPHA_Amino;
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user