Skip to content

Commit 63b774d

Browse files
committed
Sequence Alignment Algo (Needleman-Wunsch)
1 parent a481be5 commit 63b774d

File tree

8 files changed

+198
-4
lines changed

8 files changed

+198
-4
lines changed

General/General.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,12 @@
117117
<ItemGroup>
118118
<ClInclude Include="Knapsack.h" />
119119
<ClInclude Include="MaxWIS.h" />
120+
<ClInclude Include="SequenceAlignment.h" />
120121
</ItemGroup>
121122
<ItemGroup>
122123
<ClCompile Include="Knapsack.cpp" />
123124
<ClCompile Include="MaxWIS.cpp" />
125+
<ClCompile Include="SequenceAlignment.cpp" />
124126
</ItemGroup>
125127
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
126128
<ImportGroup Label="ExtensionTargets">

General/General.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
<ClInclude Include="Knapsack.h">
2222
<Filter>Header Files</Filter>
2323
</ClInclude>
24+
<ClInclude Include="SequenceAlignment.h">
25+
<Filter>Header Files</Filter>
26+
</ClInclude>
2427
</ItemGroup>
2528
<ItemGroup>
2629
<ClCompile Include="MaxWIS.cpp">
@@ -29,5 +32,8 @@
2932
<ClCompile Include="Knapsack.cpp">
3033
<Filter>Source Files</Filter>
3134
</ClCompile>
35+
<ClCompile Include="SequenceAlignment.cpp">
36+
<Filter>Source Files</Filter>
37+
</ClCompile>
3238
</ItemGroup>
3339
</Project>

General/SequenceAlignment.cpp

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#include <algorithm>
2+
3+
#include "SequenceAlignment.h"
4+
5+
SequenceAlignment::SequenceAlignment():
6+
m_numScores(0),
7+
m_gapScore(0)
8+
{
9+
}
10+
11+
// Nothing to release by hand: every member cleans up after itself.
SequenceAlignment::~SequenceAlignment() = default;
14+
15+
void SequenceAlignment::clear()
16+
{
17+
m_charToIndexMap.clear();
18+
m_numScores = 0;
19+
m_gapScore = 0;
20+
m_scores.clear();
21+
}
22+
23+
bool SequenceAlignment::read(std::istream& in)
24+
{
25+
clear();
26+
27+
// read alphabet
28+
std::string alphabet;
29+
in >> m_alphabet;
30+
31+
uint32_t idx = 0;
32+
for (auto c : m_alphabet)
33+
{
34+
m_charToIndexMap.insert(std::make_pair(c, idx++));
35+
}
36+
37+
m_numScores = m_alphabet.size();
38+
m_scores.resize(m_numScores * m_numScores, 0);
39+
40+
for (uint32_t i = 0; i < m_numScores; ++i)
41+
{
42+
for (uint32_t j = 0; j < m_numScores; ++j)
43+
{
44+
in >> m_scores[i * m_numScores + j];
45+
}
46+
}
47+
in >> m_gapScore;
48+
49+
in >> m_sequnces[0] >> m_sequnces[1];
50+
51+
return true;
52+
}
53+
54+
bool SequenceAlignment::run(
55+
uint32_t &scoreNeedlemanWunsch,
56+
std::string(&seq)[2])
57+
{
58+
uint32_t size[2] = { m_sequnces[0].size() + 1, m_sequnces[1].size() + 1 };
59+
std::vector<char> nwScore(size[0] * size[1], 0);
60+
for (uint32_t i = 0; i < size[1]; ++i)
61+
{
62+
nwScore[i] = (i + 1) * m_gapScore;
63+
}
64+
for (uint32_t i = 0; i < size[0]; ++i)
65+
{
66+
nwScore[i * size[1]] = (i + 1) * m_gapScore;
67+
}
68+
69+
uint32_t tmpScore[3];
70+
for (uint32_t i = 1; i < size[0]; ++i)
71+
{
72+
for (uint32_t j = 1; j < size[1]; ++j)
73+
{
74+
uint32_t idx1 = m_charToIndexMap[m_sequnces[0][i - 1]];
75+
uint32_t idx2 = m_charToIndexMap[m_sequnces[1][j - 1]];
76+
tmpScore[0] = nwScore[(i - 1) * size[1] + (j - 1)] + m_scores[idx1 * m_numScores + idx2];
77+
tmpScore[1] = nwScore[i * size[1] + (j - 1)] + m_gapScore;
78+
tmpScore[2] = nwScore[(i - 1) * size[1] + j] + m_gapScore;
79+
nwScore[i * size[1] + j] = std::min(tmpScore[0], std::min(tmpScore[1], tmpScore[2]));
80+
}
81+
}
82+
83+
scoreNeedlemanWunsch = nwScore[size[0] * size[1] - 1];
84+
85+
for (uint32_t i = size[0] - 1, j = size[1] - 1; (i > 0) && (j > 0);)
86+
{
87+
uint32_t idx1 = m_charToIndexMap[m_sequnces[0][i - 1]];
88+
uint32_t idx2 = m_charToIndexMap[m_sequnces[1][j - 1]];
89+
tmpScore[0] = nwScore[(i - 1) * size[1] + (j - 1)] + m_scores[idx1 * m_numScores + idx2];
90+
tmpScore[1] = nwScore[i * size[1] + (j - 1)] + m_gapScore;
91+
tmpScore[2] = nwScore[(i - 1) * size[1] + j] + m_gapScore;
92+
if (nwScore[i * size[1] + j] == tmpScore[0])
93+
{
94+
seq[0].push_back(m_sequnces[0][i - 1]);
95+
seq[1].push_back(m_sequnces[1][j - 1]);
96+
--i, --j;
97+
}
98+
else if (nwScore[i * size[1] + j] == tmpScore[1])
99+
{
100+
seq[0].push_back(' ');
101+
seq[1].push_back(m_sequnces[1][j - 1]);
102+
--j;
103+
}
104+
else if (nwScore[i * size[1] + j] == tmpScore[2])
105+
{
106+
seq[0].push_back(m_sequnces[0][i - 1]);
107+
seq[1].push_back(' ');
108+
--i;
109+
}
110+
}
111+
std::reverse(seq[0].begin(), seq[0].end());
112+
std::reverse(seq[1].begin(), seq[1].end());
113+
114+
return true;
115+
}

General/SequenceAlignment.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#ifndef SEQUENCE_ALIGNMENT_H
2+
#define SEQUENCE_ALIGNMENT_H
3+
4+
#include <unordered_map>
5+
#include <vector>
6+
#include <string>
7+
#include <cstdint>
8+
9+
// Pairwise global sequence alignment (Needleman-Wunsch).
//
// A problem consists of an alphabet, a square table of per-pair scores, a gap
// score and two sequences. read() loads a problem from a stream; run()
// computes the alignment with the smallest total score and reports both the
// score and the aligned sequences.
class SequenceAlignment
{
public:
    SequenceAlignment();
    ~SequenceAlignment();

    // Loads alphabet, score table, gap score and the two sequences from `in`.
    // Returns a success flag.
    bool read(std::istream& in);

    // Aligns the two loaded sequences. The total score is written to
    // `scoreNeedlemanWunsch` and the aligned sequences to `seq`.
    // Returns a success flag.
    bool run(
        uint32_t &scoreNeedlemanWunsch,
        std::string (&seq)[2]);

private:
    using CharToIndexMap = std::unordered_map<char, uint32_t>;

    // Resets the parsed problem data.
    void clear();

    // Alphabet character -> its row/column in the score table.
    CharToIndexMap m_charToIndexMap;
    // Alphabet exactly as it was read.
    std::string m_alphabet;
    // Dimension of the (square) score table.
    uint32_t m_numScores;
    // Score table, row-major, m_numScores * m_numScores entries.
    std::vector<uint32_t> m_scores;
    // Score charged for aligning a character against a gap.
    uint32_t m_gapScore;
    // The two sequences to align.
    std::string m_sequnces[2];
};
31+
32+
#endif // SEQUENCE_ALIGNMENT_H

test/main.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88

99
#include <cstdint>
1010
#include <iostream>
11+
#include <cstdint>
1112

1213
#include "graphs/Graph.h"
1314
#include "graphs/GraphMatrix.h"
1415
#include "General/Knapsack.h"
16+
#include "General/SequenceAlignment.h"
1517

16-
int main(int argc, const char * argv[])
18+
void knapsackFunc()
1719
{
1820
Knapsack knapsack;
1921
std::ifstream fin("knapsack.in");
@@ -27,6 +29,30 @@ int main(int argc, const char * argv[])
2729
std::cout << val << std::endl;
2830

2931
std::cin.get();
32+
}
33+
34+
void sequenceAlignmentFunc()
35+
{
36+
SequenceAlignment sequenceAlignment;
37+
std::ifstream fin("sequence_alignment.in");
38+
if (!sequenceAlignment.read(fin))
39+
{
40+
std::cout << "Cannot read SequenceAlignment" << std::endl;
41+
exit(2);
42+
}
43+
uint32_t nwScore = 0;
44+
std::string sequnces[2];
45+
sequenceAlignment.run(nwScore, sequnces);
46+
std::cout << nwScore << std::endl;
47+
std::cout << sequnces[0] << std::endl;
48+
std::cout << sequnces[1] << std::endl;
49+
50+
std::cin.get();
51+
}
52+
53+
int main(int argc, const char * argv[])
54+
{
55+
sequenceAlignmentFunc();
3056
#if 0
3157
// insert code here...
3258
std::cout << "Creating graph.\n";

test/sequence_alignment.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
ACGT
2+
0 1 1 1
3+
1 0 1 1
4+
1 1 0 1
5+
1 1 1 0
6+
2
7+
8+
ACCCCCCCCCCGT
9+
ACGT

test/test.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@
132132
<ItemGroup>
133133
<None Include="graph.in" />
134134
<None Include="knapsack.in" />
135+
<None Include="sequence_alignment.in" />
135136
</ItemGroup>
136137
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
137138
<ImportGroup Label="ExtensionTargets">

test/test.vcxproj.filters

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
1414
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
1515
</Filter>
16-
<Filter Include="Graph Files">
16+
<Filter Include="Input Files">
1717
<UniqueIdentifier>{4c6e610c-9b28-4ee1-a627-e79d8c9e10dc}</UniqueIdentifier>
1818
</Filter>
1919
</ItemGroup>
@@ -24,10 +24,13 @@
2424
</ItemGroup>
2525
<ItemGroup>
2626
<None Include="graph.in">
27-
<Filter>Graph Files</Filter>
27+
<Filter>Input Files</Filter>
2828
</None>
2929
<None Include="knapsack.in">
30-
<Filter>Graph Files</Filter>
30+
<Filter>Input Files</Filter>
31+
</None>
32+
<None Include="sequence_alignment.in">
33+
<Filter>Input Files</Filter>
3134
</None>
3235
</ItemGroup>
3336
</Project>

0 commit comments

Comments
 (0)