-
Notifications
You must be signed in to change notification settings - Fork 0
/
ApproximateMatchingTest.cpp
121 lines (103 loc) · 3.96 KB
/
ApproximateMatchingTest.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Copyright 2012, Milan Oberkirch
// Author: Milan Oberkirch <oberkirm@informatik.uni-freiburg.de>
#include <gtest/gtest.h>
#include <fstream> // NOLINT
#include <vector>
#include <string>
#include "./ApproximateMatching.h"
using std::string;
// Name of the temporary file that the createMockup test writes and the init
// test reads back.
const char mockupFileName[] = "ApproximateMatching.test.tmp";
// File-level matcher shared between tests: it is configured by the init test
// and queried by the computeApproximateMatches test.
ApproximateMatching approximateMatching;
// File-level index shared between tests: the init test fills it from the
// mockup file and passes it to approximateMatching.init().
InvertedIndex ii;
// ___________________________________________________________________________
TEST(ApproximateMatching, createMockup) {
  // Writes a small mockup file (two lines, each holding two tab-separated
  // fields) under mockupFileName so that later tests have something to load.
  std::ofstream mockup(mockupFileName);
  // Fail right here if the file cannot be created; merely printing a message
  // would let this test pass and make the tests that read the file fail for
  // a non-obvious reason.
  ASSERT_TRUE(mockup.is_open()) << "Cannot open mockup-file";
  mockup << "first_url\tanalphabet\n"
         << "www.example.com\tvocabulary\n";
  // Close explicitly so buffered data is flushed and any write error shows up
  // in the stream state before the test ends.
  mockup.close();
  EXPECT_FALSE(mockup.fail()) << "Cannot write mockup-file";
}
// ___________________________________________________________________________
TEST(ApproximateMatching, init) {
  // Values handed to init() and expected back from the accessors.
  const int wantedK = 5;
  const char wantedDummyChar = '+';

  // Build the shared index from the mockup file, then initialise the shared
  // matcher with it.
  ii.buildFromCsvFile(mockupFileName);
  approximateMatching.init(ii, wantedK, wantedDummyChar);

  EXPECT_EQ(wantedDummyChar, approximateMatching.dummyChar());
  EXPECT_EQ(wantedK, approximateMatching.k());
}
// ___________________________________________________________________________
TEST(ApproximateMatching, mergeInvertedLists) {
  // Checks that mergeInvertedLists() combines several sorted lists into one
  // sorted list. A local matcher is used; the file-level one is not needed.
  ApproximateMatching matcher;

  // Two lists without common elements.
  vector< vector<size_t> > input = { {1, 3, 4, 6, 9, 11}, {2, 5, 7, 8, 10} };
  vector<size_t> expected = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  vector<size_t> result = matcher.mergeInvertedLists(input);
  // Comparing the whole vectors checks both the size and every element.
  EXPECT_EQ(expected, result);

  // More than two lists, with duplicates: the expected output keeps every
  // occurrence.
  input = { {1, 2, 3, 10}, {2, 3, 4}, {1, 4, 6, 8}, {5, 7, 9} };
  expected = {1, 1, 2, 2, 3, 3, 4, 4, 5, 6, 7, 8, 9, 10};
  result = matcher.mergeInvertedLists(input);
  EXPECT_EQ(expected, result);
}
// ___________________________________________________________________________
TEST(ApproximateMatching, computeEditDistance) {
  // Exercises computeEditDistance() on a few word pairs using a local matcher.
  ApproximateMatching matcher;

  // Identical words are zero edits apart.
  const unsigned int sameWord = matcher.computeEditDistance("B", "B");
  EXPECT_EQ(0u, sameWord);

  // Same length, three differing positions.
  const unsigned int breadBoard = matcher.computeEditDistance("BREAD", "BOARD");
  EXPECT_EQ(3u, breadBoard);

  // The first word is a prefix of the second, two characters shorter.
  const unsigned int prefixWord = matcher.computeEditDistance("ha", "hans");
  EXPECT_EQ(2u, prefixWord);

  // Appending "en" to "halt" must cost exactly two more edits against "hans".
  const unsigned int longWord = matcher.computeEditDistance("halten", "hans");
  const unsigned int shortWordPlusTwo =
      matcher.computeEditDistance("halt", "hans") + 2;
  EXPECT_EQ(shortWordPlusTwo, longWord);
}
// ___________________________________________________________________________
TEST(ApproximateMatching, computeApproximateMatches) {
  // Queries the file-level matcher, which the init test sets up from the
  // mockup file containing the words "analphabet" and "vocabulary".
  // Each case compares the complete result list, not only its first entry,
  // so a wrong second match is detected as well.
  // NOTE(review): the second argument is presumably the allowed edit
  // distance - confirm against ApproximateMatching.h.
  string input;
  vector<string> expected;
  vector<string> actual;

  // One transposition, generous limit: both words are expected.
  input = "vocabluary";
  expected = {"analphabet", "vocabulary"};
  actual = approximateMatching.computeApproximateMatches(input, 10);
  EXPECT_EQ(expected, actual);

  // Extra letter plus transposition, medium limit: only the close word.
  input = "vocabeluary";
  expected = {"vocabulary"};
  actual = approximateMatching.computeApproximateMatches(input, 5);
  EXPECT_EQ(expected, actual);

  // One transposition, tight limit: only the close word.
  input = "vocabluary";
  expected = {"vocabulary"};
  actual = approximateMatching.computeApproximateMatches(input, 2);
  EXPECT_EQ(expected, actual);

  // Extra letter plus transposition, generous limit: both words again.
  input = "vocabeluary";
  expected = {"analphabet", "vocabulary"};
  actual = approximateMatching.computeApproximateMatches(input, 10);
  EXPECT_EQ(expected, actual);
}