Skip to content

Commit 05cdbc7

Browse files
committed
Fix and enable dawg_test
Signed-off-by: Stefan Weil <[email protected]>
1 parent aec992e commit 05cdbc7

File tree

2 files changed

+25
-38
lines changed

2 files changed

+25
-38
lines changed

unittest/Makefile.am

+2 -4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
# Absolute path of directory 'src'.
2-
TESS_SRC_DIR=$(shell cd $(top_srcdir) && pwd)/src
3-
41
# Absolute path of directory 'langdata'.
52
LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm
63

@@ -15,7 +12,7 @@ TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
1512
# (using submodule test).
1613
TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
1714

18-
AM_CPPFLAGS += -DTESS_SRC_DIR="\"$(TESS_SRC_DIR)\""
15+
AM_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\""
1916
AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
2017
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
2118
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
@@ -107,6 +104,7 @@ check_PROGRAMS = \
107104
bitvector_test \
108105
cleanapi_test \
109106
colpartition_test \
107+
dawg_test \
110108
denorm_test \
111109
fileio_test \
112110
heap_test \

unittest/dawg_test.cc

+23 -34
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11

2+
#include <cstdlib> // for system
3+
#include <fstream> // for ifstream
24
#include <set>
35
#include <string>
46
#include <vector>
@@ -8,55 +10,42 @@
810
#include "trie.h"
911

1012
#include "include_gunit.h"
11-
#include "base/filelinereader.h"
12-
#include "util/process/subprocess.h"
1313

1414
namespace {
1515

16-
void RemoveTrailingLineTerminators(char* line) {
17-
char* end = line + strlen(line) - 1;
18-
while (end >= line && ('\n' == *end || '\r' == *end)) {
19-
*end-- = 0;
20-
}
21-
}
22-
23-
void AddLineToSet(std::set<std::string>* words, char* line) {
24-
RemoveTrailingLineTerminators(line);
25-
words->insert(line);
26-
}
27-
2816
// Test some basic functionality dealing with Dawgs (compressed dictionaries,
2917
// aka Directed Acyclic Word Graphs).
3018
class DawgTest : public testing::Test {
3119
protected:
3220
void LoadWordlist(const std::string& filename, std::set<std::string>* words) const {
33-
FileLineReader::Options options;
34-
options.set_comment_char(0);
35-
FileLineReader flr(filename.c_str(), options);
36-
flr.set_line_callback(NewPermanentCallback(AddLineToSet, words));
37-
flr.Reload();
21+
std::ifstream file(filename);
22+
if (file.is_open()) {
23+
std::string line;
24+
while (getline(file, line)) {
25+
// Remove trailing line terminators from line.
26+
while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) {
27+
line.resize(line.size() - 1);
28+
}
29+
// Add line to set.
30+
words->insert(line.c_str());
31+
}
32+
file.close();
33+
}
3834
}
3935
std::string TestDataNameToPath(const std::string& name) const {
40-
return file::JoinPath(TESTDATA_DIR, "/" + name);
36+
return file::JoinPath(TESTDATA_DIR, name);
4137
}
42-
std::string TessBinaryPath(const std::string& binary_name) const {
43-
return file::JoinPath(TESS_SRC_DIR,
38+
std::string TessBinaryPath(const std::string& name) const {
39+
return file::JoinPath(TESSBIN_DIR, "src/training/" + name);
4440
}
4541
std::string OutputNameToPath(const std::string& name) const {
4642
return file::JoinPath(FLAGS_test_tmpdir, name);
4743
}
48-
int RunCommand(const std::string& program, const std::string& arg1, const std::string& arg2,
49-
const std::string& arg3) const {
50-
SubProcess p;
51-
std::vector<std::string> argv;
52-
argv.push_back(program);
53-
argv.push_back(arg1);
54-
argv.push_back(arg2);
55-
argv.push_back(arg3);
56-
p.SetProgram(TessBinaryPath(program), argv);
57-
p.Start();
58-
p.Wait();
59-
return p.exit_code();
44+
int RunCommand(const std::string& program, const std::string& arg1,
45+
const std::string& arg2, const std::string& arg3) const {
46+
std::string cmdline =
47+
TessBinaryPath(program) + " " + arg1 + " " + arg2 + " " + arg3;
48+
return system(cmdline.c_str());
6049
}
6150
// Test that we are able to convert a wordlist file (one "word" per line) to
6251
// a dawg (a compressed format) and then extract the original wordlist back

0 commit comments

Comments
 (0)