Skip to content

Commit 2916dc8

Browse files
committed
unittest: Add mastertrainer_test (only works partially)
The test currently has subtests which fail because of missing files. Signed-off-by: Stefan Weil <[email protected]>
1 parent f93fb9d commit 2916dc8

File tree

2 files changed

+59
-32
lines changed

2 files changed

+59
-32
lines changed

unittest/Makefile.am

+4
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ check_PROGRAMS = \
107107
lang_model_test \
108108
linlsq_test \
109109
loadlang_test \
110+
mastertrainer_test \
110111
matrix_test \
111112
nthitem_test \
112113
osd_test \
@@ -185,6 +186,9 @@ linlsq_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
185186
loadlang_test_SOURCES = loadlang_test.cc
186187
loadlang_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
187188

189+
mastertrainer_test_SOURCES = mastertrainer_test.cc
190+
mastertrainer_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS)
191+
188192
matrix_test_SOURCES = matrix_test.cc
189193
matrix_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
190194

unittest/mastertrainer_test.cc

+55-32
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
// (C) Copyright 2017, Google Inc.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
// Unless required by applicable law or agreed to in writing, software
7+
// distributed under the License is distributed on an "AS IS" BASIS,
8+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
// See the License for the specific language governing permissions and
10+
// limitations under the License.
111

212
// Although this is a trivial-looking test, it exercises a lot of code:
313
// SampleIterator has to correctly iterate over the correct characters, or
@@ -15,36 +25,49 @@
1525
#include <utility>
1626
#include <vector>
1727

18-
#include "tesseract/ccutil/genericvector.h"
19-
#include "tesseract/ccutil/unicharset.h"
20-
#include "tesseract/classify/errorcounter.h"
21-
#include "tesseract/classify/mastertrainer.h"
22-
#include "tesseract/classify/shapeclassifier.h"
23-
#include "tesseract/classify/shapetable.h"
24-
#include "tesseract/classify/trainingsample.h"
25-
#include "tesseract/training/commontraining.h"
26-
#include "tesseract/training/tessopt.h"
28+
#include "absl/strings/numbers.h" // for safe_strto32
29+
#include "absl/strings/str_split.h" // for absl::StrSplit
30+
31+
#include "include_gunit.h"
32+
33+
#include "genericvector.h"
34+
#include "log.h" // for LOG
35+
#include "unicharset.h"
36+
#include "errorcounter.h"
37+
#include "mastertrainer.h"
38+
#include "shapeclassifier.h"
39+
#include "shapetable.h"
40+
#include "trainingsample.h"
41+
#include "commontraining.h"
42+
#include "tessopt.h" // tessoptind
2743

2844
// Commontraining command-line arguments for font_properties, xheights and
2945
// unicharset.
30-
DECLARE_string(F);
31-
DECLARE_string(X);
32-
DECLARE_string(U);
33-
DECLARE_string(output_trainer);
46+
DECLARE_STRING_PARAM_FLAG(F);
47+
DECLARE_STRING_PARAM_FLAG(X);
48+
DECLARE_STRING_PARAM_FLAG(U);
49+
DECLARE_STRING_PARAM_FLAG(output_trainer);
3450

3551
// Specs of the MockClassifier.
36-
const int kNumTopNErrs = 10;
37-
const int kNumTop2Errs = kNumTopNErrs + 20;
38-
const int kNumTop1Errs = kNumTop2Errs + 30;
39-
const int kNumTopTopErrs = kNumTop1Errs + 25;
40-
const int kNumNonReject = 1000;
41-
const int kNumCorrect = kNumNonReject - kNumTop1Errs;
52+
static const int kNumTopNErrs = 10;
53+
static const int kNumTop2Errs = kNumTopNErrs + 20;
54+
static const int kNumTop1Errs = kNumTop2Errs + 30;
55+
static const int kNumTopTopErrs = kNumTop1Errs + 25;
56+
static const int kNumNonReject = 1000;
57+
static const int kNumCorrect = kNumNonReject - kNumTop1Errs;
4258
// The total number of answers is given by the number of non-rejects plus
4359
// all the multiple answers.
44-
const int kNumAnswers = kNumNonReject + 2 * (kNumTop2Errs - kNumTopNErrs) +
60+
static const int kNumAnswers = kNumNonReject + 2 * (kNumTop2Errs - kNumTopNErrs) +
4561
(kNumTop1Errs - kNumTop2Errs) +
4662
(kNumTopTopErrs - kNumTop1Errs);
4763

64+
// Local stand-in for Abseil's safe_strto32: parses the whole of `str` as a
// signed integer and stores it in `*pResult`.
//
// Returns true only if, after ignoring leading and trailing whitespace, the
// entire string is a valid number that fits in an int. On any failure (empty
// or blank input, no digits, trailing garbage, out-of-range value) it returns
// false and sets `*pResult` to 0, so callers never read an unset value.
//
// Base 0 is kept from the previous implementation, so "0x.." is read as
// hexadecimal and a leading "0" as octal.
static bool safe_strto32(const std::string& str, int* pResult)
{
  *pResult = 0;

  // Drop trailing whitespace (e.g. a newline left on the last split field);
  // strtoll itself already skips leading whitespace.
  const std::string::size_type last = str.find_last_not_of(" \t\n\v\f\r");
  if (last == std::string::npos) {
    return false;  // Empty or whitespace-only input.
  }
  const std::string text = str.substr(0, last + 1);

  // Parse as long long (at least 64 bits) so that values which overflow a
  // 32-bit int are still visible to the range check below, even on platforms
  // where long is only 32 bits wide.
  char* end = nullptr;
  const long long parsed = strtoll(text.c_str(), &end, 0);

  // Reject input with no digits or with unparsed characters left over.
  if (end != text.c_str() + text.size()) {
    return false;
  }

  // Round-trip through int: a value that does not fit will not compare equal
  // to the original after narrowing.
  const int narrowed = static_cast<int>(parsed);
  if (narrowed != parsed) {
    return false;
  }

  *pResult = narrowed;
  return true;
}
70+
4871
namespace tesseract {
4972

5073
// Mock ShapeClassifier that cheats by looking at the correct answer, and
@@ -138,13 +161,13 @@ const double kMin1lDistance = 0.25;
138161
// The fixture for testing Tesseract.
139162
class MasterTrainerTest : public testing::Test {
140163
protected:
141-
string TestDataNameToPath(const string& name) {
142-
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
164+
std::string TestDataNameToPath(const std::string& name) {
165+
return file::JoinPath(TESTING_DIR, name);
143166
}
144-
string TessdataPath() {
145-
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
167+
std::string TessdataPath() {
168+
return TESSDATA_DIR;
146169
}
147-
string TmpNameToPath(const string& name) {
170+
std::string TmpNameToPath(const std::string& name) {
148171
return file::JoinPath(FLAGS_test_tmpdir, name);
149172
}
150173

@@ -161,11 +184,11 @@ class MasterTrainerTest : public testing::Test {
161184
// if load_from_tmp, then reloads a master trainer that was saved by a
162185
// previous call in which it was false.
163186
void LoadMasterTrainer() {
164-
FLAGS_output_trainer = TmpNameToPath("tmp_trainer");
165-
FLAGS_F = TestDataNameToPath("font_properties");
166-
FLAGS_X = TestDataNameToPath("eng.xheights");
167-
FLAGS_U = TestDataNameToPath("eng.unicharset");
168-
string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr"));
187+
FLAGS_output_trainer = TmpNameToPath("tmp_trainer").c_str();
188+
FLAGS_F = file::JoinPath(LANGDATA_DIR, "font_properties").c_str();
189+
FLAGS_X = TestDataNameToPath("eng.xheights").c_str();
190+
FLAGS_U = file::JoinPath(LANGDATA_DIR, "eng/eng.unicharset").c_str();
191+
std::string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr"));
169192
const char* argv[] = {tr_file_name.c_str()};
170193
int argc = 1;
171194
STRING file_prefix;
@@ -256,8 +279,8 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
256279
false, shape_classifier,
257280
&accuracy_report);
258281
LOG(INFO) << accuracy_report.string();
259-
string result_string = accuracy_report.string();
260-
std::vector<string> results =
282+
std::string result_string = accuracy_report.string();
283+
std::vector<std::string> results =
261284
absl::StrSplit(result_string, '\t', absl::SkipEmpty());
262285
EXPECT_EQ(tesseract::CT_SIZE + 1, results.size());
263286
int result_values[tesseract::CT_SIZE];

0 commit comments

Comments
 (0)