Skip to content

Commit 1ac76d8

Browse files
committed
Partially fix and enable more unittests
Add more subtests to langmodel_test; fix and enable lstmtrainer_test; fix and enable some subtests from recodebeam_test; partial fix for resultiterator_test; fix typo; remove the terminating linefeed; fix typo changes.
1 parent eaf5deb commit 1ac76d8

10 files changed

+286
-114
lines changed

unittest/Makefile.am

+14
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm
44
# Absolute path of directory 'tessdata' with traineddata files
55
# (must be on same level as top source directory).
66
TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
7+
TESSDATA_BEST_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata_best
78

89
# Absolute path of directory 'testing' with test images and ground truth texts
910
# (using submodule test).
@@ -15,6 +16,7 @@ TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
1516
AM_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\""
1617
AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
1718
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
19+
AM_CPPFLAGS += -DTESSDATA_BEST_DIR="\"$(TESSDATA_BEST_DIR)\""
1820
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
1921
AM_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\""
2022
AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE
@@ -98,6 +100,8 @@ AM_CPPFLAGS += -isystem $(top_srcdir)/googletest/googletest/include \
98100
-isystem $(top_srcdir)/googletest/googlemock/include
99101

100102
check_PROGRAMS = \
103+
resultiterator_test \
104+
recodebeam_test \
101105
apiexample_test \
102106
applybox_test \
103107
baseapi_test \
@@ -137,6 +141,7 @@ check_PROGRAMS += commandlineflags_test
137141
check_PROGRAMS += lstm_recode_test
138142
check_PROGRAMS += lstm_squashed_test
139143
check_PROGRAMS += lstm_test
144+
check_PROGRAMS += lstmtrainer_test
140145
check_PROGRAMS += unichar_test
141146
check_PROGRAMS += unicharcompress_test
142147
check_PROGRAMS += unicharset_test
@@ -228,6 +233,9 @@ lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_
228233
lstm_test_SOURCES = lstm_test.cc
229234
lstm_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_LIBS)
230235

236+
lstmtrainer_test_SOURCES = lstmtrainer_test.cc
237+
lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
238+
231239
mastertrainer_test_SOURCES = mastertrainer_test.cc
232240
mastertrainer_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS)
233241

@@ -253,9 +261,15 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
253261
qrsequence_test_SOURCES = qrsequence_test.cc
254262
qrsequence_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS)
255263

264+
recodebeam_test_SOURCES = recodebeam_test.cc
265+
recodebeam_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
266+
256267
rect_test_SOURCES = rect_test.cc
257268
rect_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
258269

270+
resultiterator_test_SOURCES = resultiterator_test.cc
271+
resultiterator_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
272+
259273
shapetable_test_SOURCES = shapetable_test.cc
260274
shapetable_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS)
261275

unittest/include_gunit.h

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class file : public tesseract::File {
5454
#define CHECK(test) ASSERT_HOST(test)
5555
#define CHECK_GT(test, value) ASSERT_HOST((test) > (value))
5656
#define CHECK_LT(test, value) ASSERT_HOST((test) < (value))
57+
#define CHECK_LE(test, value) ASSERT_HOST((test) <= (value))
5758
#define CHECK_OK(test) ASSERT_HOST(test)
5859

5960
#endif // TESSERACT_UNITTEST_INCLUDE_GUNIT_H_

unittest/lang_model_test.cc

+106-3
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ TEST(LangModelTest, AddACharacter) {
4343
EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
4444
std::string version_str = "TestVersion";
4545
std::string output_dir = FLAGS_test_tmpdir;
46-
LOG(INFO) << "Output dir=" << output_dir;
46+
LOG(INFO) << "Output dir=" << output_dir << "\n";
4747
std::string lang1 = "eng";
4848
bool pass_through_recoder = false;
4949
GenericVector<STRING> words, puncs, numbers;
@@ -61,13 +61,16 @@ TEST(LangModelTest, AddACharacter) {
6161
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir,
6262
lang1, pass_through_recoder, words, puncs,
6363
numbers, lang_is_rtl, nullptr, nullptr));
64-
// Init a trainer with it, and encode a string.
64+
// Init a trainer with it, and encode kTestString.
6565
std::string traineddata1 =
6666
file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
6767
LSTMTrainer trainer1;
6868
trainer1.InitCharSet(traineddata1);
6969
GenericVector<int> labels1;
7070
EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
71+
STRING test1_decoded = trainer1.DecodeLabels(labels1);
72+
std::string test1_str(&test1_decoded[0], test1_decoded.length());
73+
LOG(INFO) << "Labels1=" << test1_str << "\n";
7174

7275
// Add a new character to the unicharset and try again.
7376
int size_before = unicharset.size();
@@ -81,13 +84,113 @@ TEST(LangModelTest, AddACharacter) {
8184
CombineLangModel(unicharset, script_dir, version_str, output_dir,
8285
lang2, pass_through_recoder, words, puncs, numbers,
8386
lang_is_rtl, nullptr, nullptr));
84-
// Init a trainer with it, and encode a string.
87+
// Init a trainer with it, and encode kTestString.
8588
std::string traineddata2 =
8689
file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
8790
LSTMTrainer trainer2;
8891
trainer2.InitCharSet(traineddata2);
8992
GenericVector<int> labels2;
9093
EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
94+
STRING test2_decoded = trainer2.DecodeLabels(labels2);
95+
std::string test2_str(&test2_decoded[0], test2_decoded.length());
96+
LOG(INFO) << "Labels2=" << test2_str << "\n";
97+
// encode kTestStringRupees.
98+
GenericVector<int> labels3;
99+
EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
100+
STRING test3_decoded = trainer2.DecodeLabels(labels3);
101+
std::string test3_str(&test3_decoded[0], test3_decoded.length());
102+
LOG(INFO) << "labels3=" << test3_str << "\n";
103+
// Copy labels1 to a std::vector, renumbering the null char to match trainer2.
104+
// Since TensorFlow's CTC implementation insists on having the null be the
105+
// last label, and we want to be compatible, null has to be renumbered when
106+
// we add a class.
107+
int null1 = trainer1.null_char();
108+
int null2 = trainer2.null_char();
109+
EXPECT_EQ(null1 + 1, null2);
110+
std::vector<int> labels1_v(labels1.size());
111+
for (int i = 0; i < labels1.size(); ++i) {
112+
if (labels1[i] == null1)
113+
labels1_v[i] = null2;
114+
else
115+
labels1_v[i] = labels1[i];
116+
}
117+
EXPECT_THAT(labels1_v,
118+
testing::ElementsAreArray(&labels2[0], labels2.size()));
119+
// To make sure we are not cheating somehow, we can now encode the Rupee
120+
// symbol, which we could not do before.
121+
EXPECT_FALSE(trainer1.EncodeString(kTestStringRupees, &labels1));
122+
EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels2));
123+
}
124+
125+
// Same as above test, for hin instead of eng
126+
TEST(LangModelTest, AddACharacterHindi) {
127+
constexpr char kTestString[] = "हिन्दी में एक लाइन लिखें";
128+
constexpr char kTestStringRupees[] = "हिंदी में रूपये का चिन्ह प्रयोग करें ₹१००.००";
129+
// Setup the arguments.
130+
std::string script_dir = LANGDATA_DIR;
131+
std::string hin_dir = file::JoinPath(script_dir, "hin");
132+
std::string unicharset_path = TestDataNameToPath("hin_beam.unicharset");
133+
UNICHARSET unicharset;
134+
EXPECT_TRUE(unicharset.load_from_file(unicharset_path.c_str()));
135+
std::string version_str = "TestVersion";
136+
std::string output_dir = FLAGS_test_tmpdir;
137+
LOG(INFO) << "Output dir=" << output_dir << "\n";
138+
std::string lang1 = "hin";
139+
bool pass_through_recoder = false;
140+
GenericVector<STRING> words, puncs, numbers;
141+
// If these reads fail, we get a warning message and an empty list of words.
142+
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
143+
.split('\n', &words);
144+
EXPECT_GT(words.size(), 0);
145+
ReadFile(file::JoinPath(hin_dir, "hin.punc"), nullptr).split('\n', &puncs);
146+
EXPECT_GT(puncs.size(), 0);
147+
ReadFile(file::JoinPath(hin_dir, "hin.numbers"), nullptr)
148+
.split('\n', &numbers);
149+
EXPECT_GT(numbers.size(), 0);
150+
bool lang_is_rtl = false;
151+
// Generate the traineddata file.
152+
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, version_str, output_dir,
153+
lang1, pass_through_recoder, words, puncs,
154+
numbers, lang_is_rtl, nullptr, nullptr));
155+
// Init a trainer with it, and encode kTestString.
156+
std::string traineddata1 =
157+
file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
158+
LSTMTrainer trainer1;
159+
trainer1.InitCharSet(traineddata1);
160+
GenericVector<int> labels1;
161+
EXPECT_TRUE(trainer1.EncodeString(kTestString, &labels1));
162+
STRING test1_decoded = trainer1.DecodeLabels(labels1);
163+
std::string test1_str(&test1_decoded[0], test1_decoded.length());
164+
LOG(INFO) << "Labels1=" << test1_str << "\n";
165+
166+
// Add a new character to the unicharset and try again.
167+
int size_before = unicharset.size();
168+
unicharset.unichar_insert("");
169+
SetupBasicProperties(/*report_errors*/ true, /*decompose (NFD)*/ false,
170+
&unicharset);
171+
EXPECT_EQ(size_before + 1, unicharset.size());
172+
// Generate the traineddata file.
173+
std::string lang2 = "extendedhin";
174+
EXPECT_EQ(EXIT_SUCCESS,
175+
CombineLangModel(unicharset, script_dir, version_str, output_dir,
176+
lang2, pass_through_recoder, words, puncs, numbers,
177+
lang_is_rtl, nullptr, nullptr));
178+
// Init a trainer with it, and encode kTestString.
179+
std::string traineddata2 =
180+
file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
181+
LSTMTrainer trainer2;
182+
trainer2.InitCharSet(traineddata2);
183+
GenericVector<int> labels2;
184+
EXPECT_TRUE(trainer2.EncodeString(kTestString, &labels2));
185+
STRING test2_decoded = trainer2.DecodeLabels(labels2);
186+
std::string test2_str(&test2_decoded[0], test2_decoded.length());
187+
LOG(INFO) << "Labels2=" << test2_str << "\n";
188+
// encode kTestStringRupees.
189+
GenericVector<int> labels3;
190+
EXPECT_TRUE(trainer2.EncodeString(kTestStringRupees, &labels3));
191+
STRING test3_decoded = trainer2.DecodeLabels(labels3);
192+
std::string test3_str(&test3_decoded[0], test3_decoded.length());
193+
LOG(INFO) << "labels3=" << test3_str << "\n";
91194
// Copy labels1 to a std::vector, renumbering the null char to match trainer2.
92195
// Since TensorFlow's CTC implementation insists on having the null be the
93196
// last label, and we want to be compatible, null has to be renumbered when

unittest/lstm_recode_test.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace tesseract {
1919
TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
2020
// A basic single-layer, bi-di 1d LSTM on Korean.
2121
SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
22-
"kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false);
22+
"kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
2323
double kor_full_err = TrainIterations(kTrainerIterations * 2);
2424
EXPECT_LT(kor_full_err, 88);
2525
// EXPECT_GT(kor_full_err, 85);
@@ -29,7 +29,7 @@ TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
2929
TEST_F(LSTMTrainerTest, RecodeTestKor) {
3030
// A basic single-layer, bi-di 1d LSTM on Korean.
3131
SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
32-
"kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false);
32+
"kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
3333
double kor_recode_err = TrainIterations(kTrainerIterations);
3434
EXPECT_LT(kor_recode_err, 60);
3535
LOG(INFO) << "********** Expected < 60 ************\n" ;

unittest/lstm_squashed_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ TEST_F(LSTMTrainerTest, TestSquashed) {
2222
// recoding on, adam on.
2323
SetupTrainerEng("[1,32,0,1 Ct3,3,16 Mp3,3 Lfys48 Lbx96 O1c1]",
2424
"SQU-2-layer-lstm", /*recode*/ true, /*adam*/ true);
25-
double lstm_2d_err = TrainIterations(kTrainerIterations * 2);
25+
double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);
2626
EXPECT_LT(lstm_2d_err, 80);
2727
LOG(INFO) << "********** < 80 ************\n" ;
2828
TestIntMode(kTrainerIterations);

unittest/lstm_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ TEST_F(LSTMTrainerTest, BasicTest) {
3232
"[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
3333
"Ct1,1,64O1c1]",
3434
"no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false,
35-
2e-4, false);
35+
2e-4, false, "eng");
3636
double non_lstm_err = TrainIterations(kTrainerIterations * 4);
3737
EXPECT_LT(non_lstm_err, 98);
3838
LOG(INFO) << "********** Expected < 98 ************\n" ;

unittest/lstm_test.h

+15-7
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,25 @@ class LSTMTrainerTest : public testing::Test {
5050
return file::JoinPath(TESTDATA_DIR,
5151
"" + name);
5252
}
53-
53+
std::string TessDataNameToPath(const std::string& name) {
54+
return file::JoinPath(TESSDATA_DIR,
55+
"" + name);
56+
}
57+
std::string TestingNameToPath(const std::string& name) {
58+
return file::JoinPath(TESTING_DIR,
59+
"" + name);
60+
}
61+
5462
void SetupTrainerEng(const std::string& network_spec, const std::string& model_name,
5563
bool recode, bool adam) {
5664
SetupTrainer(network_spec, model_name, "eng/eng.unicharset",
57-
"eng.Arial.exp0.lstmf", recode, adam, 5e-4, false);
65+
"eng.Arial.exp0.lstmf", recode, adam, 5e-4, false, "eng");
5866
}
5967
void SetupTrainer(const std::string& network_spec, const std::string& model_name,
6068
const std::string& unicharset_file, const std::string& lstmf_file,
6169
bool recode, bool adam, double learning_rate,
62-
bool layer_specific) {
63-
constexpr char kLang[] = "eng"; // Exact value doesn't matter.
70+
bool layer_specific, const std::string& kLang) {
71+
// constexpr char kLang[] = "eng"; // Exact value doesn't matter.
6472
std::string unicharset_name = TestDataNameToPath(unicharset_file);
6573
UNICHARSET unicharset;
6674
ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
@@ -76,7 +84,7 @@ class LSTMTrainerTest : public testing::Test {
7684
model_path.c_str(), checkpoint_path.c_str(),
7785
0, 0));
7886
trainer_->InitCharSet(file::JoinPath(FLAGS_test_tmpdir, kLang,
79-
absl::StrCat(kLang, ".traineddata")));
87+
absl::StrCat(kLang, ".traineddata")));
8088
int net_mode = adam ? NF_ADAM : 0;
8189
// Adam needs a higher learning rate, due to not multiplying the effective
8290
// rate by 1/(1-momentum).
@@ -157,9 +165,9 @@ class LSTMTrainerTest : public testing::Test {
157165
// string.
158166
void TestEncodeDecode(const std::string& lang, const std::string& str, bool recode) {
159167
std::string unicharset_name = lang + "/" + lang + ".unicharset";
160-
std::string lstmf_name = lang + ".Arial_Unicode_MS.exp0.lstmf";
168+
std::string lstmf_name = lang + ".Arial_Unicode_MS.exp0.lstmf";
161169
SetupTrainer("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", unicharset_name,
162-
lstmf_name, recode, true, 5e-4, true);
170+
lstmf_name, recode, true, 5e-4, true, lang);
163171
GenericVector<int> labels;
164172
EXPECT_TRUE(trainer_->EncodeString(str.c_str(), &labels));
165173
STRING decoded = trainer_->DecodeLabels(labels);

unittest/lstmtrainer_test.cc

+36-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1-
#include "leptonica/include/allheaders.h"
2-
#include "tesseract/api/baseapi.h"
3-
#include "tesseract/unittest/lstm_test.h"
1+
// (C) Copyright 2017, Google Inc.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
// Unless required by applicable law or agreed to in writing, software
7+
// distributed under the License is distributed on an "AS IS" BASIS,
8+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
// See the License for the specific language governing permissions and
10+
// limitations under the License.
11+
12+
#include "allheaders.h"
13+
#include "baseapi.h"
14+
#include "lstm_test.h"
415

516
namespace tesseract {
617
namespace {
@@ -21,16 +32,17 @@ TEST_F(LSTMTrainerTest, EncodesKor) {
2132

2233
TEST_F(LSTMTrainerTest, MapCoder) {
2334
LSTMTrainer fra_trainer;
24-
fra_trainer.InitCharSet(TestDataNameToPath("fra.traineddata"));
35+
fra_trainer.InitCharSet(TestDataNameToPath("fra/fra.traineddata"));
2536
LSTMTrainer deu_trainer;
26-
deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata"));
37+
deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
2738
// A string that uses characters common to French and German.
28-
string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
39+
std::string kTestStr = "The quick brown 'fox' jumps over: the lazy dog!";
2940
GenericVector<int> deu_labels;
3041
EXPECT_TRUE(deu_trainer.EncodeString(kTestStr.c_str(), &deu_labels));
3142
// The french trainer cannot decode them correctly.
3243
STRING badly_decoded = fra_trainer.DecodeLabels(deu_labels);
33-
string bad_str(&badly_decoded[0], badly_decoded.length());
44+
std::string bad_str(&badly_decoded[0], badly_decoded.length());
45+
LOG(INFO) << "bad_str fra=" << bad_str << "\n";
3446
EXPECT_NE(kTestStr, bad_str);
3547
// Encode the string as fra.
3648
GenericVector<int> fra_labels;
@@ -49,7 +61,8 @@ TEST_F(LSTMTrainerTest, MapCoder) {
4961
}
5062
// The german trainer can now decode them correctly.
5163
STRING decoded = deu_trainer.DecodeLabels(mapped_fra_labels);
52-
string ok_str(&decoded[0], decoded.length());
64+
std::string ok_str(&decoded[0], decoded.length());
65+
LOG(INFO) << "ok_str deu=" << ok_str << "\n";
5366
EXPECT_EQ(kTestStr, ok_str);
5467
}
5568

@@ -58,29 +71,32 @@ TEST_F(LSTMTrainerTest, MapCoder) {
5871
TEST_F(LSTMTrainerTest, ConvertModel) {
5972
// Setup a trainer with a deu charset.
6073
LSTMTrainer deu_trainer;
61-
deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata"));
74+
deu_trainer.InitCharSet(TestDataNameToPath("deu/deu.traineddata"));
6275
// Load the fra traineddata, strip out the model, and save to a tmp file.
6376
TessdataManager mgr;
64-
string fra_data =
65-
file::JoinPath(FLAGS_test_srcdir, "tessdata_best", "fra.traineddata");
66-
CHECK(mgr.Init(fra_data.c_str())) << "Failed to load " << fra_data;
67-
string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
77+
std::string fra_data =
78+
file::JoinPath(TESSDATA_BEST_DIR, "fra.traineddata");
79+
CHECK(mgr.Init(fra_data.c_str()));
80+
LOG(INFO) << "Load " << fra_data << "\n";
81+
std::string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
6882
CHECK(mgr.ExtractToFile(model_path.c_str()));
83+
LOG(INFO) << "Extract " << model_path << "\n";
6984
// Load the fra model into the deu_trainer, and save the converted model.
70-
CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()))
71-
<< "Failed checkpoint load for " << model_path << " and " << fra_data;
72-
string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
85+
CHECK(deu_trainer.TryLoadingCheckpoint(model_path.c_str(), fra_data.c_str()));
86+
LOG(INFO) << "Checkpoint load for " << model_path << " and " << fra_data << "\n";
87+
std::string deu_data = file::JoinPath(FLAGS_test_tmpdir, "deu.traineddata");
7388
CHECK(deu_trainer.SaveTraineddata(deu_data.c_str()));
89+
LOG(INFO) << "Save " << deu_data << "\n";
7490
// Now run the saved model on phototest. (See BasicTesseractTest in
7591
// baseapi_test.cc).
7692
TessBaseAPI api;
77-
api.Init(FLAGS_test_tmpdir.c_str(), "deu", tesseract::OEM_LSTM_ONLY);
78-
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
93+
api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
94+
Pix* src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
7995
CHECK(src_pix);
8096
api.SetImage(src_pix);
8197
std::unique_ptr<char[]> result(api.GetUTF8Text());
82-
string truth_text;
83-
CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
98+
std::string truth_text;
99+
CHECK_OK(file::GetContents(TestingNameToPath("phototest.gold.txt"),
84100
&truth_text, file::Defaults()));
85101

86102
EXPECT_STREQ(truth_text.c_str(), result.get());

0 commit comments

Comments
 (0)