Skip to content

Commit ceabab8

Browse files
committed
unittest: Catch missing eng.traineddata in baseapi_test
Signed-off-by: Stefan Weil <[email protected]>
1 parent bbd3626 commit ceabab8

File tree

1 file changed

+66 -31 lines changed

1 file changed

+66 -31 lines changed

unittest/baseapi_test.cc

+66 -31
Original file line number | Diff line number | Diff line change
@@ -76,47 +76,59 @@ TEST_F(TesseractTest, BasicTesseractTest) {
7676
tesseract::TessBaseAPI api;
7777
std::string truth_text;
7878
std::string ocr_text;
79-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
80-
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
81-
CHECK(src_pix);
82-
ocr_text = GetCleanedTextResult(&api, src_pix);
83-
CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
84-
&truth_text, file::Defaults()));
85-
absl::StripAsciiWhitespace(&truth_text);
86-
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
87-
pixDestroy(&src_pix);
79+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
80+
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
81+
CHECK(src_pix);
82+
ocr_text = GetCleanedTextResult(&api, src_pix);
83+
CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
84+
&truth_text, file::Defaults()));
85+
absl::StripAsciiWhitespace(&truth_text);
86+
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
87+
pixDestroy(&src_pix);
88+
} else {
89+
// eng.traineddata not found.
90+
GTEST_SKIP();
91+
}
8892
}
8993

9094
// Test that api.GetComponentImages() will return a set of images for
9195
// paragraphs even if text recognition was not run.
9296
TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
9397
tesseract::TessBaseAPI api;
94-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
95-
api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
96-
api.SetVariable("paragraph_debug_level", "3");
98+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
99+
api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
100+
api.SetVariable("paragraph_debug_level", "3");
97101
#if 0 // TODO: b622.png is missing
98-
Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
99-
CHECK(src_pix);
100-
api.SetImage(src_pix);
101-
Boxa* para_boxes =
102-
api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
103-
EXPECT_TRUE(para_boxes != nullptr);
104-
Boxa* block_boxes =
105-
api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
106-
EXPECT_TRUE(block_boxes != nullptr);
107-
// TODO(eger): Get paragraphs out of this page pre-text.
108-
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
109-
boxaDestroy(&block_boxes);
110-
boxaDestroy(&para_boxes);
111-
pixDestroy(&src_pix);
102+
Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
103+
CHECK(src_pix);
104+
api.SetImage(src_pix);
105+
Boxa* para_boxes =
106+
api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
107+
EXPECT_TRUE(para_boxes != nullptr);
108+
Boxa* block_boxes =
109+
api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
110+
EXPECT_TRUE(block_boxes != nullptr);
111+
// TODO(eger): Get paragraphs out of this page pre-text.
112+
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
113+
boxaDestroy(&block_boxes);
114+
boxaDestroy(&para_boxes);
115+
pixDestroy(&src_pix);
112116
#endif
117+
} else {
118+
// eng.traineddata not found.
119+
GTEST_SKIP();
120+
}
113121
}
114122

115123
// We should get hOCR output and not seg fault, even if the api caller doesn't
116124
// call SetInputName().
117125
TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
118126
tesseract::TessBaseAPI api;
119-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
127+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
128+
// eng.traineddata not found.
129+
GTEST_SKIP();
130+
return;
131+
}
120132
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
121133
CHECK(src_pix);
122134
api.SetImage(src_pix);
@@ -131,7 +143,11 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
131143
// hOCR output should contain baseline info for upright textlines.
132144
TEST_F(TesseractTest, HOCRContainsBaseline) {
133145
tesseract::TessBaseAPI api;
134-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
146+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
147+
// eng.traineddata not found.
148+
GTEST_SKIP();
149+
return;
150+
}
135151
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
136152
CHECK(src_pix);
137153
api.SetInputName("HelloGoogle.tif");
@@ -151,6 +167,11 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
151167
// better algorithms to deal with baseline and xheight consistency.
152168
TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
153169
tesseract::TessBaseAPI api;
170+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
171+
// eng.traineddata not found.
172+
GTEST_SKIP();
173+
return;
174+
}
154175
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
155176
#if 0 // TODO: rick_snyder.jpeg is missing
156177
Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
@@ -161,6 +182,8 @@ TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
161182
EXPECT_THAT(result, Not(HasSubstr("FUCK")));
162183
delete[] result;
163184
pixDestroy(&src_pix);
185+
#else
186+
GTEST_SKIP();
164187
#endif
165188
}
166189

@@ -182,7 +205,11 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
182205
tesseract::TessBaseAPI api;
183206
std::string truth_text;
184207
std::string ocr_text;
185-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
208+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
209+
// eng.traineddata not found.
210+
GTEST_SKIP();
211+
return;
212+
}
186213
api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
187214
api.SetVariable("classify_class_pruner_threshold", "220");
188215
// Train on the training text.
@@ -216,7 +243,11 @@ TEST_F(TesseractTest, BasicLSTMTest) {
216243
tesseract::TessBaseAPI api;
217244
std::string truth_text;
218245
std::string ocr_text;
219-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
246+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
247+
// eng.traineddata not found.
248+
GTEST_SKIP();
249+
return;
250+
}
220251
Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
221252
CHECK(src_pix);
222253
ocr_text = GetCleanedTextResult(&api, src_pix);
@@ -240,7 +271,11 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
240271
#else
241272
Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
242273
FriendlyTessBaseAPI api;
243-
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
274+
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
275+
// eng.traineddata not found.
276+
GTEST_SKIP();
277+
return;
278+
}
244279
api.SetImage(src_pix);
245280
ASSERT_EQ(api.Recognize(nullptr), 0);
246281

0 commit comments

Comments
 (0)