@@ -76,47 +76,59 @@ TEST_F(TesseractTest, BasicTesseractTest) {
76
76
tesseract::TessBaseAPI api;
77
77
std::string truth_text;
78
78
std::string ocr_text;
79
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY);
80
- Pix* src_pix = pixRead (TestDataNameToPath (" phototest.tif" ).c_str ());
81
- CHECK (src_pix);
82
- ocr_text = GetCleanedTextResult (&api, src_pix);
83
- CHECK_OK (file::GetContents (TestDataNameToPath (" phototest.gold.txt" ),
84
- &truth_text, file::Defaults ()));
85
- absl::StripAsciiWhitespace (&truth_text);
86
- EXPECT_STREQ (truth_text.c_str (), ocr_text.c_str ());
87
- pixDestroy (&src_pix);
79
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY) != -1 ) {
80
+ Pix* src_pix = pixRead (TestDataNameToPath (" phototest.tif" ).c_str ());
81
+ CHECK (src_pix);
82
+ ocr_text = GetCleanedTextResult (&api, src_pix);
83
+ CHECK_OK (file::GetContents (TestDataNameToPath (" phototest.gold.txt" ),
84
+ &truth_text, file::Defaults ()));
85
+ absl::StripAsciiWhitespace (&truth_text);
86
+ EXPECT_STREQ (truth_text.c_str (), ocr_text.c_str ());
87
+ pixDestroy (&src_pix);
88
+ } else {
89
+ // eng.traineddata not found.
90
+ GTEST_SKIP ();
91
+ }
88
92
}
89
93
90
94
// Test that api.GetComponentImages() will return a set of images for
91
95
// paragraphs even if text recognition was not run.
92
96
TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
  // Exercises api.GetComponentImages() at paragraph level on a page for which
  // text recognition has not been run. The image-dependent part is currently
  // compiled out (see the #if 0 below), so at the moment this test only
  // initializes the API and applies the page segmentation settings.
  tesseract::TessBaseAPI api;
  if (api.Init(TessdataPath().c_str(), "eng",
               tesseract::OEM_TESSERACT_ONLY) == -1) {
    // eng.traineddata not found.
    GTEST_SKIP();
    // Explicit return keeps the guard effective even if GTEST_SKIP() does not
    // leave the test body by itself; matches the other tests in this file.
    return;
  }
  api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
  api.SetVariable("paragraph_debug_level", "3");
#if 0 // TODO: b622.png is missing
  Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
  CHECK(src_pix);
  api.SetImage(src_pix);
  // Paragraph-level components must be available without a recognition pass.
  Boxa* para_boxes =
      api.GetComponentImages(tesseract::RIL_PARA, true, nullptr, nullptr);
  EXPECT_TRUE(para_boxes != nullptr);
  Boxa* block_boxes =
      api.GetComponentImages(tesseract::RIL_BLOCK, true, nullptr, nullptr);
  EXPECT_TRUE(block_boxes != nullptr);
  // TODO(eger): Get paragraphs out of this page pre-text.
  EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
  boxaDestroy(&block_boxes);
  boxaDestroy(&para_boxes);
  pixDestroy(&src_pix);
#endif
}
114
122
115
123
// We should get hOCR output and not seg fault, even if the api caller doesn't
116
124
// call SetInputName().
117
125
TEST_F (TesseractTest, HOCRWorksWithoutSetInputName) {
118
126
tesseract::TessBaseAPI api;
119
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY);
127
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY) == -1 ) {
128
+ // eng.traineddata not found.
129
+ GTEST_SKIP ();
130
+ return ;
131
+ }
120
132
Pix* src_pix = pixRead (TestDataNameToPath (" HelloGoogle.tif" ).c_str ());
121
133
CHECK (src_pix);
122
134
api.SetImage (src_pix);
@@ -131,7 +143,11 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
131
143
// hOCR output should contain baseline info for upright textlines.
132
144
TEST_F (TesseractTest, HOCRContainsBaseline) {
133
145
tesseract::TessBaseAPI api;
134
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY);
146
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY) == -1 ) {
147
+ // eng.traineddata not found.
148
+ GTEST_SKIP ();
149
+ return ;
150
+ }
135
151
Pix* src_pix = pixRead (TestDataNameToPath (" HelloGoogle.tif" ).c_str ());
136
152
CHECK (src_pix);
137
153
api.SetInputName (" HelloGoogle.tif" );
@@ -151,6 +167,11 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
151
167
// better algorithms to deal with baseline and xheight consistency.
152
168
TEST_F (TesseractTest, RickSnyderNotFuckSnyder) {
153
169
tesseract::TessBaseAPI api;
170
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY) == -1 ) {
171
+ // eng.traineddata not found.
172
+ GTEST_SKIP ();
173
+ return ;
174
+ }
154
175
api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY);
155
176
#if 0 // TODO: rick_snyder.jpeg is missing
156
177
Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
@@ -161,6 +182,8 @@ TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
161
182
EXPECT_THAT(result, Not(HasSubstr("FUCK")));
162
183
delete[] result;
163
184
pixDestroy(&src_pix);
185
+ #else
186
+ GTEST_SKIP ();
164
187
#endif
165
188
}
166
189
@@ -182,7 +205,11 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
182
205
tesseract::TessBaseAPI api;
183
206
std::string truth_text;
184
207
std::string ocr_text;
185
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY);
208
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_TESSERACT_ONLY) == -1 ) {
209
+ // eng.traineddata not found.
210
+ GTEST_SKIP ();
211
+ return ;
212
+ }
186
213
api.SetVariable (" matcher_sufficient_examples_for_prototyping" , " 1" );
187
214
api.SetVariable (" classify_class_pruner_threshold" , " 220" );
188
215
// Train on the training text.
@@ -216,7 +243,11 @@ TEST_F(TesseractTest, BasicLSTMTest) {
216
243
tesseract::TessBaseAPI api;
217
244
std::string truth_text;
218
245
std::string ocr_text;
219
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_LSTM_ONLY);
246
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_LSTM_ONLY) == -1 ) {
247
+ // eng.traineddata not found.
248
+ GTEST_SKIP ();
249
+ return ;
250
+ }
220
251
Pix* src_pix = pixRead (TestDataNameToPath (" phototest_2.tif" ).c_str ());
221
252
CHECK (src_pix);
222
253
ocr_text = GetCleanedTextResult (&api, src_pix);
@@ -240,7 +271,11 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
240
271
#else
241
272
Pix* src_pix = pixRead (TestDataNameToPath (" deslant.tif" ).c_str ());
242
273
FriendlyTessBaseAPI api;
243
- api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_LSTM_ONLY);
274
+ if (api.Init (TessdataPath ().c_str (), " eng" , tesseract::OEM_LSTM_ONLY) == -1 ) {
275
+ // eng.traineddata not found.
276
+ GTEST_SKIP ();
277
+ return ;
278
+ }
244
279
api.SetImage (src_pix);
245
280
ASSERT_EQ (api.Recognize (nullptr ), 0 );
246
281
0 commit comments