Skip to content

Commit 40c1cf6

Browse files
committed
unittest: Fix and enable pango_font_info_test
Signed-off-by: Stefan Weil <[email protected]>
1 parent 04d85b4 commit 40c1cf6

File tree

2 files changed

+85
-54
lines changed

2 files changed

+85
-54
lines changed

unittest/Makefile.am

+11-6
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ check_PROGRAMS += matrix_test
132132
check_PROGRAMS += nthitem_test
133133
check_PROGRAMS += osd_test
134134
# check_PROGRAMS += pagesegmode_test
135-
# check_PROGRAMS += pango_font_info_test
136135
check_PROGRAMS += paragraphs_test
137136
check_PROGRAMS += params_model_test
138137
check_PROGRAMS += progress_test
@@ -159,6 +158,7 @@ check_PROGRAMS += lstm_squashed_test
159158
check_PROGRAMS += lstm_test
160159
check_PROGRAMS += lstmtrainer_test
161160
check_PROGRAMS += normstrngs_test
161+
check_PROGRAMS += pango_font_info_test
162162
check_PROGRAMS += unichar_test
163163
check_PROGRAMS += unicharcompress_test
164164
check_PROGRAMS += unicharset_test
@@ -279,18 +279,23 @@ normstrngs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(ICU_I18N
279279
nthitem_test_SOURCES = nthitem_test.cc
280280
nthitem_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
281281

282-
#pango_font_info_test_SOURCES = pango_font_info_test.cc
283-
#pango_font_info_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
282+
osd_test_SOURCES = osd_test.cc
283+
osd_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
284+
285+
pango_font_info_test_SOURCES = pango_font_info_test.cc
286+
pango_font_info_test_SOURCES += third_party/utf/rune.c
287+
pango_font_info_test_SOURCES += util/utf8/unicodetext.cc util/utf8/unilib.cc
288+
pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
289+
pango_font_info_test_LDADD += $(ICU_I18N_LIBS) -lfontconfig
290+
pango_font_info_test_LDADD += -lpangocairo-1.0 -lpangoft2-1.0
291+
pango_font_info_test_LDADD += $(cairo_LIBS) $(pango_LIBS)
284292

285293
paragraphs_test_SOURCES = paragraphs_test.cc
286294
paragraphs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS)
287295

288296
params_model_test_SOURCES = params_model_test.cc
289297
params_model_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
290298

291-
osd_test_SOURCES = osd_test.cc
292-
osd_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
293-
294299
progress_test_SOURCES = progress_test.cc
295300
progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS)
296301
progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)

unittest/pango_font_info_test.cc

+74-48
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
1-
2-
#include "tesseract/training/pango_font_info.h"
3-
4-
#include <stdio.h>
5-
#include <string.h>
6-
7-
#include "pango/pango.h"
8-
#include "tesseract/training/commandlineflags.h"
9-
#include "tesseract/training/fileio.h"
1+
// (C) Copyright 2017, Google Inc.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
// Unless required by applicable law or agreed to in writing, software
7+
// distributed under the License is distributed on an "AS IS" BASIS,
8+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
// See the License for the specific language governing permissions and
10+
// limitations under the License.
11+
12+
#include <cstdio>
13+
#include <string>
14+
#include <pango/pango.h>
15+
#include "include_gunit.h"
16+
#include "commandlineflags.h"
17+
#include "fileio.h"
18+
#include "pango_font_info.h"
19+
#include "absl/strings/str_cat.h" // for absl::StrCat
20+
#include "gmock/gmock-matchers.h" // for EXPECT_THAT
21+
#include "util/utf8/unicodetext.h" // for UnicodeText
1022

1123
DECLARE_STRING_PARAM_FLAG(fonts_dir);
1224
DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);
@@ -19,19 +31,19 @@ using tesseract::FontUtils;
1931
using tesseract::PangoFontInfo;
2032

2133
// Fonts in testdata directory
22-
const char* kExpectedFontNames[] = {"Arab",
23-
"Arial Bold Italic",
24-
"DejaVu Sans Ultra-Light",
25-
"Lohit Hindi",
34+
const char* kExpectedFontNames[] = {
35+
"Arab",
36+
"Arial Bold Italic",
37+
"DejaVu Sans Ultra-Light",
38+
"Lohit Hindi",
2639
#if PANGO_VERSION <= 12005
27-
"Times New Roman",
40+
"Times New Roman",
2841
#else
29-
"Times New Roman,", // Pango v1.36.2
30-
// requires a trailing
31-
// ','
42+
"Times New Roman,", // Pango v1.36.2 requires a trailing ','
3243
#endif
33-
"UnBatang",
34-
"Verdana"};
44+
"UnBatang",
45+
"Verdana"
46+
};
3547

3648
// Sample text used in tests.
3749
const char kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع";
@@ -41,23 +53,27 @@ const char kKorText[] = "이는 것으로";
4153
// Hindi words containing illegal vowel sequences.
4254
const char* kBadlyFormedHinWords[] = {
4355
#if PANGO_VERSION <= 12005
44-
"उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस",
56+
"उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस",
4557
#endif
46-
// Pango v1.36.2 will render the above words even though they are invalid.
47-
"प्रंात", nullptr};
58+
// Pango v1.36.2 will render the above words even though they are invalid.
59+
"प्रंात", nullptr
60+
};
4861

4962
class PangoFontInfoTest : public ::testing::Test {
5063
protected:
5164
void SetUp() override {
52-
std::locale::global(std::locale(""));
65+
static std::locale system_locale("");
66+
std::locale::global(system_locale);
5367
}
5468

5569
// Creates a fake fonts.conf file that points to the testdata fonts for
5670
// fontconfig to initialize with.
5771
static void SetUpTestCase() {
58-
FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata");
72+
FLAGS_fonts_dir = TESTING_DIR;
5973
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
74+
#ifdef GOOGLE_TESSERACT
6075
FLAGS_use_only_legacy_fonts = false;
76+
#endif
6177
}
6278

6379
PangoFontInfo font_info_;
@@ -120,7 +136,7 @@ TEST_F(PangoFontInfoTest, CanRenderLigature) {
120136
font_info_.ParseFontDescriptionName("Arab 12");
121137
const char kArabicLigature[] = "لا";
122138
EXPECT_TRUE(
123-
font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature)));
139+
font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature)));
124140

125141
printf("Next word\n");
126142
EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText)));
@@ -143,17 +159,17 @@ TEST_F(PangoFontInfoTest, CannotRenderInvalidString) {
143159
TEST_F(PangoFontInfoTest, CanDropUncoveredChars) {
144160
font_info_.ParseFontDescriptionName("Verdana 12");
145161
// Verdana cannot render the "ff" ligature
146-
string word = "office";
162+
std::string word = "office";
147163
EXPECT_EQ(1, font_info_.DropUncoveredChars(&word));
148164
EXPECT_EQ("oice", word);
149165

150166
// Don't drop non-letter characters like word joiners.
151167
const char* kJoiners[] = {
152-
"\u2060", // U+2060 (WJ)
153-
"\u200C", // U+200C (ZWJ)
154-
"\u200D" // U+200D (ZWNJ)
168+
"\u2060", // U+2060 (WJ)
169+
"\u200C", // U+200C (ZWNJ)
170+
"\u200D" // U+200D (ZWJ)
155171
};
156-
for (int i = 0; i < ARRAYSIZE(kJoiners); ++i) {
172+
for (size_t i = 0; i < ARRAYSIZE(kJoiners); ++i) {
157173
word = kJoiners[i];
158174
EXPECT_EQ(0, font_info_.DropUncoveredChars(&word));
159175
EXPECT_STREQ(kJoiners[i], word.c_str());
@@ -167,17 +183,21 @@ class FontUtilsTest : public ::testing::Test {
167183
// Creates a fake fonts.conf file that points to the testdata fonts for
168184
// fontconfig to initialize with.
169185
static void SetUpTestCase() {
170-
FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata");
186+
FLAGS_fonts_dir = TESTING_DIR;
171187
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
172188
}
173189

174190
void CountUnicodeChars(const char* utf8_text,
175-
std::unordered_map<char32, inT64>* ch_map) {
191+
std::unordered_map<char32, int64_t>* ch_map) {
176192
ch_map->clear();
177193
UnicodeText ut;
178194
ut.PointToUTF8(utf8_text, strlen(utf8_text));
179195
for (UnicodeText::const_iterator it = ut.begin(); it != ut.end(); ++it) {
196+
#if 0
180197
if (UnicodeProps::IsWhitespace(*it)) continue;
198+
#else
199+
if (std::isspace(*it)) continue;
200+
#endif
181201
++(*ch_map)[*it];
182202
}
183203
}
@@ -206,21 +226,21 @@ TEST_F(FontUtilsTest, DoesDetectMissingFonts) {
206226
}
207227

208228
TEST_F(FontUtilsTest, DoesListAvailableFonts) {
209-
const std::vector<string>& fonts = FontUtils::ListAvailableFonts();
229+
const std::vector<std::string>& fonts = FontUtils::ListAvailableFonts();
210230
EXPECT_THAT(fonts, ::testing::ElementsAreArray(kExpectedFontNames));
211-
for (int i = 0; i < fonts.size(); ++i) {
231+
for (auto& font : fonts) {
212232
PangoFontInfo font_info;
213-
EXPECT_TRUE(font_info.ParseFontDescriptionName(fonts[i]));
233+
EXPECT_TRUE(font_info.ParseFontDescriptionName(font));
214234
}
215235
}
216236

217237
TEST_F(FontUtilsTest, DoesFindBestFonts) {
218-
string fonts_list;
219-
std::unordered_map<char32, inT64> ch_map;
238+
std::string fonts_list;
239+
std::unordered_map<char32, int64_t> ch_map;
220240
CountUnicodeChars(kEngText, &ch_map);
221241
EXPECT_EQ(26, ch_map.size()); // 26 letters
222242
std::vector<std::pair<const char*, std::vector<bool> > > font_flags;
223-
string best_list = FontUtils::BestFonts(ch_map, &font_flags);
243+
std::string best_list = FontUtils::BestFonts(ch_map, &font_flags);
224244
EXPECT_TRUE(best_list.size());
225245
// All fonts except Lohit Hindi should render English text.
226246
EXPECT_EQ(ARRAYSIZE(kExpectedFontNames) - 1, font_flags.size());
@@ -238,8 +258,8 @@ TEST_F(FontUtilsTest, DoesSelectFont) {
238258
const char* kLangNames[] = {"Arabic", "English", "Hindi", "Korean", nullptr};
239259
for (int i = 0; kLangText[i] != nullptr; ++i) {
240260
SCOPED_TRACE(kLangNames[i]);
241-
std::vector<string> graphemes;
242-
string selected_font;
261+
std::vector<std::string> graphemes;
262+
std::string selected_font;
243263
EXPECT_TRUE(FontUtils::SelectFont(kLangText[i], strlen(kLangText[i]),
244264
&selected_font, &graphemes));
245265
EXPECT_TRUE(selected_font.size());
@@ -249,28 +269,30 @@ TEST_F(FontUtilsTest, DoesSelectFont) {
249269

250270
TEST_F(FontUtilsTest, DoesFailToSelectFont) {
251271
const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع";
252-
std::vector<string> graphemes;
253-
string selected_font;
272+
std::vector<std::string> graphemes;
273+
std::string selected_font;
254274
EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText),
255275
&selected_font, &graphemes));
256276
}
257277

258278
TEST_F(FontUtilsTest, GetAllRenderableCharacters) {
259-
const int32 kHindiChar = 0x0905;
260-
const int32 kArabicChar = 0x0623;
261-
const int32 kMongolianChar = 0x180E; // Mongolian vowel separator
262-
const int32 kOghamChar = 0x1680; // Ogham space mark
279+
const int32_t kHindiChar = 0x0905;
280+
const int32_t kArabicChar = 0x0623;
281+
const int32_t kMongolianChar = 0x180E; // Mongolian vowel separator
282+
const int32_t kOghamChar = 0x1680; // Ogham space mark
263283
std::vector<bool> unicode_mask;
264284
FontUtils::GetAllRenderableCharacters(&unicode_mask);
265285
EXPECT_TRUE(unicode_mask['A']);
266286
EXPECT_TRUE(unicode_mask['1']);
267287
EXPECT_TRUE(unicode_mask[kHindiChar]);
268288
EXPECT_TRUE(unicode_mask[kArabicChar]);
269289
EXPECT_FALSE(unicode_mask[kMongolianChar]); // no font for Mongolian.
290+
#if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports Ogham
270291
EXPECT_FALSE(unicode_mask[kOghamChar]); // no font for Ogham.
292+
#endif
271293
unicode_mask.clear();
272294

273-
std::vector<string> selected_fonts;
295+
std::vector<std::string> selected_fonts;
274296
selected_fonts.push_back("Lohit Hindi");
275297
FontUtils::GetAllRenderableCharacters(selected_fonts, &unicode_mask);
276298
EXPECT_TRUE(unicode_mask['1']);
@@ -279,14 +301,18 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) {
279301
EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic,
280302
EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian,
281303
EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham.
304+
unicode_mask.clear();
282305

283306
// Check that none of the included fonts cover the Mongolian or Ogham space
284307
// characters.
285-
for (int f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) {
308+
for (size_t f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) {
286309
SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f]));
287310
FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask);
311+
#if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports Ogham
288312
EXPECT_FALSE(unicode_mask[kOghamChar]);
313+
#endif
289314
EXPECT_FALSE(unicode_mask[kMongolianChar]);
315+
unicode_mask.clear();
290316
}
291317
}
292318
} // namespace

0 commit comments

Comments
 (0)