Skip to content

Commit 20ed60b

Browse files
committed
Fix unicharset_test
1 parent db3ed5d commit 20ed60b

File tree

3 files changed

+48
-11
lines changed

3 files changed

+48
-11
lines changed

unittest/Makefile.am

+10
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
88
# Absolute path of directory 'testing' with test images and ground truth texts
99
# (using submodule test).
1010
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
11+
TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
1112

1213
AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
1314
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
1415
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
16+
AM_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\""
1517
AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE
1618
AM_CPPFLAGS += -I$(top_builddir)/src/api
1719
AM_CPPFLAGS += -I$(top_srcdir)/src/api
@@ -123,6 +125,8 @@ check_PROGRAMS = \
123125

124126
if ENABLE_TRAINING
125127
check_PROGRAMS += commandlineflags_test
128+
check_PROGRAMS += unichar_test
129+
check_PROGRAMS += unicharset_test
126130
check_PROGRAMS += validator_test
127131
endif
128132

@@ -235,6 +239,12 @@ tabvector_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
235239
tfile_test_SOURCES = tfile_test.cc
236240
tfile_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
237241

242+
unichar_test_SOURCES = unichar_test.cc
243+
unichar_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
244+
245+
unicharset_test_SOURCES = unicharset_test.cc
246+
unicharset_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
247+
238248
validator_test_SOURCES = validator_test.cc
239249
validator_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
240250

unittest/unichar_test.cc

+15-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
1-
#include "tesseract/ccutil/unichar.h"
1+
// (C) Copyright 2017, Google Inc.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
// Unless required by applicable law or agreed to in writing, software
7+
// distributed under the License is distributed on an "AS IS" BASIS,
8+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
// See the License for the specific language governing permissions and
10+
// limitations under the License.
11+
12+
#include "unichar.h"
13+
#include "include_gunit.h"
214

315
using tesseract::UNICHAR;
416

@@ -13,7 +25,7 @@ TEST(UnicharTest, Conversion) {
1325
// Check for round-trip conversion.
1426
std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kUTF8Src);
1527
EXPECT_THAT(utf32, testing::ElementsAreArray(kUTF32Src));
16-
string utf8 = UNICHAR::UTF32ToUTF8(utf32);
28+
std::string utf8 = UNICHAR::UTF32ToUTF8(utf32);
1729
EXPECT_STREQ(kUTF8Src, utf8.c_str());
1830
}
1931

@@ -25,7 +37,7 @@ TEST(UnicharTest, InvalidText) {
2537
std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kInvalidUTF8);
2638
EXPECT_TRUE(utf32.empty());
2739
// Invalid utf32 produces an empty string.
28-
string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
40+
std::string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
2941
EXPECT_TRUE(utf8.empty());
3042
}
3143

unittest/unicharset_test.cc

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,19 @@
1-
#include "tesseract/ccutil/unicharset.h"
1+
// (C) Copyright 2017, Google Inc.
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
// Unless required by applicable law or agreed to in writing, software
7+
// distributed under the License is distributed on an "AS IS" BASIS,
8+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
// See the License for the specific language governing permissions and
10+
// limitations under the License.
11+
12+
#include <string>
13+
#include "log.h" // for LOG
14+
#include "unicharset.h"
15+
#include "gmock/gmock.h" // for testing::ElementsAreArray
16+
#include "include_gunit.h"
217

318
using testing::ElementsAreArray;
419

@@ -32,7 +47,7 @@ TEST(UnicharsetTest, Basics) {
3247
std::vector<int> v(&labels[0], &labels[0] + labels.size());
3348
EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
3449
// With the fi ligature encoding fails without a pre-cleanup.
35-
string lig_str = "af\ufb01ne";
50+
std::string lig_str = "af\ufb01ne";
3651
EXPECT_FALSE(
3752
u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
3853
lig_str = u.CleanupString(lig_str.c_str());
@@ -62,7 +77,7 @@ TEST(UnicharsetTest, Multibyte) {
6277
EXPECT_EQ(u.size(), 9);
6378
EXPECT_EQ(u.unichar_to_id("\u0627"), 3);
6479
EXPECT_EQ(u.unichar_to_id("\u062c"), 4);
65-
// The first two bytes of this string is \u0627, which matches id 3;
80+
// The first two bytes of this string are \u0627, which matches id 3;
6681
EXPECT_EQ(u.unichar_to_id("\u0627\u062c", 2), 3);
6782
EXPECT_EQ(u.unichar_to_id("\u062f"), 5);
6883
// Individual f and i are not present, but they are there as a pair.
@@ -79,13 +94,13 @@ TEST(UnicharsetTest, Multibyte) {
7994
// With the fi ligature the fi is picked out.
8095
GenericVector<char> lengths;
8196
int encoded_length;
82-
string src_str = "\u0627\u062c\ufb01\u0635\u062b";
97+
std::string src_str = "\u0627\u062c\ufb01\u0635\u062b";
8398
// src_str has to be pre-cleaned for lengths to be correct.
84-
string cleaned = u.CleanupString(src_str.c_str());
99+
std::string cleaned = u.CleanupString(src_str.c_str());
85100
EXPECT_TRUE(u.encode_string(cleaned.c_str(), true, &labels, &lengths,
86101
&encoded_length));
87102
EXPECT_EQ(encoded_length, cleaned.size());
88-
string len_str(&lengths[0], lengths.size());
103+
std::string len_str(&lengths[0], lengths.size());
89104
EXPECT_STREQ(len_str.c_str(), "\002\002\002\002\002");
90105
v = std::vector<int>(&labels[0], &labels[0] + labels.size());
91106
EXPECT_THAT(v, ElementsAreArray({3, 4, 6, 8, 7}));
@@ -128,8 +143,8 @@ TEST(UnicharsetTest, MultibyteBigrams) {
128143
TEST(UnicharsetTest, OldStyle) {
129144
// This test verifies an old unicharset that contains fi/fl ligatures loads
130145
// and keeps all the entries.
131-
string filename =
132-
file::JoinPath(FLAGS_test_srcdir, "testdata", "eng.unicharset");
146+
std::string filename =
147+
file::JoinPath(TESTDATA_DIR, "eng.unicharset");
133148
UNICHARSET u;
134149
LOG(INFO) << "Filename=" << filename;
135150
EXPECT_TRUE(u.load_from_file(filename.c_str()));

0 commit comments

Comments
 (0)