1
- #include " tesseract/ccutil/unicharset.h"
1
+ // (C) Copyright 2017, Google Inc.
2
+ // Licensed under the Apache License, Version 2.0 (the "License");
3
+ // you may not use this file except in compliance with the License.
4
+ // You may obtain a copy of the License at
5
+ // http://www.apache.org/licenses/LICENSE-2.0
6
+ // Unless required by applicable law or agreed to in writing, software
7
+ // distributed under the License is distributed on an "AS IS" BASIS,
8
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ // See the License for the specific language governing permissions and
10
+ // limitations under the License.
11
+
12
+ #include < string>
13
+ #include " log.h" // for LOG
14
+ #include " unicharset.h"
15
+ #include " gmock/gmock.h" // for testing::ElementsAreArray
16
+ #include " include_gunit.h"
2
17
3
18
using testing::ElementsAreArray;
4
19
@@ -32,7 +47,7 @@ TEST(UnicharsetTest, Basics) {
32
47
std::vector<int > v (&labels[0 ], &labels[0 ] + labels.size ());
33
48
EXPECT_THAT (v, ElementsAreArray ({3 , 4 , 4 , 5 , 7 , 6 }));
34
49
// With the fi ligature, encoding fails without a pre-cleanup.
35
- string lig_str = " af\ufb01 ne" ;
50
+ std:: string lig_str = " af\ufb01 ne" ;
36
51
EXPECT_FALSE (
37
52
u.encode_string (lig_str.c_str (), true , &labels, nullptr , nullptr ));
38
53
lig_str = u.CleanupString (lig_str.c_str ());
@@ -62,7 +77,7 @@ TEST(UnicharsetTest, Multibyte) {
62
77
EXPECT_EQ (u.size (), 9 );
63
78
EXPECT_EQ (u.unichar_to_id (" \u0627 " ), 3 );
64
79
EXPECT_EQ (u.unichar_to_id (" \u062c " ), 4 );
65
- // The first two bytes of this string is \u0627, which matches id 3;
80
+ // The first two bytes of this string are \u0627, which matches id 3;
66
81
EXPECT_EQ (u.unichar_to_id (" \u0627\u062c " , 2 ), 3 );
67
82
EXPECT_EQ (u.unichar_to_id (" \u062f " ), 5 );
68
83
// Individual f and i are not present, but they are there as a pair.
@@ -79,13 +94,13 @@ TEST(UnicharsetTest, Multibyte) {
79
94
// With the fi ligature the fi is picked out.
80
95
GenericVector<char > lengths;
81
96
int encoded_length;
82
- string src_str = " \u0627\u062c\ufb01\u0635\u062b " ;
97
+ std:: string src_str = " \u0627\u062c\ufb01\u0635\u062b " ;
83
98
// src_str has to be pre-cleaned for lengths to be correct.
84
- string cleaned = u.CleanupString (src_str.c_str ());
99
+ std:: string cleaned = u.CleanupString (src_str.c_str ());
85
100
EXPECT_TRUE (u.encode_string (cleaned.c_str (), true , &labels, &lengths,
86
101
&encoded_length));
87
102
EXPECT_EQ (encoded_length, cleaned.size ());
88
- string len_str (&lengths[0 ], lengths.size ());
103
+ std:: string len_str (&lengths[0 ], lengths.size ());
89
104
EXPECT_STREQ (len_str.c_str (), " \002\002\002\002\002 " );
90
105
v = std::vector<int >(&labels[0 ], &labels[0 ] + labels.size ());
91
106
EXPECT_THAT (v, ElementsAreArray ({3 , 4 , 6 , 8 , 7 }));
@@ -128,8 +143,8 @@ TEST(UnicharsetTest, MultibyteBigrams) {
128
143
TEST (UnicharsetTest, OldStyle) {
129
144
// This test verifies that an old unicharset containing fi/fl ligatures loads
130
145
// and keeps all the entries.
131
- string filename =
132
- file::JoinPath (FLAGS_test_srcdir, " testdata " , " eng.unicharset" );
146
+ std:: string filename =
147
+ file::JoinPath (TESTDATA_DIR , " eng.unicharset" );
133
148
UNICHARSET u;
134
149
LOG (INFO) << " Filename=" << filename;
135
150
EXPECT_TRUE (u.load_from_file (filename.c_str ()));
0 commit comments