1
- #include " tesseract/training/lang_model_helpers.h"
1
+ // (C) Copyright 2017, Google Inc.
2
+ // Licensed under the Apache License, Version 2.0 (the "License");
3
+ // you may not use this file except in compliance with the License.
4
+ // You may obtain a copy of the License at
5
+ // http://www.apache.org/licenses/LICENSE-2.0
6
+ // Unless required by applicable law or agreed to in writing, software
7
+ // distributed under the License is distributed on an "AS IS" BASIS,
8
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ // See the License for the specific language governing permissions and
10
+ // limitations under the License.
2
11
3
- #include " tesseract/lstm/lstmtrainer.h"
4
- #include " tesseract/training/unicharset_training_utils.h"
12
+ #include < string> // for std::string
13
+
14
+ #include " absl/strings/str_cat.h"
15
+
16
+ #include " gmock/gmock.h" // for testing::ElementsAreArray
17
+
18
+ #include " include_gunit.h"
19
+ #include " lang_model_helpers.h"
20
+ #include " log.h" // for LOG
21
+ #include " lstmtrainer.h"
22
+ #include " unicharset_training_utils.h"
5
23
6
24
namespace tesseract {
7
25
namespace {
8
26
9
- string TestDataNameToPath (const string& name) {
10
- return file::JoinPath (FLAGS_test_srcdir, " testdata " , name);
27
+ std:: string TestDataNameToPath (const std:: string& name) {
28
+ return file::JoinPath (TESTING_DIR , name);
11
29
}
12
30
13
31
// This is an integration test that verifies that CombineLangModel works to
@@ -18,15 +36,15 @@ TEST(LangModelTest, AddACharacter) {
18
36
constexpr char kTestString [] = " Simple ASCII string to encode !@#$%&" ;
19
37
constexpr char kTestStringRupees [] = " ASCII string with Rupee symbol ₹" ;
20
38
// Setup the arguments.
21
- string script_dir = file::JoinPath (FLAGS_test_srcdir, " langdata " ) ;
22
- string eng_dir = file::JoinPath (script_dir, " eng" );
23
- string unicharset_path = TestDataNameToPath (" eng_beam.unicharset" );
39
+ std:: string script_dir = LANGDATA_DIR ;
40
+ std:: string eng_dir = file::JoinPath (script_dir, " eng" );
41
+ std:: string unicharset_path = TestDataNameToPath (" eng_beam.unicharset" );
24
42
UNICHARSET unicharset;
25
43
EXPECT_TRUE (unicharset.load_from_file (unicharset_path.c_str ()));
26
- string version_str = " TestVersion" ;
27
- string output_dir = FLAGS_test_tmpdir;
44
+ std:: string version_str = " TestVersion" ;
45
+ std:: string output_dir = FLAGS_test_tmpdir;
28
46
LOG (INFO) << " Output dir=" << output_dir;
29
- string lang1 = " eng" ;
47
+ std:: string lang1 = " eng" ;
30
48
bool pass_through_recoder = false ;
31
49
GenericVector<STRING> words, puncs, numbers;
32
50
// If these reads fail, we get a warning message and an empty list of words.
@@ -44,7 +62,7 @@ TEST(LangModelTest, AddACharacter) {
44
62
lang1, pass_through_recoder, words, puncs,
45
63
numbers, lang_is_rtl, nullptr , nullptr ));
46
64
// Init a trainer with it, and encode a string.
47
- string traineddata1 =
65
+ std:: string traineddata1 =
48
66
file::JoinPath (output_dir, lang1, absl::StrCat (lang1, " .traineddata" ));
49
67
LSTMTrainer trainer1;
50
68
trainer1.InitCharSet (traineddata1);
@@ -58,13 +76,13 @@ TEST(LangModelTest, AddACharacter) {
58
76
&unicharset);
59
77
EXPECT_EQ (size_before + 1 , unicharset.size ());
60
78
// Generate the traineddata file.
61
- string lang2 = " extended" ;
79
+ std:: string lang2 = " extended" ;
62
80
EXPECT_EQ (EXIT_SUCCESS,
63
81
CombineLangModel (unicharset, script_dir, version_str, output_dir,
64
82
lang2, pass_through_recoder, words, puncs, numbers,
65
83
lang_is_rtl, nullptr , nullptr ));
66
84
// Init a trainer with it, and encode a string.
67
- string traineddata2 =
85
+ std:: string traineddata2 =
68
86
file::JoinPath (output_dir, lang2, absl::StrCat (lang2, " .traineddata" ));
69
87
LSTMTrainer trainer2;
70
88
trainer2.InitCharSet (traineddata2);
@@ -86,7 +104,7 @@ TEST(LangModelTest, AddACharacter) {
86
104
}
87
105
EXPECT_THAT (labels1_v,
88
106
testing::ElementsAreArray (&labels2[0 ], labels2.size ()));
89
- // To make sure we weren't cheating somehow, we can now encode the Rupee
107
+ // To make sure we are not cheating somehow, we can now encode the Rupee
90
108
// symbol, which we could not do before.
91
109
EXPECT_FALSE (trainer1.EncodeString (kTestStringRupees , &labels1));
92
110
EXPECT_TRUE (trainer2.EncodeString (kTestStringRupees , &labels2));
0 commit comments