Skip to content

Commit a987e6d

Browse files
committed
Major bug fixes to pango renderer and resolved issue of hash_map vs unordered_map
1 parent 2c837df commit a987e6d

8 files changed

+104
-96
lines changed

ccutil/hashfn.h

+9-19
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,15 @@
2020
#ifndef HASHFN_H
2121
#define HASHFN_H
2222

23-
#ifdef USE_STD_NAMESPACE
2423
#if (__cplusplus >= 201103L) || defined(_MSC_VER) // Visual Studio
2524
#include <unordered_map>
2625
#include <unordered_set>
27-
#define hash_map std::unordered_map
28-
#if (_MSC_VER >= 1500 && _MSC_VER < 1600) // Visual Studio 2008
29-
using namespace std::tr1;
26+
#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1600) // VS 2008
27+
#define TessHashMap std::tr1::unordered_map
28+
#define TessHashSet std::tr1::unordered_set
3029
#else // _MSC_VER
31-
using std::unordered_map;
32-
using std::unordered_set;
30+
#define TessHashMap std::unordered_map
31+
#define TessHashSet std::unordered_set
3332
#include <memory>
3433
#define SmartPtr std::unique_ptr
3534
#define HAVE_UNIQUE_PTR
@@ -41,23 +40,14 @@ using std::unordered_set;
4140
#include <ext/hash_set>
4241
using __gnu_cxx::hash_map;
4342
using __gnu_cxx::hash_set;
44-
#define unordered_map hash_map
45-
#define unordered_set hash_set
43+
#define TessHashMap __gnu_cxx::hash_map
44+
#define TessHashSet __gnu_cxx::hash_set
4645
#else
4746
#include <hash_map>
4847
#include <hash_set>
48+
#define TessHashMap hash_map
49+
// Legacy fallback: alias TessHashSet to the unqualified hash_set provided by
// <hash_set>, mirroring how TessHashMap aliases hash_map. (No leading ':' --
// a stray colon here would make every TessHashSet<...> use a syntax error.)
#define TessHashSet hash_set
4950
#endif // gcc
50-
#elif (__clang__)
51-
#include <unordered_map>
52-
#include <unordered_set>
53-
#define hash_map std::unordered_map
54-
#define unordered_set std::unordered_set
55-
#else // USE_STD_NAMESPACE
56-
#include <hash_map>
57-
#include <hash_set>
58-
#define unordered_map hash_map
59-
#define unordered_set hash_set
60-
#endif // USE_STD_NAMESPACE
6151

6252
#ifndef HAVE_UNIQUE_PTR
6353
// Trivial smart ptr. Expand to add features of std::unique_ptr as required.

textord/bbgrid.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
364364
// An iterator over the list at (x_, y_) in the grid_.
365365
BBC_C_IT it_;
366366
// Set of unique returned elements used when unique_mode_ is true.
367-
unordered_set<BBC*, PtrHash<BBC> > returns_;
367+
TessHashSet<BBC*, PtrHash<BBC> > returns_;
368368
};
369369

370370
// Sort function to sort a BBC by bounding_box().left().

training/ligature_table.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace tesseract {
3232
class PangoFontInfo; // defined in pango_font_info.h
3333

3434
// Map to substitute strings for ligatures.
35-
typedef hash_map<string, string, StringHash> LigHash;
35+
typedef TessHashMap<string, string, StringHash> LigHash;
3636

3737
class LigatureTable {
3838
public:

training/pango_font_info.cpp

+51-55
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,6 @@
6060

6161
STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp",
6262
"Overrides fontconfig default temporary dir");
63-
BOOL_PARAM_FLAG(fontconfig_refresh_cache, false,
64-
"Does a one-time deletion of cache files from the "
65-
"fontconfig_tmpdir before initializing fontconfig.");
66-
BOOL_PARAM_FLAG(fontconfig_refresh_config_file, true,
67-
"Does a one-time reset of the fontconfig config file to point"
68-
" to fonts_dir before initializing fontconfig. Set to true"
69-
" if fontconfig_refresh_cache is true. Set it to false to use"
70-
" multiple instances in separate processes without having to"
71-
" rescan the fonts_dir, using a previously setup font cache");
7263

7364
#ifndef USE_STD_NAMESPACE
7465
#include "ocr/trainingdata/typesetting/legacy_fonts.h"
@@ -91,7 +82,8 @@ namespace tesseract {
9182
// in pixels.
9283
const int kDefaultResolution = 300;
9384

94-
bool PangoFontInfo::fontconfig_initialized_ = false;
85+
string PangoFontInfo::fonts_dir_;
86+
string PangoFontInfo::cache_dir_;
9587

9688
PangoFontInfo::PangoFontInfo() : desc_(NULL), resolution_(kDefaultResolution) {
9789
Clear();
@@ -119,6 +111,8 @@ void PangoFontInfo::Clear() {
119111
}
120112
}
121113

114+
// Destructor: hands desc_ to pango_font_description_free(). desc_ can still be
// NULL at this point (the constructor initializes it to NULL).
// NOTE(review): Pango documents pango_font_description_free() as accepting
// NULL -- confirm against the Pango version in use.
PangoFontInfo::~PangoFontInfo() { pango_font_description_free(desc_); }
115+
122116
string PangoFontInfo::DescriptionName() const {
123117
if (!desc_) return "";
124118
char* desc_str = pango_font_description_to_string(desc_);
@@ -127,59 +121,62 @@ string PangoFontInfo::DescriptionName() const {
127121
return desc_name;
128122
}
129123

130-
// Initializes Fontconfig for use by writing a fake fonts.conf file into the
131-
// FLAGS_fontconfigs_tmpdir directory, that points to the supplied
132-
// fonts_dir, and then overrides the FONTCONFIG_PATH environment variable
133-
// to point to this fonts.conf file. If force_clear, the cache is refreshed
134-
// even if it has already been initialized.
135-
void PangoFontInfo::InitFontConfig(bool force_clear, const string& fonts_dir) {
136-
if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) {
137-
fontconfig_initialized_ = true;
138-
return;
139-
}
140-
if (FLAGS_fontconfig_refresh_cache || force_clear) {
141-
File::DeleteMatchingFiles(File::JoinPath(
142-
FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str());
143-
}
144-
if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache ||
145-
force_clear) {
146-
const int MAX_FONTCONF_FILESIZE = 1024;
147-
char fonts_conf_template[MAX_FONTCONF_FILESIZE];
148-
snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
149-
"<?xml version=\"1.0\"?>\n"
150-
"<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
151-
"<fontconfig>\n"
152-
"<dir>%s</dir>\n"
153-
"<cachedir>%s</cachedir>\n"
154-
"<config></config>\n"
155-
"</fontconfig>", fonts_dir.c_str(),
156-
FLAGS_fontconfig_tmpdir.c_str());
157-
string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(),
158-
"fonts.conf");
159-
File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
124+
// If not already initialized, initializes FontConfig by setting its
125+
// environment variable and creating a fonts.conf file that points to the
126+
// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
127+
/* static */
128+
void PangoFontInfo::SoftInitFontConfig() {
129+
// An empty fonts_dir_ is used as the "not yet initialized" marker. Note that
// HardInitFontConfig stores whatever fonts_dir it is given, so if it was
// called with an empty directory this check stays true and every call here
// re-runs the hard initialization with the flag values.
if (fonts_dir_.empty()) {
130+
HardInitFontConfig(FLAGS_fonts_dir, FLAGS_fontconfig_tmpdir);
160131
}
132+
}
133+
134+
// Re-initializes font config, whether or not already initialized.
135+
// If already initialized, any existing cache is deleted, just to be sure.
136+
/* static */
137+
// Writes a fresh fonts.conf, points FONTCONFIG_PATH at its directory, and then
// re-initializes fontconfig, FontUtils and Pango's default cairo font map.
//   fonts_dir: inserted into the <dir> element of the generated fonts.conf.
//   cache_dir: inserted into <cachedir>; fonts.conf is also written into this
//              directory and FONTCONFIG_PATH is set to it.
// Both values are remembered in the static fonts_dir_ / cache_dir_ members.
void PangoFontInfo::HardInitFontConfig(const string& fonts_dir,
138+
const string& cache_dir) {
139+
// cache_dir_ still holds the directory from the previous initialization here
// (it is reassigned below), so this removes the old cache files. On the very
// first call cache_dir_ is empty and nothing is deleted.
if (!cache_dir_.empty()) {
140+
File::DeleteMatchingFiles(
141+
File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
142+
}
143+
const int MAX_FONTCONF_FILESIZE = 1024;
144+
char fonts_conf_template[MAX_FONTCONF_FILESIZE];
145+
cache_dir_ = cache_dir;
146+
fonts_dir_ = fonts_dir;
147+
// snprintf bounds the output to MAX_FONTCONF_FILESIZE bytes; its return value
// is not checked, so over-long directory names would be truncated silently.
snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
148+
"<?xml version=\"1.0\"?>\n"
149+
"<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
150+
"<fontconfig>\n"
151+
"<dir>%s</dir>\n"
152+
"<cachedir>%s</cachedir>\n"
153+
"<config></config>\n"
154+
"</fontconfig>",
155+
fonts_dir.c_str(), cache_dir_.c_str());
156+
// The config file lives in the cache directory, which is also the directory
// FONTCONFIG_PATH is pointed at below.
string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
157+
File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
161158
#ifdef _WIN32
162159
std::string env("FONTCONFIG_PATH=");
163-
env.append(FLAGS_fontconfig_tmpdir.c_str());
160+
env.append(cache_dir_.c_str());
164161
putenv(env.c_str());
165162
putenv("LANG=en_US.utf8");
166163
#else
167-
setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true);
164+
setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
168165
// Fix the locale so that the reported font names are consistent.
169166
setenv("LANG", "en_US.utf8", true);
170167
#endif // _WIN32
171-
if (!fontconfig_initialized_ || force_clear) {
172-
if (FcInitReinitialize() != FcTrue) {
173-
tprintf("FcInitiReinitialize failed!!\n");
174-
}
168+

169+
// A failed re-initialization is only reported via tprintf; execution
// continues with the FontUtils and Pango resets below regardless.
if (FcInitReinitialize() != FcTrue) {
170+
tprintf("FcInitiReinitialize failed!!\n");
175171
}
176-
fontconfig_initialized_ = true;
177172
FontUtils::ReInit();
173+
// Clear Pango's font cache too.
174+
pango_cairo_font_map_set_default(NULL);
178175
}

180177
static void ListFontFamilies(PangoFontFamily*** families,
181178
int* n_families) {
182-
PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
179+
PangoFontInfo::SoftInitFontConfig();
183180
PangoFontMap* font_map = pango_cairo_font_map_get_default();
184181
DISABLE_HEAP_LEAK_CHECK;
185182
pango_font_map_list_families(font_map, families, n_families);
@@ -253,7 +250,7 @@ bool PangoFontInfo::ParseFontDescriptionName(const string& name) {
253250
// in the font map. Note that if the font is wholly missing, this could
254251
// correspond to a completely different font family and face.
255252
PangoFont* PangoFontInfo::ToPangoFont() const {
256-
InitFontConfig(false, FLAGS_fonts_dir.c_str());
253+
SoftInitFontConfig();
257254
PangoFontMap* font_map = pango_cairo_font_map_get_default();
258255
PangoContext* context = pango_context_new();
259256
pango_cairo_context_set_resolution(context, resolution_);
@@ -538,7 +535,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
538535
query_desc.c_str());
539536
PangoFont* selected_font = NULL;
540537
{
541-
PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str());
538+
PangoFontInfo::SoftInitFontConfig();
542539
PangoFontMap* font_map = pango_cairo_font_map_get_default();
543540
PangoContext* context = pango_context_new();
544541
pango_context_set_font_map(context, font_map);
@@ -690,9 +687,8 @@ void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
690687
// Utilities written to be backward compatible with StringRender
691688

692689
/* static */
693-
int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
694-
const string& fontname,
695-
int* raw_score,
690+
int FontUtils::FontScore(const TessHashMap<char32, inT64>& ch_map,
691+
const string& fontname, int* raw_score,
696692
vector<bool>* ch_flags) {
697693
PangoFontInfo font_info;
698694
if (!font_info.ParseFontDescriptionName(fontname)) {
@@ -707,7 +703,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
707703
}
708704
*raw_score = 0;
709705
int ok_chars = 0;
710-
for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
706+
for (TessHashMap<char32, inT64>::const_iterator it = ch_map.begin();
711707
it != ch_map.end(); ++it) {
712708
bool covered = (IsWhitespace(it->first) ||
713709
(pango_coverage_get(coverage, it->first)
@@ -725,7 +721,7 @@ int FontUtils::FontScore(const unordered_map<char32, inT64>& ch_map,
725721

726722

727723
/* static */
728-
string FontUtils::BestFonts(const unordered_map<char32, inT64>& ch_map,
724+
string FontUtils::BestFonts(const TessHashMap<char32, inT64>& ch_map,
729725
vector<pair<const char*, vector<bool> > >* fonts) {
730726
const double kMinOKFraction = 0.99;
731727
// Weighted fraction of characters that must be renderable in a font to make

training/pango_font_info.h

+27-9
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,16 @@
2424
#include <utility>
2525
#include <vector>
2626

27+
#include "commandlineflags.h"
2728
#include "hashfn.h"
2829
#include "host.h"
29-
#include "util.h"
3030
#include "pango/pango-font.h"
31+
#include "pango/pango.h"
32+
#include "pango/pangocairo.h"
33+
#include "util.h"
34+
35+
DECLARE_STRING_PARAM_FLAG(fonts_dir);
36+
DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);
3137

3238
typedef signed int char32;
3339

@@ -44,6 +50,7 @@ class PangoFontInfo {
4450
DECORATIVE,
4551
};
4652
PangoFontInfo();
53+
~PangoFontInfo();
4754
// Initialize from parsing a font description name, defined as a string of the
4855
// format:
4956
// "FamilyName [FaceName] [PointSize]"
@@ -83,10 +90,14 @@ class PangoFontInfo {
8390
bool GetSpacingProperties(const string& utf8_char,
8491
int* x_bearing, int* x_advance) const;
8592

86-
// Initializes FontConfig by setting its environment variable and creating
87-
// a fonts.conf file that points to the given fonts_dir. Once initialized,
88-
// it is not re-initialized unless force_clear is true.
89-
static void InitFontConfig(bool force_clear, const string& fonts_dir);
93+
// If not already initialized, initializes FontConfig by setting its
94+
// environment variable and creating a fonts.conf file that points to the
95+
// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
96+
static void SoftInitFontConfig();
97+
// Re-initializes font config, whether or not already initialized.
98+
// If already initialized, any existing cache is deleted, just to be sure.
99+
static void HardInitFontConfig(const string& fonts_dir,
100+
const string& cache_dir);
90101

91102
// Accessors
92103
string DescriptionName() const;
@@ -130,8 +141,14 @@ class PangoFontInfo {
130141
int resolution_;
131142
// Fontconfig operates through an environment variable, so it intrinsically
132143
// cannot be thread-friendly, but you can serialize multiple independent
133-
// font configurations by calling InitFontConfig(true, path).
134-
static bool fontconfig_initialized_;
144+
// font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
145+
// These hold the last initialized values set by HardInitFontConfig or
146+
// the first call to SoftInitFontConfig.
147+
// Directory to be scanned for font files.
148+
static string fonts_dir_;
149+
// Directory to store the cache of font information. (Can be the same as
150+
// fonts_dir_)
151+
static string cache_dir_;
135152

136153
private:
137154
PangoFontInfo(const PangoFontInfo&);
@@ -185,15 +202,16 @@ class FontUtils {
185202
// In the flags vector, each flag is set according to whether the
186203
// corresponding character (in order of iterating ch_map) can be rendered.
187204
// The return string is a list of the acceptable fonts that were used.
188-
static string BestFonts(const unordered_map<char32, inT64>& ch_map,
205+
static string BestFonts(
206+
const TessHashMap<char32, inT64>& ch_map,
189207
vector<std::pair<const char*, vector<bool> > >* font_flag);
190208

191209
// FontScore returns the weighted renderability score of the given
192210
// hash map character table in the given font. The unweighted score
193211
// is also returned in raw_score.
194212
// The values in the bool vector ch_flags correspond to whether the
195213
// corresponding character (in order of iterating ch_map) can be rendered.
196-
static int FontScore(const unordered_map<char32, inT64>& ch_map,
214+
static int FontScore(const TessHashMap<char32, inT64>& ch_map,
197215
const string& fontname, int* raw_score,
198216
vector<bool>* ch_flags);
199217

training/stringrenderer.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
108108
underline_start_prob_(0),
109109
underline_continuation_prob_(0),
110110
underline_style_(PANGO_UNDERLINE_SINGLE),
111+
features_(NULL),
111112
drop_uncovered_chars_(true),
112113
strip_unrenderable_words_(false),
113114
add_ligatures_(false),
@@ -120,7 +121,6 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
120121
box_padding_(0),
121122
total_chars_(0),
122123
font_index_(0),
123-
features_(NULL),
124124
last_offset_(0) {
125125
pen_color_[0] = 0.0;
126126
pen_color_[1] = 0.0;
@@ -347,6 +347,11 @@ void StringRenderer::ClearBoxes() {
347347
boxaDestroy(&page_boxes_);
348348
}
349349

350+
// Returns the box data as a string rather than writing it to a file: runs the
// same BoxChar::PrepareToWrite step on boxchars_ that WriteAllBoxes does, then
// formats the result with BoxChar::GetTesseractBoxStr using page_height_.
string StringRenderer::GetBoxesStr() {
351+
// boxchars_ is passed by address, so PrepareToWrite may modify it in place
// (presumably why this method is not const -- TODO confirm).
BoxChar::PrepareToWrite(&boxchars_);
352+
return BoxChar::GetTesseractBoxStr(page_height_, boxchars_);
353+
}
354+
350355
void StringRenderer::WriteAllBoxes(const string& filename) {
351356
BoxChar::PrepareToWrite(&boxchars_);
352357
BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_);

0 commit comments

Comments
 (0)