@@ -108,26 +108,30 @@ const int kMinCredibleResolution = 70;
108
108
const int kMaxCredibleResolution = 2400 ;
109
109
110
110
// Default constructor. Every pointer member starts out null, the requested
// engine mode starts at OEM_DEFAULT, recognition is marked as not done, and
// the rectangle and image dimensions start at zero. No engine is created here.
TessBaseAPI::TessBaseAPI()
    : tesseract_(nullptr),
      osd_tesseract_(nullptr),
      equ_detect_(nullptr),
      reader_(nullptr),
      // Thresholder is initialized to nullptr here, but will be set before
      // use by: A constructor of a derived API, SetThresholder(), or
      // created implicitly when used in InternalSetImage.
      thresholder_(nullptr),
      paragraph_models_(nullptr),
      block_list_(nullptr),
      page_res_(nullptr),
      input_file_(nullptr),
      output_file_(nullptr),
      datapath_(nullptr),
      language_(nullptr),
      last_oem_requested_(OEM_DEFAULT),
      recognition_done_(false),
      // Uses nullptr like every other pointer member in this list.
      truth_cb_(nullptr),
      rect_left_(0),
      rect_top_(0),
      rect_width_(0),
      rect_height_(0),
      image_width_(0),
      image_height_(0) {}
131
135
132
136
TessBaseAPI::~TessBaseAPI () {
133
137
End ();
@@ -275,20 +279,33 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
275
279
const GenericVector<STRING> *vars_vec,
276
280
const GenericVector<STRING> *vars_values,
277
281
bool set_only_non_debug_params) {
282
+ return Init (datapath, 0 , language, oem, configs, configs_size, vars_vec,
283
+ vars_values, set_only_non_debug_params, nullptr );
284
+ }
285
+
286
+ // In-memory version reads the traineddata file directly from the given
287
+ // data[data_size] array. Also implements the version with a datapath in data,
288
+ // flagged by data_size = 0.
289
+ int TessBaseAPI::Init (const char * data, int data_size, const char * language,
290
+ OcrEngineMode oem, char ** configs, int configs_size,
291
+ const GenericVector<STRING>* vars_vec,
292
+ const GenericVector<STRING>* vars_values,
293
+ bool set_only_non_debug_params, FileReader reader) {
278
294
PERF_COUNT_START (" TessBaseAPI::Init" )
279
295
// Default language is "eng".
280
- if (language == NULL ) language = " eng" ;
296
+ if (language == nullptr ) language = " eng" ;
297
+ STRING datapath = data_size == 0 ? data : language;
281
298
// If the datapath, OcrEngineMode or the language have changed - start again.
282
299
// Note that the language_ field stores the last requested language that was
283
300
// initialized successfully, while tesseract_->lang stores the language
284
301
// actually used. They differ only if the requested language was NULL, in
285
302
// which case tesseract_->lang is set to the Tesseract default ("eng").
286
- if (tesseract_ != NULL &&
287
- (datapath_ == NULL || language_ == NULL ||
288
- *datapath_ != datapath || last_oem_requested_ != oem ||
303
+ if (tesseract_ != nullptr &&
304
+ (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
305
+ last_oem_requested_ != oem ||
289
306
(*language_ != language && tesseract_->lang != language))) {
290
307
delete tesseract_;
291
- tesseract_ = NULL ;
308
+ tesseract_ = nullptr ;
292
309
}
293
310
// PERF_COUNT_SUB("delete tesseract_")
294
311
#ifdef USE_OPENCL
@@ -297,27 +314,33 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
297
314
#endif
298
315
PERF_COUNT_SUB (" OD::InitEnv()" )
299
316
bool reset_classifier = true ;
300
- if (tesseract_ == NULL ) {
317
+ if (tesseract_ == nullptr ) {
301
318
reset_classifier = false ;
302
319
tesseract_ = new Tesseract;
320
+ if (reader != nullptr ) reader_ = reader;
321
+ TessdataManager mgr (reader_);
322
+ if (data_size != 0 ) {
323
+ mgr.LoadMemBuffer (language, data, data_size);
324
+ }
303
325
if (tesseract_->init_tesseract (
304
- datapath, output_file_ != NULL ? output_file_->string () : NULL ,
305
- language, oem, configs, configs_size, vars_vec, vars_values,
306
- set_only_non_debug_params) != 0 ) {
326
+ datapath.string (),
327
+ output_file_ != nullptr ? output_file_->string () : nullptr ,
328
+ language, oem, configs, configs_size, vars_vec, vars_values,
329
+ set_only_non_debug_params, &mgr) != 0 ) {
307
330
return -1 ;
308
331
}
309
332
}
310
333
PERF_COUNT_SUB (" update tesseract_" )
311
334
// Update datapath and language requested for the last valid initialization.
312
- if (datapath_ == NULL )
335
+ if (datapath_ == nullptr )
313
336
datapath_ = new STRING (datapath);
314
337
else
315
338
*datapath_ = datapath;
316
339
if ((strcmp (datapath_->string (), " " ) == 0 ) &&
317
340
(strcmp (tesseract_->datadir .string (), " " ) != 0 ))
318
341
*datapath_ = tesseract_->datadir ;
319
342
320
- if (language_ == NULL )
343
+ if (language_ == nullptr )
321
344
language_ = new STRING (language);
322
345
else
323
346
*language_ = language;
@@ -421,7 +444,8 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
421
444
tesseract_ = new Tesseract;
422
445
else
423
446
ParamUtils::ResetToDefaults (tesseract_->params ());
424
- return tesseract_->init_tesseract_lm (datapath, NULL , language);
447
+ TessdataManager mgr;
448
+ return tesseract_->init_tesseract_lm (datapath, NULL , language, &mgr);
425
449
}
426
450
427
451
/* *
@@ -431,7 +455,7 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
431
455
void TessBaseAPI::InitForAnalysePage () {
432
456
if (tesseract_ == NULL ) {
433
457
tesseract_ = new Tesseract;
434
- tesseract_->InitAdaptiveClassifier (false );
458
+ tesseract_->InitAdaptiveClassifier (nullptr );
435
459
}
436
460
}
437
461
@@ -2239,7 +2263,7 @@ int TessBaseAPI::FindLines() {
2239
2263
}
2240
2264
if (tesseract_ == NULL ) {
2241
2265
tesseract_ = new Tesseract;
2242
- tesseract_->InitAdaptiveClassifier (false );
2266
+ tesseract_->InitAdaptiveClassifier (nullptr );
2243
2267
}
2244
2268
if (tesseract_->pix_binary () == NULL )
2245
2269
Threshold (tesseract_->mutable_pix_binary ());
@@ -2261,22 +2285,24 @@ int TessBaseAPI::FindLines() {
2261
2285
2262
2286
Tesseract* osd_tess = osd_tesseract_;
2263
2287
OSResults osr;
2264
- if (PSM_OSD_ENABLED (tesseract_->tessedit_pageseg_mode ) && osd_tess == NULL ) {
2288
+ if (PSM_OSD_ENABLED (tesseract_->tessedit_pageseg_mode ) &&
2289
+ osd_tess == nullptr ) {
2265
2290
if (strcmp (language_->string (), " osd" ) == 0 ) {
2266
2291
osd_tess = tesseract_;
2267
2292
} else {
2268
2293
osd_tesseract_ = new Tesseract;
2269
- if (osd_tesseract_->init_tesseract (
2270
- datapath_->string (), NULL , " osd" , OEM_TESSERACT_ONLY,
2271
- NULL , 0 , NULL , NULL , false ) == 0 ) {
2294
+ TessdataManager mgr (reader_);
2295
+ if (osd_tesseract_->init_tesseract (datapath_->string (), nullptr , " osd" ,
2296
+ OEM_TESSERACT_ONLY, nullptr , 0 ,
2297
+ nullptr , nullptr , false , &mgr) == 0 ) {
2272
2298
osd_tess = osd_tesseract_;
2273
2299
osd_tesseract_->set_source_resolution (
2274
2300
thresholder_->GetSourceYResolution ());
2275
2301
} else {
2276
2302
tprintf (" Warning: Auto orientation and script detection requested,"
2277
2303
" but osd language failed to load\n " );
2278
2304
delete osd_tesseract_;
2279
- osd_tesseract_ = NULL ;
2305
+ osd_tesseract_ = nullptr ;
2280
2306
}
2281
2307
}
2282
2308
}
0 commit comments