Skip to content

Commit 6bbcb50

Browse files
committed
Added osd renderer for psm 0.
Works for single page and multi-page.
1 parent b882590 commit 6bbcb50

File tree

5 files changed

+126
-47
lines changed

5 files changed

+126
-47
lines changed

api/baseapi.cpp

+58-12
Original file line numberDiff line numberDiff line change
@@ -1198,35 +1198,39 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
11981198
SetInputName(filename);
11991199
SetImage(pix);
12001200
bool failed = false;
1201-
if (timeout_millisec > 0) {
1201+
1202+
if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1203+
// Disabled character recognition
1204+
PageIterator* it = AnalyseLayout();
1205+
1206+
if (it == NULL) {
1207+
failed = true;
1208+
} else {
1209+
delete it;
1210+
}
1211+
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
1212+
failed = FindLines() != 0;
1213+
} else if (timeout_millisec > 0) {
12021214
// Running with a timeout.
12031215
ETEXT_DESC monitor;
12041216
monitor.cancel = NULL;
12051217
monitor.cancel_this = NULL;
12061218
monitor.set_deadline_msecs(timeout_millisec);
1219+
12071220
// Now run the main recognition.
12081221
failed = Recognize(&monitor) < 0;
1209-
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY ||
1210-
tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1211-
// Disabled character recognition.
1212-
PageIterator* it = AnalyseLayout();
1213-
if (it == NULL) {
1214-
failed = true;
1215-
} else {
1216-
delete it;
1217-
PERF_COUNT_END
1218-
return true;
1219-
}
12201222
} else {
12211223
// Normal layout and character recognition with no timeout.
12221224
failed = Recognize(NULL) < 0;
12231225
}
1226+
12241227
if (tesseract_->tessedit_write_images) {
12251228
#ifndef ANDROID_BUILD
12261229
Pix* page_pix = GetThresholdedImage();
12271230
pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
12281231
#endif // ANDROID_BUILD
12291232
}
1233+
12301234
if (failed && retry_config != NULL && retry_config[0] != '\0') {
12311235
// Save current config variables before switching modes.
12321236
FILE* fp = fopen(kOldVarsFile, "wb");
@@ -1243,6 +1247,7 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
12431247
if (renderer && !failed) {
12441248
failed = !renderer->AddImage(this);
12451249
}
1250+
12461251
PERF_COUNT_END
12471252
return !failed;
12481253
}
@@ -1734,6 +1739,47 @@ char* TessBaseAPI::GetUNLVText() {
17341739
return result;
17351740
}
17361741

1742+
/**
1743+
* The recognized text is returned as a char* which is coded
1744+
* as UTF8 and must be freed with the delete [] operator.
1745+
* page_number is a 0-based page index that will appear in the osd file.
1746+
*/
1747+
char* TessBaseAPI::GetOsdText(int page_number) {
1748+
OSResults osr;
1749+
1750+
bool osd = DetectOS(&osr);
1751+
if (!osd) {
1752+
return NULL;
1753+
}
1754+
1755+
int orient_id = osr.best_result.orientation_id;
1756+
int script_id = osr.get_best_script(orient_id);
1757+
float orient_conf = osr.best_result.oconfidence;
1758+
float script_conf = osr.best_result.sconfidence;
1759+
const char* script_name =
1760+
osr.unicharset->get_script_from_script_id(script_id);
1761+
1762+
// clockwise orientation of the input image, in degrees
1763+
int orient_deg = orient_id * 90;
1764+
1765+
// clockwise rotation needed to make the page upright
1766+
int rotate = OrientationIdToValue(orient_id);
1767+
1768+
char* osd_buf = new char[255];
1769+
snprintf(osd_buf, 255,
1770+
"Page number: %d\n"
1771+
"Orientation in degrees: %d\n"
1772+
"Rotate: %d\n"
1773+
"Orientation confidence: %.2f\n"
1774+
"Script: %s\n"
1775+
"Script confidence: %.2f\n",
1776+
page_number,
1777+
orient_deg, rotate, orient_conf,
1778+
script_name, script_conf);
1779+
1780+
return osd_buf;
1781+
}
1782+
17371783
/** Returns the average word confidence for Tesseract page result. */
17381784
int TessBaseAPI::MeanTextConf() {
17391785
int* conf = AllWordConfidences();

api/baseapi.h

+9
Original file line numberDiff line numberDiff line change
@@ -600,12 +600,21 @@ class TESS_API TessBaseAPI {
600600
* page_number is a 0-based page index that will appear in the box file.
601601
*/
602602
char* GetBoxText(int page_number);
603+
603604
/**
604605
* The recognized text is returned as a char* which is coded
605606
* as UNLV format Latin-1 with specific reject and suspect codes
606607
* and must be freed with the delete [] operator.
607608
*/
608609
char* GetUNLVText();
610+
611+
/**
612+
* The orientation and script detection (OSD) text is returned as a char* which is coded
613+
* as UTF8 and must be freed with the delete [] operator.
614+
* page_number is a 0-based page index that will appear in the osd file.
615+
*/
616+
char* GetOsdText(int page_number);
617+
609618
/** Returns the (average) confidence value between 0 and 100. */
610619
int MeanTextConf();
611620
/**

api/renderer.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -213,4 +213,21 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
213213
return true;
214214
}
215215

216+
/**********************************************************************
217+
* Osd Text Renderer interface implementation
218+
**********************************************************************/
219+
/**
 * Constructs the OSD renderer by forwarding outputbase and the literal
 * "osd" to the TessResultRenderer base class.
 * NOTE(review): "osd" is presumably the output file extension; confirm
 * against the TessResultRenderer constructor.
 */
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
    : TessResultRenderer(outputbase, "osd") {}
222+
223+
/**
 * Fetches the OSD text for the current image number from api and appends
 * it to this renderer's output.
 * Returns false if GetOsdText returned NULL (nothing is appended in that
 * case), true otherwise.
 */
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
  char* osd_text = api->GetOsdText(imagenum());
  const bool have_text = (osd_text != NULL);
  if (have_text) {
    AppendString(osd_text);
    // GetOsdText hands back a new[]-allocated buffer that the caller owns.
    delete[] osd_text;
  }
  return have_text;
}
232+
216233
} // namespace tesseract

api/renderer.h

+11
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,17 @@ class TESS_API TessBoxTextRenderer : public TessResultRenderer {
221221
virtual bool AddImageHandler(TessBaseAPI* api);
222222
};
223223

224+
/**
225+
* Renders tesseract output into an osd text string
226+
*/
227+
class TESS_API TessOsdRenderer : public TessResultRenderer {
228+
public:
229+
explicit TessOsdRenderer(const char* outputbase);
230+
231+
protected:
232+
virtual bool AddImageHandler(TessBaseAPI* api);
233+
};
234+
224235
} // namespace tesseract.
225236

226237
#endif // TESSERACT_API_RENDERER_H__

api/tesseractmain.cpp

+31-35
Original file line numberDiff line numberDiff line change
@@ -242,56 +242,52 @@ int main(int argc, char **argv) {
242242
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
243243
api.SetPageSegMode(pagesegmode);
244244

245-
if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
246-
pagesegmode == tesseract::PSM_OSD_ONLY) {
245+
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
247246
int ret_val = 0;
248247

249248
Pix* pixs = pixRead(image);
250249
if (!pixs) {
251250
fprintf(stderr, "Cannot open input file: %s\n", image);
252251
exit(2);
253252
}
253+
254254
api.SetImage(pixs);
255255

256-
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
257-
OSResults osr;
258-
if (api.DetectOS(&osr)) {
259-
int orient = osr.best_result.orientation_id;
260-
int script_id = osr.get_best_script(orient);
261-
const char* script_name =
262-
osr.unicharset->get_script_from_script_id(script_id);
263-
float orient_oco = osr.best_result.oconfidence;
264-
float orient_sco = osr.best_result.sconfidence;
265-
tprintf("Orientation: %d\n"
266-
"Orientation in degrees: %d\n"
267-
"Orientation confidence: %.2f\n"
268-
"Script: %s\n"
269-
"Script confidence: %.2f\n",
270-
orient, OrientationIdToValue(orient), orient_oco,
271-
script_name, orient_sco);
272-
} else {
273-
ret_val = 1;
274-
}
256+
tesseract::Orientation orientation;
257+
tesseract::WritingDirection direction;
258+
tesseract::TextlineOrder order;
259+
float deskew_angle;
260+
261+
tesseract::PageIterator* it = api.AnalyseLayout();
262+
if (it) {
263+
it->Orientation(&orientation, &direction, &order, &deskew_angle);
264+
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
265+
"Deskew angle: %.4f\n",
266+
orientation, direction, order, deskew_angle);
275267
} else {
276-
tesseract::Orientation orientation;
277-
tesseract::WritingDirection direction;
278-
tesseract::TextlineOrder order;
279-
float deskew_angle;
280-
tesseract::PageIterator* it = api.AnalyseLayout();
281-
if (it) {
282-
it->Orientation(&orientation, &direction, &order, &deskew_angle);
283-
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
284-
"Deskew angle: %.4f\n",
285-
orientation, direction, order, deskew_angle);
286-
} else {
287-
ret_val = 1;
288-
}
289-
delete it;
268+
ret_val = 1;
290269
}
270+
271+
delete it;
272+
291273
pixDestroy(&pixs);
292274
exit(ret_val);
293275
}
294276

277+
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
278+
tesseract::TessResultRenderer* renderer =
279+
new tesseract::TessOsdRenderer(outputbase);
280+
281+
bool succeed = api.ProcessPages(image, NULL, 0, renderer);
282+
if (succeed) {
283+
PERF_COUNT_END
284+
return 0;
285+
} else {
286+
fprintf(stderr, "Error during processing.\n");
287+
exit(1);
288+
}
289+
}
290+
295291
bool b;
296292
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
297293
api.GetBoolVariable("tessedit_create_hocr", &b);

0 commit comments

Comments
 (0)