Skip to content

Commit 349de8b

Browse files
stweil authored and zdenop committed
Support different help texts for normal and advanced users and restore legacy mode (#1325)
* Restore support for the legacy engine

  It is still needed to get text attributes which are unsupported by the LSTM engine, and it also has better recognition rates for some texts.

  Signed-off-by: Stefan Weil <[email protected]>

* tesseractmain: Add missing 'static' attributes

  Signed-off-by: Stefan Weil <[email protected]>

* Support different help texts for normal and advanced users

  The old option --help now shows a very basic help text. The new option --help-extra shows the full help information. It now also includes a hint that Tesseract supports lists of images. Fix also the indentation in the PSM help and use a more neutral text in the OEM help.

  Signed-off-by: Stefan Weil <[email protected]>

* Add missing line feed in error message

  Signed-off-by: Stefan Weil <[email protected]>
1 parent 173ad2b commit 349de8b

File tree

1 file changed

+66
-48
lines changed

1 file changed

+66
-48
lines changed

api/tesseractmain.cpp

+66 -48
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ static void Win32WarningHandler(const char* module, const char* fmt,
5151

5252
#endif /* HAVE_TIFFIO_H && _WIN32 */
5353

54-
void PrintVersionInfo() {
54+
static void PrintVersionInfo() {
5555
char* versionStrP;
5656

5757
printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
@@ -103,17 +103,7 @@ void PrintVersionInfo() {
103103
if (SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
104104
}
105105

106-
void PrintUsage(const char* program) {
107-
printf(
108-
"Usage:\n"
109-
" %s --help | --help-psm | --help-oem | --version\n"
110-
" %s --list-langs [--tessdata-dir PATH]\n"
111-
" %s --print-parameters [options...] [configfile...]\n"
112-
" %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
113-
program, program, program, program);
114-
}
115-
116-
void PrintHelpForPSM() {
106+
static void PrintHelpForPSM() {
117107
const char* msg =
118108
"Page segmentation modes:\n"
119109
" 0 Orientation and script detection (OSD) only.\n"
@@ -131,26 +121,30 @@ void PrintHelpForPSM() {
131121
" particular order.\n"
132122
" 12 Sparse text with OSD.\n"
133123
" 13 Raw line. Treat the image as a single text line,\n"
134-
"\t\t\tbypassing hacks that are Tesseract-specific.\n";
124+
" bypassing hacks that are Tesseract-specific.\n";
135125

136126
printf("%s", msg);
137127
}
138128

139-
void PrintHelpForOEM() {
129+
static void PrintHelpForOEM() {
140130
const char* msg =
141131
"OCR Engine modes:\n"
142-
" 0 Original Tesseract only (unsupported).\n"
132+
" 0 Legacy Tesseract only.\n"
143133
" 1 Neural nets LSTM only.\n"
144-
" 2 Tesseract + LSTM (unsupported).\n"
134+
" 2 Legacy + LSTM Tesseract.\n"
145135
" 3 Default, based on what is available.\n";
146136

147137
printf("%s", msg);
148138
}
149139

150-
void PrintHelpMessage(const char* program) {
151-
PrintUsage(program);
152-
153-
const char* ocr_options =
140+
static void PrintHelpExtra(const char* program) {
141+
printf(
142+
"Usage:\n"
143+
" %s --help | --help-extra | --help-psm | --help-oem | --version\n"
144+
" %s --list-langs [--tessdata-dir PATH]\n"
145+
" %s --print-parameters [options...] [configfile...]\n"
146+
" %s imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]\n"
147+
"\n"
154148
"OCR options:\n"
155149
" --tessdata-dir PATH Specify the location of tessdata path.\n"
156150
" --user-words PATH Specify the location of user words file.\n"
@@ -160,26 +154,50 @@ void PrintHelpMessage(const char* program) {
160154
" Multiple -c arguments are allowed.\n"
161155
" --psm NUM Specify page segmentation mode.\n"
162156
" --oem NUM Specify OCR Engine mode.\n"
163-
"NOTE: These options must occur before any configfile.\n";
157+
"NOTE: These options must occur before any configfile.\n"
158+
"\n",
159+
program, program, program, program
160+
);
164161

165-
printf("\n%s\n", ocr_options);
166162
PrintHelpForPSM();
163+
printf("\n");
167164
PrintHelpForOEM();
168165

169-
const char* single_options =
166+
printf(
167+
"\n"
170168
"Single options:\n"
171-
" -h, --help Show this help message.\n"
169+
" -h, --help Show minimal help message.\n"
170+
" --help-extra Show extra help for advanced users.\n"
172171
" --help-psm Show page segmentation modes.\n"
173172
" --help-oem Show OCR Engine modes.\n"
174173
" -v, --version Show version information.\n"
175174
" --list-langs List available languages for tesseract engine.\n"
176-
" --print-parameters Print tesseract parameters.\n";
175+
" --print-parameters Print tesseract parameters.\n"
176+
);
177+
}
177178

178-
printf("\n%s", single_options);
179+
static void PrintHelpMessage(const char* program) {
180+
printf(
181+
"Usage:\n"
182+
" %s --help | --help-extra | --version\n"
183+
" %s --list-langs\n"
184+
" %s imagename outputbase [options...] [configfile...]\n"
185+
"\n"
186+
"OCR options:\n"
187+
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
188+
"NOTE: These options must occur before any configfile.\n"
189+
"\n"
190+
"Single options:\n"
191+
" --help Show this help message.\n"
192+
" --help-extra Show extra help for advanced users.\n"
193+
" --version Show version information.\n"
194+
" --list-langs List available languages for tesseract engine.\n",
195+
program, program, program
196+
);
179197
}
180198

181-
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
182-
char** argv) {
199+
static void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
200+
char** argv) {
183201
char opt1[256], opt2[255];
184202
for (int i = 0; i < argc; i++) {
185203
if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
@@ -202,7 +220,7 @@ void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
202220
}
203221
}
204222

205-
void PrintLangsList(tesseract::TessBaseAPI* api) {
223+
static void PrintLangsList(tesseract::TessBaseAPI* api) {
206224
GenericVector<STRING> languages;
207225
api->GetAvailableLanguagesAsVector(&languages);
208226
printf("List of available languages (%d):\n", languages.size());
@@ -213,7 +231,7 @@ void PrintLangsList(tesseract::TessBaseAPI* api) {
213231
api->End();
214232
}
215233

216-
void PrintBanner() {
234+
static void PrintBanner() {
217235
tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
218236
tesseract::TessBaseAPI::Version());
219237
}
@@ -232,27 +250,28 @@ void PrintBanner() {
232250
* It would be simpler if we could set the value before Init,
233251
* but that doesn't work.
234252
*/
235-
void FixPageSegMode(tesseract::TessBaseAPI* api,
236-
tesseract::PageSegMode pagesegmode) {
253+
static void FixPageSegMode(tesseract::TessBaseAPI* api,
254+
tesseract::PageSegMode pagesegmode) {
237255
if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
238256
api->SetPageSegMode(pagesegmode);
239257
}
240258

241-
void checkArgValues (int arg, const char* mode, int count) {
259+
static void checkArgValues(int arg, const char* mode, int count) {
242260
if (arg >= count || arg < 0) {
243-
printf("Invalid %s value, please enter a number between 0-%d", mode, count - 1);
244-
exit(0);
261+
printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1);
262+
exit(0);
245263
}
246264
}
247265

248266
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
249-
void ParseArgs(const int argc, char** argv, const char** lang,
250-
const char** image, const char** outputbase,
251-
const char** datapath, bool* list_langs, bool* print_parameters,
252-
GenericVector<STRING>* vars_vec,
253-
GenericVector<STRING>* vars_values, int* arg_i,
254-
tesseract::PageSegMode* pagesegmode,
255-
tesseract::OcrEngineMode* enginemode) {
267+
static void ParseArgs(const int argc, char** argv, const char** lang,
268+
const char** image, const char** outputbase,
269+
const char** datapath,
270+
bool* list_langs, bool* print_parameters,
271+
GenericVector<STRING>* vars_vec,
272+
GenericVector<STRING>* vars_values, int* arg_i,
273+
tesseract::PageSegMode* pagesegmode,
274+
tesseract::OcrEngineMode* enginemode) {
256275
if (argc == 1) {
257276
PrintHelpMessage(argv[0]);
258277
exit(0);
@@ -263,6 +282,10 @@ void ParseArgs(const int argc, char** argv, const char** lang,
263282
PrintHelpMessage(argv[0]);
264283
exit(0);
265284
}
285+
if (strcmp(argv[1], "--help-extra") == 0) {
286+
PrintHelpExtra(argv[0]);
287+
exit(0);
288+
}
266289
if ((strcmp(argv[1], "--help-psm") == 0)) {
267290
PrintHelpForPSM();
268291
exit(0);
@@ -310,11 +333,6 @@ void ParseArgs(const int argc, char** argv, const char** lang,
310333
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
311334
int oem = atoi(argv[i + 1]);
312335
checkArgValues(oem, "OEM", tesseract::OEM_COUNT);
313-
if (oem == tesseract::OEM_TESSERACT_ONLY ||
314-
oem == tesseract::OEM_TESSERACT_LSTM_COMBINED) {
315-
printf("Legacy OCR Engine is not supported anymore.\n");
316-
exit(2);
317-
}
318336
*enginemode = static_cast<tesseract::OcrEngineMode>(oem);
319337
++i;
320338
} else if (strcmp(argv[i], "--print-parameters") == 0) {
@@ -344,7 +362,7 @@ void ParseArgs(const int argc, char** argv, const char** lang,
344362
}
345363
}
346364

347-
void PreloadRenderers(
365+
static void PreloadRenderers(
348366
tesseract::TessBaseAPI* api,
349367
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
350368
tesseract::PageSegMode pagesegmode, const char* outputbase) {

0 commit comments

Comments
 (0)