Skip to content

Commit aa6eb6b

Browse files
committed
Remove Tesseract parameter "include_page_breaks" and use FF by default
Now Tesseract adds a page break (normally form feed) by default. It is still possible to suppress page breaks by setting an empty page_separator. Signed-off-by: Stefan Weil <[email protected]>
1 parent 3bb573a commit aa6eb6b

File tree

3 files changed

+1
-10
lines changed

3 files changed

+1
-10
lines changed

api/renderer.cpp

+1 -3
Original file line number | Diff line number | Diff line change
@@ -132,10 +132,8 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
132132

133133
AppendString(utf8.get());
134134

135-
bool pageBreak = false;
136-
api->GetBoolVariable("include_page_breaks", &pageBreak);
137135
const char* pageSeparator = api->GetStringVariable("page_separator");
138-
if (pageBreak) {
136+
if (pageSeparator != nullptr && *pageSeparator != '\0') {
139137
AppendString(pageSeparator);
140138
}
141139

ccmain/tesseractclass.cpp

-4
Original file line number | Diff line number | Diff line change
@@ -505,10 +505,6 @@ Tesseract::Tesseract()
505505
this->params()),
506506
BOOL_MEMBER(preserve_interword_spaces, false,
507507
"Preserve multiple interword spaces", this->params()),
508-
BOOL_MEMBER(include_page_breaks, FALSE,
509-
"Include page separator string in output text after each "
510-
"image/page.",
511-
this->params()),
512508
STRING_MEMBER(page_separator, "\f",
513509
"Page separator (default is form feed control character)",
514510
this->params()),

ccmain/tesseractclass.h

-3
Original file line number | Diff line number | Diff line change
@@ -1103,9 +1103,6 @@ class Tesseract : public Wordrec {
11031103
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
11041104
BOOL_VAR_H(preserve_interword_spaces, false,
11051105
"Preserve multiple interword spaces");
1106-
BOOL_VAR_H(include_page_breaks, false,
1107-
"Include page separator string in output text after each "
1108-
"image/page.");
11091106
STRING_VAR_H(page_separator, "\f",
11101107
"Page separator (default is form feed control character)");
11111108

0 commit comments

Comments
 (0)