21
21
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
22
22
// complexity of includes here. Use forward declarations wherever possible
23
23
// and hide includes of complex types in baseapi.cpp.
24
- #include <string> // for std::string
24
+ #include <string> // for std::string
25
25
#include "genericvector.h"
26
26
#include "platform.h"
27
27
@@ -45,107 +45,116 @@ class TessBaseAPI;
45
45
* in addition to the heuristics for producing it.
46
46
*/
47
47
class TESS_API TessResultRenderer {
48
- public:
49
- virtual ~TessResultRenderer ();
50
-
51
- // Takes ownership of pointer so must be new'd instance.
52
- // Renderers aren't ordered, but appends the sequences of next parameter
53
- // and existing next(). The renderers should be unique across both lists.
54
- void insert (TessResultRenderer* next);
55
-
56
- // Returns the next renderer or nullptr.
57
- TessResultRenderer* next () { return next_; }
58
-
59
- /* *
60
- * Starts a new document with the given title.
61
- * This clears the contents of the output data.
62
- * Title should use UTF-8 encoding.
63
- */
64
- bool BeginDocument (const char * title);
65
-
66
- /* *
67
- * Adds the recognized text from the source image to the current document.
68
- * Invalid if BeginDocument not yet called.
69
- *
70
- * Note that this API is a bit weird but is designed to fit into the
71
- * current TessBaseAPI implementation where the api has lots of state
72
- * information that we might want to add in.
73
- */
74
- bool AddImage (TessBaseAPI* api);
75
-
76
- /* *
77
- * Finishes the document and finalizes the output data
78
- * Invalid if BeginDocument not yet called.
79
- */
80
- bool EndDocument ();
81
-
82
- const char * file_extension () const { return file_extension_; }
83
- const char * title () const { return title_.c_str (); }
84
-
85
- // Is everything fine? Otherwise something went wrong.
86
- bool happy () { return happy_; }
87
-
88
- /* *
89
- * Returns the index of the last image given to AddImage
90
- * (i.e. images are incremented whether the image succeeded or not)
91
- *
92
- * This is always defined. It means either the number of the
93
- * current image, the last image ended, or in the completed document
94
- * depending on when in the document lifecycle you are looking at it.
95
- * Will return -1 if a document was never started.
96
- */
97
- int imagenum () const { return imagenum_; }
98
-
99
- protected:
100
- /* *
101
- * Called by concrete classes.
102
- *
103
- * outputbase is the name of the output file excluding
104
- * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
105
- *
106
- * extension indicates the file extension to be used for output
107
- * files. For example "pdf" will produce a .pdf file, and "hocr"
108
- * will produce .hocr files.
109
- */
110
- TessResultRenderer (const char *outputbase,
111
- const char * extension);
112
-
113
- // Hook for specialized handling in BeginDocument()
114
- virtual bool BeginDocumentHandler ();
115
-
116
- // This must be overridden to render the OCR'd results
117
- virtual bool AddImageHandler (TessBaseAPI* api) = 0;
118
-
119
- // Hook for specialized handling in EndDocument()
120
- virtual bool EndDocumentHandler ();
121
-
122
- // Renderers can call this to append '\0' terminated strings into
123
- // the output string returned by GetOutput.
124
- // This method will grow the output buffer if needed.
125
- void AppendString (const char * s);
126
-
127
- // Renderers can call this to append binary byte sequences into
128
- // the output string returned by GetOutput. Note that s is not necessarily
129
- // '\0' terminated (and can contain '\0' within it).
130
- // This method will grow the output buffer if needed.
131
- void AppendData (const char * s, int len);
132
-
133
- private:
134
- const char * file_extension_; // standard extension for generated output
135
- STRING title_; // title of document being rendered
136
- int imagenum_; // index of last image added
137
-
138
- FILE* fout_; // output file pointer
139
- TessResultRenderer* next_; // Can link multiple renderers together
140
- bool happy_; // I get grumpy when the disk fills up, etc.
48
+ public:
49
+ virtual ~TessResultRenderer ();
50
+
51
+ // Takes ownership of pointer so must be new'd instance.
52
+ // Renderers aren't ordered, but appends the sequences of next parameter
53
+ // and existing next(). The renderers should be unique across both lists.
54
+ void insert (TessResultRenderer* next);
55
+
56
+ // Returns the next renderer or nullptr.
57
+ TessResultRenderer* next () {
58
+ return next_;
59
+ }
60
+
61
+ /* *
62
+ * Starts a new document with the given title.
63
+ * This clears the contents of the output data.
64
+ * Title should use UTF-8 encoding.
65
+ */
66
+ bool BeginDocument (const char * title);
67
+
68
+ /* *
69
+ * Adds the recognized text from the source image to the current document.
70
+ * Invalid if BeginDocument not yet called.
71
+ *
72
+ * Note that this API is a bit weird but is designed to fit into the
73
+ * current TessBaseAPI implementation where the api has lots of state
74
+ * information that we might want to add in.
75
+ */
76
+ bool AddImage (TessBaseAPI* api);
77
+
78
+ /* *
79
+ * Finishes the document and finalizes the output data
80
+ * Invalid if BeginDocument not yet called.
81
+ */
82
+ bool EndDocument ();
83
+
84
+ const char * file_extension () const {
85
+ return file_extension_;
86
+ }
87
+ const char * title () const {
88
+ return title_.c_str ();
89
+ }
90
+
91
+ // Is everything fine? Otherwise something went wrong.
92
+ bool happy () {
93
+ return happy_;
94
+ }
95
+
96
+ /* *
97
+ * Returns the index of the last image given to AddImage
98
+ * (i.e. images are incremented whether the image succeeded or not)
99
+ *
100
+ * This is always defined. It means either the number of the
101
+ * current image, the last image ended, or in the completed document
102
+ * depending on when in the document lifecycle you are looking at it.
103
+ * Will return -1 if a document was never started.
104
+ */
105
+ int imagenum () const {
106
+ return imagenum_;
107
+ }
108
+
109
+ protected:
110
+ /* *
111
+ * Called by concrete classes.
112
+ *
113
+ * outputbase is the name of the output file excluding
114
+ * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
115
+ *
116
+ * extension indicates the file extension to be used for output
117
+ * files. For example "pdf" will produce a .pdf file, and "hocr"
118
+ * will produce .hocr files.
119
+ */
120
+ TessResultRenderer (const char * outputbase, const char * extension);
121
+
122
+ // Hook for specialized handling in BeginDocument()
123
+ virtual bool BeginDocumentHandler ();
124
+
125
+ // This must be overridden to render the OCR'd results
126
+ virtual bool AddImageHandler (TessBaseAPI* api) = 0;
127
+
128
+ // Hook for specialized handling in EndDocument()
129
+ virtual bool EndDocumentHandler ();
130
+
131
+ // Renderers can call this to append '\0' terminated strings into
132
+ // the output string returned by GetOutput.
133
+ // This method will grow the output buffer if needed.
134
+ void AppendString (const char * s);
135
+
136
+ // Renderers can call this to append binary byte sequences into
137
+ // the output string returned by GetOutput. Note that s is not necessarily
138
+ // '\0' terminated (and can contain '\0' within it).
139
+ // This method will grow the output buffer if needed.
140
+ void AppendData (const char * s, int len);
141
+
142
+ private:
143
+ const char * file_extension_; // standard extension for generated output
144
+ STRING title_; // title of document being rendered
145
+ int imagenum_; // index of last image added
146
+
147
+ FILE* fout_; // output file pointer
148
+ TessResultRenderer* next_; // Can link multiple renderers together
149
+ bool happy_; // I get grumpy when the disk fills up, etc.
141
150
};
142
151
143
152
/* *
144
153
* Renders tesseract output into a plain UTF-8 text string
145
154
*/
146
155
class TESS_API TessTextRenderer : public TessResultRenderer {
147
156
public:
148
- explicit TessTextRenderer (const char * outputbase);
157
+ explicit TessTextRenderer (const char * outputbase);
149
158
150
159
protected:
151
160
bool AddImageHandler (TessBaseAPI* api) override ;
@@ -156,8 +165,8 @@ class TESS_API TessTextRenderer : public TessResultRenderer {
156
165
*/
157
166
class TESS_API TessHOcrRenderer : public TessResultRenderer {
158
167
public:
159
- explicit TessHOcrRenderer (const char * outputbase, bool font_info);
160
- explicit TessHOcrRenderer (const char * outputbase);
168
+ explicit TessHOcrRenderer (const char * outputbase, bool font_info);
169
+ explicit TessHOcrRenderer (const char * outputbase);
161
170
162
171
protected:
163
172
bool BeginDocumentHandler () override ;
@@ -171,16 +180,15 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
171
180
/* *
172
181
* Renders tesseract output into an alto text string
173
182
*/
174
- class TESS_API TessAltoRenderer : public TessResultRenderer {
175
- public:
176
- explicit TessAltoRenderer (const char *outputbase);
177
-
178
- protected:
179
- bool BeginDocumentHandler () override ;
180
- bool AddImageHandler (TessBaseAPI* api) override ;
181
- bool EndDocumentHandler () override ;
183
+ class TESS_API TessAltoRenderer : public TessResultRenderer {
184
+ public:
185
+ explicit TessAltoRenderer (const char * outputbase);
182
186
183
- };
187
+ protected:
188
+ bool BeginDocumentHandler () override ;
189
+ bool AddImageHandler (TessBaseAPI* api) override ;
190
+ bool EndDocumentHandler () override ;
191
+ };
184
192
185
193
/* *
186
194
* Renders Tesseract output into a TSV string
@@ -196,7 +204,7 @@ class TESS_API TessTsvRenderer : public TessResultRenderer {
196
204
bool EndDocumentHandler () override ;
197
205
198
206
private:
199
- bool font_info_; // whether to print font information
207
+ bool font_info_; // whether to print font information
200
208
};
201
209
202
210
/* *
@@ -206,7 +214,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
206
214
public:
207
215
// datadir is the location of the TESSDATA. We need it because
208
216
// we load a custom PDF font from this location.
209
- TessPDFRenderer (const char * outputbase, const char * datadir, bool textonly = false );
217
+ TessPDFRenderer (const char * outputbase, const char * datadir,
218
+ bool textonly = false );
210
219
211
220
protected:
212
221
bool BeginDocumentHandler () override ;
@@ -227,21 +236,21 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
227
236
// Bookkeeping only. DIY = Do It Yourself.
228
237
void AppendPDFObjectDIY (size_t objectsize);
229
238
// Bookkeeping + emit data.
230
- void AppendPDFObject (const char * data);
239
+ void AppendPDFObject (const char * data);
231
240
// Create the /Contents object for an entire page.
232
241
char * GetPDFTextObjects (TessBaseAPI* api, double width, double height);
233
242
// Turn an image into a PDF object. Only transcode if we have to.
234
243
static bool imageToPDFObj (Pix* pix, const char * filename, long int objnum,
235
- char ** pdf_object, long int * pdf_object_size, const int jpg_quality);
244
+ char ** pdf_object, long int * pdf_object_size,
245
+ int jpg_quality);
236
246
};
237
247
238
-
239
248
/* *
240
249
* Renders tesseract output into a plain UTF-8 text string in UNLV format
241
250
*/
242
251
class TESS_API TessUnlvRenderer : public TessResultRenderer {
243
252
public:
244
- explicit TessUnlvRenderer (const char * outputbase);
253
+ explicit TessUnlvRenderer (const char * outputbase);
245
254
246
255
protected:
247
256
bool AddImageHandler (TessBaseAPI* api) override ;
@@ -263,7 +272,7 @@ class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
263
272
*/
264
273
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
265
274
public:
266
- explicit TessBoxTextRenderer (const char * outputbase);
275
+ explicit TessBoxTextRenderer (const char * outputbase);
267
276
268
277
protected:
269
278
bool AddImageHandler (TessBaseAPI* api) override ;
@@ -293,7 +302,7 @@ class TESS_API TessOsdRenderer : public TessResultRenderer {
293
302
bool AddImageHandler (TessBaseAPI* api) override ;
294
303
};
295
304
296
- #endif // ndef DISABLED_LEGACY_ENGINE
305
+ #endif // ndef DISABLED_LEGACY_ENGINE
297
306
298
307
} // namespace tesseract.
299
308
0 commit comments