@@ -179,6 +179,61 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
179
179
return true ;
180
180
}
181
181
182
+ /* *********************************************************************
183
+ * HOcr TSV Text Renderer interface implementation
184
+ **********************************************************************/
185
+ TessHOcrTsvRenderer::TessHOcrTsvRenderer (const char *outputbase)
186
+ : TessResultRenderer(outputbase, " hocr.tsv" ) {
187
+ font_info_ = false ;
188
+ }
189
+
190
+ TessHOcrTsvRenderer::TessHOcrTsvRenderer (const char *outputbase, bool font_info)
191
+ : TessResultRenderer(outputbase, " hocr.tsv" ) {
192
+ font_info_ = font_info;
193
+ }
194
+
195
+ bool TessHOcrTsvRenderer::BeginDocumentHandler () {
196
+ AppendString (
197
+ " <?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n "
198
+ " <!DOCTYPE html PUBLIC \" -//W3C//DTD XHTML 1.0 Transitional//EN\"\n "
199
+ " \" http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\" >\n "
200
+ " <html xmlns=\" http://www.w3.org/1999/xhtml\" xml:lang=\" en\" "
201
+ " lang=\" en\" >\n <head>\n <title>\n " );
202
+ AppendString (title ());
203
+ AppendString (
204
+ " </title>\n "
205
+ " <meta http-equiv=\" Content-Type\" content=\" text/html;"
206
+ " charset=utf-8\" />\n "
207
+ " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
208
+ " ' />\n "
209
+ " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
210
+ " ocr_line ocrx_word" );
211
+ if (font_info_)
212
+ AppendString (
213
+ " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf" );
214
+ AppendString (
215
+ " '/>\n "
216
+ " </head>\n <body>\n " );
217
+
218
+ return true ;
219
+ }
220
+
221
+ bool TessHOcrTsvRenderer::EndDocumentHandler () {
222
+ AppendString (" </body>\n </html>\n " );
223
+
224
+ return true ;
225
+ }
226
+
227
+ bool TessHOcrTsvRenderer::AddImageHandler (TessBaseAPI* api) {
228
+ char * hocr = api->GetHOCRText (imagenum ());
229
+ if (hocr == NULL ) return false ;
230
+
231
+ AppendString (hocr);
232
+ delete[] hocr;
233
+
234
+ return true ;
235
+ }
236
+
182
237
/* *********************************************************************
183
238
* UNLV Text Renderer interface implementation
184
239
**********************************************************************/
0 commit comments