Skip to content

Commit 4c61bca

Browse files
committed
Fix export of HTML when using UTF-8 (Issue #526)
1 parent 8d1c067 commit 4c61bca

File tree

2 files changed

+42
-7
lines changed

2 files changed

+42
-7
lines changed

CHANGES.md

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
management and fix potential double-free bugs.
66
- Updated configure script to look for zlib with pkg-config (Issue #519)
77
- Updated markdown support code to mmd.
8+
- Fixed export of UTF-8 HTML (Issue #526)
89
- Fixed handling of whitespace-only nodes (Issue #528)
910
- Fixed handling of tabs in PRE nodes (Issue #529)
1011
- Fixed case sensitivity of link targets (Issue #530)

htmldoc/iso8859.cxx

+41 -7
Original file line number | Diff line number | Diff line change
@@ -2,17 +2,13 @@
22
* ISO-8859-1 conversion routines for HTMLDOC, an HTML document
33
* processing program.
44
*
5-
* Copyright 2011-2019 by Michael R Sweet.
5+
* Copyright 2011-2024 by Michael R Sweet.
66
* Copyright 1997-2010 by Easy Software Products. All rights reserved.
77
*
88
* This program is free software. Distribution and use rights are outlined in
99
* the file "COPYING".
1010
*/
1111

12-
/*
13-
* Include necessary headers.
14-
*/
15-
1612
#include <stdio.h>
1713
#include <stdlib.h>
1814

@@ -441,11 +437,49 @@ iso8859(uchar value) /* I - ISO-8859-1 equivalent */
441437

442438
if (iso8859_names[value] == NULL)
443439
{
444-
buf[0] = value;
445-
buf[1] = '\0';
440+
if (value < 127)
441+
{
442+
// ASCII...
443+
buf[0] = value;
444+
buf[1] = '\0';
445+
}
446+
else if (_htmlUTF8)
447+
{
448+
// UTF-8...
449+
int unich = _htmlUnicode[value]; // Unicode character
450+
451+
if (unich < 0x400)
452+
{
453+
buf[0] = 0xc0 | (unich >> 6);
454+
buf[1] = 0x80 | (unich & 0x3f);
455+
buf[2] = '\0';
456+
}
457+
else if (unich < 0x10000)
458+
{
459+
buf[0] = 0xe0 | (unich >> 12);
460+
buf[1] = 0x80 | ((unich >> 6) & 0x3f);
461+
buf[2] = 0x80 | (unich & 0x3f);
462+
buf[3] = '\0';
463+
}
464+
else
465+
{
466+
buf[0] = 0xf0 | (unich >> 18);
467+
buf[1] = 0x80 | ((unich >> 12) & 0x3f);
468+
buf[2] = 0x80 | ((unich >> 6) & 0x3f);
469+
buf[3] = 0x80 | (unich & 0x3f);
470+
buf[4] = '\0';
471+
}
472+
}
473+
else
474+
{
475+
// Character-set neutral way to map to Unicode...
476+
snprintf((char *)buf, sizeof(buf), "&#%d;", _htmlUnicode[value]);
477+
}
446478
}
447479
else
480+
{
448481
snprintf((char *)buf, sizeof(buf), "&%s;", iso8859_names[value]->name);
482+
}
449483

450484
return (buf);
451485
}

0 commit comments

Comments
 (0)