|
7 | 7 | #include "cmark-gfm.h"
|
8 | 8 | #include "node.h"
|
9 | 9 | #include "buffer.h"
|
10 |
| -#include "houdini.h" |
11 | 10 | #include "syntax_extension.h"
|
12 | 11 |
|
13 | 12 | #define BUFFER_SIZE 100
|
14 | 13 | #define MAX_INDENT 40
|
15 | 14 |
|
16 | 15 | // Functions to convert cmark_nodes to XML strings.
|
17 | 16 |
|
18 |
| -static void escape_xml(cmark_strbuf *dest, const unsigned char *source, |
19 |
| - bufsize_t length) { |
20 |
| - houdini_escape_html0(dest, source, length, 0); |
| 17 | +// C0 control characters (other than tab, LF and CR), U+FFFE and U+FFFF aren't allowed in XML. |
| 18 | +static const char XML_ESCAPE_TABLE[256] = { |
| 19 | + /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, |
| 20 | + /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 21 | + /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 22 | + /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0, |
| 23 | + /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 24 | + /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 25 | + /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 26 | + /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 27 | + /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 28 | + /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 29 | + /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 30 | + /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, |
| 31 | + /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 32 | + /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 33 | + /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 34 | + /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 35 | +}; |
| 36 | + |
| 37 | +// U+FFFD Replacement Character encoded in UTF-8 |
| 38 | +#define UTF8_REPL "\xEF\xBF\xBD" |
| 39 | + |
| 40 | +static const char *XML_ESCAPES[] = { |
| 41 | + "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;" |
| 42 | +}; |
| 43 | + |
| 44 | +static void escape_xml(cmark_strbuf *ob, const unsigned char *src, |
| 45 | + bufsize_t size) { |
| 46 | + bufsize_t i = 0, org, esc = 0; |
| 47 | + |
| 48 | + while (i < size) { |
| 49 | + org = i; |
| 50 | + while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0) |
| 51 | + i++; |
| 52 | + |
| 53 | + if (i > org) |
| 54 | + cmark_strbuf_put(ob, src + org, i - org); |
| 55 | + |
| 56 | + if (i >= size) |
| 57 | + break; |
| 58 | + |
| 59 | + if (esc == 9) { |
| 60 | + // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to |
| 61 | + // be changed. |
| 62 | + // We know that src[i] is 0xBE or 0xBF. |
| 63 | + if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { |
| 64 | + cmark_strbuf_putc(ob, 0xBD); |
| 65 | + } else { |
| 66 | + cmark_strbuf_putc(ob, src[i]); |
| 67 | + } |
| 68 | + } else { |
| 69 | + cmark_strbuf_puts(ob, XML_ESCAPES[esc]); |
| 70 | + } |
| 71 | + |
| 72 | + i++; |
| 73 | + } |
21 | 74 | }
|
22 | 75 |
|
23 | 76 | struct render_state {
|
|
0 commit comments