Skip to content

Commit 3ee87b3

Browse files
committed
fixup! tests: optionally write results as JUnit-style .xml
Make sure to write the .xml in UTF-8 encoding. We also need to make sure that invalid UTF-8 encoding is turned into valid UTF-8 (using the Replacement Character, \uFFFD) because t9902's trace contains such invalid byte sequences, and the task that uploads the test results would refuse to do anything if it was asked to parse an .xml file with invalid UTF-8 in it. Signed-off-by: Johannes Schindelin <[email protected]>
1 parent 3036485 commit 3ee87b3

File tree

5 files changed

+84
-12
lines changed

5 files changed

+84
-12
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ TEST_BUILTINS_OBJS += test-submodule-config.o
757757
TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
758758
TEST_BUILTINS_OBJS += test-subprocess.o
759759
TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
760+
TEST_BUILTINS_OBJS += test-xml-encode.o
760761
TEST_BUILTINS_OBJS += test-wildmatch.o
761762
TEST_BUILTINS_OBJS += test-windows-named-pipe.o
762763
TEST_BUILTINS_OBJS += test-write-cache.o

t/helper/test-tool.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ static struct test_cmd cmds[] = {
5151
{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
5252
{ "subprocess", cmd__subprocess },
5353
{ "urlmatch-normalization", cmd__urlmatch_normalization },
54+
{ "xml-encode", cmd__xml_encode },
5455
{ "wildmatch", cmd__wildmatch },
5556
#ifdef GIT_WINDOWS_NATIVE
5657
{ "windows-named-pipe", cmd__windows_named_pipe },

t/helper/test-tool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ int cmd__submodule_config(int argc, const char **argv);
4747
int cmd__submodule_nested_repo_config(int argc, const char **argv);
4848
int cmd__subprocess(int argc, const char **argv);
4949
int cmd__urlmatch_normalization(int argc, const char **argv);
50+
int cmd__xml_encode(int argc, const char **argv);
5051
int cmd__wildmatch(int argc, const char **argv);
5152
#ifdef GIT_WINDOWS_NATIVE
5253
int cmd__windows_named_pipe(int argc, const char **argv);

t/helper/test-xml-encode.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#include "test-tool.h"
2+
3+
/*
 * XML numeric character reference for U+FFFD (REPLACEMENT CHARACTER),
 * written to <stdout> in place of input that cannot be passed through.
 */
static const char *utf8_replace_character = "&#xfffd;";
4+
5+
/*
6+
* Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
7+
* in an XML file.
8+
*/
9+
int cmd__xml_encode(int argc, const char **argv)
10+
{
11+
unsigned char buf[1024], tmp[4], *tmp2 = NULL;
12+
ssize_t cur = 0, len = 1, remaining = 0;
13+
unsigned char ch;
14+
15+
for (;;) {
16+
if (++cur == len) {
17+
len = xread(0, buf, sizeof(buf));
18+
if (!len)
19+
return 0;
20+
if (len < 0)
21+
die_errno("Could not read <stdin>");
22+
cur = 0;
23+
}
24+
ch = buf[cur];
25+
26+
if (tmp2) {
27+
if ((ch & 0xc0) != 0x80) {
28+
fputs(utf8_replace_character, stdout);
29+
tmp2 = 0;
30+
cur--;
31+
continue;
32+
}
33+
*tmp2 = ch;
34+
tmp2++;
35+
if (--remaining == 0) {
36+
fwrite(tmp, tmp2 - tmp, 1, stdout);
37+
tmp2 = 0;
38+
}
39+
continue;
40+
}
41+
42+
if (!(ch & 0x80)) {
43+
/* 0xxxxxxx */
44+
if (ch == '&')
45+
fputs("&amp;", stdout);
46+
else if (ch == '\'')
47+
fputs("&apos;", stdout);
48+
else if (ch == '"')
49+
fputs("&quot;", stdout);
50+
else if (ch == '<')
51+
fputs("&lt;", stdout);
52+
else if (ch == '>')
53+
fputs("&gt;", stdout);
54+
else if (ch >= 0x20)
55+
fputc(ch, stdout);
56+
else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
57+
fprintf(stdout, "&#x%02x;", ch);
58+
else
59+
fputs(utf8_replace_character, stdout);
60+
} else if ((ch & 0xe0) == 0xc0) {
61+
/* 110XXXXx 10xxxxxx */
62+
tmp[0] = ch;
63+
remaining = 1;
64+
tmp2 = tmp + 1;
65+
} else if ((ch & 0xf0) == 0xe0) {
66+
/* 1110XXXX 10Xxxxxx 10xxxxxx */
67+
tmp[0] = ch;
68+
remaining = 2;
69+
tmp2 = tmp + 1;
70+
} else if ((ch & 0xf8) == 0xf0) {
71+
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
72+
tmp[0] = ch;
73+
remaining = 3;
74+
tmp2 = tmp + 1;
75+
} else
76+
fputs(utf8_replace_character, stdout);
77+
}
78+
79+
return 0;
80+
}

t/test-lib.sh

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -888,18 +888,7 @@ write_junit_xml () {
888888
}
889889

890890
xml_attr_encode () {
891-
# We do not translate CR to &#x0d; because BSD sed does not handle
892-
# \r in the regex. In practice, the output should not even have any
893-
# carriage returns.
894-
printf '%s\n' "$@" |
895-
sed -e 's/&/\&amp;/g' -e "s/'/\&apos;/g" -e 's/"/\&quot;/g' \
896-
-e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
897-
-e "s/$(printf \\x1c)/\\&#xfffd;/g" \
898-
-e "s/$(printf \\x1d)/\\&#xfffd;/g" \
899-
-e "s/$(printf \\x1e)/\\&#xfffd;/g" \
900-
-e "s/$(printf \\x1f)/\\&#xfffd;/g" \
901-
-e 's/ /\&#x09;/g' -e 's/$/\&#x0a;/' -e '$s/&#x0a;$//' |
902-
tr -d '\012\015'
891+
printf '%s\n' "$@" | test-tool xml-encode
903892
}
904893

905894
write_junit_xml_testcase () {

0 commit comments

Comments
 (0)