|
| 1 | +#include "test-tool.h" |
| 2 | + |
/*
 * U+FFFD REPLACEMENT CHARACTER, written out whenever the input cannot be
 * passed through as-is. Spelled as its explicit UTF-8 byte sequence so the
 * emitted bytes do not depend on the encoding of this source file.
 */
static const char *utf8_replace_character = "\xef\xbf\xbd";
| 4 | + |
| 5 | +/* |
| 6 | + * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded |
| 7 | + * in an XML file. |
| 8 | + */ |
| 9 | +int cmd__xml_encode(int argc, const char **argv) |
| 10 | +{ |
| 11 | + unsigned char buf[1024], tmp[4], *tmp2 = NULL; |
| 12 | + ssize_t cur = 0, len = 1, remaining = 0; |
| 13 | + unsigned char ch; |
| 14 | + |
| 15 | + for (;;) { |
| 16 | + if (++cur == len) { |
| 17 | + len = xread(0, buf, sizeof(buf)); |
| 18 | + if (!len) |
| 19 | + return 0; |
| 20 | + if (len < 0) |
| 21 | + die_errno("Could not read <stdin>"); |
| 22 | + cur = 0; |
| 23 | + } |
| 24 | + ch = buf[cur]; |
| 25 | + |
| 26 | + if (tmp2) { |
| 27 | + if ((ch & 0xc0) != 0x80) { |
| 28 | + fputs(utf8_replace_character, stdout); |
| 29 | + tmp2 = 0; |
| 30 | + cur--; |
| 31 | + continue; |
| 32 | + } |
| 33 | + *tmp2 = ch; |
| 34 | + tmp2++; |
| 35 | + if (--remaining == 0) { |
| 36 | + fwrite(tmp, tmp2 - tmp, 1, stdout); |
| 37 | + tmp2 = 0; |
| 38 | + } |
| 39 | + continue; |
| 40 | + } |
| 41 | + |
| 42 | + if (!(ch & 0x80)) { |
| 43 | + /* 0xxxxxxx */ |
| 44 | + if (ch == '&') |
| 45 | + fputs("&", stdout); |
| 46 | + else if (ch == '\'') |
| 47 | + fputs("'", stdout); |
| 48 | + else if (ch == '"') |
| 49 | + fputs(""", stdout); |
| 50 | + else if (ch == '<') |
| 51 | + fputs("<", stdout); |
| 52 | + else if (ch == '>') |
| 53 | + fputs(">", stdout); |
| 54 | + else if (ch >= 0x20) |
| 55 | + fputc(ch, stdout); |
| 56 | + else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) |
| 57 | + fprintf(stdout, "&#x%02x;", ch); |
| 58 | + else |
| 59 | + fputs(utf8_replace_character, stdout); |
| 60 | + } else if ((ch & 0xe0) == 0xc0) { |
| 61 | + /* 110XXXXx 10xxxxxx */ |
| 62 | + tmp[0] = ch; |
| 63 | + remaining = 1; |
| 64 | + tmp2 = tmp + 1; |
| 65 | + } else if ((ch & 0xf0) == 0xe0) { |
| 66 | + /* 1110XXXX 10Xxxxxx 10xxxxxx */ |
| 67 | + tmp[0] = ch; |
| 68 | + remaining = 2; |
| 69 | + tmp2 = tmp + 1; |
| 70 | + } else if ((ch & 0xf8) == 0xf0) { |
| 71 | + /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ |
| 72 | + tmp[0] = ch; |
| 73 | + remaining = 3; |
| 74 | + tmp2 = tmp + 1; |
| 75 | + } else |
| 76 | + fputs(utf8_replace_character, stdout); |
| 77 | + } |
| 78 | + |
| 79 | + return 0; |
| 80 | +} |
0 commit comments