81 lines
1.7 KiB
C
81 lines
1.7 KiB
C
#include "test-tool.h"
|
|
|
|
/*
 * Written in place of input that cannot be passed through: the XML numeric
 * character reference for U+FFFD REPLACEMENT CHARACTER. It is spelled as an
 * ASCII reference (like the "&#x..;" references used for TAB/LF/CR) so that
 * this source file contains no raw non-ASCII bytes.
 */
static const char *utf8_replace_character = "&#xfffd;";
|
|
|
|
/*
 * Reads (possibly malformed) UTF-8 from <stdin> and writes it to <stdout>,
 * escaped so that it can be embedded in an XML file.
 */
|
|
int cmd__xml_encode(int argc UNUSED, const char **argv UNUSED)
{
	/*
	 * Both parameters are unused; input comes from file descriptor 0 and
	 * output goes to stdout. Returns 0 once xread() reports end of input,
	 * and calls die_errno() if xread() reports an error.
	 *
	 * buf:       chunk most recently read from stdin
	 * tmp:       bytes of the multi-byte sequence currently being collected
	 * tmp2:      next free slot in tmp, or NULL when no sequence is pending
	 * remaining: continuation bytes still expected for the pending sequence
	 */
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	ssize_t cur = 0, len = 1, remaining = 0;
	unsigned char ch;

	for (;;) {
		/*
		 * cur is pre-incremented, so the initial cur = 0 / len = 1
		 * forces a read on the very first iteration.
		 */
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (len < 0)
				die_errno("Could not read <stdin>");
			if (!len) {
				/*
				 * Input ended in the middle of a multi-byte
				 * sequence: report it instead of silently
				 * dropping the collected bytes.
				 */
				if (tmp2)
					fputs(utf8_replace_character, stdout);
				return 0;
			}
			cur = 0;
		}
		ch = buf[cur];

		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				/*
				 * Not a continuation byte: replace the broken
				 * sequence, then step back so this byte is
				 * processed again as the start of a new one.
				 */
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			if (--remaining == 0) {
				/* sequence complete: pass it through verbatim */
				fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}

		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&amp;", stdout);
			else if (ch == '\'')
				fputs("&apos;", stdout);
			else if (ch == '"')
				fputs("&quot;", stdout);
			else if (ch == '<')
				fputs("&lt;", stdout);
			else if (ch == '>')
				fputs("&gt;", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				/* TAB, LF and CR become numeric references */
				fprintf(stdout, "&#x%02x;", ch);
			else
				/* every other control character is replaced */
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
		} else {
			/* stray continuation byte or invalid lead byte */
			fputs(utf8_replace_character, stdout);
		}
	}

	return 0;
}
|