/*
 * Decode a string of hexadecimal digit pairs into raw 8-bit characters.
 *
 * in        - hex digits to decode; exactly in_length characters are read
 * in_length - number of hex digits; must be even
 * out       - destination buffer; receives in_length / 2 decoded bytes
 *             followed by a terminating 0 byte
 * out_size  - capacity of out in bytes, including room for the terminator
 *
 * Returns the number of decoded bytes (terminator not counted) on success,
 * -EINVAL if in_length is odd or parse_hexdigit() reports a negative value
 * for any character, and -ENOMEM if out cannot hold the decoded bytes plus
 * the terminator.
 *
 * When -EINVAL is returned because of a bad digit, the bytes decoded so far
 * have already been stored in out and no terminator has been written.
 */
static ssize_t hexstr_to_8bitchars (const char* in, size_t in_length, char* out, size_t out_size)
{
	size_t decoded_len;
	size_t i;
	int d1, d2;

	/* odd number of chars check */
	if (in_length & 0x1)
		return -EINVAL;

	decoded_len = in_length >> 1;

	/*
	 * Need decoded_len bytes plus one for the terminator. Comparing this
	 * way (instead of "out_size - 1 < decoded_len") avoids the unsigned
	 * wrap-around that let out_size == 0 slip through the check.
	 */
	if (decoded_len >= out_size)
		return -ENOMEM;

	for (i = 0; i < decoded_len; ++i) {
		d1 = parse_hexdigit(*in++);
		if (d1 < 0)
			return -EINVAL;
		d2 = parse_hexdigit(*in++);
		if (d2 < 0)
			return -EINVAL;
		*out++ = (d1 << 4) | d2;
	}

	*out = 0;
	return decoded_len;
}
// pct-encoded = "%" HEXDIG HEXDIG TextCursor parse_pct_encoded(TextCursor cursor) { char c = get_char(cursor); if (c != '%') throw ParseError(); parse_hexdigit(cursor); return parse_hexdigit(cursor); }
// h16 = 1*4HEXDIG TextCursor parse_h16(TextCursor cursor) { cursor = parse_hexdigit(cursor); for (int i=0; i<3; i++) { try { cursor = parse_hexdigit(cursor); } catch(ParseError) { return cursor; } } return cursor; }
/* Decodes a JSON string token (enclosed in double quotes) by translating escape sequences into UTF-8 sequences. Assumes the token is formatted correctly. Returns a dynamically allocated string, or NULL if allocation failed. */ static char *decode_string(const char *begin, const char *end, size_t *size) { const char *p; char *buf = malloc(end - begin - 1), *q = buf; if (buf == NULL) return NULL; for (p = begin + 1; p < end - 1; ++p) { if (*p != '\\') { *q++ = *p; } else switch (*++p) { default: *q++ = *p; break; /* one of '/', '\' or '"' */ case 'b': *q++ = '\b'; break; case 'f': *q++ = '\f'; break; case 'n': *q++ = '\n'; break; case 'r': *q++ = '\r'; break; case 't': *q++ = '\t'; break; case 'u': { /* decode character encoded with four hex digits: */ unsigned u = (unsigned)parse_hexdigit(p[1]) << 12 | (unsigned)parse_hexdigit(p[2]) << 8 | (unsigned)parse_hexdigit(p[3]) << 4 | (unsigned)parse_hexdigit(p[4]) << 0; if (u < 0x80) /* ASCII character */ { *q++ = u; } else if (u < 0x800) /* 2-byte UTF-8 sequence */ { *q++ = 0xc0 | ((u >> 6) ); *q++ = 0x80 | ((u ) &0x3f); } else /* 3-byte UTF-8 sequence */ { *q++ = 0xe0 | ((u >> 12) ); *q++ = 0x80 | ((u >> 6)&0x3f); *q++ = 0x80 | ((u )&0x3f); } p += 4; } }
/*
 * Convert a run of hex digit pairs into bytes.
 *
 * result - destination buffer; one byte is stored per digit pair, so it
 *          must have room for up to len / 2 bytes
 * in     - input characters; a ':' found where a pair would start is
 *          skipped, so "ab:cd" is accepted
 * len    - number of characters available at in
 *
 * Returns the number of bytes stored in result.
 *
 * Digits are passed to parse_hexdigit() and its results are combined
 * without validation, as before. A trailing unpaired character is ignored.
 */
int hexread(unsigned char *result, const unsigned char *in, unsigned int len)
{
    const unsigned char *pos = in;
    const unsigned char *const end = in + len;
    unsigned char *res = result;
    char dig1, dig2;

    /*
     * Compare remaining length against 2 rather than evaluating "len - 2":
     * the latter wraps around for len < 2 and sent the loop far past the
     * end of the input.
     */
    while (end - pos >= 2) {
        if (*pos == ':') {
            pos++;
            /* Skipping the separator may leave less than a full pair. */
            if (end - pos < 2)
                break;
        }
        dig1 = pos[0];
        dig2 = pos[1];
        *res++ = parse_hexdigit(dig1) << 4 | parse_hexdigit(dig2);
        pos += 2;
    }

    return (res - result);
}
/* array in order of values RECODE_* */ static const coder recoders[STR_ENCODING_UNKNOWN][2] = { [STR_ENCODING_7BIT_HEX_PAD_0] = { hexstr_7bit_to_char_pad_0, char_to_hexstr_7bit_pad_0 }, [STR_ENCODING_8BIT_HEX] = { hexstr_to_8bitchars, chars8bit_to_hexstr }, [STR_ENCODING_UCS2_HEX] = { hexstr_ucs2_to_utf8, utf8_to_hexstr_ucs2 }, [STR_ENCODING_7BIT] = { just_copy, just_copy }, [STR_ENCODING_7BIT_HEX_PAD_1] = { hexstr_7bit_to_char_pad_1, char_to_hexstr_7bit_pad_1 }, [STR_ENCODING_7BIT_HEX_PAD_2] = { hexstr_7bit_to_char_pad_2, char_to_hexstr_7bit_pad_2 }, [STR_ENCODING_7BIT_HEX_PAD_3] = { hexstr_7bit_to_char_pad_3, char_to_hexstr_7bit_pad_3 }, [STR_ENCODING_7BIT_HEX_PAD_4] = { hexstr_7bit_to_char_pad_4, char_to_hexstr_7bit_pad_4 }, [STR_ENCODING_7BIT_HEX_PAD_5] = { hexstr_7bit_to_char_pad_5, char_to_hexstr_7bit_pad_5 }, [STR_ENCODING_7BIT_HEX_PAD_6] = { hexstr_7bit_to_char_pad_6, char_to_hexstr_7bit_pad_6 }, }; #/* */ EXPORT_DEF ssize_t str_recode(recode_direction_t dir, str_encoding_t encoding, const char* in, size_t in_length, char* out, size_t out_size) { unsigned idx = encoding; if((dir == RECODE_DECODE || dir == RECODE_ENCODE) && idx < ITEMS_OF(recoders)) return (recoders[idx][dir])(in, in_length, out, out_size); return -EINVAL; } #/* */ EXPORT_DEF str_encoding_t get_encoding(recode_direction_t hint, const char* in, size_t length) { if(hint == RECODE_ENCODE) { for(; length; --length, ++in) if(*in & 0x80) return STR_ENCODING_UCS2_HEX; return STR_ENCODING_7BIT_HEX_PAD_0; } else { size_t x; for(x = 0; x < length; ++x) { if(parse_hexdigit(in[x]) < 0) { return STR_ENCODING_7BIT; } } // TODO: STR_ENCODING_7BIT_HEX_PAD_X or STR_ENCODING_8BIT_HEX or STR_ENCODING_UCS2_HEX } return STR_ENCODING_UNKNOWN; }
// Requires one hex digit via parse_hexdigit() (its failure is not handled
// here), then hands parse_hexdigit and the advanced cursor to the
// REPEAT_IGNORING macro.
// NOTE(review): no return statement is visible in this body; presumably
// REPEAT_IGNORING expands to a loop that keeps applying the parser until it
// fails and then returns the cursor -- confirm against the macro definition.
TextCursor parse_at_least_one_hexdigit(TextCursor cursor) { cursor = parse_hexdigit(cursor); REPEAT_IGNORING(parse_hexdigit, cursor); }