/*
 * Decode the characters stored in buf one at a time and emit each of them
 * through do_esc_char().
 *
 * buf, buflen  Input bytes and their count.
 * type         The bits selected by BUF_TYPE_WIDTH_MASK give the number of
 *              bytes per input character (4, 2 or 1), or 0 when the input is
 *              UTF-8. If BUF_TYPE_CONVUTF8 is set, every decoded character
 *              is re-encoded with UTF8_putc() and escaped byte by byte.
 * flags        Passed on to do_esc_char(). When ASN1_STRFLGS_ESC_2253 is
 *              set, the first and last characters are additionally tagged
 *              with CHARTYPE_FIRST_ESC_2253 / CHARTYPE_LAST_ESC_2253.
 * quotes, io_ch, arg
 *              Passed unchanged to do_esc_char().
 *
 * Returns the sum of the lengths reported by do_esc_char(), or -1 if the
 * input is malformed, the width is unknown, or do_esc_char() fails.
 */
static int do_buf(unsigned char *buf, int buflen, int type,
                  unsigned char flags, char *quotes, char_io *io_ch,
                  void *arg)
{
    int i, outlen, len, width;
    unsigned char orflags, *p, *q;
    unsigned long c;

    if (buflen < 0)
        return -1;

    /*
     * A fixed-width string must hold a whole number of characters,
     * otherwise p would step over q and the loop below would read past the
     * end of the buffer.
     */
    width = type & BUF_TYPE_WIDTH_MASK;
    if ((width == 4 && (buflen & 3) != 0) ||
        (width == 2 && (buflen & 1) != 0))
        return -1;

    p = buf;
    q = buf + buflen;
    outlen = 0;
    while (p != q) {
        if (p == buf && (flags & ASN1_STRFLGS_ESC_2253))
            orflags = CHARTYPE_FIRST_ESC_2253;
        else
            orflags = 0;

        switch (width) {
        case 4:
            c = ((unsigned long)*p++) << 24;
            c |= ((unsigned long)*p++) << 16;
            c |= ((unsigned long)*p++) << 8;
            c |= *p++;
            break;

        case 2:
            c = ((unsigned long)*p++) << 8;
            c |= *p++;
            break;

        case 1:
            c = *p++;
            break;

        case 0:
            /* Only the bytes that are still unread may be examined. */
            i = UTF8_getc(p, (int)(q - p), &c);
            if (i < 0)
                return -1;      /* Invalid UTF8String */
            p += i;
            break;

        default:
            return -1;          /* invalid width */
        }

        /*
         * OR the flag in rather than assigning it: in a one-character
         * string the same character is both first and last and must keep
         * both escaping rules.
         */
        if (p == q && (flags & ASN1_STRFLGS_ESC_2253))
            orflags |= CHARTYPE_LAST_ESC_2253;

        if (type & BUF_TYPE_CONVUTF8) {
            unsigned char utfbuf[6];
            int utflen;

            utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
            /* Report an unencodable value instead of silently dropping it. */
            if (utflen < 0)
                return -1;
            for (i = 0; i < utflen; i++) {
                /*
                 * We don't need to worry about setting orflags correctly
                 * because if utflen==1 its value will be correct anyway
                 * otherwise each character will be > 0x7f and so the
                 * character will never be escaped on first and last.
                 */
                len = do_esc_char(utfbuf[i], (unsigned char)(flags | orflags),
                                  quotes, io_ch, arg);
                if (len < 0)
                    return -1;
                outlen += len;
            }
        } else {
            len = do_esc_char(c, (unsigned char)(flags | orflags), quotes,
                              io_ch, arg);
            if (len < 0)
                return -1;
            outlen += len;
        }
    }
    return outlen;
}
/*
 * Walk a string of len bytes starting at p, decode it one character at a
 * time according to inform, and hand every character to rfunc.
 *
 * inform  MBSTRING_ASC (1 byte per character), MBSTRING_BMP (2 bytes, big
 *         endian), MBSTRING_UNIV (4 bytes, big endian); any other value is
 *         decoded with UTF8_getc().
 * rfunc   Optional callback, called as rfunc(value, arg). A result <= 0
 *         stops the traversal and is returned to the caller as is.
 *
 * Returns 1 when the whole string was consumed, -1 when the input is
 * malformed (negative length, truncated character, UTF8_getc() failure),
 * or the non-positive value returned by rfunc.
 */
static int traverse_string(const unsigned char *p, int len, int inform,
                           int (*rfunc) (unsigned long value, void *in),
                           void *arg)
{
    unsigned long value;
    int ret;

    /* A negative length would never count down to zero. */
    if (len < 0)
        return -1;

    while (len) {
        if (inform == MBSTRING_ASC) {
            value = *p++;
            len--;
        } else if (inform == MBSTRING_BMP) {
            /* Refuse a truncated character instead of reading past the end. */
            if (len < 2)
                return -1;
            value = (unsigned long)*p++ << 8;
            value |= *p++;
            len -= 2;
        } else if (inform == MBSTRING_UNIV) {
            if (len < 4)
                return -1;
            value = (unsigned long)*p++ << 24;
            value |= (unsigned long)*p++ << 16;
            value |= (unsigned long)*p++ << 8;
            value |= *p++;
            len -= 4;
        } else {
            ret = UTF8_getc(p, len, &value);
            /* Zero bytes consumed would make no progress; treat as error. */
            if (ret <= 0)
                return -1;
            len -= ret;
            p += ret;
        }
        if (rfunc) {
            ret = rfunc(value, arg);
            if (ret <= 0)
                return ret;
        }
    }
    return 1;
}
/*
 * Walk a string of len bytes starting at p, decode it one character at a
 * time according to inform, and hand every character to rfunc.
 *
 * inform  MBSTRING_ASC (1 byte per character), MBSTRING_BMP (2 bytes, big
 *         endian, surrogates rejected), MBSTRING_UNIV (4 bytes, big endian,
 *         surrogates and values above UNICODE_MAX rejected); any other
 *         value is decoded with UTF8_getc().
 * rfunc   Optional callback, called as rfunc(value, arg). A result <= 0
 *         stops the traversal and is returned to the caller as is.
 *
 * Returns 1 when the whole string was consumed, -1 when the input is
 * malformed (negative length, truncated or out-of-range character,
 * UTF8_getc() failure), or the non-positive value returned by rfunc.
 */
static int traverse_string(const unsigned char *p, int len, int inform,
                           int (*rfunc)(unsigned long value, void *in),
                           void *arg)
{
    unsigned long value;
    int ret;

    /* A negative length would never count down to zero. */
    if (len < 0)
        return -1;

    while (len) {
        switch (inform) {
        case MBSTRING_ASC:
            value = *p++;
            len--;
            break;

        case MBSTRING_BMP:
            /* Refuse a truncated character instead of reading past the end. */
            if (len < 2)
                return -1;
            value = (unsigned long)*p++ << 8;
            value |= *p++;
            /* BMP is explicitly defined to not support surrogates */
            if (UNICODE_IS_SURROGATE(value))
                return -1;
            len -= 2;
            break;

        case MBSTRING_UNIV:
            if (len < 4)
                return -1;
            value = (unsigned long)*p++ << 24;
            value |= (unsigned long)*p++ << 16;
            value |= (unsigned long)*p++ << 8;
            value |= *p++;
            if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value))
                return -1;
            len -= 4;
            break;

        default:
            ret = UTF8_getc(p, len, &value);
            /* Zero bytes consumed would make no progress; treat as error. */
            if (ret <= 0)
                return -1;
            len -= ret;
            p += ret;
            break;
        }
        if (rfunc) {
            ret = rfunc(value, arg);
            if (ret <= 0)
                return ret;
        }
    }
    return 1;
}
/*
 * Exhaustive regression test for UTF8_getc() and UTF8_putc().
 *
 * The first half feeds every possible 1-, 2-, 3- and 4-byte lead/trail
 * combination to UTF8_getc() and checks the return code, the decoded value,
 * and that the output variable is left at UNCHANGED whenever an error is
 * expected. The second half encodes every code point up to 0x10FFFF with
 * UTF8_putc(), checks the reported length, that nothing is written when the
 * buffer is too small, that no bytes beyond the reported length are
 * touched, and that UTF8_getc() decodes the result back to the same value.
 *
 * Returns 0 when the end is reached; what happens on a failed check is
 * decided by the ASSERT macro, which is defined outside this block.
 */
int main(void)
{
    /* Scratch buffer; the initial contents only determine its size. */
    unsigned char testbuf[] = "012345";
    /* All-zero reference of the same size, used to detect stray writes. */
    const unsigned char zerobuf[sizeof testbuf] = { 0 };
    unsigned long value;
    unsigned int i, j, k, l;
    int ret;

    /*
     * First, verify UTF8_getc()
     */
    /* A zero-length input is expected to return 0 and leave value alone. */
    value = UNCHANGED;
    ret = UTF8_getc(testbuf, 0, &value);
    ASSERT(ret == 0);
    ASSERT(value == UNCHANGED);

    /* check all valid single-byte chars */
    for (i = 0; i < 0x80; i++) {
        testbuf[0] = i;
        ret = UTF8_getc(testbuf, 1, &value);
        ASSERT(ret == 1);
        ASSERT(value == i);
        /* Extra available input must not change the result. */
        ret = UTF8_getc(testbuf, 2, &value);
        ASSERT(ret == 1);
        ASSERT(value == i);
    }

    /*
     * Verify failure on all invalid initial bytes:
     *  0x80 - 0xBF following bytes only
     *  0xC0 - 0xC1 used to be in non-shortest forms
     *  0xF5 - 0xFD used to be initial for 5 and 6 byte sequences
     *  0xFE - 0xFF have never been valid in utf-8
     */
    for (i = 0x80; i < 0xC2; i++) {
        value = UNCHANGED;
        testbuf[0] = i;
        ret = UTF8_getc(testbuf, 1, &value);
        ASSERT(ret == -2);
        ASSERT(value == UNCHANGED);
    }
    for (i = 0xF5; i < 0x100; i++) {
        value = UNCHANGED;
        testbuf[0] = i;
        ret = UTF8_getc(testbuf, 1, &value);
        ASSERT(ret == -2);
        ASSERT(value == UNCHANGED);
    }

    /*
     * Verify handling of all two-byte sequences
     */
    for (i = 0xC2; i < 0xE0; i++) {
        testbuf[0] = i;
        for (j = 0; j < 0x100; j++) {
            testbuf[1] = j;

            /*
             * One byte short: -1 is expected whatever the trailing byte
             * is, and value must stay untouched.
             */
            value = UNCHANGED;
            ret = UTF8_getc(testbuf, 1, &value);
            ASSERT(ret == -1);
            ASSERT(value == UNCHANGED);

            ret = UTF8_getc(testbuf, 2, &value);

            /* outside range of trailing bytes */
            if (j < 0x80 || j > 0xBF) {
                ASSERT(ret == -3);
                ASSERT(value == UNCHANGED);
                continue;
            }

            /* valid */
            ASSERT(ret == 2);
            /* Low 6 bits come from the trailing byte, the rest from the lead. */
            ASSERT((value & 0x3F) == (j & 0x3F));
            ASSERT(value >> 6 == (i & 0x1F));
        }
    }

    /*
     * Verify handling of all three-byte sequences
     */
    for (i = 0xE0; i < 0xF0; i++) {
        testbuf[0] = i;
        for (j = 0; j < 0x100; j++) {
            testbuf[1] = j;
            for (k = 0; k < 0x100; k++) {
                testbuf[2] = k;

                /* One byte short: -1 expected before any other check. */
                value = UNCHANGED;
                ret = UTF8_getc(testbuf, 2, &value);
                ASSERT(ret == -1);
                ASSERT(value == UNCHANGED);

                ret = UTF8_getc(testbuf, 3, &value);

                /* outside range of trailing bytes */
                if (j < 0x80 || j > 0xBF || k < 0x80 || k > 0xBF) {
                    ASSERT(ret == -3);
                    ASSERT(value == UNCHANGED);
                    continue;
                }

                /* non-shortest form */
                if (i == 0xE0 && j < 0xA0) {
                    ASSERT(ret == -4);
                    ASSERT(value == UNCHANGED);
                    continue;
                }

                /* surrogate pair code point */
                if (i == 0xED && j > 0x9F) {
                    ASSERT(ret == -2);
                    ASSERT(value == UNCHANGED);
                    continue;
                }

                /* valid: 4 bits from the lead, 6 from each trailing byte */
                ASSERT(ret == 3);
                ASSERT((value & 0x3F) == (k & 0x3F));
                ASSERT(((value >> 6) & 0x3F) == (j & 0x3F));
                ASSERT(value >> 12 == (i & 0x0F));
            }
        }
    }

    /*
     * Verify handling of all four-byte sequences
     */
    for (i = 0xF0; i < 0xF5; i++) {
        testbuf[0] = i;
        for (j = 0; j < 0x100; j++) {
            testbuf[1] = j;
            for (k = 0; k < 0x100; k++) {
                testbuf[2] = k;
                for (l = 0; l < 0x100; l++) {
                    testbuf[3] = l;

                    /* One byte short: -1 expected before any other check. */
                    value = UNCHANGED;
                    ret = UTF8_getc(testbuf, 3, &value);
                    ASSERT(ret == -1);
                    ASSERT(value == UNCHANGED);

                    ret = UTF8_getc(testbuf, 4, &value);

                    /* outside range of trailing bytes */
                    if (j < 0x80 || j > 0xBF || k < 0x80 || k > 0xBF || l < 0x80 || l > 0xBF) {
                        ASSERT(ret == -3);
                        ASSERT(value == UNCHANGED);
                        continue;
                    }

                    /* non-shortest form */
                    if (i == 0xF0 && j < 0x90) {
                        ASSERT(ret == -4);
                        ASSERT(value == UNCHANGED);
                        continue;
                    }

                    /* beyond end of UCS range */
                    if (i == 0xF4 && j > 0x8F) {
                        ASSERT(ret == -2);
                        ASSERT(value == UNCHANGED);
                        continue;
                    }

                    /* valid: 3 bits from the lead, 6 from each trailing byte */
                    ASSERT(ret == 4);
                    ASSERT((value & 0x3F) == (l & 0x3F));
                    ASSERT(((value >> 6) & 0x3F) == (k & 0x3F));
                    ASSERT(((value >> 12) & 0x3F) == (j & 0x3F));
                    ASSERT(value >> 18 == (i & 0x07));
                }
            }
        }
    }

    /*
     * Next, verify UTF8_putc()
     */
    /* Start from an all-zero buffer so it can be compared against zerobuf. */
    memset(testbuf, 0, sizeof testbuf);

    /* single-byte sequences */
    for (i = 0; i < 0x80; i++) {
        /* With a NULL buffer only the required length is expected back. */
        ret = UTF8_putc(NULL, 0, i);
        ASSERT(ret == 1);

        /* Clear the byte written by the previous iteration. */
        testbuf[0] = 0;
        /* Too small a buffer: -1 expected and nothing written. */
        ret = UTF8_putc(testbuf, 0, i);
        ASSERT(ret == -1);
        ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);

        ret = UTF8_putc(testbuf, 1, i);
        ASSERT(ret == 1);
        ASSERT(testbuf[0] == i);
        /* Nothing after the encoded byte may have been touched. */
        ASSERT(memcmp(testbuf+1, zerobuf, sizeof(testbuf)-1) == 0);
    }

    /* two-byte sequences */
    for (i = 0x80; i < 0x800; i++) {
        ret = UTF8_putc(NULL, 0, i);
        ASSERT(ret == 2);

        /* Clear the bytes written by the previous iteration. */
        testbuf[0] = testbuf[1] = 0;
        /* One byte short: -1 expected and nothing written. */
        ret = UTF8_putc(testbuf, 1, i);
        ASSERT(ret == -1);
        ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);

        ret = UTF8_putc(testbuf, 2, i);
        ASSERT(ret == 2);
        ASSERT(memcmp(testbuf+2, zerobuf, sizeof(testbuf)-2) == 0);
        /* Round trip: decoding must give back the original code point. */
        ret = UTF8_getc(testbuf, 2, &value);
        ASSERT(ret == 2);
        ASSERT(value == i);
    }

    /* three-byte sequences */
    for (i = 0x800; i < 0x10000; i++) {
        if (i >= 0xD800 && i < 0xE000) {
            /* surrogates aren't valid */
            ret = UTF8_putc(NULL, 0, i);
            ASSERT(ret == -2);
            continue;
        }

        ret = UTF8_putc(NULL, 0, i);
        ASSERT(ret == 3);

        /* Clear the bytes written by the previous iteration. */
        testbuf[0] = testbuf[1] = testbuf[2] = 0;
        /* One byte short: -1 expected and nothing written. */
        ret = UTF8_putc(testbuf, 2, i);
        ASSERT(ret == -1);
        ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);

        ret = UTF8_putc(testbuf, 3, i);
        ASSERT(ret == 3);
        ASSERT(memcmp(testbuf+3, zerobuf, sizeof(testbuf)-3) == 0);
        /* Round trip: decoding must give back the original code point. */
        ret = UTF8_getc(testbuf, 3, &value);
        ASSERT(ret == 3);
        ASSERT(value == i);
    }

    /* four-byte sequences */
    for (i = 0x10000; i < 0x110000; i++) {
        ret = UTF8_putc(NULL, 0, i);
        ASSERT(ret == 4);

        /* Clear the bytes written by the previous iteration. */
        testbuf[0] = testbuf[1] = testbuf[2] = testbuf[3] = 0;
        /* One byte short: -1 expected and nothing written. */
        ret = UTF8_putc(testbuf, 3, i);
        ASSERT(ret == -1);
        ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);

        ret = UTF8_putc(testbuf, 4, i);
        ASSERT(ret == 4);
        ASSERT(memcmp(testbuf+4, zerobuf, sizeof(testbuf)-4) == 0);
        /* Round trip: decoding must give back the original code point. */
        ret = UTF8_getc(testbuf, 4, &value);
        ASSERT(ret == 4);
        ASSERT(value == i);
    }

    /* spot check some larger values to confirm error return */
    for (i = 0x110000; i < 0x110100; i++) {
        ret = UTF8_putc(NULL, 0, i);
        ASSERT(ret == -2);
    }
    /* ... and the 255 largest values an unsigned long can hold */
    for (value = (unsigned long)-1; value > (unsigned long)-256; value--) {
        ret = UTF8_putc(NULL, 0, value);
        ASSERT(ret == -2);
    }

    return 0;
}