/*
 * Parse the JSON string literal starting at str into an unescaped,
 * NUL-terminated C string and store it in item.
 *
 * item: node to populate; valuestring receives the new buffer and type is
 *       set to cJSON_String.
 * str:  input text; must point at the opening '"'.
 * ep:   out-parameter set to str when the literal is malformed.
 *
 * Returns a pointer just past the closing quote (or at the terminating NUL
 * when the literal is unterminated), or 0 on failure. An allocation failure
 * returns 0 without touching *ep.
 */
static const char *parse_string(cJSON *item,const char *str,const char **ep)
{
    const char *ptr=str+1,*end_ptr=str+1;
    char *ptr2;
    char *out;
    int len=0;
    unsigned uc,uc2;

    if (*str!='\"') {*ep=str;return 0;} /* not a string! */

    /* First pass: find the end of the literal and count one unit per plain
     * character or two-byte escape; that count bounds the decoded size. */
    while (*end_ptr!='\"' && *end_ptr && ++len)
    {
        if (*end_ptr++ == '\\')
        {
            /* A backslash right before the terminator escapes nothing;
             * stepping over it would move end_ptr past the end of the input. */
            if (*end_ptr=='\0') {*ep=str;return 0;}
            end_ptr++; /* Skip escaped quotes. */
        }
    }

    out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
    if (!out) return 0;
    item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
    item->type=cJSON_String;

    /* Second pass: copy plain characters and decode escapes into out. */
    ptr=str+1;ptr2=out;
    while (ptr < end_ptr)
    {
        if (*ptr!='\\') *ptr2++=*ptr++;
        else
        {
            ptr++;
            switch (*ptr)
            {
                case 'b': *ptr2++='\b'; break;
                case 'f': *ptr2++='\f'; break;
                case 'n': *ptr2++='\n'; break;
                case 'r': *ptr2++='\r'; break;
                case 't': *ptr2++='\t'; break;
                case 'u': /* transcode utf16 to utf8. */
                    uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
                    if (ptr >= end_ptr) {*ep=str;return 0;} /* invalid */

                    if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) {*ep=str;return 0;} /* check for invalid. */

                    if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
                    {
                        if (ptr+6 > end_ptr) {*ep=str;return 0;} /* invalid */
                        if (ptr[1]!='\\' || ptr[2]!='u') {*ep=str;return 0;} /* missing second-half of surrogate. */
                        uc2=parse_hex4(ptr+3);ptr+=6;
                        if (uc2<0xDC00 || uc2>0xDFFF) {*ep=str;return 0;} /* invalid second-half of surrogate. */
                        uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
                    }

                    /* number of UTF-8 bytes needed for this code point */
                    len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3;
                    ptr2+=len;

                    /* written back to front: continuation bytes first, lead byte last */
                    switch (len)
                    {
                        case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 1: *--ptr2 =(uc | firstByteMark[len]);
                    }
                    ptr2+=len;
                    break;
                default: *ptr2++=*ptr; break; /* any other escaped character is copied verbatim */
            }
            ptr++;
        }
    }
    *ptr2=0;
    if (*ptr=='\"') ptr++;
    return ptr;
}
/* Exhaustive check: every 16-bit value, printed as four lowercase and four
 * uppercase hex digits, must be read back unchanged by parse_hex4(). */
static void parse_hex4_should_parse_all_combinations(void)
{
    unsigned char lower_hex[5]; /* four digits plus the terminator */
    unsigned char upper_hex[5];
    unsigned int value = 0;

    while (value <= 0xFFFF)
    {
        int written = sprintf((char*)lower_hex, "%.4x", value);
        TEST_ASSERT_EQUAL_INT_MESSAGE(4, written, "sprintf failed.");

        written = sprintf((char*)upper_hex, "%.4X", value);
        TEST_ASSERT_EQUAL_INT_MESSAGE(4, written, "sprintf failed.");

        TEST_ASSERT_EQUAL_INT_MESSAGE(value, parse_hex4(lower_hex), "Failed to parse lowercase digits.");
        TEST_ASSERT_EQUAL_INT_MESSAGE(value, parse_hex4(upper_hex), "Failed to parse uppercase digits.");

        value++;
    }
}
/*
 * Parse one backslash escape sequence at *p and append the character it
 * denotes to t.
 *
 * Returns NULL on success, or the rule name "esc_seq" on failure. When the
 * input ends right after the backslash, *p is restored to its value on entry.
 */
const char *esc_seq_parser(ast *t, position *p){
    position s = *p; /* saved so the cursor can be rewound on failure */

    if(!read_string("\\", NULL, p)){
        return "esc_seq";
    }

    /* Numeric escapes are handled by the hex helpers. Once one of them has
     * consumed the escape we are done; falling through would swallow the next
     * input character as a second escape.
     * NOTE(review): assumes parse_hex2/parse_hex4 return non-zero on a
     * successful match, like read_string - confirm against their definitions. */
    if(parse_hex2(t, p) || parse_hex4(t, p)){
        return NULL;
    }

    /* single-character escapes: a b f n r t v; any other character stands for itself */
    if(!is_end(p)){
        char repr;
        switch(*p->curr){
            case 'a': repr = '\a'; break;
            case 'b': repr = '\b'; break;
            case 'f': repr = '\f'; break;
            case 'n': repr = '\n'; break;
            case 'r': repr = '\r'; break;
            case 't': repr = '\t'; break;
            case 'v': repr = '\v'; break;
            default: repr = *p->curr; break;
        }
        ++p->curr;
        add_text(t, &repr, 1);
        return NULL;
    }

    /* backslash at end of input: undo and report failure */
    *p = s;
    return "esc_seq";
}
// Parse the JSON string literal starting at str into an unescaped,
// NUL-terminated buffer allocated with new[] and store it in item.
//
// item: node to populate; on success valuestring receives the buffer and type
//       is set to JSON_STRING.
// str:  input text; must point at the opening '"'.
//
// Returns a pointer just past the closing quote (or at the terminating NUL
// when the literal is unterminated). Returns 0 with ep set to str when str is
// not a string or when an escape sequence is cut off by the end of the
// literal; in that case the buffer is released and item is left untouched.
// Invalid but complete \u escapes are skipped silently.
static const char *parse_string(JSON *item, const char *str)
{
    const char *ptr;
    const char *end_ptr = str+1;
    char *ptr2, *out;
    int len = 0;
    unsigned uc, uc2;

    if (*str != '\"') { ep = str; return 0; } // not a string!

    // First pass: find the end of the literal and count one unit per plain
    // character or two-byte escape; that count bounds the decoded size.
    while (*end_ptr != '\"' && *end_ptr && ++len)
    {
        if (*end_ptr++ == '\\')
        {
            // A backslash right before the terminator escapes nothing;
            // stepping over it would move end_ptr past the end of the input.
            if (*end_ptr == '\0') { ep = str; return 0; }
            end_ptr++; // Skip escaped quotes.
        }
    }

    out = new char [len+1]; // This is how long we need for the string, roughly.

    // Second pass: decode, never reading at or beyond end_ptr.
    ptr = str+1; ptr2 = out;
    while (ptr < end_ptr)
    {
        if (*ptr != '\\') *ptr2++ = *ptr++;
        else
        {
            ptr++;
            // Skipping an invalid \u escape can leave a backslash as the last
            // byte of the literal; there is nothing left for it to escape.
            if (ptr >= end_ptr) { delete [] out; ep = str; return 0; }
            switch (*ptr)
            {
                case 'b': *ptr2++ = '\b'; break;
                case 'f': *ptr2++ = '\f'; break;
                case 'n': *ptr2++ = '\n'; break;
                case 'r': *ptr2++ = '\r'; break;
                case 't': *ptr2++ = '\t'; break;
                case 'u': // transcode utf16 to utf8.
                    // The four hex digits must lie inside the literal, otherwise
                    // the cursor would jump over the closing quote or the NUL.
                    if (end_ptr - ptr < 5) { delete [] out; ep = str; return 0; }
                    uc = parse_hex4(ptr+1); ptr += 4; // get the unicode char.

                    if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; // check for invalid.

                    if (uc>=0xD800 && uc<=0xDBFF) // UTF16 surrogate pairs.
                    {
                        if (end_ptr - ptr < 7) break; // no room left for a second \uXXXX
                        if (ptr[1]!='\\' || ptr[2]!='u') break; // missing second-half of surrogate.
                        uc2 = parse_hex4(ptr+3); ptr += 6;
                        if (uc2<0xDC00 || uc2>0xDFFF) break; // invalid second-half of surrogate.
                        uc = 0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
                    }

                    // number of UTF-8 bytes needed for this code point
                    len = 4; if (uc<0x80) len = 1; else if (uc<0x800) len = 2; else if (uc<0x10000) len = 3;
                    ptr2 += len;

                    // written back to front: continuation bytes first, lead byte last
                    switch (len)
                    {
                        case 4: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; // fallthrough
                        case 3: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; // fallthrough
                        case 2: *--ptr2 = ((uc | 0x80) & 0xBF); uc >>= 6; // fallthrough
                        case 1: *--ptr2 = (uc | firstByteMark[len]);
                    }
                    ptr2 += len;
                    break;
                default: *ptr2++ = *ptr; break; // any other escaped character is copied verbatim
            }
            ptr++;
        }
    }
    *ptr2 = 0;
    if (*ptr == '\"') ptr++;
    item->valuestring = out;
    item->type = JSON_STRING;
    return ptr;
}
/* parse_hex4() must read 0xBEEF from every upper/lower-case spelling of the
 * four hex digits "beef". */
static void parse_hex4_should_parse_mixed_case(void)
{
    static const char *const spellings[] = {
        "beef", "beeF", "beEf", "beEF",
        "bEef", "bEeF", "bEEf", "bEEF",
        "Beef", "BeeF", "BeEf", "BeEF",
        "BEef", "BEeF", "BEEf", "BEEF"
    };
    unsigned int i = 0;

    for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++)
    {
        TEST_ASSERT_EQUAL_INT(0xBEEF, parse_hex4((const unsigned char*)spellings[i]));
    }
}
/*
 * Parse the JSON string literal starting at str into an unescaped,
 * NUL-terminated C string and store it in item. Besides backslash escapes,
 * a '%' followed by two hex digits is replaced by the byte it encodes when
 * that byte is printable.
 *
 * item: node to populate; valuestring receives the new buffer and type is
 *       set to cJSON_String.
 * str:  input text; must point at the opening '"'.
 *
 * Returns a pointer just past the closing quote (or at the terminating NUL
 * when the literal is unterminated). Returns 0 with ep set to str when str is
 * not a string or when an escape sequence is cut off by the end of the
 * literal, and 0 without touching ep when allocation fails. Invalid but
 * complete \u escapes are skipped silently.
 */
static const char *parse_string(cJSON *item,const char *str)
{
    const char *ptr;
    const char *end_ptr=str+1;
    char *ptr2;
    char *out;
    int32_t len=0;
    unsigned uc,uc2;

    if (*str!='\"') {ep=str;return 0;} /* not a string! */

    /* First pass: find the end of the literal and count one unit per plain
     * character or two-byte escape; that count bounds the decoded size. */
    while (*end_ptr!='\"' && *end_ptr && ++len)
    {
        if (*end_ptr++ == '\\')
        {
            /* A backslash right before the terminator escapes nothing;
             * stepping over it would move end_ptr past the end of the input. */
            if (*end_ptr=='\0') {ep=str;return 0;}
            end_ptr++; /* Skip escaped quotes */
        }
    }

    out=(char*)cJSON_malloc(len+2); /* This is how long we need for the string, roughly. */
    if (!out) return 0;

    /* Hand the buffer to item before decoding so the error returns below do
     * not leak it.
     * NOTE(review): relies on the caller deleting item (and with it
     * valuestring) when this function returns 0 - confirm. */
    item->valuestring=out;
    item->type=cJSON_String;

    /* Second pass: decode, never reading at or beyond end_ptr. */
    ptr=str+1;ptr2=out;
    while (ptr < end_ptr)
    {
        if (*ptr!='\\')
        {
            /* %XX: presumably is_hexstr only accepts two real hex digits, which
             * keeps ptr+3 inside the literal - verify against its definition */
            if ( *ptr == '%' && is_hexstr((char *)&ptr[1],2) && isprint(_decode_hex((char *)&ptr[1])) != 0 )
            {
                *ptr2++ = _decode_hex((char *)&ptr[1]);
                ptr += 3;
            }
            else *ptr2++ = *ptr++;
        }
        else
        {
            ptr++;
            /* Skipping an invalid \u escape can leave a backslash as the last
             * byte of the literal; there is nothing left for it to escape. */
            if (ptr >= end_ptr) {ep=str;return 0;}
            switch (*ptr)
            {
                case 'b': *ptr2++='\b'; break;
                case 'f': *ptr2++='\f'; break;
                case 'n': *ptr2++='\n'; break;
                case 'r': *ptr2++='\r'; break;
                case 't': *ptr2++='\t'; break;
                case 'u': /* transcode utf16 to utf8 */
                    /* The four hex digits must lie inside the literal, otherwise
                     * the cursor would jump over the closing quote or the NUL. */
                    if (end_ptr - ptr < 5) {ep=str;return 0;}
                    uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char */

                    if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid */

                    if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs */
                    {
                        if (end_ptr - ptr < 7) break; /* no room left for a second \uXXXX */
                        if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */
                        uc2=parse_hex4(ptr+3);ptr+=6;
                        if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate */
                        uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
                    }

                    /* number of UTF-8 bytes needed for this code point */
                    len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3;
                    ptr2+=len;

                    /* written back to front: continuation bytes first, lead byte last */
                    switch (len)
                    {
                        case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; /* fallthrough */
                        case 1: *--ptr2 =(uc | firstByteMark[len]);
                    }
                    ptr2+=len;
                    break;
                default: *ptr2++=*ptr; break; /* any other escaped character is copied verbatim */
            }
            ptr++;
        }
    }
    *ptr2=0;
    if (*ptr=='\"') ptr++;
    return ptr;
}
/*
 * Parse the input text into an unescaped cstring, and populate item.
 *
 * item: node to populate; valuestring receives the new buffer and type is
 *       set to cJSON_String.
 * str:  input text; must point at the opening '"'.
 * ep:   out-parameter set to str whenever the literal is malformed.
 *
 * Returns a pointer just past the closing quote (or at the terminating NUL
 * when the literal is unterminated), or NULL on failure. An allocation
 * failure returns NULL without touching *ep.
 */
static const char *parse_string(cJSON *item, const char *str, const char **ep)
{
    const char *ptr = str + 1;
    const char *end_ptr = str + 1;
    char *ptr2 = NULL;
    char *out = NULL;
    int len = 0;
    unsigned uc = 0;
    unsigned uc2 = 0;

    /* not a string! */
    if (*str != '\"')
    {
        *ep = str;
        return NULL;
    }

    /* find the end of the literal and count the units to decode */
    while ((*end_ptr != '\"') && *end_ptr && ++len)
    {
        if (*end_ptr++ == '\\')
        {
            if (*end_ptr == '\0')
            {
                /* prevent buffer overflow when last input character is a backslash;
                 * report the position like every other malformed-input path */
                *ep = str;
                return NULL;
            }
            /* Skip escaped quotes. */
            end_ptr++;
        }
    }

    /* This is at most how long we need for the string, roughly. */
    out = (char*)cJSON_malloc(len + 1);
    if (!out)
    {
        return NULL;
    }
    item->valuestring = out; /* assign here so out will be deleted during cJSON_Delete() later */
    item->type = cJSON_String;

    ptr = str + 1;
    ptr2 = out;
    /* loop through the string literal */
    while (ptr < end_ptr)
    {
        if (*ptr != '\\')
        {
            *ptr2++ = *ptr++;
        }
        /* escape sequence */
        else
        {
            ptr++;
            switch (*ptr)
            {
                case 'b':
                    *ptr2++ = '\b';
                    break;
                case 'f':
                    *ptr2++ = '\f';
                    break;
                case 'n':
                    *ptr2++ = '\n';
                    break;
                case 'r':
                    *ptr2++ = '\r';
                    break;
                case 't':
                    *ptr2++ = '\t';
                    break;
                case '\"':
                case '\\':
                case '/':
                    *ptr2++ = *ptr;
                    break;
                case 'u':
                    /* transcode utf16 to utf8. See RFC2781 and RFC3629. */
                    uc = parse_hex4(ptr + 1); /* get the unicode char. */
                    ptr += 4;
                    if (ptr >= end_ptr)
                    {
                        /* invalid */
                        *ep = str;
                        return NULL;
                    }
                    /* check for invalid. */
                    if (((uc >= 0xDC00) && (uc <= 0xDFFF)) || (uc == 0))
                    {
                        *ep = str;
                        return NULL;
                    }

                    /* UTF16 surrogate pairs. */
                    if ((uc >= 0xD800) && (uc<=0xDBFF))
                    {
                        if ((ptr + 6) > end_ptr)
                        {
                            /* invalid */
                            *ep = str;
                            return NULL;
                        }
                        if ((ptr[1] != '\\') || (ptr[2] != 'u'))
                        {
                            /* missing second-half of surrogate. */
                            *ep = str;
                            return NULL;
                        }
                        uc2 = parse_hex4(ptr + 3);
                        ptr += 6; /* \uXXXX */
                        if ((uc2 < 0xDC00) || (uc2 > 0xDFFF))
                        {
                            /* invalid second-half of surrogate. */
                            *ep = str;
                            return NULL;
                        }
                        /* calculate unicode codepoint from the surrogate pair */
                        uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF));
                    }

                    /* encode as UTF8
                     * takes at maximum 4 bytes to encode:
                     * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
                    len = 4;
                    if (uc < 0x80)
                    {
                        /* normal ascii, encoding 0xxxxxxx */
                        len = 1;
                    }
                    else if (uc < 0x800)
                    {
                        /* two bytes, encoding 110xxxxx 10xxxxxx */
                        len = 2;
                    }
                    else if (uc < 0x10000)
                    {
                        /* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
                        len = 3;
                    }
                    ptr2 += len;

                    /* bytes are written back to front; each case deliberately
                     * falls through to the next */
                    switch (len)
                    {
                        case 4:
                            /* 10xxxxxx */
                            *--ptr2 = ((uc | 0x80) & 0xBF);
                            uc >>= 6;
                            /* fallthrough */
                        case 3:
                            /* 10xxxxxx */
                            *--ptr2 = ((uc | 0x80) & 0xBF);
                            uc >>= 6;
                            /* fallthrough */
                        case 2:
                            /* 10xxxxxx */
                            *--ptr2 = ((uc | 0x80) & 0xBF);
                            uc >>= 6;
                            /* fallthrough */
                        case 1:
                            /* depending on the length in bytes this determines the
                             * encoding of the first UTF8 byte */
                            *--ptr2 = (uc | firstByteMark[len]);
                    }
                    ptr2 += len;
                    break;
                default:
                    /* unknown escape character */
                    *ep = str;
                    return NULL;
            }
            ptr++;
        }
    }
    *ptr2 = '\0';
    if (*ptr == '\"')
    {
        ptr++;
    }

    return ptr;
}