/* Remove the escaping backslash from the sequences `\:`, `\"` and `\\` found
 * in `orig`.  A backslash in front of any other code point is copied through
 * unchanged together with that code point, and a backslash that is the last
 * code point of the string is kept as-is.
 *
 * `buf` is emptied and used as the output buffer; the return value is the
 * String produced by CB_To_String(buf).  `self` is not used.
 */
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *cursor = Str_Top(orig);
    int32_t current;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    for (;;) {
        int32_t escaped;

        current = StrIter_Next(cursor);
        if (current == STRITER_DONE) {
            break;
        }

        // Anything that is not a backslash is copied verbatim.
        if (current != '\\') {
            CB_Cat_Char(buf, current);
            continue;
        }

        // Look at the code point following the backslash.
        escaped = StrIter_Next(cursor);
        switch (escaped) {
            case ':':
            case '"':
            case '\\':
                // Recognized escape: drop the backslash, keep the character.
                CB_Cat_Char(buf, escaped);
                break;
            default:
                // Unrecognized escape: keep the backslash, and the following
                // code point too unless the string ended.
                CB_Cat_Char(buf, current);
                if (escaped != STRITER_DONE) {
                    CB_Cat_Char(buf, escaped);
                }
                break;
        }
    }

    DECREF(cursor);
    return CB_To_String(buf);
}
/* Check that each CharBuf append routine (Cat, Cat_Char, Cat_Utf8,
 * Cat_Trusted_Utf8) yields a buffer equal to "a" followed by the smiley.
 */
static void
test_Cat(TestBatchRunner *runner) {
    String  *expected = Str_newf("a%s", smiley);
    CharBuf *actual;

    // Cat: append the whole expected string to an empty buffer.
    actual = S_get_cb("");
    CB_Cat(actual, expected);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Cat");
    DECREF(actual);

    // Cat_Char: append the smiley as a single code point.
    actual = S_get_cb("a");
    CB_Cat_Char(actual, 0x263A);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Cat_Char");
    DECREF(actual);

    // Cat_Utf8: append the smiley as UTF-8 bytes.
    actual = S_get_cb("a");
    CB_Cat_Utf8(actual, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Cat_Utf8");
    DECREF(actual);

    // Cat_Trusted_Utf8: same bytes through the "trusted" entry point.
    actual = S_get_cb("a");
    CB_Cat_Trusted_Utf8(actual, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Cat_Trusted_Utf8");
    DECREF(actual);

    DECREF(expected);
}
/* Check that each CharBuf append routine (Cat, Cat_Char, Cat_Str,
 * Cat_Trusted_Str) yields a buffer equal to "a" followed by the smiley.
 */
static void
test_Cat(TestBatch *batch) {
    CharBuf *expected = CB_newf("a%s", smiley);
    CharBuf *actual;

    // Cat: append the whole expected buffer to an empty buffer.
    actual = S_get_cb("");
    CB_Cat(actual, expected);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat");
    DECREF(actual);

    // Cat_Char: append the smiley as a single code point.
    actual = S_get_cb("a");
    CB_Cat_Char(actual, 0x263A);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Char");
    DECREF(actual);

    // Cat_Str: append the smiley from a raw buffer plus length.
    actual = S_get_cb("a");
    CB_Cat_Str(actual, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Str");
    DECREF(actual);

    // Cat_Trusted_Str: same bytes through the "trusted" entry point.
    actual = S_get_cb("a");
    CB_Cat_Trusted_Str(actual, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Trusted_Str");
    DECREF(actual);

    DECREF(expected);
}
/* Build a String made of `length` code points, each obtained from
 * S_random_code_point().  The caller receives the String returned by
 * CB_Yield_String(); the temporary CharBuf is released here.
 */
String*
TestUtils_random_string(size_t length) {
    CharBuf *scratch = CB_new(length);

    for (size_t remaining = length; remaining > 0; remaining--) {
        CB_Cat_Char(scratch, S_random_code_point());
    }

    String *result = CB_Yield_String(scratch);
    DECREF(scratch);
    return result;
}
// Surround a smiley with lots of whitespace. static String* S_smiley_with_whitespace(int *num_spaces_ptr) { int32_t spaces[] = { ' ', '\t', '\r', '\n', 0x000B, 0x000C, 0x000D, 0x0085, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000 }; int num_spaces = sizeof(spaces) / sizeof(uint32_t); CharBuf *buf = CB_new(0); for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); } CB_Cat_Char(buf, smiley_cp); for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); } String *retval = CB_To_String(buf); if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; } DECREF(buf); return retval; }
/* Encode `text` for inclusion in HTML.
 *
 * Code points above 127, and ASCII code points that are neither graphic nor
 * whitespace (per isgraph()/isspace()), are written as decimal numeric
 * character references ("&#NNN;").  The characters `<`, `>`, `&` and `"` are
 * written as the named entities &lt;, &gt;, &amp; and &quot;.  Everything
 * else is copied through unchanged.
 *
 * `buf` is emptied and used as the output buffer; the return value is the
 * String produced by CB_To_String(buf).
 */
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    // Longest possible replacement for one code point: "&#1114111;"
    // (U+10FFFF), i.e. "&#" + 7 digits + ";".
    const int MAX_ENTITY_BYTES = 10;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
            || code_point == '<'
            || code_point == '>'
            || code_point == '&'
            || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);
    DECREF(iter);

    // Second pass: emit the encoded text.
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
/* Return a new String holding `self->name` with every underscore removed. */
String*
Method_Host_Name_IMP(Method *self) {
    CharBuf        *stripped = CB_new(Str_Get_Size(self->name));
    StringIterator *cursor   = StrIter_new(self->name, 0);

    for (;;) {
        int32_t code_point = StrIter_Next(cursor);
        if (code_point == STR_OOB) {
            break;
        }
        if (code_point == '_') {
            continue;
        }
        CB_Cat_Char(stripped, code_point);
    }

    String *result = CB_Yield_String(stripped);
    DECREF(stripped);
    DECREF(cursor);
    return result;
}
/* Round-trip strings through Json_to_json() / Json_from_json() and check the
 * escaping.
 *
 * The first loop encodes a one-code-point string for every index `i` that has
 * an entry in `control_escapes` and expects the JSON to be that entry wrapped
 * in double quotes.  The second loop does the same for each pair taken from
 * `quote_escapes_source` / `quote_escapes_json`.  In both loops the decoded
 * value must equal the original string.
 */
static void
test_escapes(TestBatch *batch) {
    CharBuf *string      = CB_new(10);
    CharBuf *json_wanted = CB_new(10);

    for (int i = 0; control_escapes[i] != NULL; i++) {
        CB_Truncate(string, 0);
        CB_Cat_Char(string, i);
        const char *escaped = control_escapes[i];
        CharBuf    *json    = Json_to_json((Obj*)string);
        // Only decode/trim when encoding succeeded, so that a failed encode
        // is reported by the assertions below instead of dereferencing NULL.
        CharBuf    *decoded = json != NULL
                              ? (CharBuf*)Json_from_json(json)
                              : NULL;

        CB_setf(json_wanted, "\"%s\"", escaped);
        if (json != NULL) {
            CB_Trim(json);
        }
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode control escape: %s", escaped);

        TEST_TRUE(batch, decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode control escape: %s", escaped);

        DECREF(json);
        DECREF(decoded);
    }

    for (int i = 0; quote_escapes_source[i] != NULL; i++) {
        const char *source  = quote_escapes_source[i];
        const char *escaped = quote_escapes_json[i];
        // Never use test data as the format string itself.
        CB_setf(string, "%s", source);

        CharBuf *json    = Json_to_json((Obj*)string);
        CharBuf *decoded = json != NULL
                           ? (CharBuf*)Json_from_json(json)
                           : NULL;

        CB_setf(json_wanted, "\"%s\"", escaped);
        if (json != NULL) {
            CB_Trim(json);
        }
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode quote/backslash escapes: %s", source);

        TEST_TRUE(batch, decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode quote/backslash escapes: %s", source);

        DECREF(json);
        DECREF(decoded);
    }

    DECREF(json_wanted);
    DECREF(string);
}
/* Append one stack-frame line to the error's message.
 *
 * The existing message is copied, a newline is added if it does not already
 * end with one, and then a tab-indented line naming `func` (when non-NULL),
 * `file` and `line` is appended.  `self->mess` is replaced by the new String
 * and the old one is released.
 */
void
Err_Add_Frame_IMP(Err *self, const char *file, int line, const char *func) {
    CharBuf *trace    = CB_new(0);
    int32_t  line_num = (int32_t)line;

    CB_Cat(trace, self->mess);
    if (!Str_Ends_With_Utf8(self->mess, "\n", 1)) {
        CB_Cat_Char(trace, '\n');
    }

    if (func == NULL) {
        CB_catf(trace, "\tat %s line %i32\n", file, line_num);
    }
    else {
        CB_catf(trace, "\t%s at %s line %i32\n", func, file, line_num);
    }

    DECREF(self->mess);
    self->mess = CB_Yield_String(trace);
    DECREF(trace);
}
/* Exercise CB_Trim_Top, CB_Trim_Tail and CB_Trim on a buffer holding a
 * smiley surrounded by a run of whitespace code points on both sides.  Each
 * trim call is expected to return true the first time and false when
 * repeated, and the buffer must end up equal to the bare smiley.
 */
static void
test_Trim(TestBatch *batch) {
    uint32_t whitespace[] = {
        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
        0x202F, 0x205F, 0x3000
    };
    uint32_t count = sizeof(whitespace) / sizeof(whitespace[0]);
    CharBuf *padded = CB_new(0);

    // Surround a smiley with lots of whitespace.
    for (uint32_t k = 0; k < count; k++) {
        CB_Cat_Char(padded, whitespace[k]);
    }
    CB_Cat_Char(padded, 0x263A);
    for (uint32_t k = 0; k < count; k++) {
        CB_Cat_Char(padded, whitespace[k]);
    }

    // Trim each end separately.
    TEST_TRUE(batch, CB_Trim_Top(padded), "Trim_Top returns true on success");
    TEST_FALSE(batch, CB_Trim_Top(padded),
               "Trim_Top returns false on failure");
    TEST_TRUE(batch, CB_Trim_Tail(padded),
              "Trim_Tail returns true on success");
    TEST_FALSE(batch, CB_Trim_Tail(padded),
               "Trim_Tail returns false on failure");
    TEST_TRUE(batch, CB_Equals_Str(padded, smiley, smiley_len),
              "Trim_Top and Trim_Tail worked");

    // Build the spacey smiley again.
    CB_Truncate(padded, 0);
    for (uint32_t k = 0; k < count; k++) {
        CB_Cat_Char(padded, whitespace[k]);
    }
    CB_Cat_Char(padded, 0x263A);
    for (uint32_t k = 0; k < count; k++) {
        CB_Cat_Char(padded, whitespace[k]);
    }

    // Trim both ends in one call.
    TEST_TRUE(batch, CB_Trim(padded), "Trim returns true on success");
    TEST_FALSE(batch, CB_Trim(padded), "Trim returns false on failure");
    TEST_TRUE(batch, CB_Equals_Str(padded, smiley, smiley_len),
              "Trim worked");

    DECREF(padded);
}
/* Append the contents of `dump` (treated as a CharBuf) to `json` as a
 * double-quoted JSON string.
 *
 * Code points above 127 are appended unescaped.  Within ASCII, backspace,
 * tab, newline, form feed, carriage return, backslash and double quote get
 * their two-character escapes; every other code point below 0x20 is written
 * as a six-character \uXXXX escape; everything else is copied as-is.
 */
static void
S_append_json_string(Obj *dump, CharBuf *json) {
    // Append opening quote.
    CB_Cat_Trusted_Str(json, "\"", 1);

    // Process string data, consuming one code point at a time.
    ZombieCharBuf *remaining = ZCB_WRAP((CharBuf*)dump);
    while (ZCB_Get_Size(remaining)) {
        uint32_t code_point = ZCB_Nip_One(remaining);

        if (code_point > 127) {
            // There is no need to escape any high characters, including
            // those above the BMP, as we assume that the destination channel
            // can handle arbitrary UTF-8 data.
            CB_Cat_Char(json, code_point);
            continue;
        }

        // Room for "\uXXXX" plus the NUL written by sprintf.
        char   escaped[7];
        size_t escaped_len = 2;

        // Perform all mandatory escapes enumerated in the JSON spec.
        // Note that the spec makes escaping forward slash optional;
        // we choose not to.
        switch (code_point) {
            case '\b':
                memcpy(escaped, "\\b", 2);
                break;
            case '\t':
                memcpy(escaped, "\\t", 2);
                break;
            case '\n':
                memcpy(escaped, "\\n", 2);
                break;
            case '\f':
                memcpy(escaped, "\\f", 2);
                break;
            case '\r':
                memcpy(escaped, "\\r", 2);
                break;
            case '\\':
                memcpy(escaped, "\\\\", 2);
                break;
            case '\"':
                memcpy(escaped, "\\\"", 2);
                break;
            default:
                if (code_point < 0x20) {
                    // Remaining control characters have no short form.
                    sprintf(escaped, "\\u%04x", (unsigned)code_point);
                    escaped_len = 6;
                }
                else {
                    // Ordinary printable ASCII.
                    escaped[0]  = (char)code_point;
                    escaped_len = 1;
                }
                break;
        }

        CB_Cat_Trusted_Str(json, escaped, escaped_len);
    }

    // Append closing quote.
    CB_Cat_Trusted_Str(json, "\"", 1);
}
/* Exercise StringIterator on a string built from code points that sit on the
 * boundaries between UTF-8 sequence lengths: equality and comparison, Clone
 * and Assign, forward and backward iteration, and Advance/Recede.
 */
static void
test_iterator(TestBatchRunner *runner) {
    static const int32_t code_points[] = {
        0x41,
        0x7F,
        0x80,
        0x7FF,
        0x800,
        0xFFFF,
        0x10000,
        0x10FFFF
    };
    static size_t num_code_points
        = sizeof(code_points) / sizeof(code_points[0]);

    CharBuf *buf = CB_new(0);
    for (size_t i = 0; i < num_code_points; ++i) {
        CB_Cat_Char(buf, code_points[i]);
    }
    String *string = CB_To_String(buf);

    // Equality with self and with a non-iterator object.
    {
        StringIterator *iter = Str_Top(string);

        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)iter),
                  "StringIterator equal to self");
        TEST_FALSE(runner, StrIter_Equals(iter, (Obj*)CFISH_TRUE),
                   "StringIterator not equal non-iterators");

        DECREF(iter);
    }

    // Compare_To, Clone and Assign.
    {
        StringIterator *top  = Str_Top(string);
        StringIterator *tail = Str_Tail(string);

        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
                    "Compare_To top < tail");
        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
                    "Compare_To tail > top");
        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
                    "Compare_To top == top");

        StringIterator *clone = StrIter_Clone(top);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");

        StrIter_Assign(clone, tail);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");

        String *other = Str_newf("Other string");
        StringIterator *other_iter = Str_Top(other);
        TEST_FALSE(runner, StrIter_Equals(other_iter, (Obj*)tail),
                   "Equals returns false for different strings");
        StrIter_Assign(clone, other_iter);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)other_iter),
                  "Assign iterator with different string");

        DECREF(other);
        DECREF(other_iter);
        DECREF(clone);
        DECREF(top);
        DECREF(tail);
    }

    // Forward iteration.
    {
        StringIterator *iter = Str_Top(string);

        for (size_t i = 0; i < num_code_points; ++i) {
            // The labels use %d, so pass the index as an int rather than a
            // size_t to keep the variadic argument type matched.
            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", (int)i);
            int32_t code_point = StrIter_Next(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Next(iter),
                  "Has_Next at end of string");
        TEST_INT_EQ(runner, StrIter_Next(iter), STR_OOB,
                    "Next at end of string");

        StringIterator *tail = Str_Tail(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");

        DECREF(tail);
        DECREF(iter);
    }

    // Backward iteration.
    {
        StringIterator *iter = Str_Tail(string);

        for (size_t i = num_code_points; i--;) {
            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", (int)i);
            int32_t code_point = StrIter_Prev(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
                  "Has_Prev at end of string");
        TEST_INT_EQ(runner, StrIter_Prev(iter), STR_OOB,
                    "Prev at start of string");

        StringIterator *top = Str_Top(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");

        DECREF(top);
        DECREF(iter);
    }

    // Advance and Recede, including requests larger than the string.
    {
        StringIterator *iter = Str_Top(string);

        StrIter_Next(iter);
        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
                    "Advance returns number of code points");
        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
                    "Advance works");
        TEST_INT_EQ(runner,
                    StrIter_Advance(iter, 1000000), num_code_points - 4,
                    "Advance past end of string");

        StrIter_Prev(iter);
        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
                    "Recede returns number of code points");
        TEST_INT_EQ(runner, StrIter_Prev(iter),
                    code_points[num_code_points-4],
                    "Recede works");
        TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000),
                    num_code_points - 4,
                    "Recede past start of string");

        DECREF(iter);
    }

    DECREF(string);
    DECREF(buf);
}
/* Extract an excerpt of `field_val` around the hottest location reported by
 * S_hottest(heat_map).
 *
 * The start is placed up to `slop` positions before the hottest location and
 * then adjusted by S_find_starting_boundary(); the end is found by advancing
 * up to `excerpt_length` positions and adjusted by S_find_ending_boundary().
 * When no starting boundary was found the excerpt is prefixed with an
 * ellipsis and a space; when no ending boundary was found an ellipsis is
 * appended.
 *
 * `*start_ptr` receives the computed start offset (already reduced by 2 when
 * a leading ellipsis was prepended).  Returns a new String, which is empty
 * if the excerpt length came out as zero.
 */
String*
Highlighter_Raw_Excerpt_IMP(Highlighter *self, String *field_val,
                            int32_t *start_ptr, HeatMap *heat_map) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);

    // Find start of excerpt.
    StringIterator *top = Str_Top(field_val);
    int32_t  hottest = S_hottest(heat_map);
    int32_t  start   = 0;
    uint32_t max_skip;

    if ((uint32_t)hottest > ivars->slop) {
        start    = hottest - ivars->slop;
        max_skip = ivars->slop;
        StrIter_Advance(top, start);
    }
    else {
        // If the beginning of the string falls within the window centered
        // around the hottest point in the field, start the fragment at the
        // beginning.
        max_skip = hottest;
    }

    uint32_t num_skipped;
    bool starts_on_boundary
        = S_find_starting_boundary(top, max_skip, &num_skipped);
    start += num_skipped;

    // Find end of excerpt.
    StringIterator *tail = StrIter_Clone(top);
    uint32_t max_len = ivars->excerpt_length;
    if (!starts_on_boundary) {
        // Leave space for starting ellipsis and space character.
        max_len -= 2;
    }

    bool     ends_on_boundary = true;
    uint32_t excerpt_len      = StrIter_Advance(tail, max_len);

    // Skip up to slop code points but keep at least max_len - slop.
    if (excerpt_len > max_len - ivars->slop) {
        max_skip = excerpt_len - (max_len - ivars->slop);
        ends_on_boundary
            = S_find_ending_boundary(tail, max_skip, &num_skipped);
        excerpt_len = (num_skipped >= excerpt_len)
                      ? 0
                      : excerpt_len - num_skipped;
    }

    // Extract excerpt.
    String *raw_excerpt;
    if (excerpt_len != 0) {
        String  *middle  = StrIter_substring(top, tail);
        CharBuf *scratch = CB_new(Str_Get_Size(middle) + 8);

        // If not starting on a sentence boundary, prepend an ellipsis.
        if (!starts_on_boundary) {
            CB_Cat_Char(scratch, ELLIPSIS_CODE_POINT);
            CB_Cat_Char(scratch, ' ');
            start -= 2;
        }

        CB_Cat(scratch, middle);

        // If not ending on a sentence boundary, append an ellipsis.
        if (!ends_on_boundary) {
            CB_Cat_Char(scratch, ELLIPSIS_CODE_POINT);
        }

        raw_excerpt = CB_Yield_String(scratch);
        DECREF(scratch);
        DECREF(middle);
    }
    else {
        raw_excerpt = Str_new_from_trusted_utf8("", 0);
    }

    *start_ptr = start;

    DECREF(top);
    DECREF(tail);
    return raw_excerpt;
}