// Copy `orig` into `buf`, dropping the backslash from the escape sequences
// `\:`, `\"` and `\\`.  Any other backslash is copied through unchanged,
// followed by the code point after it (if there is one).  `buf` is emptied
// before use and the result is whatever CB_To_String(buf) yields.
// `self` is not used.
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *cursor = Str_Top(orig);
    int32_t cp;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    for (cp = StrIter_Next(cursor);
         cp != STRITER_DONE;
         cp = StrIter_Next(cursor)
        ) {
        int32_t escaped;

        // Ordinary code points are copied verbatim.
        if (cp != '\\') {
            CB_Cat_Char(buf, cp);
            continue;
        }

        // A backslash: look at what it is escaping.
        escaped = StrIter_Next(cursor);
        switch (escaped) {
            case ':':
            case '"':
            case '\\':
                // Recognized escape: keep only the escaped character.
                CB_Cat_Char(buf, escaped);
                break;
            default:
                // Unrecognized escape: keep the backslash, plus the
                // following code point unless the string ended here.
                CB_Cat_Char(buf, cp);
                if (escaped != STRITER_DONE) {
                    CB_Cat_Char(buf, escaped);
                }
                break;
        }
    }

    DECREF(cursor);
    return CB_To_String(buf);
}
// Report whether the current contents of `cb` equal `other`.  The contents
// are converted with CB_To_String(), compared via Str_Equals(), and the
// temporary String is released before returning.
static bool
S_cb_equals(CharBuf *cb, String *other) {
    String *snapshot = CB_To_String(cb);
    const bool same = Str_Equals(snapshot, (Obj*)other);
    DECREF(snapshot);
    return same;
}
// Return the stepper's value as an Obj*.  The String form is built from
// `ivars->value` (treated as a CharBuf) on first request and stored in
// `ivars->string`; later calls return the stored String as long as that
// field stays non-NULL.
Obj*
TextTermStepper_Get_Value_IMP(TextTermStepper *self) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);

    // Fast path: a String has already been materialized.
    if (ivars->string != NULL) {
        return (Obj*)ivars->string;
    }

    ivars->string = CB_To_String((CharBuf*)ivars->value);
    return (Obj*)ivars->string;
}
// Return `text` with HTML-significant content escaped, using `buf` as the
// working buffer (its previous contents are discarded).
//
//   * Code points above 127, and ASCII code points that are neither
//     graphic nor whitespace, become decimal numeric character references.
//   * '<', '>', '&' and '"' become the named entities lt, gt, amp and quot.
//   * Everything else is copied through unchanged.
//
// The result is whatever CB_To_String(buf) yields.
static String*
S_encode_entities(String *text, CharBuf *buf) {
    // Worst case for one code point: "&#" + 7 decimal digits + ";", since
    // the largest code point (0x10FFFF) is 1114111 in decimal.
    const int MAX_ENTITY_BYTES = 10;
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    int32_t code_point;

    // Scan first so that we only allocate once.
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
            || code_point == '<'
            || code_point == '>'
            || code_point == '&'
            || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }
    DECREF(iter);

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);

    // Second pass: emit the escaped text.  The `code_point > 127` test
    // comes first so the <ctype.h> predicates only ever see ASCII values.
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }
    DECREF(iter);

    return CB_To_String(buf);
}
// Surround a smiley with lots of whitespace. static String* S_smiley_with_whitespace(int *num_spaces_ptr) { int32_t spaces[] = { ' ', '\t', '\r', '\n', 0x000B, 0x000C, 0x000D, 0x0085, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000 }; int num_spaces = sizeof(spaces) / sizeof(uint32_t); CharBuf *buf = CB_new(0); for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); } CB_Cat_Char(buf, smiley_cp); for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); } String *retval = CB_To_String(buf); if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; } DECREF(buf); return retval; }
// Decode the serialized term data in `field_buf` into a Hash that maps each
// term's text to a ByteBuf holding that term's position data.  The position
// data is copied out in its compressed form rather than being decoded.
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *tv_cache = Hash_new(0);
    CharBuf    *term_buf = CB_new(0);
    const char *cursor   = BB_Get_Buf(field_buf);

    // Read the number of highlightable terms in the field.
    const int32_t term_count = NumUtil_decode_c32(&cursor);

    for (int32_t term_idx = 0; term_idx < term_count; term_idx++) {
        // Decompress the term text: keep the first `shared_len` bytes left
        // in the buffer by the previous term, then append the new bytes.
        size_t shared_len = NumUtil_decode_c32(&cursor);
        size_t suffix_len = NumUtil_decode_c32(&cursor);
        CB_Set_Size(term_buf, shared_len);
        CB_Cat_Trusted_Utf8(term_buf, cursor, suffix_len);
        cursor += suffix_len;

        // Measure the positions & offsets data.  Each position entry is
        // three compressed integers, which are skipped rather than decoded
        // to save a little memory.
        const char *posdata_start = cursor;
        int32_t remaining = NumUtil_decode_c32(&cursor);
        for (; remaining != 0; remaining--) {
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
        }
        size_t posdata_len = cursor - posdata_start;

        // Store the $text => $posdata pair in the output hash.
        String *term_text = CB_To_String(term_buf);
        Obj    *posdata   = (Obj*)BB_new_bytes(posdata_start, posdata_len);
        Hash_Store(tv_cache, (Obj*)term_text, posdata);
        DECREF(term_text);
    }

    DECREF(term_buf);
    return tv_cache;
}
// Exercise StringIterator against a string built from a fixed list of code
// points: equality, ordering, Clone/Assign, forward and backward traversal,
// and bulk Advance/Recede.  The code points sit on the boundaries between
// UTF-8 encoded lengths (0x7F/0x80, 0x7FF/0x800, 0xFFFF/0x10000) plus the
// largest code point, 0x10FFFF.
static void
test_iterator(TestBatchRunner *runner) {
    static const int32_t code_points[] = {
        0x41,
        0x7F,
        0x80,
        0x7FF,
        0x800,
        0xFFFF,
        0x10000,
        0x10FFFF
    };
    static const size_t num_code_points
        = sizeof(code_points) / sizeof(code_points[0]);

    CharBuf *buf = CB_new(0);
    for (size_t i = 0; i < num_code_points; ++i) {
        CB_Cat_Char(buf, code_points[i]);
    }
    String *string = CB_To_String(buf);

    // Equals: reflexive, and false against a non-iterator object.
    {
        StringIterator *iter = Str_Top(string);

        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)iter),
                  "StringIterator equal to self");
        TEST_FALSE(runner, StrIter_Equals(iter, (Obj*)CFISH_TRUE),
                   "StringIterator not equal non-iterators");

        DECREF(iter);
    }

    // Compare_To, Clone and Assign, including across different strings.
    {
        StringIterator *top  = Str_Top(string);
        StringIterator *tail = Str_Tail(string);

        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
                    "Compare_To top < tail");
        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
                    "Compare_To tail > top");
        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
                    "Compare_To top == top");

        StringIterator *clone = StrIter_Clone(top);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");

        StrIter_Assign(clone, tail);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");

        String *other = Str_newf("Other string");
        StringIterator *other_iter = Str_Top(other);
        TEST_FALSE(runner, StrIter_Equals(other_iter, (Obj*)tail),
                   "Equals returns false for different strings");
        StrIter_Assign(clone, other_iter);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)other_iter),
                  "Assign iterator with different string");

        DECREF(other);
        DECREF(other_iter);
        DECREF(clone);
        DECREF(top);
        DECREF(tail);
    }

    // Forward traversal with Has_Next/Next, ending at the tail.
    {
        StringIterator *iter = Str_Top(string);

        for (size_t i = 0; i < num_code_points; ++i) {
            // "%d" expects an int, so the size_t index is cast explicitly.
            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", (int)i);
            int32_t code_point = StrIter_Next(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Next(iter),
                  "Has_Next at end of string");
        TEST_INT_EQ(runner, StrIter_Next(iter), STR_OOB,
                    "Next at end of string");

        StringIterator *tail = Str_Tail(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");

        DECREF(tail);
        DECREF(iter);
    }

    // Backward traversal with Has_Prev/Prev, ending at the top.
    {
        StringIterator *iter = Str_Tail(string);

        for (size_t i = num_code_points; i--;) {
            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", (int)i);
            int32_t code_point = StrIter_Prev(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
                  "Has_Prev at start of string");
        TEST_INT_EQ(runner, StrIter_Prev(iter), STR_OOB,
                    "Prev at start of string");

        StringIterator *top = Str_Top(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");

        DECREF(top);
        DECREF(iter);
    }

    // Advance/Recede: both return the number of code points actually
    // skipped, which is less than requested when the string runs out.
    {
        StringIterator *iter = Str_Top(string);

        StrIter_Next(iter);
        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
                    "Advance returns number of code points");
        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
                    "Advance works");
        // Four code points have been consumed so far.
        TEST_INT_EQ(runner,
                    StrIter_Advance(iter, 1000000), num_code_points - 4,
                    "Advance past end of string");

        StrIter_Prev(iter);
        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
                    "Recede returns number of code points");
        TEST_INT_EQ(runner, StrIter_Prev(iter),
                    code_points[num_code_points-4],
                    "Recede works");
        // Four code points have been stepped back over from the tail.
        TEST_INT_EQ(runner,
                    StrIter_Recede(iter, 1000000), num_code_points - 4,
                    "Recede past start of string");

        DECREF(iter);
    }

    DECREF(string);
    DECREF(buf);
}