CharBuf*
Float32Type_specifier(Float32Type *self) {
    // A 32-bit float field always uses the fixed specifier string "f32_t".
    UNUSED_VAR(self);
    CharBuf *specifier = CB_newf("f32_t");
    return specifier;
}
CharBuf*
Int64Type_specifier(Int64Type *self) {
    // A 64-bit integer field always uses the fixed specifier string "i64_t".
    UNUSED_VAR(self);
    CharBuf *specifier = CB_newf("i64_t");
    return specifier;
}
// Finalize the sort writer: flush any buffered field data, close the temp
// streams, finish each per-field writer while recording its stats in the
// segment metadata hashes, then delete the temporary files.
void
SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *const field_writers = ivars->field_writers;

    // If we have no data, bail out.
    if (!ivars->temp_ord_out) { return; }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    // NOTE(review): both loops below start at i = 1 — presumably field
    // numbers are 1-based so slot 0 is always empty; confirm against the
    // code that populates field_writers.
    if (ivars->flush_at_finish) {
        for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
            SortFieldWriter *field_writer
                = (SortFieldWriter*)VA_Fetch(field_writers, i);
            if (field_writer) {
                SortFieldWriter_Flush(field_writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    // Finish each field writer, storing its value count, null ordinal (if
    // any), and ordinal bit width into the metadata hashes keyed by field
    // name.  VA_Delete transfers ownership, so each writer is decref'd here.
    for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
        SortFieldWriter *field_writer
            = (SortFieldWriter*)VA_Delete(field_writers, i);
        if (field_writer) {
            CharBuf *field = Seg_Field_Name(ivars->segment, i);
            SortFieldWriter_Flip(field_writer);
            int32_t count = SortFieldWriter_Finish(field_writer);
            Hash_Store(ivars->counts, (Obj*)field,
                       (Obj*)CB_newf("%i32", count));
            // A null_ord of -1 means the field had no null values.
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(field_writer);
            if (null_ord != -1) {
                Hash_Store(ivars->null_ords, (Obj*)field,
                           (Obj*)CB_newf("%i32", null_ord));
            }
            int32_t ord_width = SortFieldWriter_Get_Ord_Width(field_writer);
            Hash_Store(ivars->ord_widths, (Obj*)field,
                       (Obj*)CB_newf("%i32", ord_width));
        }
        DECREF(field_writer);
    }
    VA_Clear(field_writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up.  `path` is reused for all three temp-file deletions.
    Folder *folder = ivars->folder;
    CharBuf *seg_name = Seg_Get_Name(ivars->segment);
    CharBuf *path = CB_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_ix_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_dat_temp", seg_name);
    Folder_Delete(folder, path);
    DECREF(path);
}
// Open a file-system file handle at `path` for reading or writing, depending
// on `flags`.  On any failure, sets Err_error, decrefs `self` (this
// constructor consumes `self` on error), and returns NULL.
// NOTE(review): FH_do_open is invoked before the empty-path check —
// presumably it tolerates a NULL/empty path; confirm.
FSFileHandle*
FSFH_do_open(FSFileHandle *self, const CharBuf *path, uint32_t flags) {
    FH_do_open((FileHandle*)self, path, flags);
    FSFileHandleIVARS *const ivars = FSFH_IVARS(self);
    if (!path || !CB_Get_Size(path)) {
        Err_set_error(Err_new(CB_newf("Missing required param 'path'")));
        CFISH_DECREF(self);
        return NULL;
    }

    // Attempt to open file.
    if (flags & FH_WRITE_ONLY) {
        ivars->fd = open((char*)CB_Get_Ptr8(path), SI_posix_flags(flags), 0666);
        if (ivars->fd == -1) {
            // Reset fd so later teardown doesn't try to close fd -1.
            ivars->fd = 0;
            Err_set_error(Err_new(CB_newf("Attempt to open '%o' failed: %s",
                                          path, strerror(errno))));
            CFISH_DECREF(self);
            return NULL;
        }
        if (flags & FH_EXCLUSIVE) {
            // Exclusive creation implies a brand-new, zero-length file.
            ivars->len = 0;
        }
        else {
            // Derive length.  Seek to EOF to learn the size, then seek back
            // to the start so writes resume at the proper position.
            ivars->len = lseek64(ivars->fd, INT64_C(0), SEEK_END);
            if (ivars->len == -1) {
                Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                              ivars->path, strerror(errno))));
                CFISH_DECREF(self);
                return NULL;
            }
            else {
                int64_t check_val = lseek64(ivars->fd, INT64_C(0), SEEK_SET);
                if (check_val == -1) {
                    Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                                  ivars->path,
                                                  strerror(errno))));
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
    }
    else if (flags & FH_READ_ONLY) {
        if (SI_init_read_only(self, ivars)) {
            // On 64-bit systems, map the whole file up-front.
            if (IS_64_BIT && ivars->len) {
                ivars->buf = (char*)SI_map(self, ivars, 0, ivars->len);
                if (!ivars->buf) {
                    // An error occurred during SI_map, which has set
                    // Err_error for us already.
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
        else {
            // SI_init_read_only failed and set Err_error.
            CFISH_DECREF(self);
            return NULL;
        }
    }
    else {
        // Caller must pick exactly one of the two supported modes.
        Err_set_error(Err_new(CB_newf("Must specify FH_READ_ONLY or FH_WRITE_ONLY to open '%o'", path)));
        CFISH_DECREF(self);
        return NULL;
    }

    return self;
}
// Tokenize `len` bytes of JSON text and feed the tokens one at a time into
// the push parser LucyParseJson(), accumulating the result in `state`.
// Returns the parsed Obj tree, or NULL (with Err_error set) on failure.
static Obj*
S_do_parse_json(void *json_parser, char *json, size_t len) {
    lucy_JsonParserState state;
    state.result = NULL;
    state.errors = false;

    char *text = json;
    char *const end = text + len;
    while (text < end) {
        int token_type = -1;
        Obj *value = NULL;
        // Remember where this token started so errors can point at it.
        char *const save = text;
        switch (*text) {
            case ' ': case '\n': case '\r': case '\t':
                // Skip insignificant whitespace, which the JSON RFC defines
                // as only four ASCII characters.
                text++;
                continue;
            case '[':
                token_type = LUCY_JSON_TOKENTYPE_LEFT_SQUARE_BRACKET;
                text++;
                break;
            case ']':
                token_type = LUCY_JSON_TOKENTYPE_RIGHT_SQUARE_BRACKET;
                text++;
                break;
            case '{':
                token_type = LUCY_JSON_TOKENTYPE_LEFT_CURLY_BRACKET;
                text++;
                break;
            case '}':
                token_type = LUCY_JSON_TOKENTYPE_RIGHT_CURLY_BRACKET;
                text++;
                break;
            case ':':
                token_type = LUCY_JSON_TOKENTYPE_COLON;
                text++;
                break;
            case ',':
                token_type = LUCY_JSON_TOKENTYPE_COMMA;
                text++;
                break;
            case '"':
                value = (Obj*)S_parse_string(&text, end);
                if (value) {
                    token_type = LUCY_JSON_TOKENTYPE_STRING;
                }
                else {
                    // Clear out parser and return.
                    LucyParseJson(json_parser, 0, NULL, &state);
                    ERR_ADD_FRAME(Err_get_error());
                    return NULL;
                }
                break;
            case 'n':
                if (SI_check_keyword(text, end, "null", 4)) {
                    token_type = LUCY_JSON_TOKENTYPE_NULL;
                    text += 4;
                }
                break;
            case 't':
                if (SI_check_keyword(text, end, "true", 4)) {
                    token_type = LUCY_JSON_TOKENTYPE_TRUE;
                    value = (Obj*)CFISH_TRUE;
                    text += 4;
                }
                break;
            case 'f':
                if (SI_check_keyword(text, end, "false", 5)) {
                    token_type = LUCY_JSON_TOKENTYPE_FALSE;
                    value = (Obj*)CFISH_FALSE;
                    text += 5;
                }
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '-': {
                    // Note no '+', as JSON spec doesn't allow it.
                    value = (Obj*)S_parse_number(&text, end);
                    if (value) {
                        token_type = LUCY_JSON_TOKENTYPE_NUMBER;
                    }
                    else {
                        // Clear out parser and return.
                        LucyParseJson(json_parser, 0, NULL, &state);
                        ERR_ADD_FRAME(Err_get_error());
                        return NULL;
                    }
                }
                break;
        }
        // Any unrecognized character (or a failed keyword match) leaves
        // token_type at -1; presumably the parser flags that as a syntax
        // error via state.errors -- confirm against the parser grammar.
        LucyParseJson(json_parser, token_type, value, &state);
        if (state.errors) {
            SET_ERROR(CB_newf("JSON syntax error"), save, end);
            return NULL;
        }
    }

    // Finish up.  Token 0 signals end-of-input to the push parser.
    LucyParseJson(json_parser, 0, NULL, &state);
    if (state.errors) {
        SET_ERROR(CB_newf("JSON syntax error"), json, end);
        return NULL;
    }

    return state.result;
}
// Decode the JSON backslash escapes in the span [top, end) into a freshly
// allocated CharBuf.  Returns NULL (with Err_error set) on an invalid
// escape or invalid UTF-8.
static CharBuf*
S_unescape_text(char *const top, char *const end) {
    // The unescaped string will never be longer than the escaped string
    // because only a \u escape can theoretically be too long and
    // StrHelp_encode_utf8_char guards against sequences over 4 bytes.
    // Therefore we can allocate once and not worry about reallocating.
    size_t cap = end - top + 1;
    char *target_buf = (char*)MALLOCATE(cap);
    size_t target_size = 0;
    for (char *text = top; text < end; text++) {
        if (*text != '\\') {
            target_buf[target_size++] = *text;
        }
        else {
            // Process escape.
            // NOTE(review): assumes a backslash is never the final byte of
            // the span, so reading *text after the increment stays in
            // bounds -- presumably the caller cannot produce a trailing
            // escape; confirm against the string parser.
            text++;
            switch (*text) {
                case '"':
                    target_buf[target_size++] = '"';
                    break;
                case '\\':
                    target_buf[target_size++] = '\\';
                    break;
                case '/':
                    target_buf[target_size++] = '/';
                    break;
                case 'b':
                    target_buf[target_size++] = '\b';
                    break;
                case 'f':
                    target_buf[target_size++] = '\f';
                    break;
                case 'n':
                    target_buf[target_size++] = '\n';
                    break;
                case 'r':
                    target_buf[target_size++] = '\r';
                    break;
                case 't':
                    target_buf[target_size++] = '\t';
                    break;
                case 'u': {
                        // Copy into a temp buffer because strtol will overrun
                        // into adjacent text data for e.g. "\uAAAA1".
                        // NOTE(review): when the \u escape sits at the very
                        // end of the span this memcpy reads up to 4 bytes
                        // past `end`; the bytes appear to come from the
                        // enclosing JSON buffer and non-hex digits are
                        // rejected below, but confirm callers always supply
                        // trailing context.
                        char temp[5] = { 0, 0, 0, 0, 0 };
                        memcpy(temp, text + 1, 4);
                        text += 4;
                        char *num_end;
                        long code_point = strtol(temp, &num_end, 16);
                        char *temp_ptr = temp;
                        // All four digits must be valid hex.
                        if (num_end != temp_ptr + 4 || code_point < 0) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Invalid \\u escape"),
                                      text - 5, end);
                            return NULL;
                        }
                        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Surrogate pairs not supported"),
                                      text - 5, end);
                            return NULL;
                        }
                        target_size += StrHelp_encode_utf8_char(
                                           (uint32_t)code_point,
                                           target_buf + target_size);
                    }
                    break;
                default:
                    FREEMEM(target_buf);
                    SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
                    return NULL;
            }
        }
    }

    // NULL-terminate, sanity check, then return the escaped string.
    target_buf[target_size] = '\0';
    if (!StrHelp_utf8_valid(target_buf, target_size)) {
        FREEMEM(target_buf);
        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
        Err_set_error(Err_new(mess));
        return NULL;
    }
    // Ownership of target_buf transfers to the returned CharBuf.
    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
}
/* Initialize a SortCache for one sortable field by opening and memory-mapping
 * the segment's .ord (per-doc ordinals), .ix (value offsets), and .dat
 * (character data) files.  Throws on an unsortable field, unopenable files,
 * or inconsistent file lengths.
 */
SortCache*
SortCache_init(SortCache *self, Schema *schema, Folder *folder,
               Segment *segment, i32_t field_num) {
    CharBuf *field    = Seg_Field_Name(segment, field_num);
    CharBuf *seg_name = Seg_Get_Name(segment);
    CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num);
    CharBuf *ix_file  = CB_newf("%o/sort-%i32.ix", seg_name, field_num);
    CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num);
    i64_t ord_len, ix_len, dat_len;

    /* Derive. */
    self->doc_max = Seg_Get_Count(segment);
    self->type = Schema_Fetch_Type(schema, field);
    if (!self->type || !FType_Sortable(self->type)) {
        THROW("'%o' isn't a sortable field", field);
    }

    /* Open instreams. */
    self->ord_in = Folder_Open_In(folder, ord_file);
    self->ix_in  = Folder_Open_In(folder, ix_file);
    self->dat_in = Folder_Open_In(folder, dat_file);
    if (!self->ix_in || !self->dat_in || !self->ord_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file,
                                  ix_file, dat_file);
        DECREF(ord_file);
        DECREF(ix_file);
        DECREF(dat_file);
        Err_throw_mess(mess);
    }
    ord_len = InStream_Length(self->ord_in);
    ix_len  = InStream_Length(self->ix_in);
    dat_len = InStream_Length(self->dat_in);

    /* Calculate the number of unique values and derive the ord bit width.
     * NOTE(review): the .ix file appears to hold 8-byte entries with one
     * extra sentinel entry, hence the "- 1" -- confirm against the writer. */
    self->num_uniq = (i32_t)(ix_len / 8) - 1;
    self->width = S_calc_width(self->num_uniq);

    /* Validate file lengths.  The .ord file packs self->width bits per doc,
     * so it must cover at least doc_max + 1 ordinals. */
    {
        double bytes_per_doc = self->width / 8.0;
        double max_ords = ord_len / bytes_per_doc;
        if (max_ords < self->doc_max + 1) {
            THROW("Conflict between ord count max %f64 and doc_max %i32",
                  max_ords, self->doc_max);
        }
    }

    /* Mmap ords, offsets and character data.
     * NOTE(review): dat_len is passed without the (size_t) cast the other
     * two calls use -- confirm this is benign on 32-bit builds. */
    self->ords      = InStream_Buf(self->ord_in, (size_t)ord_len);
    self->offsets   = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len);
    self->char_data = InStream_Buf(self->dat_in, dat_len);
    {
        char *offs = (char*)self->offsets;
        self->offsets_limit = (i64_t*)(offs + ix_len);
        self->char_data_limit = self->char_data + dat_len;
    }

    DECREF(ord_file);
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
CharBuf*
IntNum_to_string(IntNum *self) {
    // Render the integer's value as a decimal string.
    int64_t value = IntNum_To_I64(self);
    return CB_newf("%i64", value);
}
CharBuf*
FloatNum_to_string(FloatNum *self) {
    // Render the floating-point value as a string.
    double value = FloatNum_To_F64(self);
    return CB_newf("%f64", value);
}
// Open a CompoundFileReader over `folder` by reading the cfmeta.json
// metadata and the consolidated cf.dat virtual-file store.  On failure,
// sets Err_error, decrefs `self`, and returns NULL.
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(CB_newf("Can't read '%o' in '%o'", cfmeta_file,
                                Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        self->format = format ? (int32_t)Obj_To_I64(format) : 0;
        self->records = (Hash*)INCREF(Hash_Fetch_Str(metadata, "files", 5));
        if (self->format < 1) {
            error = Err_new(CB_newf("Corrupt %o file: Missing or invalid 'format'",
                                    cfmeta_file));
        }
        else if (self->format > CFWriter_current_file_format) {
            // FIX: the message previously read "(current = %i32" with no
            // closing parenthesis after the current format number.
            error = Err_new(CB_newf("Unsupported compound file format: %i32 "
                                    "(current = %i32)", self->format,
                                    CFWriter_current_file_format));
        }
        else if (!self->records) {
            error = Err_new(CB_newf("Corrupt %o file: missing 'files' key",
                                    cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    self->instream = Folder_Open_In(folder, cf_file);
    if (!self->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    self->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (self->format == 1) {
        VArray *files = Hash_Keys(self->records);
        ZombieCharBuf *filename = ZCB_BLANK();
        ZombieCharBuf *folder_name
            = IxFileNames_local_part(Folder_Get_Path(folder), ZCB_BLANK());
        size_t folder_name_len = ZCB_Length(folder_name);
        for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
            CharBuf *orig = (CharBuf*)VA_Fetch(files, i);
            if (CB_Starts_With(orig, (CharBuf*)folder_name)) {
                // Re-key "seg_name/file" entries as just "file".
                Obj *record = Hash_Delete(self->records, (Obj*)orig);
                ZCB_Assign(filename, orig);
                ZCB_Nip(filename, folder_name_len + sizeof(DIR_SEP) - 1);
                Hash_Store(self->records, (Obj*)filename, (Obj*)record);
            }
        }
        DECREF(files);
    }

    return self;
}
// Allocate the file-scope CharBuf fixtures used by this test file.
static void
S_init_strings(void) {
    foo           = CB_newf("foo");
    bar           = CB_newf("bar");
    baz           = CB_newf("baz");
    boffo         = CB_newf("boffo");
    banana        = CB_newf("banana");
    foo_bar       = CB_newf("foo/bar");
    foo_bar_baz   = CB_newf("foo/bar/baz");
    foo_bar_boffo = CB_newf("foo/bar/boffo");
    foo_boffo     = CB_newf("foo/boffo");
    foo_foo       = CB_newf("foo/foo");
    nope          = CB_newf("nope");
    nope_nyet     = CB_newf("nope/nyet");
}