Beispiel #1
0
CharBuf*
Float32Type_specifier(Float32Type *self) {
    UNUSED_VAR(self);
    return CB_newf("f32_t");
}
Beispiel #2
0
CharBuf*
Int64Type_specifier(Int64Type *self) {
    UNUSED_VAR(self);
    return CB_newf("i64_t");
}
Beispiel #3
0
void
SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *const field_writers = ivars->field_writers;

    // If we have no data, bail out.
    if (!ivars->temp_ord_out) { return; }

    // If we've either flushed or added segments, flush everything so that any
    // one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
            SortFieldWriter *field_writer
                = (SortFieldWriter*)VA_Fetch(field_writers, i);
            if (field_writer) {
                SortFieldWriter_Flush(field_writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    for (uint32_t i = 1, max = VA_Get_Size(field_writers); i < max; i++) {
        SortFieldWriter *field_writer
            = (SortFieldWriter*)VA_Delete(field_writers, i);
        if (field_writer) {
            CharBuf *field = Seg_Field_Name(ivars->segment, i);
            SortFieldWriter_Flip(field_writer);
            int32_t count = SortFieldWriter_Finish(field_writer);
            Hash_Store(ivars->counts, (Obj*)field,
                       (Obj*)CB_newf("%i32", count));
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(field_writer);
            if (null_ord != -1) {
                Hash_Store(ivars->null_ords, (Obj*)field,
                           (Obj*)CB_newf("%i32", null_ord));
            }
            int32_t ord_width = SortFieldWriter_Get_Ord_Width(field_writer);
            Hash_Store(ivars->ord_widths, (Obj*)field,
                       (Obj*)CB_newf("%i32", ord_width));
        }

        DECREF(field_writer);
    }
    VA_Clear(field_writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up.
    Folder  *folder   = ivars->folder;
    CharBuf *seg_name = Seg_Get_Name(ivars->segment);
    CharBuf *path     = CB_newf("%o/sort_ord_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_ix_temp", seg_name);
    Folder_Delete(folder, path);
    CB_setf(path, "%o/sort_dat_temp", seg_name);
    Folder_Delete(folder, path);
    DECREF(path);
}
Beispiel #4
0
FSFileHandle*
FSFH_do_open(FSFileHandle *self, const CharBuf *path, uint32_t flags) {
    FH_do_open((FileHandle*)self, path, flags);
    FSFileHandleIVARS *const ivars = FSFH_IVARS(self);
    if (!path || !CB_Get_Size(path)) {
        Err_set_error(Err_new(CB_newf("Missing required param 'path'")));
        CFISH_DECREF(self);
        return NULL;
    }

    // Attempt to open file.
    if (flags & FH_WRITE_ONLY) {
        ivars->fd = open((char*)CB_Get_Ptr8(path), SI_posix_flags(flags), 0666);
        if (ivars->fd == -1) {
            ivars->fd = 0;
            Err_set_error(Err_new(CB_newf("Attempt to open '%o' failed: %s",
                                          path, strerror(errno))));
            CFISH_DECREF(self);
            return NULL;
        }
        if (flags & FH_EXCLUSIVE) {
            ivars->len = 0;
        }
        else {
            // Derive length.
            ivars->len = lseek64(ivars->fd, INT64_C(0), SEEK_END);
            if (ivars->len == -1) {
                Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                              ivars->path, strerror(errno))));
                CFISH_DECREF(self);
                return NULL;
            }
            else {
                int64_t check_val = lseek64(ivars->fd, INT64_C(0), SEEK_SET);
                if (check_val == -1) {
                    Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                                  ivars->path, strerror(errno))));
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
    }
    else if (flags & FH_READ_ONLY) {
        if (SI_init_read_only(self, ivars)) {
            // On 64-bit systems, map the whole file up-front.
            if (IS_64_BIT && ivars->len) {
                ivars->buf = (char*)SI_map(self, ivars, 0, ivars->len);
                if (!ivars->buf) {
                    // An error occurred during SI_map, which has set
                    // Err_error for us already.
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
        else {
            CFISH_DECREF(self);
            return NULL;
        }
    }
    else {
        Err_set_error(Err_new(CB_newf("Must specify FH_READ_ONLY or FH_WRITE_ONLY to open '%o'",
                                      path)));
        CFISH_DECREF(self);
        return NULL;
    }

    return self;
}
Beispiel #5
0
static Obj*
S_do_parse_json(void *json_parser, char *json, size_t len) {
    lucy_JsonParserState state;
    state.result = NULL;
    state.errors = false;

    char *text = json;
    char *const end = text + len;
    while (text < end) {
        int  token_type = -1;
        Obj *value      = NULL;
        char *const save = text;
        switch (*text) {
            case ' ': case '\n': case '\r': case '\t':
                // Skip insignificant whitespace, which the JSON RFC defines
                // as only four ASCII characters.
                text++;
                continue;
            case '[':
                token_type = LUCY_JSON_TOKENTYPE_LEFT_SQUARE_BRACKET;
                text++;
                break;
            case ']':
                token_type = LUCY_JSON_TOKENTYPE_RIGHT_SQUARE_BRACKET;
                text++;
                break;
            case '{':
                token_type = LUCY_JSON_TOKENTYPE_LEFT_CURLY_BRACKET;
                text++;
                break;
            case '}':
                token_type = LUCY_JSON_TOKENTYPE_RIGHT_CURLY_BRACKET;
                text++;
                break;
            case ':':
                token_type = LUCY_JSON_TOKENTYPE_COLON;
                text++;
                break;
            case ',':
                token_type = LUCY_JSON_TOKENTYPE_COMMA;
                text++;
                break;
            case '"':
                value = (Obj*)S_parse_string(&text, end);
                if (value) {
                    token_type = LUCY_JSON_TOKENTYPE_STRING;
                }
                else {
                    // Clear out parser and return.
                    LucyParseJson(json_parser, 0, NULL, &state);
                    ERR_ADD_FRAME(Err_get_error());
                    return NULL;
                }
                break;
            case 'n':
                if (SI_check_keyword(text, end, "null", 4)) {
                    token_type = LUCY_JSON_TOKENTYPE_NULL;
                    text += 4;
                }
                break;
            case 't':
                if (SI_check_keyword(text, end, "true", 4)) {
                    token_type = LUCY_JSON_TOKENTYPE_TRUE;
                    value = (Obj*)CFISH_TRUE;
                    text += 4;
                }
                break;
            case 'f':
                if (SI_check_keyword(text, end, "false", 5)) {
                    token_type = LUCY_JSON_TOKENTYPE_FALSE;
                    value = (Obj*)CFISH_FALSE;
                    text += 5;
                }
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '-': { // Note no '+', as JSON spec doesn't allow it.
                    value = (Obj*)S_parse_number(&text, end);
                    if (value) {
                        token_type = LUCY_JSON_TOKENTYPE_NUMBER;
                    }
                    else {
                        // Clear out parser and return.
                        LucyParseJson(json_parser, 0, NULL, &state);
                        ERR_ADD_FRAME(Err_get_error());
                        return NULL;
                    }
                }
                break;
        }
        LucyParseJson(json_parser, token_type, value, &state);
        if (state.errors) {
            SET_ERROR(CB_newf("JSON syntax error"), save, end);
            return NULL;
        }
    }

    // Finish up.
    LucyParseJson(json_parser, 0, NULL, &state);
    if (state.errors) {
        SET_ERROR(CB_newf("JSON syntax error"), json, end);
        return NULL;
    }
    return state.result;
}
Beispiel #6
0
static CharBuf*
S_unescape_text(char *const top, char *const end) {
    // The unescaped string will never be longer than the escaped string
    // because only a \u escape can theoretically be too long and
    // StrHelp_encode_utf8_char guards against sequences over 4 bytes.
    // Therefore we can allocate once and not worry about reallocating.
    size_t cap = end - top + 1;
    char *target_buf = (char*)MALLOCATE(cap);
    size_t target_size = 0;
    for (char *text = top; text < end; text++) {
        if (*text != '\\') {
            target_buf[target_size++] = *text;
        }
        else {
            // Process escape.
            text++;
            switch (*text) {
                case '"':
                    target_buf[target_size++] = '"';
                    break;
                case '\\':
                    target_buf[target_size++] = '\\';
                    break;
                case '/':
                    target_buf[target_size++] = '/';
                    break;
                case 'b':
                    target_buf[target_size++] = '\b';
                    break;
                case 'f':
                    target_buf[target_size++] = '\f';
                    break;
                case 'n':
                    target_buf[target_size++] = '\n';
                    break;
                case 'r':
                    target_buf[target_size++] = '\r';
                    break;
                case 't':
                    target_buf[target_size++] = '\t';
                    break;
                case 'u': {
                        // Copy into a temp buffer because strtol will overrun
                        // into adjacent text data for e.g. "\uAAAA1".
                        char temp[5] = { 0, 0, 0, 0, 0 };
                        memcpy(temp, text + 1, 4);
                        text += 4;
                        char *num_end;
                        long code_point = strtol(temp, &num_end, 16);
                        char *temp_ptr = temp;
                        if (num_end != temp_ptr + 4 || code_point < 0) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Invalid \\u escape"), text - 5, end);
                            return NULL;
                        }
                        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Surrogate pairs not supported"),
                                      text - 5, end);
                            return NULL;
                        }
                        target_size += StrHelp_encode_utf8_char((uint32_t)code_point,
                                                                target_buf + target_size);
                    }
                    break;
                default:
                    FREEMEM(target_buf);
                    SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
                    return NULL;
            }
        }
    }

    // NULL-terminate, sanity check, then return the escaped string.
    target_buf[target_size] = '\0';
    if (!StrHelp_utf8_valid(target_buf, target_size)) {
        FREEMEM(target_buf);
        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
        Err_set_error(Err_new(mess));
        return NULL;
    }
    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
}
Beispiel #7
0
SortCache*
SortCache_init(SortCache *self, Schema *schema, Folder *folder,
               Segment *segment, i32_t field_num)
{
    CharBuf *field    = Seg_Field_Name(segment, field_num);
    CharBuf *seg_name = Seg_Get_Name(segment);
    CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num);
    CharBuf *ix_file  = CB_newf("%o/sort-%i32.ix",  seg_name, field_num);
    CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num);
    i64_t ord_len, ix_len, dat_len;

    /* Derive. */
    self->doc_max = Seg_Get_Count(segment);
    self->type    = Schema_Fetch_Type(schema, field);
    if (!self->type || !FType_Sortable(self->type)) {
        THROW("'%o' isn't a sortable field", field);
    }

    /* Open instreams. */
    self->ord_in  = Folder_Open_In(folder, ord_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    self->dat_in  = Folder_Open_In(folder, dat_file);
    if (!self->ix_in || !self->dat_in || !self->ord_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, 
            ix_file, dat_file);
        DECREF(ord_file);
        DECREF(ix_file);
        DECREF(dat_file);
        Err_throw_mess(mess);
    }
    ord_len = InStream_Length(self->ord_in);
    ix_len  = InStream_Length(self->ix_in);
    dat_len = InStream_Length(self->dat_in);

    /* Calculate the number of unique values and derive the ord bit width. */
    self->num_uniq = (i32_t)(ix_len / 8) - 1; 
    self->width    = S_calc_width(self->num_uniq);

    /* Validate file lengths. */
    {
        double bytes_per_doc = self->width / 8.0;
        double max_ords      = ord_len / bytes_per_doc;
        if (max_ords < self->doc_max + 1) {
            THROW("Conflict between ord count max %f64 and doc_max %i32", 
                max_ords, self->doc_max);
        }
    }

    /* Mmap ords, offsets and character data. */
    self->ords      = InStream_Buf(self->ord_in, (size_t)ord_len);
    self->offsets   = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len);
    self->char_data = InStream_Buf(self->dat_in, dat_len);
    {
        char *offs            = (char*)self->offsets;
        self->offsets_limit   = (i64_t*)(offs + ix_len);
        self->char_data_limit = self->char_data + dat_len;
    }

    DECREF(ord_file);
    DECREF(ix_file);
    DECREF(dat_file);

    return self;
}
Beispiel #8
0
Datei: Num.c Projekt: theory/lucy
CharBuf*
IntNum_to_string(IntNum *self) {
    return CB_newf("%i64", IntNum_To_I64(self));
}
Beispiel #9
0
Datei: Num.c Projekt: theory/lucy
CharBuf*
FloatNum_to_string(FloatNum *self) {
    return CB_newf("%f64", FloatNum_To_F64(self));
}
Beispiel #10
0
CompoundFileReader*
CFReader_do_open(CompoundFileReader *self, Folder *folder) {
    CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11);
    Hash *metadata = (Hash*)Json_slurp_json((Folder*)folder, cfmeta_file);
    Err *error = NULL;

    Folder_init((Folder*)self, Folder_Get_Path(folder));

    // Parse metadata file.
    if (!metadata || !Hash_Is_A(metadata, HASH)) {
        error = Err_new(CB_newf("Can't read '%o' in '%o'", cfmeta_file,
                                Folder_Get_Path(folder)));
    }
    else {
        Obj *format = Hash_Fetch_Str(metadata, "format", 6);
        self->format = format ? (int32_t)Obj_To_I64(format) : 0;
        self->records = (Hash*)INCREF(Hash_Fetch_Str(metadata, "files", 5));
        if (self->format < 1) {
            error = Err_new(CB_newf("Corrupt %o file: Missing or invalid 'format'",
                                    cfmeta_file));
        }
        else if (self->format > CFWriter_current_file_format) {
            error = Err_new(CB_newf("Unsupported compound file format: %i32 "
                                    "(current = %i32", self->format,
                                    CFWriter_current_file_format));
        }
        else if (!self->records) {
            error = Err_new(CB_newf("Corrupt %o file: missing 'files' key",
                                    cfmeta_file));
        }
    }
    DECREF(metadata);
    if (error) {
        Err_set_error(error);
        DECREF(self);
        return NULL;
    }

    // Open an instream which we'll clone over and over.
    CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6);
    self->instream = Folder_Open_In(folder, cf_file);
    if (!self->instream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(self);
        return NULL;
    }

    // Assign.
    self->real_folder = (Folder*)INCREF(folder);

    // Strip directory name from filepaths for old format.
    if (self->format == 1) {
        VArray *files = Hash_Keys(self->records);
        ZombieCharBuf *filename = ZCB_BLANK();
        ZombieCharBuf *folder_name
            = IxFileNames_local_part(Folder_Get_Path(folder), ZCB_BLANK());
        size_t folder_name_len = ZCB_Length(folder_name);

        for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
            CharBuf *orig = (CharBuf*)VA_Fetch(files, i);
            if (CB_Starts_With(orig, (CharBuf*)folder_name)) {
                Obj *record = Hash_Delete(self->records, (Obj*)orig);
                ZCB_Assign(filename, orig);
                ZCB_Nip(filename, folder_name_len + sizeof(DIR_SEP) - 1);
                Hash_Store(self->records, (Obj*)filename, (Obj*)record);
            }
        }

        DECREF(files);
    }

    return self;
}
Beispiel #11
0
static void
S_init_strings(void) {
    foo           = CB_newf("foo");
    bar           = CB_newf("bar");
    baz           = CB_newf("baz");
    boffo         = CB_newf("boffo");
    banana        = CB_newf("banana");
    foo_bar       = CB_newf("foo/bar");
    foo_bar_baz   = CB_newf("foo/bar/baz");
    foo_bar_boffo = CB_newf("foo/bar/boffo");
    foo_boffo     = CB_newf("foo/boffo");
    foo_foo       = CB_newf("foo/foo");
    nope          = CB_newf("nope");
    nope_nyet     = CB_newf("nope/nyet");
}