/**
 * Look up the sort ordinal stored for a document.
 *
 * Ordinals are read from `self->ords` according to `self->width`: widths of
 * 1, 2 and 4 go through the IntArr_u1get/u2get/u4get helpers, while widths
 * of 8, 16 and 32 index `ords` directly as an array of the matching unsigned
 * integer type.
 *
 * @param self The cache to query.
 * @param doc_id Document number; must lie within [0, self->doc_max].
 * @return The ordinal recorded for `doc_id`.
 *
 * Throws if `doc_id` is negative or greater than `self->doc_max`.
 */
i32_t
SortCache_ordinal(SortCache *self, i32_t doc_id) {
    if (doc_id < 0) {
        // A negative id would index in front of the ords buffer.
        THROW("Out of range: %i32 < 0", doc_id);
    }
    if (doc_id > self->doc_max) {
        THROW("Out of range: %i32 > %i32", doc_id, self->doc_max);
    }

    switch (self->width) {
        case 1:
            return IntArr_u1get(self->ords, doc_id);
        case 2:
            return IntArr_u2get(self->ords, doc_id);
        case 4:
            return IntArr_u4get(self->ords, doc_id);
        case 8: {
            u8_t *ints = (u8_t*)self->ords;
            return ints[doc_id];
        }
        case 16: {
            u16_t *ints = (u16_t*)self->ords;
            return ints[doc_id];
        }
        case 32: {
            u32_t *ints = (u32_t*)self->ords;
            return ints[doc_id];
        }
        default:
            // No other width is handled here.
            UNREACHABLE_RETURN(i32_t);
    }
}
/**
 * Move the iterator backwards by up to `num` code points.
 *
 * Stops early when the start of the string is reached.  The iterator's
 * byte offset is only updated when the whole operation succeeds.
 *
 * @param self The iterator to move.
 * @param num Maximum number of code points to step back over.
 * @return The number of code points actually skipped.
 *
 * Throws if the start of the string is reached while the most recently
 * examined byte is still a UTF-8 continuation byte (10xxxxxx).
 */
size_t
StrIter_Recede_IMP(StringIterator *self, size_t num) {
    const uint8_t *const bytes = (const uint8_t*)self->string->ptr;
    size_t pos     = self->byte_offset;
    size_t skipped = 0;

    for (; skipped < num && pos > 0; ++skipped) {
        // Walk back over continuation bytes until a non-continuation byte
        // has been consumed.
        while ((bytes[--pos] & 0xC0) == 0x80) {
            if (pos == 0) {
                THROW(ERR, "StrIter_Recede: Invalid UTF-8");
                UNREACHABLE_RETURN(size_t);
            }
        }
    }

    self->byte_offset = pos;
    return skipped;
}
/**
 * Decode the code point at the iterator's position and advance past it.
 *
 * @param self The iterator to read from.
 * @return The decoded code point, or STR_OOB if the iterator is already at
 * or beyond the end of the string (in which case it is not moved).
 *
 * Throws if a multi-byte sequence runs past the end of the string.
 */
int32_t
StrIter_Next_IMP(StringIterator *self) {
    String *const str   = self->string;
    const size_t  limit = str->size;
    size_t        pos   = self->byte_offset;

    if (pos >= limit) {
        return STR_OOB;
    }

    const uint8_t *const bytes = (const uint8_t*)str->ptr;
    int32_t code_point = bytes[pos++];

    // Single-byte (ASCII) fast path.
    if (code_point < 0x80) {
        self->byte_offset = pos;
        return code_point;
    }

    /*
     * The 'mask' bit is tricky. In each iteration, 'code_point' is
     * left-shifted by 6 and 'mask' by 5 bits. So relative to the first
     * byte of the sequence, 'mask' moves one bit to the right.
     *
     * The possible outcomes after the loop are:
     *
     * Two byte sequence
     *   code_point: 110aaaaa bbbbbb
     *   mask:       00100000 000000
     *
     * Three byte sequence
     *   code_point: 1110aaaa bbbbbb cccccc
     *   mask:       00010000 000000 000000
     *
     * Four byte sequence
     *   code_point: 11110aaa bbbbbb cccccc dddddd
     *   mask:       00001000 000000 000000 000000
     *
     * This also illustrates why the exit condition (code_point & mask)
     * works. After the first iteration, the third most significant bit
     * is tested. After the second iteration, the fourth, and so on.
     */
    int32_t mask = 1 << 6;
    for (;;) {
        if (pos >= limit) {
            THROW(ERR, "StrIter_Next: Invalid UTF-8");
            UNREACHABLE_RETURN(int32_t);
        }
        code_point = (code_point << 6) | (bytes[pos++] & 0x3F);
        mask <<= 5;
        if (!(code_point & mask)) {
            break;
        }
    }

    // Strip the length-marker bits that came from the first byte.
    code_point &= mask - 1;

    self->byte_offset = pos;
    return code_point;
}
/**
 * Order two iterators over the same string by their byte offsets.
 *
 * @param self The left-hand iterator.
 * @param other The right-hand object; passed through
 * CERTIFY(other, STRINGITERATOR) before use.
 * @return -1 if `self` is positioned before `other`, 1 if after, 0 if both
 * share the same byte offset.
 *
 * Throws if the two iterators do not refer to the same string object.
 */
int32_t
StrIter_Compare_To_IMP(StringIterator *self, Obj *other) {
    StringIterator *twin = (StringIterator*)CERTIFY(other, STRINGITERATOR);

    if (self->string != twin->string) {
        THROW(ERR, "Can't compare iterators of different strings");
        UNREACHABLE_RETURN(int32_t);
    }

    const size_t mine   = self->byte_offset;
    const size_t theirs = twin->byte_offset;

    if (mine == theirs) {
        return 0;
    }
    return mine < theirs ? -1 : 1;
}
/**
 * Decode the code point immediately before the iterator's position and
 * move the iterator back to the start of that code point.
 *
 * @param self The iterator to read from.
 * @return The decoded code point, or STR_OOB if the iterator is already at
 * the start of the string (in which case it is not moved).
 *
 * Throws if the start of the string is reached before the first byte of a
 * multi-byte sequence has been found.
 */
int32_t
StrIter_Prev_IMP(StringIterator *self) {
    size_t pos = self->byte_offset;

    if (pos == 0) {
        return STR_OOB;
    }

    const uint8_t *const bytes = (const uint8_t*)self->string->ptr;
    int32_t code_point = bytes[--pos];

    // Single-byte (ASCII) fast path.
    if (code_point < 0x80) {
        self->byte_offset = pos;
        return code_point;
    }

    // Multi-byte sequence: assemble the result from right to left.
    if (pos == 0) {
        THROW(ERR, "StrIter_Prev: Invalid UTF-8");
        UNREACHABLE_RETURN(int32_t);
    }
    code_point &= 0x3F;

    int32_t lead_mask = 0x1F;  // Payload bits of the first byte; narrows
                               // by one bit per continuation byte seen.
    int     shift     = 6;
    int32_t unit;

    for (;;) {
        unit = bytes[--pos];
        if ((unit & 0xC0) != 0x80) {
            // Not a continuation byte, so treat it as the first byte.
            break;
        }
        if (pos == 0) {
            THROW(ERR, "StrIter_Prev: Invalid UTF-8");
            UNREACHABLE_RETURN(int32_t);
        }
        code_point |= (unit & 0x3F) << shift;
        shift      += 6;
        lead_mask >>= 1;
    }

    code_point |= (unit & lead_mask) << shift;

    self->byte_offset = pos;
    return code_point;
}
/**
 * Compare this Float against another numeric object.
 *
 * A FLOAT argument is compared through S_compare_f64().  An INTEGER
 * argument is compared through S_compare_i64_f64(), which takes the
 * integer first; its result is negated so that the return value is
 * expressed from `self`'s point of view.
 *
 * @param self The Float on the left-hand side.
 * @param other The object to compare against; must be a FLOAT or INTEGER.
 * @return The (possibly negated) result of the helper comparison --
 * presumably negative / zero / positive in the usual compare_to sense.
 *
 * Throws if `other` is neither a FLOAT nor an INTEGER.
 */
int32_t
Float_Compare_To_IMP(Float *self, Obj *other) {
    if (Obj_is_a(other, FLOAT)) {
        return S_compare_f64(self->value, ((Float*)other)->value);
    }

    if (Obj_is_a(other, INTEGER)) {
        // The helper compares (integer, float); flip the sign to get
        // (float, integer).
        return -S_compare_i64_f64(((Integer*)other)->value, self->value);
    }

    THROW(ERR, "Can't compare Float to %o", Obj_get_class_name(other));
    UNREACHABLE_RETURN(int32_t);
}
/**
 * Pick the comparison action code for a sort rule.
 *
 * The rule's "reverse" flag, normalized to 0 or 1, is added to the base
 * action constant -- presumably each reversed action is numbered one above
 * its forward counterpart.
 *
 * @param rule The sort rule to translate.
 * @param cache Sort cache for the rule's field, or NULL.  Only consulted
 * for field rules; a field rule without a cache yields AUTO_TIE.
 * @return One of the COMPARE_BY_* action codes (plus the reverse offset),
 * or AUTO_TIE.
 *
 * Throws if the rule type is not SCORE, DOC_ID or FIELD, or if the cache
 * reports an ordinal width other than 1, 2, 4, 8, 16 or 32.
 */
static int8_t
S_derive_action(SortRule *rule, SortCache *cache) {
    const int32_t type     = SortRule_Get_Type(rule);
    const bool    reversed = !!SortRule_Get_Reverse(rule);

    if (type == SortRule_SCORE) {
        return COMPARE_BY_SCORE + reversed;
    }
    if (type == SortRule_DOC_ID) {
        return COMPARE_BY_DOC_ID + reversed;
    }
    if (type != SortRule_FIELD) {
        THROW(ERR, "Unrecognized SortRule type %i32", type);
        UNREACHABLE_RETURN(int8_t);
    }

    // Field rule: without a cache there are no ordinals to compare.
    if (!cache) {
        return AUTO_TIE;
    }

    const int8_t width = SortCache_Get_Ord_Width(cache);
    switch (width) {
        case 1:
            return COMPARE_BY_ORD1 + reversed;
        case 2:
            return COMPARE_BY_ORD2 + reversed;
        case 4:
            return COMPARE_BY_ORD4 + reversed;
        case 8:
            return COMPARE_BY_ORD8 + reversed;
        case 16:
            return (SortCache_Get_Native_Ords(cache)
                    ? COMPARE_BY_NATIVE_ORD16
                    : COMPARE_BY_ORD16) + reversed;
        case 32:
            return (SortCache_Get_Native_Ords(cache)
                    ? COMPARE_BY_NATIVE_ORD32
                    : COMPARE_BY_ORD32) + reversed;
        default:
            THROW(ERR, "Unknown width: %i8", width);
            break;
    }

    UNREACHABLE_RETURN(int8_t);
}
/**
 * Find the index of the first element of `array` that matches `obj`.
 *
 * A NULL `obj` matches a NULL element.  A non-NULL `obj` matches a
 * non-NULL element that has the same class and for which Obj_Equals()
 * reports true.
 *
 * @param array The vector to search.
 * @param obj The object to look for; may be NULL.
 * @return The index of the first matching element.
 *
 * Throws if no element matches.
 */
uint32_t
S_find_in_array(Vector *array, Obj *obj) {
    const uint32_t count = Vec_Get_Size(array);

    for (uint32_t tick = 0; tick < count; tick++) {
        Obj *elem = Vec_Fetch(array, tick);

        if (obj == NULL) {
            if (elem == NULL) {
                return tick;
            }
            continue;
        }
        if (elem == NULL) {
            continue;
        }

        // Only ask Obj_Equals() when the classes are identical.
        if (Obj_get_class(obj) == Obj_get_class(elem)
            && Obj_Equals(obj, elem)
           ) {
            return tick;
        }
    }

    THROW(ERR, "Couldn't find match for %o", obj);
    UNREACHABLE_RETURN(uint32_t);
}
/**
 * Look up the sort ordinal stored for a document.
 *
 * How the ordinal is read depends on `ord_width`:
 *   - 1, 2, 4: via the NumUtil_u1get/u2get/u4get helpers.
 *   - 8:       direct byte index.
 *   - 16, 32:  direct array index when `native_ords` is set; otherwise the
 *              value is decoded from big-endian bytes at the element's
 *              byte position.
 *
 * @param self The cache to query.
 * @param doc_id Document number; must lie within [0, doc_max].
 * @return The ordinal recorded for `doc_id`.
 *
 * Throws if `doc_id` is out of range or if `ord_width` is not one of the
 * widths listed above.
 */
int32_t
SortCache_Ordinal_IMP(SortCache *self, int32_t doc_id) {
    SortCacheIVARS *const ivars = SortCache_IVARS(self);

    if (doc_id < 0 || doc_id > ivars->doc_max) {
        THROW(ERR, "Out of range: %i32 max: %i32", doc_id, ivars->doc_max);
    }

    switch (ivars->ord_width) {
        case 1:
            return NumUtil_u1get(ivars->ords, (uint32_t)doc_id);
        case 2:
            return NumUtil_u2get(ivars->ords, (uint32_t)doc_id);
        case 4:
            return NumUtil_u4get(ivars->ords, (uint32_t)doc_id);
        case 8:
            return ((uint8_t*)ivars->ords)[doc_id];
        case 16:
            if (!ivars->native_ords) {
                // Stored big-endian: decode from the element's raw bytes.
                uint8_t *encoded = (uint8_t*)ivars->ords
                                   + (size_t)doc_id * sizeof(uint16_t);
                return NumUtil_decode_bigend_u16(encoded);
            }
            return ((uint16_t*)ivars->ords)[doc_id];
        case 32:
            if (!ivars->native_ords) {
                // Stored big-endian: decode from the element's raw bytes.
                uint8_t *encoded = (uint8_t*)ivars->ords
                                   + (size_t)doc_id * sizeof(int32_t);
                return (int32_t)NumUtil_decode_bigend_u32(encoded);
            }
            return ((int32_t*)ivars->ords)[doc_id];
        default:
            break;
    }

    THROW(ERR, "Invalid ord width: %i32", ivars->ord_width);
    UNREACHABLE_RETURN(int32_t);
}
/**
 * Move the iterator forwards by up to `num` code points.
 *
 * Each step jumps ahead by the length that StrHelp_UTF8_COUNT gives for
 * the byte at the current position.  Stops early when the end of the
 * string is reached.  The iterator's byte offset is only updated when the
 * whole operation succeeds.
 *
 * @param self The iterator to move.
 * @param num Maximum number of code points to skip.
 * @return The number of code points actually skipped.
 *
 * Throws if the final jump lands beyond the end of the string.
 */
size_t
StrIter_Advance_IMP(StringIterator *self, size_t num) {
    const uint8_t *const bytes = (const uint8_t*)self->string->ptr;
    const size_t limit   = self->string->size;
    size_t       pos     = self->byte_offset;
    size_t       skipped = 0;

    for (; skipped < num && pos < limit; ++skipped) {
        pos += StrHelp_UTF8_COUNT[bytes[pos]];
    }

    // A sequence whose declared length overshoots the buffer is truncated.
    if (pos > limit) {
        THROW(ERR, "StrIter_Advance: Invalid UTF-8");
        UNREACHABLE_RETURN(size_t);
    }

    self->byte_offset = pos;
    return skipped;
}
/**
 * Placeholder: not implemented yet.
 *
 * @param vself Ignored.
 * @return Never returns normally; always throws a CFISH_ERR with the
 * message "TODO".
 */
uint32_t
cfish_dec_refcount(void *vself) {
    (void)vself;  // Unused until this stub is implemented.
    THROW(CFISH_ERR, "TODO");
    UNREACHABLE_RETURN(uint32_t);
}
/**
 * Placeholder: not implemented yet.
 *
 * @param self Ignored.
 * @return Never returns normally; always throws a LUCY_ERR with the
 * message "TODO".
 */
uint32_t
lucy_Obj_dec_refcount(lucy_Obj *self) {
    (void)self;  // Unused until this stub is implemented.
    THROW(LUCY_ERR, "TODO");
    UNREACHABLE_RETURN(uint32_t);
}