Example #1
0
File: hash.c Project: GJDuck/SMCHR
/*
 * Compute the hash of a term.
 *
 * The term's type tag selects the type-specific hash routine, which is
 * given the correspondingly unwrapped value.  A type tag that matches none
 * of the known cases yields HASH(0, 0).
 */
extern hash_t hash_term(term_t t)
{
    /* Fallback value; kept when no case below recognises the type. */
    hash_t result = HASH(0, 0);

    switch (type(t))
    {
        case VAR:
            result = hash_var(var(t));
            break;
        case NIL:
            result = hash_nil();
            break;
        case BOOL:
            result = hash_bool(boolean(t));
            break;
        case NUM:
            result = hash_num(num(t));
            break;
        case ATOM:
            result = hash_atom(atom(t));
            break;
        case STR:
            result = hash_string(string(t));
            break;
        case FOREIGN:
            result = hash_foreign(foreign(t));
            break;
        case FUNC:
            result = hash_func(func(t));
            break;
        default:
            break;
    }

    return result;
}
// Folds an object tree into the running hash stored at *hash.
//
// Scalars and strings are mixed in with the matching hash_* helper.
// Arrays hash each child in order and then the element count. Maps hash
// each key (as a string) followed by its value, and then the pair count.
// An object whose type is not handled here terminates the program via
// abort().
static void hash_object(object_t* object, uint32_t* hash) {
    switch (object->type) {
        case type_nil:
            *hash = hash_nil(*hash);
            break;

        case type_bool:
            *hash = hash_bool(*hash, object->b);
            break;

        case type_double:
            *hash = hash_double(*hash, object->d);
            break;

        case type_int:
            *hash = hash_i64(*hash, object->i);
            break;

        case type_uint:
            *hash = hash_u64(*hash, object->u);
            break;

        case type_str:
            *hash = hash_str(*hash, object->str, object->l);
            break;

        // unused types in this benchmark
        #if 0
        case type_float:
            write_float(hash, node_float(node));
            break;
        case type_bin:
            *hash = hash_str(*hash, node_data(node), node_data_len(node));
            break;
        case type_ext:
            *hash = hash_u8(*hash, node_exttype(node));
            *hash = hash_str(*hash, node_data(node), node_data_len(node));
            break;
        #endif

        case type_array: {
            const uint32_t element_count = object->l;
            for (uint32_t idx = 0; idx < element_count; ++idx) {
                hash_object(&object->children[idx], hash);
            }
            // the count is mixed in after the elements
            *hash = hash_u32(*hash, element_count);
            break;
        }

        case type_map: {
            const uint32_t pair_count = object->l;
            for (uint32_t idx = 0; idx < pair_count; ++idx) {
                // children holds key/value pairs back to back: the key sits
                // at the even slot, the value right after it
                object_t* pair = object->children + (idx * 2);

                // we expect keys to be short strings
                *hash = hash_str(*hash, pair->str, pair->l);

                hash_object(pair + 1, hash);
            }
            // the count is mixed in after the pairs
            *hash = hash_u32(*hash, pair_count);
            break;
        }

        default:
            // unhandled type: give up
            abort();
    }
}
// Folds a parsed JSON value (and everything below it) into the running
// hash stored at *hash.
//
// Returns true on success. Returns false if the value, or any value nested
// inside it, has a type that is not handled here; in that case *hash may
// already have been partially updated.
static bool hash_json(json_value* value, uint32_t* hash) {
    switch (value->type) {
        case json_null:
            *hash = hash_nil(*hash);
            return true;

        case json_boolean:
            *hash = hash_bool(*hash, value->u.boolean != 0);
            return true;

        case json_double:
            *hash = hash_double(*hash, value->u.dbl);
            return true;

        case json_string:
            *hash = hash_str(*hash, value->u.string.ptr, value->u.string.length);
            return true;

        case json_integer:
            // json-parser does not support JSON big integers at all.
            // Judging from the code, it looks like it just overflows and
            // gives garbage with no error if an integer is outside the
            // range of int64_t.
            *hash = hash_i64(*hash, value->u.integer);
            return true;

        case json_array: {
            unsigned int idx = 0;
            while (idx < value->u.array.length) {
                json_value* element = value->u.array.values[idx];
                if (!hash_json(element, hash)) {
                    return false;
                }
                ++idx;
            }
            // element count goes in after the elements themselves
            *hash = hash_u32(*hash, value->u.array.length);
            return true;
        }

        case json_object: {
            unsigned int idx = 0;
            while (idx < value->u.object.length) {
                json_object_entry* member = &value->u.object.values[idx];

                // key first, then the value it maps to
                *hash = hash_str(*hash, member->name, member->name_length);
                if (!hash_json(member->value, hash)) {
                    return false;
                }
                ++idx;
            }
            // member count goes in after the members themselves
            *hash = hash_u32(*hash, value->u.object.length);
            return true;
        }

        default:
            // any other type is reported as a failure
            return false;
    }
}
// Reads the next object from the cmp stream and folds it, together with any
// nested elements, into the running hash stored at *hash.
//
// Returns true on success. Returns false when a read fails, when a string
// claims more bytes than the buffer has left, or when the object type is
// not handled here.
static bool hash_element(cmp_ctx_t* cmp, uint32_t* hash) {
    buffer_t* buffer = (buffer_t*)cmp->buf;

    cmp_object_t object;
    if (!cmp_read_object(cmp, &object)) {
        return false;
    }

    // note: the cmp_object_t fields are read directly instead of going
    // through the cmp_object_is/as* functions. it's much faster this way.

    switch (object.type) {
        case CMP_TYPE_NIL:
            *hash = hash_nil(*hash);
            return true;

        case CMP_TYPE_BOOLEAN:
            *hash = hash_bool(*hash, object.as.boolean);
            return true;

        case CMP_TYPE_DOUBLE:
            *hash = hash_double(*hash, object.as.dbl);
            return true;

        // note: all ints are hashed as 64-bit (not all libraries read
        // different sized types)

        case CMP_TYPE_POSITIVE_FIXNUM:
        case CMP_TYPE_UINT8:
            *hash = hash_u64(*hash, object.as.u8);
            return true;

        case CMP_TYPE_UINT16:
            *hash = hash_u64(*hash, object.as.u16);
            return true;

        case CMP_TYPE_UINT32:
            *hash = hash_u64(*hash, object.as.u32);
            return true;

        case CMP_TYPE_UINT64:
            *hash = hash_u64(*hash, object.as.u64);
            return true;

        case CMP_TYPE_NEGATIVE_FIXNUM:
        case CMP_TYPE_SINT8:
            *hash = hash_i64(*hash, object.as.s8);
            return true;

        case CMP_TYPE_SINT16:
            *hash = hash_i64(*hash, object.as.s16);
            return true;

        case CMP_TYPE_SINT32:
            *hash = hash_i64(*hash, object.as.s32);
            return true;

        case CMP_TYPE_SINT64:
            *hash = hash_i64(*hash, object.as.s64);
            return true;

        case CMP_TYPE_FIXSTR:
        case CMP_TYPE_STR8:
        case CMP_TYPE_STR16:
        case CMP_TYPE_STR32:
        {
            // the string bytes are hashed in place, straight out of the
            // buffer, which is then advanced past them
            uint32_t byte_count = object.as.str_size;
            if (byte_count > buffer->left) {
                return false;
            }
            *hash = hash_str(*hash, buffer->data, byte_count);
            buffer->left -= byte_count;
            buffer->data += byte_count;
            return true;
        }

        case CMP_TYPE_FIXARRAY:
        case CMP_TYPE_ARRAY16:
        case CMP_TYPE_ARRAY32:
        {
            const size_t element_count = object.as.array_size;
            size_t remaining = element_count;
            while (remaining > 0) {
                if (!hash_element(cmp, hash)) {
                    return false;
                }
                --remaining;
            }
            // the count is mixed in after the elements
            *hash = hash_u32(*hash, object.as.array_size);
            return true;
        }

        case CMP_TYPE_FIXMAP:
        case CMP_TYPE_MAP16:
        case CMP_TYPE_MAP32:
        {
            const size_t pair_count = object.as.map_size;
            size_t remaining = pair_count;
            while (remaining > 0) {
                // we expect keys to be short strings
                char key[16];
                uint32_t key_size = sizeof(key);
                if (!cmp_read_str(cmp, key, &key_size)) {
                    return false;
                }
                *hash = hash_str(*hash, key, key_size);

                // the value belonging to this key
                if (!hash_element(cmp, hash)) {
                    return false;
                }
                --remaining;
            }
            // the count is mixed in after the pairs
            *hash = hash_u32(*hash, object.as.map_size);
            return true;
        }

        default:
            // any other type is reported as a failure
            return false;
    }
}
// Folds one UBJSON value into the running hash stored at *hash.
//
// data  - base pointer of an array of values whose element type is `type`
//         (an array of ubjr_dynamic_t when type is UBJ_MIXED). May be NULL
//         for types that carry no payload (null, true, false).
// type  - element type of the array `data` points to.
// index - which element of `data` to hash.
// hash  - running hash, updated in place.
//
// Containers hash each of their elements (and, for objects, the key before
// each value) followed by the element count. Types without a case below are
// silently skipped and leave *hash unchanged.
static void hash_value(void* data, UBJ_TYPE type, size_t index, uint32_t* hash) {
    switch (type) {
        case UBJ_MIXED: {
            ubjr_dynamic_t* dynamic = &((ubjr_dynamic_t*)data)[index];
            switch (dynamic->type) {
                case UBJ_INT8:
                case UBJ_UINT8:
                case UBJ_INT16:
                case UBJ_INT32:
                case UBJ_INT64:
                    // the union in a dynamic doesn't actually contain these types, so
                    // we can't just cast it and recurse like the others here. if an
                    // integer is read in a dynamic, the int64_t integer is set to the
                    // value read in priv_ubjr_pointer_to_dynamic(), but integers can
                    // be smaller sizes in fixed-type arrays and objects.
                    *hash = hash_i64(*hash, dynamic->integer);
                    return;

                case UBJ_FLOAT64: hash_value(&dynamic->real, dynamic->type, 0, hash); return;

                // UBJ_CHAR must be routed through the string member as well:
                // the fixed-type path below reads UBJ_CHAR as a
                // ubjr_string_t, so letting it reach the payload-free
                // fallback would dereference the NULL data pointer.
                // NOTE(review): assumes a UBJ_CHAR dynamic keeps its value
                // in `string`, like UBJ_STRING -- confirm against ubjr.
                case UBJ_STRING:
                case UBJ_CHAR:
                    hash_value(&dynamic->string, dynamic->type, 0, hash);
                    return;

                case UBJ_ARRAY:   hash_value(&dynamic->container_array, dynamic->type, 0, hash); return;
                case UBJ_OBJECT:  hash_value(&dynamic->container_object, dynamic->type, 0, hash); return;

                default: break;
            }
            // remaining types are hashed without a payload (null, true,
            // false); anything else hits the outer default and is skipped
            hash_value(NULL, dynamic->type, 0, hash);
            return;
        }

        case UBJ_NULLTYPE: *hash = hash_nil(*hash); return;
        case UBJ_BOOL_TRUE: *hash = hash_bool(*hash, true); return;
        case UBJ_BOOL_FALSE: *hash = hash_bool(*hash, false); return;
        case UBJ_FLOAT64: *hash = hash_double(*hash, ((double*)data)[index]); return;

        // we assume that the type here comes from a fixed-size array, not from
        // a dynamic (which should have been handled above.)
        case UBJ_INT8:  *hash = hash_i64(*hash, (( int8_t*)data)[index]); return;
        case UBJ_UINT8: *hash = hash_i64(*hash, ((uint8_t*)data)[index]); return;
        case UBJ_INT16: *hash = hash_i64(*hash, ((int16_t*)data)[index]); return;
        case UBJ_INT32: *hash = hash_i64(*hash, ((int32_t*)data)[index]); return;
        case UBJ_INT64: *hash = hash_i64(*hash, ((int64_t*)data)[index]); return;

        case UBJ_STRING:
        case UBJ_CHAR: {
            // hashed up to (not including) the terminating NUL
            ubjr_string_t str = ((ubjr_string_t*)data)[index];
            *hash = hash_str(*hash, str, strlen(str));
            return;
        }

        case UBJ_ARRAY: {
            ubjr_array_t* array = &((ubjr_array_t*)data)[index];
            for (size_t i = 0; i < array->size; ++i)
                hash_value(array->values, array->type, i, hash);
            // element count goes in after the elements themselves
            *hash = hash_u32(*hash, array->size);
            return;
        }

        case UBJ_OBJECT:
        {
            ubjr_object_t* object = &((ubjr_object_t*)data)[index];
            for (size_t i = 0; i < object->size; ++i) {
                // key first, then the value stored at the same index
                *hash = hash_str(*hash, object->keys[i], strlen(object->keys[i]));
                hash_value(object->values, object->type, i, hash);
            }
            // member count goes in after the members themselves
            *hash = hash_u32(*hash, object->size);
            return;
        }

        default:
            break;
    }
}
static int parse_null(void* ctx) {
    parser_t* parser = (parser_t*)ctx;
    ++parser->children[parser->depth];
    parser->hash = hash_nil(parser->hash);
    return 1;
}