static void hash_aux(value obj) { unsigned char * p; mlsize_t i, j; tag_t tag; hash_univ_limit--; if (hash_univ_count < 0 || hash_univ_limit < 0) return; again: if (Is_long(obj)) { hash_univ_count--; Combine(Long_val(obj)); return; } /* Pointers into the heap are well-structured blocks. So are atoms. We can inspect the block contents. */ Assert (Is_block (obj)); if (Is_in_value_area(obj)) { tag = Tag_val(obj); switch (tag) { case String_tag: hash_univ_count--; i = caml_string_length(obj); for (p = &Byte_u(obj, 0); i > 0; i--, p++) Combine_small(*p); break; case Double_tag: /* For doubles, we inspect their binary representation, LSB first. The results are consistent among all platforms with IEEE floats. */ hash_univ_count--; #ifdef ARCH_BIG_ENDIAN for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, 0), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); break; case Double_array_tag: hash_univ_count--; for (j = 0; j < Bosize_val(obj); j += sizeof(double)) { #ifdef ARCH_BIG_ENDIAN for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, j), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); } break; case Abstract_tag: /* We don't know anything about the contents of the block. Better do nothing. */ break; case Infix_tag: hash_aux(obj - Infix_offset_val(obj)); break; case Forward_tag: obj = Forward_val (obj); goto again; case Object_tag: hash_univ_count--; Combine(Oid_val(obj)); break; case Custom_tag: /* If no hashing function provided, do nothing */ if (Custom_ops_val(obj)->hash != NULL) { hash_univ_count--; Combine(Custom_ops_val(obj)->hash(obj)); } break; default: hash_univ_count--; Combine_small(tag); i = Wosize_val(obj); while (i != 0) { i--; hash_aux(Field(obj, i)); } break; } return; } /* Otherwise, obj is a pointer outside the heap, to an object with a priori unknown structure. Use its physical address as hash key. */ Combine((intnat) obj); }
static void hash_aux(value obj) { unsigned char * p; mlsize_t i; tag_t tag; hash_univ_limit--; if (hash_univ_count < 0 || hash_univ_limit < 0) { if (safe) { fatal_error("hash: count limit exceeded\n"); } else { return; } } if (IS_LONG(obj)) { hash_univ_count--; Combine(VAL_TO_LONG(obj)); return; } /* Atoms are not in the heap, but it's better to hash their tag than to do nothing. */ if (Is_atom(obj)) { tag = Tag_val(obj); hash_univ_count--; Combine_small(tag); return; } /* Pointers into the heap are well-structured blocks. We can inspect the block contents. */ if (Is_in_heap(obj) || Is_young(obj)) { tag = Tag_val(obj); switch (tag) { case String_tag: hash_univ_count--; { mlsize_t len = string_length(obj); i = len <= 128 ? len : 128; // Hash on 128 first characters for (p = &Byte_u(obj, 0); i > 0; i--, p++) { Combine_small(*p); } // Hash on logarithmically many additional characters beyond 128 for (i = 1; i+127 < len; i *= 2) { Combine_small(Byte_u(obj, 127+i)); } break; } case Double_tag: /* For doubles, we inspect their binary representation, LSB first. The results are consistent among all platforms with IEEE floats. */ hash_univ_count--; #ifdef WORDS_BIGENDIAN for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, 0), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); break; case Abstract_tag: case Final_tag: /* We don't know anything about the contents of the block. Better do nothing. */ break; case Reference_tag: /* We can't hash on the heap address itself, since the reference block * may be moved (from the young generation to the old one). * But, we may follow the pointer. On cyclic structures this will * terminate because the hash_univ_count gets decremented. */ /* Poor idea to hash on the pointed-to structure, even so: it may change, * and hence the hash value of the value changes, although the ref * doesn't. * * This breaks most hash table implementations. sestoft 2000-02-20. */ if (safe) { fatal_error("hash: ref encountered\n"); } Combine_small(tag); hash_univ_count--; break; default: hash_univ_count--; Combine_small(tag); i = Wosize_val(obj); while (i != 0) { i--; hash_aux(Field(obj, i)); } break; } return; } /* Otherwise, obj is a pointer outside the heap, to an object with a priori unknown structure. Use its physical address as hash key. */ Combine((long) obj); }