Пример #1
0
static void hash_aux(value obj)
{
  unsigned char * p;
  mlsize_t i, j;
  tag_t tag;

  hash_univ_limit--;
  if (hash_univ_count < 0 || hash_univ_limit < 0) return;

 again:
  if (Is_long(obj)) {
    hash_univ_count--;
    Combine(Long_val(obj));
    return;
  }

  /* Pointers into the heap are well-structured blocks. So are atoms.
     We can inspect the block contents. */

  Assert (Is_block (obj));  
  if (Is_in_value_area(obj)) {
    tag = Tag_val(obj);
    switch (tag) {
    case String_tag:
      hash_univ_count--;
      i = caml_string_length(obj);
      for (p = &Byte_u(obj, 0); i > 0; i--, p++)
        Combine_small(*p);
      break;
    case Double_tag:
      /* For doubles, we inspect their binary representation, LSB first.
         The results are consistent among all platforms with IEEE floats. */
      hash_univ_count--;
#ifdef ARCH_BIG_ENDIAN
      for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, 0), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      break;
    case Double_array_tag:
      hash_univ_count--;
      for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
#ifdef ARCH_BIG_ENDIAN
      for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, j), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      }
      break;
    case Abstract_tag:
      /* We don't know anything about the contents of the block.
         Better do nothing. */
      break;
    case Infix_tag:
      hash_aux(obj - Infix_offset_val(obj));
      break;
    case Forward_tag:
      obj = Forward_val (obj);
      goto again;
    case Object_tag:
      hash_univ_count--;
      Combine(Oid_val(obj));
      break;
    case Custom_tag:
      /* If no hashing function provided, do nothing */
      if (Custom_ops_val(obj)->hash != NULL) {
        hash_univ_count--;
        Combine(Custom_ops_val(obj)->hash(obj));
      }
      break;
    default:
      hash_univ_count--;
      Combine_small(tag);
      i = Wosize_val(obj);
      while (i != 0) {
        i--;
        hash_aux(Field(obj, i));
      }
      break;
    }
    return;
  }

  /* Otherwise, obj is a pointer outside the heap, to an object with
     a priori unknown structure. Use its physical address as hash key. */
  Combine((intnat) obj);
}
Пример #2
0
static void hash_aux(value obj)
{
  unsigned char * p;
  mlsize_t i;
  tag_t tag;

  hash_univ_limit--;
  if (hash_univ_count < 0 || hash_univ_limit < 0) {
	  if (safe) {
		  fatal_error("hash: count limit exceeded\n");
	  } else {
		  return;
	  }
  }

  if (IS_LONG(obj)) {
	  hash_univ_count--;
	  Combine(VAL_TO_LONG(obj));
	  return;
  }

  /* Atoms are not in the heap, but it's better to hash their tag
     than to do nothing. */

  if (Is_atom(obj)) {
    tag = Tag_val(obj);
    hash_univ_count--;
    Combine_small(tag);
    return;
  }

  /* Pointers into the heap are well-structured blocks.
     We can inspect the block contents. */

  if (Is_in_heap(obj) || Is_young(obj)) {
    tag = Tag_val(obj);
    switch (tag) {
    case String_tag:
      hash_univ_count--;
      {
	      mlsize_t len = string_length(obj);
	      i = len <= 128 ? len : 128;
	      // Hash on 128 first characters
	      for (p = &Byte_u(obj, 0); i > 0; i--, p++) {
		      Combine_small(*p);
	      }
	      // Hash on logarithmically many additional characters beyond 128
	      for (i = 1; i+127 < len; i *= 2) {
		      Combine_small(Byte_u(obj, 127+i));
	      }
	      break;
      }
    case Double_tag:
      /* For doubles, we inspect their binary representation, LSB first.
         The results are consistent among all platforms with IEEE floats. */
      hash_univ_count--;
#ifdef WORDS_BIGENDIAN
      for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, 0), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      break;
    case Abstract_tag:
    case Final_tag:
      /* We don't know anything about the contents of the block.
         Better do nothing. */
      break;
    case Reference_tag:
      /* We can't hash on the heap address itself, since the reference block
       * may be moved (from the young generation to the old one).
       * But, we may follow the pointer.  On cyclic structures this will
       * terminate because the hash_univ_count gets decremented.
       */

      /* Poor idea to hash on the pointed-to structure, even so: it may change,
       * and hence the hash value of the value changes, although the ref
       * doesn't.
       *
       * This breaks most hash table implementations.  sestoft 2000-02-20.
       */

	    if (safe) {
		    fatal_error("hash: ref encountered\n");
	    }
	    Combine_small(tag);
	    hash_univ_count--;
      break;
    default:
      hash_univ_count--;
      Combine_small(tag);
      i = Wosize_val(obj);
      while (i != 0) {
        i--;
        hash_aux(Field(obj, i));
      }
      break;
    }
    return;
  }

  /* Otherwise, obj is a pointer outside the heap, to an object with
     a priori unknown structure. Use its physical address as hash key. */
  Combine((long) obj);
}