//  Record  Ref  in the global  Hash_Table  under hash key  Key .
//  S  points at the k-mer text that  Ref  stands for.
//
//  Probing starts at bucket  HASH_FUNCTION (Key)  and advances by
//  PROBE_FUNCTION (Key)  modulo  HASH_TABLE_SIZE , for up to
//  HASH_TABLE_SIZE  buckets.  In each bucket:
//    - if an entry has the same check byte and its first  Kmer_Len
//      characters equal those of  S ,  Ref  replaces that entry, the
//      replaced reference is stored in  Next_Ref , and the entry's hit
//      count is bumped (capped at  HIGHEST_KMER_LIMIT );
//    - otherwise, if the bucket has fewer than  ENTRIES_PER_BUCKET
//      entries,  Ref  is appended as a new entry with one hit.
//  If every probed bucket is full, an error is printed and an
//  assertion fails.
static
void
Hash_Insert(String_Ref_t Ref, uint64 Key, char * S) {
  String_Ref_t  prev_ref;
  char  * prev_text;
  int  check_bit;
  unsigned char  check_byte;
  int64  tries, stride, bucket;
  int  slot;

  bucket = HASH_FUNCTION (Key);
  check_bit = HASH_CHECK_FUNCTION (Key);
  //  Mark this key's bit in the per-bucket check vector.
  Hash_Check_Array [bucket] |= (((Check_Vector_t) 1) << check_bit);
  check_byte = KEY_CHECK_FUNCTION (Key);
  stride = PROBE_FUNCTION (Key);

  tries = 0;
  while  (1) {
    //  Scan the bucket for an entry already holding this k-mer.
    for  (slot = 0;  slot < Hash_Table [bucket] . Entry_Ct;  slot ++) {
      if  (Hash_Table [bucket] . Check [slot] != check_byte)
        continue;

      prev_ref = Hash_Table [bucket] . Entry [slot];
      prev_text = Data + String_Start [getStringRefStringNum(prev_ref)] + getStringRefOffset(prev_ref);
      if  (strncmp (S, prev_text, Kmer_Len) != 0)
        continue;

      //  Same k-mer:  count one extra reference for  Ref , plus one more
      //  when the replaced entry was flagged as last.
      Extra_Ref_Ct += (getStringRefLast(prev_ref) ? 2 : 1);

      //  Chain the old reference behind  Ref  and put  Ref  in the table.
      Next_Ref [(String_Start [getStringRefStringNum(Ref)] + getStringRefOffset(Ref)) / (HASH_KMER_SKIP + 1)] = prev_ref;
      setStringRefLast(Ref, TRUELY_ZERO);
      Hash_Table [bucket] . Entry [slot] = Ref;

      if  (Hash_Table [bucket] . Hits [slot] < HIGHEST_KMER_LIMIT)
        Hash_Table [bucket] . Hits [slot] ++;

      return;
    }

    //  Sanity check:  the scan must have run to the end of the bucket.
    if  (slot != Hash_Table [bucket] . Entry_Ct) {
      fprintf (stderr, "i = %d  Sub = " F_S64 "  Entry_Ct = %d\n",
               slot, bucket, Hash_Table [bucket] . Entry_Ct);
    }
    assert (slot == Hash_Table [bucket] . Entry_Ct);

    //  No match here;  take the next free slot if the bucket has one.
    if  (Hash_Table [bucket] . Entry_Ct < ENTRIES_PER_BUCKET) {
      setStringRefLast(Ref, TRUELY_ONE);
      Hash_Table [bucket] . Entry [slot] = Ref;
      Hash_Table [bucket] . Check [slot] = check_byte;
      Hash_Table [bucket] . Hits [slot] = 1;
      Hash_Table [bucket] . Entry_Ct ++;
      Hash_Entries ++;
      return;
    }

    //  Bucket full:  move on to the next bucket in the probe sequence.
    bucket = (bucket + stride) % HASH_TABLE_SIZE;
    if  (++ tries >= HASH_TABLE_SIZE)
      break;
  }

  fprintf (stderr, "ERROR:  Hash table full\n");
  assert (FALSE);
}
//  Add string  s  as an extra hash table string and return
//  a single reference to the beginning of it.
//
//  While  Extra_String_Subcount  is below  MAX_EXTRA_SUBCOUNT  the k-mer is
//  appended to the current extra string (index  String_Ct + Extra_String_Ct - 1 )
//  at offset  Kmer_Len * Extra_String_Subcount .  Otherwise a new extra
//  string is started at  Used_Data_Len , growing  String_Start  if needed.
//  Data  is grown whenever the new used length reaches  Extra_Data_Len .
//
//  Kmer_Len + 1  bytes are copied with  strncpy , so the byte after each
//  k-mer is overwritten by the first byte of the next k-mer packed into
//  the same extra string.
//  NOTE(review): presumably  s  holds at least  Kmer_Len  characters -- confirm
//  against callers.
//
//  The returned reference has string number  sub , offset
//  Extra_String_Subcount * Kmer_Len , and both its Last and Empty flags set.
//  Exits with status 1 if the string number exceeds  MAX_STRING_NUM .
static
String_Ref_t
Add_Extra_Hash_String(const char * s) {
  String_Ref_t  ref = 0;
  String_Ref_t  sub = 0;
  size_t  new_len;

  new_len = Used_Data_Len + Kmer_Len;
  if  (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) {
    sub = String_Ct + Extra_String_Ct - 1;
  } else {
    sub = String_Ct + Extra_String_Ct;
    if  (sub >= String_Start_Size) {
      String_Start_Size *= MEMORY_EXPANSION_FACTOR;
      //  Index  sub  is written below, so at least  sub + 1  slots are
      //  required (scaling a small size can leave it unchanged).
      if  (sub >= String_Start_Size)
        String_Start_Size = sub + 1;
      String_Start = (int64 *) safe_realloc (String_Start,
                                             String_Start_Size * sizeof (int64));
    }
    String_Start [sub] = Used_Data_Len;
    Extra_String_Ct ++;
    Extra_String_Subcount = 0;
    //  One extra byte for the terminator of the new extra string.
    new_len ++;
  }

  if  (new_len >= Extra_Data_Len) {
    Extra_Data_Len = (size_t) (Extra_Data_Len * MEMORY_EXPANSION_FACTOR);
    if  (new_len > Extra_Data_Len)
      Extra_Data_Len = new_len;
    Data = (char *) safe_realloc (Data, Extra_Data_Len);
  }
  //  The copy ends exactly at  Data + new_len .
  strncpy (Data + String_Start [sub] + Kmer_Len * Extra_String_Subcount,
           s, Kmer_Len + 1);
  Used_Data_Len = new_len;

  setStringRefStringNum(ref, sub);
  if  (sub > MAX_STRING_NUM) {
    fprintf (stderr, "Too many skip kmer strings for hash table.\n"
             "Try skipping hopeless check (-z option)\n"
             "Exiting\n");
    exit (1);
  }
  setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)Kmer_Len);

  assert(Extra_String_Subcount * Kmer_Len < OFFSET_MASK);

  setStringRefLast(ref, TRUELY_ONE);
  setStringRefEmpty(ref, TRUELY_ONE);

  Extra_String_Subcount ++;

  return  ref;
}
Пример #3
0
//  Add string  s  as an extra hash table string and return
//  a single reference to the beginning of it.
//
//  While  Extra_String_Subcount  is below  MAX_EXTRA_SUBCOUNT  the k-mer is
//  appended to the current extra string (index  String_Ct + Extra_String_Ct - 1 )
//  at offset  G.Kmer_Len * Extra_String_Subcount .  Otherwise a new extra
//  string is started at  Used_Data_Len , growing  String_Start  if needed.
//  basesData  is grown whenever the new used length reaches  Extra_Data_Len .
//
//  G.Kmer_Len + 1  bytes are copied with  strncpy , so the byte after each
//  k-mer is overwritten by the first byte of the next k-mer packed into
//  the same extra string.
//  NOTE(review): presumably  s  holds at least  G.Kmer_Len  characters --
//  confirm against callers.
//
//  The returned reference has string number  sub , offset
//  Extra_String_Subcount * G.Kmer_Len , and both its Last and Empty flags set.
//  Exits with status 1 if the string number exceeds  MAX_STRING_NUM .
static
String_Ref_t
Add_Extra_Hash_String(const char *s) {
  String_Ref_t  ref = 0;
  String_Ref_t  sub = 0;

  //  64-bit so the length cannot wrap once more than 4 GB of data is in use.
  uint64 new_len = Used_Data_Len + G.Kmer_Len;

  if (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) {
    sub = String_Ct + Extra_String_Ct - 1;

  } else {
    sub = String_Ct + Extra_String_Ct;

    if (sub >= String_Start_Size) {
      uint64  n = max(sub * 1.1, String_Start_Size * 1.5);

      //  For small sizes the scaled values truncate to no more than  sub ;
      //  index  sub  is written below, so at least  sub + 1  slots are needed.
      if (n <= sub)
        n = sub + 1;

      //fprintf(stderr, "REALLOC String_Start from "F_U64" to "F_U64"\n", String_Start_Size, n);
      resizeArray(String_Start, String_Start_Size, String_Start_Size, n);
    }

    String_Start[sub] = Used_Data_Len;

    Extra_String_Ct++;
    Extra_String_Subcount = 0;

    //  One extra byte for the terminator of the new extra string.
    new_len++;
  }

  if (new_len >= Extra_Data_Len) {
    uint64  n = max(new_len * 1.1, Extra_Data_Len * 1.5);

    //fprintf(stderr, "REALLOC basesData from "F_U64" to "F_U64"\n", Extra_Data_Len, n);
    resizeArray(basesData, Extra_Data_Len, Extra_Data_Len, n);
  }

  //  The copy ends exactly at  basesData + new_len .
  strncpy(basesData + String_Start[sub] + G.Kmer_Len * Extra_String_Subcount, s, G.Kmer_Len + 1);

  Used_Data_Len = new_len;

  setStringRefStringNum(ref, sub);

  if (sub > MAX_STRING_NUM) {
    fprintf(stderr, "Too many skip kmer strings for hash table.\n");
    fprintf(stderr, "Try skipping hopeless check (-z option)\n");
    fprintf(stderr, "Exiting\n");
    exit (1);
  }

  setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)G.Kmer_Len);

  assert(Extra_String_Subcount * G.Kmer_Len < OFFSET_MASK);

  setStringRefLast(ref,  (uint64)1);
  setStringRefEmpty(ref, TRUELY_ONE);

  Extra_String_Subcount++;

  return(ref);
}