// Insert Ref with hash key Key into global Hash_Table . // Ref represents string S . static void Hash_Insert(String_Ref_t Ref, uint64 Key, char * S) { String_Ref_t H_Ref; char * T; int Shift; unsigned char Key_Check; int64 Ct, Probe, Sub; int i; Sub = HASH_FUNCTION (Key); Shift = HASH_CHECK_FUNCTION (Key); Hash_Check_Array [Sub] |= (((Check_Vector_t) 1) << Shift); Key_Check = KEY_CHECK_FUNCTION (Key); Probe = PROBE_FUNCTION (Key); Ct = 0; do { for (i = 0; i < Hash_Table [Sub] . Entry_Ct; i ++) if (Hash_Table [Sub] . Check [i] == Key_Check) { H_Ref = Hash_Table [Sub] . Entry [i]; T = Data + String_Start [getStringRefStringNum(H_Ref)] + getStringRefOffset(H_Ref); if (strncmp (S, T, Kmer_Len) == 0) { if (getStringRefLast(H_Ref)) { Extra_Ref_Ct ++; } Next_Ref [(String_Start [getStringRefStringNum(Ref)] + getStringRefOffset(Ref)) / (HASH_KMER_SKIP + 1)] = H_Ref; Extra_Ref_Ct ++; setStringRefLast(Ref, TRUELY_ZERO); Hash_Table [Sub] . Entry [i] = Ref; if (Hash_Table [Sub] . Hits [i] < HIGHEST_KMER_LIMIT) Hash_Table [Sub] . Hits [i] ++; return; } } if (i != Hash_Table [Sub] . Entry_Ct) { fprintf (stderr, "i = %d Sub = " F_S64 " Entry_Ct = %d\n", i, Sub, Hash_Table [Sub] . Entry_Ct); } assert (i == Hash_Table [Sub] . Entry_Ct); if (Hash_Table [Sub] . Entry_Ct < ENTRIES_PER_BUCKET) { setStringRefLast(Ref, TRUELY_ONE); Hash_Table [Sub] . Entry [i] = Ref; Hash_Table [Sub] . Check [i] = Key_Check; Hash_Table [Sub] . Entry_Ct ++; Hash_Entries ++; Hash_Table [Sub] . Hits [i] = 1; return; } Sub = (Sub + Probe) % HASH_TABLE_SIZE; } while (++ Ct < HASH_TABLE_SIZE); fprintf (stderr, "ERROR: Hash table full\n"); assert (FALSE); }
// Add string s as an extra hash table string and return // a single reference to the beginning of it. static String_Ref_t Add_Extra_Hash_String(const char * s) { String_Ref_t ref = 0; String_Ref_t sub = 0; size_t new_len; int len; new_len = Used_Data_Len + Kmer_Len; if (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) { sub = String_Ct + Extra_String_Ct - 1; } else { sub = String_Ct + Extra_String_Ct; if (sub >= String_Start_Size) { String_Start_Size *= MEMORY_EXPANSION_FACTOR; if (sub >= String_Start_Size) String_Start_Size = sub; String_Start = (int64 *) safe_realloc (String_Start, String_Start_Size * sizeof (int64)); } String_Start [sub] = Used_Data_Len; Extra_String_Ct ++; Extra_String_Subcount = 0; new_len ++; } if (new_len >= Extra_Data_Len) { Extra_Data_Len = (size_t) (Extra_Data_Len * MEMORY_EXPANSION_FACTOR); if (new_len > Extra_Data_Len) Extra_Data_Len = new_len; Data = (char *) safe_realloc (Data, Extra_Data_Len); } strncpy (Data + String_Start [sub] + Kmer_Len * Extra_String_Subcount, s, Kmer_Len + 1); Used_Data_Len = new_len; setStringRefStringNum(ref, sub); if (sub > MAX_STRING_NUM) { fprintf (stderr, "Too many skip kmer strings for hash table.\n" "Try skipping hopeless check (-z option)\n" "Exiting\n"); exit (1); } setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)Kmer_Len); assert(Extra_String_Subcount * Kmer_Len < OFFSET_MASK); setStringRefLast(ref, TRUELY_ONE); setStringRefEmpty(ref, TRUELY_ONE); Extra_String_Subcount ++; return ref; }
// Add string s as an extra hash table string and return // a single reference to the beginning of it. static String_Ref_t Add_Extra_Hash_String(const char *s) { String_Ref_t ref = 0; String_Ref_t sub = 0; int len; uint32 new_len = Used_Data_Len + G.Kmer_Len; if (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) { sub = String_Ct + Extra_String_Ct - 1; } else { sub = String_Ct + Extra_String_Ct; if (sub >= String_Start_Size) { uint64 n = max(sub * 1.1, String_Start_Size * 1.5); //fprintf(stderr, "REALLOC String_Start from "F_U64" to "F_U64"\n", String_Start_Size, n); resizeArray(String_Start, String_Start_Size, String_Start_Size, n); } String_Start[sub] = Used_Data_Len; Extra_String_Ct++; Extra_String_Subcount = 0; new_len++; } if (new_len >= Extra_Data_Len) { uint64 n = max(new_len * 1.1, Extra_Data_Len * 1.5); //fprintf(stderr, "REALLOC basesData from "F_U64" to "F_U64"\n", Extra_Data_Len, n); resizeArray(basesData, Extra_Data_Len, Extra_Data_Len, n); } strncpy(basesData + String_Start[sub] + G.Kmer_Len * Extra_String_Subcount, s, G.Kmer_Len + 1); Used_Data_Len = new_len; setStringRefStringNum(ref, sub); if (sub > MAX_STRING_NUM) { fprintf(stderr, "Too many skip kmer strings for hash table.\n"); fprintf(stderr, "Try skipping hopeless check (-z option)\n"); fprintf(stderr, "Exiting\n"); exit (1); } setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)G.Kmer_Len); assert(Extra_String_Subcount * G.Kmer_Len < OFFSET_MASK); setStringRefLast(ref, (uint64)1); setStringRefEmpty(ref, TRUELY_ONE); Extra_String_Subcount++; return(ref); }