// Insert string subscript i into the global hash table. // Sequence and information about the string are in // global variables Data, String_Start, String_Info, .... static void Put_String_In_Hash(int i) { String_Ref_t ref = 0; char * p, * window; int kmers_inserted = 0; int skip_ct; uint64 key, key_is_bad; int j; if (String_Info [i] . length < Kmer_Len) return; p = window = Data + String_Start [i]; key = key_is_bad = 0; for (j = 0; j < Kmer_Len; j ++) { key_is_bad |= (uint64) (Char_Is_Bad [(int) * p]) << j; key |= (uint64) (Bit_Equivalent [(int) * (p ++)]) << (2 * j); } setStringRefStringNum(ref, i); if (i > MAX_STRING_NUM) { fprintf (stderr, "Too many strings for hash table--exiting\n"); exit (1); } setStringRefOffset(ref, TRUELY_ZERO); skip_ct = 0; setStringRefEmpty(ref, TRUELY_ZERO); if (key_is_bad == false) { Hash_Insert (ref, key, window); kmers_inserted ++; } while ((* p) != '\0') { window ++; { String_Ref_t newoff = getStringRefOffset(ref) + 1; assert(newoff < OFFSET_MASK); setStringRefOffset(ref, newoff); } if (++ skip_ct > HASH_KMER_SKIP) skip_ct = 0; key_is_bad >>= 1; key_is_bad |= (uint64) (Char_Is_Bad [(int) * p]) << (Kmer_Len - 1); key >>= 2; key |= (uint64) (Bit_Equivalent [(int) * (p ++)]) << (2 * (Kmer_Len - 1)); if (skip_ct == 0 && ! key_is_bad) { Hash_Insert (ref, key, window); kmers_inserted ++; } } }
// Add string s as an extra hash table string and return // a single reference to the beginning of it. static String_Ref_t Add_Extra_Hash_String(const char * s) { String_Ref_t ref = 0; String_Ref_t sub = 0; size_t new_len; int len; new_len = Used_Data_Len + Kmer_Len; if (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) { sub = String_Ct + Extra_String_Ct - 1; } else { sub = String_Ct + Extra_String_Ct; if (sub >= String_Start_Size) { String_Start_Size *= MEMORY_EXPANSION_FACTOR; if (sub >= String_Start_Size) String_Start_Size = sub; String_Start = (int64 *) safe_realloc (String_Start, String_Start_Size * sizeof (int64)); } String_Start [sub] = Used_Data_Len; Extra_String_Ct ++; Extra_String_Subcount = 0; new_len ++; } if (new_len >= Extra_Data_Len) { Extra_Data_Len = (size_t) (Extra_Data_Len * MEMORY_EXPANSION_FACTOR); if (new_len > Extra_Data_Len) Extra_Data_Len = new_len; Data = (char *) safe_realloc (Data, Extra_Data_Len); } strncpy (Data + String_Start [sub] + Kmer_Len * Extra_String_Subcount, s, Kmer_Len + 1); Used_Data_Len = new_len; setStringRefStringNum(ref, sub); if (sub > MAX_STRING_NUM) { fprintf (stderr, "Too many skip kmer strings for hash table.\n" "Try skipping hopeless check (-z option)\n" "Exiting\n"); exit (1); } setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)Kmer_Len); assert(Extra_String_Subcount * Kmer_Len < OFFSET_MASK); setStringRefLast(ref, TRUELY_ONE); setStringRefEmpty(ref, TRUELY_ONE); Extra_String_Subcount ++; return ref; }
// Add string s as an extra hash table string and return // a single reference to the beginning of it. static String_Ref_t Add_Extra_Hash_String(const char *s) { String_Ref_t ref = 0; String_Ref_t sub = 0; int len; uint32 new_len = Used_Data_Len + G.Kmer_Len; if (Extra_String_Subcount < MAX_EXTRA_SUBCOUNT) { sub = String_Ct + Extra_String_Ct - 1; } else { sub = String_Ct + Extra_String_Ct; if (sub >= String_Start_Size) { uint64 n = max(sub * 1.1, String_Start_Size * 1.5); //fprintf(stderr, "REALLOC String_Start from "F_U64" to "F_U64"\n", String_Start_Size, n); resizeArray(String_Start, String_Start_Size, String_Start_Size, n); } String_Start[sub] = Used_Data_Len; Extra_String_Ct++; Extra_String_Subcount = 0; new_len++; } if (new_len >= Extra_Data_Len) { uint64 n = max(new_len * 1.1, Extra_Data_Len * 1.5); //fprintf(stderr, "REALLOC basesData from "F_U64" to "F_U64"\n", Extra_Data_Len, n); resizeArray(basesData, Extra_Data_Len, Extra_Data_Len, n); } strncpy(basesData + String_Start[sub] + G.Kmer_Len * Extra_String_Subcount, s, G.Kmer_Len + 1); Used_Data_Len = new_len; setStringRefStringNum(ref, sub); if (sub > MAX_STRING_NUM) { fprintf(stderr, "Too many skip kmer strings for hash table.\n"); fprintf(stderr, "Try skipping hopeless check (-z option)\n"); fprintf(stderr, "Exiting\n"); exit (1); } setStringRefOffset(ref, (String_Ref_t)Extra_String_Subcount * (String_Ref_t)G.Kmer_Len); assert(Extra_String_Subcount * G.Kmer_Len < OFFSET_MASK); setStringRefLast(ref, (uint64)1); setStringRefEmpty(ref, TRUELY_ONE); Extra_String_Subcount++; return(ref); }
// Insert string subscript i into the global hash table. // Sequence and information about the string are in // global variables basesData, String_Start, String_Info, .... static void Put_String_In_Hash(uint32 curID, uint32 i) { String_Ref_t ref = 0; int skip_ct; uint64 key; uint64 key_is_bad; int j; uint32 kmers_skipped = 0; uint32 kmers_bad = 0; uint32 kmers_inserted = 0; char *p = basesData + String_Start[i]; char *window = basesData + String_Start[i]; key = key_is_bad = 0; for (uint32 j=0; j<G.Kmer_Len; j ++) { key_is_bad |= (uint64) (Char_Is_Bad[(int) * p]) << j; key |= (uint64) (Bit_Equivalent[(int) * (p ++)]) << (2 * j); } setStringRefStringNum(ref, i); if (i > MAX_STRING_NUM) fprintf (stderr, "Too many strings for hash table--exiting\n"), exit(1); setStringRefOffset(ref, TRUELY_ZERO); skip_ct = 0; setStringRefEmpty(ref, TRUELY_ZERO); if (key_is_bad == false) { Hash_Insert(ref, key, window); kmers_inserted++; } else { kmers_bad++; } while (*p != 0) { window++; String_Ref_t newoff = getStringRefOffset(ref) + 1; assert(newoff < OFFSET_MASK); setStringRefOffset(ref, newoff); if (++skip_ct > HASH_KMER_SKIP) skip_ct = 0; key_is_bad >>= 1; key_is_bad |= (uint64) (Char_Is_Bad[(int) * p]) << (G.Kmer_Len - 1); key >>= 2; key |= (uint64) (Bit_Equivalent[(int) * (p ++)]) << (2 * (G.Kmer_Len - 1)); if (skip_ct > 0) { kmers_skipped++; continue; } if (key_is_bad) { kmers_bad++; continue; } Hash_Insert(ref, key, window); kmers_inserted++; } //fprintf(stderr, "STRING %u skipped %u bad %u inserted %u\n", // curID, kmers_skipped, kmers_bad, kmers_inserted); }