// Insert Ref with hash key Key into global Hash_Table . // Ref represents string S . static void Hash_Insert(String_Ref_t Ref, uint64 Key, char * S) { String_Ref_t H_Ref; char * T; int Shift; unsigned char Key_Check; int64 Ct, Probe, Sub; int i; Sub = HASH_FUNCTION (Key); Shift = HASH_CHECK_FUNCTION (Key); Hash_Check_Array [Sub] |= (((Check_Vector_t) 1) << Shift); Key_Check = KEY_CHECK_FUNCTION (Key); Probe = PROBE_FUNCTION (Key); Ct = 0; do { for (i = 0; i < Hash_Table [Sub] . Entry_Ct; i ++) if (Hash_Table [Sub] . Check [i] == Key_Check) { H_Ref = Hash_Table [Sub] . Entry [i]; T = Data + String_Start [getStringRefStringNum(H_Ref)] + getStringRefOffset(H_Ref); if (strncmp (S, T, Kmer_Len) == 0) { if (getStringRefLast(H_Ref)) { Extra_Ref_Ct ++; } Next_Ref [(String_Start [getStringRefStringNum(Ref)] + getStringRefOffset(Ref)) / (HASH_KMER_SKIP + 1)] = H_Ref; Extra_Ref_Ct ++; setStringRefLast(Ref, TRUELY_ZERO); Hash_Table [Sub] . Entry [i] = Ref; if (Hash_Table [Sub] . Hits [i] < HIGHEST_KMER_LIMIT) Hash_Table [Sub] . Hits [i] ++; return; } } if (i != Hash_Table [Sub] . Entry_Ct) { fprintf (stderr, "i = %d Sub = " F_S64 " Entry_Ct = %d\n", i, Sub, Hash_Table [Sub] . Entry_Ct); } assert (i == Hash_Table [Sub] . Entry_Ct); if (Hash_Table [Sub] . Entry_Ct < ENTRIES_PER_BUCKET) { setStringRefLast(Ref, TRUELY_ONE); Hash_Table [Sub] . Entry [i] = Ref; Hash_Table [Sub] . Check [i] = Key_Check; Hash_Table [Sub] . Entry_Ct ++; Hash_Entries ++; Hash_Table [Sub] . Hits [i] = 1; return; } Sub = (Sub + Probe) % HASH_TABLE_SIZE; } while (++ Ct < HASH_TABLE_SIZE); fprintf (stderr, "ERROR: Hash table full\n"); assert (FALSE); }
// Mark the string that ref points into as screened at its left and/or
// right end, depending on where the referenced k-mer sits:
//   left  - the k-mer offset is below HOPELESS_MATCH
//   right - (length - offset - Kmer_Len + 1) is below HOPELESS_MATCH
static void
Mark_Screened_Ends_Single(String_Ref_t ref)
{
    int32  string_num = getStringRefStringNum(ref);
    int32  string_len = String_Info [string_num] . length;

    if (getStringRefOffset(ref) < HOPELESS_MATCH) {
        String_Info [string_num] . lfrag_end_screened = TRUE;
    }

    if (string_len - getStringRefOffset(ref) - Kmer_Len + 1 < HOPELESS_MATCH) {
        String_Info [string_num] . rfrag_end_screened = TRUE;
    }
}
// Insert string subscript i into the global hash table. // Sequence and information about the string are in // global variables Data, String_Start, String_Info, .... static void Put_String_In_Hash(int i) { String_Ref_t ref = 0; char * p, * window; int kmers_inserted = 0; int skip_ct; uint64 key, key_is_bad; int j; if (String_Info [i] . length < Kmer_Len) return; p = window = Data + String_Start [i]; key = key_is_bad = 0; for (j = 0; j < Kmer_Len; j ++) { key_is_bad |= (uint64) (Char_Is_Bad [(int) * p]) << j; key |= (uint64) (Bit_Equivalent [(int) * (p ++)]) << (2 * j); } setStringRefStringNum(ref, i); if (i > MAX_STRING_NUM) { fprintf (stderr, "Too many strings for hash table--exiting\n"); exit (1); } setStringRefOffset(ref, TRUELY_ZERO); skip_ct = 0; setStringRefEmpty(ref, TRUELY_ZERO); if (key_is_bad == false) { Hash_Insert (ref, key, window); kmers_inserted ++; } while ((* p) != '\0') { window ++; { String_Ref_t newoff = getStringRefOffset(ref) + 1; assert(newoff < OFFSET_MASK); setStringRefOffset(ref, newoff); } if (++ skip_ct > HASH_KMER_SKIP) skip_ct = 0; key_is_bad >>= 1; key_is_bad |= (uint64) (Char_Is_Bad [(int) * p]) << (Kmer_Len - 1); key >>= 2; key |= (uint64) (Bit_Equivalent [(int) * (p ++)]) << (2 * (Kmer_Len - 1)); if (skip_ct == 0 && ! key_is_bad) { Hash_Insert (ref, key, window); kmers_inserted ++; } } }
// Apply Mark_Screened_Ends_Single to ref and to every reference reached
// from it through Next_Ref, stopping after the first reference whose
// "last" flag is set has been processed.
static void
Mark_Screened_Ends_Chain(String_Ref_t ref)
{
    for (;;) {
        Mark_Screened_Ends_Single (ref);

        if (getStringRefLast(ref)) {
            break;
        }

        // Next_Ref is indexed by the reference's position in the data
        // array, scaled down by the k-mer skip interval.
        ref = Next_Ref [(String_Start [getStringRefStringNum(ref)]
                         + getStringRefOffset(ref)) / (HASH_KMER_SKIP + 1)];
    }
}
// Set the empty bit to true for the hash table entry // corresponding to string s whose hash key is key . // Also set global String_Info . left/right_end_screened // true if the entry occurs near the left/right end, resp., // of the string in the hash table. If not found, add an // entry to the hash table and mark it empty. static void Hash_Mark_Empty(uint64 key, char * s) { String_Ref_t h_ref; char * t; unsigned char key_check; int64 ct, probe; int64 sub; int i, shift; sub = HASH_FUNCTION (key); key_check = KEY_CHECK_FUNCTION (key); probe = PROBE_FUNCTION (key); ct = 0; do { for (i = 0; i < Hash_Table [sub] . Entry_Ct; i ++) if (Hash_Table [sub] . Check [i] == key_check) { h_ref = Hash_Table [sub] . Entry [i]; t = Data + String_Start [getStringRefStringNum(h_ref)] + getStringRefOffset(h_ref); if (strncmp (s, t, Kmer_Len) == 0) { if (! getStringRefEmpty(Hash_Table [sub] . Entry [i])) Mark_Screened_Ends_Chain (Hash_Table [sub] . Entry [i]); setStringRefEmpty(Hash_Table [sub] . Entry [i], TRUELY_ONE); return; } } assert (i == Hash_Table [sub] . Entry_Ct); if (Hash_Table [sub] . Entry_Ct < ENTRIES_PER_BUCKET) { // Not found if (Use_Hopeless_Check) { Hash_Table [sub] . Entry [i] = Add_Extra_Hash_String (s); setStringRefEmpty(Hash_Table [sub] . Entry [i], TRUELY_ONE); Hash_Table [sub] . Check [i] = key_check; Hash_Table [sub] . Entry_Ct ++; Hash_Table [sub] . Hits [i] = 0; Hash_Entries ++; shift = HASH_CHECK_FUNCTION (key); Hash_Check_Array [sub] |= (((Check_Vector_t) 1) << shift); } return; } sub = (sub + probe) % HASH_TABLE_SIZE; } while (++ ct < HASH_TABLE_SIZE); fprintf (stderr, "ERROR: Hash table full\n"); assert (FALSE); }
// Read the next batch of strings from stream and create a hash // table index of their Kmer_Len -mers. Return 1 if successful; // 0 otherwise. The batch ends when either end-of-file is encountered // or Max_Hash_Strings have been read in. first_frag_id is the // internal ID of the first fragment in the hash table. int Build_Hash_Index(gkStream *stream, int32 first_frag_id, gkFragment *myRead) { String_Ref_t ref; uint64 total_len; static int64 max_extra_ref_ct = 0; static int64 old_ref_len, new_ref_len; int frag_status; uint64 hash_entry_limit; Hash_String_Num_Offset = first_frag_id; String_Ct = Extra_String_Ct = 0; Extra_String_Subcount = MAX_EXTRA_SUBCOUNT; total_len = 0; if (Data == NULL) { Extra_Data_Len = Data_Len = Max_Hash_Data_Len + AS_READ_MAX_NORMAL_LEN; Data = (char *) safe_realloc (Data, Data_Len); Quality_Data = (char *) safe_realloc (Quality_Data, Data_Len); old_ref_len = Data_Len / (HASH_KMER_SKIP + 1); Next_Ref = (String_Ref_t *) safe_realloc (Next_Ref, old_ref_len * sizeof (String_Ref_t)); } memset (Next_Ref, '\377', old_ref_len * sizeof (String_Ref_t)); memset (Hash_Table, 0, HASH_TABLE_SIZE * sizeof (Hash_Bucket_t)); memset (Hash_Check_Array, 0, HASH_TABLE_SIZE * sizeof (Check_Vector_t)); Extra_Ref_Ct = 0; Hash_Entries = 0; hash_entry_limit = Max_Hash_Load * HASH_TABLE_SIZE * ENTRIES_PER_BUCKET; #if 0 fprintf(stderr, "HASH LOADING STARTED: fragID %12"F_U64P"\n", first_frag_id); fprintf(stderr, "HASH LOADING STARTED: strings %12"F_U64P" out of %12"F_U64P" max.\n", String_Ct, Max_Hash_Strings); fprintf(stderr, "HASH LOADING STARTED: length %12"F_U64P" out of %12"F_U64P" max.\n", total_len, Max_Hash_Data_Len); fprintf(stderr, "HASH LOADING STARTED: entries %12"F_U64P" out of %12"F_U64P" max (load %.2f).\n", Hash_Entries, hash_entry_limit, (100.0 * Hash_Entries) / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); #endif while (String_Ct < Max_Hash_Strings && total_len < Max_Hash_Data_Len && Hash_Entries < hash_entry_limit && (frag_status = Read_Next_Frag 
(Sequence_Buffer, Quality_Buffer, stream, myRead, & Last_Hash_Frag_Read, minLibToHash, maxLibToHash))) { int extra, len; size_t new_len; if (frag_status == DELETED_FRAG) { Sequence_Buffer [0] = '\0'; Quality_Buffer [0] = '\0'; } String_Start [String_Ct] = total_len; len = strlen (Sequence_Buffer); String_Info [String_Ct] . length = len; String_Info [String_Ct] . lfrag_end_screened = FALSE; String_Info [String_Ct] . rfrag_end_screened = FALSE; new_len = total_len + len + 1; extra = new_len % (HASH_KMER_SKIP + 1); if (extra > 0) new_len += 1 + HASH_KMER_SKIP - extra; if (new_len > Data_Len) { Data_Len = (size_t) (Data_Len * MEMORY_EXPANSION_FACTOR); if (new_len > Data_Len) Data_Len = new_len; if (Data_Len > Extra_Data_Len) { Data = (char *) safe_realloc (Data, Data_Len); Extra_Data_Len = Data_Len; } Quality_Data = (char *) safe_realloc (Quality_Data, Data_Len); new_ref_len = Data_Len / (HASH_KMER_SKIP + 1); Next_Ref = (String_Ref_t *) safe_realloc (Next_Ref, new_ref_len * sizeof (String_Ref_t)); memset (Next_Ref + old_ref_len, '\377', (new_ref_len - old_ref_len) * sizeof (String_Ref_t)); old_ref_len = new_ref_len; } strcpy (Data + total_len, Sequence_Buffer); memcpy (Quality_Data + total_len, Quality_Buffer, len + 1); total_len = new_len; Put_String_In_Hash (String_Ct); String_Ct ++; if ((String_Ct % 100000) == 0) fprintf (stderr, "String_Ct:%12"F_U64P"/%12"F_U32P" totalLen:%12"F_U64P"/%12"F_U64P" Hash_Entries:%12"F_U64P"/%12"F_U64P" Load: %.2f%%\n", String_Ct, Max_Hash_Strings, total_len, Max_Hash_Data_Len, Hash_Entries, hash_entry_limit, 100.0 * Hash_Entries / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); } if (String_Ct == 0) return 0; fprintf(stderr, "HASH LOADING STOPPED: strings %12"F_U64P" out of %12"F_U32P" max.\n", String_Ct, Max_Hash_Strings); fprintf(stderr, "HASH LOADING STOPPED: length %12"F_U64P" out of %12"F_U64P" max.\n", total_len, Max_Hash_Data_Len); fprintf(stderr, "HASH LOADING STOPPED: entries %12"F_U64P" out of %12"F_U64P" max (load %.2f).\n", 
Hash_Entries, hash_entry_limit, 100.0 * Hash_Entries / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); Used_Data_Len = total_len; if (Extra_Ref_Ct > max_extra_ref_ct) { max_extra_ref_ct *= MEMORY_EXPANSION_FACTOR; if (Extra_Ref_Ct > max_extra_ref_ct) max_extra_ref_ct = Extra_Ref_Ct; fprintf (stderr, "### realloc Extra_Ref_Space max_extra_ref_ct = " F_S64 "\n", max_extra_ref_ct); Extra_Ref_Space = (String_Ref_t *) safe_realloc (Extra_Ref_Space, max_extra_ref_ct * sizeof (String_Ref_t)); } if (Kmer_Skip_File != NULL) Mark_Skip_Kmers (); // Coalesce reference chain into adjacent entries in Extra_Ref_Space Extra_Ref_Ct = 0; for (int32 i = 0; i < HASH_TABLE_SIZE; i ++) for (int32 j = 0; j < Hash_Table [i] . Entry_Ct; j ++) { ref = Hash_Table [i] . Entry [j]; if (! getStringRefLast(ref) && ! getStringRefEmpty(ref)) { Extra_Ref_Space [Extra_Ref_Ct] = ref; setStringRefStringNum(Hash_Table [i] . Entry [j], (String_Ref_t)(Extra_Ref_Ct >> OFFSET_BITS)); setStringRefOffset (Hash_Table [i] . Entry [j], (String_Ref_t)(Extra_Ref_Ct & OFFSET_MASK)); Extra_Ref_Ct ++; do { ref = Next_Ref [(String_Start [getStringRefStringNum(ref)] + getStringRefOffset(ref)) / (HASH_KMER_SKIP + 1)]; Extra_Ref_Space [Extra_Ref_Ct ++] = ref; } while (! getStringRefLast(ref)); } }
// Read the next batch of strings from stream and create a hash // table index of their G.Kmer_Len -mers. Return 1 if successful; // 0 otherwise. The batch ends when either end-of-file is encountered // or Max_Hash_Strings have been read in. first_frag_id is the // internal ID of the first fragment in the hash table. int Build_Hash_Index(gkStore *gkpStore, uint32 bgnID, uint32 endID) { String_Ref_t ref; uint64 total_len; uint64 hash_entry_limit; fprintf(stderr, "Build_Hash_Index from "F_U32" to "F_U32"\n", bgnID, endID); Hash_String_Num_Offset = bgnID; String_Ct = 0; Extra_String_Ct = 0; Extra_String_Subcount = MAX_EXTRA_SUBCOUNT; total_len = 0; //if (Data == NULL) { // Extra_Data_Len = Max_Hash_Data_Len + AS_MAX_READLEN; // Data_Len = Max_Hash_Data_Len + AS_MAX_READLEN; // // basesData = new char [Data_Len]; // qualsData = new char [Data_Len]; // // old_ref_len = Data_Len / (HASH_KMER_SKIP + 1); // nextRef = new String_Ref_t [old_ref_len]; //} //memset(nextRef, 0xff, old_ref_len * sizeof(String_Ref_t)); memset(Hash_Table, 0x00, HASH_TABLE_SIZE * sizeof(Hash_Bucket_t)); memset(Hash_Check_Array, 0x00, HASH_TABLE_SIZE * sizeof(Check_Vector_t)); Extra_Ref_Ct = 0; Hash_Entries = 0; hash_entry_limit = G.Max_Hash_Load * HASH_TABLE_SIZE * ENTRIES_PER_BUCKET; #if 0 fprintf(stderr, "HASH LOADING STARTED: fragID %12"F_U64P"\n", first_frag_id); fprintf(stderr, "HASH LOADING STARTED: strings %12"F_U64P" out of %12"F_U64P" max.\n", String_Ct, G.Max_Hash_Strings); fprintf(stderr, "HASH LOADING STARTED: length %12"F_U64P" out of %12"F_U64P" max.\n", total_len, G.Max_Hash_Data_Len); fprintf(stderr, "HASH LOADING STARTED: entries %12"F_U64P" out of %12"F_U64P" max (load %.2f).\n", Hash_Entries, hash_entry_limit, (100.0 * Hash_Entries) / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); #endif // Compute an upper limit on the number of bases we will load. 
The number of Hash_Entries // can't be computed here, so the real loop below could end earlier than expected - and we // don't use a little bit of memory. uint32 nSkipped = 0; uint32 nShort = 0; uint32 nLoadable = 0; uint64 maxAlloc = 0; uint32 curID = 0; // The last ID loaded into the hash for (curID=bgnID; ((String_Ct < G.Max_Hash_Strings) && (total_len < G.Max_Hash_Data_Len) && (curID <= endID)); curID++) { gkRead *read = gkpStore->gkStore_getRead(curID); if ((read->gkRead_libraryID() < G.minLibToHash) || (read->gkRead_libraryID() > G.maxLibToHash)) { nSkipped++; continue; } if (read->gkRead_sequenceLength() < G.Min_Olap_Len) { nShort++; continue; } nLoadable++; maxAlloc += read->gkRead_sequenceLength() + 1; } fprintf(stderr, "Found "F_U32" reads with length "F_U64" to load; "F_U32" skipped by being too short; "F_U32" skipped per library restriction\n", nLoadable, maxAlloc, nShort, nSkipped); // This should be less than what the user requested on the command line if (maxAlloc >= G.Max_Hash_Data_Len + AS_MAX_READLEN) fprintf(stderr, "maxAlloc = "F_U64" G.Max_Hash_Data_Len = "F_U64" AS_MAX_READLEN = %u\n", maxAlloc, G.Max_Hash_Data_Len, AS_MAX_READLEN); assert(maxAlloc < G.Max_Hash_Data_Len + AS_MAX_READLEN); // Allocate space, then fill it. uint64 nextRef_Len = maxAlloc / (HASH_KMER_SKIP + 1); Extra_Data_Len = Data_Len = maxAlloc; basesData = new char [Data_Len]; qualsData = new char [Data_Len]; nextRef = new String_Ref_t [nextRef_Len]; memset(nextRef, 0xff, sizeof(String_Ref_t) * nextRef_Len); gkReadData *readData = new gkReadData; for (curID=bgnID; ((String_Ct < G.Max_Hash_Strings) && (total_len < G.Max_Hash_Data_Len) && (Hash_Entries < hash_entry_limit) && (curID <= endID)); curID++, String_Ct++) { // Load sequence if it exists, otherwise, add an empty read. // Duplicated in Process_Overlaps(). 
String_Start[String_Ct] = UINT64_MAX; String_Info[String_Ct].length = 0; String_Info[String_Ct].lfrag_end_screened = TRUE; String_Info[String_Ct].rfrag_end_screened = TRUE; gkRead *read = gkpStore->gkStore_getRead(curID); if ((read->gkRead_libraryID() < G.minLibToHash) || (read->gkRead_libraryID() > G.maxLibToHash)) continue; uint32 len = read->gkRead_sequenceLength(); if (len < G.Min_Olap_Len) continue; gkpStore->gkStore_loadReadData(read, readData); char *seqptr = readData->gkReadData_getSequence(); char *qltptr = readData->gkReadData_getQualities(); // Note where we are going to store the string, and how long it is String_Start[String_Ct] = total_len; String_Info[String_Ct].length = len; String_Info[String_Ct].lfrag_end_screened = FALSE; String_Info[String_Ct].rfrag_end_screened = FALSE; // Store it. for (uint32 i=0; i<len; i++, total_len++) { basesData[total_len] = tolower(seqptr[i]); qualsData[total_len] = qltptr[i]; } basesData[total_len] = 0; qualsData[total_len] = 0; total_len++; // Skipping kners is totally untested. #if 0 if (HASH_KMER_SKIP > 0) { uint32 extra = new_len % (HASH_KMER_SKIP + 1); if (extra > 0) new_len += 1 + HASH_KMER_SKIP - extra; } #endif // Trouble - allocate more space for sequence and quality data. // This was computed ahead of time! if (total_len > maxAlloc) fprintf(stderr, "total_len="F_U64" len="F_U32" maxAlloc="F_U64"\n", total_len, len, maxAlloc); assert(total_len <= maxAlloc); // What is Extra_Data_Len? It's set to Data_Len if we would have reallocated here. Put_String_In_Hash(curID, String_Ct); if ((String_Ct % 100000) == 0) fprintf (stderr, "String_Ct:%12"F_U64P"/%12"F_U32P" totalLen:%12"F_U64P"/%12"F_U64P" Hash_Entries:%12"F_U64P"/%12"F_U64P" Load: %.2f%%\n", String_Ct, G.Max_Hash_Strings, total_len, G.Max_Hash_Data_Len, Hash_Entries, hash_entry_limit, 100.0 * Hash_Entries / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); } curID--; // We always stop on the read after we loaded. 
delete readData; fprintf(stderr, "HASH LOADING STOPPED: strings %12"F_U64P" out of %12"F_U32P" max.\n", String_Ct, G.Max_Hash_Strings); fprintf(stderr, "HASH LOADING STOPPED: length %12"F_U64P" out of %12"F_U64P" max.\n", total_len, G.Max_Hash_Data_Len); fprintf(stderr, "HASH LOADING STOPPED: entries %12"F_U64P" out of %12"F_U64P" max (load %.2f).\n", Hash_Entries, hash_entry_limit, 100.0 * Hash_Entries / (HASH_TABLE_SIZE * ENTRIES_PER_BUCKET)); if (String_Ct == 0) { fprintf(stderr, "HASH LOADING STOPPED: no strings added?\n"); return(endID); } Used_Data_Len = total_len; //fprintf(stderr, "Extra_Ref_Ct = "F_U64" Max_Extra_Ref_Space = "F_U64"\n", Extra_Ref_Ct, Max_Extra_Ref_Space); if (Extra_Ref_Ct > Max_Extra_Ref_Space) { int32 newSize = (Max_Extra_Ref_Space == 0) ? 16 * 1024 : Max_Extra_Ref_Space * 2; while (newSize < Extra_Ref_Ct) newSize *= 2; String_Ref_t *newSpace = new String_Ref_t [newSize]; memcpy(newSpace, Extra_Ref_Space, sizeof(String_Ref_t) * Max_Extra_Ref_Space); delete [] Extra_Ref_Space; Max_Extra_Ref_Space = newSize; // Former max_extra_ref_ct Extra_Ref_Space = newSpace; } if (G.Kmer_Skip_File != NULL) Mark_Skip_Kmers(); // Coalesce reference chain into adjacent entries in Extra_Ref_Space Extra_Ref_Ct = 0; for (int32 i = 0; i < HASH_TABLE_SIZE; i ++) for (int32 j = 0; j < Hash_Table[i].Entry_Ct; j ++) { ref = Hash_Table[i].Entry[j]; if (! getStringRefLast(ref) && ! getStringRefEmpty(ref)) { Extra_Ref_Space[Extra_Ref_Ct] = ref; setStringRefStringNum(Hash_Table[i].Entry[j], (String_Ref_t)(Extra_Ref_Ct >> OFFSET_BITS)); setStringRefOffset (Hash_Table[i].Entry[j], (String_Ref_t)(Extra_Ref_Ct & OFFSET_MASK)); Extra_Ref_Ct ++; do { ref = nextRef[(String_Start[getStringRefStringNum(ref)] + getStringRefOffset(ref)) / (HASH_KMER_SKIP + 1)]; Extra_Ref_Space[Extra_Ref_Ct ++] = ref; } while (! getStringRefLast(ref)); } }
// Insert string subscript i into the global hash table. // Sequence and information about the string are in // global variables basesData, String_Start, String_Info, .... static void Put_String_In_Hash(uint32 curID, uint32 i) { String_Ref_t ref = 0; int skip_ct; uint64 key; uint64 key_is_bad; int j; uint32 kmers_skipped = 0; uint32 kmers_bad = 0; uint32 kmers_inserted = 0; char *p = basesData + String_Start[i]; char *window = basesData + String_Start[i]; key = key_is_bad = 0; for (uint32 j=0; j<G.Kmer_Len; j ++) { key_is_bad |= (uint64) (Char_Is_Bad[(int) * p]) << j; key |= (uint64) (Bit_Equivalent[(int) * (p ++)]) << (2 * j); } setStringRefStringNum(ref, i); if (i > MAX_STRING_NUM) fprintf (stderr, "Too many strings for hash table--exiting\n"), exit(1); setStringRefOffset(ref, TRUELY_ZERO); skip_ct = 0; setStringRefEmpty(ref, TRUELY_ZERO); if (key_is_bad == false) { Hash_Insert(ref, key, window); kmers_inserted++; } else { kmers_bad++; } while (*p != 0) { window++; String_Ref_t newoff = getStringRefOffset(ref) + 1; assert(newoff < OFFSET_MASK); setStringRefOffset(ref, newoff); if (++skip_ct > HASH_KMER_SKIP) skip_ct = 0; key_is_bad >>= 1; key_is_bad |= (uint64) (Char_Is_Bad[(int) * p]) << (G.Kmer_Len - 1); key >>= 2; key |= (uint64) (Bit_Equivalent[(int) * (p ++)]) << (2 * (G.Kmer_Len - 1)); if (skip_ct > 0) { kmers_skipped++; continue; } if (key_is_bad) { kmers_bad++; continue; } Hash_Insert(ref, key, window); kmers_inserted++; } //fprintf(stderr, "STRING %u skipped %u bad %u inserted %u\n", // curID, kmers_skipped, kmers_bad, kmers_inserted); }