/*
 * hash2() -- simple-minded hash for the secondary sweep.
 *
 * Mixes the tokens found at five entries of sample_pos[] (first, 1/4,
 * middle, 3/4 and last), each shifted to its own bit offset, then
 * scrambles the mix by multiplying with 2^31 - 1 and keeps the low
 * 31 bits of the product.
 *
 * p:       start of the token run to be hashed; every offset listed in
 *          sample_pos[] is read relative to it.
 * returns: a value in the range 0 .. 2^31 - 1.
 *
 * Note: only the low 31 bits of the product survive the mask, and those
 * depend only on the low 31 bits of the mix.  The samples shifted to
 * bit 32 and bit 48 therefore cannot influence the result, and the
 * samples shifted to bit 16 and bit 24 contribute only their lowest bits.
 */
static size_t hash2(const Token *p) {
	const int last = N_SAMPLES - 1;	/* index of the final sample position */

	const uint64_t tk_first  = (uint64_t)Token2int(p[sample_pos[0]]);
	const uint64_t tk_last   = (uint64_t)Token2int(p[sample_pos[last]]);
	const uint64_t tk_middle = (uint64_t)Token2int(p[sample_pos[last/2]]);
	const uint64_t tk_quart1 = (uint64_t)Token2int(p[sample_pos[last*1/4]]);
	const uint64_t tk_quart3 = (uint64_t)Token2int(p[sample_pos[last*3/4]]);

	/* give every sample its own bit offset and fold them together */
	uint64_t mix = tk_first
		^ (tk_last   << 16)
		^ (tk_middle << 24)
		^ (tk_quart1 << 32)
		^ (tk_quart3 << 48);

	mix *= 2147483647;	/* multiply by 2^31 - 1 */
	mix &= 0x7FFFFFFF;	/* keep the low 31 bits */

	const size_t result = (size_t)mix;

#ifdef	DB_HASH
	/* print the value exactly as it is handed back to the caller */
	fprintf(Debug_File, "hash2 = %s\n", any_uint2string(result, 0));
#endif	/* DB_HASH */

	return result;
}
/*
 * hash2() -- simple-minded hash for the secondary sweep.
 *
 * Folds the tokens at four entries of sample_pos[] into one 64-bit
 * value, each in its own 16-bit lane:
 *
 *	bits 48..	sample_pos[(N_SAMPLES - 1) / 4]
 *	bits 32..	sample_pos[(N_SAMPLES - 1) * 3 / 4]
 *	bits 16..	sample_pos[N_SAMPLES - 1]	(last)
 *	bits  0..	sample_pos[0]			(first)
 *
 * p:       start of the token run to be hashed; every sampled offset is
 *          read relative to it.
 * returns: the mix narrowed to size_t.  Where size_t is 32 bits wide the
 *          narrowing drops the two upper lanes, so only the first and
 *          last token take part there.
 *
 * Each sample is widened to uint64_t *before* it is shifted.  Shifting a
 * size_t by 32 or 48 is undefined behaviour when size_t is only 32 bits
 * wide; the 64-bit shift is well defined on every platform.
 */
static size_t hash2(const Token *p) {
	uint64_t h_val = 0;

	h_val ^= ((uint64_t)Token2int(p[sample_pos[(N_SAMPLES - 1) / 4]])) << 48;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[(N_SAMPLES - 1) * 3 / 4]])) << 32;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[N_SAMPLES - 1]])) << 16;
	h_val ^= (uint64_t)Token2int(p[sample_pos[0]]);

	/* narrowing conversion: keeps the low-order bits that fit in size_t */
	return (size_t)h_val;
}
static size_t hash1(const Token *p) { /* hash1(p) returns the hash code of Min_Run_Size tokens starting at p; caller guarantees that there are at least Min_Run_Size tokens. */ uint64_t h_val; int n; h_val = 0; for (n = 0; n < N_SAMPLES; n++) { h_val = (h_val << 2) OPERATION Token2int(p[sample_pos[n]]); if (h_val & (1ULL<<63)) { h_val ^= (1ULL<<63|1); } } #ifdef DB_HASH /* reduce h_val to the type yielded by hash1(), and print the result in a responsible way */ size_t h = (size_t) (h_val % hash_table_size); fprintf(Debug_File, "hash1 = %s\n", any_uint2string(h, 0)); #endif /* DB_HASH */ return (size_t) (h_val % hash_table_size); }
/*
 * Token_in_range() tells whether the integer value of token tk lies in
 * the closed interval [low, high].
 *
 * returns: 1 if low <= Token2int(tk) <= high, 0 otherwise.
 */
static int Token_in_range(const Token tk, int low, int high) {
	const int value = Token2int(tk);

	return (low <= value && value <= high);
}
void fprint_token(FILE *ofile, const Token tk) { /* Prints a regular token in two characters: normal char meta (bit 9 set) ^A cntl $A meta-cntl A printable #A meta and hashed tokens in hexadecimal. */ int tki = Token2int(tk); int ch = tki & 0x7F; int bit8 = tki & 0x80; if (Token_EQ(tk, No_Token)) {fprintf(ofile, "--"); return;} if (Token_EQ(tk, IDF)) {fprintf(ofile, "IDF"); return;} if (Token_EQ(tk, End_Of_Line)) {fprintf(ofile, "EOL"); return;} if (is_simple_token(tk)) { if ('!' <= ch && ch <= '~') { fprintf(ofile, "%s%c", (bit8 ? "8" : ""), ch); return; } if (0 < ch && ch <= ' ') { fprintf(ofile, "%s%c", (bit8 ? "$" : "^"), ch + '@'); return; } if (ch == 0x7F) { fprintf(ofile, "%s%c", (bit8 ? "$" : "^"), '?'); return; } } if (is_CTRL_token(tk)) { if (check_and_print(ofile, "CTRL", ch, 'A', '~', '@')) return; } if (is_NORM_token(tk)) { if (check_and_print(ofile, "NORM", ch, '!', '~', '\0')) return; } if (is_MTCT_token(tk)) { if (check_and_print(ofile, "MTCT", ch, 'A', '~', '@')) return; } if (is_META_token(tk)) { if (check_and_print(ofile, "META", ch, '!', '~', '\0')) return; } if (is_hashed_token(tk)) { fprintf(ofile, "0x%04x", tki); return; } /* gap token! */ fprintf(ofile, "!0x%04x!", tki); }
/*
 * hash1(p) returns the hash code of the Min_Run_Size tokens starting
 * at p; the caller guarantees that at least Min_Run_Size tokens are
 * present.
 *
 * For each of the N_SAMPLES offsets in sample_pos[], the running 64-bit
 * value is shifted left by two bits and combined with the sampled token
 * through OPERATION; if that sets bit 63, the bit is cleared again and
 * bit 0 is flipped.  The final value is reduced modulo hash_table_size.
 */
static size_t hash1(const Token *p) {
	const uint64_t sign_bit = 1ULL << 63;
	uint64_t running = 0;

	for (int k = 0; k < N_SAMPLES; k++) {
		running = (running << 2) OPERATION Token2int(p[sample_pos[k]]);
		if ((running & sign_bit) != 0) {
			/* move the overflowing top bit down to bit 0 */
			running ^= (sign_bit | 1);
		}
	}

	/* map the 64-bit value onto a slot of the hash table */
	return (size_t)(running % hash_table_size);
}