Example #1
0
static size_t
hash2(const Token *p) {
	/*	A simple-minded hashing for the secondary sweep: XORs together
		the tokens found at five entries of sample_pos[] (first, 1/4,
		middle, 3/4 and last), each shifted to its own offset in a
		64-bit word, then scrambles the word and reduces it to 31 bits.

		The upper half of the word is folded into the lower half
		before the multiplication. The low 31 bits of an unsigned
		product depend only on the low 31 bits of its operands, so
		without the fold the samples shifted by 32 and 48 would be
		masked away and could never influence the result.

		Returns a value in the range 0 .. 017777777777 (31 bits).
	*/
	const int last = N_SAMPLES - 1;
	uint64_t h_val = 0;

	h_val ^= ((uint64_t)Token2int(p[sample_pos[0]])) << 0;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[last]])) << 16;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[last / 2]])) << 24;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[last * 1 / 4]])) << 32;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[last * 3 / 4]])) << 48;

	/* bring the two upper samples down into the bits that survive */
	h_val ^= h_val >> 32;

	h_val *= 2147483647;
	h_val &= 017777777777;	/* keep the low 31 bits only */

	/* reduce h_val to the type yielded by hash2() */
	const size_t h = (size_t)h_val;

#ifdef	DB_HASH
	/* print the result in a responsible way */
	fprintf(Debug_File, "hash2 = %s\n", any_uint2string(h, 0));
#endif	/* DB_HASH */

	return h;
}
Example #2
0
static size_t
hash2(const Token *p) {
	/*	A simple-minded hashing for the secondary sweep: XORs together
		the tokens found at four entries of sample_pos[] (first, 1/4,
		3/4 and last), each shifted to its own 16-bit offset.

		The samples are widened to uint64_t before shifting. Shifting
		a size_t by 32 or 48 is undefined behaviour where size_t is
		only 32 bits wide; with the 64-bit intermediate the shifts are
		always defined, and the final conversion to size_t simply
		drops the two upper samples on such systems.
	*/
	uint64_t h_val = 0;

	h_val ^= ((uint64_t)Token2int(p[sample_pos[(N_SAMPLES - 1) / 4]])) << 48;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[(N_SAMPLES - 1) * 3 / 4]])) << 32;
	h_val ^= ((uint64_t)Token2int(p[sample_pos[N_SAMPLES - 1]])) << 16;
	h_val ^= (uint64_t)Token2int(p[sample_pos[0]]);

	return (size_t) h_val;
}
Example #3
0
static size_t
hash1(const Token *p) {
	/*	hash1(p) yields the hash code of the Min_Run_Size tokens
		starting at p; the caller guarantees that at least
		Min_Run_Size tokens are available there.

		The tokens at the offsets in sample_pos[] are combined one
		by one into a 64-bit accumulator, which is then reduced
		modulo hash_table_size.
	*/
	const uint64_t top_bit = 1ULL << 63;
	uint64_t acc = 0;
	int i = 0;

	while (i < N_SAMPLES) {
		acc = (acc << 2) OPERATION Token2int(p[sample_pos[i]]);
		if ((acc & top_bit) != 0) {
			/* clear bit 63 and flip bit 0 */
			acc ^= (top_bit | 1);
		}
		i++;
	}

	/* reduce the accumulator to the type yielded by hash1() */
	const size_t bucket = (size_t) (acc % hash_table_size);

#ifdef	DB_HASH
	/* print the result in a responsible way */
	fprintf(Debug_File, "hash1 = %s\n", any_uint2string(bucket, 0));
#endif	/* DB_HASH */

	return bucket;
}
Example #4
0
static int
Token_in_range(const Token tk, int low, int high) {
	/*	Returns 1 if the integer value of tk lies in the closed
		interval [low, high], and 0 otherwise.
	*/
	const int value = Token2int(tk);

	return (low <= value && value <= high) ? 1 : 0;
}
Example #5
0
void
fprint_token(FILE *ofile, const Token tk) {
	/*	Prints a readable rendering of token tk on ofile.

		No_Token, IDF and End_Of_Line print as "--", "IDF" and "EOL".

		For a simple token, ch is the low 7 bits of the token value
		and the 0x80 bit acts as a flag:
			0x80 clear		0x80 set
			 A	printable	8A
			^A	control		$A	(ch + '@' is shown)
			^?	0x7F		$?
		CTRL, NORM, MTCT and META tokens are passed to
		check_and_print() with the corresponding tag; if it returns
		non-zero, printing is done.  Hashed tokens are printed in
		hexadecimal, and whatever remains (a gap token) is printed
		in hexadecimal between exclamation marks.
	*/
	int tki = Token2int(tk);
	int ch =   tki & 0x7F;
	int bit8 = tki & 0x80;

	if (Token_EQ(tk, No_Token))	{fprintf(ofile, "--"); return;}
	if (Token_EQ(tk, IDF))		{fprintf(ofile, "IDF"); return;}
	if (Token_EQ(tk, End_Of_Line))	{fprintf(ofile, "EOL"); return;}

	if (is_simple_token(tk)) {
		if ('!' <= ch && ch <= '~') {
			/* printable character */
			fprintf(ofile, "%s%c", (bit8 ? "8" : ""), ch);
			return;
		}
		if (0 < ch && ch <= ' ') {
			/* control character, shown as the matching letter */
			fprintf(ofile, "%s%c", (bit8 ? "$" : "^"), ch + '@');
			return;
		}
		if (ch == 0x7F) {
			fprintf(ofile, "%s%c", (bit8 ? "$" : "^"), '?');
			return;
		}
		/* ch == 0 falls through to the checks below */
	}

	if (is_CTRL_token(tk)) {
		if (check_and_print(ofile, "CTRL", ch, 'A', '~', '@')) return;
	}

	if (is_NORM_token(tk)) {
		if (check_and_print(ofile, "NORM", ch, '!', '~', '\0')) return;
	}

	if (is_MTCT_token(tk)) {
		if (check_and_print(ofile, "MTCT", ch, 'A', '~', '@')) return;
	}

	if (is_META_token(tk)) {
		if (check_and_print(ofile, "META", ch, '!', '~', '\0')) return;
	}

	/* %x requires an unsigned int argument, so convert explicitly */
	if (is_hashed_token(tk)) {
		fprintf(ofile, "0x%04x", (unsigned int)tki);
		return;
	}

	/* gap token! */
	fprintf(ofile, "!0x%04x!", (unsigned int)tki);
}
Example #6
0
static size_t
hash1(const Token *p) {
	/*	hash1(p) yields the hash code of the Min_Run_Size tokens
		starting at p; the caller guarantees that at least
		Min_Run_Size tokens are available there.

		The tokens at the offsets in sample_pos[] are combined one
		by one into a 64-bit accumulator, which is then reduced
		modulo hash_table_size.
	*/
	const uint64_t high_bit = 1ULL << 63;
	uint64_t acc = 0;
	int idx;

	for (idx = 0; idx != N_SAMPLES && idx < N_SAMPLES; idx++) {
		acc = (acc << 2) OPERATION Token2int(p[sample_pos[idx]]);
		if ((acc & high_bit) == 0) {
			continue;
		}
		/* bit 63 is set: clear it and flip bit 0 */
		acc ^= (high_bit | 1);
	}

	return (size_t) (acc % hash_table_size);
}