示例#1
0
// Stress and consistency test for the hash type H and its streaming encoder.
//
// NOTE(review): H is not declared anywhere in this excerpt -- presumably a
// template parameter whose `template<typename H>` header is not visible here;
// confirm against the full file.
//
// expected: string form that the digest of 2^24 concatenated copies of TEST6
//           must convert to.
void test_big( const std::string& expected ) {
    typename H::encoder enc;
    // The first copy of TEST6 is fed one character at a time through put() ...
    for (char c : TEST6) { enc.put(c); }
    // ... and 16777215 (2^24 - 1) further copies are fed through write(),
    // giving 2^24 copies of TEST6 in total.
    for (int i = 0; i < 16777215; i++) {
        enc.write( TEST6.c_str(), TEST6.size() );
    }
    H hash = enc.result();
    BOOST_CHECK_EQUAL( expected, (std::string) hash );

    // After reset() the same encoder object is reused for a fresh digest of
    // TEST1, which is compared with the one-shot H::hash( TEST1 ) and passed
    // on to test<H>().
    enc.reset();
    enc.write( TEST1.c_str(), TEST1.size() );
    hash = enc.result();
    BOOST_CHECK( hash >= H::hash( TEST1 ) );
    test<H>( TEST1, (std::string) hash );

    // Build a value whose bytes are all zero except the last one, which is 1
    // (x ^ x clears every bit), then check that shifting it left by every
    // amount from (bit width - 1) down to 1 yields a different, strictly
    // greater value.
    hash = hash ^ hash;
    hash.data()[hash.data_size() - 1] = 1;
    for (int i = hash.data_size() * 8 - 1; i > 0; i--) {
        H other = hash << i;
        BOOST_CHECK( other != hash );
        BOOST_CHECK( other > hash );
        BOOST_CHECK( hash < other );
    }

    // Round trip through fc::variant: to_variant followed by from_variant
    // must reproduce the value constructed from `expected`.
    H hash2( expected );
    fc::variant v;
    to_variant( hash2, v );
    from_variant( v, hash );
    BOOST_CHECK( hash == hash2 );

    // Dropping the first 15 characters of the string form and padding the end
    // with '0' characters must equal the value shifted left by 60 bits
    // (presumably the string form is hexadecimal, 4 bits per character --
    // TODO confirm).
    H hash3( expected.substr(15) + "000000000000000" );
    BOOST_CHECK( hash3 == hash2 << 60 );
}
示例#2
0
// Computes a hash of the NUL-terminated string `word`.
//
// The running value is seeded with hash3(word). Then, for each character in
// order, the value is multiplied by FNV_PRIME (only while it is still below
// MAXINT) and the character is XOR-ed in. The unsigned result is returned
// converted to int.
int hash5(const char* word)
{
	unsigned int running = hash3(word);
	const int length = strlen(word);

	const char* cursor = word;
	for (int remaining = length; remaining > 0; --remaining, ++cursor)
	{
		// The multiply step is skipped once the value is no longer below MAXINT.
		if (running < MAXINT)
			running *= FNV_PRIME;

		running ^= (unsigned int)*cursor;
	}

	return running;
}
示例#3
0
文件: hash.c 项目: ArchFeh/hustoj
/*
 * Third pass over the forward-reference chains.
 *
 * For every start position that still leaves room for Min_Run_Size tokens,
 * walk its forward_reference chain until hash3() accepts the pair
 * (start, candidate) or the chain ends at 0, then point the start position
 * directly at that candidate (or at 0).
 */
static void
make_forward_references_hash3(void) {
	size_t start;

	for (start = 0; start + Min_Run_Size < Text_Length(); start++) {
		/* first candidate on the chain; 0 means "no forward reference" */
		size_t candidate = forward_reference[start];

		/* this pass uses a genuine compare: skip every candidate hash3() rejects */
		while (candidate != 0
		&&	!hash3(&Token_Array[start], &Token_Array[candidate])
		) {
			candidate = forward_reference[candidate];
		}

		/* short-circuit the chain to the accepted candidate, or to zero */
		forward_reference[start] = candidate;
	}

	db_forward_references("third hashing");
}
示例#4
0
/*
 * Program entry point.
 *
 *   1. Starts the timer and parses the command line into the externally
 *      defined `myargument`.
 *   2. Opens myargument.filename read-only (default "/home/love/dic.txt").
 *   3. Reads the file line by line; every non-empty line has its trailing
 *      newline removed and is passed to lookup_word() against month_names.
 *      Hits are printed; misses are printed only when myargument.mode is set.
 *   4. Calls mylib_func1(), stops the timer, prints hash3() for 12 entries of
 *      month_names, demonstrates an XOR swap and prints the value of
 *      timer_delta_useconds().
 *
 * Exits with status -1 when the timer, open() or fdopen() fails, 0 otherwise.
 */
int main(int argc, char *argv[])
{
	struct timert t1;
	struct timert *t1Ptr = &t1;
	if (timer_start(t1Ptr)) {
		exit(-1);
	}

	process_optlong(argc, argv, msg, &myargument, longopts);
	print_arguments(myargument);

	if (!myargument.filename) {
		myargument.filename = "/home/love/dic.txt";
	}

	/* open() signals failure with -1; descriptor 0 is a valid result, so the
	 * check must be "< 0" rather than "== 0". */
	int fd = open(myargument.filename, O_RDONLY);
	if (fd < 0) {
		printf("can't open file(%s)\n", myargument.filename);
		exit(-1);
	}

	DEBUG_PRINT("file(%s)opened(%d)", myargument.filename, fd);
	FILE *fd_File = fdopen(fd, "r");
	if (!fd_File) {
		printf("fdopen(fd) error....\n");
		exit(-1);
	}

	int month_no;
	char buf[8192] = {};
	while (fgets(buf, sizeof(buf), fd_File) != NULL) {
		if (myargument.mode) {
			printf("DEBUG: %s\n", buf);
		}

		/* fgets() keeps the newline when one was read. Strip it only when it
		 * is really there, so that a final line without '\n' (or a line longer
		 * than the buffer) does not lose its last character. */
		size_t raw_len = strlen(buf);
		size_t text_len = raw_len;
		if (text_len > 0 && buf[text_len - 1] == '\n') {
			text_len--;
		}
		if (text_len == 0) {
			continue;	/* empty line: nothing to look up */
		}

		if (myargument.mode) {
			/* cast so the argument matches the %d conversion */
			DEBUG_PRINT("strlen of buf:%d", (int)raw_len);
		}
		buf[text_len] = '\0';

		month_no = lookup_word(buf, month_names);
		if (month_no >= 0) {
			printf("found at %2d(%s)\n", month_no, month_names[month_no]);
		} else if (myargument.mode) {
			printf("%d\t%s\n", month_no, buf);
		}
	}

	/* Closing the stream also releases the descriptor it was opened on. */
	fclose(fd_File);

	mylib_func1();
	if (timer_stop (t1Ptr)) {
		exit(-1);
	}

	int month_names_size = 12;
	int i;
	for (i = 0; i < month_names_size; i++) {
		unsigned int resu;
		/* NOTE(review): sizeof(month_names[i]) is the size of one array
		 * element; if month_names holds pointers this is the pointer size,
		 * not the string length -- confirm against the declaration. */
		resu = hash3(month_names[i], sizeof(month_names[i]));
		printf("hash(%d) = %x\n", i, resu);
	}

	/* XOR-swap demonstration. */
	int a = 23;
	int b = 15;
	printf("a=%d\t b=%d\n", a, b);
	a = a ^ b;
	b = b ^ a;
	a = a ^ b;
	printf("after swap with ^, a=%d\t b=%d\n", a, b);
	printf("\nEND: this program taked %ld useconds!\n",
		   timer_delta_useconds(t1Ptr));
	exit(0);
}
示例#5
0
文件: repet.cpp 项目: bqqbarbhg/repet
// Dry run of a trigraph-hash match finder over data[0 .. length).
//
// For every position the function hashes the 3 bytes starting there (hash3),
// follows the chain of earlier positions with the same hash and looks for the
// longest earlier match. No compressed output is produced: apart from the
// optional DO_LOG tracing, the only result is the three counters printed at
// the end (hash-table lookups, chain skips, match scans).
//
// Data structures (all entries are uint16_t, 0xFFFF means "empty"):
//   hash_table[h]   - most recent position, relative to `base`, whose trigraph
//                     hashed to h.
//   skip_table[pos] - distance from pos back to the previous position with the
//                     same hash, or 0xFFFF at the end of the chain.
//
// The input is consumed in blocks of at most forget_length bytes; after each
// block `base` is advanced and hash_table is rebased.
//
// data:   input bytes.
// length: number of input bytes.
void test_compress(const char *data, size_t length)
{
	// One skip_table slot per input byte, capped at 0x10000 slots
	// (presumably because positions are stored in 16 bits).
	size_t skip_table_size = length;
	if (skip_table_size > 0x10000) {
		skip_table_size = 0x10000;
	}

	// Filling with 0xFF bytes makes every 16-bit entry 0xFFFF, the "empty" marker.
	// NOTE(review): neither malloc result is checked for NULL before use.
	uint16_t *skip_table = (uint16_t*)malloc(skip_table_size * sizeof(uint16_t));
	memset(skip_table, 0xFF, skip_table_size * sizeof(uint16_t));
	uint16_t *hash_table = (uint16_t*)malloc(HASH_TABLE_SIZE * sizeof(uint16_t));
	memset(hash_table, 0xFF, HASH_TABLE_SIZE * sizeof(uint16_t));

	// Statistics printed at the end.
	int scans = 0;
	int skips = 0;
	int hashes = 0;

	// Result of the search at the previous position. It lets the search at the
	// current position start from "previous match, shifted by one byte".
	unsigned last_match_pos = 0;
	unsigned last_match_len = 0;
	unsigned last_end_hash = 0;

	// Start and remaining size of the part of the input not yet processed.
	const char *base = data;
	size_t base_length = length;

	// Maximum number of bytes processed before the tables are rebased.
	unsigned int min_forget_dist = 16000;
	unsigned forget_length = 0x10000 - min_forget_dist;
	
	for (;;) {

		// This block covers forget_length bytes, or whatever is left.
		unsigned block_length = forget_length;
		if (base_length < block_length) {
			block_length = (unsigned)base_length;
		}
		for (unsigned pos = 0; pos < block_length; pos++) {
			if (base_length - pos < 3) {
				// No full trigraph left, not worth finding a match.
				continue;
			}

			const char *cur_str = &base[pos];
			unsigned max_match_len = (unsigned)(base_length - pos);

			// Get the linked list of the previous occurrences of the trigraph at the
			// current position.
			unsigned hash = hash3(&base[pos]);
			uint16_t prev = hash_table[hash]; hashes++;
			hash_table[hash] = (uint16_t)pos;

#if DO_LOG
			putchar('\n');
			print_highlight(base, base_length, pos, 1, 30, printf("|> "));
#endif
			// Number of accepted candidates at this position; only read by the
			// DO_LOG output.
			int t = 0;

			if (prev == 0xFFFF) {
#if DO_LOG
				printf("No previous match for [%.3s]\n", &base[pos]);
#endif

				// The trigraph hasn't been seen yet, so there can be no match.
				// Initialize linked list with end node and continue scanning.
				skip_table[pos] = 0xFFFF;
				last_match_len = 0;
				continue;
			}

			// Link to the chain.
			skip_table[pos] = (uint16_t)(pos - prev);

			// Search state for this position:
			//   end_hash / end_pos - hash of the last trigraph of the best match so
			//                        far and a chain position for that hash
			//   end_dist           - required distance between the start of a
			//                        candidate and its end trigraph
			//   target_len         - a candidate must match at least this many bytes
			//   best_pos / best_len - best match found so far
			unsigned end_dist = 0;
			unsigned end_hash = 0;
			unsigned end_pos = 0;
			// Written below but only read by the DO_LOG output.
			int dbg_hash_pos;
			unsigned target_len = 3;
			unsigned best_pos = 0;
			unsigned best_len = 0;

			// First candidate: the most recent earlier occurrence of the trigraph.
			uint16_t check_pos = prev;

			if (last_match_len > 3 && base_length - pos > 3) {

				// The previous position matched last_match_len bytes, so this
				// position already has a match of one byte less, starting one
				// byte later. Only longer matches are worth searching for.
				int len = last_match_len - 1;

				end_hash = last_end_hash;
				// HASH_TABLE_SIZE is used as a marker meaning the previous match
				// already reached the end of the buffer (set in the scan loop below).
				if (end_hash == HASH_TABLE_SIZE) {

					last_match_pos++;
					last_match_len--;

#if DO_LOG
					printf("Last match proven optimal\n");
#endif

					continue;
				}
				end_pos = hash_table[end_hash];
				// No occurrence of the end trigraph is recorded: nothing longer
				// can exist, keep the shifted previous match.
				if (end_pos == 0xFFFF) {

					last_match_pos++;
					last_match_len--;

#if DO_LOG
					printf("Last match proven optimal\n");
#endif

					continue;
				}

				end_dist = len - 2;
				target_len = len + 1;

				best_pos = last_match_pos + 1;
				best_len = len;

#if DO_LOG
				printf("Searching with [%.3s]..%d..[%.3s]\n",
					&base[pos], end_dist, &base[pos + last_match_len - 3]);
#endif

				// Synchronize the begin and end trigraphs so that they are separated
				// by the searched for amount of bytes.
				for (;;) {
					// diff > 0: the end trigraph is too far ahead, step it back.
					// diff < 0: the start candidate is too far ahead, step it back.
					int diff = end_pos - check_pos - end_dist;
					if (diff == 0)
						break;
					else if (diff > 0) {
						uint16_t skip = skip_table[end_pos]; skips++;
						// A skip larger than the position cannot be followed:
						// treat it as the end of the chain.
						if (skip > end_pos) break;
						end_pos -= skip;
					} else {
						uint16_t skip = skip_table[check_pos]; skips++;
						if (skip > check_pos) break;
						check_pos -= skip;
					}
				}
			} else {
#if DO_LOG
				printf("Searching with [%.3s]\n", &base[pos]);
#endif
			}

			// Walk the candidate chain, keeping the longest match found.
			for (;;)
			{
				// Calculate the maximum bound for the match.
				// The compare is limited to the distance between the candidate and
				// the current position, and to the bytes left in the buffer.
				unsigned check_len = (unsigned)(pos - check_pos);
				unsigned max_check_len = check_len < max_match_len
					? check_len : max_match_len;

				scans++;

				// Match as far as possible.
				const char *check_str = &base[check_pos];
				unsigned len;
				for (len = 0; len < max_check_len; len++) {
					if (cur_str[len] != check_str[len])
						break;
				}

				if (len >= target_len) {
#if DO_LOG
					print_highlight(base, base_length, pos, len, 10, printf("%d: ", t));
					print_highlight(base, base_length, check_pos, len, 10, printf("%d: ", t));
#endif
					t++;

					best_len = len;
					best_pos = check_pos;

					if (pos + len >= base_length - 1) {
						// We have found the longest possible match, nothing to search for.
						// HASH_TABLE_SIZE marks this for the next position.
						end_hash = HASH_TABLE_SIZE;
						break;
					}

					// We found a new longest match, update end trigraph accordingly.
					end_hash = hash3(&base[pos + len - 2]);
					end_pos = hash_table[end_hash]; hashes++;
					end_dist = len - 2;
					target_len = len + 1;

					if (end_pos == 0xFFFF) {
						// The buffer does not contain any occurrences of the end trigraph,
						// so we have found the longest match there is.
						break;
					}

					dbg_hash_pos = (int)(pos + len - 2);

#if DO_LOG
					printf("Searching with [%.3s]..%d..[%.3s]\n",
						&base[pos], end_dist, &base[dbg_hash_pos]);
#endif

				} else {
					// In this case we hit a false match candidate. Carry on.
				}

				{
					// Move to the next match candidate.
					uint16_t skip = skip_table[check_pos]; skips++;
					if (skip > check_pos) break;
					check_pos -= skip;
				}

				if (best_len > 0) {
					// Synchronize the begin and end trigraphs so that they are separated
					// by the searched for amount of bytes.
					for (;;) {
						int diff = end_pos - check_pos - end_dist;
						if (diff == 0)
							break;
						else if (diff > 0) {
							uint16_t skip = skip_table[end_pos]; skips++;
							if (skip > end_pos) break;
							end_pos -= skip;
						} else {
							uint16_t skip = skip_table[check_pos]; skips++;
							if (skip > check_pos) break;
							check_pos -= skip;
						}
					}

					// Could not find a potential match anymore.
					if ((unsigned)(end_pos - check_pos) != end_dist)
						break;
				}
			}

			// Remember the result for the search at the next position.
			last_match_pos = best_pos;
			last_match_len = best_len;
			last_end_hash = end_hash;
		}

		base_length -= block_length;
		if (base_length == 0)
			break;

		// Need to rebase
		// NOTE(review): every position stored during the block is below
		// block_length, and the 0xFFFF "empty" marker is not, so empty entries
		// take the else branch and are decremented too -- confirm this is intended.
		// NOTE(review): skip_table and the last_match_* state are not adjusted
		// here -- confirm.
		for (unsigned i = 0; i < HASH_TABLE_SIZE; i++) {
			if (hash_table[i] < block_length) {
				// Fell off the range of the compressor, "forget" the pointer.
				hash_table[i] = 0xFFFF;
			} else {
				// Still valid, adjust base.
				hash_table[i] -= (uint16_t)block_length;
			}
		}
		base += block_length;
	}

	free(skip_table);
	free(hash_table);

	printf("Hashes: %d\n", hashes);
	printf("Skips: %d\n", skips);
	printf("Scans: %d\n", scans);
}