void test_big( const std::string& expected ) { typename H::encoder enc; for (char c : TEST6) { enc.put(c); } for (int i = 0; i < 16777215; i++) { enc.write( TEST6.c_str(), TEST6.size() ); } H hash = enc.result(); BOOST_CHECK_EQUAL( expected, (std::string) hash ); enc.reset(); enc.write( TEST1.c_str(), TEST1.size() ); hash = enc.result(); BOOST_CHECK( hash >= H::hash( TEST1 ) ); test<H>( TEST1, (std::string) hash ); hash = hash ^ hash; hash.data()[hash.data_size() - 1] = 1; for (int i = hash.data_size() * 8 - 1; i > 0; i--) { H other = hash << i; BOOST_CHECK( other != hash ); BOOST_CHECK( other > hash ); BOOST_CHECK( hash < other ); } H hash2( expected ); fc::variant v; to_variant( hash2, v ); from_variant( v, hash ); BOOST_CHECK( hash == hash2 ); H hash3( expected.substr(15) + "000000000000000" ); BOOST_CHECK( hash3 == hash2 << 60 ); }
/**
 * Hashes a NUL-terminated string, seeded with hash3(word).
 *
 * For every character, in order, the running value is multiplied by
 * FNV_PRIME (only while it is still below MAXINT) and then XOR-ed with
 * the character converted to unsigned int.
 *
 * @param word NUL-terminated input string.
 * @return the final running value, converted to int.
 */
int hash5(const char* word) {
    unsigned int running = hash3(word);
    const int length = strlen(word);

    int index = 0;
    while (index < length) {
        // The multiplication is skipped once the value has reached MAXINT.
        if (running < MAXINT) {
            running *= FNV_PRIME;
        }
        running ^= (unsigned int)word[index];
        ++index;
    }

    return running;
}
/*
 * Third pass over the forward references, checking up on the previous
 * two passes with a genuine compare.
 *
 * For every start position i with i + Min_Run_Size < Text_Length(), the
 * forward-reference chain starting at i is followed until either it ends
 * (a zero link) or hash3() accepts the pair (Token_Array[i],
 * Token_Array[j]). forward_reference[i] is then short-circuited to that
 * position, or to zero when the chain ran out.
 */
static void make_forward_references_hash3(void) {
    size_t start = 0;

    while (start + Min_Run_Size < Text_Length()) {
        /* first candidate on the chain of this start position */
        size_t candidate = forward_reference[start];

        /* step along the chain while there is a link that hash3() rejects */
        while (candidate != 0
            && !hash3(&Token_Array[start], &Token_Array[candidate])) {
            candidate = forward_reference[candidate];
        }

        /* either the accepted position, or zero */
        forward_reference[start] = candidate;
        start++;
    }

    db_forward_references("third hashing");
}
int main(int argc, char *argv[]) { struct timert t1; struct timert *t1Ptr = &t1; if (timer_start(t1Ptr)) { exit(-1); } process_optlong(argc, argv, msg, &myargument, longopts); print_arguments(myargument); // DEBUG_PRINT("%s", "****DEBUG_LOG_ENABLE****"); if (!myargument.filename) { myargument.filename = "/home/love/dic.txt"; } int fd; fd = open(myargument.filename, O_RDONLY); if (!fd) { printf("can't open file(%s)\n", myargument.filename); exit(-1); } DEBUG_PRINT("file(%s)opened(%d)", myargument.filename, fd); FILE *fd_File; fd_File = fdopen(fd, "r"); if (!fd_File) { printf("fdopen(fd) error....\n"); exit(-1); } int month_no; char buf[8192] = {}; char *charPtrTmp; charPtrTmp = &buf; for ( ; ; ) { charPtrTmp = fgets(buf, sizeof(buf), fd_File); if (!charPtrTmp) { break; } if (myargument.mode) { printf("DEBUG: %s\n", buf); } if (strlen(buf) > 1) { if (myargument.mode) DEBUG_PRINT("strlen of buf:%d", strlen(buf)); buf[strlen(buf) - 1] = '\0'; month_no = lookup_word(buf, month_names); if (month_no >= 0) { printf("found at %2d(%s)\n", month_no, month_names[month_no]); } else if (myargument.mode) { printf("%d\t%s\n", month_no, buf); } } } /* int c; */ /* while((c = getchar()) != EOF) { */ /* putchar(c); */ /* } */ mylib_func1(); if (timer_stop (t1Ptr)) { exit(-1); } int month_names_size = 12; // DEBUG_PRINT("%s %d", "sizeof month_names", month_names_size); int i; for (i = 0; i < month_names_size; i++) { unsigned int resu; resu = hash3(month_names[i], sizeof(month_names[i])); printf("hash(%d) = %x\n", i, resu); } int a = 23; int b = 15; printf("a=%d\t b=%d\n", a, b); a = a ^ b; b = b ^ a; a = a ^ b; printf("after swap with ^, a=%d\t b=%d\n", a, b); printf("\nEND: this program taked %ld useconds!\n", timer_delta_useconds(t1Ptr)); exit(0); }
/*
 * Experimental longest-match search over `data`, instrumented with counters.
 *
 * The input is processed in blocks of at most (0x10000 - 16000) bytes so
 * that positions fit into 16 bits. For every position with a full trigraph
 * left, the trigraph is hashed with hash3():
 *   - hash_table[h] holds the most recent position of a trigraph with hash
 *     h, or 0xFFFF when there is none;
 *   - skip_table[p] holds the distance from p back to the previous position
 *     with the same hash, or 0xFFFF at the end of the chain.
 * Candidates on the chain are compared byte by byte. Once a match has been
 * found, the chain of the trigraph at the end of the match is walked in
 * parallel so that only candidates that could be longer are scanned.
 *
 * Nothing is emitted except the statistics printed at the end (number of
 * hashes, chain skips and candidate scans) and, when DO_LOG is enabled, a
 * trace of the search.
 *
 * data:   input bytes.
 * length: number of bytes in data.
 */
void test_compress(const char *data, size_t length)
{
    size_t skip_table_size = length;
    if (skip_table_size > 0x10000) {
        skip_table_size = 0x10000;
    }

    // Both tables start out filled with the 0xFFFF "empty" marker.
    uint16_t *skip_table = (uint16_t*)malloc(skip_table_size * sizeof(uint16_t));
    memset(skip_table, 0xFF, skip_table_size * sizeof(uint16_t));
    uint16_t *hash_table = (uint16_t*)malloc(HASH_TABLE_SIZE * sizeof(uint16_t));
    memset(hash_table, 0xFF, HASH_TABLE_SIZE * sizeof(uint16_t));

    int scans = 0;
    int skips = 0;
    int hashes = 0;

    // Result of the search at the previous position, reused as a starting
    // point for the current one.
    unsigned last_match_pos = 0;
    unsigned last_match_len = 0;
    unsigned last_end_hash = 0;

    const char *base = data;
    size_t base_length = length;

    unsigned int min_forget_dist = 16000;
    unsigned forget_length = 0x10000 - min_forget_dist;

    for (;;) {
        unsigned block_length = forget_length;
        if (base_length < block_length) {
            block_length = (unsigned)base_length;
        }

        for (unsigned pos = 0; pos < block_length; pos++) {
            if (base_length - pos < 3) {
                // No full trigraph left, not worth finding a match.
                continue;
            }

            const char *cur_str = &base[pos];
            unsigned max_match_len = (unsigned)(base_length - pos);

            // Get the linked list of the previous occurrences of the trigraph
            // at the current position.
            unsigned hash = hash3(&base[pos]);
            uint16_t prev = hash_table[hash];
            hashes++;
            hash_table[hash] = (uint16_t)pos;

#if DO_LOG
            putchar('\n');
            print_highlight(base, base_length, pos, 1, 30, printf("|> "));
#endif

            // Counts accepted matches at this position (used by the log only).
            int t = 0;

            if (prev == 0xFFFF) {
#if DO_LOG
                printf("No previous match for [%.3s]\n", &base[pos]);
#endif
                // The trigraph hasn't been seen yet, so there can be no match.
                // Initialize linked list with end node and continue scanning.
                skip_table[pos] = 0xFFFF;
                last_match_len = 0;
                continue;
            }

            // Link to the chain.
            skip_table[pos] = (uint16_t)(pos - prev);

            unsigned end_dist = 0;
            unsigned end_hash = 0;
            unsigned end_pos = 0;
            int dbg_hash_pos;

            unsigned target_len = 3;
            unsigned best_pos = 0;
            unsigned best_len = 0;
            uint16_t check_pos = prev;

            if (last_match_len > 3 && base_length - pos > 3) {
                // The previous position's match, shortened by one byte, also
                // matches here; start the search from it.
                int len = last_match_len - 1;
                end_hash = last_end_hash;
                if (end_hash == HASH_TABLE_SIZE) {
                    // HASH_TABLE_SIZE marks "match ran to the end of input".
                    last_match_pos++;
                    last_match_len--;
#if DO_LOG
                    printf("Last match proven optimal\n");
#endif
                    continue;
                }
                end_pos = hash_table[end_hash];
                if (end_pos == 0xFFFF) {
                    last_match_pos++;
                    last_match_len--;
#if DO_LOG
                    printf("Last match proven optimal\n");
#endif
                    continue;
                }

                end_dist = len - 2;
                target_len = len + 1;
                best_pos = last_match_pos + 1;
                best_len = len;

#if DO_LOG
                printf("Searching with [%.3s]..%d..[%.3s]\n", &base[pos], end_dist, &base[pos + last_match_len - 3]);
#endif

                // Synchronize the begin and end trigraphs so that they are
                // separated by the searched for amount of bytes.
                for (;;) {
                    int diff = end_pos - check_pos - end_dist;
                    if (diff == 0) break;
                    else if (diff > 0) {
                        uint16_t skip = skip_table[end_pos];
                        skips++;
                        if (skip > end_pos) break;
                        end_pos -= skip;
                    } else {
                        uint16_t skip = skip_table[check_pos];
                        skips++;
                        if (skip > check_pos) break;
                        check_pos -= skip;
                    }
                }
            } else {
#if DO_LOG
                printf("Searching with [%.3s]\n", &base[pos]);
#endif
            }

            for (;;) {
                // Calculate the maximum bound for the match.
                unsigned check_len = (unsigned)(pos - check_pos);
                unsigned max_check_len = check_len < max_match_len ? check_len : max_match_len;
                scans++;

                // Match as far as possible.
                const char *check_str = &base[check_pos];
                unsigned len;
                for (len = 0; len < max_check_len; len++) {
                    if (cur_str[len] != check_str[len]) break;
                }

                if (len >= target_len) {
#if DO_LOG
                    print_highlight(base, base_length, pos, len, 10, printf("%d: ", t));
                    print_highlight(base, base_length, check_pos, len, 10, printf("%d: ", t));
#endif
                    t++;
                    best_len = len;
                    best_pos = check_pos;

                    if (pos + len >= base_length - 1) {
                        // We have found the longest possible match, nothing
                        // to search for.
                        end_hash = HASH_TABLE_SIZE;
                        break;
                    }

                    // We found a new longest match, update end trigraph
                    // accordingly.
                    end_hash = hash3(&base[pos + len - 2]);
                    end_pos = hash_table[end_hash];
                    hashes++;
                    end_dist = len - 2;
                    target_len = len + 1;

                    if (end_pos == 0xFFFF) {
                        // The buffer does not contain any occurrences of the
                        // end trigraph, so we have found the longest match
                        // there is.
                        break;
                    }

                    dbg_hash_pos = (int)(pos + len - 2);
#if DO_LOG
                    printf("Searching with [%.3s]..%d..[%.3s]\n", &base[pos], end_dist, &base[dbg_hash_pos]);
#endif
                } else {
                    // In this case we hit a false match candidate. Carry on.
                }

                {
                    // Move to the next match candidate.
                    uint16_t skip = skip_table[check_pos];
                    skips++;
                    if (skip > check_pos) break;
                    check_pos -= skip;
                }

                if (best_len > 0) {
                    // Synchronize the begin and end trigraphs so that they
                    // are separated by the searched for amount of bytes.
                    for (;;) {
                        int diff = end_pos - check_pos - end_dist;
                        if (diff == 0) break;
                        else if (diff > 0) {
                            uint16_t skip = skip_table[end_pos];
                            skips++;
                            if (skip > end_pos) break;
                            end_pos -= skip;
                        } else {
                            uint16_t skip = skip_table[check_pos];
                            skips++;
                            if (skip > check_pos) break;
                            check_pos -= skip;
                        }
                    }

                    // Could not find a potential match anymore.
                    if ((unsigned)(end_pos - check_pos) != end_dist) break;
                }
            }

            last_match_pos = best_pos;
            last_match_len = best_len;
            last_end_hash = end_hash;
        }

        base_length -= block_length;
        if (base_length == 0) break;

        // Need to rebase
        for (unsigned i = 0; i < HASH_TABLE_SIZE; i++) {
            if (hash_table[i] == 0xFFFF) {
                // Empty slot. It must stay empty: 0xFFFF is never below
                // block_length, so without this check the marker would have
                // block_length subtracted from it and turn into a position
                // that was never recorded.
                continue;
            }
            if (hash_table[i] < block_length) {
                // Fell off the range of the compressor, "forget" the pointer.
                hash_table[i] = 0xFFFF;
            } else {
                // Still valid, adjust base.
                hash_table[i] -= (uint16_t)block_length;
            }
        }

        base += block_length;
    }

    free(skip_table);
    free(hash_table);

    printf("Hashes: %d\n", hashes);
    printf("Skips: %d\n", skips);
    printf("Scans: %d\n", scans);
}