/* Populate test_table with num_keys keys using the generator selected by
 * the global TableType, then print the average number of probes reported
 * by that generator.
 *
 * test_table: table to fill
 * num_keys:   number of keys the generator is asked to insert
 *
 * Prints an error and exits with status 7 when TableType is not one of
 * RAND, SEQ, FOLD or WORST.  Afterwards the table's entry count is
 * asserted to equal num_keys.
 */
void build_table(table_t *test_table, int num_keys)
{
    int total_probes = -1;
    int entries;

    printf(" Build table with");

    /* reject an unknown generator before dispatching */
    if (TableType != RAND && TableType != SEQ
            && TableType != FOLD && TableType != WORST) {
        printf("invalid option for table type\n");
        exit(7);
    }

    if (TableType == RAND) {
        printf(" %d random keys\n", num_keys);
        total_probes = build_random(test_table, TableSize, num_keys);
    } else if (TableType == SEQ) {
        printf(" %d sequential keys\n", num_keys);
        total_probes = build_seq(test_table, TableSize, num_keys);
    } else if (TableType == FOLD) {
        printf(" %d folded keys\n", num_keys);
        total_probes = build_fold(test_table, TableSize, num_keys);
    } else {
        /* only WORST remains after the guard above */
        printf(" %d worst keys\n", num_keys);
        total_probes = build_worst(test_table, TableSize, num_keys);
    }

    printf(" The average number of probes for a successful search = %g\n",
            (double) total_probes/num_keys);

    if (Verbose)
        table_debug_print(test_table);

    /* the generator must have stored exactly the requested number of keys */
    entries = table_entries(test_table);
    assert(entries == num_keys);
}
/* Return the global-symbol entry for name, creating it on first use.
 *
 * The symbol table is taken from fnglobals(fn) when fn is non-null and from
 * globals otherwise.  A newly created entry is keyed by a fresh copy of name
 * and is given the table's entry count at the time of insertion as its value.
 */
static value make_gsymbol(const char *name, fncode fn)
{
  struct table *symtab;
  struct symbol *entry;
  struct string *key;

  if (fn)
    symtab = fnglobals(fn)->gsymbols;
  else
    symtab = globals->gsymbols;

  /* already known: hand back the existing entry */
  if (table_lookup(symtab, name, &entry))
    return entry;

  /* NOTE(review): symtab is presumably GC-protected here because
     alloc_string() may trigger a collection -- confirm against the
     GCPRO1/GCPOP definitions */
  GCPRO1(symtab);
  key = alloc_string(name);
  SET_READONLY(key);
  GCPOP(1);

  entry = table_add_fast(symtab, key, makeint(table_entries(symtab)));
  return entry;
}
/* driver to test sequence of inserts and deletes. */ void equilibriumDriver(void) { int i, code; int key_range, num_keys; int size; int ran_index; int suc_search, suc_trials, unsuc_search, unsuc_trials; int keys_added, keys_removed; int *ip; table_t *test_table; hashkey_t key; data_t dp; clock_t start, end; /* print parameters for this test run */ printf("\n----- Equilibrium test driver -----\n"); printf(" Trials: %d\n", Trials); test_table = table_construct(TableSize, ProbeDec); num_keys = (int) (TableSize * LoadFactor); /* build a table as starting point */ build_table(test_table, num_keys); size = num_keys; key_range = MAXID - MINID + 1; /* in equilibrium make inserts and removes with equal probability */ suc_search = suc_trials = unsuc_search = unsuc_trials = 0; keys_added = keys_removed = 0; start = clock(); for (i = 0; i < Trials; i++) { if (drand48() < 0.5 && table_full(test_table) == FALSE) { // insert only if table not full // for separate chaining table is never full key = (hashkey_t) (drand48() * key_range) + MINID; ip = (int *) malloc(sizeof(int)); *ip = key; /* insert returns 0 if key not found, 1 if older key found */ if (Verbose) printf("Trial %d, Insert Key %u", i, key); code = table_insert(test_table, key, ip); if (code == 0) { /* key was not in table so added */ unsuc_search += table_stats(test_table); unsuc_trials++; keys_added++; if (Verbose) printf(" added\n"); } else if (code == 1) { suc_search += table_stats(test_table); suc_trials++; if (Verbose) printf(" replaced (rare!)\n"); } else { printf("!!!Trial %d failed to insert key (%u) with code (%d)\n", i, key, code); exit(10); } } else if (table_entries(test_table) > TableSize/4) { // delete only if table is at least 25% full // why 25%? Would 10% be better? 
Lower than 10% will // be computationally expensive do { ran_index = (int) (drand48() * TableSize); key = table_peek(test_table, ran_index,0); } while (key == 0); if (Verbose) printf("Trial %d, Delete Key %u", i, key); if (key < MINID || MAXID < key) { printf("\n\n table peek failed: invalid key (%u) during trial (%d)\n", key, i); exit(12); } dp = table_delete(test_table, key); if (dp != NULL) { if (Verbose) printf(" removed\n"); suc_search += table_stats(test_table); suc_trials++; keys_removed++; assert(*(int *)dp == key); free(dp); } else { printf("!!! failed to find key (%u) in table, trial (%d)!\n", key, i); printf("this is a catastrophic error!!!\n"); exit(11); } } } end = clock(); if (Verbose) { printf("Table after equilibrium trials\n"); table_debug_print(test_table); } size += keys_added - keys_removed; printf(" Keys added (%d), removed (%d) new size should be (%d) and is (%d)\n", keys_added, keys_removed, size, table_entries(test_table)); assert(size == table_entries(test_table)); printf(" After exercise, time=%g \n", 1000*((double)(end-start))/CLOCKS_PER_SEC); printf(" successful searches during exercise=%g, trials=%d\n", (double) suc_search/suc_trials, suc_trials); printf(" unsuccessful searches during exercise=%g, trials=%d\n", (double) unsuc_search/unsuc_trials, unsuc_trials); /* test access times for new table */ /* separate chaining handled differently * should improve design of table_peek function so it * returns 0 if count is invalid when using open addressing. 
* In current design it is ignored */ suc_search = suc_trials = unsuc_search = unsuc_trials = 0; start = clock(); /* check each position in table for key */ if (ProbeDec == CHAIN) { for (i = 0; i < TableSize; i++) { int count = 0; key = table_peek(test_table, i, count); while (key != 0) { assert(MINID <= key && key <= MAXID); dp = table_retrieve(test_table, key); if (dp == NULL) { printf("Failed key (%u) should be at (%d)\n", key, i); exit(15); } else { suc_search += table_stats(test_table); suc_trials++; assert(*(int *)dp == key); } key = table_peek(test_table, i, ++count); } } } else { for (i = 0; i < TableSize; i++) { key = table_peek(test_table, i, 0); if (key != 0) { assert(MINID <= key && key <= MAXID); dp = table_retrieve(test_table, key); if (dp == NULL) { printf("Failed to find key (%u) but it is in location (%d)\n", key, i); exit(16); } else { suc_search += table_stats(test_table); suc_trials++; assert(*(int *)dp == key); } } } } for (i = 0; i < Trials; i++) { /* random key with uniform distribution */ key = (hashkey_t) (drand48() * key_range) + MINID; dp = table_retrieve(test_table, key); if (dp == NULL) { unsuc_search += table_stats(test_table); unsuc_trials++; } else { // this should be very rare assert(*(int *)dp == key); } } end = clock(); size = table_entries(test_table); printf(" After retrieve experiment, time=%g\n", 1000*((double)(end-start))/CLOCKS_PER_SEC); printf(" New load factor = %g\n", (double) size/TableSize); printf(" Percent empty locations marked deleted = %g\n", (double) 100.0 * table_deletekeys(test_table) / (TableSize - table_entries(test_table))); printf(" Measured avg probes for successful search=%g, trials=%d\n", (double) suc_search/suc_trials, suc_trials); if (ProbeDec == CHAIN && LoadFactor > 0.5 && LoadFactor < 1.5) { printf(" ** This measure is biased. See comments\n\n"); /* The design of the equilibirum driver depends on the uniform * selection of keys to insert and remove. 
For linear, double, and * quadratic probing selecting a key to remove is done with a uniform * distribution among all possible keys. However, for separate * chaining, the algorithm simply picks a table location with a uniform * distribution, but this is not the same as picking a key with a * uniform distribution. So, there is a bias that a key in a table * location with fewer other keys is more likely to be selected. This * causes the average number of probes for a successful search to * increase as the equilibrium driver runs for a long time. To remove * the bias, a solution is needed to pick a key with a uniform * distribution when chaining is used. It is not clear how to select a * key with low computational cost. */ } printf(" Measured avg probes for unsuccessful search=%g, trials=%d\n", (double) unsuc_search/unsuc_trials, unsuc_trials); printf(" Do deletions increase avg number of probes?\n"); performanceFormulas((double) size/TableSize); /* rehash and retest table */ printf(" Rehash table\n"); test_table = table_rehash(test_table, TableSize); /* number entries in table should not change */ assert(size == table_entries(test_table)); /* rehashing must clear all entries marked for deletion */ assert(0 == table_deletekeys(test_table)); /* test access times for rehashed table */ suc_search = suc_trials = unsuc_search = unsuc_trials = 0; start = clock(); /* check each position in table for key */ if (ProbeDec == CHAIN) { for (i = 0; i < TableSize; i++) { int count = 0; key = table_peek(test_table, i, count); while (key != 0) { assert(MINID <= key && key <= MAXID); dp = table_retrieve(test_table, key); if (dp == NULL) { printf("Failed key (%u) should be at (%d)\n", key, i); exit(25); } else { suc_search += table_stats(test_table); suc_trials++; assert(*(int *)dp == key); } key = table_peek(test_table, i, ++count); } } } else { for (i = 0; i < TableSize; i++) { key = table_peek(test_table, i, 0); if (key != 0) { assert(MINID <= key && key <= MAXID); dp = 
table_retrieve(test_table, key); if (dp == NULL) { printf("Failed to find key (%u) after rehash but it is in location (%d)\n", key, i); exit(26); } else { suc_search += table_stats(test_table); suc_trials++; assert(*(int *)dp == key); } } } } for (i = 0; i < Trials; i++) { /* random key with uniform distribution */ key = (hashkey_t) (drand48() * key_range) + MINID; dp = table_retrieve(test_table, key); if (dp == NULL) { unsuc_search += table_stats(test_table); unsuc_trials++; } else { // this should be very rare assert(*(int *)dp == key); } } end = clock(); size = table_entries(test_table); printf(" After rehash, time=%g\n", 1000*((double)(end-start))/CLOCKS_PER_SEC); printf(" Measured avg probes for successful search=%g, trials=%d\n", (double) suc_search/suc_trials, suc_trials); printf(" Measured avg probes for unsuccessful search=%g, trials=%d\n", (double) unsuc_search/unsuc_trials, unsuc_trials); /* remove and free all items from table */ table_destruct(test_table); printf("----- End of equilibrium test -----\n\n"); }
/* driver to build and test tables. Note this driver * does not delete keys from the table. */ void RetrieveDriver() { int i; int key_range, num_keys; int suc_search, suc_trials, unsuc_search, unsuc_trials; table_t *test_table; hashkey_t key; data_t dp; /* print parameters for this test run */ printf("\n----- Retrieve driver -----\n"); printf(" Trials: %d\n", Trials); num_keys = (int) (TableSize * LoadFactor); test_table = table_construct(TableSize, ProbeDec); build_table(test_table, num_keys); key_range = MAXID - MINID + 1; if (Trials > 0) { /* access table to measure probes for an unsuccessful search */ suc_search = suc_trials = unsuc_search = unsuc_trials = 0; for (i = 0; i < Trials; i++) { /* random key with uniform distribution */ key = (hashkey_t) (drand48() * key_range) + MINID; if (Verbose) printf("%d: looking for %d\n", i, key); dp = table_retrieve(test_table, key); if (dp == NULL) { unsuc_search += table_stats(test_table); unsuc_trials++; if (Verbose) printf("\t not found with %d probes\n", table_stats(test_table)); } else { // this should be very rare suc_search += table_stats(test_table); suc_trials++; if (Verbose) printf("\t\t FOUND with %d probes (this is rare!)\n", table_stats(test_table)); assert(*(int *)dp == key); } } assert(num_keys == table_entries(test_table)); if (suc_trials > 0) printf(" Avg probes for successful search = %g measured with %d trials\n", (double) suc_search/suc_trials, suc_trials); if (unsuc_trials > 0) printf(" Avg probes for unsuccessful search = %g measured with %d trials\n", (double) unsuc_search/unsuc_trials, unsuc_trials); } /* print expected values from analysis with compare to experimental * measurements */ performanceFormulas(LoadFactor); /* remove and free all items from table */ table_destruct(test_table); printf("----- End of access driver -----\n\n"); }
/* driver to test small tables. This is a series of * simple tests and is not exhaustive. * * input: test_M is the table size for this test run */ void RehashDriver(int test_M) { int i, *ip, code; table_t *H; printf("\n----- Rehash driver -----\n"); if (ProbeDec == CHAIN) { printf("This design of the rehash driver does not work with separate chaining\n"); return; } hashkey_t startkey = MINID + (test_M - MINID%test_M); assert(startkey%test_M == 0); assert(test_M > 5); // tests designed for size at least 6 H = table_construct(test_M, ProbeDec); // fill table sequentially for (i = 0; i < test_M-1; i++) { ip = (int *) malloc(sizeof(int)); *ip = 10*i; assert(table_full(H) == 0); code = table_insert(H, startkey+i, ip); ip = NULL; assert(code == 0); assert(table_entries(H) == i+1); assert(table_stats(H) == 1); assert(table_peek(H,i,0) == startkey+i); } if (Verbose) { printf("\nfull table, last entry empty\n"); table_debug_print(H); } // tests on empty position assert(table_peek(H,i,0) == 0); assert(NULL == table_retrieve(H, startkey+i)); assert(table_stats(H) == 1); assert(table_full(H) == 1); assert(-1 == table_insert(H, MAXID, NULL)); // retrieve and replace each entry for (i = 0; i < test_M-1; i++) { ip = table_retrieve(H, startkey+i); assert(*(int *)ip == 10*i); ip = NULL; assert(table_stats(H) == 1); ip = table_retrieve(H, startkey+i+test_M); assert(ip == NULL); assert(2 <= table_stats(H) && table_stats(H) <= test_M); if (ProbeDec == LINEAR) assert(table_stats(H) == i+2); ip = (int *) malloc(sizeof(int)); *ip = 99*i; assert(1 == table_insert(H, startkey+i, ip)); ip = NULL; ip = table_retrieve(H, startkey+i); assert(*(int *)ip == 99*i); ip = NULL; } assert(table_entries(H) == test_M-1); assert(table_full(H) == 1); // delete tests assert(table_deletekeys(H) == 0); ip = table_delete(H, startkey+1); assert(*(int *)ip == 99); free(ip); ip = NULL; if (Verbose) { printf("\nsecond entry deleted, last entry empty\n"); table_debug_print(H); } assert(table_entries(H) == 
test_M-2); assert(table_full(H) == 0); assert(table_peek(H,1,0) == 0); assert(table_deletekeys(H) == 1); ip = table_retrieve(H, startkey+1); // check key is not there assert(ip == NULL); assert(table_stats(H) >= 2); // attempt to delete keys not in table assert(NULL == table_delete(H, startkey+1)); assert(NULL == table_delete(H, startkey+test_M-1)); // insert key in its place ip = (int *) malloc(sizeof(int)); *ip = 123; assert(0 == table_insert(H, startkey+1+test_M, ip)); ip = NULL; assert(table_peek(H,1,0) == startkey+1+test_M); ip = table_retrieve(H, startkey+1+test_M); assert(*(int *)ip == 123); ip = NULL; assert(table_entries(H) == test_M-1); assert(table_full(H) == 1); assert(table_deletekeys(H) == 0); for (i = 2; i < test_M-1; i++) { // clear out all but two keys ip = table_delete(H, startkey+i); assert(*(int *)ip == 99*i); free(ip); ip = NULL; } assert(table_entries(H) == 2); ip = (int *) malloc(sizeof(int)); // fill last empty *ip = 456; assert(0 == table_insert(H, startkey+test_M-1, ip)); ip = NULL; assert(table_entries(H) == 3); // unsuccessful search when no empty keys assert(NULL == table_retrieve(H, startkey+test_M)); // two keys the collide in position 0 ip = (int *) malloc(sizeof(int)); *ip = 77; assert(0 == table_insert(H, startkey+test_M, ip)); ip = (int *) malloc(sizeof(int)); *ip = 88; assert(0 == table_insert(H, startkey+10*test_M, ip)); ip = NULL; assert(table_entries(H) == 5); ip = table_delete(H, startkey); // delete position 0 assert(*(int *)ip == 0); free(ip); ip = NULL; assert(table_entries(H) == 4); ip = (int *) malloc(sizeof(int)); // replace *ip = 87; assert(1 == table_insert(H, startkey+10*test_M, ip)); ip = NULL; assert(table_entries(H) == 4); ip = (int *) malloc(sizeof(int)); // put back position 0 *ip = 76; assert(0 == table_insert(H, startkey+20*test_M, ip)); ip = NULL; assert(table_entries(H) == 5); assert(table_peek(H,0,0) == startkey+20*test_M); assert(table_deletekeys(H) == test_M-5); // verify 5 items in table ip = 
table_retrieve(H, startkey+1+test_M); assert(*(int *)ip == 123); ip = table_retrieve(H, startkey+test_M); assert(*(int *)ip == 77); ip = table_retrieve(H, startkey+10*test_M); assert(*(int *)ip == 87); ip = table_retrieve(H, startkey+20*test_M); assert(*(int *)ip == 76); ip = table_retrieve(H, startkey+test_M-1); assert(*(int *)ip == 456); ip = NULL; // rehash H = table_rehash(H, test_M); assert(table_entries(H) == 5); assert(table_deletekeys(H) == 0); if (Verbose) { printf("\ntable after rehash with 5 items\n"); table_debug_print(H); } // verify 5 items in table ip = table_retrieve(H, startkey+1+test_M); assert(*(int *)ip == 123); ip = table_retrieve(H, startkey+test_M); assert(*(int *)ip == 77); ip = table_retrieve(H, startkey+10*test_M); assert(*(int *)ip == 87); ip = table_retrieve(H, startkey+20*test_M); assert(*(int *)ip == 76); ip = table_retrieve(H, startkey+test_M-1); assert(*(int *)ip == 456); ip = NULL; // rehash and increase table size // If linear double the size // If double, need new prime int new_M = 2*test_M; if (ProbeDec == DOUBLE) new_M = find_first_prime(new_M); H = table_rehash(H, new_M); assert(table_entries(H) == 5); assert(table_deletekeys(H) == 0); if (Verbose) { printf("\nafter increase table to %d with 5 items\n", new_M); table_debug_print(H); } // verify 5 keys and information not lost during rehash ip = table_retrieve(H, startkey+1+test_M); assert(*(int *)ip == 123); ip = table_retrieve(H, startkey+test_M); assert(*(int *)ip == 77); ip = table_retrieve(H, startkey+10*test_M); assert(*(int *)ip == 87); ip = table_retrieve(H, startkey+20*test_M); assert(*(int *)ip == 76); ip = table_retrieve(H, startkey+test_M-1); assert(*(int *)ip == 456); ip = NULL; // fill the new larger table assert(table_full(H) == 0); int new_items = new_M - 1 - 5; int base_addr = 2*startkey + 20*test_M*test_M; if (base_addr+new_items*test_M > MAXID) { printf("re-run -b driver with smaller table size\n"); exit(1); } for (i = 0; i < new_items; i++) { ip = (int *) 
malloc(sizeof(int)); *ip = 10*i; code = table_insert(H, base_addr+i*test_M, ip); ip = NULL; assert(code == 0); assert(table_entries(H) == i+1+5); } assert(table_full(H) == 1); assert(table_entries(H) == new_M-1); if (Verbose) { printf("\nafter larger table filled\n"); table_debug_print(H); } // verify new items are found for (i = 0; i < new_items; i++) { ip = table_retrieve(H, base_addr+i*test_M); assert(*(int *)ip == 10*i); ip = NULL; } // clean up table table_destruct(H); printf("----- Passed rehash driver -----\n\n"); }