int main(int argc, char *argv[]) { hashtbl_t ht; char **keys; int i, j, retval; retval = ht_init(&ht, 5, NULL, free); ASSERT_INT_EQ(0, retval, "ht_init: clean initialization"); for (j = 0; j < 3; j++) { for (i = 0; i < 10; i++) { char *data; char key[12]; sprintf(key, "hello %d", i); data = strdup("world"); /* free()d by ht_destroy(). */ retval = ht_put(&ht, key, data); ASSERT_INT_EQ(0, retval, "ht_put: returns 0 on success."); ASSERT_TRUE(ht_get(&ht, key) == data, "ht_get: look up newly-put key"); } } ASSERT_LONG_EQ(10, ht.nelems, "ht_put: update element count correctly"); ASSERT_TRUE(ht_get(&ht, "hello 13") == NULL, "ht_get: look up non-existing key"); ht_call_for_each(&ht, call_for_each_test); ASSERT_INT_EQ(10, n_calls, "ht_call_for_each: verify repeated invokations"); keys = malloc(ht.nelems * sizeof(char *)); retval = ht_keys(&ht, keys); ASSERT_INT_EQ(10, retval, "ht_keys: returns the number of keys."); /* TODO(jhinds): verify the contents of keys[] */ ht_delete(&ht, "hello 0"); ASSERT_TRUE(ht_get(&ht, "hello 0") == NULL, "ht_delete: removes entry successfully"); ASSERT_LONG_EQ(9, ht.nelems, "ht_delete: updates element count correctly"); return unittest_has_error; }
/** @brief * * @param args contains the parsed cmd-line options & arguments. * @param argc number of cmd-line arguments. * @param argv list of cmd-line arguments * @param optind index of the first non-option cmd-line argument. * * @return exit status for main() to return. */ int pivot(struct cmdargs *args, int argc, char *argv[], int optind) { int i, j, tmplen; char default_delim[] = { 0xFE, 0x00 }; hashtbl_t key_hash; /* outer hash */ hashtbl_t *pivot_hash; /* pointer for inner hashes */ struct pivot_conf conf; /* variables for keeping track of the unique pivot strings */ hashtbl_t uniq_pivots; /* set of all pivot strings */ char **pivot_array; /* list of all pivot keys */ size_t n_key_strings; /* number of distinct key strings */ size_t n_pivot_keys; /* number of distinct pivot field values */ double *line_values; /* array of values */ char *keystr, *pivstr; /* hash key strings */ size_t keystr_sz, pivstr_sz; char **headers = NULL; /* array of header labels */ size_t n_headers = 0; /* number of fields */ char *fieldbuf = NULL; /* to hold fields extracted from input */ size_t fieldbuf_sz = 0; /* size of field buffer */ FILE *fin; /* input file */ dbfr_t *in_reader; size_t max_line_sz = 0; char empty_string[] = ""; if (!args->delim) { args->delim = getenv("DELIMITER"); if (!args->delim) args->delim = default_delim; } expand_chars(args->delim); delim = args->delim; /* get first input file pointer - either trailing arg or stdin */ if (optind == argc) fin = stdin; else fin = nextfile(argc, argv, &optind, "r"); if (!fin) { fprintf(stderr, "%s: no valid input files specified.\n", argv[0]); return EXIT_FILE_ERR; } in_reader = dbfr_init(fin); /* set locale with values from the environment so strcoll() will work correctly. */ setlocale(LC_ALL, ""); setlocale(LC_COLLATE, ""); memset(&conf, 0, sizeof(conf)); if (configure_pivot(&conf, args, in_reader->next_line, delim) != 0) { fprintf(stderr, "%s: error parsing input field arguments.\n", argv[0]); return EXIT_HELP; } if (conf.n_pivots == 0 || conf.n_values == 0) { fprintf(stderr, "%s: -p/-P and -v/-A must be specified.\n", argv[0]); return EXIT_HELP; } /* TODO: get rid of this arbirary field length limitation */ fieldbuf = xmalloc(MAX_FIELD_LEN); fieldbuf_sz = MAX_FIELD_LEN; /* extract headers from first line of input if necessary */ if (args->keep_header) { if (dbfr_getline(in_reader) < 1) { fprintf(stderr, "%s: unexpected end of file.\n", argv[0]); return EXIT_FILE_ERR; } chomp(in_reader->current_line); n_headers = fields_in_line(in_reader->current_line, delim); headers = xmalloc(sizeof(char *) * n_headers); for (i = 0; i < n_headers; i++) { get_line_field(fieldbuf, in_reader->current_line, fieldbuf_sz - 1, i, delim); headers[i] = xmalloc(sizeof(char *) * strlen(fieldbuf) + 1); strcpy(headers[i], fieldbuf); } #ifdef CRUSH_DEBUG for (i = 0; i < n_headers; i++) { fprintf(stderr, "%s%s", headers[i], i < n_headers - 1 ? args->delim : ""); } fprintf(stderr, "\n"); #endif } /* these two buffers will have enough capacity to hold the entire input line, unless there are no key fields specified, in which case keystr will just be set to an empty string. */ keystr = pivstr = NULL; keystr_sz = pivstr_sz = 0; ht_init(&key_hash, KEY_HASH_SZ, NULL, free_hash); ht_init(&uniq_pivots, PIVOT_HASH_SZ, NULL, NULL); n_key_strings = 0; n_pivot_keys = 0; /* no keys specified? set keystr to an empty string */ if (!conf.n_keys) { keystr = empty_string; } while (fin != NULL) { while (dbfr_getline(in_reader) > 0) { int value_in_hash = 1; int pivot_in_hash = 1; chomp(in_reader->current_line); if (conf.n_keys) { /* this could validly return NULL if both sizes are 0 the first time thru, when keystr is still NULL, but that shouldn't happen */ if (realloc_if_needed(&keystr, &keystr_sz, in_reader->current_line_sz) == NULL) { fprintf(stderr, "%s: out of memory.\n", getenv("_")); break; } } if (conf.n_pivots) { if (realloc_if_needed(&pivstr, &pivstr_sz, in_reader->current_line_sz) == NULL) { fprintf(stderr, "%s: out of memory.\n", getenv("_")); break; } } /* make key string from keys[] */ if (conf.n_keys) extract_fields_to_string(in_reader->current_line, keystr, keystr_sz, conf.keys, conf.n_keys, delim); /* make key string from pivots[] */ extract_fields_to_string(in_reader->current_line, pivstr, pivstr_sz, conf.pivots, conf.n_pivots, delim); #ifdef CRUSH_DEBUG if (n_keys) fprintf(stderr, "key string: %s\n", keystr); if (n_pivots) fprintf(stderr, "pivot string: %s\n", pivstr); #endif /* get hashtable value */ pivot_hash = (hashtbl_t *) ht_get(&key_hash, keystr); if (!pivot_hash) { pivot_hash = xmalloc(sizeof(hashtbl_t)); ht_init(pivot_hash, PIVOT_HASH_SZ, NULL, free); pivot_in_hash = 0; } line_values = ht_get(pivot_hash, pivstr); if (!line_values) { line_values = xmalloc(sizeof(double) * conf.n_values); memset(line_values, 0, sizeof(double) * conf.n_values); value_in_hash = 0; } /* add in values */ for (i = 0; i < conf.n_values; i++) { tmplen = get_line_field(fieldbuf, in_reader->current_line, fieldbuf_sz - 1, conf.values[i], delim); if (tmplen > 0) { line_values[i] += atof(fieldbuf); /* remember the greatest input floating-point precision for each * field */ tmplen = float_str_precision(fieldbuf); if (conf.value_precisions[i] < tmplen) { #ifdef CRUSH_DEBUG fprintf(stderr, "setting precision to %d for field %d\n", tmplen, i); #endif conf.value_precisions[i] = tmplen; } } } /* store hashtable value */ if (!value_in_hash) ht_put(pivot_hash, pivstr, line_values); if (!pivot_in_hash) { ht_put(&key_hash, keystr, pivot_hash); } /* store the pivot key string for later use */ ht_put(&uniq_pivots, pivstr, (void *) 1); } if (in_reader->current_line_sz > max_line_sz) max_line_sz = in_reader->current_line_sz; dbfr_close(in_reader); fin = nextfile(argc, argv, &optind, "r"); if (fin) { in_reader = dbfr_init(fin); /* reconfigure in case the fields are rearranged in the new file */ if (configure_pivot(&conf, args, in_reader->next_line, delim) != 0) { fprintf(stderr, "%s: error parsing input field arguments.\n", argv[0]); return EXIT_HELP; } /* throw out headers from all files after the first. */ if (args->keep_header) dbfr_getline(in_reader); } } n_key_strings = key_hash.nelems; n_pivot_keys = uniq_pivots.nelems; /* sort the collection of all pivot key strings */ pivot_array = xmalloc(sizeof(char *) * n_pivot_keys); ht_keys(&uniq_pivots, pivot_array); qsort(pivot_array, n_pivot_keys, sizeof(char *), (int (*)(const void *, const void *)) key_strcmp); #ifdef CRUSH_DEBUG fprintf(stderr, "sorted pivot strings:\n"); for (i = 0; i < n_pivot_keys; i++) { fprintf(stderr, "\t%s\n", pivot_array[i]); } #endif /* OUTPUT SECTION */ /* print headers separate from data if necessary */ if (args->keep_header) { char *pivot_label; /* assumption - the largest line of input has a greater length than the combined length of all pivot field values and a 3-char separator. safe assumption? probably not if every input field is used as a pivot field. */ pivot_label = xmalloc(max_line_sz); if (conf.n_keys) { for (i = 0; i < conf.n_keys; i++) printf("%s%s", headers[conf.keys[i]], delim); } for (i = 0; i < n_pivot_keys; i++) { pivot_label[0] = 0x00; /* get the current pivot field values & build a label with them */ for (j = 0; j < conf.n_pivots; j++) { get_line_field(fieldbuf, pivot_array[i], fieldbuf_sz - 1, j, delim); strcat(pivot_label, fieldbuf); if (j != conf.n_pivots - 1) strcat(pivot_label, " - "); } /* get the value field labels & print them with the pivot label */ for (j = 0; j < conf.n_values; j++) { printf("%s: %s", pivot_label, headers[conf.values[j]]); if (j != conf.n_values - 1) fputs(delim, stdout); } /* TODO: segfault is happening around here */ if (i != n_pivot_keys - 1) fputs(delim, stdout); } fputs("\n", stdout); free(pivot_label); /* free each header string - don't need them anymore */ for (i = 0; i < n_headers; i++) free(headers[i]); free(headers); } { char **key_array; llist_node_t *key_node; llist_t *key_list; char *empty_value_string; /* construct string for empty value set. this should be big enough for n_values worth of zeros (of the appropriate precision) and delimiters in between. here we'll just guess that a precision of 8 is enough. */ empty_value_string = xmalloc((sizeof(char) * conf.n_values * 8) + (strlen(delim) * conf.n_values)); empty_value_string[0] = 0x00; for (i = 0; i < conf.n_values; i++) { sprintf(empty_value_string, "%s%.*f", empty_value_string, conf.value_precisions[i], 0.0F); if (i != conf.n_values - 1) strcat(empty_value_string, delim); } key_array = xmalloc(sizeof(char *) * n_key_strings); j = ht_keys(&key_hash, key_array); /* j now holds the number of distinct keys to be output */ assert(j == n_key_strings); /* sort the keys */ qsort(key_array, n_key_strings, sizeof(char *), (int (*)(const void *, const void *)) key_strcmp); /* loop through all key strings */ for (i = 0; i < n_key_strings; i++) { int k; pivot_hash = ht_get(&key_hash, key_array[i]); if (n_key_strings > 0) printf("%s%s", key_array[i], delim); /* loop through all possible pivot-string inner hashtable keys */ for (k = 0; k < n_pivot_keys; k++) { /* loop through all values */ line_values = ht_get(pivot_hash, pivot_array[k]); if (!line_values) fputs(empty_value_string, stdout); else { for (j = 0; j < conf.n_values; j++) { printf("%.*f%s", conf.value_precisions[j], line_values[j], j != conf.n_values - 1 ? delim : ""); } } if (k != n_pivot_keys - 1) fputs(delim, stdout); } fputs("\n", stdout); } free(empty_value_string); free(key_array); } /* CLEANUP SECTION */ ht_destroy(&key_hash); ht_destroy(&uniq_pivots); if (keystr && keystr != empty_string) free(keystr); if (pivstr) free(pivstr); if (pivot_array) free(pivot_array); if (fieldbuf) free(fieldbuf); return EXIT_OKAY; }
int main(int argc, char *argv[]) { (void) argc; (void) argv; hash_table ht; ht_init(&ht, HT_KEY_CONST | HT_VALUE_CONST, 0.05); char *s1 = (char*)"teststring 1"; char *s2 = (char*)"teststring 2"; char *s3 = (char*)"teststring 3"; ht_insert(&ht, s1, strlen(s1)+1, s2, strlen(s2)+1); int contains = ht_contains(&ht, s1, strlen(s1)+1); test(contains, "Checking for key \"%s\"", s1); size_t value_size; char *got = ht_get(&ht, s1, strlen(s1)+1, &value_size); fprintf(stderr, "Value size: %zu\n", value_size); fprintf(stderr, "Got: {\"%s\": -----\"%s\"}\n", s1, got); test(value_size == strlen(s2)+1, "Value size was %zu (desired %lu)", value_size, strlen(s2)+1); fprintf(stderr, "Replacing {\"%s\": \"%s\"} with {\"%s\": \"%s\"}\n", s1, s2, s1, s3); ht_insert(&ht, s1, strlen(s1)+1, s3, strlen(s3)+1); unsigned int num_keys; void **keys; keys = ht_keys(&ht, &num_keys); test(num_keys == 1, "HashTable has %d keys", num_keys); test(keys != NULL, "Keys is not null"); if(keys) free(keys); got = ht_get(&ht, s1, strlen(s1)+1, &value_size); fprintf(stderr, "Value size: %zu\n", value_size); fprintf(stderr, "Got: {\"%s\": \"%s\"}\n", s1, got); test(value_size == strlen(s3)+1, "Value size was %zu (desired %lu)", value_size, strlen(s3)+1); fprintf(stderr, "Removing entry with key \"%s\"\n", s1); ht_remove(&ht, s1, strlen(s1)+1); contains = ht_contains(&ht, s1, strlen(s1)+1); test(!contains, "Checking for removal of key \"%s\"", s1); keys = ht_keys(&ht, &num_keys); test(num_keys == 0, "HashTable has %d keys", num_keys); if(keys) free(keys); fprintf(stderr, "Stress test"); int key_count = 1000000; int i; int *many_keys = malloc(key_count * sizeof(*many_keys)); int *many_values = malloc(key_count * sizeof(*many_values)); srand(time(NULL)); for(i = 0; i < key_count; i++) { many_keys[i] = i; many_values[i] = rand(); } struct timespec t1; struct timespec t2; t1 = snap_time(); for(i = 0; i < key_count; i++) { ht_insert(&ht, &(many_keys[i]), sizeof(many_keys[i]), &(many_values[i]), sizeof(many_values[i])); } t2 = snap_time(); fprintf(stderr, "Inserting %d keys took %.2f seconds\n", key_count, get_elapsed(t1, t2)); fprintf(stderr, "Checking inserted keys\n"); int ok_flag = 1; for(i = 0; i < key_count; i++) { if(ht_contains(&ht, &(many_keys[i]), sizeof(many_keys[i]))) { size_t value_size; int value; value = *(int*)ht_get(&ht, &(many_keys[i]), sizeof(many_keys[i]), &value_size); if(value != many_values[i]) { fprintf(stderr, "Key value mismatch. Got {%d: %d} expected: {%d: %d}\n", many_keys[i], value, many_keys[i], many_values[i]); ok_flag = 0; break; } } else { fprintf(stderr, "Missing key-value pair {%d: %d}\n", many_keys[i], many_values[i]); ok_flag = 0; break; } } test(ok_flag == 1, "Result was %d", ok_flag); ht_clear(&ht); ht_resize(&ht, 4194304); t1 = snap_time(); for(i = 0; i < key_count; i++) { ht_insert(&ht, &(many_keys[i]), sizeof(many_keys[i]), &(many_values[i]), sizeof(many_values[i])); } t2 = snap_time(); fprintf(stderr, "Inserting %d keys (on preallocated table) took %.2f seconds\n", key_count, get_elapsed(t1, t2)); for(i = 0; i < key_count; i++) { ht_remove(&ht, &(many_keys[i]), sizeof(many_keys[i])); } test(ht_size(&ht) == 0, "%d keys remaining", ht_size(&ht)); ht_destroy(&ht); free(many_keys); free(many_values); return report_results(); }