/**
 * Writes a block of feature vectors to the output stream in text form.
 *
 * Every vector produces one line made of comma-separated
 * "hash:string:value" entries. The string part is taken from the feature
 * hash table entry for the dimension (empty if there is no entry); bytes
 * that are not printable, as well as '%', ':' and ' ', are written as
 * "%xx" escapes. If the vector has a source, it is appended after a space.
 *
 * The output goes to the stream `f`, which is declared outside this
 * function.
 *
 * @param x Feature vectors
 * @param len Length of block
 * @return TRUE (always; errors reported by fprintf are not checked).
 *         NOTE(review): the previous documentation promised the number of
 *         written entries -- confirm what callers expect before changing.
 */
int output_text_write(fvec_t **x, int len)
{
    assert(x && len >= 0);
    int j, i, k;

    for (j = 0; j < len; j++) {
        for (i = 0; i < x[j]->len; i++) {
            /* Print feature (hash and string) */
            fentry_t *fe = fhash_get(x[j]->dim[i]);
            fprintf(f, "%llu:", (long long unsigned int) x[j]->dim[i]);
            for (k = 0; fe && k < fe->len; k++) {
                /*
                 * <ctype.h> functions require a value representable as
                 * unsigned char (or EOF); passing a negative plain char
                 * is undefined behaviour, so convert the byte first.
                 */
                unsigned char c = (unsigned char) fe->data[k];

                if (isprint(c) && !strchr("%: ", c))
                    fprintf(f, "%c", c);
                else
                    fprintf(f, "%%%.2x", c);
            }

            /* Print value of feature */
            fprintf(f, ":%g", x[j]->val[i]);
            if (i < x[j]->len - 1)
                fprintf(f, ",");
        }

        /* Print source of string */
        if (x[j]->src)
            fprintf(f, " %s", x[j]->src);

        fprintf(f, "\n");
    }

    return TRUE;
}
/**
 * Looks up a NUL-terminated string key in the hash table.
 *
 * The key length is obtained with strlen(), so `key` must be a valid
 * C string (not NULL).
 *
 * @param phash hash table handed through to fhash_get
 * @param key   NUL-terminated key
 * @return the pointer stored at the location fhash_get returns, or NULL
 *         when fhash_get returns NULL
 */
void* fhash_str_get(fhash* phash, const char* key)
{
    key_sz_t key_len = (key_sz_t)strlen(key);
    void** slot = fhash_get(phash, key, key_len, NULL);

    return slot ? *slot : NULL;
}
/**
 * Print the content of a feature vector.
 *
 * A header line with source, label, length and total is written first,
 * followed by one line per dimension showing the hash (hex) and value.
 * When the feature hash table is enabled, the stored string for the
 * dimension is printed between the brackets; bytes that are not
 * printable, as well as '%' and ' ', are written as "%xx" escapes.
 *
 * @param f File pointer
 * @param fv feature vector (must not be NULL)
 */
void fvec_print(FILE * f, fvec_t *fv)
{
    assert(fv);
    int i, j;

    /*
     * Passing a NULL pointer to "%s" is undefined behaviour; print the
     * conventional "(null)" placeholder for vectors without a source.
     */
    const char *src = fv->src ? fv->src : "(null)";

    fprintf(f, "Feature vector [src: %s, label: %g, len: %lu, total: %lu]\n",
            src, fv->label, fv->len, fv->total);

    for (i = 0; i < fv->len; i++) {
        fprintf(f, " %.16llx:%6.4f [", (long long unsigned int) fv->dim[i],
                fv->val[i]);

        if (fhash_enabled()) {
            fentry_t *fe = fhash_get(fv->dim[i]);

            for (j = 0; fe && j < fe->len; j++) {
                /*
                 * <ctype.h> functions require a value representable as
                 * unsigned char (or EOF); convert the byte before
                 * classifying it to avoid undefined behaviour on
                 * platforms where plain char is signed.
                 */
                unsigned char c = (unsigned char) fe->data[j];

                if (isprint(c) && !strchr("% ", c))
                    fprintf(f, "%c", c);
                else
                    fprintf(f, "%%%.2x", c);
            }
        }

        fprintf(f, "]\n");
    }
}
static void test_without_autorehash() { printf("========= fhash testing without auto rehash =========\n"); fhash_opt opt; opt.hash_alg = NULL; opt.compare = hash_core_compare; fhash* phash = fhash_create(0, opt, FHASH_MASK_NONE); // test set { unsigned long long start = ftime_gettime(); for (int i = 0; i < total_lines; i++) { fhash_set(phash, lines[i], (key_sz_t)strlen(lines[i]), lines[i], (value_sz_t)strlen(lines[i])); } unsigned long long end = ftime_gettime(); printf("fhash_set x%d spend time: %llu usec\n", total_lines, end - start); } // test get { unsigned long long start = ftime_gettime(); for (int i = 0; i < total_lines; i++) { fhash_get(phash, lines[i], (key_sz_t)strlen(lines[i]), NULL); } unsigned long long end = ftime_gettime(); printf("fhash_get x%d spend time: %llu usec\n", total_lines, end - start); } // test iteration { unsigned long long start = ftime_gettime(); fhash_iter iter = fhash_iter_new(phash); char* data = NULL; int iter_count = 0; while ((data = fhash_next(&iter))) { iter_count++; } fhash_iter_release(&iter); unsigned long long end = ftime_gettime(); printf("fhash_next x%d spend time: %llu usec\n", iter_count, end -start); } // test rehash { unsigned long long start = ftime_gettime(); int ret = fhash_rehash(phash, (uint32_t)total_lines); unsigned long long end = ftime_gettime(); printf("fhash_rehash (index double), ret: %d, spend time: %llu usec\n", ret, end -start); } fhash_profile_data profile_data; fhash_profile(phash, 0, &profile_data); print_profile(&profile_data); // test delete { unsigned long long start = ftime_gettime(); for (int i = 0; i < total_lines; i++) { fhash_del(phash, lines[i], (key_sz_t)strlen(lines[i])); } unsigned long long end = ftime_gettime(); printf("fhash_del x%d spend time: %llu usec\n", total_lines, end - start); } fhash_delete(phash); }