/**
 * Print the content of a feature vector to standard output.
 *
 * A summary line (length, total, memory in kb) is always printed, followed
 * by the source string if one is set. The individual dimensions are only
 * listed when the global verbosity level is 3 or higher. For each dimension
 * the hash and value are printed; if the hash is found in the feature lookup
 * table, the stored feature bytes are appended in brackets, with
 * non-printable bytes written as %xx escapes.
 *
 * @param fv feature vector (must not be NULL)
 */
void fvec_print(fvec_t *fv)
{
    assert(fv);
    int i, j;

    printf("feature vector\n len: %lu, total: %lu, mem: %.2fkb\n",
           fv->len, fv->total, fv->mem / 1e3);

    if (fv->src)
        printf(" src: '%s'\n", fv->src);

    /* Dimension listing is reserved for high verbosity */
    if (verbose < 3)
        return;

    for (i = 0; i < fv->len; i++) {
        printf(" 0x%.16llx: %6.4f",
               (long long unsigned int) fv->dim[i], fv->val[i]);

        /* Lookup feature */
        fentry_t *fe = ftable_get(fv->dim[i]);
        if (!fe) {
            printf("\n");
            continue;
        }

        /* Print feature string */
        printf(" [");
        for (j = 0; j < fe->len; j++) {
            /*
             * Read the byte as unsigned char: passing a negative value to
             * isprint() is undefined, and a negative value handed to %x
             * would be sign-extended and printed as more than two digits.
             */
            unsigned char ch = (unsigned char) fe->data[j];

            if (isprint(ch) || ch == '%')
                printf("%c", ch);
            else
                printf("%%%.2x", (unsigned int) ch);
        }
        printf("]\n");
    }
}
/**
 * Print shared n-grams for each cluster.
 *
 * The minimum ratio is read from the configuration key
 * "cluster.shared_ngrams"; if it is missing or not positive, nothing is
 * exported. Otherwise the output file is opened in append mode, a comment
 * header is written, and for every cluster the feature vectors of its
 * members are binarized, summed and divided by the member count. Every
 * n-gram whose resulting value reaches the minimum ratio is written as
 * one line: <cluster> <ratio> <hash> <ngram>, with non-printable bytes of
 * the n-gram written as %xx escapes.
 *
 * @param c Clustering structure
 * @param fa Array of feature vectors
 * @param file Output file
 */
void export_shared_ngrams(cluster_t *c, farray_t *fa, const char *file)
{
    assert(c && fa && file);
    int i, j, k, b;
    /* Start at "disabled" so a failed lookup cannot leave the ratio unset */
    double shared = 0.0;
    FILE *f;
    char *name = NULL;

    config_lookup_float(&cfg, "cluster.shared_ngrams", &shared);
    if (shared <= 0.0)
        return;

    if (verbose > 0)
        printf("Exporting shared n-grams with minimum ratio %4.2f.\n",
               shared);

    if (!(f = fopen(file, "a"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print incremental header */
    fprintf(f, "# ---\n# Shared n-grams for %s\n", fa->src);
    fprintf(f, "# Minimum ratio of shared n-grams: %4.2f (%2.0f%%)\n",
            shared, shared * 100);
    fprintf(f, "# ---\n# <cluster> <ratio> <hash> <ngram>\n");

    /* Compute shared n-grams per cluster */
    for (i = 0; i < c->num; i++) {
        fvec_t *s = fvec_zero();

        for (j = 0, k = 0; j < c->len; j++) {
            if (c->cluster[j] != i)
                continue;

            /* Clone and binarize */
            fvec_t *x = fvec_clone(fa->x[j]);
            fvec_bin(x);

            /* The first member found provides the name for the cluster */
            if (k == 0)
                name = cluster_get_name(c, j);

            /* Merge n-grams in cluster */
            fvec_t *y = fvec_add(s, x);
            fvec_destroy(s);
            fvec_destroy(x);
            s = y;
            k++;
        }

        /* Check for empty cluster; the accumulator must still be released */
        if (k == 0) {
            fvec_destroy(s);
            continue;
        }

        /* Turn the per-n-gram member counts into ratios */
        fvec_div(s, k);

        /* Output shared n-grams */
        for (j = 0; j < s->len; j++) {
            if (s->val[j] < shared)
                continue;

            /*
             * Lookup feature before writing anything, so that a missing
             * entry is skipped instead of being dereferenced and no
             * partial line ends up in the file.
             */
            fentry_t *fe = ftable_get(s->dim[j]);
            if (!fe) {
                error("Oops. Feature not in lookup table.");
                continue;
            }

            fprintf(f, "%s %6.4f %.16llx ", name, s->val[j],
                    (long long unsigned int) s->dim[j]);

            /* Print feature */
            fprintf(f, "\"");
            for (b = 0; b < fe->len; b++) {
                /*
                 * Read the byte as unsigned char: negative values are
                 * undefined for isprint() and would be sign-extended by %x.
                 */
                unsigned char ch = (unsigned char) fe->data[b];

                if (isprint(ch) || ch == '%')
                    fprintf(f, "%c", ch);
                else
                    fprintf(f, "%%%.2x", (unsigned int) ch);
            }
            fprintf(f, "\"\n");
        }

        fvec_destroy(s);
    }

    /* Buffered output is flushed on close; report a failed write */
    if (fclose(f) != 0)
        error("Could not write file '%s'.", file);
}