/**
 * Writes a distance matrix to a text file. After the version and matrix
 * headers, one line is emitted per feature vector: its source, the value
 * returned by farray_get_label() and the corresponding matrix row.
 * @param d Pointer to matrix, indexed as d[row * fa->len + col]
 * @param fa Feature vector array
 * @param file File name
 */
void export_dist(double *d, farray_t *fa, const char *file)
{
    assert(d && fa && file);
    int row, col;
    FILE *out;

    if (verbose > 0)
        printf("Exporting distance matrix to '%s'.\n", file);

    out = fopen(file, "w");
    if (out == NULL) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Version header comes first */
    malheur_version(out);

    /* Header describing the matrix */
    fprintf(out, "# ---\n# Distance matrix for %s\n", fa->src);
    fprintf(out, "# Matrix size: %lu x %lu\n# ---\n", fa->len, fa->len);
    fprintf(out, "# <report> <cluster> <dist1> <dist2> ... <distn>\n");

    /* Matrix body, one row per feature vector */
    for (row = 0; row < fa->len; row++) {
        fprintf(out, "%s %s ", fa->x[row]->src,
                farray_get_label(fa, row));

        for (col = 0; col < fa->len; col++)
            fprintf(out, "%g ", d[row * fa->len + col]);

        fprintf(out, "\n");
    }

    fclose(out);
}
/**
 * Collects the rejected feature vectors into a new array named
 * "rejected". A vector counts as rejected when its entry in
 * c->cluster is zero; such vectors are cloned together with their label.
 * @param c Cluster structure
 * @param f Array of feature vectors
 * @return Newly created array holding clones of the rejected vectors
 */
farray_t *cluster_get_rejected(cluster_t *c, farray_t *f)
{
    farray_t *rejected = farray_create("rejected");
    int idx = 0;

    while (idx < f->len) {
        /* Only vectors without a cluster assignment are copied */
        if (c->cluster[idx] == 0) {
            farray_add(rejected, fvec_clone(f->x[idx]),
                       farray_get_label(f, idx));
        }
        idx++;
    }

    return rejected;
}
/**
 * Exports classification results to a text file. The header reports
 * precision, recall and F-measure as computed by quality(); each
 * following line holds report source, label, prototype source and
 * distance. Reports whose assignment label is zero are written with
 * the label "rejected".
 * @param p Prototype structure
 * @param fa Feature vector array
 * @param as Assignments to prototypes
 * @param file File name
 */
void export_class(farray_t *p, farray_t *fa, assign_t *as, const char *file)
{
    /* 'as' is dereferenced unconditionally below, so check it as well */
    assert(p && fa && as && file);
    int i, j;
    const char *l;              /* may point to a string literal */
    FILE *f;

    if (verbose > 0)
        printf("Exporting classification to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /*
     * Evaluate some quality functions.
     * NOTE(review): ownership of the array returned by quality() is not
     * visible here; it is not released in this function -- confirm.
     */
    double *e = quality(fa->y, as->label, as->len);

    /* Print prototype header */
    fprintf(f, "# ---\n# Classification for %s\n", fa->src);
    fprintf(f, "# Precision of classification: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# Recall of classification: %4.1f %%\n",
            e[Q_RECALL] * 100.0);
    fprintf(f, "# F-measure of classification: %4.1f %%\n",
            e[Q_FMEASURE] * 100.0);
    fprintf(f, "# ---\n# <report> <label> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = as->proto[i];
        /* The prototype column is printed for rejected reports, too */
        l = as->label[i] ? farray_get_label(p, j) : "rejected";
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, l, p->x[j]->src,
                as->dist[i]);
    }

    fclose(f);
}
/** * Simple test cases classification */ int test_classify() { int i, k, err = 0; fvec_t *f; test_printf("Classification using prototypes"); /* Prepare training data */ farray_t *fa1 = farray_create("train"); for (i = 0; train_data[i].str; i++) { f = fvec_extract(train_data[i].str, strlen(train_data[i].str), NULL); farray_add(fa1, f, train_data[i].label); } /* Prepare testing data */ farray_t *fa2 = farray_create("train"); for (i = 0; test_data[i].str; i++) { f = fvec_extract(test_data[i].str, strlen(test_data[i].str), NULL); farray_add(fa2, f, test_data[i].label); } /* Classification of test data */ config_set_float(&cfg, "classify.max_dist", 1.41); assign_t *a = class_assign(fa2, fa1); /* Check predicted labels */ for (k = 0; test_data[k].str; k++) { char *l = farray_get_label(fa1, a->proto[k]); err += strcmp(l, test_data[k].label) != 0; } /* Clean up */ assign_destroy(a); farray_destroy(fa1); farray_destroy(fa2); test_return(err, i); return err; }
/**
 * Exports results from the incremental analysis (phase 1). The results are
 * obtained by first classifying and then clustering reports. In the first
 * phase the classified reports are written to the output file. Reports
 * whose assignment label is zero are skipped. If no prototypes or
 * assignments are given, only the headers are written.
 * @param p Prototype structure (may be NULL)
 * @param fa Feature vector array
 * @param as Assignments to prototypes (may be NULL)
 * @param file File name
 */
void export_increment1(farray_t *p, farray_t *fa, assign_t *as,
                       const char *file)
{
    /* 'fa' and 'file' are always used; 'p' and 'as' are optional */
    assert(fa && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting incremental analysis (1) to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Print incremental header */
    fprintf(f, "# ---\n# Incremental analysis for %s\n", fa->src);
    fprintf(f, "# ---\n# <report> <cluster> <prototype> <distance>\n");

    /* Without prototypes or assignments there is nothing more to write */
    if (!p || !as) {
        fclose(f);
        return;
    }

    for (i = 0; i < fa->len; i++) {
        /* Skip reports without a label */
        if (!as->label[i])
            continue;
        j = as->proto[i];
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src,
                farray_get_label(p, j), p->x[j]->src, as->dist[i]);
    }

    fclose(f);
}