/**
 * Writes a distance matrix to a text file.
 *
 * After the version header and a short comment header, one line is
 * written per entry of the feature vector array: the source of the
 * entry, its label as returned by farray_get_label(), and the
 * corresponding row of the matrix. The matrix is read as a row-major
 * array of fa->len x fa->len doubles.
 *
 * @param d Pointer to matrix
 * @param fa Feature vector array
 * @param file File name
 */
void export_dist(double *d, farray_t *fa, const char *file)
{
    assert(d && fa && file);
    FILE *out;
    int row, col;

    if (verbose > 0) {
        printf("Exporting distance matrix to '%s'.\n", file);
    }

    out = fopen(file, "w");
    if (out == NULL) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Version header first, then the description of the matrix */
    malheur_version(out);
    fprintf(out, "# ---\n# Distance matrix for %s\n", fa->src);
    fprintf(out, "# Matrix size: %lu x %lu\n# ---\n", fa->len, fa->len);
    fputs("# <report> <cluster> <dist1> <dist2> ... <distn>\n", out);

    /* One output line per matrix row */
    for (row = 0; row < fa->len; row++) {
        /* Start of the current row inside the flat matrix */
        double *values = d + row * fa->len;

        fprintf(out, "%s %s ", fa->x[row]->src, farray_get_label(fa, row));
        for (col = 0; col < fa->len; col++) {
            fprintf(out, "%g ", values[col]);
        }
        fputc('\n', out);
    }

    fclose(out);
}
/**
 * Exports classification results to a text file.
 *
 * Writes the version header, a quality summary (precision, recall and
 * F-measure, computed by quality() from fa->y and as->label) and then
 * one line per report: source, predicted label, source of the assigned
 * prototype and distance. A report whose entry in as->label is 0 is
 * written with the label "rejected".
 *
 * @param p Prototype structure
 * @param fa Feature vector array
 * @param as Assignments to prototypes
 * @param file File name
 */
void export_class(farray_t *p, farray_t *fa, assign_t *as, const char *file)
{
    /* as is dereferenced unconditionally below, so it is checked as well */
    assert(p && fa && as && file);
    int i, j;
    const char *l;              /* May point to a string literal */
    FILE *f;

    if (verbose > 0)
        printf("Exporting classification to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Evaluate some quality functions */
    /* NOTE(review): the result is not released here; presumably quality()
     * returns storage it owns -- confirm against its definition. */
    double *e = quality(fa->y, as->label, as->len);

    /* Print classification header */
    fprintf(f, "# ---\n# Classification for %s\n", fa->src);
    fprintf(f, "# Precision of classification: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# Recall of classification: %4.1f %%\n",
            e[Q_RECALL] * 100.0);
    fprintf(f, "# F-measure of classification: %4.1f %%\n",
            e[Q_FMEASURE] * 100.0);
    fprintf(f, "# ---\n# <report> <label> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = as->proto[i];
        /* A zero label marks a report without an accepted prediction */
        l = as->label[i] ? farray_get_label(p, j) : "rejected";
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, l, p->x[j]->src,
                as->dist[i]);
    }

    fclose(f);
}
/**
 * Exports a clustering structure to a text file.
 *
 * Writes the version header, the number of clusters, a quality summary
 * (precision, recall and F-measure, computed by quality() from fa->y
 * and c->cluster) and then one line per report: source, cluster name
 * as returned by cluster_get_name(), source of the assigned prototype
 * and distance.
 *
 * @param c Clustering structure
 * @param p Prototype structure
 * @param fa Feature vector array
 * @param a Assignments of prototypes
 * @param file File name
 */
void export_cluster(cluster_t *c, farray_t *p, farray_t *fa, assign_t *a,
                    const char *file)
{
    /* p and a are dereferenced in the output loop, so check them too */
    assert(c && p && fa && a && file);
    FILE *f;
    int i, j;

    if (verbose > 0)
        printf("Exporting clusters to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Evaluate some quality functions */
    /* NOTE(review): the result is not released here; presumably quality()
     * returns storage it owns -- confirm against its definition. */
    double *e = quality(fa->y, c->cluster, c->len);

    /* Print cluster header */
    fprintf(f, "# ---\n# Clusters for %s\n", fa->src);
    fprintf(f, "# Number of cluster: %lu\n", c->num);
    fprintf(f, "# Precision of clusters: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# Recall of clusters: %4.1f %%\n", e[Q_RECALL] * 100.0);
    fprintf(f, "# F-measure of clusters: %4.1f %%\n",
            e[Q_FMEASURE] * 100.0);
    fprintf(f, "# ---\n# <report> <cluster> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = a->proto[i];
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, cluster_get_name(c, i),
                p->x[j]->src, a->dist[i]);
    }

    fclose(f);
}
/**
 * Exports a structure of prototypes to a text file.
 *
 * Writes the version header, the number of prototypes, the ratio of
 * prototypes to feature vectors in percent, the precision computed by
 * quality() from fa->y and as->proto, and then one line per report:
 * source, source of the assigned prototype and distance.
 *
 * @param pr Prototype structure
 * @param fa Feature vector array
 * @param as Assignments to prototypes
 * @param file File name
 */
void export_proto(farray_t *pr, farray_t *fa, assign_t *as, const char *file)
{
    /* as is dereferenced unconditionally below, so it is checked as well */
    assert(pr && fa && as && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting prototypes to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Evaluate some quality functions */
    /* NOTE(review): the result is not released here; presumably quality()
     * returns storage it owns -- confirm against its definition. */
    double *e = quality(fa->y, as->proto, as->len);

    /* Print prototype header */
    fprintf(f, "# ---\n# Prototypes for %s\n", fa->src);
    fprintf(f, "# Number of prototypes: %lu\n", pr->len);
    fprintf(f, "# Compression of prototypes: %4.1f %%\n",
            pr->len * 100.0 / (double) fa->len);
    fprintf(f, "# Precision of prototypes: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# ---\n# <report> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = as->proto[i];
        fprintf(f, "%s %s %g\n", fa->x[i]->src, pr->x[j]->src, as->dist[i]);
    }

    fclose(f);
}
/**
 * Exports results from the incremental analysis (phase 1). The results are
 * obtained by first classifying and then clustering reports. In the first
 * phase the classified reports are written to the output file.
 *
 * The version header and the incremental header are always written. If
 * either p or as is NULL, the file is closed right after the headers.
 * Otherwise one line is written for every report whose entry in
 * as->label is non-zero: source, label of the assigned prototype,
 * source of the assigned prototype and distance.
 *
 * @param p Prototype structure (may be NULL)
 * @param fa Feature vector array
 * @param as Assignments to prototypes (may be NULL)
 * @param file File name
 */
void export_increment1(farray_t *p, farray_t *fa, assign_t *as,
                       const char *file)
{
    /* fa and file are used unconditionally; p and as are optional */
    assert(fa && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting incremental analysis (1) to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Print incremental header */
    fprintf(f, "# ---\n# Incremental analysis for %s\n", fa->src);
    fprintf(f, "# ---\n# <report> <cluster> <prototype> <distance>\n");

    /* Without prototypes or assignments only the headers are written */
    if (!p || !as) {
        fclose(f);
        return;
    }

    for (i = 0; i < fa->len; i++) {
        /* Skip reports with a zero label */
        if (!as->label[i])
            continue;
        j = as->proto[i];
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, farray_get_label(p, j),
                p->x[j]->src, as->dist[i]);
    }

    fclose(f);
}
/** * Parse command line options * @param argc Number of arguments * @param argv Argument values */ static void parse_options(int argc, char **argv) { int ch; /* reset getopt */ optind = 0; while ((ch = getopt_long(argc, argv, OPTSTRING, longopts, NULL)) != -1) { switch (ch) { case 'n': save = FALSE; break; case 'r': reset = TRUE; break; case 'v': case 'm': /* Empty. See load_config() */ break; case 'o': output_file = optarg; break; case 'V': malheur_version(stdout); exit(EXIT_SUCCESS); break; case 'h': case '?': print_usage(); exit(EXIT_SUCCESS); break; /* long options */ case 1001: config_set_string(&cfg, "input.format", optarg); break; case 1002: config_set_int(&cfg, "input.mist_level", atoi(optarg)); break; case 1003: config_set_int(&cfg, "input.mist_rlen", atoi(optarg)); break; case 1004: config_set_int(&cfg, "input.mist_tlen", atoi(optarg)); break; case 1005: config_set_string(&cfg, "features.ngram_delim", optarg); break; case 1006: config_set_int(&cfg, "features.ngram_len", atoi(optarg)); break; case 1007: config_set_string(&cfg, "features.vect_embed", optarg); break; case 1008: config_set_int(&cfg, "features.lookup_table", atoi(optarg)); break; case 1009: config_set_float(&cfg, "prototypes.max_dist", atof(optarg)); break; case 1010: config_set_int(&cfg, "prototypes.max_num", atoi(optarg)); break; case 1011: config_set_float(&cfg, "classify.max_dist", atof(optarg)); break; case 1012: config_set_string(&cfg, "cluster.link_mode", optarg); break; case 1013: config_set_float(&cfg, "cluster.min_dist", atof(optarg)); break; case 1014: config_set_int(&cfg, "cluster.reject_num", atoi(optarg)); break; case 1015: config_set_int(&cfg, "cluster.shared_ngrams", atoi(optarg)); break; } } /* Check configuration */ config_check(&cfg); argc -= optind; argv += optind; if (argc < 1) fatal("the <action> argument is required"); /* Argument: action */ if (!strcasecmp(argv[0], "prototype")) { action = PROTOTYPE; } else if (!strcasecmp(argv[0], "distance")) { action = DISTANCE; } 
else if (!strcasecmp(argv[0], "cluster")) { action = CLUSTER; } else if (!strcasecmp(argv[0], "classify")) { action = CLASSIFY; } else if (!strcasecmp(argv[0], "increment")) { action = INCREMENT; } else if (!strcasecmp(argv[0], "protodist")) { action = PROTODIST; } else if (!strcasecmp(argv[0], "info")) { action = INFO; } else { fatal("Unknown analysis action '%s'", argv[0]); } if (argc < 2 && action != PROTODIST && action != INFO) fatal("the <dataset> argument is required"); /* Assign input files */ input_files = argv + 1; input_len = argc - 1; }