コード例 #1
0
ファイル: export.c プロジェクト: JaonLin/malheur
/**
 * Exports a distance matrix to a text file.
 *
 * The output starts with the version header and a short description of
 * the matrix. It is followed by one line per feature vector holding the
 * source of the vector, the value of farray_get_label() and the fa->len
 * entries of the corresponding matrix row. The matrix is indexed as
 * d[i * fa->len + j], i.e. rows are stored one after another.
 *
 * A file that cannot be created or cannot be written completely is
 * reported using error().
 *
 * @param d Pointer to matrix
 * @param fa Feature vector array
 * @param file File name
 */
void export_dist(double *d, farray_t *fa, const char *file)
{
    assert(d && fa && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting distance matrix to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Print distance header */
    fprintf(f, "# ---\n# Distance matrix for %s\n", fa->src);
    fprintf(f, "# Matrix size: %lu x %lu\n# ---\n", fa->len, fa->len);
    fprintf(f, "# <report> <cluster> <dist1> <dist2> ... <distn>\n");

    /* Print matrix, one row per line */
    for (i = 0; i < fa->len; i++) {
        fprintf(f, "%s %s ", fa->x[i]->src, farray_get_label(fa, i));
        for (j = 0; j < fa->len; j++)
            fprintf(f, "%g ", d[i * fa->len + j]);
        fprintf(f, "\n");
    }

    /*
     * Output is buffered, so a failed write may only show up in the error
     * indicator of the stream or when fclose() flushes the last data.
     */
    int failed = ferror(f);
    if (fclose(f) != 0 || failed) {
        error("Could not write file '%s'.", file);
    }
}
コード例 #2
0
ファイル: export.c プロジェクト: JaonLin/malheur
/**
 * Exports classification results to a text file.
 *
 * The output starts with the version header, followed by precision,
 * recall and F-measure as computed by quality() from fa->y and the
 * assigned labels. Afterwards one line per report is written holding the
 * source of the report, its label, the source of the assigned prototype
 * and the distance to it. Reports whose assigned label is zero are
 * written with the label "rejected".
 *
 * A file that cannot be created or cannot be written completely is
 * reported using error().
 *
 * @param p Prototype structure
 * @param fa Feature vector array
 * @param as Assignments to prototypes
 * @param file File name
 */
void export_class(farray_t *p, farray_t *fa, assign_t *as, const char *file)
{
    /* All four pointers are dereferenced below */
    assert(p && fa && as && file);
    int i, j;
    const char *l;
    FILE *f;

    if (verbose > 0)
        printf("Exporting classification to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /*
     * Evaluate some quality functions.
     * NOTE(review): ownership of the returned array is not visible here;
     * it is not released by this function -- confirm against quality().
     */
    double *e = quality(fa->y, as->label, as->len);

    /* Print classification header */
    fprintf(f, "# ---\n# Classification for %s\n", fa->src);
    fprintf(f, "# Precision of classification: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# Recall of classification: %4.1f %%\n",
            e[Q_RECALL] * 100.0);
    fprintf(f, "# F-measure of classification: %4.1f %%\n",
            e[Q_FMEASURE] * 100.0);
    fprintf(f, "# ---\n# <report> <label> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = as->proto[i];
        /* A zero label marks a report without an accepted class */
        l = as->label[i] ? farray_get_label(p, j) : "rejected";
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, l, p->x[j]->src,
                as->dist[i]);
    }

    /*
     * Output is buffered, so a failed write may only show up in the error
     * indicator of the stream or when fclose() flushes the last data.
     */
    int failed = ferror(f);
    if (fclose(f) != 0 || failed) {
        error("Could not write file '%s'.", file);
    }
}
コード例 #3
0
ファイル: export.c プロジェクト: JaonLin/malheur
/**
 * Exports a clustering structure to a text file.
 *
 * The output starts with the version header, followed by the number of
 * clusters and precision, recall and F-measure as computed by quality()
 * from fa->y and the cluster assignments. Afterwards one line per report
 * is written holding the source of the report, the name of its cluster,
 * the source of the assigned prototype and the distance to it.
 *
 * A file that cannot be created or cannot be written completely is
 * reported using error().
 *
 * @param c Clustering structure
 * @param p Prototype structure
 * @param fa Feature vector array
 * @param a Assignments of prototypes
 * @param file File name
 */
void export_cluster(cluster_t *c, farray_t *p, farray_t *fa, assign_t *a,
                    const char *file)
{
    /* All five pointers are dereferenced below */
    assert(c && p && fa && a && file);
    FILE *f;
    int i, j;

    if (verbose > 0)
        printf("Exporting clusters to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /*
     * Evaluate some quality functions.
     * NOTE(review): ownership of the returned array is not visible here;
     * it is not released by this function -- confirm against quality().
     */
    double *e = quality(fa->y, c->cluster, c->len);

    /* Print cluster header */
    fprintf(f, "# ---\n# Clusters for %s\n", fa->src);
    fprintf(f, "# Number of cluster: %lu\n", c->num);
    fprintf(f, "# Precision of clusters: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# Recall of clusters: %4.1f %%\n", e[Q_RECALL] * 100.0);
    fprintf(f, "# F-measure of clusters: %4.1f %%\n", e[Q_FMEASURE] * 100.0);
    fprintf(f, "# ---\n# <report> <cluster> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = a->proto[i];
        fprintf(f, "%s %s %s %g\n", fa->x[i]->src, cluster_get_name(c, i),
                p->x[j]->src, a->dist[i]);
    }

    /*
     * Output is buffered, so a failed write may only show up in the error
     * indicator of the stream or when fclose() flushes the last data.
     */
    int failed = ferror(f);
    if (fclose(f) != 0 || failed) {
        error("Could not write file '%s'.", file);
    }
}
コード例 #4
0
ファイル: export.c プロジェクト: JaonLin/malheur
/**
 * Exports a structure of prototypes to a text file.
 *
 * The output starts with the version header, followed by the number of
 * prototypes, the ratio of prototypes to feature vectors in percent and
 * the precision as computed by quality() from fa->y and the prototype
 * assignments. Afterwards one line per report is written holding the
 * source of the report, the source of the assigned prototype and the
 * distance to it.
 *
 * A file that cannot be created or cannot be written completely is
 * reported using error().
 *
 * @param pr Prototype structure
 * @param fa Feature vector array
 * @param as Assignments to prototypes
 * @param file File name
 */
void export_proto(farray_t *pr, farray_t *fa, assign_t *as, const char *file)
{
    /* All four pointers are dereferenced below */
    assert(pr && fa && as && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting prototypes to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /*
     * Evaluate some quality functions.
     * NOTE(review): ownership of the returned array is not visible here;
     * it is not released by this function -- confirm against quality().
     */
    double *e = quality(fa->y, as->proto, as->len);

    /* Print prototype header */
    fprintf(f, "# ---\n# Prototypes for %s\n", fa->src);
    fprintf(f, "# Number of prototypes: %lu\n", pr->len);
    fprintf(f, "# Compression of prototypes: %4.1f %%\n",
            pr->len * 100.0 / (double) fa->len);
    fprintf(f, "# Precision of prototypes: %4.1f %%\n",
            e[Q_PRECISION] * 100.0);
    fprintf(f, "# ---\n# <report> <prototype> <distance>\n");

    for (i = 0; i < fa->len; i++) {
        j = as->proto[i];
        fprintf(f, "%s %s %g\n", fa->x[i]->src, pr->x[j]->src, as->dist[i]);
    }

    /*
     * Output is buffered, so a failed write may only show up in the error
     * indicator of the stream or when fclose() flushes the last data.
     */
    int failed = ferror(f);
    if (fclose(f) != 0 || failed) {
        error("Could not write file '%s'.", file);
    }
}
コード例 #5
0
ファイル: export.c プロジェクト: JaonLin/malheur
/**
 * Exports results from the incremental analysis (phase 1). The results are
 * obtained by first classifying and then clustering reports. In the first
 * phase the classified reports are written to the output file.
 *
 * The output starts with the version header and the incremental header.
 * If prototypes and assignments are both given, one line is written for
 * every report with a non-zero assigned label, holding the source of the
 * report, the label of its prototype, the source of the prototype and the
 * distance to it. Reports with a zero label are skipped.
 *
 * A file that cannot be created or cannot be written completely is
 * reported using error().
 *
 * @param p Prototype structure (may be NULL, only the header is written)
 * @param fa Feature vector array
 * @param as Assignments to prototypes (may be NULL, only the header is
 *           written)
 * @param file File name
 */
void export_increment1(farray_t *p, farray_t *fa, assign_t *as,
                       const char *file)
{
    /* p and as are optional, fa and file are always used */
    assert(fa && file);
    int i, j;
    FILE *f;

    if (verbose > 0)
        printf("Exporting incremental analysis (1) to '%s'.\n", file);

    if (!(f = fopen(file, "w"))) {
        error("Could not create file '%s'.", file);
        return;
    }

    /* Print version header */
    malheur_version(f);

    /* Print incremental header */
    fprintf(f, "# ---\n# Incremental analysis for %s\n", fa->src);
    fprintf(f, "# ---\n# <report> <cluster> <prototype> <distance>\n");

    /* Without prototypes or assignments there is nothing to list */
    if (p && as) {
        for (i = 0; i < fa->len; i++) {
            if (!as->label[i])
                continue;
            j = as->proto[i];
            fprintf(f, "%s %s %s %g\n", fa->x[i]->src,
                    farray_get_label(p, j), p->x[j]->src, as->dist[i]);
        }
    }

    /*
     * Output is buffered, so a failed write may only show up in the error
     * indicator of the stream or when fclose() flushes the last data.
     */
    int failed = ferror(f);
    if (fclose(f) != 0 || failed) {
        error("Could not write file '%s'.", file);
    }
}
コード例 #6
0
ファイル: malheur.c プロジェクト: chiehwen/malheur
/**
 * Parse command line options
 * @param argc Number of arguments
 * @param argv Argument values
 */
static void parse_options(int argc, char **argv)
{
    int ch;
    
    /* reset getopt */
    optind = 0;
    
    while ((ch = getopt_long(argc, argv, OPTSTRING, longopts, NULL)) != -1) {
        switch (ch) {
        case 'n': 
            save = FALSE;
            break;
        case 'r':
            reset = TRUE;
            break;
        case 'v':
        case 'm':
            /* Empty. See load_config() */
            break;
        case 'o':
            output_file = optarg;
            break;
        case 'V':
            malheur_version(stdout);
            exit(EXIT_SUCCESS);
            break;
        case 'h':
        case '?':
            print_usage();
            exit(EXIT_SUCCESS);
            break;

        /* long options */
        case 1001:
            config_set_string(&cfg, "input.format", optarg);    
            break;
        case 1002:
            config_set_int(&cfg, "input.mist_level", atoi(optarg));    
            break;
        case 1003:
            config_set_int(&cfg, "input.mist_rlen", atoi(optarg));    
            break;
        case 1004:
            config_set_int(&cfg, "input.mist_tlen", atoi(optarg));    
            break;
        case 1005:
            config_set_string(&cfg, "features.ngram_delim", optarg);    
            break;
        case 1006:
            config_set_int(&cfg, "features.ngram_len", atoi(optarg));    
            break;
        case 1007:
            config_set_string(&cfg, "features.vect_embed", optarg);    
            break;
        case 1008:
            config_set_int(&cfg, "features.lookup_table", atoi(optarg));    
            break;
        case 1009:
            config_set_float(&cfg, "prototypes.max_dist", atof(optarg));    
            break;
        case 1010:
            config_set_int(&cfg, "prototypes.max_num", atoi(optarg));    
            break;
        case 1011:
            config_set_float(&cfg, "classify.max_dist", atof(optarg));    
            break;
        case 1012:
            config_set_string(&cfg, "cluster.link_mode", optarg);    
            break;
        case 1013:
            config_set_float(&cfg, "cluster.min_dist", atof(optarg));    
            break;
        case 1014:
            config_set_int(&cfg, "cluster.reject_num", atoi(optarg));    
            break;
        case 1015:
            config_set_int(&cfg, "cluster.shared_ngrams", atoi(optarg));    
            break;
        }
    }

    /* Check configuration */
    config_check(&cfg);

    argc -= optind;
    argv += optind;

    if (argc < 1)
        fatal("the <action> argument is required");

    /* Argument: action */
    if (!strcasecmp(argv[0], "prototype")) {
        action = PROTOTYPE;
    } else if (!strcasecmp(argv[0], "distance")) {
        action = DISTANCE;
    } else if (!strcasecmp(argv[0], "cluster")) {
        action = CLUSTER;
    } else if (!strcasecmp(argv[0], "classify")) {
        action = CLASSIFY;
    } else if (!strcasecmp(argv[0], "increment")) {
        action = INCREMENT;
    } else if (!strcasecmp(argv[0], "protodist")) {
        action = PROTODIST;
    } else if (!strcasecmp(argv[0], "info")) {
        action = INFO;
    } else {
        fatal("Unknown analysis action '%s'", argv[0]);
    }
    
    if (argc < 2 && action != PROTODIST && action != INFO) 
        fatal("the <dataset> argument is required");

    /* Assign input files */
    input_files = argv + 1;
    input_len = argc - 1;
}