Ejemplo n.º 1
0
/**
 * Test case: linkage clustering with link mode "single".
 *
 * Extracts one feature vector per entry of test_data, clusters the
 * resulting array with cluster.link_mode set to "single" and counts a
 * failure when
 *   - the number of clusters differs from DATA_CLUSTER, or
 *   - two neighbouring vectors inside a group of
 *     DATA_LEN / DATA_CLUSTER consecutive vectors carry different
 *     cluster assignments.
 *
 * @return Number of failed checks
 */
int test_cluster_single()
{
    int idx, start, off;
    int failures = 0;

    test_printf("Clustering using prototypes (single)");

    /* Build the feature array from the test strings */
    farray_t *vectors = farray_create("test");
    for (idx = 0; idx < DATA_LEN; idx++) {
        fvec_t *vec = fvec_extract(test_data[idx], strlen(test_data[idx]), NULL);
        farray_add(vectors, vec, "test");
    }

    /* Run the linkage clustering in "single" mode */
    config_set_string(&cfg, "cluster.link_mode", "single");
    cluster_t *clustering = cluster_linkage(vectors, 0);

    /* The clustering must contain exactly DATA_CLUSTER clusters */
    if (clustering->num != DATA_CLUSTER) {
        failures++;
    }

    /* Neighbours within each group must share the same assignment */
    for (start = 0; start < DATA_LEN; start += DATA_LEN / DATA_CLUSTER) {
        for (off = 0; off < DATA_LEN / DATA_CLUSTER - 1; off++) {
            if (clustering->cluster[start + off] !=
                clustering->cluster[start + off + 1]) {
                failures++;
            }
        }
    }

    /* Release the clustering and the feature array */
    cluster_destroy(clustering);
    farray_destroy(vectors);

    test_return(failures, 1 + DATA_CLUSTER * (DATA_LEN / DATA_CLUSTER - 1));
    return failures;
}
Ejemplo n.º 2
0
/**
 * Initializes an empty clustering.
 * @param n Number of points
 * @param r Run of clustering
 * @return Clustering structure
 */
static cluster_t *cluster_create(int n, int r)
{
    int i;

    /* Allocate cluster structure */
    cluster_t *c = calloc(1, sizeof(cluster_t));
    if (!c) {
        error("Could not allocate cluster structure");
        return NULL;
    }

    /* Allocate cluster assignments */
    c->cluster = malloc(sizeof(unsigned int) * n);
    if (!c->cluster) {
        error("Could not allocate cluster assignments");
        cluster_destroy(c);
        return NULL;
    }

    /* Initialize cluster assignements */
    for (i = 0; i < n; i++)
        c->cluster[i] = i + 1;

    c->num = n;
    c->len = n;
    c->run = r;

    return c;
}
Ejemplo n.º 3
0
/**
 * Runs the clustering action on the loaded malware reports.
 *
 * Loads the data, extracts prototypes from it, clusters the prototypes
 * with cluster_linkage(), extrapolates and trims the clustering, and
 * exports the clustering and the shared n-grams to output_file. When
 * `save` is set, the prototypes of the clustering are written to
 * mcfg.proto_file and the rejected feature vectors to mcfg.reject_file.
 */
static void malheur_cluster()
{
    assign_t *assign;

    /* Load the reports and extract prototypes from them */
    farray_t *data = malheur_load();
    farray_t *protos = proto_extract(data, &assign);

    /* Cluster the prototypes, then extrapolate and trim the result */
    cluster_t *clustering = cluster_linkage(protos, 0);
    cluster_extrapolate(clustering, assign);
    cluster_trim(clustering);

    /* Prototypes of the clustering; written only when saving is on */
    farray_t *cluster_protos = cluster_get_prototypes(clustering, assign, protos);
    if (save) {
        farray_save_file(cluster_protos, mcfg.proto_file);
    }
    farray_destroy(cluster_protos);

    /* Rejected feature vectors; written only when saving is on */
    farray_t *rejected = cluster_get_rejected(clustering, data);
    if (save) {
        farray_save_file(rejected, mcfg.reject_file);
    }
    farray_destroy(rejected);

    /* Write the clustering, followed by the shared n-grams */
    export_cluster(clustering, protos, data, assign, output_file);
    export_shared_ngrams(clustering, data, output_file);

    /* Release everything that is still alive */
    cluster_destroy(clustering);
    assign_destroy(assign);
    farray_destroy(protos);
    farray_destroy(data);
}
Ejemplo n.º 4
0
/*
 * Stress test for clustering.
 *
 * In each of STRESS_RUNS rounds a feature array is filled with
 * NUM_VECTORS vectors extracted from random strings of STR_LENGTH
 * decimal digits, each stored under a random label "label00" to
 * "label09". The array is then clustered and everything is released
 * again. No result is inspected, so the error count handed to
 * test_return() is always 0; the test can only fail by crashing.
 *
 * Returns the number of errors (always 0).
 */
int test_stress()
{
    int run, vec, pos;
    int err = 0;
    char digits[STR_LENGTH + 1], label[32];

    test_printf("Stress test for clustering");

    for (run = 0; run < STRESS_RUNS; run++) {
        /* Fresh feature array for this round */
        farray_t *array = farray_create("test");

        for (vec = 0; vec < NUM_VECTORS; vec++) {
            /* Random string of decimal digits, NUL-terminated */
            for (pos = 0; pos < STR_LENGTH; pos++) {
                digits[pos] = '0' + rand() % 10;
            }
            digits[pos] = '\0';

            /* Turn the string into a feature vector, pick a random label */
            fvec_t *features = fvec_extract(digits, strlen(digits), "test");
            snprintf(label, sizeof(label), "label%.2d", rand() % 10);

            /* Store the vector under that label */
            farray_add(array, features, label);
        }

        /* Cluster the array; the result itself is not checked */
        cluster_t *clustering = cluster_linkage(array, 0);

        /* Release the clustering and the features */
        cluster_destroy(clustering);
        farray_destroy(array);
    }

    test_return(err, STRESS_RUNS);
    return err;
}
Ejemplo n.º 5
0
/**
 * Incrementally analyses the given malware reports.
 *
 * The loaded reports are merged with the feature vectors stored in
 * mcfg.reject_file, if that file exists. When mcfg.proto_file is
 * readable, the data is first classified against the prototypes stored
 * there and only the vectors rejected by the classification are kept.
 * The remaining vectors are then clustered: prototypes are extracted,
 * clustered with cluster_linkage() using run mstate.run + 1,
 * extrapolated and trimmed. The results of both stages are exported to
 * output_file.
 *
 * The run counter and the prototype/reject counts in mstate are always
 * updated. When `save` is set, the new prototypes are appended to
 * mcfg.proto_file, the rejected vectors are written to mcfg.reject_file
 * and the internal state is saved.
 */
static void malheur_increment()
{
    farray_t *pr = NULL, *tmp, *pn, *re;
    /* Must start out as NULL: the branch without a prototype file hands
     * this pointer to export_increment1() before any assignment exists */
    assign_t *as = NULL;

    /* Load internal state */
    malheur_load_state();

    /* Load data including rejected stuff */
    farray_t *fa = malheur_load();
    if (!access(mcfg.reject_file, F_OK)) {
        tmp = farray_load_file(mcfg.reject_file);
        /* NOTE(review): tmp is not destroyed here; presumably
         * farray_merge() takes it over -- confirm */
        fa = farray_merge(fa, tmp);
    }

    /* Classification */
    if (!access(mcfg.proto_file, R_OK)) {
        pr = farray_load_file(mcfg.proto_file);

        /* Apply classification */
        as = class_assign(fa, pr);
        tmp = class_get_rejected(as, fa);

        /* Export results */
        export_increment1(pr, fa, as, output_file);

        /* Clean up; only the rejected vectors are processed further */
        farray_destroy(fa);
        farray_destroy(pr);
        assign_destroy(as);
        fa = tmp;
    } else {
        /* No prototypes available: export with pr and as both NULL */
        export_increment1(pr, fa, as, output_file);
    }

    /* Extract prototypes */
    pr = proto_extract(fa, &as);

    /* Cluster prototypes and extrapolate */
    cluster_t *c = cluster_linkage(pr, mstate.run + 1);
    cluster_extrapolate(c, as);
    cluster_trim(c);

    /* Save prototype vectors */
    pn = cluster_get_prototypes(c, as, pr);
    if (save) {
        farray_append_file(pn, mcfg.proto_file);
    }

    /* Save rejected feature vectors */
    re = cluster_get_rejected(c, fa);
    if (save) {
        farray_save_file(re, mcfg.reject_file);
    }

    /* Update state */
    mstate.run++;
    mstate.num_proto = pn->len;
    mstate.num_reject = re->len;

    /* Save state */
    if (save) {
        malheur_save_state();
    }

    /* Export results */
    export_increment2(c, pr, fa, as, output_file);

    /* Clean up */
    cluster_destroy(c);
    assign_destroy(as);

    farray_destroy(re);
    farray_destroy(pn);
    farray_destroy(pr);
    farray_destroy(fa);
}