/** * Test clustering */ int test_cluster_single() { int i, j, k, err = 0; test_printf("Clustering using prototypes (single)"); /* Prepare test data */ ; farray_t *fa = farray_create("test"); for (i = 0; i < DATA_LEN; i++) { fvec_t *f = fvec_extract(test_data[i], strlen(test_data[i]), NULL); farray_add(fa, f, "test"); } /* Get clustering */ config_set_string(&cfg, "cluster.link_mode", "single"); cluster_t *c = cluster_linkage(fa, 0); /* Check number of clusters */ err += (c->num != DATA_CLUSTER); /* Check position of prototypes */ for (k = 0; k < DATA_LEN; k += DATA_LEN / DATA_CLUSTER) for (j = 0; j < DATA_LEN / DATA_CLUSTER - 1; j++) err += c->cluster[k + j] != c->cluster[k + j + 1]; /* Clean up */ cluster_destroy(c); farray_destroy(fa); test_return(err, 1 + DATA_CLUSTER * (DATA_LEN / DATA_CLUSTER - 1)); return err; }
/** * Initializes an empty clustering. * @param n Number of points * @param r Run of clustering * @return Clustering structure */ static cluster_t *cluster_create(int n, int r) { int i; /* Allocate cluster structure */ cluster_t *c = calloc(1, sizeof(cluster_t)); if (!c) { error("Could not allocate cluster structure"); return NULL; } /* Allocate cluster assignments */ c->cluster = malloc(sizeof(unsigned int) * n); if (!c->cluster) { error("Could not allocate cluster assignments"); cluster_destroy(c); return NULL; } /* Initialize cluster assignements */ for (i = 0; i < n; i++) c->cluster[i] = i + 1; c->num = n; c->len = n; c->run = r; return c; }
/** * Clusters the given malware reports */ static void malheur_cluster() { assign_t *as; farray_t *fa, *pr, *pn, *re; /* Load data */ fa = malheur_load(); /* Extract prototypes */ pr = proto_extract(fa, &as); /* Cluster prototypes and extrapolate */ cluster_t *c = cluster_linkage(pr, 0); cluster_extrapolate(c, as); cluster_trim(c); /* Save prototypes */ pn = cluster_get_prototypes(c, as, pr); if (save) farray_save_file(pn, mcfg.proto_file); farray_destroy(pn); /* Save rejected feature vectors */ re = cluster_get_rejected(c, fa); if (save) farray_save_file(re, mcfg.reject_file); farray_destroy(re); /* Export clustering */ export_cluster(c, pr, fa, as, output_file); /* Export shared n-grams */ export_shared_ngrams(c, fa, output_file); /* Clean up */ cluster_destroy(c); assign_destroy(as); farray_destroy(pr); farray_destroy(fa); }
/* * A simple stress test for clustering */ int test_stress() { int i, j, k, err = 0; fvec_t *f; farray_t *fa; char buf[STR_LENGTH + 1], label[32]; test_printf("Stress test for clustering"); for (i = 0; i < STRESS_RUNS; i++) { /* Create array */ fa = farray_create("test"); for (j = 0; j < NUM_VECTORS; j++) { for (k = 0; k < STR_LENGTH; k++) buf[k] = rand() % 10 + '0'; buf[k] = 0; /* Extract features */ f = fvec_extract(buf, strlen(buf), "test"); snprintf(label, 32, "label%.2d", rand() % 10); /* Add to array */ farray_add(fa, f, label); } /* Extract prototypes */ cluster_t *c = cluster_linkage(fa, 0); /* Destroy features */ cluster_destroy(c); farray_destroy(fa); } test_return(err, STRESS_RUNS); return err; }
/** * Classify the given malware reports */ static void malheur_increment() { farray_t *pr = NULL, *tmp, *pn, *re; assign_t *as; /* Load internal state */ malheur_load_state(); /* Load data including rejected stuff */ farray_t *fa = malheur_load(); if (!access(mcfg.reject_file, F_OK)) { tmp = farray_load_file(mcfg.reject_file); fa = farray_merge(fa, tmp); } /* Classification */ if (!access(mcfg.proto_file, R_OK)) { pr = farray_load_file(mcfg.proto_file); /* Apply classification */ as = class_assign(fa, pr); tmp = class_get_rejected(as, fa); /* Export results */ export_increment1(pr, fa, as, output_file); /* Clean up */ farray_destroy(fa); farray_destroy(pr); assign_destroy(as); fa = tmp; } else { /* Export results */ export_increment1(pr, fa, as, output_file); } /* Extract prototypes */ pr = proto_extract(fa, &as); /* Cluster prototypes and extrapolate */ cluster_t *c = cluster_linkage(pr, mstate.run + 1); cluster_extrapolate(c, as); cluster_trim(c); /* Save prototypes vectors */ pn = cluster_get_prototypes(c, as, pr); if (save) farray_append_file(pn, mcfg.proto_file); /* Save rejeted feature vectors */ re = cluster_get_rejected(c, fa); if (save) farray_save_file(re, mcfg.reject_file); /* Update state */ mstate.run++; mstate.num_proto = pn->len; mstate.num_reject = re->len; /* Save state */ if (save) malheur_save_state(); /* Export results */ export_increment2(c, pr, fa, as, output_file); /* Clean up */ cluster_destroy(c); assign_destroy(as); farray_destroy(re); farray_destroy(pn); farray_destroy(pr); farray_destroy(fa); }