/** * Test clustering */ int test_cluster_single() { int i, j, k, err = 0; test_printf("Clustering using prototypes (single)"); /* Prepare test data */ ; farray_t *fa = farray_create("test"); for (i = 0; i < DATA_LEN; i++) { fvec_t *f = fvec_extract(test_data[i], strlen(test_data[i]), NULL); farray_add(fa, f, "test"); } /* Get clustering */ config_set_string(&cfg, "cluster.link_mode", "single"); cluster_t *c = cluster_linkage(fa, 0); /* Check number of clusters */ err += (c->num != DATA_CLUSTER); /* Check position of prototypes */ for (k = 0; k < DATA_LEN; k += DATA_LEN / DATA_CLUSTER) for (j = 0; j < DATA_LEN / DATA_CLUSTER - 1; j++) err += c->cluster[k + j] != c->cluster[k + j + 1]; /* Clean up */ cluster_destroy(c); farray_destroy(fa); test_return(err, 1 + DATA_CLUSTER * (DATA_LEN / DATA_CLUSTER - 1)); return err; }
/**
 * Return an array of prototypes labeled via cluster_get_name().
 *
 * Walks the assignment and, for every entry whose cluster value is
 * non-zero, adds a clone of the assigned prototype to the result. Each
 * prototype index is added at most once; it is labeled with the name
 * obtained for the first entry that refers to it.
 *
 * @param c cluster structure
 * @param a assignment of prototypes
 * @param p prototypes
 * @return array of cloned prototypes. If memory for the internal
 *         bookkeeping cannot be allocated, collection stops early and
 *         the prototypes gathered up to that point are returned.
 */
farray_t *cluster_get_prototypes(cluster_t *c, assign_t *a, farray_t *p)
{
    int i;
    farray_t *n = farray_create("prototypes");
    count_t *hash = NULL, *entry;

    for (i = 0; i < a->len; i++) {
        /* Skip rejected clusters */
        if (!c->cluster[i])
            continue;

        /* Check if prototype has been added */
        int j = a->proto[i];
        HASH_FIND_INT(hash, &j, entry);
        if (entry)
            continue;

        /* Remember the prototype index; never dereference a failed malloc */
        entry = malloc(sizeof *entry);
        if (!entry)
            break;
        entry->label = j;
        HASH_ADD_INT(hash, label, entry);

        /* Add prototype */
        farray_add(n, fvec_clone(p->x[j]), cluster_get_name(c, i));
    }

    /* Delete hash table */
    while (hash) {
        entry = hash;
        HASH_DEL(hash, entry);
        free(entry);
    }

    return n;
}
/* * A simple stress test for classification */ int test_stress() { int i, j, k, err = 0; fvec_t *f; farray_t *fa; char buf[STR_LENGTH + 1], label[32]; test_printf("Stress test for classification"); for (i = 0; i < STRESS_RUNS; i++) { /* Create array */ fa = farray_create("test"); for (j = 0; j < NUM_VECTORS; j++) { for (k = 0; k < STR_LENGTH; k++) buf[k] = rand() % 10 + '0'; buf[k] = 0; /* Extract features */ f = fvec_extract(buf, strlen(buf), "test"); snprintf(label, 32, "label%.2d", rand() % 10); /* Add to array */ farray_add(fa, f, label); } assign_t *a = class_assign(fa, fa); assign_destroy(a); farray_destroy(fa); } test_return(err, STRESS_RUNS); return err; }
/**
 * Collect the rejected feature vectors, i.e. those whose entry in the
 * cluster structure is zero.
 * @param c Cluster structure
 * @param f Array of feature vectors
 * @return Array holding clones of the rejected feature vectors
 */
farray_t *cluster_get_rejected(cluster_t *c, farray_t *f)
{
    int idx;
    farray_t *rejected = farray_create("rejected");

    for (idx = 0; idx < f->len; idx++) {
        /* Only entries with a zero cluster value are copied over */
        if (!c->cluster[idx]) {
            farray_add(rejected, fvec_clone(f->x[idx]),
                       farray_get_label(f, idx));
        }
    }

    return rejected;
}
/** * Simple test cases classification */ int test_classify() { int i, k, err = 0; fvec_t *f; test_printf("Classification using prototypes"); /* Prepare training data */ farray_t *fa1 = farray_create("train"); for (i = 0; train_data[i].str; i++) { f = fvec_extract(train_data[i].str, strlen(train_data[i].str), NULL); farray_add(fa1, f, train_data[i].label); } /* Prepare testing data */ farray_t *fa2 = farray_create("train"); for (i = 0; test_data[i].str; i++) { f = fvec_extract(test_data[i].str, strlen(test_data[i].str), NULL); farray_add(fa2, f, test_data[i].label); } /* Classification of test data */ config_set_float(&cfg, "classify.max_dist", 1.41); assign_t *a = class_assign(fa2, fa1); /* Check predicted labels */ for (k = 0; test_data[k].str; k++) { char *l = farray_get_label(fa1, a->proto[k]); err += strcmp(l, test_data[k].label) != 0; } /* Clean up */ assign_destroy(a); farray_destroy(fa1); farray_destroy(fa2); test_return(err, i); return err; }
/* * A simple stress test for clustering */ int test_stress() { int i, j, k, err = 0; fvec_t *f; farray_t *fa; char buf[STR_LENGTH + 1], label[32]; test_printf("Stress test for clustering"); for (i = 0; i < STRESS_RUNS; i++) { /* Create array */ fa = farray_create("test"); for (j = 0; j < NUM_VECTORS; j++) { for (k = 0; k < STR_LENGTH; k++) buf[k] = rand() % 10 + '0'; buf[k] = 0; /* Extract features */ f = fvec_extract(buf, strlen(buf), "test"); snprintf(label, 32, "label%.2d", rand() % 10); /* Add to array */ farray_add(fa, f, label); } /* Extract prototypes */ cluster_t *c = cluster_linkage(fa, 0); /* Destroy features */ cluster_destroy(c); farray_destroy(fa); } test_return(err, STRESS_RUNS); return err; }