void vector_quantisation_n_clusters(){ Dataset* dataset = new_dataset(DIMENSIONALITY); CLUSTER_COUNT = read_input(dataset); normalise_dataset(dataset, MIN_VALUES, MAX_VALUES); Centroid** centroids = malloc(CLUSTER_COUNT * sizeof(Centroid*)); for (size_t i = 0; i < CLUSTER_COUNT; i++) { centroids[i] = new_centroid((int)i, 1.0); } double total_error_previous_try = FLOAT_MAX; double total_error = FLOAT_MAX; double total_error_old = 0.0; double epsilon = 0.0001; Datapoint** best_centers = malloc(CLUSTER_COUNT * sizeof(Datapoint*)); for (size_t i = 0; i < CLUSTER_COUNT; i++) { best_centers[i] = new_datapoint(DIMENSIONALITY); } time_t time_at_beginning = time(0); while ((time(0) - time_at_beginning) < 20) { while ((time(0) - time_at_beginning) < 20) { if (fabs(total_error_old - total_error) <= epsilon && total_error != FLOAT_MAX) { break; } compute_cluster_centers(dataset, centroids); total_error_old = total_error; total_error = error_total(dataset, centroids); } if (total_error < total_error_previous_try) { dump_centers(centroids, best_centers); total_error_previous_try = total_error; } reinitialise_centroids(centroids); reinitialise_dataset(dataset); total_error = total_error_old = FLOAT_MAX; } load_centers(centroids, best_centers); print_centers(centroids); for (size_t i = 0; i < CLUSTER_COUNT; i++) { delete_centroid(centroids[i]); delete_datapoint(best_centers[i]); } }
void vector_quantisation_x_clusters(){ Dataset* dataset = new_dataset(DIMENSIONALITY); read_input(dataset); if (dataset->size == 1) { printf("%lf,%lf\n", dataset->points[0]->position->scalars[0], dataset->points[0]->position->scalars[1]); return; } if (dataset->size == 2) { double x1 = dataset->points[0]->position->scalars[0]; double x2 = dataset->points[1]->position->scalars[0]; double y1 = dataset->points[0]->position->scalars[1]; double y2 = dataset->points[1]->position->scalars[1]; printf("%lf,%lf\n", (x1+x2)/2.0 , (y1+y2)/2.0); return; } normalise_dataset(dataset, MIN_VALUES, MAX_VALUES); double errors_diff_cluster_count[MAX_CLUSTER_COUNT]; memset(errors_diff_cluster_count, 0.0, MAX_CLUSTER_COUNT*sizeof(double)); Datapoint*** best_centers = malloc(MAX_CLUSTER_COUNT * sizeof(Datapoint**)); size_t CLUSTER_COUNT_COMPUTED = 0; time_t time_at_beginning = time(0); while ((time(0) - time_at_beginning) < 9 && CLUSTER_COUNT_COMPUTED < MAX_CLUSTER_COUNT) { CLUSTER_COUNT = ++CLUSTER_COUNT_COMPUTED; best_centers[CLUSTER_COUNT_COMPUTED-1] = malloc(CLUSTER_COUNT_COMPUTED * sizeof(Datapoint*)); for (size_t i = 0; i < CLUSTER_COUNT_COMPUTED; i++) { best_centers[CLUSTER_COUNT_COMPUTED-1][i] = new_datapoint(DIMENSIONALITY); } Centroid** centroids = malloc(CLUSTER_COUNT_COMPUTED * sizeof(Centroid*)); for (size_t i = 0; i < CLUSTER_COUNT_COMPUTED; i++) { centroids[i] = new_centroid((int)i, 1.0); } size_t trial = 0; double total_error_previous_try = FLOAT_MAX; time_t three_sec_max = time(0); while (trial++ < 50 && (time(0) - three_sec_max) < 1) { double total_error = FLOAT_MAX; double total_error_old = FLOAT_MAX; double epsilon = 0.00001; reinitialise_centroids(centroids); reinitialise_dataset(dataset); // allow max 1 sec per cluseter_count iterations time_t one_sec_max = time(0); while ((time(0) - one_sec_max) < 1) { if (fabs(total_error_old - total_error) <= epsilon && total_error != FLOAT_MAX) { break; } compute_cluster_centers(dataset, centroids); total_error_old = 
total_error; total_error = error_total(dataset, centroids); } if (total_error < total_error_previous_try) { dump_centers(centroids, best_centers[CLUSTER_COUNT_COMPUTED-1]); total_error_previous_try = total_error; errors_diff_cluster_count[CLUSTER_COUNT_COMPUTED-1] = total_error; } } // printf("For cluster count: %ld succedded with %ld trials.\n", CLUSTER_COUNT_COMPUTED, trial); for (size_t i = 0; i < CLUSTER_COUNT_COMPUTED; i++) { delete_centroid(centroids[i]); } free(centroids); } CLUSTER_COUNT = CLUSTER_COUNT_COMPUTED; size_t optimal_cluster_count = find_elbow(errors_diff_cluster_count); // printf("Optimal cluster count: %ld\n", optimal_cluster_count); CLUSTER_COUNT = optimal_cluster_count; Centroid** optimal_centroids = malloc(optimal_cluster_count * sizeof(Centroid*)); for (size_t i = 0; i < optimal_cluster_count; i++) { optimal_centroids[i] = new_centroid((int)i, 1.0); } load_centers(optimal_centroids, best_centers[optimal_cluster_count-1]); print_centers(optimal_centroids); for (size_t i = 0; i < CLUSTER_COUNT_COMPUTED; i++) { for (int j = 0; j < i+1; j++) { delete_datapoint(best_centers[i][j]); } free(best_centers[i]); } free(best_centers); for (size_t i = 0; i < optimal_cluster_count; i++) { delete_centroid(optimal_centroids[i]); } free(optimal_centroids); }
/*
 * Motif list-selection callback: loads one or more ANTS scan files into a
 * dataset, depending on the list's selection mode.
 *
 * Browse (single) select: opens the clicked file into a fresh "Single POPS"
 * dataset and makes it the current from/to dataset.
 *
 * Multi select: reads every selected item, either as a scan sequence
 * (type_of_seq is "seqants"/"appseqants") or as individual scans, and
 * either replaces the current dataset or appends to vP->to (when
 * type_of_seq starts with "app").
 *
 * NOTE(review): relies on file-scope globals vP and type_of_seq; the
 * Widget argument w is unused.
 */
static void selectCB(Widget w, ANTS *a, XmListCallbackStruct *cb)
{
    char *txt;
    int n, is_seq = 0, is_app = 0, err=0, first = 1;
    DataSetPtr d = NULL;
    /* Old-style (pre-prototype) local declarations of external functions. */
    void UpdateData(), obtain_map_info();
    int read_file(char *, char *, DataSetPtr);

    if (cb->reason == XmCR_BROWSE_SELECT) {
        /* Single selection: load exactly one file into a new dataset. */
        XmStringGetLtoR(cb->item, XmSTRING_DEFAULT_CHARSET, &txt);
        if (open_ants_block(txt, a) == 0) {
            d = new_dataset(get_listlist(), "Single POPS", NULL);
            if (d && !read_file("ants", a->fname, d)) {
                vP->from = vP->to = d;
                strcpy(d->name, vP->s->name);
            }
        }
        XtFree(txt);
    } else {
        /* Multiple selection: decide sequence vs. individual scans, and
         * append vs. replace, from the type_of_seq global. */
        if (strncmp(type_of_seq, "seqants", 7) == 0 || strncmp(type_of_seq, "appseqants", 10)==0) is_seq = 1;
        if (strncmp(type_of_seq, "app", 3)==0) is_app = 1;
        if (!vP->to || !is_app) {
            /* Replace mode (or nothing to append to): make a new dataset. */
            d = new_dataset(get_listlist(), "POPS", NULL);
            if (!d) return;
        } else {
            /* Append mode: d == NULL signals "add to existing vP->to". */
            d = NULL;
        }
        /* Label whichever dataset will receive the scans.
         * NOTE(review): assumes count_scans(NULL) is well-defined in the
         * append case — confirm in its definition. */
        sprintf(d ? d->name : vP->to->name, "%s %d scans", is_seq ? "Sequence of" : "Read", cb->selected_item_count + count_scans(d));
        for (n=0; n<cb->selected_item_count; n++) {
            if (XmStringGetLtoR(cb->selected_items[n], XmSTRING_DEFAULT_CHARSET, &txt)) {
                if (open_ants_block(txt, a) == 0) {
                    /* Only the first successful read passes d; subsequent
                     * reads pass NULL (presumably appending to the same
                     * target inside read_file — TODO confirm). */
                    err = read_file(is_seq ? "seqants" : "ants", a->fname, first ? d : NULL);
                    if (!err && first) {
                        first = 0;
                    } else if (err == 1) { /* Out of memory */
                        XtFree(txt);
                        break;
                    }
                }
                XtFree(txt);
            }
        }
        if (d) vP->to = d;
        vP->from = vP->to;
        /* With more than one scan loaded, also gather map information. */
        if (count_scans(vP->from) > 1) obtain_map_info(NULL, "map", NULL);
    }
    UpdateData(SCALE_BOTH, REDRAW);
}