void kmedoids::pam(const dissimilarity_matrix& distance, size_t k, const object_id *initial_medoids) { if (k > distance.size1()) { throw std::logic_error("Attempt to run PAM with more clusters than data."); } if (distance.size1() != distance.size2()) { throw std::logic_error("Error: distance matrix is not square!"); } // first get this the right size. cluster_ids.resize(distance.size1()); // size cluster_ids appropriately and randomly pick initial medoids if (initial_medoids) { medoid_ids.clear(); copy(initial_medoids, initial_medoids + k, back_inserter(medoid_ids)); } else { init_medoids(k, distance); } // set tolerance equal to epsilon times mean magnitude of distances. // Note that distances *should* all be non-negative. double tolerance = epsilon * sum(distance) / (distance.size1() * distance.size2()); while (true) { // initial cluster setup total_dissimilarity = assign_objects_to_clusters(matrix_distance(distance)); //vars to keep track of minimum double minTotalCost = DBL_MAX; medoid_id minMedoid = 0; object_id minObject = 0; //iterate over each medoid for (medoid_id i=0; i < k; i++) { //iterate over all non-medoid objects for (object_id h = 0; h < cluster_ids.size(); h++) { if (is_medoid(h)) continue; //see if the total cost of swapping i & h was less than min double curCost = cost(i, h, distance); if (curCost < minTotalCost) { minTotalCost = curCost; minMedoid = i; minObject = h; } } } // bail if we can't gain anything more (we've converged) if (minTotalCost >= -tolerance) break; // install the new medoid if we found a beneficial swap medoid_ids[minMedoid] = minObject; cluster_ids[minObject] = minMedoid; } if (sort_medoids) sort(); }
// Main loop for an instance of the algorithm. double run() { size_t i; size_t j; size_t k; printf("Initialization.\n"); init_medoids(); if(verbose) print_medoids(medoids); for(k = 0; k < clustc; ++k) { for(j = 0; j < dmatrixc; ++j) { weights[k][j] = 1.0; } } if(verbose) print_weights(weights); update_memb(); if(verbose) print_memb(memb); double prev_adeq = 0.0; double adeq = adequacy_obj(false); printf("Adequacy: %.20lf\n", adeq); double diff = fabs(adeq - prev_adeq); for(i = 1; i <= max_iter && diff > epsilon; ++i) { printf("Iteration %d.\n", i); prev_adeq = adeq; adequacy_cluster(false); update_medoids(); adeq = adequacy_cluster(true); if(verbose) { print_medoids(medoids); printf("Adequacy1: %.20lf\n", adeq); } adequacy_cluster(false); update_weights(); adeq = adequacy_cluster(true); if(verbose) { print_weights(weights); printf("Adequacy2: %.20lf\n", adeq); } adequacy_obj(false); update_memb(); adeq = adequacy_obj(true); if(verbose) print_memb(memb); printf("Adequacy: %.20lf\n", adeq); if(dgt(adeq, prev_adeq)) { printf("Warn: current adequacy is greater than " "previous iteration (%.20lf)\n", adeq - prev_adeq); } diff = fabs(adeq - prev_adeq); } printf("Adequacy difference threshold reached (%.20lf).\n", diff); return adeq; }