Esempio n. 1
0
  /// Partition the objects described by `distance` into `k` clusters with a
  /// greedy medoid-swap search (PAM).
  ///
  /// Every pass reassigns all objects through assign_objects_to_clusters(),
  /// evaluates cost(i, h, distance) for each (medoid slot i, non-medoid object h)
  /// pair, and installs the single swap with the lowest cost. The search stops
  /// as soon as no swap has a cost below -tolerance, where tolerance is
  /// `epsilon` times the mean entry of `distance`.
  ///
  /// On return `medoid_ids`, `cluster_ids` and `total_dissimilarity` reflect the
  /// last assignment pass; medoids are additionally passed through sort() when
  /// `sort_medoids` is set.
  ///
  /// @param distance        Dissimilarity matrix; must be square.
  /// @param k               Number of clusters; must not exceed distance.size1().
  /// @param initial_medoids Optional array of at least `k` object ids used as the
  ///                        starting medoids. When null, init_medoids(k, distance)
  ///                        chooses them.
  /// @throws std::logic_error if `k` exceeds the number of objects or the matrix
  ///                          is not square.
  void kmedoids::pam(const dissimilarity_matrix& distance, size_t k, const object_id *initial_medoids) {
    if (k > distance.size1()) {
      throw std::logic_error("Attempt to run PAM with more clusters than data.");
    }

    if (distance.size1() != distance.size2()) {
      throw std::logic_error("Error: distance matrix is not square!");
    }

    // One cluster id per object.
    cluster_ids.resize(distance.size1());

    // Seed the medoids: either take the caller's choice or let init_medoids pick.
    if (initial_medoids) {
      medoid_ids.clear();
      copy(initial_medoids, initial_medoids + k, back_inserter(medoid_ids));
    } else {
      init_medoids(k, distance);
    }

    // Tolerance is epsilon times the mean magnitude of the distances (which
    // *should* all be non-negative). An empty matrix has no mean: dividing by
    // zero entries would yield NaN and defeat the convergence test below, so
    // fall back to a zero tolerance in that case.
    const size_t num_entries = distance.size1() * distance.size2();
    const double tolerance =
      (num_entries > 0) ? epsilon * sum(distance) / num_entries : 0.0;

    while (true) {
      // (Re)assign every object to a cluster for the current medoid set.
      total_dissimilarity = assign_objects_to_clusters(matrix_distance(distance));

      // Best swap candidate found in this pass.
      double minTotalCost = DBL_MAX;
      medoid_id minMedoid = 0;
      object_id minObject = 0;

      // Try replacing each medoid slot i ...
      for (medoid_id i = 0; i < k; i++) {
        // ... with each object h that is not currently a medoid.
        for (object_id h = 0; h < cluster_ids.size(); h++) {
          if (is_medoid(h)) continue;

          // Keep the swap with the lowest total cost seen so far.
          double curCost = cost(i, h, distance);
          if (curCost < minTotalCost) {
            minTotalCost = curCost;
            minMedoid = i;
            minObject = h;
          }
        }
      }

      // Converged: no swap improves the clustering by more than the tolerance.
      // Written as !(a < b) rather than (a >= b) so that a NaN tolerance (e.g.
      // from NaN entries in the matrix) ends the search instead of looping
      // forever; for finite values the two forms are equivalent.
      if (!(minTotalCost < -tolerance)) break;

      // Install the beneficial swap; the next pass reassigns all objects.
      medoid_ids[minMedoid] = minObject;
      cluster_ids[minObject] = minMedoid;
    }

    if (sort_medoids) sort();
  }
Esempio n. 2
0
// Main loop for an instance of the algorithm.
//
// Seeds the medoids, resets every weight to 1.0 and computes the initial
// memberships. It then repeats the update_medoids() / update_weights() /
// update_memb() sequence for at most max_iter iterations, stopping early once
// the absolute change in adequacy between two consecutive iterations is no
// longer greater than epsilon.
//
// Progress is reported on stdout; intermediate medoids, weights and
// memberships are printed only when `verbose` is set.
//
// Returns the adequacy computed after the last membership update (or the
// initial adequacy when no iteration runs).
double run() {
    size_t i;

    printf("Initialization.\n");
    init_medoids();
    if(verbose) print_medoids(medoids);

    // Start from uniform weights.
    for(size_t k = 0; k < clustc; ++k) {
        for(size_t j = 0; j < dmatrixc; ++j) {
            weights[k][j] = 1.0;
        }
    }
    if(verbose) print_weights(weights);

    update_memb();
    if(verbose) print_memb(memb);

    double prev_adeq = 0.0;
    double adeq = adequacy_obj(false);
    printf("Adequacy: %.20lf\n", adeq);
    double diff = fabs(adeq - prev_adeq);

    for(i = 1; i <= max_iter && diff > epsilon; ++i) {
        // i is a size_t, so it must be printed with %zu (not %d).
        printf("Iteration %zu.\n", i);
        prev_adeq = adeq;

        // NOTE(review): the adequacy_*(false) calls below discard their result;
        // presumably they are made for a side effect ahead of each update step
        // - confirm against their definitions before reordering or removing.

        // Step 1: medoids.
        adequacy_cluster(false);
        update_medoids();
        adeq = adequacy_cluster(true);
        if(verbose) {
            print_medoids(medoids);
            printf("Adequacy1: %.20lf\n", adeq);
        }

        // Step 2: weights.
        adequacy_cluster(false);
        update_weights();
        adeq = adequacy_cluster(true);
        if(verbose) {
            print_weights(weights);
            printf("Adequacy2: %.20lf\n", adeq);
        }

        // Step 3: memberships.
        adequacy_obj(false);
        update_memb();
        adeq = adequacy_obj(true);
        if(verbose) print_memb(memb);
        printf("Adequacy: %.20lf\n", adeq);

        // The adequacy is not expected to increase from one iteration to the
        // next; warn when dgt() reports that it did.
        if(dgt(adeq, prev_adeq)) {
            printf("Warn: current adequacy is greater than "
                    "previous iteration (%.20lf)\n",
                    adeq - prev_adeq);
        }
        diff = fabs(adeq - prev_adeq);
    }

    // NOTE(review): this message is printed whichever condition ended the loop,
    // including when max_iter was exhausted with diff still above epsilon.
    printf("Adequacy difference threshold reached (%.20lf).\n",
            diff);
    return adeq;
}