Exemple #1
0
void kmeans::process(const dataset & p_data, const index_sequence & p_indexes, cluster_data & p_result) {
    m_ptr_data = &p_data;
    m_ptr_indexes = &p_indexes;

    m_ptr_result = (kmeans_data *) &p_result;

    if (p_data[0].size() != m_initial_centers[0].size()) {
        throw std::invalid_argument("Dimension of the input data and dimension of the initial cluster centers must be the same.");
    }

    m_ptr_result->centers().assign(m_initial_centers.begin(), m_initial_centers.end());

    if (m_ptr_result->is_observed()) {
        cluster_sequence sequence;
        update_clusters(m_initial_centers, sequence);

        m_ptr_result->evolution_centers().push_back(m_initial_centers);
        m_ptr_result->evolution_clusters().push_back(sequence);
    }

    double current_change = std::numeric_limits<double>::max();

    for(std::size_t iteration = 0; iteration < m_itermax && current_change > m_tolerance; iteration++) {
        update_clusters(m_ptr_result->centers(), m_ptr_result->clusters());
        current_change = update_centers(m_ptr_result->clusters(), m_ptr_result->centers());

        if (m_ptr_result->is_observed()) {
            m_ptr_result->evolution_centers().push_back(m_ptr_result->centers());
            m_ptr_result->evolution_clusters().push_back(m_ptr_result->clusters());
        }
    }

    calculate_total_wce();
}
Exemple #2
0
void kmedians::process(const dataset & data, cluster_data & output_result) {
    m_ptr_data = &data;
    m_ptr_result = (kmedians_data *) &output_result;

    if (data[0].size() != m_initial_medians[0].size()) {
        throw std::invalid_argument("kmedians: dimension of the input data and dimension of the initial medians must be equal.");
    }

    m_ptr_result->medians() = m_initial_medians;

    double changes = std::numeric_limits<double>::max();
    double prev_changes = 0.0;

    std::size_t counter_repeaters = 0;

    for (std::size_t iteration = 0; (iteration < m_max_iter) && (changes > m_tolerance) && (counter_repeaters < 10); iteration++)
    {
        update_clusters(m_ptr_result->medians(), m_ptr_result->clusters());
        changes = update_medians(m_ptr_result->clusters(), m_ptr_result->medians());

        double change_difference = std::abs(changes - prev_changes);
        if (change_difference < THRESHOLD_CHANGE) {
            counter_repeaters++;
        }
        else {
            counter_repeaters = 0;
        }

        prev_changes = changes;
    }

    m_ptr_data = nullptr;
    m_ptr_result = nullptr;
}
Exemple #3
0
void kmedians::process(const std::vector<point> & data) {
    m_ptr_data = (std::vector<point> *) &data;
    if (data[0].size() != m_medians[0].size()) {
        throw std::runtime_error("CCORE [kmedians]: dimension of the input data and dimension of the initial cluster medians must be equal.");
    }

    m_clusters.clear();

    double stop_condition = m_tolerance * m_tolerance;
    double changes = 0.0;
    double prev_changes = 0.0;

    size_t counter_repeaters = 0;

    do {
        update_clusters();
        changes = update_medians();

        double change_difference = abs(changes - prev_changes);
        if (change_difference < 0.000001) {
            counter_repeaters++;
        }
        else {
            counter_repeaters = 0;
        }

        prev_changes = changes;
    }
    while ((changes > stop_condition) && (counter_repeaters < 10));

    m_ptr_data = nullptr;
}
Exemple #4
0
/**
 * @brief Alternates cluster assignment and center recomputation until the value returned by
 *        `update_centers` is no longer greater than `tolerance`.
 *
 * @param[in,out] improved_clusters  Clusters handed to `update_clusters` / `update_centers`.
 * @param[in,out] improved_centers   Centers handed to `update_clusters` / `update_centers`.
 * @param[in]     available_indexes  Index list forwarded unchanged to `update_clusters`.
 *
 * There is no iteration cap: the loop ends only through the tolerance check.
 */
void xmeans::improve_parameters(std::vector<std::vector<unsigned int> *> * improved_clusters, std::vector<std::vector<double> > * improved_centers, const std::vector<unsigned int> * const available_indexes) {
    /* Seeded with the largest double so the tolerance check passes before the first round. */
    for (double shift = std::numeric_limits<double>::max();
         shift > tolerance;
         shift = update_centers(improved_clusters, improved_centers))
    {
        update_clusters(improved_clusters, improved_centers, available_indexes);
    }
}
Exemple #5
0
/**
 * @brief Alternates `update_clusters()` and `update_centers()` until the value returned by
 *        `update_centers()` is no longer greater than `tolerance`.
 *
 * There is no iteration cap: the loop ends only through the tolerance check.
 */
void kmeans::process(void) {
    /* Seeded with the largest double so the tolerance check passes before the first round. */
    for (double shift = std::numeric_limits<double>::max(); shift > tolerance; shift = update_centers()) {
        update_clusters();
    }
}
Exemple #6
0
void CLUSTER::cluster_iso() {
  bool ITER = false;
  int ITER_N = 0;
  float LO_Q = (k+1)/2;
  float HI_Q = 2*k;
  //print();
  while (ITER_N++ < MAX_ITER) {
    //cout << "< Updating membership >" << endl;
    update_membership();
    //print();
    //cout << "< Updating clusters >" << endl;
    float AV_DIST = update_clusters();
    //print();
    if (csize < LO_Q || (csize < HI_Q && ITER)) {
      int u = csize;
      //cout << "< Number of clusters: " << csize << " >" << endl;
      for (int v = 0; u-- > 0; v++) {      
	if (centers[v]->deviation > BREAKING_POINT) {
	  if(LO_Q > csize || 
	     (centers[v]->distance > AV_DIST && centers[v]->size > 2*MIN_SIZE)){
            //cout << "splitting " << centers[v]->loc << endl;
            centers[add_cluster(centers[v]->loc + 0.01)]->size = 0;
	    centers[v]->loc -= 0.01;
	    last_action = _split;
	  } else {
	    if (ITER_N > 1) {
	      if (last_action != _lump) {
		last_action = try_lumping();
	      } else {
		if (csize == 0) break; else continue;
	      }
	    } else {
	      last_action = try_lumping();
	    }	
	  }
	}
      }      
    } else {
      last_action = try_lumping();
    }
    ITER = 1 - ITER;
  } 
}
Exemple #7
0
// Split-then-merge clustering driver.
//
// Returns immediately when one_to_one() reports true. Otherwise:
//   1. Split phase: repeatedly refresh memberships/statistics and split every cluster whose
//      deviation exceeds BREAKING_POINT (only while the cluster count is below 2*k). The
//      phase ends after four consecutive rounds without a split. If that happens while
//      there are still fewer than k clusters, BREAKING_POINT is lowered to just under the
//      average deviation and the phase is given another chance, unless the threshold did
//      not change or the average deviation is zero.
//   2. Merge phase: while there are more than k clusters, sort them and merge the pair of
//      neighbouring clusters whose locations are closest, once per surplus cluster.
//   3. A final membership/statistics refresh.
//
// NOTE(review): the split offsets (+0.0001 / -0.00001) differ by a factor of ten; confirm
// that the asymmetry is intended.
void CLUSTER::cluster_iso2() {
  if (one_to_one()) return;

  const float HI_Q = 2*k;
  int last = 0;  // rounds elapsed since the most recent split

  while (last < 4) {
    last_action = _nothing;
    update_membership();
    update_clusters();

    if (csize < HI_Q) {
      // Only clusters present at the start of the round are visited, even though
      // splitting appends new ones.
      const int initial_count = csize;
      for (int v = 0; v < initial_count; v++) {
        if (centers[v]->deviation > BREAKING_POINT) {
          centers[add_cluster(centers[v]->loc + 0.0001)]->size = 0;
          centers[v]->loc -= 0.00001;
          last = 0;
        }
      }
    }

    last++;

    // With no clusters at all the average below would be 0/0 (NaN), which would poison
    // BREAKING_POINT and keep resetting `last` forever; in that case nothing can be
    // split, so `last` is left at 4 and the loop ends.
    if (last == 4 && csize < k && csize > 0) {
      const float OBP = BREAKING_POINT;
      float DV = 0;
      // find average deviation and set BREAKING_POINT to it
      for (int v = 0; v < csize; v++) DV += centers[v]->deviation;
      DV /= csize;
      // make it a little smaller than the average to be able to break a single cluster
      BREAKING_POINT = DV-0.0000001;
      // take another shot _only_ if this is the first time this BP is used
      if (OBP != BREAKING_POINT) last = 0;
      // there are no divisible clusters, quit
      if (DV == 0) last = 4;
    }
  }

  // TODO: when csize < k, divide the largest clusters in 2?

  if (csize > k) {
    for (int EXTRA = csize - k; EXTRA > 0; EXTRA--) {
      update_membership();
      update_clusters();
      cluster_insertion_sort();

      // A merge needs two clusters; centers[1] must not be read otherwise.
      if (csize < 2) break;

      // Find the adjacent pair (l, l+1) with the smallest distance between locations.
      float s = ::distance(centers[0]->loc, centers[1]->loc);
      int l = 0;
      for (int v = 1; v < csize-1; v++) {
        const float m = ::distance(centers[v]->loc, centers[v+1]->loc);
        if (m < s) {
          s = m;
          l = v;
        }
      }

      // Replace the pair by a single cluster at the midpoint of the two locations.
      centers[l]->loc = (centers[l]->loc + centers[l+1]->loc)/2;
      erase_cluster(l+1);
    }
  }

  update_membership();
  update_clusters();
}