int main() { double data[] = { 0.0, 0.2, 0.4, 0.3, 0.2, 0.4, 0.4, 0.2, 0.4, 0.5, 0.2, 0.4, 5.0, 5.2, 8.4, 6.0, 5.2, 7.4, 4.0, 5.2, 4.4, 10.3, 10.4, 10.5, 10.1, 10.6, 10.7, 11.3, 10.2, 10.9 }; const int size = 10; //Number of samples const int dim = 3; //Dimension of feature const int cluster_num = 4; //Cluster number KMeans* kmeans = new KMeans(dim,cluster_num); int* labels = new int[size]; kmeans->SetInitMode(KMeans::InitUniform); kmeans->Cluster(data,size,labels); for(int i = 0; i < size; ++i) { printf("%f, %f, %f belongs to %d cluster\n", data[i*dim+0], data[i*dim+1], data[i*dim+2], labels[i]); } delete []labels; delete kmeans; return 0; }
void GMM::Init(const char* sampleFileName) { const double MIN_VAR = 1E-10; KMeans* kmeans = new KMeans(m_dimNum, m_mixNum); kmeans->SetInitMode(KMeans::InitUniform); kmeans->Cluster(sampleFileName, "gmm_init.tmp"); int* counts = new int[m_mixNum]; double* overMeans = new double[m_dimNum]; // Overall mean of training data for (int i = 0; i < m_mixNum; i++) { counts[i] = 0; m_priors[i] = 0; memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum); memset(m_vars[i], 0, sizeof(double) * m_dimNum); } memset(overMeans, 0, sizeof(double) * m_dimNum); memset(m_minVars, 0, sizeof(double) * m_dimNum); // Open the sample and label file to initialize the model ifstream sampleFile(sampleFileName, ios_base::binary); //assert(sampleFile); ifstream labelFile("gmm_init.tmp", ios_base::binary); //assert(labelFile); int size = 0; sampleFile.read((char*)&size, sizeof(int)); sampleFile.seekg(2 * sizeof(int), ios_base::beg); labelFile.seekg(sizeof(int), ios_base::beg); double* x = new double[m_dimNum]; int label = -1; for (int i = 0; i < size; i++) { sampleFile.read((char*)x, sizeof(double) * m_dimNum); labelFile.read((char*)&label, sizeof(int)); // Count each Gaussian counts[label]++; double* m = kmeans->GetMean(label); for (int d = 0; d < m_dimNum; d++) { m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]); } // Count the overall mean and variance. for (int d = 0; d < m_dimNum; d++) { overMeans[d] += x[d]; m_minVars[d] += x[d] * x[d]; } } // Compute the overall variance (* 0.01) as the minimum variance. for (int d = 0; d < m_dimNum; d++) { overMeans[d] /= size; m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d])); } // Initialize each Gaussian. for (int i = 0; i < m_mixNum; i++) { m_priors[i] = 1.0 * counts[i] / size; if (m_priors[i] > 0) { for (int d = 0; d < m_dimNum; d++) { m_vars[i][d] = m_vars[i][d] / counts[i]; // A minimum variance for each dimension is required. if (m_vars[i][d] < m_minVars[d]) { m_vars[i][d] = m_minVars[d]; } } } else { memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum); cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n"; } } delete kmeans; delete[] x; delete[] counts; delete[] overMeans; sampleFile.close(); labelFile.close(); }
void GMM::Init(double *data, int N) { const double MIN_VAR = 1E-10; KMeans* kmeans = new KMeans(m_dimNum, m_mixNum); kmeans->SetInitMode(KMeans::InitUniform); int *Label; Label=new int[N]; kmeans->Cluster(data,N,Label); int* counts = new int[m_mixNum]; double* overMeans = new double[m_dimNum]; // Overall mean of training data for (int i = 0; i < m_mixNum; i++) { counts[i] = 0; m_priors[i] = 0; memcpy(m_means[i], kmeans->GetMean(i), sizeof(double) * m_dimNum); memset(m_vars[i], 0, sizeof(double) * m_dimNum); } memset(overMeans, 0, sizeof(double) * m_dimNum); memset(m_minVars, 0, sizeof(double) * m_dimNum); int size = 0; size=N; double* x = new double[m_dimNum]; int label = -1; for (int i = 0; i < size; i++) { for(int j=0;j<m_dimNum;j++) x[j]=data[i*m_dimNum+j]; label=Label[i]; // Count each Gaussian counts[label]++; double* m = kmeans->GetMean(label); for (int d = 0; d < m_dimNum; d++) { m_vars[label][d] += (x[d] - m[d]) * (x[d] - m[d]); } // Count the overall mean and variance. for (int d = 0; d < m_dimNum; d++) { overMeans[d] += x[d]; m_minVars[d] += x[d] * x[d]; } } // Compute the overall variance (* 0.01) as the minimum variance. for (int d = 0; d < m_dimNum; d++) { overMeans[d] /= size; m_minVars[d] = max(MIN_VAR, 0.01 * (m_minVars[d] / size - overMeans[d] * overMeans[d])); } // Initialize each Gaussian. for (int i = 0; i < m_mixNum; i++) { m_priors[i] = 1.0 * counts[i] / size; if (m_priors[i] > 0) { for (int d = 0; d < m_dimNum; d++) { m_vars[i][d] = m_vars[i][d] / counts[i]; // A minimum variance for each dimension is required. if (m_vars[i][d] < m_minVars[d]) { m_vars[i][d] = m_minVars[d]; } } } else { memcpy(m_vars[i], m_minVars, sizeof(double) * m_dimNum); cout << "[WARNING] Gaussian " << i << " of GMM is not used!\n"; } } delete kmeans; delete[] x; delete[] counts; delete[] overMeans; delete[] Label; }