void qFinderDMM::kMeans(){ try { vector<vector<double> > relativeAbundance(numSamples); vector<vector<double> > alphaMatrix; alphaMatrix.resize(numPartitions); lambdaMatrix.resize(numPartitions); for(int i=0;i<numPartitions;i++){ alphaMatrix[i].assign(numOTUs, 0); lambdaMatrix[i].assign(numOTUs, 0); } //get relative abundance for(int i=0;i<numSamples;i++){ if (m->control_pressed) { return; } int groupTotal = 0; relativeAbundance[i].assign(numOTUs, 0.0); for(int j=0;j<numOTUs;j++){ groupTotal += countMatrix[i][j]; } for(int j=0;j<numOTUs;j++){ relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal; } } //randomly assign samples into partitions zMatrix.resize(numPartitions); for(int i=0;i<numPartitions;i++){ zMatrix[i].assign(numSamples, 0); } for(int i=0;i<numSamples;i++){ zMatrix[rand()%numPartitions][i] = 1; } double maxChange = 1; int maxIters = 1000; int iteration = 0; weights.assign(numPartitions, 0); while(maxChange > 1e-6 && iteration < maxIters){ if (m->control_pressed) { return; } //calcualte average relative abundance maxChange = 0.0000; for(int i=0;i<numPartitions;i++){ double normChange = 0.0; weights[i] = 0; for(int j=0;j<numSamples;j++){ weights[i] += (double)zMatrix[i][j]; } vector<double> averageRelativeAbundance(numOTUs, 0); for(int j=0;j<numOTUs;j++){ for(int k=0;k<numSamples;k++){ averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j]; } } for(int j=0;j<numOTUs;j++){ averageRelativeAbundance[j] /= weights[i]; double difference = averageRelativeAbundance[j] - alphaMatrix[i][j]; normChange += difference * difference; alphaMatrix[i][j] = averageRelativeAbundance[j]; } normChange = sqrt(normChange); if(normChange > maxChange){ maxChange = normChange; } } //calcualte distance between each sample in partition adn the average relative abundance for(int i=0;i<numSamples;i++){ if (m->control_pressed) { return; } double normalizationFactor = 0; vector<double> totalDistToPartition(numPartitions, 0); for(int j=0;j<numPartitions;j++){ for(int k=0;k<numOTUs;k++){ double difference = alphaMatrix[j][k] - relativeAbundance[i][k]; totalDistToPartition[j] += difference * difference; } totalDistToPartition[j] = sqrt(totalDistToPartition[j]); normalizationFactor += exp(-50.0 * totalDistToPartition[j]); } for(int j=0;j<numPartitions;j++){ zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor; } } iteration++; // cout << "K means: " << iteration << '\t' << maxChange << endl; } // cout << "Iter:-1"; for(int i=0;i<numPartitions;i++){ weights[i] = 0.0000; for(int j=0;j<numSamples;j++){ weights[i] += zMatrix[i][j]; } // printf("\tw_%d=%.3f", i, weights[i]); } // cout << endl; for(int i=0;i<numOTUs;i++){ if (m->control_pressed) { return; } for(int j=0;j<numPartitions;j++){ if(alphaMatrix[j][i] > 0){ lambdaMatrix[j][i] = log(alphaMatrix[j][i]); } else{ lambdaMatrix[j][i] = -10.0; } } } } catch(exception& e){ m->errorOut(e, "qFinderDMM", "kMeans"); exit(1); } }
int CommunityTypeFinder::findkMeans(){ try { error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); } vector<vector<double> > relativeAbundance(numSamples); vector<vector<double> > alphaMatrix; alphaMatrix.resize(numPartitions); lambdaMatrix.resize(numPartitions); for(int i=0;i<numPartitions;i++){ alphaMatrix[i].assign(numOTUs, 0); lambdaMatrix[i].assign(numOTUs, 0); } //get relative abundance for(int i=0;i<numSamples;i++){ if (m->control_pressed) { return 0; } int groupTotal = 0; relativeAbundance[i].assign(numOTUs, 0.0); for(int j=0;j<numOTUs;j++){ groupTotal += countMatrix[i][j]; } for(int j=0;j<numOTUs;j++){ relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal; } } //randomly assign samples into partitions zMatrix.resize(numPartitions); for(int i=0;i<numPartitions;i++){ zMatrix[i].assign(numSamples, 0); } //randomize samples vector<int> temp; for (int i = 0; i < numSamples; i++) { temp.push_back(i); } random_shuffle(temp.begin(), temp.end()); //assign each partition at least one random sample int numAssignedSamples = 0; for (int i = 0; i < numPartitions; i++) { zMatrix[i][temp[numAssignedSamples]] = 1; numAssignedSamples++; } //assign rest of samples to partitions int count = 0; for(int i=numAssignedSamples;i<numSamples;i++){ zMatrix[count%numPartitions][temp[i]] = 1; count++; } double maxChange = 1; int maxIters = 1000; int iteration = 0; weights.assign(numPartitions, 0); while(maxChange > 1e-6 && iteration < maxIters){ if (m->control_pressed) { return 0; } //calcualte average relative abundance maxChange = 0.0000; for(int i=0;i<numPartitions;i++){ double normChange = 0.0; weights[i] = 0; for(int j=0;j<numSamples;j++){ weights[i] += (double)zMatrix[i][j]; } vector<double> averageRelativeAbundance(numOTUs, 0); for(int j=0;j<numOTUs;j++){ for(int k=0;k<numSamples;k++){ averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j]; } } for(int j=0;j<numOTUs;j++){ averageRelativeAbundance[j] /= weights[i]; double difference = averageRelativeAbundance[j] - alphaMatrix[i][j]; normChange += difference * difference; alphaMatrix[i][j] = averageRelativeAbundance[j]; } normChange = sqrt(normChange); if(normChange > maxChange){ maxChange = normChange; } } //calcualte distance between each sample in partition and the average relative abundance for(int i=0;i<numSamples;i++){ if (m->control_pressed) { return 0; } double normalizationFactor = 0; vector<double> totalDistToPartition(numPartitions, 0); for(int j=0;j<numPartitions;j++){ for(int k=0;k<numOTUs;k++){ double difference = alphaMatrix[j][k] - relativeAbundance[i][k]; totalDistToPartition[j] += difference * difference; } totalDistToPartition[j] = sqrt(totalDistToPartition[j]); normalizationFactor += exp(-50.0 * totalDistToPartition[j]); } for(int j=0;j<numPartitions;j++){ zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor; } } iteration++; // cout << "K means: " << iteration << '\t' << maxChange << endl; } // cout << "Iter:-1"; for(int i=0;i<numPartitions;i++){ weights[i] = 0.0000; for(int j=0;j<numSamples;j++){ weights[i] += zMatrix[i][j]; } // printf("\tw_%d=%.3f", i, weights[i]); } // cout << endl; for(int i=0;i<numOTUs;i++){ if (m->control_pressed) { return 0; } for(int j=0;j<numPartitions;j++){ if(alphaMatrix[j][i] > 0){ lambdaMatrix[j][i] = log(alphaMatrix[j][i]); } else{ lambdaMatrix[j][i] = -10.0; } } } return 0; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "kMeans"); exit(1); } }