コード例 #1
0
ファイル: qFinderDMM.cpp プロジェクト: azmfaridee/mothur
void qFinderDMM::kMeans(){
    try {
        
        vector<vector<double> > relativeAbundance(numSamples);
        vector<vector<double> > alphaMatrix;
        
        alphaMatrix.resize(numPartitions);
        lambdaMatrix.resize(numPartitions);
        for(int i=0;i<numPartitions;i++){
            alphaMatrix[i].assign(numOTUs, 0);
            lambdaMatrix[i].assign(numOTUs, 0);
        }
        
        //get relative abundance
        for(int i=0;i<numSamples;i++){
            if (m->control_pressed) {  return; }
            int groupTotal = 0;
            
            relativeAbundance[i].assign(numOTUs, 0.0);
            
            for(int j=0;j<numOTUs;j++){
                groupTotal += countMatrix[i][j];
            }
            for(int j=0;j<numOTUs;j++){
                relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
            }
        }
        
        //randomly assign samples into partitions
        zMatrix.resize(numPartitions);
        for(int i=0;i<numPartitions;i++){
            zMatrix[i].assign(numSamples, 0);
        }
        
        for(int i=0;i<numSamples;i++){
            zMatrix[rand()%numPartitions][i] = 1;
        }
        
        double maxChange = 1;
        int maxIters = 1000;
        int iteration = 0;
        
        weights.assign(numPartitions, 0);
        
        while(maxChange > 1e-6 && iteration < maxIters){
            
            if (m->control_pressed) {  return; }
            //calcualte average relative abundance
            maxChange = 0.0000;
            for(int i=0;i<numPartitions;i++){
                
                double normChange = 0.0;
                
                weights[i] = 0;
                
                for(int j=0;j<numSamples;j++){
                    weights[i] += (double)zMatrix[i][j];
                }
                
                vector<double> averageRelativeAbundance(numOTUs, 0);
                for(int j=0;j<numOTUs;j++){
                    for(int k=0;k<numSamples;k++){
                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
                    }
                }
                
                for(int j=0;j<numOTUs;j++){
                    averageRelativeAbundance[j] /= weights[i];
                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
                    normChange += difference * difference;
                    alphaMatrix[i][j] = averageRelativeAbundance[j];
                }
                
                normChange = sqrt(normChange);
                
                if(normChange > maxChange){ maxChange = normChange; }
            }
            
            
            //calcualte distance between each sample in partition adn the average relative abundance
            for(int i=0;i<numSamples;i++){
                if (m->control_pressed) {  return; }
                
                double normalizationFactor = 0;
                vector<double> totalDistToPartition(numPartitions, 0);
                
                for(int j=0;j<numPartitions;j++){
                    for(int k=0;k<numOTUs;k++){
                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
                        totalDistToPartition[j] += difference * difference;
                    }
                    totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
                    normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
                }
                
                
                for(int j=0;j<numPartitions;j++){
                    zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
                }
                
            }
            
            iteration++;
            //        cout << "K means: " << iteration << '\t' << maxChange << endl;
            
        }
        
        //    cout << "Iter:-1";
        for(int i=0;i<numPartitions;i++){
            weights[i] = 0.0000;
            
            for(int j=0;j<numSamples;j++){
                weights[i] += zMatrix[i][j];
            }
            //        printf("\tw_%d=%.3f", i, weights[i]);
        }
        //    cout << endl;
        
        
        for(int i=0;i<numOTUs;i++){
            if (m->control_pressed) {  return; }
            for(int j=0;j<numPartitions;j++){
                if(alphaMatrix[j][i] > 0){
                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
                }
                else{
                    lambdaMatrix[j][i] = -10.0;
                }
            }
        }
    }
    catch(exception& e){
        m->errorOut(e, "qFinderDMM", "kMeans");
        exit(1);
    }
}
コード例 #2
0
/**
 * Runs a soft k-means style clustering of the samples on their relative
 * abundances and stores the outcome in the members zMatrix, weights and
 * lambdaMatrix. The member error is also resized to numPartitions rows of
 * numOTUs entries (newly created entries are 0.0).
 *
 * Steps:
 *   1. Each sample's counts (countMatrix) are divided by the sample total.
 *   2. The sample indices are shuffled; the first samples are handed out one
 *      per partition and the remaining ones round-robin over the partitions.
 *   3. Until the largest Euclidean change of any partition centroid is
 *      <= 1e-6, or 1000 iterations have run:
 *        - each centroid becomes the zMatrix-weighted mean of the samples'
 *          relative abundances;
 *        - each sample's memberships become exp(-50 * d) normalised over all
 *          partitions, where d is the Euclidean distance to the centroid.
 *   4. weights[i] is set to the summed membership of partition i, and
 *      lambdaMatrix holds log(centroid) for positive centroid entries and
 *      -10.0 otherwise.
 *
 * @return 0, both on completion and when m->control_pressed interrupts the
 *         run (in which case the members are left partially updated).
 *
 * Any std::exception is reported through m->errorOut and the process exits
 * with status 1.
 */
int CommunityTypeFinder::findkMeans(){
    try {
        error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); }
        vector<vector<double> > relativeAbundance(numSamples);
        vector<vector<double> > alphaMatrix;
        
        alphaMatrix.resize(numPartitions);
        lambdaMatrix.resize(numPartitions);
        for(int i=0;i<numPartitions;i++){
            alphaMatrix[i].assign(numOTUs, 0);
            lambdaMatrix[i].assign(numOTUs, 0);
        }
        
        //get relative abundance
        for(int i=0;i<numSamples;i++){
            if (m->control_pressed) {  return 0; }
            int groupTotal = 0;
            
            relativeAbundance[i].assign(numOTUs, 0.0);
            
            for(int j=0;j<numOTUs;j++){
                groupTotal += countMatrix[i][j];
            }
            
            //a sample without any counts keeps its all-zero profile; dividing by a
            //zero total would create NaNs that spread to every centroid and membership
            if (groupTotal != 0) {
                for(int j=0;j<numOTUs;j++){
                    relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
                }
            }
        }
        
        //randomly assign samples into partitions
        zMatrix.resize(numPartitions);
        for(int i=0;i<numPartitions;i++){
            zMatrix[i].assign(numSamples, 0);
        }
        
        //randomize samples
        vector<int> temp;
        for (int i = 0; i < numSamples; i++) { temp.push_back(i); }
        random_shuffle(temp.begin(), temp.end());
        
        //assign each partition at least one random sample; stop once the samples
        //run out so temp is never indexed past its end when numPartitions > numSamples
        int numAssignedSamples = 0;
        for (int i = 0; i < numPartitions && numAssignedSamples < numSamples; i++) {
            zMatrix[i][temp[numAssignedSamples]] = 1;
            numAssignedSamples++;
        }
        
        //assign rest of samples to partitions
        //(count % 0 is undefined, so nothing is assigned when there are no partitions)
        if (numPartitions > 0) {
            int count = 0;
            for(int i=numAssignedSamples;i<numSamples;i++){
                zMatrix[count%numPartitions][temp[i]] = 1;
                count++;
            }
        }
        
        double maxChange = 1;
        int maxIters = 1000;
        int iteration = 0;
        
        weights.assign(numPartitions, 0);
        
        while(maxChange > 1e-6 && iteration < maxIters){
            
            if (m->control_pressed) {  return 0; }
            
            //calculate average relative abundance of each partition
            maxChange = 0.0000;
            for(int i=0;i<numPartitions;i++){
                
                double normChange = 0.0;
                
                weights[i] = 0;
                
                for(int j=0;j<numSamples;j++){
                    weights[i] += (double)zMatrix[i][j];
                }
                
                vector<double> averageRelativeAbundance(numOTUs, 0);
                for(int j=0;j<numOTUs;j++){
                    for(int k=0;k<numSamples;k++){
                        averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
                    }
                }
                
                //a partition that received no sample has all-zero sums; skip the 0/0
                //division and keep a zero centroid instead of NaN
                const bool hasMembers = (weights[i] > 0);
                
                for(int j=0;j<numOTUs;j++){
                    if (hasMembers) { averageRelativeAbundance[j] /= weights[i]; }
                    
                    double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
                    normChange += difference * difference;
                    alphaMatrix[i][j] = averageRelativeAbundance[j];
                }
                
                normChange = sqrt(normChange);
                
                if(normChange > maxChange){ maxChange = normChange; }
            }
            
            
            //calculate distance between each sample and the average relative abundance
            //of each partition, then turn the distances into memberships
            for(int i=0;i<numSamples;i++){
                if (m->control_pressed) {  return 0; }
                
                double normalizationFactor = 0;
                vector<double> partitionAffinity(numPartitions, 0);
                
                for(int j=0;j<numPartitions;j++){
                    double squaredDistance = 0;
                    for(int k=0;k<numOTUs;k++){
                        double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
                        squaredDistance += difference * difference;
                    }
                    
                    //evaluated once and reused for both the sum and the membership
                    partitionAffinity[j] = exp(-50.0 * sqrt(squaredDistance));
                    normalizationFactor += partitionAffinity[j];
                }
                
                for(int j=0;j<numPartitions;j++){
                    zMatrix[j][i] = partitionAffinity[j] / normalizationFactor;
                }
                
            }
            
            iteration++;
        }
        
        //final partition weights are the summed memberships
        for(int i=0;i<numPartitions;i++){
            weights[i] = 0.0000;
            
            for(int j=0;j<numSamples;j++){
                weights[i] += zMatrix[i][j];
            }
        }
        
        //log of the centroids; entries that are not positive get -10.0 instead
        for(int i=0;i<numOTUs;i++){
            if (m->control_pressed) {  return 0; }
            for(int j=0;j<numPartitions;j++){
                if(alphaMatrix[j][i] > 0){
                    lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
                }
                else{
                    lambdaMatrix[j][i] = -10.0;
                }
            }
        }
        return 0;
    }
    catch(exception& e){
        m->errorOut(e, "CommunityTypeFinder", "findkMeans");
        exit(1);
    }
}