vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) { try { map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string outputFileName = getOutputFileName("kruskall-wallis",variables); ofstream out; m->openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName); out << "OTULabel\tKW\tPvalue\n"; vector<int> significantOtuLabels; int numBins = lookup[0]->getNumBins(); //sanity check to make sure each treatment has a group in the shared file set<string> treatments; for (int j = 0; j < lookup.size(); j++) { string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category treatments.insert(treatment); } if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; } LinearAlgebra linear; for (int i = 0; i < numBins; i++) { if (m->control_pressed) { break; } vector<spearmanRank> values; for (int j = 0; j < lookup.size(); j++) { string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category spearmanRank temp(treatment, lookup[j]->getAbundance(i)); values.push_back(temp); } double pValue = 0.0; double H = linear.calcKruskalWallis(values, pValue); //output H and signifigance out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl; if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); } } out.close(); return significantOtuLabels; } catch(exception& e) { m->errorOut(e, "LefseCommand", "runKruskalWallis"); exit(1); } }
// Constructs the finder from a count matrix and a partition count and runs the whole
// fit inside the constructor.
//
// cm - count matrix, copied into countMatrix. The number of rows becomes numSamples and
//      the length of the first row becomes numOTUs; cm[0] is read unconditionally, so
//      the matrix must have at least one row.
// p  - number of partitions (stored in numPartitions).
//
// Steps, in order:
//   1. kMeans() followed by optimizeLambda().
//   2. Up to 100 rounds of calculatePiK() + optimizeLambda(); after each round the
//      weights are rebuilt as row sums of zMatrix and the loop stops once the negative
//      log likelihood changes by no more than 1.0e-6.
//   3. For each partition, the Hessian and its inverse supply logDeterminant and the
//      per-OTU entries of error.
//   4. laplace, bic and aic are derived from currNLL and the parameter count.
//
// Any std::exception is reported through m->errorOut and the process exits with 1.
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
    try {
        m = MothurOut::getInstance();

        numSamples = static_cast<int>(countMatrix.size());
        numOTUs = static_cast<int>(countMatrix[0].size());

        kMeans();
        optimizeLambda();

        currNLL = 0.0000;
        double delta = 1.0000;

        for (int pass = 0; delta > 1.0e-6 && pass < 100; pass++) {
            calculatePiK();
            optimizeLambda();

            // weights[k] = sum over samples of zMatrix[k][sample]
            for (int part = 0; part < numPartitions; part++) {
                weights[part] = 0.0000;
                for (int sample = 0; sample < numSamples; sample++) {
                    weights[part] += zMatrix[part][sample];
                }
            }

            const double latestNLL = getNegativeLogLikelihood();
            delta = abs(latestNLL - currNLL);
            currNLL = latestNLL;
        }

        error.resize(numPartitions);
        logDeterminant = 0.0000;

        LinearAlgebra algebra;

        // currentPartition is a member and is advanced here on purpose.
        // NOTE(review): getHessian() takes no arguments, so it presumably reads
        // currentPartition - confirm before turning this into a local index.
        currentPartition = 0;
        while (currentPartition < numPartitions) {
            error[currentPartition].assign(numOTUs, 0.0000);

            if (currentPartition > 0) {
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            }

            vector<vector<double> > hessian = getHessian();
            vector<vector<double> > inverse = algebra.getInverse(hessian);

            for (int otu = 0; otu < numOTUs; otu++) {
                logDeterminant += log(abs(hessian[otu][otu]));
                error[currentPartition][otu] = inverse[otu][otu];
            }

            currentPartition++;
        }

        int numParameters = numPartitions * numOTUs + numPartitions - 1;

        // NOTE(review): pi is written as the truncated literal 3.14159.
        const double determinantTerm = 0.5 * logDeterminant;
        const double parameterTerm = 0.5 * numParameters * log(2.0 * 3.14159);
        laplace = currNLL + determinantTerm - parameterTerm;
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
    }
    catch(exception& e) {
        m->errorOut(e, "qFinderDMM", "qFinderDMM");
        exit(1);
    }
}
// Constructs the finder on top of CommunityTypeFinder and runs the whole fit inside
// the constructor.
//
// cm - count matrix, assigned to countMatrix. Its row count becomes numSamples and the
//      length of its first row becomes numOTUs; row 0 is read unconditionally, so the
//      matrix must not be empty.
// p  - number of partitions (assigned to numPartitions).
//
// Sequence: findkMeans() and optimizeLambda() run first. Then up to 100 rounds of
// calculatePiK() + optimizeLambda() follow, each rebuilding weights from the rows of
// zMatrix, until the negative log likelihood moves by no more than 1.0e-6. Finally the
// per-partition Hessians feed logDeterminant and error, and laplace / bic / aic are
// computed from currNLL and the parameter count.
//
// Any std::exception is reported through m->errorOut and the process exits with 1.
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
    try {
        numPartitions = p;
        countMatrix = cm;
        numSamples = static_cast<int>(countMatrix.size());
        numOTUs = static_cast<int>(countMatrix[0].size());

        findkMeans();
        optimizeLambda();

        int iteration = 0;
        double delta = 1.0000;
        currNLL = 0.0000;

        while (iteration < 100 && delta > 1.0e-6) {
            calculatePiK();
            optimizeLambda();

            // Rebuild each partition weight as the sum of its zMatrix row.
            for (int k = 0; k < numPartitions; k++) {
                weights[k] = 0.0000;
                for (int s = 0; s < numSamples; s++) {
                    weights[k] += zMatrix[k][s];
                }
            }

            const double updatedNLL = getNegativeLogLikelihood();
            delta = abs(updatedNLL - currNLL);
            currNLL = updatedNLL;

            iteration++;
        }

        error.resize(numPartitions);
        logDeterminant = 0.0000;

        LinearAlgebra solver;

        // The loop counter is the member currentPartition, not a local.
        // NOTE(review): getHessian() takes no arguments, so it presumably depends on
        // currentPartition - confirm before changing the loop variable.
        for (currentPartition = 0; currentPartition < numPartitions; currentPartition++) {
            error[currentPartition].assign(numOTUs, 0.0000);

            if (m->debug) {
                m->mothurOut("current partition = " + toString(currentPartition) + "\n");
            }

            if (currentPartition > 0) {
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            }

            vector<vector<double> > hessian = getHessian();
            vector<vector<double> > hessianInverse = solver.getInverse(hessian);

            for (int otu = 0; otu < numOTUs; otu++) {
                logDeterminant += log(abs(hessian[otu][otu]));
                error[currentPartition][otu] = hessianInverse[otu][otu];
            }
        }

        int numParameters = numPartitions * numOTUs + numPartitions - 1;

        // NOTE(review): pi is written as the truncated literal 3.14159.
        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
    }
    catch(exception& e) {
        m->errorOut(e, "qFinderDMM", "qFinderDMM");
        exit(1);
    }
}
vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) { try { vector<int> significantOtuLabels; //if it exists and meets the following requirements run Wilcoxon /* 1. Subclass members all belong to same main class 2. Number of groups in each subclass is the same 3. anything else?? */ vector<string> subclasses; map<string, string> subclass2Class; map<string, int> subclassCounts; map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. bool error = false; for (int j = 0; j < lookup.size(); j++) { string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category string thisSub = designMap.get(group, subclass); map<string, string>::iterator it = subclass2Class.find(thisSub); if (it == subclass2Class.end()) { subclass2Class[thisSub] = treatment; subclassCounts[thisSub] = 1; vector<int> temp; temp.push_back(j); subClass2GroupIndex[thisSub] = temp; } else { subclassCounts[thisSub]++; subClass2GroupIndex[thisSub].push_back(j); if (it->second != treatment) { error = true; m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n"); } } } if (error) { return significantOtuLabels; } else { //check counts to make sure subclasses are the same size set<int> counts; for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); } if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. 
Ignoring wilcoxon.\n"); return significantOtuLabels; } } int numBins = lookup[0]->getNumBins(); vector<compGroup> comp; //find comparisons and fill comp map<string, int>::iterator itB; for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){ itB = it;itB++; for(itB;itB!=subclassCounts.end();itB++){ compGroup temp(it->first,itB->first); comp.push_back(temp); } } int numComp = comp.size(); if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n"); return significantOtuLabels; } map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string outputFileName = getOutputFileName("wilcoxon",variables); ofstream out; m->openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName); out << "OTULabel\tComparision\tWilcoxon\tPvalue\n"; LinearAlgebra linear; for (int i = 0; i < numBins; i++) { if (m->control_pressed) { break; } if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis bool sig = false; //for each subclass comparision for (int j = 0; j < numComp; j++) { //fill x and y with this comparisons data vector<double> x; vector<double> y; //fill x and y vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); } vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); } double pValue = 0.0; double H = linear.calcWilcoxon(x, y, pValue); //output H and signifigance out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl; //set sig - not sure how yet } if (sig) { significantOtuLabels.push_back(i); } } } out.close(); return 
significantOtuLabels; } catch(exception& e) { m->errorOut(e, "LefseCommand", "runWilcoxon"); exit(1); } }