//********************************************************************************************************************** vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){ try { int numOTUs = sharedVector[0].size(); vector<vector<float> > sharedShuffled = sharedVector; vector<vector<float> > pValues(numOTUs); for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0); } for(int i=0;i<numPerms;i++){ if (m->control_pressed) { return pValues; } sharedShuffled = shuffleSharedVector(sharedVector); CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod); vector<vector<float> > permuteCorrMatrix = permutedData.getRho(); for(int j=0;j<numOTUs;j++){ for(int k=0;k<j;k++){ //cout << k << endl; double randValue = permuteCorrMatrix[j][k]; double observedValue = origCorrMatrix[j][k]; if(observedValue >= 0 && randValue > observedValue) { pValues[j][k]++; }//this method seems to deflate the else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values } } float done = ceil(numPermutations * 0.05); if((i+1) % (int)(done) == 0){ cout << i+1 << endl; } } return pValues; } catch(exception& e) { m->errorOut(e, "SparccCommand", "driver"); exit(1); } }
//********************************************************************************************************************** int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& out) { try { //format data vector< vector<spearmanRank> > scores; scores.resize(numaxes); for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) { vector<float> temp = it->second; for (int i = 0; i < temp.size(); i++) { spearmanRank member(it->first, temp[i]); scores[i].push_back(member); } } //sort each axis for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); } //convert scores to ranks of xi in each axis for (int i = 0; i < numaxes; i++) { vector<spearmanRank*> ties; int rankTotal = 0; for (int j = 0; j < scores[i].size(); j++) { rankTotal += (j+1); ties.push_back(&(scores[i][j])); if (j != scores[i].size()-1) { // you are not the last so you can look ahead if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } } } } //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { if (metadatafile == "") { out << i+1; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector<spearmanRank> otuScores; for (int j = 0; j < lookupFloat.size(); j++) { spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i)); otuScores.push_back(member); } sort(otuScores.begin(), otuScores.end(), compareSpearman); map<string, float> rankOtus; vector<spearmanRank> ties; int rankTotal = 0; for (int j = 0; j < otuScores.size(); j++) { rankTotal += (j+1); ties.push_back(otuScores[j]); if (j != otuScores.size()-1) { // you are not the last so you can look ahead if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } } } vector<double> pValues(numaxes); //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { int numCoor = 0; int numDisCoor = 0; vector<spearmanRank> otus; vector<spearmanRank> otusTemp; for (int l = 0; l < scores[j].size(); l++) { spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]); otus.push_back(member); } int count = 0; for (int l = 0; l < scores[j].size(); l++) { int numWithHigherRank = 0; int numWithLowerRank = 0; float thisrank = otus[l].score; for (int u = l+1; u < scores[j].size(); u++) { if (otus[u].score > thisrank) { numWithHigherRank++; } else if (otus[u].score < thisrank) { numWithLowerRank++; } count++; } numCoor += numWithHigherRank; numDisCoor += numWithLowerRank; } double p = (numCoor - numDisCoor) / (float) count; if (isnan(p) || isinf(p)) { p = 0.0; } out << '\t' << p; pValues[j] = p; //calc signif - zA - http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient#Significance_tests double numer = 3.0 * (numCoor - numDisCoor); int n = scores[j].size(); double denom = n * (n-1) * (2*n + 5) / (double) 2.0; denom = sqrt(denom); double sig = numer / denom; if (isnan(sig) || isinf(sig)) { sig = 0.0; } out << '\t' << sig; } double sum = 0; for(int k=0;k<numaxes;k++){ sum += pValues[k] * pValues[k]; } out << '\t' << sqrt(sum) << endl; } return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "calcKendall"); exit(1); } }
//********************************************************************************************************************** int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& out) { try { //format data vector< map<float, int> > tableX; tableX.resize(numaxes); map<float, int>::iterator itTable; vector< vector<spearmanRank> > scores; scores.resize(numaxes); for (map<string, vector<float> >::iterator it = axes.begin(); it != axes.end(); it++) { vector<float> temp = it->second; for (int i = 0; i < temp.size(); i++) { spearmanRank member(it->first, temp[i]); scores[i].push_back(member); //count number of repeats itTable = tableX[i].find(temp[i]); if (itTable == tableX[i].end()) { tableX[i][temp[i]] = 1; }else { tableX[i][temp[i]]++; } } } //calc LX //for each axis vector<double> Lx; Lx.resize(numaxes, 0.0); for (int i = 0; i < numaxes; i++) { for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) { double tx = (double) itTable->second; Lx[i] += ((pow(tx, 3.0) - tx) / 12.0); } } //sort each axis for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); } //find ranks of xi in each axis map<string, vector<float> > rankAxes; for (int i = 0; i < numaxes; i++) { vector<spearmanRank> ties; int rankTotal = 0; for (int j = 0; j < scores[i].size(); j++) { rankTotal += (j+1); ties.push_back(scores[i][j]); if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankAxes[ties[k].name].push_back(thisrank); } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankAxes[ties[k].name].push_back(thisrank); } } } } //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { if (metadatafile == "") { out << i+1; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector<spearmanRank> otuScores; map<float, int> tableY; for (int j = 0; j < lookupFloat.size(); j++) { spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i)); otuScores.push_back(member); itTable = tableY.find(member.score); if (itTable == tableY.end()) { tableY[member.score] = 1; }else { tableY[member.score]++; } } //calc Ly double Ly = 0.0; for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { double ty = (double) itTable->second; Ly += ((pow(ty, 3.0) - ty) / 12.0); } sort(otuScores.begin(), otuScores.end(), compareSpearman); map<string, float> rankOtus; vector<spearmanRank> ties; int rankTotal = 0; for (int j = 0; j < otuScores.size(); j++) { rankTotal += (j+1); ties.push_back(otuScores[j]); if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } } } vector<double> pValues(numaxes); //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { double di = 0.0; for (int k = 0; k < lookupFloat.size(); k++) { float xi = rankAxes[lookupFloat[k]->getGroup()][j]; float yi = rankOtus[lookupFloat[k]->getGroup()]; di += ((xi - yi) * (xi - yi)); } double p = 0.0; double n = (double) lookupFloat.size(); double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j]; double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); if (isnan(p) || isinf(p)) { p = 0.0; } out << '\t' << p; pValues[j] = p; //signifigance calc - http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient double temp = (lookupFloat.size()-2) / (double) (1- (p*p)); temp = sqrt(temp); double sig = p*temp; if (isnan(sig) || isinf(sig)) { sig = 0.0; } out << '\t' << sig; } double sum = 0; for(int k=0;k<numaxes;k++){ sum += pValues[k] * pValues[k]; } out << '\t' << sqrt(sum) << endl; } return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "calcSpearman"); exit(1); } }