Beispiel #1
0
/**
 * Runs a Kruskal-Wallis test on every bin of lookup, with the samples labelled by the
 * value their group has in the design category mclass.
 *
 * Side effects: opens the "kruskall-wallis" output file, registers it in outputNames and
 * outputTypes, and writes a header row followed by one "label, H, p-value" row per bin.
 * If fewer than two distinct class values are found, an error is logged and
 * m->control_pressed is set, so the per-bin loop exits before testing anything.
 *
 * @param lookup     per-sample abundance vectors; must contain at least one entry because
 *                   lookup[0] supplies the label and the number of bins
 * @param designMap  queried with (group name, mclass) to obtain each sample's class value
 * @return indexes of the bins whose p-value is below anovaAlpha
 */
vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
    try {
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string outputFileName = getOutputFileName("kruskall-wallis",variables);

        ofstream out;
        m->openOutputFile(outputFileName, out);
        outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
        out << "OTULabel\tKW\tPvalue\n";

        vector<int> significantOtuLabels;
        int numBins = lookup[0]->getNumBins();

        //Look up each sample's class once. The class does not depend on the bin, so the
        //per-bin loop below reuses these values instead of querying the design map again.
        //NOTE(review): assumes designMap.get() yields the same value for the same arguments - confirm.
        vector<string> groupTreatments;
        groupTreatments.reserve(lookup.size());
        set<string> treatments;
        for (size_t j = 0; j < lookup.size(); j++) {
            string group = lookup[j]->getGroup();
            string treatment = designMap.get(group, mclass); //get value for this group in this category
            groupTreatments.push_back(treatment);
            treatments.insert(treatment);
        }

        //sanity check: a comparison needs at least two distinct classes
        if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }

        LinearAlgebra linear;
        for (int i = 0; i < numBins; i++) {
            if (m->control_pressed) { break; }

            //pair every sample's class with its abundance in bin i
            vector<spearmanRank> values;
            values.reserve(lookup.size());
            for (size_t j = 0; j < lookup.size(); j++) {
                spearmanRank temp(groupTreatments[j], lookup[j]->getAbundance(i));
                values.push_back(temp);
            }

            double pValue = 0.0;
            double H = linear.calcKruskalWallis(values, pValue);

            //output H and significance; '\n' avoids a flush per row, close() below flushes once
            out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << '\n';

            if (pValue < anovaAlpha) {  significantOtuLabels.push_back(i);  }
        }
        out.close();

        return significantOtuLabels;
    }
    catch(exception& e) {
        m->errorOut(e, "LefseCommand", "runKruskalWallis");
        exit(1);
    }
}
Beispiel #2
0
/**
 * Builds the model for p partitions from the count matrix cm and computes its
 * fit statistics.
 *
 * Steps, in order:
 *   1. kMeans() and optimizeLambda() are run once.
 *   2. calculatePiK() / optimizeLambda() are alternated, and weights[i] is recomputed as
 *      the sum of zMatrix[i][*], until the negative log-likelihood changes by no more
 *      than 1e-6 between iterations or 100 iterations have run.
 *   3. For every partition the Hessian is obtained and inverted; the diagonal of the
 *      inverse is stored in error[partition] and the log of the absolute Hessian diagonal
 *      is accumulated into logDeterminant.
 *   4. laplace, bic and aic are derived from currNLL and the parameter count.
 *
 * @param cm  count matrix; rows are samples and columns are OTUs. Must have at least one
 *            row, because cm[0] is used to determine the number of OTUs.
 * @param p   number of partitions
 */
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
    try {
        m = MothurOut::getInstance();
        numSamples = (int)countMatrix.size();
        numOTUs = (int)countMatrix[0].size();

        kMeans();
        optimizeLambda();

        const double convergenceTolerance = 1.0e-6;
        const int maxIterations = 100;

        double change = 1.0000;
        currNLL = 0.0000;

        int iter = 0;

        while(change > convergenceTolerance && iter < maxIterations){

            calculatePiK();

            optimizeLambda();

            //weight of a partition = sum of its row of zMatrix over all samples
            for(int i=0;i<numPartitions;i++){
                weights[i] = 0.0000;
                for(int j=0;j<numSamples;j++){
                    weights[i] += zMatrix[i][j];
                }
            }

            double nLL = getNegativeLogLikelihood();

            //fabs, not abs: an unqualified abs can bind to the integer overload and
            //truncate a sub-1.0 delta to 0, ending the loop too early
            change = fabs(nLL - currNLL);

            currNLL = nLL;

            iter++;
        }

        error.resize(numPartitions);

        logDeterminant = 0.0000;

        LinearAlgebra l;

        //currentPartition is a member; getHessian() takes no arguments, so it presumably reads it
        for(currentPartition=0;currentPartition<numPartitions;currentPartition++){

            error[currentPartition].assign(numOTUs, 0.0000);

            if(currentPartition > 0){
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            }
            vector<vector<double> > hessian = getHessian();
            vector<vector<double> > invHessian = l.getInverse(hessian);

            for(int i=0;i<numOTUs;i++){
                logDeterminant += log(fabs(hessian[i][i]));
                error[currentPartition][i] = invHessian[i][i];
            }

        }

        //full double-precision pi instead of a 6-digit literal
        const double pi = 3.14159265358979323846;

        int numParameters = numPartitions * numOTUs + numPartitions - 1;
        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * pi);
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
    }
    catch(exception& e) {
        m->errorOut(e, "qFinderDMM", "qFinderDMM");
        exit(1);
    }
}
Beispiel #3
0
/**
 * Builds the model for p partitions from the count matrix cm and computes its
 * fit statistics.
 *
 * Steps, in order:
 *   1. findkMeans() and optimizeLambda() are run once.
 *   2. calculatePiK() / optimizeLambda() are alternated, and weights[i] is recomputed as
 *      the sum of zMatrix[i][*], until the negative log-likelihood changes by no more
 *      than 1e-6 between iterations or 100 iterations have run.
 *   3. For every partition the Hessian is obtained and inverted; the diagonal of the
 *      inverse is stored in error[partition] and the log of the absolute Hessian diagonal
 *      is accumulated into logDeterminant.
 *   4. laplace, bic and aic are derived from currNLL and the parameter count.
 *
 * @param cm  count matrix; rows are samples and columns are OTUs. Must have at least one
 *            row, because cm[0] is used to determine the number of OTUs.
 * @param p   number of partitions
 */
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
    try {
        numPartitions = p;
        countMatrix = cm;
        numSamples = (int)countMatrix.size();
        numOTUs = (int)countMatrix[0].size();

        findkMeans();
        optimizeLambda();

        const double convergenceTolerance = 1.0e-6;
        const int maxIterations = 100;

        double change = 1.0000;
        currNLL = 0.0000;

        int iter = 0;

        while(change > convergenceTolerance && iter < maxIterations){

            calculatePiK();

            optimizeLambda();

            //weight of a partition = sum of its row of zMatrix over all samples
            for(int i=0;i<numPartitions;i++){
                weights[i] = 0.0000;
                for(int j=0;j<numSamples;j++){
                    weights[i] += zMatrix[i][j];
                }
            }

            double nLL = getNegativeLogLikelihood();

            //fabs, not abs: an unqualified abs can bind to the integer overload and
            //truncate a sub-1.0 delta to 0, ending the loop too early
            change = fabs(nLL - currNLL);

            currNLL = nLL;

            iter++;
        }

        error.resize(numPartitions);

        logDeterminant = 0.0000;

        LinearAlgebra l;

        //currentPartition is a member; getHessian() takes no arguments, so it presumably reads it
        for(currentPartition=0;currentPartition<numPartitions;currentPartition++){

            error[currentPartition].assign(numOTUs, 0.0000);

            if (m->debug) { m->mothurOut("current partition = " + toString(currentPartition) + "\n"); }

            if(currentPartition > 0){
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            }

            vector<vector<double> > hessian = getHessian();
            vector<vector<double> > invHessian = l.getInverse(hessian);

            for(int i=0;i<numOTUs;i++){
                logDeterminant += log(fabs(hessian[i][i]));
                error[currentPartition][i] = invHessian[i][i];
            }
        }

        //full double-precision pi instead of a 6-digit literal
        const double pi = 3.14159265358979323846;

        int numParameters = numPartitions * numOTUs + numPartitions - 1;
        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * pi);
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
    }
    catch(exception& e) {
        m->errorOut(e, "qFinderDMM", "qFinderDMM");
        exit(1);
    }
}
Beispiel #4
0
/**
 * Runs a Wilcoxon test for every pair of subclasses on each bin selected by bins and
 * writes one row per (bin, subclass pair) to the "wilcoxon" output file.
 *
 * The test is skipped - an error is logged and an empty vector is returned before any
 * file is created - when:
 *   - a subclass has members in more than one class,
 *   - the subclasses do not all contain the same number of groups, or
 *   - there are fewer than 2 subclasses.
 *
 * NOTE(review): the significance decision is not implemented. sig is initialised to false
 * and never set, so the returned vector is currently always empty.
 *
 * @param lookup     per-sample abundance vectors; must contain at least one entry because
 *                   lookup[0] supplies the label and the number of bins
 * @param designMap  queried with (group, mclass) and (group, subclass) for each sample
 * @param bins       bin indexes to test (the ones flagged by the Kruskal-Wallis step);
 *                   membership is decided by m->inUsersGroups(i, bins)
 * @return indexes of the bins judged significant (see NOTE above)
 */
vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
    try {
        vector<int> significantOtuLabels;

        map<string, string> subclass2Class;              //subclass name -> class it was first seen with
        map<string, int> subclassCounts;                 //subclass name -> number of groups in it
        map<string, vector<int> > subClass2GroupIndex;   //subclass name -> indexes into lookup of its groups. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
        bool error = false;
        for (size_t j = 0; j < lookup.size(); j++) {
            string group = lookup[j]->getGroup();
            string treatment = designMap.get(group, mclass); //get value for this group in this category
            string thisSub = designMap.get(group, subclass);

            map<string, string>::iterator it = subclass2Class.find(thisSub);
            if (it == subclass2Class.end()) {
                subclass2Class[thisSub] = treatment;
            }
            else if (it->second != treatment) {
                error = true;
                m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
            }

            //operator[] creates a zero count / empty index list the first time a subclass is seen
            subclassCounts[thisSub]++;
            subClass2GroupIndex[thisSub].push_back((int)j);
        }

        if (error) { return significantOtuLabels; }

        //check counts to make sure subclasses are the same size
        set<int> counts;
        for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
        if (counts.size() > 1) {
            m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
            return significantOtuLabels;
        }

        //Check the number of subclasses, not the number of pairwise comparisons: exactly two
        //subclasses give a single comparison, which is valid and matches the message below.
        if (subclassCounts.size() < 2) {
            m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
            return significantOtuLabels;
        }

        //find comparisons and fill comp with every unordered pair of subclasses
        vector<compGroup> comp;
        for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) {
            map<string, int>::iterator itB = it; itB++;
            for (; itB != subclassCounts.end(); itB++) {
                compGroup temp(it->first, itB->first);
                comp.push_back(temp);
            }
        }
        int numComp = comp.size();

        int numBins = lookup[0]->getNumBins();

        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string outputFileName = getOutputFileName("wilcoxon",variables);

        ofstream out;
        m->openOutputFile(outputFileName, out);
        outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
        out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";

        LinearAlgebra linear;
        for (int i = 0; i < numBins; i++) {
            if (m->control_pressed) { break; }

            if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis

                bool sig = false;
                //for each subclass comparison
                for (int j = 0; j < numComp; j++) {
                    //fill x and y with this comparison's data
                    vector<double> x; vector<double> y;

                    //bind by reference: the index lists are only read, no need to copy them per comparison
                    const vector<int>& xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
                    for (size_t k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }

                    const vector<int>& yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
                    for (size_t k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }

                    double pValue = 0.0;
                    double H = linear.calcWilcoxon(x, y, pValue);

                    //output statistic and significance; '\n' avoids a flush per row, close() below flushes once
                    out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << '\n';

                    //set sig - not sure how yet
                }
                if (sig) {  significantOtuLabels.push_back(i);  }
            }
        }
        out.close();

        return significantOtuLabels;
    }
    catch(exception& e) {
        m->errorOut(e, "LefseCommand", "runWilcoxon");
        exit(1);
    }
}