Beispiel #1
// Runs a Kruskal-Wallis test for every bin across the groups in `lookup` and
// writes one row per bin (bin label, H statistic, p-value) to the
// "kruskall-wallis" output file.
//
// lookup    - per-group abundance vectors; lookup[0] supplies the label and
//             the bin count, so the vector must be non-empty.
// designMap - queried with (group, mclass) to obtain each group's treatment.
//
// Returns the bin indexes (not label strings, despite the variable name) whose
// p-value is below anovaAlpha.
//
// NOTE(review): this excerpt appears to be missing lines (closing braces, the
// insert into `treatments`, the push of `temp` into `values`). Confirm against
// the full source before relying on the comments below.
vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
	try {
        // Substitution values handed to getOutputFileName to build the output path.
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
		string outputFileName = getOutputFileName("kruskall-wallis",variables);
		ofstream out;
		m->openOutputFile(outputFileName, out);
		// Record the new file in both outputNames and outputTypes.
		outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
        // Header row of the tab-separated report.
        out << "OTULabel\tKW\tPvalue\n";
        vector<int> significantOtuLabels;
        int numBins = lookup[0]->getNumBins();
        //sanity check to make sure each treatment has a group in the shared file
        set<string> treatments;
        for (int j = 0; j < lookup.size(); j++) {
            string group = lookup[j]->getGroup();
            string treatment = designMap.get(group, mclass); //get value for this group in this category
        // NOTE(review): no insertion into `treatments` is visible in this excerpt,
        // so as shown the set stays empty and the check below always fires.
        // Setting control_pressed makes the bin loop below break on its first pass.
        if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
        LinearAlgebra linear;
        for (int i = 0; i < numBins; i++) {
            if (m->control_pressed) { break; }
            // One (treatment, abundance) pair per group for bin i.
            vector<spearmanRank> values;
            for (int j = 0; j < lookup.size(); j++) {
                string group = lookup[j]->getGroup();
                string treatment = designMap.get(group, mclass); //get value for this group in this category
                spearmanRank temp(treatment, lookup[j]->getAbundance(i));
            // NOTE(review): the push of `temp` into `values` is not visible here.
            // pValue is passed to calcKruskalWallis and read afterwards, so it is
            // presumably an out-parameter; H is the value the call returns.
            double pValue = 0.0;
            double H = linear.calcKruskalWallis(values, pValue);
            //output H and significance
            out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
            // Keep the bin index when the test is significant at anovaAlpha.
            if (pValue < anovaAlpha) {  significantOtuLabels.push_back(i);  }
        return significantOtuLabels;
	catch(exception& e) {
		m->errorOut(e, "LefseCommand", "runKruskalWallis");
Beispiel #2
// Constructs the finder from a count matrix and a partition count, iterates
// until the negative log-likelihood stops changing, and then computes the
// laplace, bic and aic values.
//
// cm - count matrix; numSamples is its row count and numOTUs the length of
//      row 0, so cm must have at least one row.
// p  - number of partitions.
//
// NOTE(review): this excerpt appears to be missing lines (closing braces, the
// k-means / lambda-optimisation / Z-calculation calls hinted at by the
// commented-out traces, the `iter` increment, and the loop that declares
// `currentPartition`). Confirm against the full source.
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
    try {
        m = MothurOut::getInstance();
        numSamples = (int)countMatrix.size();
        numOTUs = (int)countMatrix[0].size();
        //    cout << "done kMeans" << endl;
        //    cout << "done optimizeLambda" << endl;
        // Loop while the NLL change exceeds 1e-6, for at most 100 iterations.
        double change = 1.0000;
        currNLL = 0.0000;
        int iter = 0;
        // NOTE(review): no increment of `iter` is visible in this excerpt.
        while(change > 1.0e-6 && iter < 100){
            //        cout << "Calc_Z: ";
            //        printf("Iter:%d\t",iter);
            // weights[i] becomes the sum of zMatrix[i][j] over all samples j.
            for(int i=0;i<numPartitions;i++){
                weights[i] = 0.0000;
                for(int j=0;j<numSamples;j++){
                    weights[i] += zMatrix[i][j];
                //            printf("w_%d=%.3f\t",i,weights[i]);
            double nLL = getNegativeLogLikelihood();
            // NOTE(review): unqualified abs on a double may bind to the integer
            // overload depending on the headers in scope; confirm it resolves to
            // the floating-point version.
            change = abs(nLL - currNLL);
            currNLL = nLL;
            //        printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
        logDeterminant = 0.0000;
        LinearAlgebra l;
        // NOTE(review): `currentPartition` is used below but the loop (or member
        // update) that sets it is not visible in this excerpt.
            error[currentPartition].assign(numOTUs, 0.0000);
            if(currentPartition > 0){
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            vector<vector<double> > hessian = getHessian();
            vector<vector<double> > invHessian = l.getInverse(hessian);
            // Add log|diagonal| of the Hessian to logDeterminant and store the
            // diagonal of its inverse in error[currentPartition].
            for(int i=0;i<numOTUs;i++){
                logDeterminant += log(abs(hessian[i][i]));
                error[currentPartition][i] = invHessian[i][i];
        int numParameters = numPartitions * numOTUs + numPartitions - 1;
        // 3.14159 is presumably a truncated value of pi.
        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
	catch(exception& e) {
		m->errorOut(e, "qFinderDMM", "qFinderDMM");
Beispiel #3
// Constructs the finder on top of CommunityTypeFinder from a count matrix and
// a partition count, iterates until the negative log-likelihood stops
// changing, and then computes the laplace, bic and aic values.
//
// cm - count matrix; numSamples is its row count and numOTUs the length of
//      row 0, so cm must have at least one row.
// p  - number of partitions.
//
// NOTE(review): this excerpt appears to be missing lines (closing braces, the
// k-means / lambda-optimisation / Z-calculation calls hinted at by the
// commented-out debug traces, the `iter` increment, and the loop that declares
// `currentPartition`). Confirm against the full source.
qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p) : CommunityTypeFinder() {
    try {
        //cout << "here" << endl;
        numPartitions = p;
        countMatrix = cm;
        numSamples = (int)countMatrix.size();
        numOTUs = (int)countMatrix[0].size();
       // if (m->debug) { m->mothurOut("before kmeans\n"); }
       //if (m->debug) { m->mothurOut("done kMeans\n"); }
         //if (m->debug) { m->mothurOut("done optimizeLambda\n"); }
        // Loop while the NLL change exceeds 1e-6, for at most 100 iterations.
        double change = 1.0000;
        currNLL = 0.0000;
        int iter = 0;
        // NOTE(review): no increment of `iter` is visible in this excerpt.
        while(change > 1.0e-6 && iter < 100){
           // if (m->debug) { m->mothurOut("Calc_Z: \n"); }
              // if (m->debug) { m->mothurOut("Iter: " + toString(iter) + "\n"); }
            // weights[i] becomes the sum of zMatrix[i][j] over all samples j.
            for(int i=0;i<numPartitions;i++){
                weights[i] = 0.0000;
                for(int j=0;j<numSamples;j++){
                    weights[i] += zMatrix[i][j];
                           // printf("w_%d=%.3f\t",i,weights[i]);
            double nLL = getNegativeLogLikelihood();
            // NOTE(review): unqualified abs on a double may bind to the integer
            // overload depending on the headers in scope; confirm it resolves to
            // the floating-point version.
            change = abs(nLL - currNLL);
            currNLL = nLL;
                  //  printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
        //if (m->debug) { m->mothurOut("done while loop\n"); }
        logDeterminant = 0.0000;
        LinearAlgebra l;
        // NOTE(review): `currentPartition` is used below but the loop (or member
        // update) that sets it is not visible in this excerpt.
            error[currentPartition].assign(numOTUs, 0.0000);
            if (m->debug) { m->mothurOut("current partition = " + toString(currentPartition) + "\n"); }
            if(currentPartition > 0){
                logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
            //if (m->debug) { m->mothurOut("before hessian\n"); }
            vector<vector<double> > hessian = getHessian();
            //if (m->debug) { m->mothurOut("after hessian\n"); }
            vector<vector<double> > invHessian = l.getInverse(hessian);
            //if (m->debug) { m->mothurOut("after inverse\n"); }
            // Add log|diagonal| of the Hessian to logDeterminant and store the
            // diagonal of its inverse in error[currentPartition].
            for(int i=0;i<numOTUs;i++){
                logDeterminant += log(abs(hessian[i][i]));
                error[currentPartition][i] = invHessian[i][i];
        int numParameters = numPartitions * numOTUs + numPartitions - 1;
        // 3.14159 is presumably a truncated value of pi.
        laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
        bic = currNLL + 0.5 * log(numSamples) * numParameters;
        aic = currNLL + numParameters;
	catch(exception& e) {
		m->errorOut(e, "qFinderDMM", "qFinderDMM");
Beispiel #4
// Runs pairwise Wilcoxon tests between subclasses for the bins selected by
// `bins` and writes one row per (bin, comparison) to the "wilcoxon" output file.
//
// lookup    - per-group abundance vectors; lookup[0] supplies the label and
//             the bin count.
// designMap - queried with (group, mclass) for the class and (group, subclass)
//             for the subclass of each group.
// bins      - bin indexes to test; passed to m->inUsersGroups together with i.
//
// Returns significantOtuLabels. It is returned empty when a subclass spans two
// classes, when subclass sizes differ, or when fewer than two comparisons exist.
//
// NOTE(review): this excerpt appears to be missing lines (closing braces, the
// else-branch bookkeeping, the inner comparison loop, the push into `comp`).
// Confirm against the full source before relying on the comments below.
vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
    try {
        vector<int> significantOtuLabels;
        //if it exists and meets the following requirements run Wilcoxon
        // NOTE(review): the three numbered lines below read like the body of a
        // block comment whose delimiters are absent in this excerpt.
         1. Subclass members all belong to same main class
         2. Number of groups in each subclass is the same
         3. anything else??
        vector<string> subclasses;
        map<string, string> subclass2Class;
        map<string, int> subclassCounts;
        map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old.  Saves time below.
        bool error = false;
        for (int j = 0; j < lookup.size(); j++) {
            string group = lookup[j]->getGroup();
            string treatment = designMap.get(group, mclass); //get value for this group in this category
            string thisSub = designMap.get(group, subclass);
            map<string, string>::iterator it = subclass2Class.find(thisSub);
            // First time this subclass is seen: remember its class, start its
            // count at 1 and record this group's index.
            if (it == subclass2Class.end()) {
                subclass2Class[thisSub] = treatment;
                subclassCounts[thisSub] = 1;
                vector<int> temp; temp.push_back(j);
                subClass2GroupIndex[thisSub] = temp;
            // Subclass already known: it must map to the same class as before.
            // NOTE(review): no increment of subclassCounts or append to
            // subClass2GroupIndex is visible for this branch in the excerpt.
            else {
                if (it->second != treatment) {
                    error = true;
                    m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
        if (error) { return significantOtuLabels; }
        else { //check counts to make sure subclasses are the same size
            // More than one distinct count means the subclasses differ in size.
            set<int> counts;
            for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
            if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
                return significantOtuLabels;  }
        int numBins = lookup[0]->getNumBins();
        vector<compGroup> comp;
        //find comparisons and fill comp
        // NOTE(review): as shown, itB is dereferenced right after itB++ with no
        // end() check and `temp` is never pushed into `comp`; an inner loop over
        // itB appears to be missing from this excerpt.
        map<string, int>::iterator itB;
        for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
            itB = it;itB++;
                compGroup temp(it->first,itB->first);

        int numComp = comp.size();
        // NOTE(review): the test is on the number of comparisons, while the
        // message speaks of subclasses; confirm which threshold is intended.
        if (numComp < 2) {  m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
            return significantOtuLabels;  }
        // Substitution values handed to getOutputFileName to build the output path.
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        string outputFileName = getOutputFileName("wilcoxon",variables);
        ofstream out;
        m->openOutputFile(outputFileName, out);
        // Record the new file in both outputNames and outputTypes.
        outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
        // Header row of the tab-separated report.
        out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
        LinearAlgebra linear;
        for (int i = 0; i < numBins; i++) {
            if (m->control_pressed) { break; }
            // presumably true when bin index i is contained in `bins`; verify
            // against MothurOut::inUsersGroups.
            if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
                bool sig = false;
                //for each subclass comparison
                for (int j = 0; j < numComp; j++) {
                    //fill x and y with this comparison's data
                    vector<double> x; vector<double> y;
                    //fill x and y
                    vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
                    for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
                    vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
                    for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
                    // pValue is passed to calcWilcoxon and read afterwards, so it
                    // is presumably an out-parameter; H is the value the call returns.
                    double pValue = 0.0;
                    double H = linear.calcWilcoxon(x, y, pValue);
                    //output H and significance
                    out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
                    //set sig - not sure how yet
                // NOTE(review): `sig` is never set to true in the visible lines,
                // so as shown no bin index is ever pushed.
                if (sig) {  significantOtuLabels.push_back(i);  }
        return significantOtuLabels;
    catch(exception& e) {
        m->errorOut(e, "LefseCommand", "runWilcoxon");