Beispiel #1
0
/*
 * Store test driver: allocates the forest array, has randomForest() fill it,
 * reports the size of every tree, checks and frees each tree, and finally
 * prints the store byte counters next to the totals gathered here.
 *
 * Relies on file-level state that is declared outside this block:
 * forest, printforest, treeNo, treeBytes, FOREST_SIZE and the stoBytes*
 * counters.
 */
void
testStore2(void)
{
	int	i;
	long	bO, bF, grossBytes;

	/* Baseline: allocated minus freed minus gc'd bytes before the test. */
	bO = stoBytesAlloc - stoBytesFree - stoBytesGc;

	printforest = 1;
	grossBytes = 0;

	/* Start with an all-null forest so no slot holds an indeterminate pointer. */
	forest = (struct tree **)stoAlloc(int0,
					  FOREST_SIZE*sizeof(struct tree *));
	for (i = 0; i < FOREST_SIZE; i++) forest[i] = 0;

	/* Defined elsewhere; presumably populates forest[] -- confirm there. */
	randomForest();

	/* Report each tree's size and accumulate the total. */
	for (i = 0; i < FOREST_SIZE; i++) {
		long sz = treeSize(forest[i]);
		printf("%d: %ld bytes\n", i, sz);
		grossBytes += sz;
		if (printforest) treePrint(osStdout, forest[i]);
	}

	/* Check every tree before releasing it. */
	for (i = 0; i < FOREST_SIZE; i++) {
		treeCheck(forest[i], treeNo);
		treeFree(forest[i]);
	}

	/*
	 * Same counter expression as the baseline, sampled after the trees are
	 * freed but before the forest array itself is released.
	 */
	bF = stoBytesAlloc - stoBytesFree - stoBytesGc;

	/*
	 * bF - bO is a signed long, so it is printed with %ld; %lu would show a
	 * negative delta as a huge unsigned value.
	 * NOTE(review): stoBytesOwn is assumed to be unsigned long (kept as %lu)
	 * and treeBytes to be long -- confirm against their declarations.
	 */
	printf("Memory: %lu (owned) %ld (alloc) %ld (net) %ld (gross)\n",
	       stoBytesOwn, bF - bO, treeBytes, grossBytes);

	stoFree(forest);
}
/**
 * Turns the shared abundance vectors plus the design map into an integer
 * data set, runs a RandomForest on it, and records two output files.
 *
 * Data set layout: one row per entry of `lookup`; columns 0..numBins-1 hold
 * the per-bin values returned by get(j), and the column after the last bin
 * holds the integer code of the sample's treatment.
 *
 * Outputs (both registered under the "summary" output type):
 *   - "<root>RF."                 passed to calcForrestVariableImportance()
 *   - "<root>misclassifications." passed to getMissclassifications()
 *
 * @param lookup  one SharedRAbundVector per sample/group. An empty vector is
 *                a no-op. Rows are sized from lookup[0]->getNumBins(), so all
 *                entries are assumed to have the same bin count -- TODO confirm
 *                the caller guarantees this.
 *
 * Any std::exception is reported through m->errorOut() and terminates the
 * process with exit(1).
 */
void ClassifyRFSharedCommand::processSharedAndDesignData(vector<SharedRAbundVector*> lookup){
    try {
        // Without samples there is nothing to classify, and the lookup[0]
        // accesses below would be undefined behaviour.
        if (lookup.empty()) { return; }

        const int numSamples = static_cast<int>(lookup.size());

        // A treatment is coded as the index of the LAST sample that carries it,
        // while intToTreatmentMap gets an entry for every sample index, so any
        // code stored in the data set can be translated back to its name.
        map<string, int> treatmentToIntMap;
        map<int, string> intToTreatmentMap;
        for (int i = 0; i < numSamples; i++) {
            string treatmentName = designMap.get(lookup[i]->getGroup());
            treatmentToIntMap[treatmentName] = i;
            intToTreatmentMap[i] = treatmentName;
        }

        const int numFeatures = lookup[0]->getNumBins();

        const int numRows = numSamples;
        const int numColumns = numFeatures + 1;     // extra one space needed for the treatment/outcome

        vector< vector<int> > dataSet(numRows, vector<int>(numColumns, 0));

        // Group names in row order; used later to label misclassified samples.
        vector<string> names;

        for (int i = 0; i < numSamples; i++) {
            string sharedGroupName = lookup[i]->getGroup();
            names.push_back(sharedGroupName);
            string treatmentName = designMap.get(sharedGroupName);

            // Copy the bin values, then append the treatment code right after
            // the last bin of this row.
            int j = 0;
            for (; j < lookup[i]->getNumBins(); j++) {
                dataSet[i][j] = lookup[i]->get(j);
            }
            dataSet[i][j] = treatmentToIntMap[treatmentName];
        }

        RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold);

        randomForest.populateDecisionTrees();
        randomForest.calcForrestErrorRate();
        randomForest.printConfusionMatrix(intToTreatmentMap);

        // Variable-importance summary file.
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(sharedfile)) + "RF.";
        variables["[distance]"] = lookup[0]->getLabel();
        string filename = getOutputFileName("summary", variables);
        outputNames.push_back(filename); outputTypes["summary"].push_back(filename);
        randomForest.calcForrestVariableImportance(filename);

        // Misclassification report file.
        map<string, string> variable;
        variable["[filename]"] = outputDir + util.getRootName(util.getSimpleName(sharedfile)) + "misclassifications.";
        variable["[distance]"] = lookup[0]->getLabel();
        string mc_filename = getOutputFileName("summary", variable);
        outputNames.push_back(mc_filename); outputTypes["summary"].push_back(mc_filename);
        randomForest.getMissclassifications(mc_filename, intToTreatmentMap, names);

        m->mothurOutEndLine();
    }
    catch(exception& e) {
        // Report under the real class name so the failure can be located.
        m->errorOut(e, "ClassifyRFSharedCommand", "processSharedAndDesignData");
        exit(1);
    }
}