Example #1
0
// Entry point for growing the tree: fills in the root node with the best
// split found on a randomly sampled feature subset of `dataframe`, then
// hands over to createBranch() to grow the rest of the tree recursively.
//
// dataframe: training data; the sampled feature indices are drawn from
//            dataframe.nrcols - 1 candidates (presumably every column
//            except the class column -- confirm against DataFrame).
// featList:  not read by this function.
// nrFeat:    number of features requested from LUtils::sample().
// verbose:   when true, logs the seed of the random generator and is
//            forwarded to createBranch().
void Tree::train(DataFrame &dataframe, const vector<int> &featList,
		const int nrFeat, const bool verbose) {
	if (verbose) {
		cout << "Training tree with random seed:" << rng.seed << endl;
	}

	// Random feature subset that is searched for the root split.
	vector<int> candidateFeatures = LUtils::sample(rng, nrFeat,
			dataframe.nrcols - 1, false);
	const DataFrame::FeatureResult bestSplit = dataframe.findBestFeature(
			candidateFeatures, entropy_loss);

	// Store the winning split in the already existing root node.
	root->feature = bestSplit.opt_feat;
	root->splitvalue = bestSplit.opt_split;
	root->impurity = bestSplit.loss;
	root->nameFeature = dataframe.header.at(bestSplit.opt_feat);
	root->nrsamples = dataframe.nrrows;

	// Grow both sub-trees below the root.
	createBranch(root, dataframe, nrFeat, verbose);
}
Example #2
0
// Recursively inserts the child nodes below parentNode.
//
// dfsplit is partitioned with the feature / split value stored in
// parentNode. Each non-empty partition then becomes either
//   - a terminal child (partition has at most min_node rows, the child
//     would exceed max_depth, or fewer than 2 distinct values remain in the
//     class column), or
//   - an internal child, whose own split is searched on the feature subset
//     sampled at the top of this call and which is grown by a recursive
//     call.
// If either partition is empty, no child is created at all and parentNode
// itself is flagged as terminal.
//
// parentNode: node whose feature/splitvalue define the split; receives the
//             created children, or is marked terminal.
// dfsplit:    the data that reached parentNode.
// nrFeat:     number of features requested from LUtils::sample().
// verbose:    when true, progress information is written to cout.
//
// Side effects: increments tree_size once per call and tnodecount once per
// terminal node that is created or flagged.
void Tree::createBranch(boost::shared_ptr<Node> parentNode, DataFrame &dfsplit,
		const int nrFeat, bool verbose) {

	// One random feature subset per call; it is used for the split search
	// of both children.
	vector<int> featsubset = LUtils::sample(rng, nrFeat, dfsplit.nrcols - 1,
			false);

	if (verbose) {
		cout << "Feature subset: ";
		for (unsigned i = 0; i < featsubset.size(); ++i) {
			cout << " " << dfsplit.header[featsubset[i]];
		}
		cout << endl;
	}

	DataFrame leftDF;
	DataFrame rightDF;
	dfsplit.splitFrame(parentNode->splitvalue, parentNode->feature, leftDF,
			rightDF);
	tree_size++;

	// EMPTY PARTITIONS
	// Both sides are checked BEFORE any child is built. Happens if one of
	// the nodes is "practically" pure: the split separates nothing, so the
	// parent becomes the leaf. Checking the right side only after the left
	// subtree had been grown would leave a terminal parent that still owns a
	// left subtree and would count terminal nodes twice.
	if (leftDF.nrrows == 0) {
		if (verbose) {
			cout << "No data in left node, right node:" << rightDF.nrrows
					<< endl;
			cout << "Left node: Parent node is terminal." << endl;
		}
		parentNode->isTerminal = true;
		tnodecount++;
		return;
	}
	if (rightDF.nrrows == 0) {
		if (verbose) {
			cout << "No data in right node,  left node:" << leftDF.nrrows
					<< endl;
			cout << "Right node: Parent node is terminal." << endl;
		}
		parentNode->isTerminal = true;
		tnodecount++;
		return;
	}

	//LEFT BRANCH (leftDF is known to be non-empty here)
	if (verbose) {
		cout << "...Creating left branch: Feature: "
				<< dfsplit.header[parentNode->feature] << " Value:"
				<< parentNode->splitvalue << " n:" << leftDF.nrrows
				<< " with prediction:" << leftDF.cm << endl;
		//leftDF.printSummary();
	}
	if (leftDF.nrrows <= min_node || parentNode->depth + 1 > max_depth
			|| leftDF.distinct[leftDF.classCol] < 2) {
		// Stopping criterion reached: attach a leaf.
		if (verbose)
			cout << "Terminal node, cm: " << leftDF.cm << endl;
		boost::shared_ptr<Node> left = boost::make_shared<Node>(
				parentNode->depth + 1, leftDF.cm);
		left->isTerminal = true;
		left->nrsamples = leftDF.nrrows;
		parentNode->left = left;
		tnodecount++;
	} else {
		// Internal node: find its split and keep growing below it.
		DataFrame::FeatureResult featResulta = leftDF.findBestFeature(
				featsubset, entropy_loss);
		boost::shared_ptr<Node> left = boost::make_shared<Node>(
				featResulta.opt_feat, featResulta.opt_split, featResulta.loss,
				parentNode->depth + 1, leftDF.header[featResulta.opt_feat],
				leftDF.nrrows, leftDF.cm);
		parentNode->left = left;
		createBranch(left, leftDF, nrFeat, verbose);
	}

	//RIGHT BRANCH (rightDF is known to be non-empty here)
	if (verbose) {
		cout << "...Creating right branch: Feature: "
				<< dfsplit.header[parentNode->feature] << " Value:"
				<< parentNode->splitvalue << " n:" << rightDF.nrrows
				<< " with prediction:" << rightDF.cm << endl;
		//rightDF.printSummary();
	}
	if (rightDF.nrrows <= min_node || parentNode->depth + 1 > max_depth
			|| rightDF.distinct[rightDF.classCol] < 2) {
		// Stopping criterion reached: attach a leaf.
		if (verbose)
			cout << "Terminal node, cm: " << rightDF.cm << endl;
		boost::shared_ptr<Node> right = boost::make_shared<Node>(
				parentNode->depth + 1, rightDF.cm);
		right->isTerminal = true;
		right->nrsamples = rightDF.nrrows;
		parentNode->right = right;
		tnodecount++;
	} else {
		// Internal node: find its split and keep growing below it. No
		// "Terminal node" message here, this node is not a leaf.
		DataFrame::FeatureResult featResultb = rightDF.findBestFeature(
				featsubset, entropy_loss);
		boost::shared_ptr<Node> right = boost::make_shared<Node>(
				featResultb.opt_feat, featResultb.opt_split, featResultb.loss,
				parentNode->depth + 1, rightDF.header[featResultb.opt_feat],
				rightDF.nrrows, rightDF.cm);
		parentNode->right = right;
		createBranch(right, rightDF, nrFeat, verbose);
	}
}