Beispiel #1
0
/*
 * Trains a classification tree on the given samples and prints the hit-rate
 * measured on the same training data.
 *
 * data      - training samples, one sample per row (passed as CV_ROW_SAMPLE).
 * missing   - missing-value mask handed to the trainer and, row by row, to
 *             predict(); may be NULL, in which case no mask is used.
 * responses - class label per sample, read through data.fl and compared
 *             against the character code 'p'.
 * p_weight  - second entry of the priors array passed to the trainer.
 *
 * Returns a tree allocated with `new`; this function does not delete it.
 */
CvDTree* mushroom_create_dtree( const CvMat* data, const CvMat* missing,
                                const CvMat* responses, float p_weight )
{
    CvDTree* dtree;
    CvMat* var_type;
    int i, hr1 = 0, hr2 = 0, p_total = 0;
    float priors[] = { 1, p_weight };

    // one entry per input column plus one for the response
    var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
    cvSet( var_type, cvScalarAll(CV_VAR_CATEGORICAL) ); // all the variables are categorical

    dtree = new CvDTree;
    
    dtree->train( data, CV_ROW_SAMPLE, responses, 0, 0, var_type, missing,
                  CvDTreeParams( 8, // max depth
                                 10, // min sample count
                                 0, // regression accuracy: N/A here
                                 true, // compute surrogate split, as we have missing data
                                 15, // max number of categories (use sub-optimal algorithm for larger numbers)
                                 10, // the number of cross-validation folds
                                 true, // use 1SE rule => smaller tree
                                 true, // throw away the pruned tree branches
                                 priors // the array of priors, the bigger p_weight, the more attention
                                        // to the poisonous mushrooms
                                        // (a mushroom will be judged to be poisonous with bigger chance)
                                 ));

    // compute hit-rate on the training database, demonstrates predict usage.
    for( i = 0; i < data->rows; i++ )
    {
        CvMat sample, mask;
        const CvMat* mask_ptr = 0;
        cvGetRow( data, &sample, i );
        // only slice the mask when one was supplied; predict() accepts a null mask
        if( missing )
        {
            cvGetRow( missing, &mask, i );
            mask_ptr = &mask;
        }
        double r = dtree->predict( &sample, mask_ptr )->value;
        int d = fabs(r - responses->data.fl[i]) >= FLT_EPSILON;
        if( d )
        {
            // a wrong prediction that is not 'p' means a poisonous sample was missed
            if( r != 'p' )
                hr1++;
            else
                hr2++;
        }
        p_total += responses->data.fl[i] == 'p';
    }

    // guard the percentages against a training set that lacks one of the classes
    int e_total = data->rows - p_total;
    double miss_pct = p_total > 0 ? (double)hr1*100/p_total : 0.0;
    double false_alarm_pct = e_total > 0 ? (double)hr2*100/e_total : 0.0;

    printf( "Results on the training database:\n"
            "\tPoisonous mushrooms mis-predicted: %d (%g%%)\n"
            "\tFalse-alarms: %d (%g%%)\n", hr1, miss_pct,
            hr2, false_alarm_pct );

    cvReleaseMat( &var_type );

    return dtree;
}
Beispiel #2
0
/*
 * Loads a CSV data set, splits it into train/test parts and trains several
 * tree-based models on it, printing train error, test error and variable
 * importance for each through print_result().
 *
 * Returns 0 on success, -1 when the data file cannot be read.
 */
int main()
{
    const int train_sample_count = 300;

//#define LEPIOTA
#ifdef LEPIOTA
    const char* filename = "../../../OpenCV_SVN/samples/c/agaricus-lepiota.data";
#else
    const char* filename = "../../../OpenCV_SVN/samples/c/waveform.data";
#endif

    CvDTree dtree;
    CvBoost boost;
    CvRTrees rtrees;
    CvERTrees ertrees;

    CvMLData data;

    CvTrainTestSplit spl( train_sample_count );
    
    // read_csv reports failure through a non-zero return; do not train on an empty data set
    if( data.read_csv( filename ) != 0 )
    {
        printf( "Could not read the data file %s\n", filename );
        return -1;
    }

#ifdef LEPIOTA
    data.set_response_idx( 0 );     
#else
    data.set_response_idx( 21 );     
    data.change_var_type( 21, CV_VAR_CATEGORICAL );
#endif

    data.set_train_test_split( &spl );
    
    printf("======DTREE=====\n");
    dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
    print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data ), dtree.get_var_importance() );

#ifdef LEPIOTA
    // boosting is only run for the LEPIOTA data set
    printf("======BOOST=====\n");
    boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
    print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data ), 0 );
#endif

    printf("======RTREES=====\n");
    rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data ), rtrees.get_var_importance() );

    printf("======ERTREES=====\n");
    ertrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
    print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data ), ertrees.get_var_importance() );

    return 0;
}
Beispiel #3
0
// Decision Tree
// Trains a CvDTree on trainingData/trainingClasses, predicts every row of
// testData, prints the score returned by evaluate() and plots the predictions.
void decisiontree ( Mat & trainingData , Mat & trainingClasses , Mat & testData ,
		Mat & testClasses ) {
	CvDTree dtree ;
	// one type entry per input attribute plus one for the output
	Mat var_type ( trainingData.cols + 1 , 1 , CV_8U ) ;
	// define all attributes and the output node as numerical;
	// the matrix is a single column of 8-bit elements, so index by row and
	// access as unsigned char
	for ( int r = 0; r < var_type.rows ; r ++) {
		var_type.at < unsigned char >( r , 0 ) = CV_VAR_NUMERICAL ;
	}
	dtree.train ( trainingData , CV_ROW_SAMPLE , trainingClasses , Mat () , Mat () ,
			var_type , Mat () , CvDTreeParams () ) ;
	// NOTE(review): predicted has testClasses.rows rows but the loop runs over
	// testData.rows — assumes both have the same row count; confirm at callers
	Mat predicted ( testClasses.rows , 1 , CV_32F ) ;
	for ( int i = 0; i < testData.rows ; i ++) {
		const Mat sample = testData.row ( i ) ;
		CvDTreeNode * prediction = dtree.predict ( sample ) ;
		predicted.at < float > (i , 0) = prediction->value ;
	}
	cout << " Accuracy_ { TREE } = " << evaluate ( predicted , testClasses ) << endl ;
	plot_binary ( testData , predicted , " Predictions tree " ) ;
}
Beispiel #4
0
void trainFromTxt() {
    FILE* fin = fopen("train.txt","r");
    int N,i;
    training = true;
    fscanf(fin,"%i",&N);
    CvMat *data = cvCreateMat(N,COLS,CV_32F);
    CvMat *resp = cvCreateMat(N,1,CV_32F);
    char c[10];
    float *fdata,*fresp;
    for(i=0; i<N; i++) {
        fdata = data->data.fl + i*COLS;
        fresp = resp->data.fl + i;
        fscanf(fin,"%s",c);
        for (int j=0; j<COLS; j++) {
            fscanf(fin,"%f",&fdata[j]);
        }
        *fresp = (float)c[0];
    }

    printMatrix(data);
    printMatrix(resp);

    CvMat *vartype = cvCreateMat( data->cols + 1, 1, CV_8U );
    unsigned char *vtype = vartype->data.ptr;

    //Tipos de variables de entrada al árbol
    vtype[0]=CV_VAR_NUMERICAL;
    vtype[1]=CV_VAR_NUMERICAL;
    vtype[2]=CV_VAR_NUMERICAL;
    vtype[3]=CV_VAR_NUMERICAL;

    vtype[4]=CV_VAR_CATEGORICAL; //Tipo de la salida del árbol

    ptree = new CvDTree;
    ptree->train(data,CV_ROW_SAMPLE,resp,0,0,vartype,0,CvDTreeParams());
    
}
Beispiel #5
0
/**
 * Creates a matrix for training but feeds it from images taken of folder
 * ./training; Only JPG images are taken into account and all shapes in those
 * images are clasified according to the first letter of the picture.
 */
void train() {
    training = true;
    mostrar = (flags & SH_T) != 0;
    listFiles(DIR_TR,training_image);
    fillMatrix();

    //printMatrix(t_data);
    //printMatrix(t_resp);

    CvMat *vartype = cvCreateMat( t_data->cols + 1, 1, CV_8U );
    unsigned char *vtype = vartype->data.ptr;

    //Tipos de variables de entrada al árbol
    vtype[0]=CV_VAR_NUMERICAL;
    vtype[1]=CV_VAR_NUMERICAL;
    vtype[2]=CV_VAR_NUMERICAL;
    vtype[3]=CV_VAR_NUMERICAL;

    vtype[4]=CV_VAR_CATEGORICAL; //Tipo de la salida del árbol

    if ((flags & F_CHK) != 0) {
        trainMask = cvCreateMat(t_data->rows, 1, CV_8U);
        unsigned char *x = trainMask->data.ptr;
        CvRNG seed = cvRNG(time(0));
        for (int i=0; i<t_data->rows; i++,x++) {
            double p = cvRandReal(&seed);
            if (p < probTrain) {
                *x = 1;
            } else {
                *x=0;
            }
        }
    }

    ptree = new CvDTree;
    ptree->train(t_data,CV_ROW_SAMPLE,t_resp,0,trainMask,vartype,0,CvDTreeParams());
}
Beispiel #6
0
/** 
 * @author     	JIA Pei
 * @version    	2009-10-04
 * @brief      	Training
 * @param      	data     		Input - input data
 * @param		categories		Input - column vector
 * @return		classification time cost
*/
void CClassificationAlgs::Training(const Mat_<float>& data, const Mat_<int>& categories)
{
	unsigned int NbOfSamples = data.rows;
	set<int> ClassSet;
	for(int i = 0; i < categories.rows; i++)
	{
		ClassSet.insert(categories(i, 0));
	}
	this->m_iNbOfCategories = ClassSet.size();
	
	switch(this->m_iClassificationMethod)
	{
		case CClassificationAlgs::DecisionTree:
			this->m_CVDtree.train( 	data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvDTreeParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, false, false, 0 ) );
		break;
		case CClassificationAlgs::Boost:
		    this->m_CVBoost.train( 	data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvBoostParams(CvBoost::DISCRETE, 50, 0.95, INT_MAX, false, 0),
									false );
		break;
		case CClassificationAlgs::RandomForest:
			this->m_CVRTrees.train( data, 
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) );
		break;
		case CClassificationAlgs::ExtremeRandomForest:
			this->m_CVERTrees.train(data,
									CV_ROW_SAMPLE,
									categories,
									Mat(),
									Mat(),
									Mat(),
									Mat(),
									CvRTParams( INT_MAX, 2, 0, false, this->m_iNbOfCategories, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ) );
		break;
		case CClassificationAlgs::SVM:
			this->m_CVSVM.train(	data,
									categories,
									Mat(),
									Mat(),
									CvSVMParams(CvSVM::C_SVC, CvSVM::RBF,
									0, 1, 0,
									1, 0, 0,
									NULL, cvTermCriteria(CV_TERMCRIT_ITER, 1000, 1E-6) ) );
		break;
	}
}
Beispiel #7
0
/*
 * Reads the decision-tree parameters for the given test case from the
 * "validation/<name>/<data set>/model_params" node of the test file storage
 * and trains `tree` on `data` with them.
 *
 * Returns CvTS::OK on success, CvTS::FAIL_INVALID_TEST_DATA when a parameter
 * is missing from the config, and CvTS::FAIL_INVALID_OUTPUT when training
 * fails.
 */
int CV_DTreeTest :: train( int test_case_idx )
{
    int MAX_DEPTH, MIN_SAMPLE_COUNT, MAX_CATEGORIES, CV_FOLDS;
    float REG_ACCURACY = 0;
    bool USE_SURROGATE, IS_PRUNED;
    
    const char* data_name = ((CvFileNode*)cvGetSeqElem( data_sets_names, test_case_idx ))->data.str.ptr;      

    // read validation params
    CvFileStorage* fs = ts->get_file_storage();
    CvFileNode* fnode = cvGetFileNodeByName( fs, 0, "validation" ), *fnode1 = 0;
    fnode = cvGetFileNodeByName( fs, fnode, name );
    fnode = cvGetFileNodeByName( fs, fnode, data_name );
    fnode = cvGetFileNodeByName( fs, fnode, "model_params" );
    fnode1 = cvGetFileNodeByName( fs, fnode, "max_depth" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MAX_DEPTH can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MAX_DEPTH = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "min_sample_count" );
    if ( !fnode1 )
    {
        // report the parameter that is actually missing
        ts->printf( CvTS::LOG, "MIN_SAMPLE_COUNT can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MIN_SAMPLE_COUNT = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "use_surrogate" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "USE_SURROGATE can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    USE_SURROGATE = ( fnode1->data.i!= 0);
    fnode1 = cvGetFileNodeByName( fs, fnode, "max_categories" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "MAX_CATEGORIES can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    MAX_CATEGORIES = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "cv_folds" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "CV_FOLDS can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    CV_FOLDS = fnode1->data.i;
    fnode1 = cvGetFileNodeByName( fs, fnode, "is_pruned" );
    if ( !fnode1 )
    {
        ts->printf( CvTS::LOG, "IS_PRUNED can not be read from config file" );
        return CvTS::FAIL_INVALID_TEST_DATA;
    }
    IS_PRUNED = (fnode1->data.i != 0);

    // the 1SE rule is always off and no priors are passed
    if ( !tree->train( &data, 
       CvDTreeParams(MAX_DEPTH, MIN_SAMPLE_COUNT, REG_ACCURACY, USE_SURROGATE,
       MAX_CATEGORIES, CV_FOLDS, false, IS_PRUNED, 0 )) )
    {
        ts->printf( CvTS::LOG, "in test case %d model training  was failed", test_case_idx );
        return CvTS::FAIL_INVALID_OUTPUT;
    }
    return CvTS::OK;
}
int main(int argc, char **argv)
{

	float priors[] = { 1.0, 10.0 };	// Edible vs poisonos weights

	CvMat *var_type;
	CvMat *data;				// jmh add
	data = cvCreateMat(20, 30, CV_8U);	// jmh add

	var_type = cvCreateMat(data->cols + 1, 1, CV_8U);
	cvSet(var_type, cvScalarAll(CV_VAR_CATEGORICAL));	// all these vars 
	// are categorical
	CvDTree *dtree;
	dtree = new CvDTree;
	dtree->train(data, CV_ROW_SAMPLE, responses, 0, 0, var_type, missing, CvDTreeParams(8,	// max depth
																						10,	// min sample count
																						0,	// regression accuracy: N/A here
																						true,	// compute surrogate split, 
																						//   as we have missing data
																						15,	// max number of categories 
																						//   (use sub-optimal algorithm for
																						//   larger numbers)
																						10,	// cross-validations 
																						true,	// use 1SE rule => smaller tree
																						true,	// throw away the pruned tree branches
																						priors	// the array of priors, the bigger 
																						//   p_weight, the more attention
																						//   to the poisonous mushrooms
				 )
		);

	dtree->save("tree.xml", "MyTree");
	dtree->clear();
	dtree->load("tree.xml", "MyTree");

#define MAX_CLUSTERS 5
	CvScalar color_tab[MAX_CLUSTERS];
	IplImage *img = cvCreateImage(cvSize(500, 500), 8, 3);
	CvRNG rng = cvRNG(0xffffffff);

	color_tab[0] = CV_RGB(255, 0, 0);
	color_tab[1] = CV_RGB(0, 255, 0);
	color_tab[2] = CV_RGB(100, 100, 255);
	color_tab[3] = CV_RGB(255, 0, 255);
	color_tab[4] = CV_RGB(255, 255, 0);

	cvNamedWindow("clusters", 1);

	for (;;) {
		int k, cluster_count = cvRandInt(&rng) % MAX_CLUSTERS + 1;
		int i, sample_count = cvRandInt(&rng) % 1000 + 1;
		CvMat *points = cvCreateMat(sample_count, 1, CV_32FC2);
		CvMat *clusters = cvCreateMat(sample_count, 1, CV_32SC1);

		/* generate random sample from multivariate 
		   Gaussian distribution */
		for (k = 0; k < cluster_count; k++) {
			CvPoint center;
			CvMat point_chunk;
			center.x = cvRandInt(&rng) % img->width;
			center.y = cvRandInt(&rng) % img->height;
			cvGetRows(points, &point_chunk,
					  k * sample_count / cluster_count,
					  k == cluster_count - 1 ? sample_count :
					  (k + 1) * sample_count / cluster_count);
			cvRandArr(&rng, &point_chunk, CV_RAND_NORMAL,
					  cvScalar(center.x, center.y, 0, 0),
					  cvScalar(img->width / 6, img->height / 6, 0, 0));
		}

		/* shuffle samples */
		for (i = 0; i < sample_count / 2; i++) {
			CvPoint2D32f *pt1 = (CvPoint2D32f *) points->data.fl +
				cvRandInt(&rng) % sample_count;
			CvPoint2D32f *pt2 = (CvPoint2D32f *) points->data.fl +
				cvRandInt(&rng) % sample_count;
			CvPoint2D32f temp;
			CV_SWAP(*pt1, *pt2, temp);
		}

		cvKMeans2(points, cluster_count, clusters,
				  cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 10, 1.0));
		cvZero(img);
		for (i = 0; i < sample_count; i++) {
			CvPoint2D32f pt = ((CvPoint2D32f *) points->data.fl)[i];
			int cluster_idx = clusters->data.i[i];
			cvCircle(img, cvPointFrom32f(pt), 2,
					 color_tab[cluster_idx], CV_FILLED);
		}

		cvReleaseMat(&points);
		cvReleaseMat(&clusters);

		cvShowImage("clusters", img);

		int key = cvWaitKey(0);
		if (key == 27)			// 'ESC'
			break;
	}
}