Example #1
void rmse_print(int copt)
{
	double rmse_train=rmse(1);
	double rmse_probe=rmse(2);
	double rmse_both=rmse(3);
	if(!copt) {
		double rmse_train_clipped=cliprmse(1);
		double rmse_probe_clipped=cliprmse(2);
		double rmse_both_clipped=cliprmse(3);
		lg("RMSE Train %f (%.1f%%) Clipped %f (%.1f%%) Probe %f (%.1f%%) Clipped %f Both %f (%.1f%%) Clipped %f\n",
			rmse_train,100.*(last_rmse_train-rmse_train)/rmse_train,
			rmse_train_clipped,100.*(last_rmse_train_clipped-rmse_train_clipped)/rmse_train_clipped,
			rmse_probe,100.*(last_rmse_probe-rmse_probe)/rmse_probe,
			rmse_probe_clipped,100.*(last_rmse_probe_clipped-rmse_probe_clipped)/rmse_probe_clipped,
			rmse_both_clipped,100.*(last_rmse_both_clipped-rmse_both_clipped)/rmse_both_clipped
			);
		last_rmse_probe_clipped=rmse_probe_clipped;
		last_rmse_train_clipped=rmse_train_clipped;
	} else
		lg("RMSE Train %f (%.1f%%) Probe %f (%.1f%%) Both %f (%.1f%%)\n",
			rmse_train,100.*(last_rmse_train-rmse_train)/rmse_train,
			rmse_probe,100.*(last_rmse_probe-rmse_probe)/rmse_probe,
			rmse_both,100.*(last_rmse_both-rmse_both)/rmse_both
		);
	last_rmse_both=rmse_both;
	last_rmse_probe=rmse_probe;
	last_rmse_train=rmse_train;
}
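
The rmse(int) and cliprmse(int) helpers are defined elsewhere in this project; the "Clipped" figures conventionally clamp each prediction into the valid rating range before squaring the error. Below is a minimal, self-contained sketch of that idea, assuming a 1..5 rating scale and parallel prediction/target vectors; the names are illustrative, not the project's own.

#include <cmath>
#include <vector>

// Hypothetical illustration of "clipped" RMSE: clamp each prediction to the
// rating bounds before accumulating the squared error.
double clipped_rmse(const std::vector<double>& pred,
                    const std::vector<double>& target,
                    double lo = 1.0, double hi = 5.0)
{
	double sq = 0.0;
	for (size_t i = 0; i < pred.size(); ++i) {
		double p = pred[i];
		if (p < lo) p = lo;	// clamp into [lo, hi]
		if (p > hi) p = hi;
		sq += (p - target[i]) * (p - target[i]);
	}
	return std::sqrt(sq / pred.size());
}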
Example #2
File: pmf.c Project: nionjo/dm
void  est_mf(MF * mf) {
    fullfill_param(mf);
    double l_rmse = rmse(mf, 0);
    fprintf(stderr, "iter :0 train rmse : %f   test rmse : %f  learn rate : %f\n", l_rmse, rmse(mf,1), mf->p.a);
    int *p = (int*)malloc(sizeof(int) * mf->T);
    for (int i = 0; i < mf->T; i++) p[i] = i;
    int n = 1;
    while (n <= mf->p.niters){
        fprintf(stderr, "iter :%d ", n);
        shuffle(p, mf->T);
        backup(mf);
        for (int j = 0; j < mf->T; j++){
            int id = p[j];
            int uid = mf->u_i[id][0];
            int iid = mf->u_i[id][1];
            int uoff = uid * mf->p.k;
            int ioff = iid * mf->p.k;
            double score = mf->s[id];
            double rscore = mf->mu + mf->bu[uid] + mf->bi[iid];
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++){
                    rscore += mf->pu[uoff + k] * mf->qi[ioff + k];
                }
            }
            if (rscore > mf->max_s) rscore = mf->max_s;
            if (rscore < mf->min_s) rscore = mf->min_s;
            double e = score - rscore;
            mf->bu[uid] += mf->p.a * (e - mf->p.b * mf->bu[uid]);
            mf->bi[iid] += mf->p.a * (e - mf->p.b * mf->bi[iid]);
            if (n > mf->p.nbias) {
                for (int k = 0; k < mf->p.k; k++){
                    double tmp = mf->pu[uoff + k];
                    mf->pu[uoff + k] += mf->p.a * (e * mf->qi[ioff + k] - mf->p.b * mf->pu[uoff + k]);
                    mf->qi[ioff + k] += mf->p.a * (e * tmp              - mf->p.b * mf->qi[ioff + k]);
                }
            }
        }
        double c_rmse = rmse(mf, 0);
        if (c_rmse < l_rmse){
            mf->p.a *= 0.9;   /* epoch improved: keep it and anneal the step */
            l_rmse = c_rmse;
            n += 1;
            double v_rmse = rmse(mf, 1);
            fprintf(stderr, "train rmse : %f   test rmse : %f  learn rate : %f\n", c_rmse, v_rmse, mf->p.a);
            if (n % mf->p.savestep == 0){
                save_mf(mf, n);
            }
        }
        else{
            recover(mf);      /* epoch hurt: roll back, shrink the step, retry */
            mf->p.a *= 0.8;
            fprintf(stderr, "run failed, try again\n");
        }
    }
    free(p); p = NULL;
}
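
The rmse(MF *, int) evaluator called above lives elsewhere in pmf.c. The sketch below is a hypothetical stand-in for its training-set case only: it reuses exactly the prediction rule of the SGD loop (global mean plus both biases plus the latent dot product, clipped to [min_s, max_s]) over a reduced, assumed slice of the MF struct.

#include <math.h>

/* Reduced, hypothetical slice of the MF struct -- only the fields the
 * prediction rule needs; the real pmf.c struct has more members. */
typedef struct {
    int T, k;                 /* number of ratings, latent dimension */
    int (*u_i)[2];            /* (user, item) index pairs            */
    double *s;                /* observed scores                     */
    double mu, *bu, *bi;      /* global mean, user/item biases       */
    double *pu, *qi;          /* latent factors, row-major, k per id */
    double min_s, max_s;      /* rating bounds                       */
} MFSlice;

/* Train-set RMSE under the same prediction rule as the SGD loop above. */
double mf_train_rmse(const MFSlice *mf) {
    double sq = 0.0;
    for (int id = 0; id < mf->T; id++) {
        int uid = mf->u_i[id][0], iid = mf->u_i[id][1];
        double r = mf->mu + mf->bu[uid] + mf->bi[iid];
        for (int f = 0; f < mf->k; f++)
            r += mf->pu[uid * mf->k + f] * mf->qi[iid * mf->k + f];
        if (r > mf->max_s) r = mf->max_s;
        if (r < mf->min_s) r = mf->min_s;
        sq += (mf->s[id] - r) * (mf->s[id] - r);
    }
    return sqrt(sq / mf->T);
}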
Example #3
void Sbs2SourceReconstructionSparse::cross_validation_k_channel(DTU::DtuArray2D<double> *Y_mean_0, DTU::DtuArray2D<double> *estimated_S)
{
    double lambda = 0.0;
    double rmse_temp = 0.0;
    vector<double> rmse(lambdas.size());
    int pos_best_lambda = 0;

    for(int i=0 ; i<lambdas.size() ; ++i)
    {
	lambda = lambdas.at(i);

	qDebug() << lambda << i;

	fista_method_group_lasso_v2(A_normalized, Y_mean_0, lambda, L, estimated_S);
	rootMeanSquareError(Y_mean_0, A_normalized, estimated_S, &rmse_temp);
	rmse[i] = rmse_temp;
	if (i != 0 && rmse[i] < rmse[0])
	{
	    rmse[0] = rmse[i];	// rmse[0] doubles as the running minimum
	    pos_best_lambda = i;
	}
    }

    double best_lambda = lambdas[pos_best_lambda];
    fista_method_group_lasso_v2(A_normalized, Y_mean_0, best_lambda, L, S_temp);
}
Example #4
template <typename D, typename M, typename AlgoInputIterator>
void validate_algorithms(const D &learning, const M &learning_mask,
						   const D &validation, const M &validation_mask,
						   AlgoInputIterator algo_begin, AlgoInputIterator algo_end,
						   size_t verbosity = 0)
{
	// Average user's and product's ratings
	if (verbosity >= 1)
	{
		std::cout << "Average user's and product's ratings...";
	}
	itpp::vec avg_users_rating(learning.rows());
	itpp::vec avg_products_rating(learning.cols());
	avg_users_rating.zeros();
	avg_products_rating.zeros();
	avg_ratings(learning, learning_mask, avg_users_rating, avg_products_rating);
	if (verbosity >= 1)
	{
		std::cout << "Done." << std::endl;
	}
	
	if (verbosity >= 2)
	{
		std::cout << "Avg. users' ratings: \n" << avg_users_rating << std::endl;
		std::cout << "Avg. products' ratings: \n" << avg_products_rating << std::endl;
	}

	// Users' resemblance
	itpp::mat user_resemblance(learning.rows(), learning.rows());
	itpp::bmat user_resemblance_mask(user_resemblance.rows(), user_resemblance.cols());
	user_resemblance.zeros();
	user_resemblance_mask.zeros();

	// compute users' resemblance on demand
	user_resemblance_itpp_t u_resemblance(learning, 
										  user_resemblance, user_resemblance_mask, 
										  correlation_coeff_resembl_metric_t());

	// Validate algorithms
	itpp::mat algo_prediction(learning.rows(), learning.cols());
	for (AlgoInputIterator i = algo_begin; i != algo_end; ++i)
	{
		algo_prediction.zeros();
		if (verbosity >= 1)
		{
			std::cout << (*i)->name();
		}
		(**i)(algo_prediction, learning, learning_mask, 
			  u_resemblance, avg_users_rating, avg_products_rating);
		if (verbosity >= 1)
		{
			std::cout << "Done." << std::endl;
		}

		// RMSE
		float algo_rmse = rmse(validation, algo_prediction);
		std::cout << (*i)->name() << ": \n" << algo_prediction << std::endl;
		std::cout << (*i)->name() << " RMSE: \n" << algo_rmse << std::endl;
	}
}
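
The rmse(validation, algo_prediction) overload used above is not part of this listing. What follows is a minimal sketch of such an it++ matrix overload, written as a hypothetical; a production version would more likely honor validation_mask and skip unrated cells.

#include <itpp/itbase.h>
#include <cmath>

// Hypothetical dense-matrix RMSE matching the call above; a masked variant
// would accumulate only the cells flagged as rated in validation_mask.
float rmse(const itpp::mat &truth, const itpp::mat &pred)
{
	double sq = 0.0;
	for (int r = 0; r < truth.rows(); ++r)
		for (int c = 0; c < truth.cols(); ++c)
		{
			double e = truth(r, c) - pred(r, c);
			sq += e * e;
		}
	return std::sqrt(sq / (truth.rows() * truth.cols()));
}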
Example #5
void testBivariateStats(){

 
 float x[]={95.0,85.0,80.0,70.0,60.0};
 float y[]={85.0,95.0,70.0,65.0,70.0};
 regressionCoefficients coeffs;
 
 coeffs=regression(x,y,5);
 
 printf("\nBivariate Stats\n"); 
 printf("correlation =%f\n",correlation(x,y,5));
 printf("covariance =%f\n",covariance(x,y,5));
 printf("rmse =%f\n",rmse(x,y,5));
 printf("bias =%f\n",bias(x,y,5));
 printf("m =%f\n",coeffs.m);
 printf("c =%f\n",coeffs.c);
 
}
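
None of the bivariate helpers exercised here are shown in the listing. Below are plausible definitions of the two error metrics, under the usual conventions; they are hypothetical, and the project's own versions may differ (for instance in the sign convention of bias).

#include <math.h>

/* Plausible definitions -- hypothetical -- of the two error metrics above. */
float rmse(const float *x, const float *y, int n)
{
	float sq = 0.0f;
	for (int i = 0; i < n; i++)
		sq += (x[i] - y[i]) * (x[i] - y[i]);
	return sqrtf(sq / n);	/* root of the mean squared difference */
}

float bias(const float *x, const float *y, int n)
{
	float sum = 0.0f;
	for (int i = 0; i < n; i++)
		sum += x[i] - y[i];	/* mean signed difference */
	return sum / n;
}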
Example #6
void RMSEProbe(vector< Rating > & data){
    double ret = 0;
    double n = 0;
    vector<double> rmse(5, 0);	// per-star squared-error sums
    vector<double> nn(5, 0);	// per-star counts
    for(size_t i = 0; i < data.size(); ++i){
        if(data[i].test != 1) continue;	// probe (test) ratings only
        double rui = data[i].value();
        double pui = rating(data[i]);
        ret += (rui - pui) * (rui - pui);
        n++;
        rmse[(int)(rui) - 1] += (rui - pui) * (rui - pui);
        nn[(int)(rui) - 1]++;
    }
    cout << sqrt(ret / n) << endl;	// overall probe RMSE
    for(int k = 0; k < 5; ++k){
        cout << k + 1 << "\t" << sqrt(rmse[k] / nn[k]) << endl;	// RMSE per true rating value
    }
}
Example #7
DoubleReal ElutionPeakDetection::computeMassTraceNoise(const MassTrace& tr)
{
    // compute RMSE
    DoubleReal squared_sum(0.0);
    std::vector<DoubleReal> smooth_ints(tr.getSmoothedIntensities());

    for (Size i = 0; i < smooth_ints.size(); ++i)
    {
        squared_sum += (tr[i].getIntensity() - smooth_ints[i])*(tr[i].getIntensity() - smooth_ints[i]);
    }

    DoubleReal rmse(0.0);

    if (smooth_ints.size() > 0)
    {
        rmse = std::sqrt(squared_sum/smooth_ints.size());
    }

    return rmse;
}
Example #8
//ag_merge [-n _start_N_value_] [-a _start_alpha_value_] -d _directory1_ _directory2_ [_directory3_] 
//[_directory4_] ...
int main(int argc, char* argv[])
{	
	try{
//0. Set log file
	LogStream clog;
	LogStream::init(true);
	clog << "\n-----\nag_merge ";
	for(int argNo = 1; argNo < argc; argNo++)
		clog << argv[argNo] << " ";
	clog << "\n\n";

//1. Set input parameters from command line 

	int startTiGN = 1;
	double startAlpha = 0.5;
	int firstDirNo = 0;

	stringv args(argc); 
	for(int argNo = 0; argNo < argc; argNo++)
		args[argNo] = string(argv[argNo]);

	 //parse and save input parameters
	for(int argNo = 1; argNo < argc; argNo += 2)
	{
		if(!args[argNo].compare("-n"))
			startTiGN = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-a"))
			startAlpha = atofExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-d"))
		{
			firstDirNo = argNo + 1;
			break;
		}
		else
			throw INPUT_ERR;
	}

	//check that there are at least two directories 
	if(argc < (firstDirNo + 2))
		throw INPUT_ERR;

	//convert names of input directories to strings and check that they exist
	int folderN = argc - firstDirNo;
	stringv folders(folderN); 
	for(int argNo = firstDirNo; argNo < argc; argNo++)
	{
		folders[argNo - firstDirNo] = string(argv[argNo]);
		struct stat status;
		if((stat(argv[argNo], &status) != 0) || !(status.st_mode & S_IFDIR))
			throw DIR_ERR;
	}

//1.a) delete all temp files from the previous run and create a directory AGTemp
#ifdef _WIN32	//in windows
	CreateDirectory("AGTemp", NULL);
#else // in linux
	system("rm -rf ./AGTemp/");
	system("mkdir ./AGTemp/");
#endif

//2. Set parameters from AGTemp/params.txt from the first directory
	TrainInfo ti;			//set of model parameters in the current directory
	double prevBest;		//best value of performance achieved on the previous run
			
	fstream fparam;
	string paramPathName = folders[0] + "/AGTemp/params.txt";
	fparam.open(paramPathName.c_str(), ios_base::in); 
	string modeStr, metric;
	fparam >> ti.seed >> ti.trainFName >> ti.validFName >> ti.attrFName >> ti.minAlpha >> ti.maxTiGN 
		>> ti.bagN >> modeStr >> metric;	

	//modeStr should be "fast" or "slow" or "layered"	
	if(modeStr.compare("fast") == 0)
		ti.mode = FAST;
	else if(modeStr.compare("slow") == 0)
		ti.mode = SLOW;
	else if(modeStr.compare("layered") == 0)
		ti.mode = LAYERED;
	else
		throw TEMP_ERR;

	//metric should be "roc" or "rms"
	if(metric.compare("rms") == 0)
		ti.rms = true;
	else if(metric.compare("roc") == 0)
		ti.rms = false;
	else
		throw TEMP_ERR;

	if(fparam.fail())
		throw TEMP_ERR;
	fparam.close();
	fparam.clear();

	//read best value of performance on previous run
	fstream fbest;
	double stub;
	int itemN; // number of data points in the train set, need to calculate possible values of alpha
	string fbestPathName = folders[0] + "/AGTemp/best.txt";
	fbest.open(fbestPathName.c_str(), ios_base::in); 
	fbest >> prevBest >> stub >> stub >> stub >> itemN;
	if(fbest.fail())
		throw TEMP_ERR;
	fbest.close();

	int alphaN = getAlphaN(ti.minAlpha, itemN); //number of different alpha values
	int tigNN = getTiGNN(ti.maxTiGN);

	//direction of initialization (1 - up, 0 - right), used in fast mode only
	doublevv dir(tigNN, doublev(alphaN, 0)); 
	//outer array: column (by TiGN)
	//middle array: row	(by alpha)
	
	//direction of initialization (1 - up, 0 - right), collects average in the slow mode
	doublevv dirStat(tigNN, doublev(alphaN, 0));

	if(ti.mode == FAST)
	{//read part of the directions table from file
		fstream fdir;
		string fdirPathName = folders[0] + "/AGTemp/dir.txt";
		fdir.open(fdirPathName.c_str(), ios_base::in); 
		for(int tigNNo = 0; tigNNo < tigNN; tigNNo++)
			for(int alphaNo = 0; alphaNo < alphaN; alphaNo++)
				fdir >> dir[tigNNo][alphaNo];
		if(fdir.fail())
			throw TEMP_ERR;
		fdir.close();
	}

//3. Read main parameters from all other directories and check that they match

	int allBagN = ti.bagN;
	intv bagNs(folderN, 0);
	bagNs[0] = ti.bagN;
	intv prevBagNs(folderN + 1, 0); //sums of bagNs of all previous directories
	prevBagNs[1] = ti.bagN;
	int lastSeed = ti.seed;
	for(int folderNo = 1; folderNo < folderN; folderNo++)
	{
		TrainInfo extraTI;	//set of model parameters in the additional directory
		
		string fparamPathName = folders[folderNo] + "/AGTemp/params.txt";
		fparam.open(fparamPathName.c_str(), ios_base::in); 
		fparam >> extraTI.seed >> extraTI.trainFName >> extraTI.validFName >> extraTI.attrFName 
			>> extraTI.minAlpha >> extraTI.maxTiGN >> extraTI.bagN;	

		if(fparam.fail())
		{
			clog << fparamPathName << '\n';
			throw TEMP_ERR;
		}
		fparam.close();

		if((ti.minAlpha != extraTI.minAlpha) || (ti.maxTiGN != extraTI.maxTiGN))
		  {
		    clog << fparamPathName << '\n';
			throw MERGE_MISMATCH_ERR;
		  }
		if(extraTI.seed == ti.seed)
			throw SAME_SEED_ERR;
		if(folderNo == (folderN - 1))
			lastSeed = extraTI.seed;

		allBagN += extraTI.bagN;
		bagNs[folderNo] = extraTI.bagN;
		prevBagNs[folderNo + 1] = allBagN;

		string fdirStatPathName = folders[folderNo] + "/AGTemp/dirstat.txt";
		fstream fdirStat;	
		fdirStat.open(fdirStatPathName.c_str(), ios_base::in);	//read dirstat from the extra directory, not the output directory
		for(int alphaNo = 0; alphaNo < alphaN; alphaNo++)
			for(int tigNNo = 0; tigNNo < tigNN; tigNNo++)
			{
				double ds;
				fdirStat >> ds;
				dirStat[tigNNo][alphaNo] += ds * extraTI.bagN;
			}
	}

//4. Load data
	INDdata data("", ti.validFName.c_str(), "", ti.attrFName.c_str());
	CGrove::setData(data);
	CTreeNode::setData(data);

	doublev validTar;
	int validN = data.getTargets(validTar, VALID);

	clog << "Alpha = " << ti.minAlpha << "\nN = " << ti.maxTiGN << "\n" 
		<< allBagN << " bagging iterations\n";
	if(ti.mode == FAST)
		clog << "fast mode\n\n";
	else if(ti.mode == SLOW)
		clog << "slow mode\n\n";
	else //if(ti.mode == LAYERED)
		clog << "layered mode\n\n";

	//5. Initialize some internal process variables

	//surfaces of performance values for validation set. 
	//Always calculate rms (for convergence analysis), if needed, calculate roc
	doublevvv rmsV(tigNN, doublevv(alphaN, doublev(allBagN, 0))); 
	doublevvv rocV;
	if(!ti.rms)
		rocV.resize(tigNN, doublevv(alphaN, doublev(allBagN, 0))); 
	//outer array: column (by TiGN)
	//middle array: row (by alpha)
	//inner array: bagging iterations. Performance is kept for all iterations to create bagging curves

	//sums of predictions for each data point (raw material to calculate performance)
	doublevvv predsumsV(tigNN, doublevv(alphaN, doublev(validN, 0)));
	//outer array: column (by TiGN)
	//middle array: row	(by alpha)
	//inner array: data points in the validation set
	

//6. Read and merge models from the directories
	int startAlphaNo = getAlphaN(startAlpha, itemN) - 1; 
	int startTiGNNo = getTiGNN(startTiGN) - 1;

	for(int alphaNo = startAlphaNo; alphaNo < alphaN; alphaNo++)
	{
		double alpha;
		if(alphaNo < alphaN - 1)
			alpha = alphaVal(alphaNo);
		else	//this is a special case because minAlpha can be zero
			alpha = ti.minAlpha;

		cout << "Merging models with alpha = " << alpha << endl;

		for(int tigNNo = startTiGNNo; tigNNo < tigNN; tigNNo++) 
		{
			int tigN = tigVal(tigNNo);	//number of trees in the current grove

			//temp file in the extra directory that keeps models corresponding to alpha and tigN
			string prefix = string("/AGTemp/ag.a.") 
								+ alphaToStr(alpha)
								+ ".n." 
								+ itoa(tigN, 10);
			string tempFName = prefix + ".tmp";

			//this will kill the pre-existing file in the output directory
			fstream fsave((string(".") + tempFName).c_str(), ios_base::binary | ios_base::out);	

			for(int folderNo = 0; folderNo < folderN; folderNo++)
			{
				string inTempFName = folders[folderNo] + tempFName;
				fstream ftemp((inTempFName).c_str(), ios_base::binary | ios_base::in);
				if(ftemp.fail())
				{
				    clog << inTempFName << '\n';
					throw TEMP_ERR;
				}
			
				//merge all extra models with the same (alpha, tigN) parameter values into existing models
				for(int bagNo = prevBagNs[folderNo]; bagNo < prevBagNs[folderNo + 1]; bagNo++)
				{
					//retrieve next grove
					CGrove extraGrove(alpha, tigN);
					try{
					extraGrove.load(ftemp);
					}catch(TE_ERROR err){
					  clog << inTempFName << '\n';
					  throw err;
					}
					//add the loaded grove to a model file with alpha and tigN values in the name
					extraGrove.save((string(".") + tempFName).c_str());

					//generate predictions and performance for validation set
					doublev predictions(validN);
					for(int itemNo = 0; itemNo < validN; itemNo++)
					{
						predsumsV[tigNNo][alphaNo][itemNo] += extraGrove.predict(itemNo, VALID);
						predictions[itemNo] = predsumsV[tigNNo][alphaNo][itemNo] / (bagNo + 1);
					}
					if(bagNo == allBagN - 1)
					{
						string predsFName = prefix + ".preds.txt";
						fstream fpreds((string(".") + predsFName).c_str(), ios_base::out);
						for(int itemNo = 0; itemNo < validN; itemNo++)
							fpreds << predictions[itemNo] << endl;
						fpreds.close();
					}

					rmsV[tigNNo][alphaNo][bagNo] = rmse(predictions, validTar);
					if(!ti.rms)
						rocV[tigNNo][alphaNo][bagNo] = roc(predictions, validTar);

				}// end for(int bagNo = ti.bagN; bagNo < ti.bagN + extraTI.bagN; bagNo++)
				ftemp.close();
			}//end for(int folderNo = 0; folderNo < folderN; folderNo++)
		}//end for(int tigNNo = 0; tigNNo < tigNN; tigNNo++) 
	}//end for(int alphaNo = 0; alphaNo < alphaN; alphaNo++)

	//7. Output
	ti.bagN = allBagN;
	ti.seed = lastSeed;
	if(ti.rms)
		trainOut(ti, dir, rmsV, rmsV, predsumsV, itemN, dirStat, startAlphaNo, startTiGNNo);
	else
		trainOut(ti, dir, rmsV, rocV, predsumsV, itemN, dirStat, startAlphaNo, startTiGNNo);

	}catch(TE_ERROR err){
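
Examples #8, #9 and #13 all score bagged predictions with rmse(predictions, validTar) over doublev, the Additive Groves typedef for std::vector<double>. The sketch below shows the plausible shape of that shared helper, under the stated typedef assumption; the real one may validate sizes and throw.

#include <cmath>
#include <vector>

typedef std::vector<double> doublev;

// Plausible shape of the shared rmse(predictions, targets) helper.
double rmse(const doublev &preds, const doublev &tar)
{
	double sq = 0.0;
	for (size_t i = 0; i < preds.size(); ++i)
		sq += (preds[i] - tar[i]) * (preds[i] - tar[i]);
	return std::sqrt(sq / preds.size());
}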
Example #9
int main(int argc, char* argv[])
{	
	try{

//1. Analyze input parameters
	//convert input parameters to string from char*
	stringv args(argc); 
	for(int argNo = 0; argNo < argc; argNo++)
		args[argNo] = string(argv[argNo]);
	
	//check that the number of arguments is even (flags + value pairs)
	if(argc % 2 == 0)
		throw INPUT_ERR;

#ifndef _WIN32
	int threadN = 6;	//number of threads
#endif

	TrainInfo ti; //model training parameters
	int topAttrN = 0;  //how many top attributes to output and keep in the cut data 
						//(0 = do not do feature selection)
						//(-1 = output all available features)

	//parse and save input parameters
	//indicators of presence of required flags in the input
	bool hasTrain = false;
	bool hasVal = false; 
	bool hasAttr = false; 

	int treeN = 100;
	double shrinkage = 0.01;
	double subsample = -1;

	for(int argNo = 1; argNo < argc; argNo += 2)
	{
		if(!args[argNo].compare("-t"))
		{
			ti.trainFName = args[argNo + 1];
			hasTrain = true;
		}
		else if(!args[argNo].compare("-v"))
		{
			ti.validFName = args[argNo + 1];
			hasVal = true;
		}
		else if(!args[argNo].compare("-r"))
		{
			ti.attrFName = args[argNo + 1];
			hasAttr = true;
		}
		else if(!args[argNo].compare("-a"))
			ti.alpha = atofExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-n"))
			treeN = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-i"))
			ti.seed = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-k"))
			topAttrN = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-sh"))
			shrinkage = atofExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-sub"))
			subsample = atofExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-c"))
		{
			if(!args[argNo + 1].compare("roc"))
				ti.rms = false;
			else if(!args[argNo + 1].compare("rms"))
				ti.rms = true;
			else
				throw INPUT_ERR;
		}
		else if(!args[argNo].compare("-h"))
#ifndef _WIN32 
			threadN = atoiExt(argv[argNo + 1]);
#else
			throw WIN_ERR;
#endif
		else
			throw INPUT_ERR;
	}//end for(int argNo = 1; argNo < argc; argNo += 2) //parse and save input parameters

	if(!(hasTrain && hasVal && hasAttr))
		throw INPUT_ERR;

	if((ti.alpha < 0) || (ti.alpha > 1))
		throw ALPHA_ERR;

//1.a) Set log file
	LogStream clog;
	LogStream::init(true);
	clog << "\n-----\ngbt_train ";
	for(int argNo = 1; argNo < argc; argNo++)
		clog << argv[argNo] << " ";
	clog << "\n\n";

//1.b) Initialize random number generator. 
	srand(ti.seed);

//2. Load data
	INDdata data(ti.trainFName.c_str(), ti.validFName.c_str(), ti.testFName.c_str(), 
				 ti.attrFName.c_str());
	CTree::setData(data);
	CTreeNode::setData(data);

//2.a) Start thread pool
#ifndef _WIN32
	TThreadPool pool(threadN);
	CTree::setPool(pool);
#endif

//------------------
	int attrN = data.getAttrN();
	if(topAttrN == -1)
		topAttrN = attrN;
	idpairv attrCounts;	//counts of attribute importance
	bool doFS = (topAttrN != 0);	//whether feature selection is requested
	if(doFS)
	{//initialize attrCounts
		attrCounts.resize(attrN);
		for(int attrNo = 0; attrNo < attrN; attrNo++)
		{
			attrCounts[attrNo].first = attrNo;	//number of attribute	
			attrCounts[attrNo].second = 0;		//counts
		}
	}

	fstream frmscurve("boosting_rms.txt", ios_base::out); //boosting curve (rms)
	frmscurve.close();
	fstream froccurve;
	if(!ti.rms)
	{
		froccurve.open("boosting_roc.txt", ios_base::out); //boosting curve (roc) 
		froccurve.close();
	}

	doublev validTar;
	int validN = data.getTargets(validTar, VALID);

	doublev trainTar;
	int trainN = data.getTargets(trainTar, TRAIN);

	int sampleN;
	if(subsample == -1)
		sampleN = trainN;
	else
		sampleN = (int) (trainN * subsample);
	
	doublev validPreds(validN, 0);
	doublev trainPreds(trainN, 0);
	
	for(int treeNo = 0; treeNo < treeN; treeNo++)
	{
		if(treeNo % 10 == 0)
			cout << "\titeration " << treeNo + 1 << " out of " << treeN << endl;

		if(subsample == -1)
			data.newBag();
		else
			data.newSample(sampleN);

		CTree tree(ti.alpha);
		tree.setRoot();
		tree.resetRoot(trainPreds);
		tree.grow(doFS, attrCounts);

		//update predictions
		for(int itemNo = 0; itemNo < trainN; itemNo++)
			trainPreds[itemNo] += shrinkage * tree.predict(itemNo, TRAIN);
		for(int itemNo = 0; itemNo < validN; itemNo++)
			validPreds[itemNo] += shrinkage * tree.predict(itemNo, VALID);

		//output
		frmscurve.open("boosting_rms.txt", ios_base::out | ios_base::app); 
		frmscurve << rmse(validPreds, validTar) << endl;
		frmscurve.close();
		
		if(!ti.rms)
		{
			froccurve.open("boosting_roc.txt", ios_base::out | ios_base::app); 
			froccurve << roc(validPreds, validTar) << endl;
			froccurve.close();
		}

	}

	//output feature selection results
	if(doFS)
	{
		sort(attrCounts.begin(), attrCounts.end(), idGreater);
		if(topAttrN > attrN)
			topAttrN = attrN;

		fstream ffeatures("feature_scores.txt", ios_base::out);
		ffeatures << "Top " << topAttrN << " features\n";
		for(int attrNo = 0; attrNo < topAttrN; attrNo++)
			ffeatures << data.getAttrName(attrCounts[attrNo].first) << "\t"
			<< attrCounts[attrNo].second / treeN / trainN << "\n";	//normalize by the number of trees built (ti.bagN is never set by gbt_train)
		ffeatures << "\n\nColumn numbers (beginning with 1)\n";
		for(int attrNo = 0; attrNo < topAttrN; attrNo++)
			ffeatures << data.getColNo(attrCounts[attrNo].first) + 1 << " ";
		ffeatures << "\nLabel column number: " << data.getTarColNo() + 1;
		ffeatures.close();

		//output new attribute file
		for(int attrNo = topAttrN; attrNo < attrN; attrNo++)
			data.ignoreAttr(attrCounts[attrNo].first);
		data.outAttr(ti.attrFName);
	}

	//output predictions
	fstream fpreds;
	fpreds.open("preds.txt", ios_base::out);
	for(int itemNo = 0; itemNo < validN; itemNo++)
		fpreds << validPreds[itemNo] << endl;
	fpreds.close();

//------------------

	}catch(TE_ERROR err){
Example #10
/** call: ./main <matrix_dimension> <number_of_tests> <use_gpu>*/
int main(int argc, char* argv[])
{
	cuda_identify();

	if (argc != 4) {
		printf("program must be called with arguments: matrix_dimension tests_number use_gpu(0/1)\n");
		exit(1);
	}
	const int M = atoi(argv[1]);
	printf("Using matrix dimension: %d\n", M);
	const int tests = atoi(argv[2]);
	const bool cpu = !atoi(argv[3]);

	// always use the same seed to get the same matrices during tests
	srand(0);

	#ifdef DOUBLE
		const fp_t min_diff = 0.00000001;	//for double, fails with 8192 and floats on both cpu and gpu
	#else
		const fp_t min_diff = 0.000001;
	#endif
	const fp_t alpha = 0.9;
	const int max_iter = 50;

	fp_t* exec_times = malloc(tests * sizeof(fp_t));
	fp_t* all_rmse = malloc(tests * sizeof(fp_t));
	for (int k = 0; k < tests; k++) {

		const DataSet dataset = generate_dataset(M);

		Matrix* last_x = aligned_vector(M, true);
		Matrix* x = aligned_vector(M, true);

		int iterations = 0;

		// solve Ax = b
		const fp_t start_time = omp_get_wtime();

		fp_t sum = 0;
		int j = 0;
		int i = 0;
		const Matrix* A = dataset.A;
		const Matrix* b = dataset.b;
		assert(x != last_x);

		if (cpu) {
			//#pragma omp parallel shared(last_x, x, iterations) private(i, j, sum)
			while ((matrix_diff(x, last_x) > min_diff) && (max_iter < 0 || iterations < max_iter)) {
				//fp_t st_time0 = omp_get_wtime();
				//#pragma omp single
				{
					swap(last_x, x);
				}

				// A, M, alpha and b are constant, so they cannot be declared as shared
				//#pragma omp for schedule(dynamic)
				for (i = 0; i < M; i++) {
					sum = 0;

					//#pragma omp simd aligned(A, last_x: 16) reduction(+:sum) linear(j)
					for (j = 0; j < M; j++) {
						sum += A->elements[i * M + j] * last_x->elements[j];
					}

					sum -= A->elements[i * M + i] * last_x->elements[i];	// opt: outside the loop for sse optimizer
					x->elements[i] = (1 - alpha) * last_x->elements[i] + alpha * (b->elements[i] - sum) / A->elements[i * M + i];
				}

				//#pragma omp single nowait
				{
					iterations++;
				}
				//printf("%dus spent\n", (int)((omp_get_wtime() - st_time0) * 1000000));
			}
		} else {
			Matrix* d_A = device_matrix_from(A);
			#ifndef DOUBLE
				#ifdef TEXTURE
					texbind(d_A->elements, d_A->size * sizeof(fp_t));
				#endif
			#endif
			cudaMemcpy(d_A->elements, A->elements, A->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			Matrix* d_b = device_matrix_from(b);
			cudaMemcpy(d_b->elements, b->elements, b->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			Matrix* d_last_x = device_matrix_from(last_x);
			Matrix* d_c = device_matrix_from(b);
			Matrix* d_x = device_matrix_from(x);
			cudaMemcpy(d_x->elements, x->elements, x->size * sizeof(fp_t), cudaMemcpyHostToDevice);
			cudaMemcpy(d_last_x->elements, last_x->elements, last_x->size * sizeof(fp_t), cudaMemcpyHostToDevice);

			fp_t x_diff = 2 * min_diff;
			fp_t* d_x_diff;
			cudaMalloc((void**)&d_x_diff, sizeof(fp_t));

			//fp_t stime;
			while ((x_diff > min_diff) && (max_iter < 0 || iterations < max_iter)) {
				//stime = omp_get_wtime();
				cuda_multiply(*d_A, *d_last_x, *d_c);
				//print_cuda_elapsed(stime);

				//stime = omp_get_wtime();
				cuda_reduce(*d_A, *d_b, *d_c, d_x, d_last_x, alpha); //performs swap
				//print_cuda_elapsed(stime);

				//stime = omp_get_wtime();
				cuda_diff(*d_x, *d_last_x, d_x_diff);
				//print_cuda_elapsed(stime);

				iterations++;
				//cudaMemcpyFromSymbol(&x_diff, "d_x_diff", sizeof(x_diff), 0, cudaMemcpyDeviceToHost);
				//stime = omp_get_wtime();
				cudaMemcpy(&x_diff, d_x_diff, sizeof(fp_t), cudaMemcpyDeviceToHost);
				//print_cuda_elapsed(stime);
			}
			// copy last_x instead, as it was swapped
			cudaMemcpy(x->elements, d_last_x->elements, x->size * sizeof(fp_t), cudaMemcpyDeviceToHost);

			#ifndef DOUBLE
				#ifdef TEXTURE
					texunbind();
				#endif
			#endif
			cudaFree(d_A->elements);
			cudaFree(d_b->elements);
			cudaFree(d_last_x->elements);
			cudaFree(d_c->elements);
			cudaFree(d_x->elements);
			cudaFree(d_x_diff);

			free(d_A);
			free(d_b);
			free(d_c);
			free(d_last_x);
			free(d_x);
		}
		const fp_t end_time = omp_get_wtime();
		const fp_t seconds_spent = end_time - start_time;
		exec_times[k] = seconds_spent;

		if (verbose) {
			printf("x: ");
			print_matrix(x);
			printf("expected_x: ");
			print_matrix(dataset.x);
			//print_matrix(dataset.A);
			//print_matrix(dataset.b);
		}
		Matrix* bx = aligned_vector(M, false);
		for (int i = 0; i < M; i++) {
			for (int j = 0; j < M; j++) {
				bx->elements[i] += A->elements[i * M + j] * x->elements[j];
			}
		}
		if (verbose) {
			printf("resulting b: ");
			print_matrix(bx);
		}
		all_rmse[k] = rmse(bx, b);
		printf("RMSE: %0.10f\n", all_rmse[k]);
		printf("iterations: %d\nseconds: %0.10f\n", iterations, seconds_spent);

		assert(x != last_x);

		free(bx->elements);
		free(x->elements);
		free(last_x->elements);
		free(dataset.x->elements);
		free(dataset.A->elements);
		free(dataset.b->elements);

		free(bx);
		free(x);
		free(last_x);
		free(dataset.x);
		free(dataset.A);
		free(dataset.b);
	}
	printf("Time: mean %0.10f std %0.10f\n", array_mean(exec_times, tests), array_std(exec_times, tests));
	printf("RMSE: mean %0.10f std %0.10f\n", array_mean(all_rmse, tests), array_std(all_rmse, tests));
	free(all_rmse);
	free(exec_times);

	return 0;
}
Example #11
int main(int argc, char** argv){
double tstart, tstop, ttime;
tstart = (double)clock()/CLOCKS_PER_SEC;
    srand(time(0));
    std::string csv_value;
    const char* csv;
    char* end;
    std::ifstream file_r;
    file_r.open(argv[1]);
    std::vector<double> csv_values;
    while(std::getline(file_r,csv_value,',')){	// looping on getline avoids the stale extra iteration that while(file_r.good()) allows
	std::getline(file_r,csv_value,',');
	std::cout<<csv_value<<std::endl;	
	csv=csv_value.c_str();
	csv_values.push_back(std::strtod(csv,&end));
    }
    std::vector<Vel_pos> dat_l;
    std::vector<Vel_pos> dat_t;
    int t=csv_values.size();
    int t_l=0.8*t;
    int t_t=t-t_l;
    int sep=0; 
    std::cout<<t_l<<"  "<<t_t<<std::endl;
    for(std::vector<double>::iterator it=csv_values.begin(); it!=csv_values.end(); it++){
	if(sep<t_l)	
		dat_l.push_back(Vel_pos(*it,1));
	else
		dat_t.push_back(Vel_pos(*it,1));
	sep++;
    }
    int se=std::atoi(argv[3]);
    int me=std::atoi(argv[4]);
    int sss=std::atoi(argv[5]);
    int part_num=std::atoi(argv[6]);
    int c=std::atoi(argv[7]);
    int pso_iter=std::atoi(argv[8]);
    double c1=std::atof(argv[9]);
    double c2=std::atof(argv[10]);
    double c3=std::atof(argv[11]);
    double omega=std::atof(argv[12]);
    double m=std::atof(argv[13]);
    Fuzzy_C_Means_Clustering f1(dat_l,c,1,m,t_l);
    for(int i=0;i<15;i++){
        f1.do_next_it();
        std::cout<<"obj_func_fcm: "<<f1.get_obj_func(f1.get_u())<<std::endl;
    }
    std::vector<Vel_pos> u_l=f1.get_u();
    std::vector<Vel_pos> u_t=gran_int_h(f1.get_v(),t_t ,c,dat_t);
    std::vector<Vel_pos> u=u_l;
    u.insert(u.end(),u_t.begin(),u_t.end());
    std::vector<Vel_pos> u_pre;
    u_pre=u_pre_func(u,t,c,prepare_fcm_m(se,me,omega,c1,c2,c3,sss,part_num,u_l,t_l,c,false,pso_iter));
    double* dat_f;
    dat_f=u_to_dat(u_pre,f1.get_v(),t,c,m);
    dat_f[0]=csv_values[0];
    double mse=rmse(dat_f, csv_values,t);
	tstop = (double)clock()/CLOCKS_PER_SEC;
    delete [] dat_f;
    file_r.close();
    // csv points into csv_value's buffer and end into strtod's output;
    // neither was allocated with new[], so they must not be deleted
    return 0;
}
Example #12
template<class T> T
nrmse (const Matrix<T>& A, const Matrix<T>& B) {

	return rmse(A,B)/T(norm(A));

}
Example #13
int main(int argc, char* argv[])
{	
	try{

//1. Analyze input parameters
	//convert input parameters to string from char*
	stringv args(argc); 
	for(int argNo = 0; argNo < argc; argNo++)
		args[argNo] = string(argv[argNo]);
	
	//check that the number of arguments is even (flags + value pairs)
	if(argc % 2 == 0)
		throw INPUT_ERR;

#ifndef _WIN32
	int threadN = 6;	//number of threads
#endif

	TrainInfo ti; //model training parameters
	string modelFName = "model.bin";	//name of the output file for the model
	int topAttrN = 0;  //how many top attributes to output and keep in the cut data 
							//(0 = do not do feature selection)
							//(-1 = output all available features)
	bool doOut = true; //whether to output log information to stdout

	//parse and save input parameters
	//indicators of presence of required flags in the input
	bool hasTrain = false;
	bool hasVal = false; 
	bool hasAttr = false; 

	for(int argNo = 1; argNo < argc; argNo += 2)
	{
		if(!args[argNo].compare("-t"))
		{
			ti.trainFName = args[argNo + 1];
			hasTrain = true;
		}
		else if(!args[argNo].compare("-v"))
		{
			ti.validFName = args[argNo + 1];
			hasVal = true;
		}
		else if(!args[argNo].compare("-r"))
		{
			ti.attrFName = args[argNo + 1];
			hasAttr = true;
		}
		else if(!args[argNo].compare("-a"))
			ti.alpha = atofExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-b"))
			ti.bagN = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-i"))
			ti.seed = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-k"))
			topAttrN = atoiExt(argv[argNo + 1]);
		else if(!args[argNo].compare("-m"))
		{
			modelFName = args[argNo + 1];
			if(modelFName.empty())
				throw EMPTY_MODEL_NAME_ERR;
		}
		else if(!args[argNo].compare("-l"))
		{
			if(!args[argNo + 1].compare("log"))
				doOut = true;
			else if(!args[argNo + 1].compare("nolog"))
				doOut = false;
			else
				throw INPUT_ERR;
		}
		else if(!args[argNo].compare("-c"))
		{
			if(!args[argNo + 1].compare("roc"))
				ti.rms = false;
			else if(!args[argNo + 1].compare("rms"))
				ti.rms = true;
			else
				throw INPUT_ERR;
		}
		else if(!args[argNo].compare("-h"))
#ifndef _WIN32 
			threadN = atoiExt(argv[argNo + 1]);
#else
			throw WIN_ERR;
#endif
		else
			throw INPUT_ERR;
	}//end for(int argNo = 1; argNo < argc; argNo += 2) //parse and save input parameters

	if(!(hasTrain && hasVal && hasAttr))
		throw INPUT_ERR;

	if((ti.alpha < 0) || (ti.alpha > 1))
		throw ALPHA_ERR;
	
//1.a) Set log file
	LogStream clog;
	LogStream::init(doOut);
	clog << "\n-----\nbt_train ";
	for(int argNo = 1; argNo < argc; argNo++)
		clog << argv[argNo] << " ";
	clog << "\n\n";

//1.b) Initialize random number generator. 
	srand(ti.seed);

//2. Load data
	INDdata data(ti.trainFName.c_str(), ti.validFName.c_str(), ti.testFName.c_str(), 
				 ti.attrFName.c_str());
	CTree::setData(data);
	CTreeNode::setData(data);

//2.a) Start thread pool
#ifndef _WIN32
	TThreadPool pool(threadN);
	CTree::setPool(pool);
#endif

//3. Train models
	doublev validTar;
	int validN = data.getTargets(validTar, VALID);
	int itemN = data.getTrainN();

	//adjust minAlpha, if needed
	double newAlpha = adjustAlpha(ti.alpha, itemN);
	if(ti.alpha != newAlpha)
	{
		if(newAlpha == 0)
			clog << "Warning: due to small train set size value of alpha was changed to 0"; 
		else 
			clog << "Warning: alpha value was rounded to the closest valid value " << newAlpha;
		clog << ".\n\n";
		ti.alpha = newAlpha;	
	}
	clog << "Alpha = " << ti.alpha << "\n" 
		<< ti.bagN << " bagging iterations\n";

	doublev rmsV(ti.bagN, 0); 				//bagging curve of rms values for validation set
	doublev rocV;							 
	if(!ti.rms)
		rocV.resize(ti.bagN, 0);			//bagging curve of roc values for validation set
	doublev predsumsV(validN, 0); 			//sums of predictions for each data point

	int attrN = data.getAttrN();
	if(topAttrN == -1)
		topAttrN = attrN;
	idpairv attrCounts;	//counts of attribute importance
	bool doFS = (topAttrN != 0);	//whether feature selection is requested
	if(doFS)
	{//initialize attrCounts
		attrCounts.resize(attrN);
		for(int attrNo = 0; attrNo < attrN; attrNo++)
		{
			attrCounts[attrNo].first = attrNo;	//number of attribute	
			attrCounts[attrNo].second = 0;		//counts
		}
	}
	fstream fmodel(modelFName.c_str(), ios_base::binary | ios_base::out);
	//header for compatibility with Additive Groves model
	AG_TRAIN_MODE modeStub = SLOW;
	fmodel.write((char*) &modeStub, sizeof(enum AG_TRAIN_MODE));
	int tigNStub = 1;
	fmodel.write((char*) &tigNStub, sizeof(int));
	fmodel.write((char*) &ti.alpha, sizeof(double));
	fmodel.close();
	
	fstream fbagrms("bagging_rms.txt", ios_base::out); //bagging curve (rms)
	fbagrms.close();
	fstream fbagroc;
	if(!ti.rms)
	{
		fbagroc.open("bagging_roc.txt", ios_base::out); //bagging curve (roc) 
		fbagroc.close();
	}

	//make bags, build trees, collect predictions
	for(int bagNo = 0; bagNo < ti.bagN; bagNo++)
	{
		if(doOut)
			cout << "Iteration " << bagNo + 1 << " out of " << ti.bagN << endl;

		data.newBag();
		CTree tree(ti.alpha);
		tree.setRoot();
		tree.grow(doFS, attrCounts);
		tree.save(modelFName.c_str());

		//generate predictions for validation set
		doublev predictions(validN);
		for(int itemNo = 0; itemNo < validN; itemNo++)
		{
			predsumsV[itemNo] += tree.predict(itemNo, VALID);
			predictions[itemNo] = predsumsV[itemNo] / (bagNo + 1);
		}
		rmsV[bagNo] = rmse(predictions, validTar);
		if(!ti.rms)
			rocV[bagNo] = roc(predictions, validTar);

		//output an element of bagging curve 
		fbagrms.open("bagging_rms.txt", ios_base::out | ios_base::app); 
		fbagrms << rmsV[bagNo] << endl;
		fbagrms.close();

		//same for roc, if needed
		if(!ti.rms)
		{
			fbagroc.open("bagging_roc.txt", ios_base::out | ios_base::app); 
			fbagroc << rocV[bagNo] << endl;
			fbagroc.close();
		}
	}

	if(doFS)	//sort attributes by counts
		sort(attrCounts.begin(), attrCounts.end(), idGreater);
	
//4. Output
		
	//output results and recommendations
	if(ti.rms)
		clog << "RMSE on validation set = " << rmsV[ti.bagN - 1] << "\n";
	else
		clog << "ROC on validation set = " << rocV[ti.bagN - 1] << "\n";


	//analyze whether more bagging should be recommended based on the curve in the best point
	if(moreBag(rmsV))
	{
		int recBagN = ti.bagN + 100;
		clog << "\nRecommendation: a greater number of bagging iterations might produce a better model.\n"
			<< "Suggested action: bt_train -b " << recBagN << "\n";
	}
	else
		clog << "\nThe bagging curve shows good convergence. \n"; 
	clog << "\n";

	//standard output in case of turned off log output: final performance on validation set only
	if(!doOut)
	{
		if(ti.rms)
			cout << rmsV[ti.bagN - 1] << endl;
		else
			cout << rocV[ti.bagN - 1] << endl;
	}

	//output feature selection results
	if(doFS)
	{
		if(topAttrN > attrN)
			topAttrN = attrN;

		fstream ffeatures("feature_scores.txt", ios_base::out);	
		ffeatures << "Top " << topAttrN << " features\n";
		for(int attrNo = 0; attrNo < topAttrN; attrNo++)
			ffeatures << data.getAttrName(attrCounts[attrNo].first) << "\t" 
				<< attrCounts[attrNo].second / ti.bagN / itemN << "\n";
		ffeatures << "\n\nColumn numbers (beginning with 1)\n";
		for(int attrNo = 0; attrNo < topAttrN; attrNo++)
			ffeatures << data.getColNo(attrCounts[attrNo].first) + 1 << " ";
		ffeatures << "\nLabel column number: " << data.getTarColNo() + 1;
		ffeatures.close();

		//output new attribute file
		for(int attrNo = topAttrN; attrNo < attrN; attrNo++)
			data.ignoreAttr(attrCounts[attrNo].first);
		data.outAttr(ti.attrFName);
	}

	}catch(TE_ERROR err){