コード例 #1
0
ファイル: 1.cpp プロジェクト: fuxiang90/code-fuxiang90
/**
 * Fills a label vector and a feature matrix with a two-class toy data set
 * and prints both.
 *
 * Examples with index below lab.vlen/2 get label -1 and feature values
 * CMath::random(0.0, 1.0)+dist; all remaining examples get label +1 and
 * feature values CMath::random(0.0, 1.0)-dist.
 *
 * @param lab label vector to fill; its length is the number of examples
 * @param feat feature matrix to fill; rows are dimensions, the second index
 * is the example index
 * @param dist offset added to (first half) or subtracted from (second half)
 * every random feature value
 */
void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat,
		float64_t dist)
{
	const index_t num_examples=lab.vlen;
	const index_t num_dims=feat.num_rows;

	for (int32_t example=0; example<num_examples; example++)
	{
		/* the first half forms the negative class, the rest the positive */
		const bool negative_class=example<num_examples/2;
		lab[example]=negative_class ? -1.0 : 1.0;

		for (int32_t dim=0; dim<num_dims; dim++)
		{
			const float64_t noise=CMath::random(0.0, 1.0);
			feat(dim, example)=negative_class ? noise+dist : noise-dist;
		}
	}

	lab.display_vector("lab");
	feat.display_matrix("feat");
}
コード例 #2
0
/**
 * Rebuilds the per-task index lists so that each task only keeps those of
 * its indices that also occur in the given index vector.
 *
 * The given indices are printed first. A task relation must be set
 * (asserted). Any previously stored per-task array is freed, a new array
 * with one entry per task of the relation is allocated, and every entry is
 * filled with the task's indices that are contained in `indices`, in the
 * order the task relation reports them.
 *
 * @param indices indices to intersect every task's index list with
 */
void CMultitaskROCEvaluation::set_indices(SGVector<index_t> indices)
{
	indices.display_vector("indices");
	ASSERT(m_task_relation)

	/* lookup structure holding all requested indices */
	set<index_t> wanted;
	for (int32_t k=0; k<indices.vlen; k++)
		wanted.insert(indices[k]);

	/* release the array built by an earlier call, if there was one */
	if (m_num_tasks>0)
	{
		SG_FREE(m_tasks_indices);
	}

	m_num_tasks = m_task_relation->get_num_tasks();
	m_tasks_indices = SG_MALLOC(SGVector<index_t>, m_num_tasks);

	SGVector<index_t>* all_tasks = m_task_relation->get_tasks_indices();
	for (int32_t task=0; task<m_num_tasks; task++)
	{
		SGVector<index_t> members = all_tasks[task];

		/* collect the members of this task that were requested */
		vector<index_t> kept;
		for (int32_t k=0; k<members.vlen; k++)
		{
			const index_t candidate = members[k];
			if (wanted.find(candidate)!=wanted.end())
				kept.push_back(candidate);
		}

		/* copy the survivors into a vector of exactly matching length */
		SGVector<index_t> filtered(kept.size());
		for (int32_t k=0; k<filtered.vlen; k++)
			filtered[k] = kept[k];

		m_tasks_indices[task] = filtered;
	}

	/* the array returned by the task relation is freed here */
	SG_FREE(all_tasks);
}
コード例 #3
0
/**
 * Example program: Gaussian process regression with gradient based model
 * selection.
 *
 * Training/testing matrices and labels are filled by build_matrices(), the
 * model selection parameter tree is created by build_tree(); both helpers as
 * well as dim_vectors and num_vectors are declared outside this function.
 * The program sets up a Gaussian kernel, zero mean, Gaussian likelihood and
 * exact inference, searches for the best parameters, applies them, and
 * prints the evaluation result, predicted means and variances, and the
 * intermediate quantities of the inference method.
 *
 * @param argc unused
 * @param argv unused
 * @return always 0
 */
int main(int argc, char **argv)
{
	init_shogun_with_defaults();


	/* create some data and labels */
	SGMatrix<float64_t> matrix =
			SGMatrix<float64_t>(dim_vectors, num_vectors);

	SGMatrix<float64_t> matrix2 =
			SGMatrix<float64_t>(dim_vectors, num_vectors);

	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	build_matrices(matrix2, matrix, labels);

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	SG_REF(features);
	SG_REF(features2);

	SG_REF(labels);

	/*Allocate our Kernel*/
	CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);

	test_kernel->init(features, features);

	/*Allocate our mean function*/
	CZeroMean* mean = new CZeroMean();

	/* take our own reference so that the SG_UNREF(mean) in the clean-up
	 * section below is balanced, exactly as for features, labels, inf and
	 * gp; without it the final unref would release a reference this
	 * function never acquired */
	SG_REF(mean);

	/*Allocate our likelihood function*/
	CGaussianLikelihood* lik = new CGaussianLikelihood();

	/*Allocate our inference method*/
	CExactInferenceMethod* inf =
			new CExactInferenceMethod(test_kernel,
						  features, mean, labels, lik);

	SG_REF(inf);

	/*Finally use these to allocate the Gaussian Process Object*/
	CGaussianProcessRegression* gp =
			new CGaussianProcessRegression(inf, features, labels);

	SG_REF(gp);

	/*Build the parameter tree for model selection*/
	CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

	/*Criterion for gradient search*/
	CGradientCriterion* crit = new CGradientCriterion();

	/*This will evaluate our inference method for its derivatives*/
	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
			crit);

	grad->set_function(inf);

	gp->print_modsel_params();

	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
			root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/*Search for best parameters*/
	CParameterCombination* best_combination=grad_search->select_model(true);

	/*Output all the results and information*/
	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();

		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* fetch the quantities of the inference method that get printed below */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* first ask the GP for covariances, then switch back to means */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);

	CRegressionLabels* covariance = gp->apply_regression(features);

	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);

	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(features);
	SG_UNREF(features2);
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);
	SG_UNREF(mean);

	exit_shogun();

	return 0;
}
コード例 #4
0
/**
 * Example program: Gaussian process regression on a small hard-coded data
 * set, with gradient based model selection over the inference scale, the
 * likelihood sigma and the kernel width.
 *
 * The parameter tree is built inline: "inference_method" holds the children
 * "scale", "likelihood_model" (with "sigma") and "kernel" (with "width").
 * After the search the best parameters are applied and the evaluation
 * result, predicted means and variances, and the intermediate quantities of
 * the inference method are printed.
 *
 * @param argc unused
 * @param argv unused
 * @return always 0
 */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	int32_t num_vectors=4;
	int32_t dim_vectors=3;

	/* create some data and labels */
	SGMatrix<float64_t> matrix =
			SGMatrix<float64_t>(dim_vectors, num_vectors);

	/* training data, addressed through the linear element index */
	matrix[0] = -1;
	matrix[1] = -1;
	matrix[2] = -1;
	matrix[3] = 1;
	matrix[4] = 1;
	matrix[5] = 1;
	matrix[6] = -10;
	matrix[7] = -10;
	matrix[8] = -10;
	matrix[9] = 3;
	matrix[10] = 2;
	matrix[11] = 1;

	SGMatrix<float64_t> matrix2 =
			SGMatrix<float64_t>(dim_vectors, num_vectors);

	/* deterministic testing data */
	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		matrix2[i]=i*sin(i)*.96;

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	SG_REF(features);
	SG_REF(features2);

	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	/* create labels: +1 for even indices, -1 for odd indices */
	for (index_t i=0; i<num_vectors; ++i)
	{
		if(i%2 == 0) labels->set_label(i, 1);
		else labels->set_label(i, -1);
	}

	SG_REF(labels);
	CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);

	test_kernel->init(features, features);

	CZeroMean* mean = new CZeroMean();
	CGaussianLikelihood* lik = new CGaussianLikelihood();
	lik->set_sigma(0.01);

	CExactInferenceMethod* inf =
			new CExactInferenceMethod(test_kernel, features, mean, labels, lik);


	SG_REF(inf);

	CGaussianProcessRegression* gp =
			new CGaussianProcessRegression(inf, features, labels);

	/* parameter tree for model selection */
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1 =
			new CModelSelectionParameters("inference_method", inf);
	root->append_child(c1);

	CModelSelectionParameters* c2 = new CModelSelectionParameters("scale");
	c1 ->append_child(c2);
	c2->build_values(0.01, 4.0, R_LINEAR);


	CModelSelectionParameters* c3 =
			new CModelSelectionParameters("likelihood_model", lik);
	c1->append_child(c3);

	CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
	c3->append_child(c4);
	c4->build_values(0.001, 4.0, R_LINEAR);

	CModelSelectionParameters* c5 =
			new CModelSelectionParameters("kernel", test_kernel);
	c1->append_child(c5);

	CModelSelectionParameters* c6 =
			new CModelSelectionParameters("width");
	c5->append_child(c6);
	c6->build_values(0.001, 4.0, R_LINEAR);

	/* keep our own reference to the machine that gets evaluated */
	SG_REF(gp);

	CGradientCriterion* crit = new CGradientCriterion();

	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
			crit);

	grad->set_function(inf);

	gp->print_modsel_params();

	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
			root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/* the evaluation limit has to be configured before the search is
	 * started; setting it after select_model() has no effect on the search */
	grad_search->set_max_evaluations(5);
	CParameterCombination* best_combination=grad_search->select_model(true);

	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();

		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* fetch the quantities of the inference method that get printed below */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* first ask the GP for covariances, then switch back to means */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);

	CRegressionLabels* covariance = gp->apply_regression(features);

	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(features);
	SG_UNREF(features2);
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);

	exit_shogun();

	return 0;
}
コード例 #5
0
ファイル: Mosek.cpp プロジェクト: coodoing/shogun
/**
 * Runs the MOSEK optimizer on the stored task and, on success, copies the
 * interior solution into the given vector.
 *
 * The response code of MSK_optimize() is stored in m_rescode and returned.
 * If it is not MSK_RES_OK, its symbolic name and description are printed and
 * `sol` is left untouched. Otherwise the status of the interior solution is
 * queried and entries [0, sol.vlen) of the optimization vector are copied
 * into `sol` when the status is optimal or near optimal.
 *
 * @param sol vector receiving the solution slice
 * @return response code of MSK_optimize()
 */
MSKrescodee CMosek::optimize(SGVector< float64_t > sol)
{
	m_rescode = MSK_optimize(m_task);

#ifdef DEBUG_MOSEK
	/* summary with information about the solution */
	MSK_solutionsummary(m_task, MSK_STREAM_LOG);
#endif

	/* error path first: report what went wrong and hand back the code */
	if ( m_rescode != MSK_RES_OK )
	{
		char symname[MSK_MAX_STR_LEN];
		char desc[MSK_MAX_STR_LEN];

		MSK_getcodedesc(m_rescode, symname, desc);

		SG_PRINT("An error occurred optimizing with MOSEK\n");
		SG_PRINT("ERROR %s - '%s'\n", symname, desc);

		return m_rescode;
	}

	/* FIXME possible solutions are:
	 * MSK_SOL_ITR: the interior solution
	 * MSK_SOL_BAS: the basic solution
	 * MSK_SOL_ITG: the integer solution */
	MSKsolstae solsta;
	MSK_getsolutionstatus(m_task, MSK_SOL_ITR, NULL, &solsta);

	switch (solsta)
	{
	case MSK_SOL_STA_OPTIMAL:
	case MSK_SOL_STA_NEAR_OPTIMAL:
		/* interior solution, optimization vector, entries 0..sol.vlen */
		MSK_getsolutionslice(m_task, MSK_SOL_ITR, MSK_SOL_ITEM_XX,
				0, sol.vlen, sol.vector);
#ifdef DEBUG_SOLUTION
		sol.display_vector("Solution");
#endif
		break;

	case MSK_SOL_STA_DUAL_INFEAS_CER:
	case MSK_SOL_STA_PRIM_INFEAS_CER:
	case MSK_SOL_STA_NEAR_DUAL_INFEAS_CER:
	case MSK_SOL_STA_NEAR_PRIM_INFEAS_CER:
#ifdef DEBUG_MOSEK
		SG_PRINT("Primal or dual infeasibility "
			 "certificate found\n");
#endif
		break;

	case MSK_SOL_STA_UNKNOWN:
#ifdef DEBUG_MOSEK
		SG_PRINT("Undetermined solution status\n");
#endif
		break;

	default:
#ifdef DEBUG_MOSEK
		SG_PRINT("Other solution status\n");
#endif
		/* the label needs a statement even without DEBUG_MOSEK */
		break;
	}

	return m_rescode;
}
コード例 #6
0
/**
 * Prints the given test indices.
 *
 * @param indices index vector to display under the name "test_indices"
 * @param prefix string forwarded to display_vector() as its prefix argument
 */
void CCrossValidationPrintOutput::update_test_indices(
		SGVector<index_t> indices, const char* prefix)
{
	indices.display_vector("test_indices", prefix);
}
コード例 #7
0
ファイル: LinearTimeMMD.cpp プロジェクト: hushell/shogun
/**
 * Computes, for one kernel or for every sub-kernel of a combined kernel, the
 * running mean of the terms pp[j]+qq[j]-pq[j]-qp[j] over streamed data
 * (stored in statistic) together with a variance estimate of these terms
 * (stored in variance).
 *
 * Data is streamed in blocks of at most m_blocksize examples until m_m/2
 * examples were processed. If m_simulate_h0 is set, the four streamed blocks
 * are merged, permuted and split again before the kernels are evaluated.
 *
 * @param statistic result vector for the means; allocated here if empty,
 * otherwise its length has to equal the number of kernels
 * @param variance result vector for the variances; allocated here if empty,
 * otherwise its length has to equal the number of kernels
 * @param multiple_kernels if true, m_kernel has to be of type K_COMBINED and
 * one result per sub-kernel is computed; if false a single result is computed
 */
void CLinearTimeMMD::compute_statistic_and_variance(
		SGVector<float64_t>& statistic, SGVector<float64_t>& variance,
		bool multiple_kernels)
{
	SG_DEBUG("entering %s::compute_statistic_and_variance()\n", get_name())

	REQUIRE(m_streaming_p, "%s::compute_statistic_and_variance: streaming "
			"features p required!\n", get_name());
	REQUIRE(m_streaming_q, "%s::compute_statistic_and_variance: streaming "
			"features q required!\n", get_name());

	REQUIRE(m_kernel, "%s::compute_statistic_and_variance: kernel needed!\n",
			get_name());

	/* make sure multiple_kernels flag is used only with a combined kernel */
	REQUIRE(!multiple_kernels || m_kernel->get_kernel_type()==K_COMBINED,
			"%s::compute_statistic_and_variance: multiple kernels specified,"
			"but underlying kernel is not of type K_COMBINED\n", get_name());

	/* m is number of samples from each distribution, m_2 is half of it
	 * using names from JLMR paper (see class documentation) */
	index_t m_2=m_m/2;

	SG_DEBUG("m_m=%d\n", m_m)

	/* find out whether single or multiple kernels (cast is safe, check above) */
	index_t num_kernels=1;
	if (multiple_kernels)
	{
		num_kernels=((CCombinedKernel*)m_kernel)->get_num_subkernels();
		SG_DEBUG("computing MMD and variance for %d sub-kernels\n",
				num_kernels);
	}

	/* allocate memory for results if vectors are empty */
	if (!statistic.vector)
		statistic=SGVector<float64_t>(num_kernels);

	if (!variance.vector)
		variance=SGVector<float64_t>(num_kernels);

	/* ensure right dimensions */
	REQUIRE(statistic.vlen==num_kernels, "%s::compute_statistic_and_variance: "
			"statistic vector size (%d) does not match number of kernels (%d)\n",
			 get_name(), statistic.vlen, num_kernels);

	REQUIRE(variance.vlen==num_kernels, "%s::compute_statistic_and_variance: "
			"variance vector size (%d) does not match number of kernels (%d)\n",
			 get_name(), variance.vlen, num_kernels);

	/* temp variable in the algorithm */
	float64_t current;
	float64_t delta;

	/* initialise statistic and variance since they are cumulative */
	statistic.zero();
	variance.zero();

	/* needed for online mean and variance: one counter per kernel, starting
	 * at 1 since it is used as divisor before being incremented */
	SGVector<index_t> term_counters(num_kernels);
	term_counters.set_const(1);

	/* number of examples consumed so far; the loop ends once m_2 examples
	 * were processed.
	 * NOTE(review): if m_blocksize is not positive, num_this_run is not
	 * positive either and this loop does not advance - confirm that
	 * m_blocksize is validated where it is set */
	index_t num_examples_processed=0;
	while (num_examples_processed<m_2)
	{
		/* number of example to look at in this iteration */
		index_t num_this_run=CMath::min(m_blocksize,
				CMath::max(0, m_2-num_examples_processed));
		SG_DEBUG("processing %d more examples. %d so far processed. Blocksize "
				"is %d\n", num_this_run, num_examples_processed, m_blocksize);

		/* stream data from both distributions, two blocks from each */
		CFeatures* p1=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* p2=m_streaming_p->get_streamed_features(num_this_run);
		CFeatures* q1=m_streaming_q->get_streamed_features(num_this_run);
		CFeatures* q2=m_streaming_q->get_streamed_features(num_this_run);

		/* check whether h0 should be simulated and permute if so */
		if (m_simulate_h0)
		{
			/* create merged copy of all feature instances to permute */
			CList* list=new CList();
			list->append_element(p2);
			list->append_element(q1);
			list->append_element(q2);
			CFeatures* merged=p1->create_merged_copy(list);
			SG_UNREF(list);

			/* permute */
			SGVector<index_t> inds(merged->get_num_vectors());
			inds.range_fill();
			inds.permute();
			merged->add_subset(inds);

			/* copy back, replacing old features */
			SG_UNREF(p1);
			SG_UNREF(p2);
			SG_UNREF(q1);
			SG_UNREF(q2);

			/* the index window is shifted by num_this_run before each of
			 * the second, third and fourth copy */
			SGVector<index_t> copy(num_this_run);
			copy.range_fill();
			p1=merged->copy_subset(copy);
			copy.add(num_this_run);
			p2=merged->copy_subset(copy);
			copy.add(num_this_run);
			q1=merged->copy_subset(copy);
			copy.add(num_this_run);
			q2=merged->copy_subset(copy);

			/* clean up and note that copy_subset does a SG_REF */
			SG_UNREF(merged);
		}
		else
		{
			/* reference produced features (only if copy_subset was not used) */
			SG_REF(p1);
			SG_REF(p2);
			SG_REF(q1);
			SG_REF(q2);
		}

		/* if multiple kernels are used, compute all of them on streamed data,
		 * if multiple kernels flag is false, the loop below will be executed
		 * only once */
		CKernel* kernel=m_kernel;
		if (multiple_kernels)
		{
			SG_DEBUG("using multiple kernels\n");
		}

		/* iterate through all kernels for this data */
		for (index_t i=0; i<num_kernels; ++i)
		{
			/* if multiple kernels should be computed, set next kernel */
			if (multiple_kernels)
			{
				kernel=((CCombinedKernel*)m_kernel)->get_kernel(i);
			}

			/* compute kernel matrix diagonals */
			kernel->init(p1, p2);
			SGVector<float64_t> pp=kernel->get_kernel_diagonal();

			kernel->init(q1, q2);
			SGVector<float64_t> qq=kernel->get_kernel_diagonal();

			kernel->init(p1, q2);
			SGVector<float64_t> pq=kernel->get_kernel_diagonal();

			kernel->init(q1, p2);
			SGVector<float64_t> qp=kernel->get_kernel_diagonal();

			/* single variances for all kernels. Update mean and variance
			 * using Knuth's online variance algorithm.
			 * C.f. for example Wikipedia */
			for (index_t j=0; j<num_this_run; ++j)
			{
				/* compute sum of current h terms for current kernel */
				current=pp[j]+qq[j]-pq[j]-qp[j];

				/* D. Knuth's online variance algorithm for current kernel;
				 * the statement order matters: delta uses the old mean, the
				 * variance update uses the already updated mean */
				delta=current-statistic[i];
				statistic[i]+=delta/term_counters[i]++;
				variance[i]+=delta*(current-statistic[i]);

				SG_DEBUG("burst: current=%f, delta=%f, statistic=%f, "
						"variance=%f, kernel_idx=%d\n", current, delta,
						statistic[i], variance[i], i);
			}
			
			/* the sub-kernel obtained via get_kernel() is released again */
			if (multiple_kernels)
			{
				SG_UNREF(kernel);
			}
		}

		/* clean up streamed data */
		SG_UNREF(p1);
		SG_UNREF(p2);
		SG_UNREF(q1);
		SG_UNREF(q2);

		/* add number of processed examples for this run */
		num_examples_processed+=num_this_run;
	}
	SG_DEBUG("Done compouting statistic, processed 2*%d examples.\n",
			num_examples_processed);

	/* mean of sum all traces is linear time mmd, copy entries for all kernels */
	if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
		statistic.display_vector("statistics");

	/* variance of terms can be computed using mean (statistic).
	 * Note that the variance needs to be divided by m_2 in order to get
	 * variance of null-distribution.
	 * NOTE(review): for m_2==1 the divisor m_2-1 is zero, so the result is
	 * not a finite number - confirm that callers guarantee m_m>=4 */
	for (index_t i=0; i<num_kernels; ++i)
		variance[i]=variance[i]/(m_2-1)/m_2;

	if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
		variance.display_vector("variances");

	SG_DEBUG("leaving %s::compute_statistic_and_variance()\n", get_name())
}
コード例 #8
0
ファイル: SubsetStack.cpp プロジェクト: AlexBinder/shogun
/**
 * Pushes a new subset onto the subset stack and makes it the active one.
 *
 * If the stack already contains subsets, the given indices are interpreted
 * relative to the latest subset on the stack: the vector may not be longer
 * than that subset and may not contain indices beyond its range, and the
 * stored active subset is the given one mapped through the latest subset.
 * If the stack is empty the given subset is stored as is.
 *
 * On a failed check both index vectors are printed and SG_ERROR is raised;
 * the stack and the active subset are left unchanged in that case.
 *
 * @param subset index vector of the subset to add
 */
void CSubsetStack::add_subset(SGVector<index_t> subset)
{
	/* if there are already subsets on stack, do some legality checks */
	if (m_active_subsets_stack->get_num_elements())
	{
		/* check that subsets may only be smaller or equal than existing */
		CSubset* latest=(CSubset*)m_active_subsets_stack->get_last_element();
		if (subset.vlen>latest->m_subset_idx.vlen)
		{
			subset.display_vector("subset");
			latest->m_subset_idx.display_vector("last on stack");

			/* SG_ERROR does not return, so give back the reference taken
			 * by get_last_element() before raising it */
			SG_UNREF(latest);
			SG_ERROR("%s::add_subset(): Provided index vector is "
					"larger than the subsets on the stubset stack!\n", get_name());
		}

		/* check for range of indices */
		index_t max_index=SGVector<index_t>::max(subset.vector, subset.vlen);
		if (max_index>=latest->m_subset_idx.vlen)
		{
			subset.display_vector("subset");
			latest->m_subset_idx.display_vector("last on stack");

			/* same as above: release the reference before raising */
			SG_UNREF(latest);
			SG_ERROR("%s::add_subset(): Provided index vector contains"
					" indices larger than possible range!\n", get_name());
		}

		/* clean up */
		SG_UNREF(latest);
	}

	/* active subset will be changed anyway, no setting to NULL */
	SG_UNREF(m_active_subset);

	/* two cases: stack is empty/stack is not empty */
	if (m_active_subsets_stack->get_num_elements())
	{
		/* if there are already subsets, we need to map given one through
		 * existing ones */

		/* get latest current subset */
		CSubset* latest=(CSubset*)m_active_subsets_stack->get_last_element();

		/* create new index vector */
		SGVector<index_t> new_active_subset=SGVector<index_t>(subset.vlen);

		/* using the latest current subset, transform all indices by the latest
		 * added subset (dynamic programming greets you) */
		for (index_t i=0; i<subset.vlen; ++i)
		{
			new_active_subset.vector[i]=
					latest->m_subset_idx.vector[subset.vector[i]];
		}

		/* replace active subset */
		m_active_subset=new CSubset(new_active_subset);
		SG_REF(m_active_subset);
		SG_UNREF(latest);
	}
	else
	{
		/* just use plain given subset since there is nothing to map */
		m_active_subset=new CSubset(subset);
		SG_REF(m_active_subset);
	}

	/* add current active subset on stack of active subsets in any case */
	m_active_subsets_stack->append_element(m_active_subset);
}