コード例 #1
0
ファイル: SubGradientLPM.cpp プロジェクト: AsherBond/shogun
bool CSubGradientLPM::train(CFeatures* data)
{
	lpmtim=0;
	SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
	ASSERT(labels);
	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}
	ASSERT(features);

	int32_t num_iterations=0;
	int32_t num_train_labels=labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);

	init(num_vec, num_feat);

	int32_t num_active=0;
	int32_t num_bound=0;
	float64_t alpha=0;
	float64_t dir_deriv=0;
	float64_t obj=0;
	delta_active=num_vec;
	last_it_noimprovement=-1;

	work_epsilon=0.99;
	autoselected_epsilon=work_epsilon;

	compute_projection(num_feat, num_vec);

	CTime time;
	float64_t loop_time=0;
	while (!(CSignal::cancel_computations()))
	{
		CTime t;
		delta_active=find_active(num_feat, num_vec, num_active, num_bound);

		update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTLPM
		SG_PRINT("==================================================\niteration: %d ", num_iterations);
		obj=compute_objective(num_feat, num_vec);
		SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
				obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
	  SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif
		//CMath::display_vector(w, w_dim, "w");
		//SG_PRINT("bias: %f\n", bias);
		//CMath::display_vector(proj, num_vec, "proj");
		//CMath::display_vector(idx_active, num_active, "idx_active");
		//SG_PRINT("num_active: %d\n", num_active);
		//CMath::display_vector(idx_bound, num_bound, "idx_bound");
		//SG_PRINT("num_bound: %d\n", num_bound);
		//CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
		//SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
		//CMath::display_vector(grad_w, num_feat, "grad_w");
		//SG_PRINT("grad_b:%f\n", grad_b);
		
		dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

		alpha=line_search(num_feat, num_vec);

		if (num_it_noimprovement==10 || num_bound<qpsize_max)
		{
			float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
				grad_b*grad_b;

			SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
					"work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
					work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));

			if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
				break;
			else
				num_it_noimprovement=0;
		}

		//if (work_epsilon<=epsilon && delta_active==0 && num_it_noimprovement)
		if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
		{
			if (last_it_noimprovement==num_iterations-1)
			{
				SG_PRINT("no improvement...\n");
				num_it_noimprovement++;
			}
			else
				num_it_noimprovement=0;

			last_it_noimprovement=num_iterations;
		}

		CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
		bias-=alpha*grad_b;

		update_projection(alpha, num_vec);

		t.stop();
		loop_time=t.time_diff_sec();
		num_iterations++;

		if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
			break;
	}

	SG_INFO("converged after %d iterations\n", num_iterations);

	obj=compute_objective(num_feat, num_vec);
	SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
			obj, alpha, dir_deriv, num_bound, num_active);

#ifdef DEBUG_SUBGRADIENTLPM
	CMath::display_vector(w, w_dim, "w");
	SG_PRINT("bias: %f\n", bias);
#endif
	SG_PRINT("solver time:%f s\n", lpmtim);

	cleanup();

	return true;
}
コード例 #2
0
ファイル: SubGradientSVM.cpp プロジェクト: JMR-b/shikken
float64_t CSubGradientSVM::compute_min_subgradient(
	int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
{
	float64_t dir_deriv=0;

	if (num_bound > 0)
	{

		CTime t2;
		CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);

		if (num_bound>=qpsize_max && num_it_noimprovement!=10) // if qp gets to large, lets just choose a random beta
		{
			//SG_PRINT("qpsize too large  (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max);
			for (int32_t i=0; i<num_bound; i++)
				beta[i]=CMath::random(0.0,1.0);
		}
		else
		{
			memset(beta, 0, sizeof(float64_t)*num_bound);

			float64_t bias_const=0;

			if (use_bias)
				bias_const=1;

			for (int32_t i=0; i<num_bound; i++)
			{
				for (int32_t j=i; j<num_bound; j++)
				{
					Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])* 
						(features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);

					Z[j*num_bound+i]=Z[i*num_bound+j];

				}

				Zv[i]=-2.0*C1*get_label(idx_bound[i])* 
					(features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
			}

			//CMath::display_matrix(Z, num_bound, num_bound, "Z");
			//CMath::display_vector(Zv, num_bound, "Zv");
			t2.stop();
#ifdef DEBUG_SUBGRADIENTSVM
			t2.time_diff_sec(true);
#endif

			CTime t;
			CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
			//solver.set_solver(QPB_SOLVER_GRADDESC);
			//solver.set_solver(QPB_SOLVER_GS);
#ifdef USE_CPLEX
			solver.set_solver(QPB_SOLVER_CPLEX);
#else
			solver.set_solver(QPB_SOLVER_SCAS);
#endif

			solver.solve_qp(beta, num_bound);

			t.stop();
#ifdef DEBUG_SUBGRADIENTSVM
			tim+=t.time_diff_sec(true);
#else
			tim+=t.time_diff_sec(false);
#endif

			//CMath::display_vector(beta, num_bound, "beta gs");
			//solver.set_solver(QPB_SOLVER_CPLEX);
			//solver.solve_qp(beta, num_bound);
			//CMath::display_vector(beta, num_bound, "beta cplex");

			//CMath::display_vector(grad_w, num_feat, "grad_w");
			//SG_PRINT("grad_b:%f\n", grad_b);
		}

		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		for (int32_t i=0; i<num_bound; i++)
		{
			features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
			if (use_bias)
				grad_b -=  C1 * get_label(idx_bound[i])*beta[i];
		}

		dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
		for (int32_t i=0; i<num_bound; i++)
		{
			float64_t val= C1*get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b);
			dir_deriv += CMath::max(0.0, val);
		}
	}
	else
	{
		CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
		grad_b = -sum_Cy_active;

		dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
	}

	return dir_deriv;
}