Example #1
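This example is Shogun's SLEP solver for L1/Lq-regularized multiclass logistic regression: an accelerated proximal gradient method with FISTA-style momentum, a proximal step computed by SLEP's eppMatrix projection, and a backtracking line search that adapts the Lipschitz estimate L.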
slep_result_t slep_mc_plain_lr(
		CDotFeatures* features,
		CMulticlassLabels* labels,
		float64_t z,
		const slep_options& options)
{
	int i,j;
	// obtain problem parameters
	int n_feats   = features->get_dim_feature_space();
	int n_vecs    = features->get_num_vectors();
	int n_classes = labels->get_num_classes();

	// labels vector containing class indices in [0, n_classes)
	SGVector<float64_t> labels_vector = labels->get_labels();

	// initialize matrices and vectors to be used
	// weight vector
	MatrixXd w  = MatrixXd::Zero(n_feats, n_classes);
	// intercepts (biases)
	VectorXd c  = VectorXd::Zero(n_classes);

	if (options.last_result)
	{
		SGMatrix<float64_t> last_w = options.last_result->w;
		SGVector<float64_t> last_c = options.last_result->c;
		for (i=0; i<n_classes; i++)
		{
			c[i] = last_c[i];
			for (j=0; j<n_feats; j++)
				w(j,i) = last_w(j,i);
		}
	}
	// iterative process matrices and vectors
	MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
	VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
	// search point weight vector
	MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
	// search point intercepts
	VectorXd search_c = VectorXd::Zero(n_classes);
	// dot products
	MatrixXd Aw  = MatrixXd::Zero(n_vecs, n_classes);
	for (j=0; j<n_classes; j++)
		features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
	MatrixXd As  = MatrixXd::Zero(n_vecs, n_classes);
	MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
	// gradients
	MatrixXd g   = MatrixXd::Zero(n_feats, n_classes);
	VectorXd gc  = VectorXd::Zero(n_classes);
	// projection
	MatrixXd v   = MatrixXd::Zero(n_feats, n_classes);

	// initial estimate of the Lipschitz constant of the gradient (adapted by the line search)
	double L = 1.0/(n_vecs*n_classes);
	// coefficients for search point computation
	double alphap = 0, alpha = 1;

	// lambda regularization parameter
	double lambda = z;
	// objective values
	double objective = 0.0;
	double objective_p = 0.0;

	int iter = 0;
	bool done = false;
	CTime time;
	//internal::set_is_malloc_allowed(false);
	while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
	{
		double beta = (alphap-1)/alpha;
		// compute search points
		search_w = w + beta*wwp;
		search_c = c + beta*ccp;

		// update dot products with search point
		As = Aw + beta*(Aw-Awp);

		// compute objective and gradient at search point
		double fun_s = 0;
		g.setZero();
		gc.setZero();
		// for each vector
		for (i=0; i<n_vecs; i++)
		{
			// class of current vector
			int vec_class = labels_vector[i];
			// for each class
			for (j=0; j<n_classes; j++)
			{
				// compute logistic loss
				double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
				double bb = aa > 0.0 ? aa : 0.0;
				// avoid underflow via log-sum-exp trick
				fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				double prob = 1.0/(1+CMath::exp(aa));
				double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);///(n_vecs*n_classes);
				// update gradient of intercepts
				gc[j] += b;
				// update gradient of weight vectors
				features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
			}
		}
		//fun_s /= (n_vecs*n_classes);

		wp = w;
		Awp = Aw;
		cp = c;

		int inner_iter = 0;
		double fun_x = 0;

		// line search process
		while (inner_iter<5000)
		{
			// compute line search point
			v = search_w - g/L;
			c = search_c - gc/L;

			// proximal step: SLEP's eppMatrix projects v under the L1/Lq penalty into w
			eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);

			v = w - search_w;

			// update dot products
			for (j=0; j<n_classes; j++)
				features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);

			// compute objective at search point
			fun_x = 0;
			for (i=0; i<n_vecs; i++)
			{
				int vec_class = labels_vector[i];
				for (j=0; j<n_classes; j++)
				{
					double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
					double bb = aa > 0.0 ? aa : 0.0;
					fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				}
			}
			//fun_x /= (n_vecs*n_classes);

			// check for termination of line search
			double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
			double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);

			// stop if the squared norm of the step is below 1e-20
			if (r_sum <= 1e-20)
			{
				SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum)
				done = true;
				break;
			}

			if (l_sum <= r_sum*L)
				break;
			else
				L = CMath::max(2*L, l_sum/r_sum);

			inner_iter++;
		}

		// update FISTA momentum coefficients: alpha_{k+1} = (1 + sqrt(1 + 4*alpha_k^2))/2
		alphap = alpha;
		alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;

		// update wwp and ccp
		wwp = w - wp;
		ccp = c - cp;

		// update objectives
		objective_p = objective;
		objective = fun_x;

		// add the regularizer: L1/Lq mixed norm of w (computed here as the sum of column L2 norms, i.e. q=2)
		double L1q_norm = 0.0;
		for (int m=0; m<n_classes; m++)
			L1q_norm += w.col(m).norm();
		objective += lambda*L1q_norm;

		//cout << "Objective = " << objective << endl;

		// check for termination of whole process
		if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
		{
			SG_SINFO("Objective changes less than tolerance\n")
			done = true;
		}

		iter++;
	}
	SG_SINFO("%d iterations passed, objective = %f\n",iter,objective)
	//internal::set_is_malloc_allowed(true);

	// output computed weight vectors and intercepts
	SGMatrix<float64_t> r_w(n_feats,n_classes);
	for (j=0; j<n_classes; j++)
	{
		for (i=0; i<n_feats; i++)
			r_w(i,j) = w(i,j);
	}
	//r_w.display_matrix();
	SGVector<float64_t> r_c(n_classes);
	for (j=0; j<n_classes; j++)
		r_c[j] = c[j];
	return slep_result_t(r_w, r_c);
}
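For context, here is a minimal usage sketch. It assumes Shogun's CDenseFeatures, SG_UNREF and a slep_options::default_options() helper; the header paths and option fields may differ between Shogun versions.

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/slep/slep_mc_plain_lr.h>

using namespace shogun;

slep_result_t train_sketch(SGMatrix<float64_t> X, SGVector<float64_t> y)
{
	// X is n_feats x n_vecs (one example per column);
	// y holds class indices in [0, n_classes)
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
	CMulticlassLabels* labels = new CMulticlassLabels(y);

	slep_options options = slep_options::default_options(); // assumed helper
	options.max_iter = 1000;   // cap on outer iterations
	options.tolerance = 1e-4;  // relative change of objective for stopping
	options.q = 2.0;           // q of the L1/Lq regularizer

	// the third argument is the regularization strength lambda (z)
	slep_result_t result = slep_mc_plain_lr(features, labels, 0.1, options);

	SG_UNREF(features);
	SG_UNREF(labels);
	return result; // result.w: n_feats x n_classes weights, result.c: biases
}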
Example #2
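This example is Shogun's SGSparseMatrix loader for data in SVMlight format. It reads the file in two passes: the first counts the feature vectors and measures the longest line to size the parse buffer, the second parses each line into a regression label and an array of sparse index:value entries.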
template<class T> CRegressionLabels* SGSparseMatrix<T>::load_svmlight_file(char* fname,
		bool do_sort_features)
{
	CRegressionLabels* lab=NULL;

	size_t blocksize=1024*1024;
	size_t required_blocksize=blocksize;
	uint8_t* dummy=SG_MALLOC(uint8_t, blocksize);
	FILE* f=fopen(fname, "rb");

	if (f)
	{
		free_data();

		SG_SINFO("counting line numbers in file %s\n", fname)
		size_t sz=blocksize;
		size_t block_offs=0;
		size_t old_block_offs=0;
		fseek(f, 0, SEEK_END);
		size_t fsize=ftell(f);
		rewind(f);

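		// first pass: count vectors and find the longest line to size the parse buffer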
		while (sz == blocksize)
		{
			sz=fread(dummy, sizeof(uint8_t), blocksize, f);
			for (size_t i=0; i<sz; i++)
			{
				block_offs++;
				if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
				{
					num_vectors++;
					required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1);
					old_block_offs=block_offs;
				}
			}
			SG_SPROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t")
		}

		SG_SINFO("found %d feature vectors\n", num_vectors)
		SG_FREE(dummy);
		blocksize=required_blocksize;
		dummy = SG_MALLOC(uint8_t, blocksize+1); //allow setting of '\0' at EOL

		lab=new CRegressionLabels(num_vectors);
		sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors);
		rewind(f);
		sz=blocksize;
		int32_t lines=0;
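		// second pass: parse each line into a label and its sparse feature entries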
		while (sz == blocksize)
		{
			sz=fread(dummy, sizeof(uint8_t), blocksize, f);

			size_t old_sz=0;
			for (size_t i=0; i<sz; i++)
			{
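				// line straddles the block boundary: move its start to the buffer head and refill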
				if (i==sz-1 && dummy[i]!='\n' && sz==blocksize)
				{
					size_t len=i-old_sz+1;
					uint8_t* data=&dummy[old_sz];

					for (size_t j=0; j<len; j++)
						dummy[j]=data[j];

					sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, f);
					i=0;
					old_sz=0;
					sz+=len;
				}

				if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
				{

					size_t len=i-old_sz;
					uint8_t* data=&dummy[old_sz];

					int32_t dims=0;
					for (size_t j=0; j<len; j++)
					{
						if (data[j]==':')
							dims++;
					}

					if (dims<=0)
					{
						SG_SERROR("Error in line %d - number of"
								" dimensions is %d line is %d characters"
								" long\n line_content:'%.*s'\n", lines,
								dims, len, len, (const char*) data);
					}

					SGSparseVectorEntry<T>* feat=SG_MALLOC(SGSparseVectorEntry<T>, dims);
					size_t j=0;
					for (; j<len; j++)
					{
						if (data[j]==' ')
						{
							data[j]='\0';

							lab->set_label(lines, atof((const char*) data));
							break;
						}
					}

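					// parse the remaining one-based index:value pairs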
					int32_t d=0;
					j++;
					uint8_t* start=&data[j];
					for (; j<len; j++)
					{
						if (data[j]==':')
						{
							data[j]='\0';

							feat[d].feat_index=(int32_t) atoi((const char*) start)-1;
							num_features=CMath::max(num_features, feat[d].feat_index+1);

							j++;
							start=&data[j];
							for (; j<len; j++)
							{
								if (data[j]==' ' || data[j]=='\n')
								{
									data[j]='\0';
									feat[d].entry=(T) atof((const char*) start);
									d++;
									break;
								}
							}

							if (j==len)
							{
								data[j]='\0';
								feat[dims-1].entry=(T) atof((const char*) start);
							}

							j++;
							start=&data[j];
						}
					}

					sparse_matrix[lines].num_feat_entries=dims;
					sparse_matrix[lines].features=feat;

					old_sz=i+1;
					lines++;
					SG_SPROGRESS(lines, 0, num_vectors, 1, "LOADING:\t")
				}
			}
		}
		SG_SINFO("file successfully read\n")
		fclose(f);
	}
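Each input line follows the SVMlight convention: a numeric label, then space-separated index:value pairs with one-based indices (the parser subtracts 1 to obtain the zero-based feat_index). For example, a file with two vectors:

2.5 1:0.43 7:0.12 42:1.0
-1.0 3:0.9 8:0.2

A minimal usage sketch (the file name is illustrative):

SGSparseMatrix<float64_t> m;
CRegressionLabels* labels = m.load_svmlight_file((char*) "train.light", true);
// m.num_vectors rows and m.num_features columns are now populated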