/** Trains the regression by running slep_tree_lsr() on the current features.
 *
 * When @p data is non-NULL it is installed via set_features() first. The
 * labels are copied into a contiguous buffer, the solver options are filled
 * from this object's settings and the feature tree, and the solver result is
 * stored in w.
 *
 * @param data optional features to train on; cast to CDotFeatures without a
 *        type check (NOTE(review): callers are assumed to pass dot features)
 * @return true (this implementation has no failure return)
 */
bool CFeatureTreeLeastSquaresRegression::train_machine(CFeatures* data)
{
	// casting a non-NULL pointer never yields NULL, so testing data suffices
	if (data)
		set_features((CDotFeatures*)data);

	ASSERT(features);
	ASSERT(m_labels);

	int32_t n_vectors = features->get_num_vectors();

	// the label buffer is owned by an SGVector so that it is released on
	// every exit path, including a solver that leaves via an exception
	SGVector<float64_t> y(n_vectors);
	for (int32_t i=0; i<n_vectors; i++)
		y.vector[i] = ((CRegressionLabels*)m_labels)->get_label(i);

	slep_options options;
	options.general = false;
	options.termination = m_termination;
	options.tolerance = m_tolerance;
	options.max_iter = m_max_iter;
	options.restart_num = 10000;
	options.n_nodes = 1;
	options.regularization = 0;
	// keep the SGVector alive while the solver reads through options.ind
	SGVector<float64_t> ind = m_feature_tree->get_ind();
	options.ind = ind.vector;
	options.G = NULL;
	options.initial_w = NULL;

	w = slep_tree_lsr(features,y.vector,m_z,options);

	return true;
}
Beispiel #2
0
/** Creates an array whose extents are copied from @p dimensions.
 *
 * The total number of elements is the product of all extents and must be
 * positive. The check runs before any memory is allocated so that a rejected
 * shape does not leave an orphaned dims buffer behind
 * (NOTE(review): assumes REQUIRE aborts construction on failure).
 *
 * @param dimensions extent of every dimension
 * @param ref_counting forwarded to SGReferencedData
 */
template<class T> SGNDArray<T>::SGNDArray(const SGVector<index_t> dimensions, bool ref_counting) :
	SGReferencedData(ref_counting)
{
	num_dims = dimensions.size();

	// validate the requested shape first
	len_array = 1;
	for (int32_t i=0; i<num_dims; i++)
		len_array *= dimensions[i];

	REQUIRE(len_array>0, "Length of array (%d) must be greater than 0\n", len_array);

	// shape accepted: take a private copy of the extents
	dims = SG_MALLOC(index_t, num_dims);
	for (int32_t i=0; i<num_dims; i++)
		dims[i] = dimensions[i];

	array = SG_MALLOC(T, len_array);
}
Beispiel #3
0
/** Creates a list holding @p nmats default-constructed matrices.
 *
 * @param nmats number of matrices in the list
 * @param ref_counting forwarded to SGReferencedData
 */
SGMatrixList<T>::SGMatrixList(int32_t nmats, bool ref_counting)
: SGReferencedData(ref_counting), num_matrices(nmats)
{
	matrix_list = SG_MALLOC(SGMatrix<T>, nmats);

	// run the SGMatrix default constructor in place on every slot
	int32_t idx = 0;
	while (idx < nmats)
	{
		new (&matrix_list[idx]) SGMatrix<T>();
		++idx;
	}
}
Beispiel #4
0
/** Returns a newly allocated element-wise copy of a matrix buffer.
 *
 * @param matrix source buffer holding nrows*ncols elements
 * @param nrows number of rows
 * @param ncols number of columns
 * @return buffer obtained from SG_MALLOC containing the copied elements
 */
T* SGMatrix<T>::clone_matrix(const T* matrix, int32_t nrows, int32_t ncols)
{
	// element count is formed in 64 bits
	const int64_t num_elements = int64_t(nrows)*ncols;
	T* copy = SG_MALLOC(T, num_elements);

	int64_t k = 0;
	while (k < num_elements)
	{
		copy[k] = matrix[k];
		++k;
	}

	return copy;
}
Beispiel #5
0
/** Inverts a square matrix by calling clapack_dgetrf() and clapack_dgetri().
 *
 * Both routines operate directly on matrix.matrix, so the buffer behind
 * @p matrix is overwritten. Their return codes are not inspected.
 *
 * @param matrix square matrix (asserted) whose buffer is processed in place
 */
void SGMatrix<T>::inverse(SGMatrix<float64_t> matrix)
{
	ASSERT(matrix.num_cols==matrix.num_rows);

	float64_t* const values = matrix.matrix;
	int32_t* pivots = SG_MALLOC(int32_t, matrix.num_cols);

	// the pivot indices written by dgetrf are consumed by dgetri
	clapack_dgetrf(CblasColMajor, matrix.num_cols, matrix.num_cols,
			values, matrix.num_cols, pivots);
	clapack_dgetri(CblasColMajor, matrix.num_cols,
			values, matrix.num_cols, pivots);

	SG_FREE(pivots);
}
Beispiel #6
0
/** Creates an array from an existing extents buffer.
 *
 * The pointer @p d is stored as-is (not copied). The element count is the
 * product of the @p nd extents and must be greater than zero.
 *
 * @param d extents, one entry per dimension
 * @param nd number of dimensions
 * @param ref_counting forwarded to SGReferencedData
 */
template<class T> SGNDArray<T>::SGNDArray(index_t* d, index_t nd, bool ref_counting) :
	SGReferencedData(ref_counting), dims(d), num_dims(nd)
{
	// total length = product of all extents
	len_array = 1;
	int32_t axis = 0;
	while (axis < num_dims)
	{
		len_array *= dims[axis];
		++axis;
	}

	REQUIRE(len_array>0, "Length of array (%d) must be greater than 0\n", len_array);
	array = SG_MALLOC(T, len_array);
}
Beispiel #7
0
/** Creates an iterator over the entries of one feature vector.
 *
 * The iterator is allocated with SG_MALLOC and initialised with the vector
 * returned by get_feature_vector(), its length and free flag, the vector
 * index and a position of 0.
 *
 * @param vector_index index of the vector to iterate over; must lie in
 *        [0, get_num_vectors())
 * @return pointer to the newly allocated dense_feature_iterator
 */
template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
{
	const int32_t num_vec = get_num_vectors();

	// negative indices are just as invalid as too large ones
	if (vector_index<0 || vector_index>=num_vec)
	{
		SG_ERROR("Index out of bounds (number of vectors %d, you "
		"requested %d)\n", num_vec, vector_index);
	}

	dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
	// vlen and vfree are filled in by get_feature_vector()
	iterator->vec = get_feature_vector(vector_index, iterator->vlen,
			iterator->vfree);
	iterator->vidx = vector_index;
	iterator->index = 0;
	return iterator;
}
Beispiel #8
0
/** Replaces a matrix buffer by its transpose and swaps the two dimensions.
 *
 * A new buffer is allocated, filled with the transposed entries, and the old
 * buffer is released with SG_FREE. All sizes and offsets are computed in 64
 * bits so that matrices with more than 2^31 elements do not overflow.
 *
 * @param matrix in: buffer of num_feat*num_vec elements where entry
 *        (vector i, feature j) sits at i*num_feat+j; out: newly allocated
 *        buffer where that entry sits at i+j*num_vec
 * @param num_feat in/out: swapped with num_vec on return
 * @param num_vec in/out: swapped with num_feat on return
 */
void SGMatrix<T>::transpose_matrix(
	T*& matrix, int32_t& num_feat, int32_t& num_vec)
{
	/* this should be done in-place! Heiko */
	const int64_t n_feat=num_feat;
	const int64_t n_vec=num_vec;

	T* transposed=SG_MALLOC(T, n_vec*n_feat);
	for (int64_t i=0; i<n_vec; i++)
	{
		for (int64_t j=0; j<n_feat; j++)
			transposed[i+j*n_vec]=matrix[i*n_feat+j];
	}

	SG_FREE(matrix);
	matrix=transposed;

	CMath::swap(num_feat, num_vec);
}
/** Decompresses a string that starts with a two-int32 size header.
 *
 * The first int32 of @p f is read as the compressed size and the second as
 * the uncompressed length in elements. The payload starts offs elements
 * into @p f, where offs is the header size rounded up to whole ST elements.
 *
 * @param f buffer holding header and compressed payload
 * @param len in: length of @p f in elements (asserted to equal
 *        offs + compressed size); out: uncompressed length in elements
 * @return buffer from SG_MALLOC holding the decompressed string
 */
ST* CDecompressString<ST>::apply_to_string(ST* f, int32_t &len)
{
	int32_t* header=(int32_t*) f;
	uint64_t compressed_size=header[0];
	uint64_t uncompressed_size=header[1];

	// number of ST elements occupied by the header
	int32_t offs=CMath::ceil(2.0*sizeof(int32_t)/sizeof(ST));
	ASSERT(uint64_t(len)==uint64_t(offs)+compressed_size);

	len=uncompressed_size;
	// from here on uncompressed_size counts bytes instead of elements
	uncompressed_size*=sizeof(ST);

	ST* vec=SG_MALLOC(ST, len);
	uint8_t* payload=(uint8_t*) (&f[offs]);
	uint8_t* output=(uint8_t*) vec;
	compressor->decompress(payload, compressed_size, output, uncompressed_size);

	// the byte count after decompression must match the announced length
	ASSERT(uncompressed_size==((uint64_t) len)*sizeof(ST));
	return vec;
}
Beispiel #10
0
/** Scans the file line by line and records each line in the features array.
 *
 * Any previous state is cleared via cleanup(). For every non-NULL result of
 * get_line() the returned pointer and length are stored in the slot
 * num_vectors-1 and max_string_length is updated. The array grows by
 * @p granularity entries whenever num_vectors exceeds its capacity and is
 * resized to num_vectors entries at the end.
 * NOTE(review): get_line() is defined elsewhere; it is presumed to advance
 * len, offs and num_vectors - confirm.
 *
 * @param granularity initial capacity and growth step of the array, >= 1
 */
template <class ST> void CStringFileFeatures<ST>::fetch_meta_info_from_file(int32_t granularity)
{
	CStringFileFeatures<ST>::cleanup();
	uint64_t file_size=file->get_size();
	ASSERT(granularity>=1);
	ASSERT(CStringFeatures<ST>::alphabet);

	int64_t capacity=granularity;
	CStringFeatures<ST>::features=SG_MALLOC(SGString<ST>, capacity);

	uint64_t offs=0;
	uint64_t len=0;
	CStringFeatures<ST>::max_string_length=0;
	CStringFeatures<ST>::num_vectors=0;

	for (;;)
	{
		ST* line=get_line(len, offs, CStringFeatures<ST>::num_vectors, file_size);

		// a NULL line ends the scan
		if (!line)
			break;

		if (CStringFeatures<ST>::num_vectors > capacity)
		{
			const int64_t grown=capacity+granularity;
			CStringFeatures<ST>::features = SG_REALLOC(SGString<ST>, CStringFeatures<ST>::features, capacity, grown);
			capacity=grown;
		}

		// the line just read belongs into the last counted slot
		SGString<ST>& slot=CStringFeatures<ST>::features[CStringFeatures<ST>::num_vectors-1];
		slot.string=line;
		slot.slen=len;
		CStringFeatures<ST>::max_string_length=CMath::max(CStringFeatures<ST>::max_string_length, (int32_t) len);
	}

	CStringFeatures<ST>::SG_INFO("number of strings:%d\n", CStringFeatures<ST>::num_vectors);
	CStringFeatures<ST>::SG_INFO("maximum string length:%d\n", CStringFeatures<ST>::max_string_length);
	CStringFeatures<ST>::SG_INFO("max_value_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_max_value_in_histogram());
	CStringFeatures<ST>::SG_INFO("num_symbols_in_histogram:%d\n", CStringFeatures<ST>::alphabet->get_num_symbols_in_histogram());

	// discard everything again if either alphabet check fails
	const bool alphabet_ok=CStringFeatures<ST>::alphabet->check_alphabet_size() && CStringFeatures<ST>::alphabet->check_alphabet();
	if (!alphabet_ok)
		CStringFileFeatures<ST>::cleanup();

	// resize the array to the number of strings
	CStringFeatures<ST>::features=SG_REALLOC(SGString<ST>, CStringFeatures<ST>::features, capacity, CStringFeatures<ST>::num_vectors);
}
Beispiel #11
0
/** Returns a newly allocated transposed copy of the feature matrix.
 *
 * Entry j of feature vector i is written to position j*N+i of the result,
 * where N is the number of vectors.
 *
 * @param num_feat out: set to the number of vectors of this object
 * @param num_vec out: set to num_features of this object
 * @return buffer from SG_MALLOC with num_feat*num_vec elements
 */
template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
{
	num_feat = get_num_vectors();
	num_vec = num_features;

	const int32_t n_src_vecs=get_num_vectors();
	// distance between consecutive entries of one source vector in the result
	const int64_t stride=int64_t(n_src_vecs);

	ST* transposed = SG_MALLOC(ST, int64_t(num_feat) * num_vec);

	int32_t v=0;
	while (v<n_src_vecs)
	{
		SGVector<ST> column=get_feature_vector(v);

		for (int32_t d=0; d<column.vlen; d++)
			transposed[d*stride+v]=column.vector[d];

		free_feature_vector(column, v);
		++v;
	}

	return transposed;
}
Beispiel #12
0
/** Computes the pseudo-inverse of a column-major matrix via its SVD.
 *
 * With matrix = U*S*V^T as returned by wrap_dgesvd(), the result is
 * V*S^+*U^T, a cols x rows matrix written column-major into @p target.
 * Singular values that are exactly zero contribute nothing to the result
 * instead of producing a division by zero.
 *
 * @param matrix rows x cols input, column-major (lda = rows); handed to
 *        wrap_dgesvd() as-is (NOTE(review): LAPACK's dgesvd overwrites its
 *        input - confirm the wrapper does not copy)
 * @param rows number of rows of @p matrix
 * @param cols number of columns of @p matrix
 * @param target output buffer of rows*cols elements, or NULL to have one
 *        allocated here with SG_MALLOC
 * @return @p target, or the newly allocated buffer
 */
float64_t* SGMatrix<T>::pinv(
		float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
{
	// buffer sizes are formed in 64 bits to avoid int overflow
	if (!target)
		target=SG_MALLOC(float64_t, int64_t(rows)*cols);

	char jobu='A';
	char jobvt='A';
	int m=rows; /* for calling external lib */
	int n=cols; /* for calling external lib */
	int lda=m; /* for calling external lib */
	int ldu=m; /* for calling external lib */
	int ldvt=n; /* for calling external lib */
	int info=-1; /* for calling external lib */
	int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
	double* s=SG_MALLOC(double, lsize);
	double* u=SG_MALLOC(double, int64_t(m)*m);
	double* vt=SG_MALLOC(double, int64_t(n)*n);

	wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
	ASSERT(info==0);

	// scale row j of V^T by 1/s[j]; a zero singular value zeroes the row
	for (int32_t i=0; i<n; i++)
	{
		for (int32_t j=0; j<lsize; j++)
		{
			const int64_t idx=int64_t(i)*n+j;
			vt[idx]=(s[j]!=0.0) ? vt[idx]/s[j] : 0.0;
		}
	}

	// target (n x m) = (first lsize rows of scaled V^T)^T * (first lsize
	// columns of U)^T; for square input this equals the former m,n,m call,
	// for non-square input the former dimensions were inconsistent
	cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, m, lsize, 1.0, vt, ldvt, u, ldu, 0, target, n);

	SG_FREE(u);
	SG_FREE(vt);
	SG_FREE(s);

	return target;
}
Beispiel #13
0
/** Creates a sparse matrix with storage for @p num_vec sparse vectors.
 *
 * @param num_feat number of features, stored in num_features
 * @param num_vec number of vectors, stored in num_vectors
 * @param ref_counting forwarded to SGReferencedData
 */
SGSparseMatrix<T>::SGSparseMatrix(index_t num_feat, index_t num_vec, bool ref_counting) :
	SGReferencedData(ref_counting),
	num_vectors(num_vec),
	num_features(num_feat)
{
	// one SGSparseVector slot per vector, obtained from SG_MALLOC
	SGSparseVector<T>* storage=SG_MALLOC(SGSparseVector<T>, num_vectors);
	sparse_matrix=storage;
}
Beispiel #14
0
/** Creates a matrix with a freshly allocated nrows*ncols element buffer.
 *
 * @param nrows number of rows
 * @param ncols number of columns
 * @param ref_counting forwarded to SGReferencedData
 */
SGMatrix<T>::SGMatrix(index_t nrows, index_t ncols, bool ref_counting)
	: SGReferencedData(ref_counting), num_rows(nrows), num_cols(ncols)
{
	// widen before multiplying so the element count is formed in 64 bits
	const int64_t num_elements=((int64_t) nrows)*ncols;
	matrix=SG_MALLOC(T, num_elements);
}
Beispiel #15
0
/** Creates a vector with a freshly allocated buffer of @p len elements.
 *
 * gpu_ptr starts out as NULL and the m_on_gpu flag is stored as false.
 *
 * @param len number of elements
 * @param ref_counting forwarded to SGReferencedData
 */
SGVector<T>::SGVector(index_t len, bool ref_counting)
: SGReferencedData(ref_counting), vlen(len), gpu_ptr(NULL)
{
	m_on_gpu.store(false, std::memory_order_release);
	vector=SG_MALLOC(T, len);
}
/** Computes one weight per training index with libqp_gsmo_solver().
 *
 * Builds the symmetric kernel matrix over the training indices in kmm_K
 * (read by the solver through kmm_get_col) and a linear term kappa, where
 * kappa[i] is the negated sum of kernel values between training index i and
 * all test indices, scaled by n_tr/n_te. Bounds are [0, 2] for every
 * weight, and all weights start at 1/n_tr.
 *
 * All temporaries, including kmm_K, are released before returning.
 *
 * @return vector holding one weight per training index
 */
SGVector<float64_t> CKernelMeanMatching::compute_weights()
{
	int32_t i,j;
	ASSERT(m_kernel)
	ASSERT(m_training_indices.vlen)
	ASSERT(m_test_indices.vlen)

	int32_t n_tr = m_training_indices.vlen;
	int32_t n_te = m_test_indices.vlen;
	// 64-bit leading dimension: n_tr*n_tr and the offsets below would
	// overflow 32-bit arithmetic for large training sets
	const int64_t ld = n_tr;

	// every entry is assigned before the solver runs, no zeroing required
	SGVector<float64_t> weights(n_tr);

	kmm_K = SG_MALLOC(float64_t, ld*ld);
	kmm_K_ld = n_tr;
	float64_t* diag_K = SG_MALLOC(float64_t, n_tr);
	for (i=0; i<n_tr; i++)
	{
		float64_t d = m_kernel->kernel(m_training_indices[i], m_training_indices[i]);
		diag_K[i] = d;
		kmm_K[i*ld+i] = d;
		// evaluate each off-diagonal pair once and mirror it
		for (j=i+1; j<n_tr; j++)
		{
			d = m_kernel->kernel(m_training_indices[i],m_training_indices[j]);
			kmm_K[i*ld+j] = d;
			kmm_K[j*ld+i] = d;
		}
	}

	float64_t* kappa = SG_MALLOC(float64_t, n_tr);
	for (i=0; i<n_tr; i++)
	{
		float64_t avg = 0.0;
		for (j=0; j<n_te; j++)
			avg+= m_kernel->kernel(m_training_indices[i],m_test_indices[j]);

		avg *= float64_t(n_tr)/n_te;
		kappa[i] = -avg;
	}

	float64_t* a = SG_MALLOC(float64_t, n_tr);
	float64_t* LB = SG_MALLOC(float64_t, n_tr);
	float64_t* UB = SG_MALLOC(float64_t, n_tr);
	float64_t B = 2.0;
	for (i=0; i<n_tr; i++)
	{
		a[i] = 1.0;
		LB[i] = 0.0;
		UB[i] = B;
		weights[i] = 1.0/float64_t(n_tr);
	}

	libqp_state_T result =
		libqp_gsmo_solver(&kmm_get_col,diag_K,kappa,a,1.0,LB,UB,weights,n_tr,1000,1e-9,NULL);

	SG_DEBUG("libqp exitflag=%d, %d iterations passed, primal objective=%f\n",
	         result.exitflag,result.nIter,result.QP);

	SG_FREE(kappa);
	SG_FREE(a);
	SG_FREE(LB);
	SG_FREE(UB);
	SG_FREE(diag_K);
	SG_FREE(kmm_K);
	// kmm_K outlives this call; do not leave it pointing at freed memory
	kmm_K = NULL;

	return weights;
}
Beispiel #17
0
/** Creates a string list with storage for @p num_s strings.
 *
 * @param num_s number of strings, stored in num_strings
 * @param max_length stored in max_string_length
 * @param ref_counting forwarded to SGReferencedData
 */
SGStringList<T>::SGStringList(index_t num_s, index_t max_length, bool ref_counting) :
	SGReferencedData(ref_counting),
	num_strings(num_s),
	max_string_length(max_length)
{
	// one SGString slot per string, obtained from SG_MALLOC
	SGString<T>* storage=SG_MALLOC(SGString<T>, num_strings);
	strings=storage;
}
Beispiel #18
0
template<class T> CRegressionLabels* SGSparseMatrix<T>::load_svmlight_file(char* fname,
		bool do_sort_features)
{
	CRegressionLabels* lab=NULL;

	size_t blocksize=1024*1024;
	size_t required_blocksize=blocksize;
	uint8_t* dummy=SG_MALLOC(uint8_t, blocksize);
	FILE* f=fopen(fname, "ro");

	if (f)
	{
		free_data();

		SG_SINFO("counting line numbers in file %s\n", fname)
		size_t sz=blocksize;
		size_t block_offs=0;
		size_t old_block_offs=0;
		fseek(f, 0, SEEK_END);
		size_t fsize=ftell(f);
		rewind(f);

		while (sz == blocksize)
		{
			sz=fread(dummy, sizeof(uint8_t), blocksize, f);
			for (size_t i=0; i<sz; i++)
			{
				block_offs++;
				if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
				{
					num_vectors++;
					required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1);
					old_block_offs=block_offs;
				}
			}
			SG_SPROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t")
		}

		SG_SINFO("found %d feature vectors\n", num_vectors)
		SG_FREE(dummy);
		blocksize=required_blocksize;
		dummy = SG_MALLOC(uint8_t, blocksize+1); //allow setting of '\0' at EOL

		lab=new CRegressionLabels(num_vectors);
		sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors);
		rewind(f);
		sz=blocksize;
		int32_t lines=0;
		while (sz == blocksize)
		{
			sz=fread(dummy, sizeof(uint8_t), blocksize, f);

			size_t old_sz=0;
			for (size_t i=0; i<sz; i++)
			{
				if (i==sz-1 && dummy[i]!='\n' && sz==blocksize)
				{
					size_t len=i-old_sz+1;
					uint8_t* data=&dummy[old_sz];

					for (size_t j=0; j<len; j++)
						dummy[j]=data[j];

					sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, f);
					i=0;
					old_sz=0;
					sz+=len;
				}

				if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
				{

					size_t len=i-old_sz;
					uint8_t* data=&dummy[old_sz];

					int32_t dims=0;
					for (size_t j=0; j<len; j++)
					{
						if (data[j]==':')
							dims++;
					}

					if (dims<=0)
					{
						SG_SERROR("Error in line %d - number of"
								" dimensions is %d line is %d characters"
								" long\n line_content:'%.*s'\n", lines,
								dims, len, len, (const char*) data);
					}

					SGSparseVectorEntry<T>* feat=SG_MALLOC(SGSparseVectorEntry<T>, dims);
					size_t j=0;
					for (; j<len; j++)
					{
						if (data[j]==' ')
						{
							data[j]='\0';

							lab->set_label(lines, atof((const char*) data));
							break;
						}
					}

					int32_t d=0;
					j++;
					uint8_t* start=&data[j];
					for (; j<len; j++)
					{
						if (data[j]==':')
						{
							data[j]='\0';

							feat[d].feat_index=(int32_t) atoi((const char*) start)-1;
							num_features=CMath::max(num_features, feat[d].feat_index+1);

							j++;
							start=&data[j];
							for (; j<len; j++)
							{
								if (data[j]==' ' || data[j]=='\n')
								{
									data[j]='\0';
									feat[d].entry=(T) atof((const char*) start);
									d++;
									break;
								}
							}

							if (j==len)
							{
								data[j]='\0';
								feat[dims-1].entry=(T) atof((const char*) start);
							}

							j++;
							start=&data[j];
						}
					}

					sparse_matrix[lines].num_feat_entries=dims;
					sparse_matrix[lines].features=feat;

					old_sz=i+1;
					lines++;
					SG_SPROGRESS(lines, 0, num_vectors, 1, "LOADING:\t")
				}
			}
		}
		SG_SINFO("file successfully read\n")
		fclose(f);
	}
Beispiel #19
0
/** Returns a newly allocated byte-wise copy of a vector buffer.
 *
 * @param vec source buffer holding @p len elements
 * @param len number of elements to copy
 * @return buffer from SG_MALLOC containing a memcpy of the source
 */
T* SGVector<T>::clone_vector(const T* vec, int32_t len)
{
	T* copy = SG_MALLOC(T, len);

	// raw byte copy of all len elements
	memcpy(copy, vec, sizeof(T)*len);

	return copy;
}
Beispiel #20
0
/** Creates a vector with a freshly allocated buffer of @p len elements.
 *
 * @param len number of elements, stored in vlen
 * @param ref_counting forwarded to SGReferencedData
 */
SGVector<T>::SGVector(index_t len, bool ref_counting)
	: SGReferencedData(ref_counting),
	  vlen(len)
{
	// buffer for len elements, obtained from SG_MALLOC
	T* storage=SG_MALLOC(T, len);
	vector=storage;
}