Exemple #1
0
int model::load_training_data(const dataset &ds)
{
	int nrow, ncol;

        nrow = ds.ins_num();
        ncol = ds.fea_num();

	if (nrow <= 0 || ncol < 1) {
		ULIB_FATAL("invalid training data dimensions");
		return -1;
	}
        if (nrow > FLAGS_max_num_examples)
                nrow = FLAGS_max_num_examples;

	if (alloc_training_data(nrow, ncol)) {
		ULIB_FATAL("couldn't allocate training data");
		return -1;
	}

        double tavg = 0;
        double tvar = 0;
	for (int i = 0; i < nrow; ++i) {
		double t = ds.get_tgv(i);
                tavg += t;
                tvar += t*t;
		gsl_vector_set(_tv, i, t);
                for (int j = 0; j < ncol; ++j)
                        gsl_matrix_set(_fm, i, j, ds.get_fea(i, j));
	}
        _t_avg = tavg/nrow;
        _t_std = sqrt(tvar/nrow - _t_avg*_t_avg);

        return 0;
}
Exemple #2
0
void line2list(struct list_head *head, const char *file)
{
	FILE *fp;
	struct str_ent *p;
	char *line = NULL;
	size_t n;
	int nl = 0;

	fp = fopen(file, "r");
	if (fp == NULL) {
		ULIB_FATAL("cannot open %s", file);
		exit(EXIT_FAILURE);
	}
	while (getline(&line, &n, fp) > 0) {
		p = (struct str_ent *) malloc(sizeof(struct str_ent));
		if (p == NULL) {
			ULIB_FATAL("cannot new entry");
			exit(EXIT_FAILURE);
		}
		list_add_tail(&p->aaa, head);
		p->str = line;
		line = NULL;
		++nl;
	}
	free(line);
	fclose(fp);
	ULIB_NOTICE("%d line(s) has been loaded", nl);
}
Exemple #3
0
int model::get_col_mean()
{
	size_t nrow = _fm->size1;
	size_t ncol = _fm->size2;

	gsl_vector_free(_col_mean);

	gsl_vector *mean = gsl_vector_alloc(ncol);
	if (mean == NULL) {
		ULIB_FATAL("couldn't allocate vector");
		return -1;
	}

	for (size_t j = 0; j < ncol; ++j) {
		double sum = 0;
		for (size_t i = 0; i < nrow; ++i)
			sum += gsl_matrix_get(_fm, i, j);
		sum /= nrow;
		gsl_vector_set(mean, j, sum);
	}

	_col_mean = mean;

	return 0;
}
Exemple #4
0
int model::alloc_training_data(int nrow, int ncol)
{
	gsl_vector_free(_tv);
	gsl_matrix_free(_fm);

	_tv = gsl_vector_alloc(nrow);
	if (_tv == NULL) {
		ULIB_FATAL("couldn't allocate target value vector");
		return -1;
	}
	_fm = gsl_matrix_alloc(nrow, ncol);
	if (_fm == NULL) {
		ULIB_FATAL("couldn't allocate feature matrix");
		_tv = NULL;
		return -1;
	}

	return 0;
}
Exemple #5
0
int main()
{
    ULIB_DEBUG("debug log");
    ULIB_WARNING("warning log");
    ULIB_NOTICE("notice log");
    ULIB_FATAL("fatal log");

    printf("passed\n");

    return 0;
}
int dataset::get_target(gsl_vector *v) const
{
        int nrow = ins_num();

        if ((int)v->size != nrow) {
                ULIB_FATAL("dataset dims do not match the target vector");
                return -1;
        }

        for (int i = 0; i < nrow; ++i)
                gsl_vector_set(v, i, get_tgv(i));

        return 0;
}
int dataset::get_matrix(gsl_matrix *m) const
{
        int nrow = ins_num();
        int ncol = fea_num();

        if ((int)m->size1 != nrow
            || (int)m->size2 != ncol) {
                ULIB_FATAL("dataset dims do not match the output matrix");
                return -1;
        }

        for (int i = 0; i < nrow; ++i)
                for (int j = 0; j < ncol; ++j)
                        gsl_matrix_set(m, i, j, get_fea(i, j));

        return 0;
}
Exemple #8
0
int model::get_col_sd()
{
	size_t nrow = _fm->size1;
	size_t ncol = _fm->size2;

	gsl_vector_free(_col_sd);

	gsl_vector *sd = gsl_vector_alloc(ncol);
	if (sd == NULL) {
		ULIB_FATAL("couldn't allocate vector");
		return -1;
	}

	double k = sqrt(nrow);
	for (size_t j = 0; j < ncol; ++j) {
		gsl_vector_const_view cv = gsl_matrix_const_column(_fm, j);
		double d = gsl_blas_dnrm2(&cv.vector);
		gsl_vector_set(sd, j, d/k);
	}

	_col_sd = sd;

	return 0;
}
Exemple #9
0
int model::predict(const dataset &tds, gsl_matrix **pp)
{
	int ret = -1;

	gsl_matrix *mat = NULL;
	gsl_matrix *ptv = NULL;
	gsl_matrix *km1 = NULL;
	gsl_matrix *km2 = NULL;
	gsl_matrix *res = NULL;
	gsl_matrix *stm = NULL;

        gsl_vector_view avg_col;
        gsl_vector_view dv;
        
	if (tds.ins_num() <= 0 || tds.fea_num() != (int)_col_mean->size) {
		ULIB_FATAL("invalid test dimensions, (ins_num=%d,fea_num=%d)",
                           tds.ins_num(), tds.fea_num());
		goto done;
	}

	mat = gsl_matrix_alloc(tds.ins_num(), tds.fea_num());
	if (mat == NULL) {
		ULIB_FATAL("couldn't allocate test feature matrix");
		goto done;
	}
	ptv = gsl_matrix_alloc(tds.ins_num(), 2);
	if (ptv == NULL) {
		ULIB_FATAL("couldn't allocate prediction matrix");
		goto done;
	}
        if (tds.get_matrix(mat)) {
                ULIB_FATAL("couldn't get test matrix");
                goto done;
        }
	dbg_print_mat(mat, "Test Matrix:");

	zero_out_mat(mat);
	norm_mat(mat);

	dbg_print_mat(mat, "Normalized Test Matrix:");

	km1 = comp_kern_mat(mat, _fm, _kern);
	if (km1 == NULL) {
		ULIB_FATAL("couldn't compute test1 kernel matrix");
		goto done;
	}
	dbg_print_mat(km1, "Test Kernel Matrix:");

	km2 = comp_kern_mat(mat, mat, _kern);
	if (km2 == NULL) {
		ULIB_FATAL("couldn't compute test2 kernel matrix");
		goto done;
	}
	dbg_print_mat(km1, "Test Kernel Matrix:");
	dv = gsl_matrix_diagonal(km2);

	res = gsl_matrix_alloc(km1->size1, _ikm->size2);
	if (res == NULL) {
		ULIB_FATAL("couldn't allocate temporary matrix");
		goto done;
	}
	stm = gsl_matrix_alloc(km2->size1, km2->size2);
	if (stm == NULL) {
		ULIB_FATAL("couldn't allocate std matrix");
		goto done;
	}

	gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, km1, _ikm, 0.0, res);
	gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, res, km1, 0.0, stm);
        gsl_matrix_sub(km2, stm);
	dbg_print_mat(res, "Predictive Matrix:");
        avg_col = gsl_matrix_column(ptv, 0);
	gsl_blas_dgemv(CblasNoTrans, 1.0, res, _tv, 0.0, &avg_col.vector);
        gsl_vector_add_constant(&avg_col.vector, _t_avg);
        gsl_matrix_scale(km2, _t_std*_t_std);
	gsl_vector_add_constant(&dv.vector, _noise_var);
        for (size_t i = 0; i < km2->size1; ++i)
                gsl_matrix_set(ptv, i, 1, sqrt(gsl_vector_get(&dv.vector, i)));
	*pp = ptv;
	ptv = NULL;
	ret = 0;
done:
	gsl_matrix_free(mat);
	gsl_matrix_free(ptv);
	gsl_matrix_free(km1);
	gsl_matrix_free(km2);
	gsl_matrix_free(res);
	gsl_matrix_free(stm);
	return ret;
}
Exemple #10
0
int model::train(const dataset &ds)
{
	int ret = -1;
	gsl_matrix *km = NULL;
	gsl_matrix *ikm = NULL;
	gsl_permutation *perm = NULL;
	gsl_vector_view dv;

	gsl_matrix_free(_ikm);

	if (load_training_data(ds)) {
		ULIB_FATAL("couldn't load training data");
		goto done;
	}
	dbg_print_mat(_fm, "Feature Matrix:");
	if (get_col_mean()) {
		ULIB_FATAL("couldn't get feature column means");
		goto done;
	}
	zero_out_mat(_fm);
	if (get_col_sd()) {
		ULIB_FATAL("couldn't get feature column standard deviations");
		goto done;
	}
	norm_mat(_fm);
	dbg_print_mat(_fm, "Normalized Feature Matrix:");

	km = comp_kern_mat(_fm, _kern);
	dbg_print_mat(km, "Kernel Matrix:");
	if (km == NULL) {
		ULIB_FATAL("couldn't compute kernel matrix");
		goto done;
	}

	dv = gsl_matrix_diagonal(km);
	gsl_vector_add_constant(&dv.vector, _noise_var);

	ikm = gsl_matrix_alloc(km->size1, km->size2);
	if (ikm == NULL) {
		ULIB_FATAL("couldn't allocate cost model");
		goto done;
	}

	int signum;
	perm = gsl_permutation_alloc(ikm->size1);
	if (perm == NULL) {
		ULIB_FATAL("couldn't allocate cost model");
		goto done;
	}
	gsl_linalg_LU_decomp(km, perm, &signum);
	gsl_linalg_LU_invert(km, perm, ikm);
        gsl_vector_add_constant(_tv, -_t_avg);

	_ikm = ikm;
	ikm = NULL;
	ret = 0;
done:
	gsl_permutation_free(perm);
	gsl_matrix_free(km);
	gsl_matrix_free(ikm);

	return ret;
}