int model::load_training_data(const dataset &ds) { int nrow, ncol; nrow = ds.ins_num(); ncol = ds.fea_num(); if (nrow <= 0 || ncol < 1) { ULIB_FATAL("invalid training data dimensions"); return -1; } if (nrow > FLAGS_max_num_examples) nrow = FLAGS_max_num_examples; if (alloc_training_data(nrow, ncol)) { ULIB_FATAL("couldn't allocate training data"); return -1; } double tavg = 0; double tvar = 0; for (int i = 0; i < nrow; ++i) { double t = ds.get_tgv(i); tavg += t; tvar += t*t; gsl_vector_set(_tv, i, t); for (int j = 0; j < ncol; ++j) gsl_matrix_set(_fm, i, j, ds.get_fea(i, j)); } _t_avg = tavg/nrow; _t_std = sqrt(tvar/nrow - _t_avg*_t_avg); return 0; }
int model::predict(const dataset &tds, gsl_matrix **pp) { int ret = -1; gsl_matrix *mat = NULL; gsl_matrix *ptv = NULL; gsl_matrix *km1 = NULL; gsl_matrix *km2 = NULL; gsl_matrix *res = NULL; gsl_matrix *stm = NULL; gsl_vector_view avg_col; gsl_vector_view dv; if (tds.ins_num() <= 0 || tds.fea_num() != (int)_col_mean->size) { ULIB_FATAL("invalid test dimensions, (ins_num=%d,fea_num=%d)", tds.ins_num(), tds.fea_num()); goto done; } mat = gsl_matrix_alloc(tds.ins_num(), tds.fea_num()); if (mat == NULL) { ULIB_FATAL("couldn't allocate test feature matrix"); goto done; } ptv = gsl_matrix_alloc(tds.ins_num(), 2); if (ptv == NULL) { ULIB_FATAL("couldn't allocate prediction matrix"); goto done; } if (tds.get_matrix(mat)) { ULIB_FATAL("couldn't get test matrix"); goto done; } dbg_print_mat(mat, "Test Matrix:"); zero_out_mat(mat); norm_mat(mat); dbg_print_mat(mat, "Normalized Test Matrix:"); km1 = comp_kern_mat(mat, _fm, _kern); if (km1 == NULL) { ULIB_FATAL("couldn't compute test1 kernel matrix"); goto done; } dbg_print_mat(km1, "Test Kernel Matrix:"); km2 = comp_kern_mat(mat, mat, _kern); if (km2 == NULL) { ULIB_FATAL("couldn't compute test2 kernel matrix"); goto done; } dbg_print_mat(km1, "Test Kernel Matrix:"); dv = gsl_matrix_diagonal(km2); res = gsl_matrix_alloc(km1->size1, _ikm->size2); if (res == NULL) { ULIB_FATAL("couldn't allocate temporary matrix"); goto done; } stm = gsl_matrix_alloc(km2->size1, km2->size2); if (stm == NULL) { ULIB_FATAL("couldn't allocate std matrix"); goto done; } gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, km1, _ikm, 0.0, res); gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, res, km1, 0.0, stm); gsl_matrix_sub(km2, stm); dbg_print_mat(res, "Predictive Matrix:"); avg_col = gsl_matrix_column(ptv, 0); gsl_blas_dgemv(CblasNoTrans, 1.0, res, _tv, 0.0, &avg_col.vector); gsl_vector_add_constant(&avg_col.vector, _t_avg); gsl_matrix_scale(km2, _t_std*_t_std); gsl_vector_add_constant(&dv.vector, _noise_var); for (size_t i = 0; i < km2->size1; ++i) gsl_matrix_set(ptv, i, 1, sqrt(gsl_vector_get(&dv.vector, i))); *pp = ptv; ptv = NULL; ret = 0; done: gsl_matrix_free(mat); gsl_matrix_free(ptv); gsl_matrix_free(km1); gsl_matrix_free(km2); gsl_matrix_free(res); gsl_matrix_free(stm); return ret; }