/* Apply the Householder reflectors stored in a packed LQ matrix to the
 * vector v, in place (the "v Q^T" product of the original comment).
 *
 * LQ  : N x M packed factorisation; row i, from column i onwards, holds
 *       the i-th Householder vector
 * tau : Householder coefficients, length MIN(M,N)
 * v   : vector of length M, overwritten with the result
 *
 * Returns GSL_SUCCESS, or reports GSL_EBADLEN through GSL_ERROR when the
 * length of tau or v does not match the matrix.
 */
int
gsl_linalg_LQ_vecQ (const gsl_matrix * LQ, const gsl_vector * tau, gsl_vector * v)
{
  const size_t n_rows = LQ->size1;
  const size_t n_cols = LQ->size2;
  const size_t n_reflectors = GSL_MIN (n_cols, n_rows);
  size_t remaining;

  if (tau->size != n_reflectors)
    {
      GSL_ERROR ("size of tau must be MIN(M,N)", GSL_EBADLEN);
    }

  if (v->size != n_cols)
    {
      GSL_ERROR ("vector size must be M", GSL_EBADLEN);
    }

  /* compute v Q^T: walk the reflectors from the last one down to the first */
  for (remaining = n_reflectors; remaining > 0; remaining--)
    {
      const size_t k = remaining - 1;
      const size_t tail = n_cols - k;

      gsl_vector_const_view lq_row = gsl_matrix_const_row (LQ, k);
      gsl_vector_const_view reflector =
        gsl_vector_const_subvector (&(lq_row.vector), k, tail);
      gsl_vector_view target = gsl_vector_subvector (v, k, tail);

      gsl_linalg_householder_hv (gsl_vector_get (tau, k),
                                 &(reflector.vector), &(target.vector));
    }

  return GSL_SUCCESS;
}
/// Copy a row into a GSLVector /// @param i :: A row index. GSLVector GSLMatrix::copyRow(size_t i) const { if (i >= size1()) { throw std::out_of_range("GSLMatrix row index is out of range."); } auto rowView = gsl_matrix_const_row(gsl(), i); return GSLVector(&rowView.vector); }
/* Accumulate a step into dx from the rows of Vt.
 *
 * dx is zeroed, then for each leading index i whose sig2[i] is at least
 *   threshold = sig2[0] * Vt->size2 * DBL_EPSILON * 100
 * the row Vt[i,:] is added with weight
 *   ufuncsig[i] / (sig2[i]^2 + lambda2).
 * Accumulation stops at the first entry of sig2 below the threshold.
 * If scaling is non-NULL, dx is finally multiplied element-wise by it.
 *
 * The parameter k is not used by the active code.
 */
static void moveGN( const gsl_matrix *Vt, const gsl_vector *sig2,
                    const gsl_vector *ufuncsig, double lambda2,
                    gsl_vector * dx, int k, gsl_vector * scaling ) {
  gsl_vector_set_zero(dx);

  double threshold = gsl_vector_get(sig2, 0) * Vt->size2 * DBL_EPSILON * 100;

  size_t idx = 0;
  while (idx < sig2->size) {
    const double sigma = gsl_vector_get(sig2, idx);
    if (!(sigma >= threshold)) {
      break;
    }

    /* Copy of the view header only; it still points at Vt's storage. */
    gsl_vector row = gsl_matrix_const_row(Vt, idx).vector;
    const double weight = gsl_vector_get(ufuncsig, idx) / (sigma * sigma + lambda2);
    gsl_blas_daxpy(weight, &row, dx);

    idx++;
  }

  /* Disabled diagnostic kept from the original:
  if (idx >= k) {
    PRINTF("Pseudoinverse threshold exceeded.\n");
    PRINTF("Threshold: %g, i: %d, last: %g, next: %g\n",
           threshold, idx, gsl_vector_get(sig2, idx-1), gsl_vector_get(sig2, idx));
  }*/

  if (scaling != NULL) {
    gsl_vector_mul(dx, scaling);
  }
}
double shapeAlign::cosineSim(const gsl_matrix* X, const gsl_matrix *Y) { double cosSim = 0; double denomX = 0, denomY = 0; for (size_t i = 0; i < X->size1; i++){ double dot = 0; // Get vector views of rows gsl_vector_const_view Xrow = gsl_matrix_const_row(X,i); gsl_vector_const_view Yrow = gsl_matrix_const_row(Y,i); // Compute dot product gsl_blas_ddot(&Xrow.vector,&Yrow.vector,&dot); cosSim +=dot; denomX += pow(gsl_blas_dnrm2(&Xrow.vector),2.0); denomY += pow(gsl_blas_dnrm2(&Yrow.vector),2.0); } cosSim = cosSim / (sqrt(denomX * denomY)); return cosSim; }
int main(int argc, char *argv[]) { size_t NumParam = 9; size_t InitSamples = (unsigned int) atof(argv[1]); unsigned int rng_seed = (unsigned int) atof(argv[2]); // Declare and configure GSL RNG gsl_rng * rng; const gsl_rng_type * T; gsl_rng_env_setup(); T = gsl_rng_default; rng = gsl_rng_alloc (T); gsl_rng_set(rng, rng_seed); gsl_matrix * Xmat = gsl_matrix_alloc(InitSamples, NumParam); sample_prior(rng, InitSamples, Xmat); // for(size_t i = 0; i < InitSamples; i++){ // for(size_t j = 0; j < NumParam; j++) // printf("%10.5f ", gsl_matrix_get(Xmat, i, j)); // printf("\n"); // } // printf("\nlog prior:\n"); // for(size_t i = 0; i < InitSamples; i++){ // gsl_vector_view v = gsl_matrix_row(Xmat, i); // printf("%8.4f ", log(prior(&v.vector))); // if((i+1) % 10 == 0) printf("\n"); // } // printf("\nlog likelihood:\n"); // for(size_t i = 0; i < InitSamples; i++){ // gsl_vector_view v = gsl_matrix_row(Xmat, i); // printf("%8.4f ", log(likelihood(&v.vector))); // if((i+1) % 10 == 0) printf("\n"); // } #pragma omp parallel for for(size_t i = 0; i < InitSamples; i++){ gsl_vector_const_view v = gsl_matrix_const_row(Xmat, i); likelihood(&v.vector); if(likelihood(&v.vector) > 0) printf("%7zu: %f %f\n", i, log(prior(&v.vector)), log(likelihood(&v.vector))); } gsl_matrix_free(Xmat); gsl_rng_free(rng); return 0; }
/* Unpack a packed LQ decomposition into explicit factors.
 *
 * LQ  : N x M packed factorisation (input)
 * tau : Householder coefficients, length MIN(M,N) (input)
 * Q   : M x M matrix, overwritten; starts as the identity and has the
 *       Householder reflectors applied from the last to the first
 * L   : N x M matrix, overwritten with the lower-triangular part of LQ
 *       (entries with column index > row index are set to zero)
 *
 * Returns GSL_SUCCESS, or reports GSL_ENOTSQR / GSL_EBADLEN through
 * GSL_ERROR when the argument dimensions are inconsistent.
 */
int
gsl_linalg_LQ_unpack (const gsl_matrix * LQ, const gsl_vector * tau, gsl_matrix * Q, gsl_matrix * L)
{
  const size_t N = LQ->size1;
  const size_t M = LQ->size2;

  if (Q->size1 != M || Q->size2 != M)
    {
      GSL_ERROR ("Q matrix must be M x M", GSL_ENOTSQR);
    }
  else if (L->size1 != N || L->size2 != M)
    {
      /* The message used to say "R matrix"; the argument is L.  The error
         code is kept as-is so existing callers see the same value. */
      GSL_ERROR ("L matrix must be N x M", GSL_ENOTSQR);
    }
  else if (tau->size != GSL_MIN (M, N))
    {
      GSL_ERROR ("size of tau must be MIN(M,N)", GSL_EBADLEN);
    }
  else
    {
      size_t i, j;

      /* Initialize Q to the identity */

      gsl_matrix_set_identity (Q);

      for (i = GSL_MIN (M, N); i-- > 0;)
        {
          gsl_vector_const_view c = gsl_matrix_const_row (LQ, i);
          gsl_vector_const_view h = gsl_vector_const_subvector (&c.vector,
                                                                i, M - i);
          gsl_matrix_view m = gsl_matrix_submatrix (Q, i, i, M - i, M - i);
          double ti = gsl_vector_get (tau, i);
          gsl_linalg_householder_mh (ti, &h.vector, &m.matrix);
        }

      /* Form the lower triangular matrix L from a packed LQ matrix.
         Bounding the column loop by M directly avoids evaluating M-1,
         which wraps around for size_t when M == 0. */

      for (i = 0; i < N; i++)
        {
          for (j = 0; j < M; j++)
            {
              const double lij = (j <= i) ? gsl_matrix_get (LQ, i, j) : 0.0;
              gsl_matrix_set (L, i, j, lij);
            }
        }

      return GSL_SUCCESS;
    }
}
/* Read a single-row matrix from a text file and return it as a newly
 * allocated vector.
 *
 * filename : path handed to io_util_readMatFromTxt()
 *
 * Returns a vector holding a copy of row 0 of the matrix that was read;
 * the caller owns it and must release it with gsl_vector_free().
 * Returns NULL when the matrix could not be read or the vector could not
 * be allocated.
 * NOTE(review): presumably io_util_readMatFromTxt() signals failure with
 * NULL -- confirm against its implementation.
 *
 * The matrix is asserted to have exactly one row.
 */
gsl_vector *
io_util_readVecFromTxt (const char *filename)
{
  gsl_matrix *mat = io_util_readMatFromTxt (filename);
  gsl_vector *vec = NULL;

  /* Nothing to copy; avoid dereferencing a NULL matrix below. */
  if (mat == NULL)
    return NULL;

  assert (mat->size1 == 1);

  vec = gsl_vector_calloc (mat->size2);
  if (vec != NULL)
    {
      gsl_vector_const_view row = gsl_matrix_const_row (mat, 0);
      gsl_vector_memcpy (vec, &row.vector);
    }

  /* clean up: the temporary matrix is released on every path */
  gsl_matrix_free (mat);

  return vec;
}
/* Compute the residuals r = y - X c of a linear model.
 *
 * X : observation matrix, one row per observation
 * y : observed values, length X->size1
 * c : model coefficients, length X->size2
 * r : output, length y->size; r[i] = y[i] - (row i of X) . c
 *
 * Returns GSL_SUCCESS, or reports GSL_EBADLEN through GSL_ERROR when the
 * sizes of the arguments do not agree.
 */
int
gsl_multifit_linear_residuals (const gsl_matrix *X, const gsl_vector *y,
                               const gsl_vector *c, gsl_vector *r)
{
  if (X->size1 != y->size)
    {
      GSL_ERROR
        ("number of observations in y does not match rows of matrix X",
         GSL_EBADLEN);
    }

  if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
                 GSL_EBADLEN);
    }

  if (y->size != r->size)
    {
      GSL_ERROR ("number of observations in y does not match number of residuals",
                 GSL_EBADLEN);
    }

  {
    size_t obs = 0;

    while (obs < y->size)
      {
        const double observed = gsl_vector_get (y, obs);
        gsl_vector_const_view predictors = gsl_matrix_const_row (X, obs);
        double fitted;

        gsl_blas_ddot (&predictors.vector, c, &fitted);
        gsl_vector_set (r, obs, observed - fitted);

        ++obs;
      }
  }

  return GSL_SUCCESS;
} /* gsl_multifit_linear_residuals() */
/// Write a genotype matrix to a gzip-compressed, tab-delimited text file.
///
/// The output file name is
///   stub + ".generation<generation>.rep<repid>.gz".
/// Each output line holds m->G[row] followed by the entries of that matrix
/// row, separated by tabs. When keep_origin is false, column 0 is skipped.
/// Lines are buffered and written to the file ten at a time.
///
/// @param m            Matrix to write (rows m->nrow, columns m->ncol).
/// @param generation   Generation number used in the file name.
/// @param stub         File name prefix.
/// @param repid        Replicate id used in the file name.
/// @param keep_origin  If false, the first column is omitted.
///
/// If the output file cannot be opened the function returns without
/// writing anything; the void interface has no way to report the failure.
inline void
write_geno_matrix(const geno_matrix *m, const KTfwd::uint_t generation,
                  std::string stub, const int repid, const bool keep_origin)
{
    stub += ".generation" + std::to_string(generation) + ".rep"
            + std::to_string(repid) + ".gz";

    gzFile gzout = gzopen(stub.c_str(), "w");
    if (gzout == nullptr)
        {
            // Previously every gzwrite below was issued on a null handle.
            return;
        }

    std::ostringstream buffer;

    // Push the buffered lines to the file and clear the buffer.
    // The contents are copied out once rather than once per argument.
    const auto flush_buffer = [&buffer, gzout]() {
        const std::string chunk = buffer.str();
        gzwrite(gzout, chunk.c_str(), static_cast<unsigned>(chunk.size()));
        buffer.str(std::string());
    };

    const std::size_t first_col = keep_origin ? 0 : 1;
    int nwrites = 0;
    for (std::size_t row = 0; row < m->nrow; ++row)
        {
            auto row_view = gsl_matrix_const_row(m->m.get(), row);
            buffer << m->G[row] << '\t';
            for (std::size_t col = first_col; col < m->ncol; ++col)
                {
                    buffer << gsl_vector_get(&row_view.vector, col);
                    if (col < m->ncol - 1)
                        buffer << '\t';
                }
            buffer << '\n';
            ++nwrites;
            if (nwrites == 10)
                {
                    flush_buffer();
                    nwrites = 0;
                }
        }

    // Write whatever is left over from the last partial batch.
    if (nwrites)
        {
            flush_buffer();
        }
    gzclose(gzout);
}
/* Weighted linear least-squares fit via singular value decomposition.
 *
 * Inputs
 *   X       : n x p observation matrix
 *   w       : n weights; negative weights are treated as zero when the
 *             rows of X and the entries of y are scaled (the chi-squared
 *             sum below uses w as given)
 *   y       : n observations
 *   tol     : singular values S[j] <= tol * S[0] are discarded
 *   balance : non-zero to balance the columns of the scaled matrix
 *   work    : workspace whose n and p must match X
 * Outputs
 *   rank    : number of singular values kept
 *   c       : p fitted coefficients
 *   cov     : p x p covariance matrix
 *   chisq   : weighted sum of squared residuals, sum_i w[i] * (y[i] - X[i,:].c)^2
 *
 * Returns GSL_SUCCESS, or reports GSL_EBADLEN / GSL_ENOTSQR through
 * GSL_ERROR when the argument dimensions are inconsistent.
 */
static int
multifit_wlinear_svd (const gsl_matrix * X, const gsl_vector * w, const gsl_vector * y, double tol, int balance, size_t * rank, gsl_vector * c, gsl_matrix * cov, double *chisq, gsl_multifit_linear_workspace * work)
{
  if (X->size1 != y->size)
    {
      GSL_ERROR ("number of observations in y does not match rows of matrix X", GSL_EBADLEN);
    }
  else if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X", GSL_EBADLEN);
    }
  else if (w->size != y->size)
    {
      GSL_ERROR ("number of weights does not match number of observations", GSL_EBADLEN);
    }
  else if (cov->size1 != cov->size2)
    {
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
    }
  else if (c->size != cov->size1)
    {
      GSL_ERROR ("number of parameters does not match size of covariance matrix", GSL_EBADLEN);
    }
  else if (X->size1 != work->n || X->size2 != work->p)
    {
      GSL_ERROR ("size of workspace does not match size of observation matrix", GSL_EBADLEN);
    }
  else
    {
      const size_t n = X->size1;
      const size_t p = X->size2;

      size_t i, j, p_eff;

      /* All temporaries come from the caller-supplied workspace */
      gsl_matrix *A = work->A;
      gsl_matrix *Q = work->Q;
      gsl_matrix *QSI = work->QSI;
      gsl_vector *S = work->S;
      gsl_vector *t = work->t;
      gsl_vector *xt = work->xt;
      gsl_vector *D = work->D;

      /* Scale X, A = sqrt(w) X */

      gsl_matrix_memcpy (A, X);

      for (i = 0; i < n; i++)
        {
          double wi = gsl_vector_get (w, i);

          /* clamp negative weights so that sqrt() stays real */
          if (wi < 0)
            wi = 0;

          {
            gsl_vector_view row = gsl_matrix_row (A, i);
            gsl_vector_scale (&row.vector, sqrt (wi));
          }
        }

      /* Balance the columns of the matrix A if requested */

      if (balance)
        {
          gsl_linalg_balance_columns (A, D);
        }
      else
        {
          /* unit scale factors make the later divisions by D a no-op */
          gsl_vector_set_all (D, 1.0);
        }

      /* Decompose A into U S Q^T */
      /* (QSI and xt are passed here as the routine's work arguments and
         are overwritten again further down) */

      gsl_linalg_SV_decomp_mod (A, QSI, Q, S, xt);

      /* Solve sqrt(w) y = A c for c, by first computing t = sqrt(w) y */

      for (i = 0; i < n; i++)
        {
          double wi = gsl_vector_get (w, i);
          double yi = gsl_vector_get (y, i);
          if (wi < 0)
            wi = 0;
          gsl_vector_set (t, i, sqrt (wi) * yi);
        }

      /* xt = A^T t */
      gsl_blas_dgemv (CblasTrans, 1.0, A, t, 0.0, xt);

      /* Scale the matrix Q, Q' = Q S^-1 */

      gsl_matrix_memcpy (QSI, Q);

      {
        /* S[0] is the reference value for the relative cut-off */
        double alpha0 = gsl_vector_get (S, 0);
        p_eff = 0;

        for (j = 0; j < p; j++)
          {
            gsl_vector_view column = gsl_matrix_column (QSI, j);
            double alpha = gsl_vector_get (S, j);

            if (alpha <= tol * alpha0)
              {
                /* discarded singular value: zero out this column */
                alpha = 0.0;
              }
            else
              {
                alpha = 1.0 / alpha;
                p_eff++;
              }

            gsl_vector_scale (&column.vector, alpha);
          }

        /* number of singular values above the cut-off */
        *rank = p_eff;
      }

      gsl_vector_set_zero (c);

      /* Solution */

      gsl_blas_dgemv (CblasNoTrans, 1.0, QSI, xt, 0.0, c);

      /* Unscale the balancing factors */

      gsl_vector_div (c, D);

      /* Compute chisq, from residual r = y - X c */

      {
        double r2 = 0;

        for (i = 0; i < n; i++)
          {
            double yi = gsl_vector_get (y, i);
            /* note: the weight is used as given here, without the
               negative-weight clamp applied above */
            double wi = gsl_vector_get (w, i);
            gsl_vector_const_view row = gsl_matrix_const_row (X, i);
            double y_est, ri;
            gsl_blas_ddot (&row.vector, c, &y_est);
            ri = yi - y_est;
            r2 += wi * ri * ri;
          }

        *chisq = r2;

        /* Form covariance matrix cov = (X^T W X)^-1 = (Q S^-1) (Q S^-1)^T */

        for (i = 0; i < p; i++)
          {
            gsl_vector_view row_i = gsl_matrix_row (QSI, i);
            double d_i = gsl_vector_get (D, i);

            /* only j >= i is computed; the result is mirrored into (j,i) */
            for (j = i; j < p; j++)
              {
                gsl_vector_view row_j = gsl_matrix_row (QSI, j);
                double d_j = gsl_vector_get (D, j);
                double s;

                gsl_blas_ddot (&row_i.vector, &row_j.vector, &s);

                gsl_matrix_set (cov, i, j, s / (d_i * d_j));
                gsl_matrix_set (cov, j, i, s / (d_i * d_j));
              }
          }
      }

      return GSL_SUCCESS;
    }
}
int gsl_multifit_linear_svd (const gsl_matrix * X, const gsl_vector * y, double tol, size_t * rank, gsl_vector * c, gsl_matrix * cov, double *chisq, gsl_multifit_linear_workspace * work) { if (X->size1 != y->size) { GSL_ERROR ("number of observations in y does not match rows of matrix X", GSL_EBADLEN); } else if (X->size2 != c->size) { GSL_ERROR ("number of parameters c does not match columns of matrix X", GSL_EBADLEN); } else if (cov->size1 != cov->size2) { GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR); } else if (c->size != cov->size1) { GSL_ERROR ("number of parameters does not match size of covariance matrix", GSL_EBADLEN); } else if (X->size1 != work->n || X->size2 != work->p) { GSL_ERROR ("size of workspace does not match size of observation matrix", GSL_EBADLEN); } else if (tol <= 0) { GSL_ERROR ("tolerance must be positive", GSL_EINVAL); } else { const size_t n = X->size1; const size_t p = X->size2; size_t i, j, p_eff; gsl_matrix *A = work->A; gsl_matrix *Q = work->Q; gsl_matrix *QSI = work->QSI; gsl_vector *S = work->S; gsl_vector *xt = work->xt; gsl_vector *D = work->D; /* Copy X to workspace, A <= X */ gsl_matrix_memcpy (A, X); /* Balance the columns of the matrix A */ gsl_linalg_balance_columns (A, D); /* Decompose A into U S Q^T */ gsl_linalg_SV_decomp_mod (A, QSI, Q, S, xt); /* Solve y = A c for c */ gsl_blas_dgemv (CblasTrans, 1.0, A, y, 0.0, xt); /* Scale the matrix Q, Q' = Q S^-1 */ gsl_matrix_memcpy (QSI, Q); { double alpha0 = gsl_vector_get (S, 0); p_eff = 0; for (j = 0; j < p; j++) { gsl_vector_view column = gsl_matrix_column (QSI, j); double alpha = gsl_vector_get (S, j); if (alpha <= tol * alpha0) { alpha = 0.0; } else { alpha = 1.0 / alpha; p_eff++; } gsl_vector_scale (&column.vector, alpha); } *rank = p_eff; } gsl_vector_set_zero (c); gsl_blas_dgemv (CblasNoTrans, 1.0, QSI, xt, 0.0, c); /* Unscale the balancing factors */ gsl_vector_div (c, D); /* Compute chisq, from residual r = y - X c */ { double s2 = 0, r2 = 0; for (i = 0; i 
< n; i++) { double yi = gsl_vector_get (y, i); gsl_vector_const_view row = gsl_matrix_const_row (X, i); double y_est, ri; gsl_blas_ddot (&row.vector, c, &y_est); ri = yi - y_est; r2 += ri * ri; } s2 = r2 / (n - p_eff); /* p_eff == rank */ *chisq = r2; /* Form variance-covariance matrix cov = s2 * (Q S^-1) (Q S^-1)^T */ for (i = 0; i < p; i++) { gsl_vector_view row_i = gsl_matrix_row (QSI, i); double d_i = gsl_vector_get (D, i); for (j = i; j < p; j++) { gsl_vector_view row_j = gsl_matrix_row (QSI, j); double d_j = gsl_vector_get (D, j); double s; gsl_blas_ddot (&row_i.vector, &row_j.vector, &s); gsl_matrix_set (cov, i, j, s * s2 / (d_i * d_j)); gsl_matrix_set (cov, j, i, s * s2 / (d_i * d_j)); } } } return GSL_SUCCESS; } }
static void ncdf_write_file(const char *filename, const struct gibbs_problem *p) { int ncid; int time_dim_id; int cons_dim_id; int draw_dim_id; int dim_ids[3]; int data_var_id; int mthet_var_id; int msig_var_id; size_t i; int err; err = nc_create(filename, NC_CLOBBER | NC_CLASSIC_MODEL, &ncid); if (err != NC_NOERR) { fprintf(stderr, "%s\n", nc_strerror(err)); return; } nc_def_dim(ncid, "time", p->data->size1, &time_dim_id); nc_def_dim(ncid, "constituents", p->data->size2, &cons_dim_id); nc_def_dim(ncid, "draws", p->draws, &draw_dim_id); nc_put_att_long(ncid, NC_GLOBAL, "iterations", NC_INT, 1, (long *) &p->iterations); nc_put_att_long(ncid, NC_GLOBAL, "burn", NC_INT, 1, (long *) &p->burn); nc_put_att_long(ncid, NC_GLOBAL, "seed", NC_INT, 1, &p->seed); nc_def_var(ncid, "mean", NC_DOUBLE, 1, &cons_dim_id, &mthet_var_id); dim_ids[0] = cons_dim_id; dim_ids[1] = cons_dim_id; nc_def_var(ncid, "covariance", NC_DOUBLE, 2, dim_ids, &msig_var_id); dim_ids[2] = cons_dim_id; dim_ids[1] = time_dim_id; dim_ids[0] = draw_dim_id; nc_def_var(ncid, "data", NC_DOUBLE, 3, dim_ids, &data_var_id); nc_enddef(ncid); nc_put_var_double(ncid, mthet_var_id, p->mthet->data); if (p->msig->size2 == p->msig->tda) { nc_put_var_double(ncid, msig_var_id, p->msig->data); } else { for (i = 0; i < p->msig->size1; i++) { gsl_vector_const_view v = gsl_matrix_const_row(p->msig, i); size_t start[2] = {0, i}; size_t count[2] = {p->msig->size2, 1}; nc_put_vara_double(ncid, msig_var_id, start, count, v.vector.data); } } for (i = 0; i < p->draws; i++) { const gsl_matrix *m = p->ddata[i]; if (m->size2 == m->tda) { size_t start[3] = {i, 0, 0}; size_t count[3] = {1, m->size1, m->size2}; nc_put_vara_double(ncid, data_var_id, start, count, m->data); } else { size_t j; for (j = 0; j < m->size1; i++) { gsl_vector_const_view v = gsl_matrix_const_row(m, j); size_t start[3] = {0, j, i}; size_t count[3] = {m->size2, 1, 1}; nc_put_vara_double(ncid, data_var_id, start, count, v.vector.data); } } } nc_close(ncid); }
void c_ctr::learn_map_estimate(const c_data* users, const c_data* items, const c_corpus* c, const ctr_hyperparameter* param, const char* directory) { // init model parameters printf("\ninitializing the model ...\n"); init_model(param->ctr_run); // filename char name[500]; // start time time_t start, current; time(&start); int elapsed = 0; int iter = 0; double likelihood = -exp(50), likelihood_old; double converge = 1.0; /// create the state log file sprintf(name, "%s/state.log", directory); FILE* file = fopen(name, "w"); fprintf(file, "iter time likelihood converge\n"); /* alloc auxiliary variables */ gsl_matrix* XX = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_matrix* A = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_matrix* B = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_vector* x = gsl_vector_alloc(m_num_factors); gsl_matrix* phi = NULL; gsl_matrix* word_ss = NULL; gsl_matrix* log_beta = NULL; gsl_vector* gamma = NULL; if (param->ctr_run && param->theta_opt) { int max_len = c->max_corpus_length(); phi = gsl_matrix_calloc(max_len, m_num_factors); word_ss = gsl_matrix_calloc(m_num_factors, c->m_size_vocab); log_beta = gsl_matrix_calloc(m_num_factors, c->m_size_vocab); gsl_matrix_memcpy(log_beta, m_beta); mtx_log(log_beta); gamma = gsl_vector_alloc(m_num_factors); } /* tmp variables for indexes */ int i, j, m, n, l, k; int* item_ids; int* user_ids; double result; /// confidence parameters double a_minus_b = param->a - param->b; while ((iter < param->max_iter and converge > 1e-4 ) or iter < min_iter) { likelihood_old = likelihood; likelihood = 0.0; // update U gsl_matrix_set_zero(XX); for (j = 0; j < m_num_items; j ++) { m = items->m_vec_len[j]; if (m>0) { gsl_vector_const_view v = gsl_matrix_const_row(m_V, j); gsl_blas_dger(1.0, &v.vector, &v.vector, XX); } } gsl_matrix_scale(XX, param->b); // this is only for U gsl_matrix_add_diagonal(XX, param->lambda_u); for (i = 0; i < m_num_users; i ++) { item_ids = users->m_vec_data[i]; n = 
users->m_vec_len[i]; if (n > 0) { // this user has rated some articles gsl_matrix_memcpy(A, XX); gsl_vector_set_zero(x); for (l=0; l < n; l ++) { j = item_ids[l]; gsl_vector_const_view v = gsl_matrix_const_row(m_V, j); gsl_blas_dger(a_minus_b, &v.vector, &v.vector, A); gsl_blas_daxpy(param->a, &v.vector, x); } gsl_vector_view u = gsl_matrix_row(m_U, i); matrix_vector_solve(A, x, &(u.vector)); // update the likelihood gsl_blas_ddot(&u.vector, &u.vector, &result); likelihood += -0.5 * param->lambda_u * result; } } if (param->lda_regression) break; // one iteration is enough for lda-regression // update V if (param->ctr_run && param->theta_opt) gsl_matrix_set_zero(word_ss); gsl_matrix_set_zero(XX); for (i = 0; i < m_num_users; i ++) { n = users->m_vec_len[i]; if (n>0) { gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_dger(1.0, &u.vector, &u.vector, XX); } } gsl_matrix_scale(XX, param->b); for (j = 0; j < m_num_items; j ++) { gsl_vector_view v = gsl_matrix_row(m_V, j); gsl_vector_view theta_v = gsl_matrix_row(m_theta, j); user_ids = items->m_vec_data[j]; m = items->m_vec_len[j]; if (m>0) { // m > 0, some users have rated this article gsl_matrix_memcpy(A, XX); gsl_vector_set_zero(x); for (l = 0; l < m; l ++) { i = user_ids[l]; gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_dger(a_minus_b, &u.vector, &u.vector, A); gsl_blas_daxpy(param->a, &u.vector, x); } // adding the topic vector // even when ctr_run=0, m_theta=0 gsl_blas_daxpy(param->lambda_v, &theta_v.vector, x); gsl_matrix_memcpy(B, A); // save for computing likelihood // here different from U update gsl_matrix_add_diagonal(A, param->lambda_v); matrix_vector_solve(A, x, &v.vector); // update the likelihood for the relevant part likelihood += -0.5 * m * param->a; for (l = 0; l < m; l ++) { i = user_ids[l]; gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_ddot(&u.vector, &v.vector, &result); likelihood += param->a * result; } likelihood += -0.5 * mahalanobis_prod(B, 
&v.vector, &v.vector); // likelihood part of theta, even when theta=0, which is a // special case gsl_vector_memcpy(x, &v.vector); gsl_vector_sub(x, &theta_v.vector); gsl_blas_ddot(x, x, &result); likelihood += -0.5 * param->lambda_v * result; if (param->ctr_run && param->theta_opt) { const c_document* doc = c->m_docs[j]; likelihood += doc_inference(doc, &theta_v.vector, log_beta, phi, gamma, word_ss, true); optimize_simplex(gamma, &v.vector, param->lambda_v, &theta_v.vector); } } else { // m=0, this article has never been rated if (param->ctr_run && param->theta_opt) { const c_document* doc = c->m_docs[j]; doc_inference(doc, &theta_v.vector, log_beta, phi, gamma, word_ss, false); vnormalize(gamma); gsl_vector_memcpy(&theta_v.vector, gamma); } } } // update beta if needed if (param->ctr_run && param->theta_opt) { gsl_matrix_memcpy(m_beta, word_ss); for (k = 0; k < m_num_factors; k ++) { gsl_vector_view row = gsl_matrix_row(m_beta, k); vnormalize(&row.vector); } gsl_matrix_memcpy(log_beta, m_beta); mtx_log(log_beta); } time(¤t); elapsed = (int)difftime(current, start); iter++; converge = fabs((likelihood-likelihood_old)/likelihood_old); if (likelihood < likelihood_old) printf("likelihood is decreasing!\n"); fprintf(file, "%04d %06d %10.5f %.10f\n", iter, elapsed, likelihood, converge); fflush(file); printf("iter=%04d, time=%06d, likelihood=%.5f, converge=%.10f\n", iter, elapsed, likelihood, converge); // save intermediate results if (iter % param->save_lag == 0) { sprintf(name, "%s/%04d-U.dat", directory, iter); FILE * file_U = fopen(name, "w"); mtx_fprintf(file_U, m_U); fclose(file_U); sprintf(name, "%s/%04d-V.dat", directory, iter); FILE * file_V = fopen(name, "w"); mtx_fprintf(file_V, m_V); fclose(file_V); if (param->ctr_run) { sprintf(name, "%s/%04d-theta.dat", directory, iter); FILE * file_theta = fopen(name, "w"); mtx_fprintf(file_theta, m_theta); fclose(file_theta); sprintf(name, "%s/%04d-beta.dat", directory, iter); FILE * file_beta = fopen(name, "w"); 
mtx_fprintf(file_beta, m_beta); fclose(file_beta); } } } // save final results sprintf(name, "%s/final-U.dat", directory); FILE * file_U = fopen(name, "w"); mtx_fprintf(file_U, m_U); fclose(file_U); sprintf(name, "%s/final-V.dat", directory); FILE * file_V = fopen(name, "w"); mtx_fprintf(file_V, m_V); fclose(file_V); if (param->ctr_run) { sprintf(name, "%s/final-theta.dat", directory); FILE * file_theta = fopen(name, "w"); mtx_fprintf(file_theta, m_theta); fclose(file_theta); sprintf(name, "%s/final-beta.dat", directory); FILE * file_beta = fopen(name, "w"); mtx_fprintf(file_beta, m_beta); fclose(file_beta); } // free memory gsl_matrix_free(XX); gsl_matrix_free(A); gsl_matrix_free(B); gsl_vector_free(x); if (param->ctr_run && param->theta_opt) { gsl_matrix_free(phi); gsl_matrix_free(log_beta); gsl_matrix_free(word_ss); gsl_vector_free(gamma); } }
/* Performs LSE of consequent parameters for all branches in the network * * PARAMETERS: A ---> predictor variables matrix * y ---> expected results for this sample set (P in total) * * PRE: A != NULL * y != NULL * * POS: result != NULL && best fit consequent parameters vector returned * or * result == NULL */ static gsl_vector * anfis_fit_linear (const gsl_matrix *A, const gsl_vector *y, size_t P, size_t M) { gsl_matrix *S = NULL, /* Covariance matrix */ *Snew = NULL, *Saux = NULL; gsl_vector *X = NULL, /* Future best fit parameters */ *Xnew = NULL; unsigned int i = 0; double den = 0.0, factor = 0.0; assert (A != NULL); assert (y != NULL); /* Generating necessary workspace */ S = gsl_matrix_alloc (M,M); if (S == NULL) goto exit_point; Snew = gsl_matrix_calloc (M,M); if (Snew == NULL) goto exit_point; Saux = gsl_matrix_calloc (M,M); if (Saux == NULL) goto exit_point; Xnew = gsl_vector_alloc (M); if (Xnew == NULL) goto exit_point; X = gsl_vector_calloc (M); if (X == NULL) goto exit_point; /* S = γ*Id , where γ is a large number */ gsl_matrix_set_identity (S); gsl_matrix_scale (S, _gamma); /* Performing Least Square Estimation */ for (i=0 ; i < P ; i++) { /* Matrix A i-th row (row At_i+1 in Jang's paper) */ gsl_vector_const_view A_i = gsl_matrix_const_row (A, i); /* Snew = S(i) * A_i+1 * At_i+1 * S(i) */ calc_num (S, &(A_i.vector), Snew, Saux, Xnew, M); /* scale = At_i+1 * S(i) * A_i+1 */ den = calc_den (S, &(A_i.vector), Xnew); /* Snew = Snew / (1+scale) */ gsl_matrix_scale (Snew, 1.0 / (1.0+den)); /* S(i+1) = S(i) - Snew */ gsl_matrix_sub (S, Snew); /* factor = At_i+1 * X(i) */ gsl_blas_ddot (&(A_i.vector), X, &factor); /* factor = yt_i+1 - factor */ factor = gsl_vector_get (y, i) - factor; /* Xnew = S(i+1) * A_i+1 */ gsl_blas_dgemv (CblasNoTrans, 1.0, S, &(A_i.vector), 0.0, Xnew); /* Xnew = Xnew * factor */ gsl_vector_scale (Xnew, factor); /* X(i+1) = X(i) + Xnew */ gsl_vector_add (X, Xnew); } exit_point: if (S != NULL) { gsl_matrix_free (S); S = NULL;} if (Snew 
!= NULL) { gsl_matrix_free (Snew); Snew = NULL;} if (Saux != NULL) { gsl_matrix_free (Saux); Saux = NULL;} if (Xnew != NULL) { gsl_vector_free (Xnew); Xnew = NULL;} return X; }
void fnIMIS(const size_t InitSamples, const size_t StepSamples, const size_t FinalResamples, const size_t MaxIter, const size_t NumParam, unsigned long int rng_seed, const char * runName) { // Declare and configure GSL RNG gsl_rng * rng; const gsl_rng_type * T; gsl_rng_env_setup(); T = gsl_rng_default; rng = gsl_rng_alloc (T); gsl_rng_set(rng, rng_seed); char strDiagnosticsFile[strlen(runName) + 15 +1]; char strResampleFile[strlen(runName) + 12 +1]; strcpy(strDiagnosticsFile, runName); strcat(strDiagnosticsFile, "Diagnostics.txt"); strcpy(strResampleFile, runName); strcat(strResampleFile, "Resample.txt"); FILE * diagnostics_file = fopen(strDiagnosticsFile, "w"); fprintf(diagnostics_file, "Seeded RNG: %zu\n", rng_seed); fprintf(diagnostics_file, "Running IMIS. InitSamples: %zu, StepSamples: %zu, FinalResamples %zu, MaxIter %zu\n", InitSamples, StepSamples, FinalResamples, MaxIter); // Setup IMIS arrays gsl_matrix * Xmat = gsl_matrix_alloc(InitSamples + StepSamples*MaxIter, NumParam); double * prior_all = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter)); double * likelihood_all = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter)); double * imp_weight_denom = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter)); // proportional to q(k) in stage 2c of Raftery & Bao double * gaussian_sum = (double*) calloc(InitSamples + StepSamples*MaxIter, sizeof(double)); // sum of mixture distribution for mode struct dst * distance = (struct dst *) malloc(sizeof(struct dst) * (InitSamples + StepSamples*MaxIter)); // Mahalanobis distance to most recent mode double * imp_weights = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter)); double * tmp_MVNpdf = (double*) malloc(sizeof(double) * (InitSamples + StepSamples*MaxIter)); gsl_matrix * nearestX = gsl_matrix_alloc(StepSamples, NumParam); double center_all[MaxIter][NumParam]; gsl_matrix * sigmaChol_all[MaxIter]; gsl_matrix * sigmaInv_all[MaxIter]; // Initial 
prior samples sample_prior(rng, InitSamples, Xmat); // Calculate prior covariance double prior_invCov_diag[NumParam]; /* The paper describing the algorithm uses the full prior covariance matrix. This follows the code in the IMIS R package and diagonalizes the prior covariance matrix to ensure invertibility. */ for(size_t i = 0; i < NumParam; i++){ gsl_vector_view tmpCol = gsl_matrix_subcolumn(Xmat, i, 0, InitSamples); prior_invCov_diag[i] = gsl_stats_variance(tmpCol.vector.data, tmpCol.vector.stride, InitSamples); prior_invCov_diag[i] = 1.0/prior_invCov_diag[i]; } // IMIS steps fprintf(diagnostics_file, "Step Var(w_i) MargLik Unique Max(w_i) ESS Time\n"); printf("Step Var(w_i) MargLik Unique Max(w_i) ESS Time\n"); time_t time1, time2; time(&time1); size_t imisStep = 0, numImisSamples; for(imisStep = 0; imisStep < MaxIter; imisStep++){ numImisSamples = (InitSamples + imisStep*StepSamples); // Evaluate prior and likelihood if(imisStep == 0){ // initial stage #pragma omp parallel for for(size_t i = 0; i < numImisSamples; i++){ gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, i); prior_all[i] = prior(&theta.vector); likelihood_all[i] = likelihood(&theta.vector); } } else { // imisStep > 0 #pragma omp parallel for for(size_t i = InitSamples + (imisStep-1)*StepSamples; i < numImisSamples; i++){ gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, i); prior_all[i] = prior(&theta.vector); likelihood_all[i] = likelihood(&theta.vector); } } // Determine importance weights, find current maximum, calculate monitoring criteria #pragma omp parallel for for(size_t i = 0; i < numImisSamples; i++){ imp_weight_denom[i] = (InitSamples*prior_all[i] + StepSamples*gaussian_sum[i])/(InitSamples + StepSamples * imisStep); imp_weights[i] = (prior_all[i] > 0)?likelihood_all[i]*prior_all[i]/imp_weight_denom[i]:0; } double sumWeights = 0.0; for(size_t i = 0; i < numImisSamples; i++){ sumWeights += imp_weights[i]; } double maxWeight = 0.0, varImpW = 0.0, entropy = 0.0, 
expectedUnique = 0.0, effSampSize = 0.0, margLik; size_t maxW_idx; #pragma omp parallel for reduction(+: varImpW, entropy, expectedUnique, effSampSize) for(size_t i = 0; i < numImisSamples; i++){ imp_weights[i] /= sumWeights; varImpW += pow(numImisSamples * imp_weights[i] - 1.0, 2.0); entropy += imp_weights[i] * log(imp_weights[i]); expectedUnique += (1.0 - pow((1.0 - imp_weights[i]), FinalResamples)); effSampSize += pow(imp_weights[i], 2.0); } for(size_t i = 0; i < numImisSamples; i++){ if(imp_weights[i] > maxWeight){ maxW_idx = i; maxWeight = imp_weights[i]; } } for(size_t i = 0; i < NumParam; i++) center_all[imisStep][i] = gsl_matrix_get(Xmat, maxW_idx, i); varImpW /= numImisSamples; entropy = -entropy / log(numImisSamples); effSampSize = 1.0/effSampSize; margLik = log(sumWeights/numImisSamples); fprintf(diagnostics_file, "%4zu %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n", imisStep, varImpW, margLik, expectedUnique, maxWeight, effSampSize, difftime(time(&time2), time1)); printf("%4zu %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n", imisStep, varImpW, margLik, expectedUnique, maxWeight, effSampSize, difftime(time(&time2), time1)); time1 = time2; // Check for convergence if(expectedUnique > FinalResamples*(1.0 - exp(-1.0))){ break; } // Calculate Mahalanobis distance to current mode GetMahalanobis_diag(Xmat, center_all[imisStep], prior_invCov_diag, numImisSamples, NumParam, distance); // Find StepSamples nearest points // (Note: this was a major bottleneck when InitSamples and StepResamples are large. qsort substantially outperformed GSL sort options.) 
qsort(distance, numImisSamples, sizeof(struct dst), cmp_dst); #pragma omp parallel for for(size_t i = 0; i < StepSamples; i++){ gsl_vector_const_view tmpX = gsl_matrix_const_row(Xmat, distance[i].idx); gsl_matrix_set_row(nearestX, i, &tmpX.vector); } // Calculate weighted covariance of nearestX // (a) Calculate weights for nearest points 1...StepSamples double weightsCov[StepSamples]; #pragma omp parallel for for(size_t i = 0; i < StepSamples; i++){ weightsCov[i] = 0.5*(imp_weights[distance[i].idx] + 1.0/numImisSamples); // cov_wt function will normalize the weights } // (b) Calculate weighted covariance sigmaChol_all[imisStep] = gsl_matrix_alloc(NumParam, NumParam); covariance_weighted(nearestX, weightsCov, StepSamples, center_all[imisStep], NumParam, sigmaChol_all[imisStep]); // (c) Do Cholesky decomposition and inverse of covariance matrix gsl_linalg_cholesky_decomp(sigmaChol_all[imisStep]); for(size_t j = 0; j < NumParam; j++) // Note: GSL outputs a symmetric matrix rather than lower tri, so have to set upper tri to zero for(size_t k = j+1; k < NumParam; k++) gsl_matrix_set(sigmaChol_all[imisStep], j, k, 0.0); sigmaInv_all[imisStep] = gsl_matrix_alloc(NumParam, NumParam); gsl_matrix_memcpy(sigmaInv_all[imisStep], sigmaChol_all[imisStep]); gsl_linalg_cholesky_invert(sigmaInv_all[imisStep]); // Sample new inputs gsl_matrix_view newSamples = gsl_matrix_submatrix(Xmat, numImisSamples, 0, StepSamples, NumParam); GenerateRandMVnorm(rng, StepSamples, center_all[imisStep], sigmaChol_all[imisStep], NumParam, &newSamples.matrix); // Evaluate sampling probability from mixture distribution // (a) For newly sampled points, sum over all previous centers for(size_t pastStep = 0; pastStep < imisStep; pastStep++){ GetMVNpdf(&newSamples.matrix, center_all[pastStep], sigmaInv_all[pastStep], sigmaChol_all[pastStep], StepSamples, NumParam, tmp_MVNpdf); #pragma omp parallel for for(size_t i = 0; i < StepSamples; i++) gaussian_sum[numImisSamples + i] += tmp_MVNpdf[i]; } // (b) For 
all points, add weight for most recent center gsl_matrix_const_view Xmat_curr = gsl_matrix_const_submatrix(Xmat, 0, 0, numImisSamples + StepSamples, NumParam); GetMVNpdf(&Xmat_curr.matrix, center_all[imisStep], sigmaInv_all[imisStep], sigmaChol_all[imisStep], numImisSamples + StepSamples, NumParam, tmp_MVNpdf); #pragma omp parallel for for(size_t i = 0; i < numImisSamples + StepSamples; i++) gaussian_sum[i] += tmp_MVNpdf[i]; } // loop over imisStep //// FINISHED IMIS ROUTINE fclose(diagnostics_file); // Resample posterior outputs int resampleIdx[FinalResamples]; walker_ProbSampleReplace(rng, numImisSamples, imp_weights, FinalResamples, resampleIdx); // Note: Random sampling routine used in R sample() function. // Print results FILE * resample_file = fopen(strResampleFile, "w"); for(size_t i = 0; i < FinalResamples; i++){ for(size_t j = 0; j < NumParam; j++) fprintf(resample_file, "%.15e\t", gsl_matrix_get(Xmat, resampleIdx[i], j)); gsl_vector_const_view theta = gsl_matrix_const_row(Xmat, resampleIdx[i]); fprintf(resample_file, "\n"); } fclose(resample_file); /* // This outputs Xmat (parameter matrix), centers, and covariance matrices to files for debugging FILE * Xmat_file = fopen("Xmat.txt", "w"); for(size_t i = 0; i < numImisSamples; i++){ for(size_t j = 0; j < NumParam; j++) fprintf(Xmat_file, "%.15e\t", gsl_matrix_get(Xmat, i, j)); fprintf(Xmat_file, "%e\t%e\t%e\t%e\t%e\t\n", prior_all[i], likelihood_all[i], imp_weights[i], gaussian_sum[i], distance[i]); } fclose(Xmat_file); FILE * centers_file = fopen("centers.txt", "w"); for(size_t i = 0; i < imisStep; i++){ for(size_t j = 0; j < NumParam; j++) fprintf(centers_file, "%f\t", center_all[i][j]); fprintf(centers_file, "\n"); } fclose(centers_file); FILE * sigmaInv_file = fopen("sigmaInv.txt", "w"); for(size_t i = 0; i < imisStep; i++){ for(size_t j = 0; j < NumParam; j++) for(size_t k = 0; k < NumParam; k++) fprintf(sigmaInv_file, "%f\t", gsl_matrix_get(sigmaInv_all[i], j, k)); fprintf(sigmaInv_file, "\n"); } 
fclose(sigmaInv_file); */ // free memory allocated by IMIS for(size_t i = 0; i < imisStep; i++){ gsl_matrix_free(sigmaChol_all[i]); gsl_matrix_free(sigmaInv_all[i]); } // release RNG gsl_rng_free(rng); gsl_matrix_free(Xmat); gsl_matrix_free(nearestX); free(prior_all); free(likelihood_all); free(imp_weight_denom); free(gaussian_sum); free(distance); free(imp_weights); free(tmp_MVNpdf); return; }