/* Backpropagation through a linear (fully-connected) module.
 *
 * Given output gradients m->output->dx, computes:
 *   - input gradients:  input->dx = W' * output->dx
 *   - weight gradients: dW        = output->dx * input->x'   (outer product)
 *
 * Returns TNN_ERROR_SUCCESS on success, TNN_ERROR_MODULE_MISTYPE if m is not
 * a linear module, TNN_ERROR_STATE_INVALID if any required state is invalid.
 * NOTE(review): TNN_MACRO_ERRORTEST/TNN_MACRO_GSLTEST presumably return from
 * this function on failure -- confirm against their definitions.
 */
tnn_error tnn_module_bprop_linear(tnn_module *m){
  tnn_error ret;
  gsl_matrix w;   /* matrix view of the flat weight parameter vector */
  gsl_matrix dw;  /* matrix view of the flat weight-gradient vector */

  //Routine check: verify module type and that all states are valid
  if(m->t != TNN_MODULE_TYPE_LINEAR){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Transform the matrix: view the (output->size x input->size) weights as matrices
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.x, &w, m->output->size, m->input->size),ret);
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.dx, &dw, m->output->size, m->input->size), ret);

  //bprop to input: input->dx = W' * output->dx
  TNN_MACRO_GSLTEST(gsl_blas_dgemv(CblasTrans, 1.0, &w, &m->output->dx, 0.0, &m->input->dx));

  //bprop to dw: dW = output->dx * input->x' (dw is zeroed first, then rank-1 updated)
  gsl_matrix_set_zero(&dw);
  TNN_MACRO_GSLTEST(gsl_blas_dger(1.0, &m->output->dx, &m->input->x, &dw));

  return TNN_ERROR_SUCCESS;
}
/*
 * Apply a compact-storage Householder reflector to A from the right.
 * The reflector is H = I - tau * w * w' with w = [1; v]; tau == 0 means
 * H is the identity and A is left untouched.  The update touches only
 * the first column of A and its trailing M x L block, where L = v->size.
 * `work` must have length A->size1 and is used as scratch space.
 */
static int
cod_householder_mh(const double tau, const gsl_vector * v,
                   gsl_matrix * A, gsl_vector * work)
{
  /* H = I: nothing to do */
  if (tau == 0)
    return GSL_SUCCESS;

  const size_t nrows = A->size1;
  const size_t ncols = A->size2;
  const size_t vlen = v->size;

  /* views of the affected pieces: first column, trailing block */
  gsl_vector_view col0 = gsl_matrix_subcolumn(A, 0, 0, nrows);
  gsl_matrix_view tail = gsl_matrix_submatrix(A, 0, ncols - vlen, nrows, vlen);

  /* work := A(1:M,1) + A(1:M,N-L+1:N) * v */
  gsl_vector_memcpy(work, &col0.vector);
  gsl_blas_dgemv(CblasNoTrans, 1.0, &tail.matrix, v, 1.0, work);

  /* A(1:M,1) := A(1:M,1) - tau * work */
  gsl_blas_daxpy(-tau, work, &col0.vector);

  /* A(1:M,N-L+1:N) := A(1:M,N-L+1:N) - tau * work * v' */
  gsl_blas_dger(-tau, work, v, &tail.matrix);

  return GSL_SUCCESS;
}
/* Accumulates a (renormalized) multinomial covariance contribution into m:
 *   m += alpha * diag(v) - (alpha / sum(v)) * v * v'
 * The matrix m is updated in place; v is not modified.
 * NOTE(review): gsl_blas_dsum is presumably a project helper returning the
 * plain (signed) sum of v -- confirm, since stock GSL only has dasum. */
void MultinomialCovariance(double alpha, const gsl_vector* v, gsl_matrix* m) {
  const double total = gsl_blas_dsum(v);
  gsl_vector_view diag_view = gsl_matrix_diagonal(m);

  /* rank-1 downdate of the full matrix: m -= (alpha/total) * v v' */
  gsl_blas_dger(-alpha / total, v, v, m);

  /* then lift the diagonal: diag(m) += alpha * v */
  gsl_blas_daxpy(alpha, v, &diag_view.vector);
}
// f = I(||X||_2 <= 1) void project_spectral_norm_ball(gsl_matrix *X) { gsl_matrix *V = gsl_matrix_alloc(X->size1, X->size2); gsl_vector *d = gsl_vector_alloc(X->size2); gsl_vector *tmp = gsl_vector_alloc(X->size2); gsl_linalg_SV_decomp(X, W, d, tmp); int i; double d_i; gsl_matrix_set_zero(X); for (i = 0; i < X->size2; i++) { d_i = fmax(1, gsl_vector_get(d, i)); gsl_vector_view U_i = gsl_matrix_column(X, i); gsl_vector_view V_i = gsl_matrix_column(V, i); gsl_blas_dger(d_i, &U_i.vector, &V_i.vector, X); } gsl_vector_free(d); gsl_matrix_free(V); gsl_vector_free(tmp); }
/* Fill m with a random matrix of (at most) the given rank by summing
 * `rank` random outer products u * v'.  Uses r as the RNG source and
 * returns GSL_SUCCESS. */
static int create_rank_matrix(const size_t rank, gsl_matrix * m, gsl_rng * r) {
  gsl_vector *left = gsl_vector_alloc(m->size1);
  gsl_vector *right = gsl_vector_alloc(m->size2);
  size_t k;

  gsl_matrix_set_zero(m);

  /* accumulate rank-1 terms: m += left * right' */
  for (k = 0; k < rank; ++k) {
    create_random_vector(left, r);
    create_random_vector(right, r);
    gsl_blas_dger(1.0, left, right, m);
  }

  gsl_vector_free(left);
  gsl_vector_free(right);

  return GSL_SUCCESS;
}
// Combined cost (*f) and gradient (g) evaluation for the GICP alignment,
// in the f/df/fdf form used by GSL multidimensional minimizers.
//   x      : current 6-DOF state (3 translation + 3 rotation components)
//   params : GICPOptData* carrying the point sets, per-point M matrices,
//            nearest-neighbor indices, and the base transform
//   f      : out -- accumulated cost, sum_i res_i' M_i res_i / num_matches
//   g      : out -- 6-vector gradient; g[0..2] translation, g[3..5] rotation
// NOTE(review): the std::cout / print_gsl_* calls look like debug tracing
// left inside the per-point loop -- consider removing for production use.
void GICPOptimizer::fdf(const gsl_vector *x, void *params, double * f, gsl_vector *g) {
  std::cout << ">>> fdf" << std::endl;
  GICPOptData *opt_data = (GICPOptData *)params;
  double pt1[3];
  double pt2[3];
  double res[3]; // residual
  double temp[3]; // temp local vector
  double temp_mat[9]; // temp matrix used for accumulating the rotation gradient
  // GSL views over the stack arrays so BLAS routines can operate on them
  gsl_vector_view gsl_pt1 = gsl_vector_view_array(pt1, 3);
  gsl_vector_view gsl_pt2 = gsl_vector_view_array(pt2, 3);
  gsl_vector_view gsl_res = gsl_vector_view_array(res, 3);
  gsl_vector_view gsl_temp = gsl_vector_view_array(temp, 3);
  gsl_vector_view gsl_gradient_t = gsl_vector_subvector(g, 0, 3); // translation comp. of gradient
  gsl_vector_view gsl_gradient_r = gsl_vector_subvector(g, 3, 3); // rotation comp. of gradient
  gsl_matrix_view gsl_temp_mat_r = gsl_matrix_view_array(temp_mat, 3, 3);
  gsl_matrix_view gsl_M;
  dgc_transform_t t;
  double temp_double;

  // take the base transformation
  dgc_transform_copy(t, opt_data->base_t);
  // apply the current state
  apply_state(t, x);

  // zero all accumulator variables
  *f = 0;
  gsl_vector_set_zero(g);
  gsl_vector_set_zero(&gsl_temp.vector);
  gsl_matrix_set_zero(&gsl_temp_mat_r.matrix);

  for(int i = 0; i < opt_data->p1->Size(); i++) {
    int j = opt_data->nn_indecies[i];
    if(j != -1) { // j == -1 marks points with no nearest-neighbor match
      // get point 1
      pt1[0] = (*opt_data->p1)[i].x;
      pt1[1] = (*opt_data->p1)[i].y;
      pt1[2] = (*opt_data->p1)[i].z;

      // get point 2
      pt2[0] = (*opt_data->p2)[j].x;
      pt2[1] = (*opt_data->p2)[j].y;
      pt2[2] = (*opt_data->p2)[j].z;

      //cout << "accessing " << i << " of " << opt_data->p1->Size() << ", " << opt_data->p2->Size() << endl;
      //get M-matrix (per-correspondence 3x3 weighting matrix, viewed in place)
      gsl_M = gsl_matrix_view_array(&opt_data->M[i][0][0], 3, 3);
      print_gsl_matrix(&gsl_M.matrix, "M");

      //transform point 1 by the current estimate, then form the residual
      dgc_transform_point(&pt1[0], &pt1[1], &pt1[2], t);
      std::cout << "pt1 " << pt1[0] << "," << pt1[1] << "," << pt1[2] << std::endl;
      res[0] = pt1[0] - pt2[0];
      res[1] = pt1[1] - pt2[1];
      res[2] = pt1[2] - pt2[2];
      std::cout << "res " << res[0] << "," << res[1] << "," << res[2] << std::endl;

      // compute the transformed residual
      // temp := M*res  (dsymv reads only the lower triangle of M)
      //print_gsl_matrix(&gsl_M.matrix, "gsl_m");
      gsl_blas_dsymv(CblasLower, 1., &gsl_M.matrix, &gsl_res.vector, 0., &gsl_temp.vector);
      print_gsl_vector(&gsl_temp.vector, "temp");

      // compute M-norm of the residual
      // temp_double := res'*temp = temp'*M*res
      gsl_blas_ddot(&gsl_res.vector, &gsl_temp.vector, &temp_double);

      // accumulate total error: f += res'*M*res (normalized by match count)
      *f += temp_double/(double)opt_data->num_matches;
      std::cout << "f " << *f << std::endl;

      // accumulate translation gradient:
      // gsl_gradient_t += 2*M*res
      gsl_blas_dsymv(CblasLower, 2./(double)opt_data->num_matches, &gsl_M.matrix, &gsl_res.vector, 1., &gsl_gradient_t.vector);

      if(opt_data->solve_rotation) {
        // accumulate the rotation gradient matrix
        // get back the original untransformed point to compute the rotation gradient
        pt1[0] = (*opt_data->p1)[i].x;
        pt1[1] = (*opt_data->p1)[i].y;
        pt1[2] = (*opt_data->p1)[i].z;
        dgc_transform_point(&pt1[0], &pt1[1], &pt1[2], opt_data->base_t);
        // gsl_temp_mat_r += 2*(gsl_temp).(gsl_pt1)' [ = (2*M*residual).(gsl_pt1)' ]
        gsl_blas_dger(2./(double)opt_data->num_matches, &gsl_pt1.vector, &gsl_temp.vector, &gsl_temp_mat_r.matrix);
      }
    }
  }
  print_gsl_vector(g, "gradient");

  // the above loop sets up the gradient with respect to the translation, and the matrix derivative w.r.t. the rotation matrix
  // this code sets up the matrix derivatives dR/dPhi, dR/dPsi, dR/dTheta. i.e. the derivatives of the whole rotation matrix with respect to the euler angles
  // note that this code assumes the XYZ order of euler angles, with the Z rotation corresponding to bearing. This means the Z angle is negative of what it would be
  // in the regular XYZ euler-angle convention.
  if(opt_data->solve_rotation) {
    // now use the d/dR matrix to compute the derivative with respect to euler angles and put it directly into g[3], g[4], g[5];
    compute_dr(x, &gsl_temp_mat_r.matrix, g);
  }
  print_gsl_matrix(&gsl_temp_mat_r.matrix, "R");
  print_gsl_vector(g, "gradient");
  std::cout << "<<< fdf" << std::endl;
}
/**
 * C++ wrapper around gsl_blas_dger(): performs the rank-1 update
 * A := alpha * X * Y' + A on the wrapped GSL objects.
 * @param alpha A constant
 * @param X A vector
 * @param Y A vector
 * @param A A matrix (updated in place)
 * @return Error code on failure
 */
int dger( double alpha, vector const& X, vector const& Y, matrix& A ){
  // Unwrap the C++ handles and delegate to the underlying C routine.
  int const status = gsl_blas_dger( alpha, X.get(), Y.get(), A.get() );
  return status;
}
void c_ctr::learn_map_estimate(const c_data* users, const c_data* items, const c_corpus* c, const ctr_hyperparameter* param, const char* directory) { // init model parameters printf("\ninitializing the model ...\n"); init_model(param->ctr_run); // filename char name[500]; // start time time_t start, current; time(&start); int elapsed = 0; int iter = 0; double likelihood = -exp(50), likelihood_old; double converge = 1.0; /// create the state log file sprintf(name, "%s/state.log", directory); FILE* file = fopen(name, "w"); fprintf(file, "iter time likelihood converge\n"); /* alloc auxiliary variables */ gsl_matrix* XX = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_matrix* A = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_matrix* B = gsl_matrix_alloc(m_num_factors, m_num_factors); gsl_vector* x = gsl_vector_alloc(m_num_factors); gsl_matrix* phi = NULL; gsl_matrix* word_ss = NULL; gsl_matrix* log_beta = NULL; gsl_vector* gamma = NULL; if (param->ctr_run && param->theta_opt) { int max_len = c->max_corpus_length(); phi = gsl_matrix_calloc(max_len, m_num_factors); word_ss = gsl_matrix_calloc(m_num_factors, c->m_size_vocab); log_beta = gsl_matrix_calloc(m_num_factors, c->m_size_vocab); gsl_matrix_memcpy(log_beta, m_beta); mtx_log(log_beta); gamma = gsl_vector_alloc(m_num_factors); } /* tmp variables for indexes */ int i, j, m, n, l, k; int* item_ids; int* user_ids; double result; /// confidence parameters double a_minus_b = param->a - param->b; while ((iter < param->max_iter and converge > 1e-4 ) or iter < min_iter) { likelihood_old = likelihood; likelihood = 0.0; // update U gsl_matrix_set_zero(XX); for (j = 0; j < m_num_items; j ++) { m = items->m_vec_len[j]; if (m>0) { gsl_vector_const_view v = gsl_matrix_const_row(m_V, j); gsl_blas_dger(1.0, &v.vector, &v.vector, XX); } } gsl_matrix_scale(XX, param->b); // this is only for U gsl_matrix_add_diagonal(XX, param->lambda_u); for (i = 0; i < m_num_users; i ++) { item_ids = users->m_vec_data[i]; n = 
users->m_vec_len[i]; if (n > 0) { // this user has rated some articles gsl_matrix_memcpy(A, XX); gsl_vector_set_zero(x); for (l=0; l < n; l ++) { j = item_ids[l]; gsl_vector_const_view v = gsl_matrix_const_row(m_V, j); gsl_blas_dger(a_minus_b, &v.vector, &v.vector, A); gsl_blas_daxpy(param->a, &v.vector, x); } gsl_vector_view u = gsl_matrix_row(m_U, i); matrix_vector_solve(A, x, &(u.vector)); // update the likelihood gsl_blas_ddot(&u.vector, &u.vector, &result); likelihood += -0.5 * param->lambda_u * result; } } if (param->lda_regression) break; // one iteration is enough for lda-regression // update V if (param->ctr_run && param->theta_opt) gsl_matrix_set_zero(word_ss); gsl_matrix_set_zero(XX); for (i = 0; i < m_num_users; i ++) { n = users->m_vec_len[i]; if (n>0) { gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_dger(1.0, &u.vector, &u.vector, XX); } } gsl_matrix_scale(XX, param->b); for (j = 0; j < m_num_items; j ++) { gsl_vector_view v = gsl_matrix_row(m_V, j); gsl_vector_view theta_v = gsl_matrix_row(m_theta, j); user_ids = items->m_vec_data[j]; m = items->m_vec_len[j]; if (m>0) { // m > 0, some users have rated this article gsl_matrix_memcpy(A, XX); gsl_vector_set_zero(x); for (l = 0; l < m; l ++) { i = user_ids[l]; gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_dger(a_minus_b, &u.vector, &u.vector, A); gsl_blas_daxpy(param->a, &u.vector, x); } // adding the topic vector // even when ctr_run=0, m_theta=0 gsl_blas_daxpy(param->lambda_v, &theta_v.vector, x); gsl_matrix_memcpy(B, A); // save for computing likelihood // here different from U update gsl_matrix_add_diagonal(A, param->lambda_v); matrix_vector_solve(A, x, &v.vector); // update the likelihood for the relevant part likelihood += -0.5 * m * param->a; for (l = 0; l < m; l ++) { i = user_ids[l]; gsl_vector_const_view u = gsl_matrix_const_row(m_U, i); gsl_blas_ddot(&u.vector, &v.vector, &result); likelihood += param->a * result; } likelihood += -0.5 * mahalanobis_prod(B, 
&v.vector, &v.vector); // likelihood part of theta, even when theta=0, which is a // special case gsl_vector_memcpy(x, &v.vector); gsl_vector_sub(x, &theta_v.vector); gsl_blas_ddot(x, x, &result); likelihood += -0.5 * param->lambda_v * result; if (param->ctr_run && param->theta_opt) { const c_document* doc = c->m_docs[j]; likelihood += doc_inference(doc, &theta_v.vector, log_beta, phi, gamma, word_ss, true); optimize_simplex(gamma, &v.vector, param->lambda_v, &theta_v.vector); } } else { // m=0, this article has never been rated if (param->ctr_run && param->theta_opt) { const c_document* doc = c->m_docs[j]; doc_inference(doc, &theta_v.vector, log_beta, phi, gamma, word_ss, false); vnormalize(gamma); gsl_vector_memcpy(&theta_v.vector, gamma); } } } // update beta if needed if (param->ctr_run && param->theta_opt) { gsl_matrix_memcpy(m_beta, word_ss); for (k = 0; k < m_num_factors; k ++) { gsl_vector_view row = gsl_matrix_row(m_beta, k); vnormalize(&row.vector); } gsl_matrix_memcpy(log_beta, m_beta); mtx_log(log_beta); } time(¤t); elapsed = (int)difftime(current, start); iter++; converge = fabs((likelihood-likelihood_old)/likelihood_old); if (likelihood < likelihood_old) printf("likelihood is decreasing!\n"); fprintf(file, "%04d %06d %10.5f %.10f\n", iter, elapsed, likelihood, converge); fflush(file); printf("iter=%04d, time=%06d, likelihood=%.5f, converge=%.10f\n", iter, elapsed, likelihood, converge); // save intermediate results if (iter % param->save_lag == 0) { sprintf(name, "%s/%04d-U.dat", directory, iter); FILE * file_U = fopen(name, "w"); mtx_fprintf(file_U, m_U); fclose(file_U); sprintf(name, "%s/%04d-V.dat", directory, iter); FILE * file_V = fopen(name, "w"); mtx_fprintf(file_V, m_V); fclose(file_V); if (param->ctr_run) { sprintf(name, "%s/%04d-theta.dat", directory, iter); FILE * file_theta = fopen(name, "w"); mtx_fprintf(file_theta, m_theta); fclose(file_theta); sprintf(name, "%s/%04d-beta.dat", directory, iter); FILE * file_beta = fopen(name, "w"); 
mtx_fprintf(file_beta, m_beta); fclose(file_beta); } } } // save final results sprintf(name, "%s/final-U.dat", directory); FILE * file_U = fopen(name, "w"); mtx_fprintf(file_U, m_U); fclose(file_U); sprintf(name, "%s/final-V.dat", directory); FILE * file_V = fopen(name, "w"); mtx_fprintf(file_V, m_V); fclose(file_V); if (param->ctr_run) { sprintf(name, "%s/final-theta.dat", directory); FILE * file_theta = fopen(name, "w"); mtx_fprintf(file_theta, m_theta); fclose(file_theta); sprintf(name, "%s/final-beta.dat", directory); FILE * file_beta = fopen(name, "w"); mtx_fprintf(file_beta, m_beta); fclose(file_beta); } // free memory gsl_matrix_free(XX); gsl_matrix_free(A); gsl_matrix_free(B); gsl_vector_free(x); if (param->ctr_run && param->theta_opt) { gsl_matrix_free(phi); gsl_matrix_free(log_beta); gsl_matrix_free(word_ss); gsl_vector_free(gamma); } }