//! Cube overload of find_finite(): unwraps the cube expression, views its
//! elements as a single column and delegates to the matrix overload.
//! NOTE(review): T1 is a template parameter; the `template<typename T1>` header
//! is expected to precede this definition -- confirm against the full file.
inline
uvec
find_finite(const BaseCube<typename T1::elem_type,T1>& X)
  {
  arma_extra_debug_sigprint();

  typedef typename T1::elem_type elem_t;

  // Materialise the expression (if needed) so that raw element memory is available.
  const unwrap_cube<T1> U(X.get_ref());

  // Alias the cube's storage as an (n_elem x 1) matrix; the trailing `false`
  // requests that the auxiliary memory be used directly rather than copied.
  elem_t* raw_mem = const_cast< elem_t* >(U.M.memptr());
  const Mat<elem_t> flat(raw_mem, U.M.n_elem, 1, false);

  return find_finite(flat);
  }
double mse(const mat & A, const mat & W, const mat & H, const mat & W1, const mat & H2) { // compute mean square error of A and fixed A const int k = W.n_cols - H2.n_cols; mat Adiff = A; Adiff -= W.cols(0, k-1) * H.cols(0, k-1).t(); if (!W1.empty()) Adiff -= W1*H.cols(k, H.n_cols-1).t(); if (!H2.empty()) Adiff -= W.cols(k, W.n_cols-1)*H2.t(); if (A.is_finite()) return mean(mean(square(Adiff))); else return mean(square(Adiff.elem(find_finite(Adiff)))); }
void GRASTA_training(const mat &D, mat &Uhat, struct STATUS &status, const struct GRASTA_OPT &options, mat &W, mat &Outlier ) { int rows, cols; rows = D.n_rows; cols = D.n_cols; if ( !status.init ){ status.init = 1; status.curr_iter = 0; status.last_mu = options.MIN_MU; status.level = 0; status.step_scale = 0.0; status.last_w = zeros(options.RANK, 1); status.last_gamma = zeros(options.DIM, 1); if (!Uhat.is_finite()){ Uhat = orth(randn(options.DIM, options.RANK)); } } Outlier = zeros<mat>(rows, cols); W = zeros<mat>(options.RANK, cols); mat U_Omega, y_Omega, y_t, s, w, dual, gt; uvec idx, col_order; ADMM_OPT admm_opt; double SCALE, t, rel; bool bRet; admm_opt.lambda = options.lambda; //if (!options.QUIET) int maxIter = options.maxCycles * cols; // 20 passes through the data set status.hist_rel.reserve( maxIter); // Order of examples to process arma_rng::set_seed_random(); col_order = conv_to<uvec>::from(floor(cols*randu(maxIter, 1))); for (int k=0; k<maxIter; k++){ int iCol = col_order(k); //PRINTF("%d / %d\n",iCol, cols); y_t = D.col(iCol); idx = find_finite(y_t); y_Omega = y_t.elem(idx); SCALE = norm(y_Omega); y_Omega = y_Omega/SCALE; // the following for-loop is for U_Omega = U(idx,:) in matlab U_Omega = zeros<mat>(idx.n_elem, Uhat.n_cols); for (int i=0; i<idx.n_elem; i++) U_Omega.row(i) = Uhat.row(idx(i)); // solve L-1 regression admm_opt.MAX_ITER = options.MAX_ITER; if (options.NORM_TYPE == L1_NORM) bRet = ADMM_L1(U_Omega, y_Omega, admm_opt, s, w, dual); else if (options.NORM_TYPE == L21_NORM){ w = solve(U_Omega, y_Omega); s = y_Omega - U_Omega*w; dual = -s/norm(s, 2); } else { PRINTF("Error: norm type does not support!\n"); return; } vec tmp_col = zeros<vec>(rows); tmp_col.elem(idx) = SCALE * s; Outlier.col(iCol) = tmp_col; W.col(iCol) = SCALE * w; // take gradient step over Grassmannian t = GRASTA_update(Uhat, status, w, dual, idx, options); if (!options.QUIET){ rel = subspace(options.GT_mat, Uhat); status.hist_rel.push_back(rel); if (rel < options.TOL){ 
PRINTF("%d/%d: subspace angle %.2e\n",k,maxIter, rel); break; } } if (k % cols ==0){ if (!options.QUIET) PRINTF("Pass %d/%d: step-size %.2e, level %d, last mu %.2f\n", k % cols, options.maxCycles, t, status.level, status.last_mu); } if (status.level >= options.convergeLevel){ // Must cycling around the dataset twice to get the correct regression weight W if (!options.QUIET) PRINTF("Converge at level %d, last mu %.2f\n",status.level,status.last_mu); break; } } }
//[[Rcpp::export]] Rcpp::List nnmf(const mat & A, const unsigned int k, mat W, mat H, umat Wm, umat Hm, const vec & alpha, const vec & beta, const unsigned int max_iter, const double rel_tol, const int n_threads, const int verbose, const bool show_warning, const unsigned int inner_max_iter, const double inner_rel_tol, const int method, unsigned int trace) { /****************************************************************************************************** * Non-negative Matrix Factorization(NNMF) using alternating scheme * ---------------------------------------------------------------- * Description: * Decompose matrix A such that * A = W H * Arguments: * A : Matrix to be decomposed * W, H : Initial matrices of W and H, where ncol(W) = nrow(H) = k. # of rows/columns of W/H could be 0 * Wm, Hm : Masks of W and H, s.t. masked entries are no-updated and fixed to initial values * alpha : [L2, angle, L1] regularization on W (non-masked entries) * beta : [L2, angle, L1] regularization on H (non-masked entries) * max_iter : Maximum number of iteration * rel_tol : Relative tolerance between two successive iterations, = |e2-e1|/avg(e1, e2) * n_threads : Number of threads (openMP) * verbose : Either 0 = no any tracking, 1 == progression bar, 2 == print iteration info * show_warning : If to show warning if targeted `tol` is not reached * inner_max_iter : Maximum number of iterations passed to each inner W or H matrix updating loop * inner_rel_tol : Relative tolerance passed to inner W or H matrix updating loop, = |e2-e1|/avg(e1, e2) * method : Integer of 1, 2, 3 or 4, which encodes methods * : 1 = sequential coordinate-wise minimization using square loss * : 2 = Lee's multiplicative update with square loss, which is re-scaled gradient descent * : 3 = sequentially quadratic approximated minimization with KL-divergence * : 4 = Lee's multiplicative update with KL-divergence, which is re-scaled gradient descent * trace : A positive integer, error will be checked very 'trace' 
iterations. Computing WH can be very expansive, * : so one may not want to check error A-WH every single iteration * Return: * A list (Rcpp::List) of * W, H : resulting W and H matrices * mse_error : a vector of mean square error (divided by number of non-missings) * mkl_error : a vector (length = number of iterations) of mean KL-distance * target_error : a vector of loss (0.5*mse or mkl), plus constraints * average_epoch : a vector of average epochs (one complete swap over W and H) * Author: * Eric Xihui Lin <*****@*****.**> * Version: * 2015-12-11 ******************************************************************************************************/ unsigned int n = A.n_rows; unsigned int m = A.n_cols; //int k = H.n_rows; // decomposition rank k unsigned int N_non_missing = n*m; if (trace < 1) trace = 1; unsigned int err_len = (unsigned int)std::ceil(double(max_iter)/double(trace)) + 1; vec mse_err(err_len), mkl_err(err_len), terr(err_len), ave_epoch(err_len); // check progression bool show_progress = false; if (verbose == 1) show_progress = true; Progress prgrss(max_iter, show_progress); double rel_err = rel_tol + 1; double terr_last = 1e99; uvec non_missing; bool any_missing = !A.is_finite(); if (any_missing) { non_missing = find_finite(A); N_non_missing = non_missing.n_elem; mkl_err.fill(mean((A.elem(non_missing)+TINY_NUM) % log(A.elem(non_missing)+TINY_NUM) - A.elem(non_missing))); } else mkl_err.fill(mean(mean((A+TINY_NUM) % log(A+TINY_NUM) - A))); // fixed part in KL-dist, mean(A log(A) - A) if (Wm.empty()) Wm.resize(0, n); else inplace_trans(Wm); if (Hm.empty()) Hm.resize(0, m); if (W.empty()) { W.randu(k, n); W *= 0.01; if (!Wm.empty()) W.elem(find(Wm > 0)).fill(0.0); } else inplace_trans(W); if (H.empty()) { H.randu(k, m); H *= 0.01; if (!Hm.empty()) H.elem(find(Hm > 0)).fill(0.0); } if (verbose == 2) { Rprintf("\n%10s | %10s | %10s | %10s | %10s\n", "Iteration", "MSE", "MKL", "Target", "Rel. 
Err."); Rprintf("--------------------------------------------------------------\n"); } int total_raw_iter = 0; unsigned int i = 0; unsigned int i_e = 0; // index for error checking for(; i < max_iter && std::abs(rel_err) > rel_tol; i++) { Rcpp::checkUserInterrupt(); prgrss.increment(); if (any_missing) { // update W total_raw_iter += update_with_missing(W, H, A.t(), Wm, alpha, inner_max_iter, inner_rel_tol, n_threads, method); // update H total_raw_iter += update_with_missing(H, W, A, Hm, beta, inner_max_iter, inner_rel_tol, n_threads, method); if (i % trace == 0) { const mat & Ahat = W.t()*H; mse_err(i_e) = mean(square((A - Ahat).eval().elem(non_missing))); mkl_err(i_e) += mean((-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat).eval().elem(non_missing)); } } else { // update W total_raw_iter += update(W, H, A.t(), Wm, alpha, inner_max_iter, inner_rel_tol, n_threads, method); // update H total_raw_iter += update(H, W, A, Hm, beta, inner_max_iter, inner_rel_tol, n_threads, method); if (i % trace == 0) { const mat & Ahat = W.t()*H; mse_err(i_e) = mean(mean(square((A - Ahat)))); mkl_err(i_e) += mean(mean(-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat)); } } if (i % trace == 0) { ave_epoch(i_e) = double(total_raw_iter)/(n+m); if (method < 3) // mse based terr(i_e) = 0.5*mse_err(i_e); else // KL based terr(i_e) = mkl_err(i_e); add_penalty(i_e, terr, W, H, N_non_missing, alpha, beta); rel_err = 2*(terr_last - terr(i_e)) / (terr_last + terr(i_e) + TINY_NUM ); terr_last = terr(i_e); if (verbose == 2) Rprintf("%10d | %10.4f | %10.4f | %10.4f | %10.g\n", i+1, mse_err(i_e), mkl_err(i_e), terr(i_e), rel_err); total_raw_iter = 0; // reset to 0 ++i_e; } } // compute error of the last iteration if ((i-1) % trace != 0) { if (any_missing) { const mat & Ahat = W.t()*H; mse_err(i_e) = mean(square((A - Ahat).eval().elem(non_missing))); mkl_err(i_e) += mean((-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat).eval().elem(non_missing)); } else { const mat & Ahat = W.t()*H; mse_err(i_e) = mean(mean(square((A 
- Ahat)))); mkl_err(i_e) += mean(mean(-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat)); } ave_epoch(i_e) = double(total_raw_iter)/(n+m); if (method < 3) // mse based terr(i_e) = 0.5*mse_err(i_e); else // KL based terr(i_e) = mkl_err(i_e); add_penalty(i_e, terr, W, H, N_non_missing, alpha, beta); rel_err = 2*(terr_last - terr(i_e)) / (terr_last + terr(i_e) + TINY_NUM ); terr_last = terr(i_e); if (verbose == 2) Rprintf("%10d | %10.4f | %10.4f | %10.4f | %10.g\n", i+1, mse_err(i_e), mkl_err(i_e), terr(i_e), rel_err); ++i_e; } if (verbose == 2) { Rprintf("--------------------------------------------------------------\n"); Rprintf("%10s | %10s | %10s | %10s | %10s\n\n", "Iteration", "MSE", "MKL", "Target", "Rel. Err."); } if (i_e < err_len) { mse_err.resize(i_e); mkl_err.resize(i_e); terr.resize(i_e); ave_epoch.resize(i_e); } if (show_warning && rel_err > rel_tol) Rcpp::warning("Target tolerance not reached. Try a larger max.iter."); return Rcpp::List::create( Rcpp::Named("W") = W.t(), Rcpp::Named("H") = H, Rcpp::Named("mse_error") = mse_err, Rcpp::Named("mkl_error") = mkl_err, Rcpp::Named("target_error") = terr, Rcpp::Named("average_epoch") = ave_epoch, Rcpp::Named("n_iteration") = i ); }
// Column-wise non-negative least squares with missing values in A.
//
// Model (original author's notation): A = [W, W1, W2] [H, H1, H2]^T, where the
// `W` argument holds [W, W2]. For each column j of A the solver estimates
// x(:, j) = [H, H1]^T given W, W1 and H2, using only the rows of A that are
// finite in that column. H2 may be empty (no partially known part of H).
//
//   mask     : entries > 0 mark coefficients of x that are left untouched;
//              a column whose mask is entirely set is skipped
//   eta      : when > 0, added to the diagonal of WtW
//   beta     : when > 0, added to every element of WtW
//   max_iter : maximum number of coordinate sweeps per column
//   rel_tol  : sweeps stop once the relative change of the largest coordinate
//              move between two sweeps is <= rel_tol
//   n_threads: OpenMP thread count; negative values are replaced by 0
//
// Returns x, a (k + W1.n_cols) x A.n_cols matrix, k = W.n_cols - H2.n_cols.
mat nnls_solver_with_missing(const mat & A, const mat & W, const mat & W1, const mat & H2, const umat & mask,
	const double & eta, const double & beta, int max_iter, double rel_tol, int n_threads)
{
	const int m = A.n_cols;
	const int k = W.n_cols - H2.n_cols;
	const int kW = W1.n_cols;
	const int nH = k+kW;

	mat x(nH, m, fill::zeros);

	if (n_threads < 0) n_threads = 0;
	const bool is_masked = !mask.empty();

	#pragma omp parallel for num_threads(n_threads) schedule(dynamic)
	for (int j = 0; j < m; j++)
	{
		// nothing to solve for when every coefficient of this column is masked
		if (is_masked && arma::all(mask.col(j))) continue;

		// rows of A that are observed in column j
		uvec observed = find_finite(A.col(j));

		// Gram matrix restricted to the observed rows, plus regularisation
		mat WtW(nH, nH);
		update_WtW(WtW, W.rows(observed), W1.rows(observed), H2);
		if (beta > 0) WtW += beta;
		if (eta > 0) WtW.diag() += eta;

		// mu holds -WtA for this column (per the original author's comment)
		mat mu(nH, 1);
		uvec jv(1);
		jv(0) = j;
		if (H2.empty())
			update_WtA(mu, W.rows(observed), W1.rows(observed), H2, A.submat(observed, jv));
		else
			update_WtA(mu, W.rows(observed), W1.rows(observed), H2.rows(j, j), A.submat(observed, jv));

		// coordinate sweeps over the coefficients of column j
		vec x_prev(nH);
		int sweep = 0;
		double prev_move, max_move = 9999;
		do {
			x_prev = x.col(j);
			prev_move = max_move;
			max_move = 0;

			for (int l = 0; l < nH; l++)
			{
				if (is_masked && mask(l,j) > 0) continue;

				// unconstrained coordinate update, clipped at zero
				double candidate = x(l,j) - mu(l,0) / WtW(l,l);
				if (candidate < 0) candidate = 0;

				// keep mu consistent with the changed coefficient
				if (candidate != x(l,j))
				{
					mu.col(0) += (candidate - x(l,j)) * WtW.col(l);
				}
				x(l,j) = candidate;

				// track the largest move of this sweep
				const double move = std::abs(x(l,j) - x_prev(l));
				if (move > max_move) max_move = move;
			}
		} while(++sweep < max_iter && std::abs(prev_move - max_move) / (prev_move + 1e-9) > rel_tol);
	}

	return x;
}
double ung_ssm::bsf_filter(const unsigned int nsim, arma::cube& alpha, arma::mat& weights, arma::umat& indices) { arma::uvec nonzero = arma::find(P1.diag() > 0); arma::mat L_P1(m, m, arma::fill::zeros); if (nonzero.n_elem > 0) { L_P1.submat(nonzero, nonzero) = arma::chol(P1.submat(nonzero, nonzero), "lower"); } std::normal_distribution<> normal(0.0, 1.0); for (unsigned int i = 0; i < nsim; i++) { arma::vec um(m); for(unsigned int j = 0; j < m; j++) { um(j) = normal(engine); } alpha.slice(i).col(0) = a1 + L_P1 * um; } std::uniform_real_distribution<> unif(0.0, 1.0); arma::vec normalized_weights(nsim); double loglik = 0.0; if(arma::is_finite(y(0))) { weights.col(0) = log_obs_density(0, alpha); double max_weight = weights.col(0).max(); weights.col(0) = arma::exp(weights.col(0) - max_weight); double sum_weights = arma::accu(weights.col(0)); if(sum_weights > 0.0){ normalized_weights = weights.col(0) / sum_weights; } else { return -std::numeric_limits<double>::infinity(); } loglik = max_weight + std::log(sum_weights / nsim); } else { weights.col(0).ones(); normalized_weights.fill(1.0 / nsim); } for (unsigned int t = 0; t < n; t++) { arma::vec r(nsim); for (unsigned int i = 0; i < nsim; i++) { r(i) = unif(engine); } indices.col(t) = stratified_sample(normalized_weights, r, nsim); arma::mat alphatmp(m, nsim); for (unsigned int i = 0; i < nsim; i++) { alphatmp.col(i) = alpha.slice(indices(i, t)).col(t); } for (unsigned int i = 0; i < nsim; i++) { arma::vec uk(k); for(unsigned int j = 0; j < k; j++) { uk(j) = normal(engine); } alpha.slice(i).col(t + 1) = C.col(t * Ctv) + T.slice(t * Ttv) * alphatmp.col(i) + R.slice(t * Rtv) * uk; } if ((t < (n - 1)) && arma::is_finite(y(t + 1))) { weights.col(t + 1) = log_obs_density(t + 1, alpha); double max_weight = weights.col(t + 1).max(); weights.col(t + 1) = arma::exp(weights.col(t + 1) - max_weight); double sum_weights = arma::accu(weights.col(t + 1)); if(sum_weights > 0.0){ normalized_weights = weights.col(t + 1) / sum_weights; } 
else { return -std::numeric_limits<double>::infinity(); } loglik += max_weight + std::log(sum_weights / nsim); } else { weights.col(t + 1).ones(); normalized_weights.fill(1.0/nsim); } } // constant part of the log-likelihood switch(distribution) { case 0 : loglik += arma::uvec(arma::find_finite(y)).n_elem * norm_log_const(phi); break; case 1 : { arma::uvec finite_y(find_finite(y)); loglik += poisson_log_const(y(finite_y), u(finite_y)); } break; case 2 : { arma::uvec finite_y(find_finite(y)); loglik += binomial_log_const(y(finite_y), u(finite_y)); } break; case 3 : { arma::uvec finite_y(find_finite(y)); loglik += negbin_log_const(y(finite_y), u(finite_y), phi); } break; } return loglik; }