Ejemplo n.º 1
0
//! Return the linear indices of all finite elements of a cube expression.
//!
//! The cube is unwrapped and its element storage is aliased as a single
//! column (n_elem x 1) using Mat's auxiliary-memory constructor, then the
//! Mat overload of find_finite() does the actual search.  The returned
//! indices therefore follow the cube's element storage order.
//!
//! @param X  cube expression to inspect
//! @return   indices of the finite elements, as produced by find_finite(Mat)
template<typename T1>
inline
uvec
find_finite(const BaseCube<typename T1::elem_type,T1>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const unwrap_cube<T1> tmp(X.get_ref());
  
  // The const_cast is required only by the constructor signature: R is
  // itself const and the final 'false' argument requests that the memory
  // be used in place rather than copied (see Armadillo's advanced Mat
  // constructors), so the cube data is never written through R.
  const Mat<eT> R( const_cast< eT* >(tmp.M.memptr()), tmp.M.n_elem, 1, false );
  
  return find_finite(R);
  }
Ejemplo n.º 2
0
double mse(const mat & A, const mat & W, const mat & H, const mat & W1, const mat & H2)
{
	// compute mean square error of A and fixed A
	const int k = W.n_cols - H2.n_cols;

	mat Adiff = A;
	Adiff -= W.cols(0, k-1) * H.cols(0, k-1).t();
	if (!W1.empty())
		Adiff -= W1*H.cols(k, H.n_cols-1).t();
	if (!H2.empty())
		Adiff -= W.cols(k, W.n_cols-1)*H2.t();

	if (A.is_finite())
		return mean(mean(square(Adiff)));
	else
		return mean(square(Adiff.elem(find_finite(Adiff))));
}
Ejemplo n.º 3
0
void GRASTA_training(const mat &D,
        mat &Uhat,
        struct STATUS &status,
        const struct GRASTA_OPT &options,
        mat &W,
        mat &Outlier
        )
{
    int rows, cols;
    rows = D.n_rows; cols = D.n_cols;
    
    if ( !status.init ){
        status.init         = 1;
        status.curr_iter    = 0;
        
        status.last_mu      = options.MIN_MU;
        status.level        = 0;
        status.step_scale   = 0.0;
        status.last_w       = zeros(options.RANK, 1);
        status.last_gamma   = zeros(options.DIM, 1);        
        
        if (!Uhat.is_finite()){
            Uhat = orth(randn(options.DIM, options.RANK));        
        }
    }
    
    Outlier      = zeros<mat>(rows, cols);
    W            = zeros<mat>(options.RANK, cols);
    
    mat         U_Omega, y_Omega, y_t, s, w, dual, gt;
    uvec        idx, col_order;
    ADMM_OPT    admm_opt;
    double      SCALE, t, rel;
    bool        bRet;
    
    admm_opt.lambda = options.lambda;
    //if (!options.QUIET) 
    int maxIter = options.maxCycles * cols; // 20 passes through the data set
    status.hist_rel.reserve( maxIter);
                
    // Order of examples to process
    arma_rng::set_seed_random();
    col_order = conv_to<uvec>::from(floor(cols*randu(maxIter, 1)));
    
    for (int k=0; k<maxIter; k++){
        int iCol = col_order(k);
        //PRINTF("%d / %d\n",iCol, cols);
        
        y_t     = D.col(iCol);
        idx     = find_finite(y_t);
                
        y_Omega = y_t.elem(idx);
        
        SCALE = norm(y_Omega);
        y_Omega = y_Omega/SCALE;
        
        // the following for-loop is for U_Omega = U(idx,:) in matlab
        U_Omega = zeros<mat>(idx.n_elem, Uhat.n_cols);
        for (int i=0; i<idx.n_elem; i++)
            U_Omega.row(i) = Uhat.row(idx(i));
        
        // solve L-1 regression
        admm_opt.MAX_ITER = options.MAX_ITER;
        
        if (options.NORM_TYPE == L1_NORM)
            bRet = ADMM_L1(U_Omega, y_Omega, admm_opt, s, w, dual);
        else if (options.NORM_TYPE == L21_NORM){
            w = solve(U_Omega, y_Omega);
            s = y_Omega - U_Omega*w;
            dual = -s/norm(s, 2);
        }
        else {
            PRINTF("Error: norm type does not support!\n");
            return;
        }
        
        vec tmp_col = zeros<vec>(rows);
        tmp_col.elem(idx) = SCALE * s;
        
        Outlier.col(iCol) = tmp_col;
        
        W.col(iCol) =  SCALE * w;

        // take gradient step over Grassmannian
        t = GRASTA_update(Uhat, status, w, dual, idx, options);
        
        if (!options.QUIET){
            rel = subspace(options.GT_mat, Uhat);
            status.hist_rel.push_back(rel);
            
            if (rel < options.TOL){
                PRINTF("%d/%d: subspace angle %.2e\n",k,maxIter, rel);
                break;
            }
        }
        
        if (k % cols ==0){
            
            if (!options.QUIET) PRINTF("Pass %d/%d: step-size %.2e, level %d, last mu %.2f\n",
                    k % cols, options.maxCycles, t, status.level, status.last_mu);
        }
        if (status.level >= options.convergeLevel){
            // Must cycling around the dataset twice to get the correct regression weight W
            if (!options.QUIET) PRINTF("Converge at level %d, last mu %.2f\n",status.level,status.last_mu);           
            break;
        }        
    }
}
Ejemplo n.º 4
0
//[[Rcpp::export]]
Rcpp::List nnmf(const mat & A, const unsigned int k, mat W, mat H, umat Wm, umat Hm,
	const vec & alpha, const vec & beta, const unsigned int max_iter, const double rel_tol, 
	const int n_threads, const int verbose, const bool show_warning, const unsigned int inner_max_iter, 
	const double inner_rel_tol, const int method, unsigned int trace)
{
	/******************************************************************************************************
	 *              Non-negative Matrix Factorization(NNMF) using alternating scheme
	 *              ----------------------------------------------------------------
	 * Description:
	 * 	Decompose matrix A such that
	 * 		A = W H
	 * 	Non-finite entries of A are treated as missing and excluded from the errors.
	 * 	Internally W (and its mask Wm) is kept transposed (k x n) and transposed back on return.
	 * Arguments:
	 * 	A              : Matrix to be decomposed
	 * 	W, H           : Initial matrices of W and H, where ncol(W) = nrow(H) = k. # of rows/columns of W/H could be 0
	 * 	Wm, Hm         : Masks of W and H, s.t. masked entries are not updated and fixed to initial values
	 * 	alpha          : [L2, angle, L1] regularization on W (non-masked entries)
	 * 	beta           : [L2, angle, L1] regularization on H (non-masked entries)
	 * 	max_iter       : Maximum number of iterations
	 * 	rel_tol        : Relative tolerance between two successive iterations, = |e2-e1|/avg(e1, e2)
	 * 	n_threads      : Number of threads (openMP)
	 * 	verbose        : Either 0 = no tracking, 1 == progress bar, 2 == print iteration info
	 * 	show_warning   : Whether to show a warning if the targeted tolerance is not reached
	 * 	inner_max_iter : Maximum number of iterations passed to each inner W or H matrix updating loop
	 * 	inner_rel_tol  : Relative tolerance passed to inner W or H matrix updating loop, = |e2-e1|/avg(e1, e2)
	 * 	method         : Integer of 1, 2, 3 or 4, which encodes methods
	 * 	               : 1 = sequential coordinate-wise minimization using square loss
	 * 	               : 2 = Lee's multiplicative update with square loss, which is re-scaled gradient descent
	 * 	               : 3 = sequentially quadratic approximated minimization with KL-divergence
	 * 	               : 4 = Lee's multiplicative update with KL-divergence, which is re-scaled gradient descent
	 * 	trace          : A positive integer, error will be checked every 'trace' iterations. Computing WH can be very expensive,
	 * 	               : so one may not want to check error A-WH every single iteration
	 * Return:
	 * 	A list (Rcpp::List) of 
	 * 		W, H          : resulting W and H matrices
	 * 		mse_error     : a vector of mean square error (divided by number of non-missings)
	 * 		mkl_error     : a vector (one entry per error check) of mean KL-distance
	 * 		target_error  : a vector of loss (0.5*mse or mkl), plus constraints
	 * 		average_epoch : a vector of average epochs (one complete swap over W and H)
	 * 		n_iteration   : number of outer iterations performed
	 * Author:
	 * 	Eric Xihui Lin <*****@*****.**>
	 * Version:
	 * 	2015-12-11
	 ******************************************************************************************************/

	unsigned int n = A.n_rows;
	unsigned int m = A.n_cols;
	//int k = H.n_rows; // decomposition rank k
	unsigned int N_non_missing = n*m;

	if (trace < 1) trace = 1;
	// one slot per error check inside the loop, plus one for the trailing check after it
	unsigned int err_len = (unsigned int)std::ceil(double(max_iter)/double(trace)) + 1;
	vec mse_err(err_len), mkl_err(err_len), terr(err_len), ave_epoch(err_len);

	// check progression
	const bool show_progress = (verbose == 1);
	Progress prgrss(max_iter, show_progress);

	double rel_err = rel_tol + 1;
	double terr_last = 1e99;
	uvec non_missing;
	bool any_missing = !A.is_finite();
	if (any_missing) 
	{
		non_missing = find_finite(A);
		N_non_missing = non_missing.n_elem;
		mkl_err.fill(mean((A.elem(non_missing)+TINY_NUM) % log(A.elem(non_missing)+TINY_NUM) - A.elem(non_missing)));
	}
	else
		mkl_err.fill(mean(mean((A+TINY_NUM) % log(A+TINY_NUM) - A))); // fixed part in KL-dist, mean(A log(A) - A)

	if (Wm.empty())
		Wm.resize(0, n);
	else
		inplace_trans(Wm);
	if (Hm.empty())
		Hm.resize(0, m);

	if (W.empty())
	{
		W.randu(k, n);
		W *= 0.01;
		if (!Wm.empty())
			W.elem(find(Wm > 0)).fill(0.0);
	}
	else
		inplace_trans(W);

	if (H.empty())
	{
		H.randu(k, m);
		H *= 0.01;
		if (!Hm.empty())
			H.elem(find(Hm > 0)).fill(0.0);
	}

	if (verbose == 2)
	{
		Rprintf("\n%10s | %10s | %10s | %10s | %10s\n", "Iteration", "MSE", "MKL", "Target", "Rel. Err.");
		Rprintf("--------------------------------------------------------------\n");
	}

	int total_raw_iter = 0;
	unsigned int i = 0;
	unsigned int i_e = 0; // index for error checking
	for(; i < max_iter && std::abs(rel_err) > rel_tol; i++) 
	{
		Rcpp::checkUserInterrupt();
		prgrss.increment();

		if (any_missing)
		{
			// update W
			total_raw_iter += update_with_missing(W, H, A.t(), Wm, alpha, inner_max_iter, inner_rel_tol, n_threads, method);
			// update H
			total_raw_iter += update_with_missing(H, W, A, Hm, beta, inner_max_iter, inner_rel_tol, n_threads, method);

			if (i % trace == 0)
			{
				const mat & Ahat = W.t()*H;
				mse_err(i_e) = mean(square((A - Ahat).eval().elem(non_missing)));
				mkl_err(i_e) += mean((-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat).eval().elem(non_missing));
			}
		}
		else
		{
			// update W
			total_raw_iter += update(W, H, A.t(), Wm, alpha, inner_max_iter, inner_rel_tol, n_threads, method);
			// update H
			total_raw_iter += update(H, W, A, Hm, beta, inner_max_iter, inner_rel_tol, n_threads, method);

			if (i % trace == 0)
			{
				const mat & Ahat = W.t()*H;
				mse_err(i_e) = mean(mean(square((A - Ahat))));
				mkl_err(i_e) += mean(mean(-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat));
			}
		}

		if (i % trace == 0)
		{
			ave_epoch(i_e) = double(total_raw_iter)/(n+m);
			if (method < 3) // mse based
				terr(i_e) = 0.5*mse_err(i_e);
			else // KL based
				terr(i_e) = mkl_err(i_e);

			add_penalty(i_e, terr, W, H, N_non_missing, alpha, beta);

			rel_err = 2*(terr_last - terr(i_e)) / (terr_last + terr(i_e) + TINY_NUM );
			terr_last = terr(i_e);
			// i is unsigned, hence %u
			if (verbose == 2)
				Rprintf("%10u | %10.4f | %10.4f | %10.4f | %10.g\n", i+1, mse_err(i_e), mkl_err(i_e), terr(i_e), rel_err);

			total_raw_iter = 0; // reset to 0
			++i_e;
		}
	}

	// Compute the error of the last iteration if the loop did not already
	// check it.  After the loop, i is the number of iterations performed, so
	// the last iteration has 0-based index i-1.  The i > 0 guard keeps the
	// unsigned i-1 from wrapping around when no iteration ran at all.
	if (i > 0 && (i-1) % trace != 0)
	{
		if (any_missing)
		{
			const mat & Ahat = W.t()*H;
			mse_err(i_e) = mean(square((A - Ahat).eval().elem(non_missing)));
			mkl_err(i_e) += mean((-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat).eval().elem(non_missing));
		}
		else
		{
			const mat & Ahat = W.t()*H;
			mse_err(i_e) = mean(mean(square((A - Ahat))));
			mkl_err(i_e) += mean(mean(-(A+TINY_NUM) % log(Ahat+TINY_NUM) + Ahat));
		}

		ave_epoch(i_e) = double(total_raw_iter)/(n+m);
		if (method < 3) // mse based
			terr(i_e) = 0.5*mse_err(i_e);
		else // KL based
			terr(i_e) = mkl_err(i_e);
		add_penalty(i_e, terr, W, H, N_non_missing, alpha, beta);

		rel_err = 2*(terr_last - terr(i_e)) / (terr_last + terr(i_e) + TINY_NUM );
		terr_last = terr(i_e);
		// The 1-based number of the last iteration is i (the loop has already
		// incremented past it), not i+1.
		if (verbose == 2)
			Rprintf("%10u | %10.4f | %10.4f | %10.4f | %10.g\n", i, mse_err(i_e), mkl_err(i_e), terr(i_e), rel_err);

		++i_e;
	}

	if (verbose == 2)
	{
		Rprintf("--------------------------------------------------------------\n");
		Rprintf("%10s | %10s | %10s | %10s | %10s\n\n", "Iteration", "MSE", "MKL", "Target", "Rel. Err.");
	}

	// drop the unused tail of the pre-allocated error vectors
	if (i_e < err_len)
	{
		mse_err.resize(i_e);
		mkl_err.resize(i_e);
		terr.resize(i_e);
		ave_epoch.resize(i_e);
	}

	// Use the same |rel_err| criterion as the loop condition; otherwise a run
	// that ends with a negative rel_err (target error increased) would hit
	// max_iter without triggering the warning.
	if (show_warning && std::abs(rel_err) > rel_tol)
		Rcpp::warning("Target tolerance not reached. Try a larger max.iter.");

	return Rcpp::List::create(
		Rcpp::Named("W") = W.t(),
		Rcpp::Named("H") = H,
		Rcpp::Named("mse_error") = mse_err,
		Rcpp::Named("mkl_error") = mkl_err,
		Rcpp::Named("target_error") = terr,
		Rcpp::Named("average_epoch") = ave_epoch,
		Rcpp::Named("n_iteration") = i
		);
}
Ejemplo n.º 5
0
mat nnls_solver_with_missing(const mat & A, const mat & W, const mat & W1, const mat & H2, const umat & mask, 
	const double & eta, const double & beta, int max_iter, double rel_tol, int n_threads)
{
	// A = [W, W1, W2] [H, H1, H2]^T.
	// Where A may have missing (non-finite) values
	// Note that here in the input W = [W, W2]
	// compute x = [H, H1]^T given W, W2
	// A0 = W2*H2 is empty when H2 is empty (no partial info in H)
	//
	// Each column of A is solved independently by cyclic coordinate descent,
	// using only the rows where that column is finite.
	//
	// Arguments:
	// 	mask     : if non-empty, entries with mask > 0 are left at their initial value (zero)
	// 	eta      : if > 0, added to the diagonal of WtW
	// 	beta     : if > 0, added to every entry of WtW
	// 	max_iter : maximum number of coordinate-descent sweeps per column
	// 	rel_tol  : sweeps stop once the relative change of the largest coordinate move is <= rel_tol
	// Return: x = [H, H1]  (nH x ncol(A))

	int m = A.n_cols;
	int k = W.n_cols - H2.n_cols;
	int kW = W1.n_cols;
	int nH = k+kW;

	mat x(nH, m, fill::zeros);

	if (n_threads < 0) n_threads = 0;
	bool is_masked = !mask.empty();

	#pragma omp parallel for num_threads(n_threads) schedule(dynamic)
	for (int j = 0; j < m; j++)
	{
		// skip this column if all of its entries are masked
		if (is_masked && arma::all(mask.col(j))) 
			continue;
		
		uvec non_missing = find_finite(A.col(j));
		mat WtW(nH, nH); // WtW
		update_WtW(WtW, W.rows(non_missing), W1.rows(non_missing), H2);
		if (beta > 0) WtW += beta;
		if (eta > 0) WtW.diag() += eta;

		mat mu(nH, 1); // -WtA
		uvec jv(1);
		jv(0) = j;
		if (H2.empty())
			update_WtA(mu, W.rows(non_missing), W1.rows(non_missing), H2, A.submat(non_missing, jv));
		else
			update_WtA(mu, W.rows(non_missing), W1.rows(non_missing), H2.rows(j, j), A.submat(non_missing, jv));

		vec x0(nH);
		double tmp;
		int i = 0;
		double err1, err2 = 9999;
		do {
			x0 = x.col(j);
			err1 = err2;
			err2 = 0;
			for (int l = 0; l < nH; l++)
			{
				if (is_masked && mask(l,j) > 0) continue;

				// A non-positive diagonal (e.g. an all-zero column of W on
				// the observed rows, or no observed rows at all) makes the
				// step mu/WtW undefined: 0/0 yields NaN, which would then be
				// propagated to every coordinate through the mu update below.
				// Leave such a coordinate at its current value instead.
				const double curvature = WtW(l,l);
				if (!(curvature > 0)) continue;

				tmp = x(l,j) - mu(l,0) / curvature;
				if (tmp < 0) tmp = 0;
				if (tmp != x(l,j))
				{
					// keep mu consistent with the updated coordinate
					mu.col(0) += (tmp - x(l,j)) * WtW.col(l);
				}
				x(l,j) = tmp;
				// track the largest coordinate change within this sweep
				tmp = std::abs(x(l,j) - x0(l));
				if (tmp > err2) err2 = tmp;
			}
		} while(++i < max_iter && std::abs(err1 - err2) / (err1 + 1e-9) > rel_tol);
	}
	return x;
}
Ejemplo n.º 6
0
double ung_ssm::bsf_filter(const unsigned int nsim, arma::cube& alpha,
  arma::mat& weights, arma::umat& indices) {
  
  arma::uvec nonzero = arma::find(P1.diag() > 0);
  arma::mat L_P1(m, m, arma::fill::zeros);
  if (nonzero.n_elem > 0) {
    L_P1.submat(nonzero, nonzero) =
      arma::chol(P1.submat(nonzero, nonzero), "lower");
  }
  std::normal_distribution<> normal(0.0, 1.0);
  for (unsigned int i = 0; i < nsim; i++) {
    arma::vec um(m);
    for(unsigned int j = 0; j < m; j++) {
      um(j) = normal(engine);
    }
    alpha.slice(i).col(0) = a1 + L_P1 * um;
  }
  
  std::uniform_real_distribution<> unif(0.0, 1.0);
  arma::vec normalized_weights(nsim);
  double loglik = 0.0;
  
  if(arma::is_finite(y(0))) {
    weights.col(0) = log_obs_density(0, alpha);
    double max_weight = weights.col(0).max();
    weights.col(0) = arma::exp(weights.col(0) - max_weight);
    double sum_weights = arma::accu(weights.col(0));
    if(sum_weights > 0.0){
      normalized_weights = weights.col(0) / sum_weights;
    } else {
      return -std::numeric_limits<double>::infinity();
    }
    loglik = max_weight + std::log(sum_weights / nsim);
  } else {
    weights.col(0).ones();
    normalized_weights.fill(1.0 / nsim);
  }
  for (unsigned int t = 0; t < n; t++) {
    
    arma::vec r(nsim);
    for (unsigned int i = 0; i < nsim; i++) {
      r(i) = unif(engine);
    }
    
    indices.col(t) = stratified_sample(normalized_weights, r, nsim);
    
    arma::mat alphatmp(m, nsim);
    
    for (unsigned int i = 0; i < nsim; i++) {
      alphatmp.col(i) = alpha.slice(indices(i, t)).col(t);
    }
    
    for (unsigned int i = 0; i < nsim; i++) {
      arma::vec uk(k);
      for(unsigned int j = 0; j < k; j++) {
        uk(j) = normal(engine);
      }
      alpha.slice(i).col(t + 1) = C.col(t * Ctv) +
        T.slice(t * Ttv) * alphatmp.col(i) + R.slice(t * Rtv) * uk;
    }
    
    if ((t < (n - 1)) && arma::is_finite(y(t + 1))) {
      weights.col(t + 1) = log_obs_density(t + 1, alpha);
      
      double max_weight = weights.col(t + 1).max();
      weights.col(t + 1) = arma::exp(weights.col(t + 1) - max_weight);
      double sum_weights = arma::accu(weights.col(t + 1));
      if(sum_weights > 0.0){
        normalized_weights = weights.col(t + 1) / sum_weights;
      } else {
        return -std::numeric_limits<double>::infinity();
      }
      loglik += max_weight + std::log(sum_weights / nsim);
    } else {
      weights.col(t + 1).ones();
      normalized_weights.fill(1.0/nsim);
    }
  }
  // constant part of the log-likelihood
  switch(distribution) {
  case 0 :
    loglik += arma::uvec(arma::find_finite(y)).n_elem * norm_log_const(phi);
    break;
  case 1 : {
      arma::uvec finite_y(find_finite(y));
      loglik += poisson_log_const(y(finite_y), u(finite_y));
    } break;
  case 2 : {
    arma::uvec finite_y(find_finite(y));
    loglik += binomial_log_const(y(finite_y), u(finite_y));
  } break;
  case 3 : {
    arma::uvec finite_y(find_finite(y));
    loglik += negbin_log_const(y(finite_y), u(finite_y), phi);
  } break;
  }
  return loglik;
}