Example #1
// TODO make sure this does not cause any alloc
void BaseSequentialSolver::solve_block(chunk_type result, const inverse_type& inv, chunk_type rhs) const {
	assert( !has_nan(rhs.eval()) );
	result = rhs;

    bool ret = inv.solveInPlace(result);
    assert( !has_nan(result.eval()) );
    assert( ret );

	(void) ret;
}
Example #2
void DiagonalResponse::factor(const mat& H ) {
	
    if( _constant.getValue() && !_firstFactorization ) return;
    _firstFactorization = false;

	diag = H.diagonal().cwiseInverse();
	
	assert( !has_nan(diag) );
}
Example #3
/**
 * @brief Main wrapper for fitting a trendfilter model.
 * Takes as input either a sequence of lambda tuning parameters, or the number
 * of desired lambda values. In the latter case the function will also calculate
 * a lambda sequence. The user must supply allocated memory to store the output,
 * with the function itself returning only @c void. For default values, and an
 * example of how to call the function, see the function tf_admm_default.
 *
 * @param y                    a vector of responses
 * @param x                    a vector of response locations; must be in increasing order
 * @param w                    a vector of sample weights
 * @param n                    the length of y, x, and w
 * @param k                    degree of the trendfilter; i.e., k=1 for a linear fit
 * @param family               family code for the type of fit; family=0 for OLS
 * @param max_iter             maximum number of ADMM iterations; ignored for k=0
 * @param lam_flag             0/1 flag for whether the lambda sequence is user-supplied;
 *                             lam_flag=0 means the function estimates it
 * @param lambda               either empty allocated space of size nlambda which the
 *                             function fills when lam_flag=0, or a user-supplied sequence
 *                             of lambda values when lam_flag=1
 * @param nlambda              number of lambda values; needed for both lam_flag=0 and 1
 * @param lambda_min_ratio     minimum ratio between min and max lambda; ignored for lam_flag=1
 * @param beta                 allocated space of size n*nlambda to store the output coefficients
 * @param obj                  allocated space of size max_iter*nlambda to store the objective
 * @param iter                 allocated space of size nlambda to store the number of iterations
 * @param status               allocated space of size nlambda to store the status of each run
 * @param rho                  tuning parameter for the ADMM algorithm
 * @param obj_tol              stopping criteria tolerance
 * @param alpha_ls             for family != 0, line search tuning parameter
 * @param gamma_ls             for family != 0, line search tuning parameter
 * @param max_iter_ls          for family != 0, max number of iterations in line search
 * @param max_iter_newton      for family != 0, max number of iterations in inner ADMM
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm_default
 */
void tf_admm (double * y, double * x, double * w, int n, int k, int family,
              int max_iter, int lam_flag, double * lambda,
              int nlambda, double lambda_min_ratio, double * beta,
              double * obj, int * iter, int * status, double rho,
              double obj_tol, double alpha_ls, double gamma_ls,
              int max_iter_ls, int max_iter_newton, int verbose)
{
  int i;
  int j;
  double max_lam;
  double min_lam;
  double * temp_n;
  double * beta_max;
  double * alpha;
  double * u;

  cs * D;
  cs * Dt;
  cs * Dk;
  cs * Dkt;
  cs * DktDk;
  gqr * Dt_qr;
  gqr * Dkt_qr;

  beta_max = (double *) malloc(n * sizeof(double));
  temp_n   = (double *) malloc(n * sizeof(double));
  alpha    = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */
  u        = (double *) malloc(n * sizeof(double)); /* we use extra buffer (n vs n-k) */

  /* Assume w does not have zeros */
  for(i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]);

  D = tf_calc_dk(n, k+1, x);
  Dk = tf_calc_dktil(n, k, x);
  Dt = cs_transpose(D, 1);
  diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */
  Dkt = cs_transpose(Dk, 1);
  Dt_qr = glmgen_qr(Dt);
  Dkt_qr = glmgen_qr(Dkt);
  DktDk = cs_multiply(Dkt,Dk);

  /* Determine the maximum lambda in the path, and initiate the path if needed
   * using the input lambda_min_ratio and equally spaced log points.
   */
  max_lam = tf_maxlam(n, y, Dt_qr, w);
  if (!lam_flag)
  {
    min_lam = max_lam * lambda_min_ratio;
    lambda[0] = max_lam;
    for (i = 1; i < nlambda; i++)
      lambda[i] = exp((log(max_lam) * (nlambda - i -1) + log(min_lam) * i) / (nlambda-1));

  }

  rho = rho * pow( (x[n-1] - x[0])/n, (double)k);

  /* Initiate alpha and u for a warm start */
  if (lambda[0] < max_lam * 1e-5)
  {
    for (i = 0; i < n - k; i++)
    {
      alpha[i] = 0;
      u[i] = 0;
    }
  } else {

    /* beta_max */
    for (i = 0; i < n; i++) temp_n[i] = -sqrt(w[i]) * y[i];
    glmgen_qrsol (Dt_qr, temp_n);
    for (i = 0; i < n; i++) beta_max[i] = 0;
    cs_gaxpy(Dt, temp_n, beta_max);
    /* Dt has a W^{-1/2}, so in the next step divide by sqrt(w) instead of w. */
    for (i = 0; i < n; i++) beta_max[i] = y[i] - beta_max[i]/sqrt(w[i]);

    /* alpha_max */
    tf_dxtil(x, n, k, beta_max, alpha);

    /* u_max */
    switch (family)
    {
    case FAMILY_GAUSSIAN:
      for (i = 0; i < n; i++) u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      break;

    case FAMILY_LOGISTIC:
      for (i = 0; i < n; i++) {
        u[i] = logi_b2(beta_max[i]) * w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    case FAMILY_POISSON:
      for (i = 0; i < n; i++) {
        u[i] = pois_b2(beta_max[i]) * w[i] *(beta_max[i] - y[i]) / (rho * lambda[0]);
      }
      break;

    default:
      for (i = 0; i < nlambda; i++) status[i] = 2;
      return;
    }

    glmgen_qrsol (Dkt_qr, u);
  }

  /* Iterate lower level functions over all lambda values;
   * the alpha and u vectors are carried over from one lambda
   * to the next to provide warm starts
   */
  for (i = 0; i < nlambda; i++)
  {
    /* warm start */
    double * beta_init = (i == 0) ? beta_max : beta + (i-1)*n;
    for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j];

    switch (family)
    {
      case FAMILY_GAUSSIAN:
        tf_admm_gauss(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha,
                      u, obj+i*max_iter, iter+i, rho * lambda[i], obj_tol,
                      DktDk, verbose);
        break;

      case FAMILY_LOGISTIC:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &logi_b, &logi_b1, &logi_b2, verbose);
        break;

      case FAMILY_POISSON:
        tf_admm_glm(y, x, w, n, k, max_iter, lambda[i], beta+i*n, alpha, u, obj+i*max_iter, iter+i,
                    rho * lambda[i], obj_tol, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
                    DktDk, &pois_b, &pois_b1, &pois_b2, verbose);
        break;
    }

    /* If there are any NaNs in beta: reset beta, alpha, u */
    if(has_nan(beta + i * n, n))
    {
      for(j = 0; j < n; j++) beta[i*n + j] = 0;
      for(j = 0; j < n-k; j++) { alpha[j] = 0; u[j] = 0; }
      status[i] = 1;
      printf("Numerical error in lambda[%d]=%f",i,lambda[i]);
    }
  }

  cs_spfree(D);
  cs_spfree(Dt);
  cs_spfree(Dk);
  cs_spfree(Dkt);
  cs_spfree(DktDk);
  glmgen_gqr_free(Dt_qr);
  glmgen_gqr_free(Dkt_qr);

  free(temp_n);
  free(beta_max);
  free(alpha);
  free(u);
}
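The docblock above spells out the buffer sizes the caller is expected to allocate. Below is a minimal calling sketch, not part of the library: it assumes the tf_admm prototype and stdlib.h are in scope, uses family=0 (Gaussian/OLS) and lets the function build the lambda path itself; the tuning values are placeholders, with the library's real defaults living in tf_admm_default.

void tf_admm_usage_sketch(double * y, double * x, double * w, int n)
{
  int k = 2;            /* quadratic trend filter */
  int family = 0;       /* family = 0: Gaussian / OLS fit */
  int max_iter = 200;
  int nlambda = 50;
  int lam_flag = 0;     /* 0: tf_admm builds the lambda path itself */

  double * lambda = (double *) malloc(nlambda * sizeof(double));
  double * beta   = (double *) malloc(n * nlambda * sizeof(double));
  double * obj    = (double *) malloc(max_iter * nlambda * sizeof(double));
  int    * iter   = (int *)    malloc(nlambda * sizeof(int));
  int    * status = (int *)    malloc(nlambda * sizeof(int));

  tf_admm(y, x, w, n, k, family, max_iter, lam_flag, lambda,
          nlambda, 1e-4 /* lambda_min_ratio */, beta, obj, iter, status,
          1.0 /* rho */, 1e-6 /* obj_tol */, 0.5 /* alpha_ls */, 0.9 /* gamma_ls */,
          30 /* max_iter_ls */, 50 /* max_iter_newton */, 0 /* verbose */);

  /* beta now holds one fitted coefficient vector of length n per lambda value */

  free(lambda); free(beta); free(obj); free(iter); free(status);
}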
Example #4
/**
 * @brief Main wrapper for fitting a trendfilter model.
 * Takes as input either a sequence of lambda tuning parameters, or the number
 * of desired lambda values. In the latter case the function will also calculate
 * a lambda sequence. The user must supply allocated memory to store the output,
 * with the function itself returning only @c void. For default values, and an
 * example of how to call the function, see the function tf_admm_default.
 *
 * @param x                    a vector of data locations; must be in increasing order
 * @param y                    a vector of responses
 * @param w                    a vector of sample weights
 * @param n                    the length of x, y, and w
 * @param k                    polynomial degree of the fitted trend; i.e., k=1 for linear
 * @param family               family code for the type of fit; family=0 for OLS
 * @param max_iter             maximum number of ADMM iterations; ignored for k=0
 * @param beta0                initialization value of beta for first lambda; ignored if NULL
 * @param lam_flag             0/1 flag for whether the lambda sequence is user-supplied;
 *                             lam_flag=0 means the function estimates it
 * @param lambda               either empty allocated space of size nlambda which the
 *                             function fills when lam_flag=0, or a user-supplied sequence
 *                             of lambda values when lam_flag=1
 * @param nlambda              number of lambda values; needed for both lam_flag=0 and 1
 * @param lambda_min_ratio     minimum ratio between min and max lambda; ignored for lam_flag=1
 * @param tridiag              0/1 flag for using the specialized tridiagonal solvers
 * @param df                   allocated space of size nlambda to store the output df values
 * @param beta                 allocated space of size n*nlambda to store the output coefficients
 * @param obj                  allocated space of size max_iter*nlambda to store the objective
 * @param iter                 allocated space of size nlambda to store the number of iterations
 * @param status               allocated space of size nlambda to store the status of each run
 * @param rho                  tuning parameter for the ADMM algorithm
 * @param obj_tol              stopping criteria tolerance
 * @param obj_tol_newton       for family != 0, stopping criteria tolerance for prox Newton
 * @param alpha_ls             for family != 0, line search tuning parameter
 * @param gamma_ls             for family != 0, line search tuning parameter
 * @param max_iter_ls          for family != 0, max number of iterations in line search
 * @param max_iter_newton      for family != 0, max number of iterations in inner ADMM
 * @param verbose              0/1 flag for printing progress
 * @return void
 * @see tf_admm_default
 */
void tf_admm ( double * x, double * y, double * w, int n, int k, int family,
    int max_iter, double * beta0, int lam_flag, double * lambda,
    int nlambda, double lambda_min_ratio, int tridiag, int * df,
    double * beta, double * obj, int * iter, int * status,
    double rho, double obj_tol, double obj_tol_newton, double alpha_ls, double gamma_ls,
    int max_iter_ls, int max_iter_newton, int verbose)
{
  int i;
  int j;
  int numDualVars;
  double max_lam;
  double min_lam;
  double * temp_n;
  double * beta_max;
  double * alpha;
  double * u;
  double * A0;
  double * A1;
  double * v;

  cs * D;
  cs * Dt;
  cs * Dk;
  cs * Dkt;
  cs * DktDk;
  gqr * Dt_qr;
  gqr * Dkt_qr;

  beta_max = (double *) malloc(n * sizeof(double));
  temp_n   = (double *) malloc(n * sizeof(double));
  v        = (double *) malloc(n * sizeof(double));

  numDualVars = tridiag ? k : 1;

  /* we use extra buffer below (n vs n-k) */
  alpha    = (double *) malloc(n * numDualVars * sizeof(double)); 
  u        = (double *) malloc(n * numDualVars * sizeof(double)); 

  /* Assume w does not have zeros */
  for (i = 0; i < n; i++) temp_n[i] = 1/sqrt(w[i]);

  D 	= tf_calc_dk(n, k+1, x);
  Dk 	= tf_calc_dktil(n, k, x);
  Dt 	= cs_transpose(D, 1);

  diag_times_sparse(Dt, temp_n); /* Dt = W^{-1/2} Dt */

  Dkt 	 = cs_transpose(Dk, 1);
  Dt_qr  = glmgen_qr(Dt);
  Dkt_qr = glmgen_qr(Dkt);
  DktDk  = cs_multiply(Dkt,Dk);


  /* Determine the maximum lambda in the path */
  max_lam = tf_maxlam(n, y, Dt_qr, w);
  /* and if it is too small, return a trivial solution for the Gaussian case */
  if (family == FAMILY_GAUSSIAN) {
    if (max_lam < 1e-12) {
      for (i=0; i<nlambda; i++) {
        for (j=0; j<n; j++) beta[i*n+j] = y[j];
        obj[i*(max_iter+1)] = 0;
        df[i] = n;
      }
      cs_spfree(D);
      cs_spfree(Dt);
      cs_spfree(Dk);
      cs_spfree(Dkt);
      cs_spfree(DktDk);
      glmgen_gqr_free(Dt_qr);
      glmgen_gqr_free(Dkt_qr);
      free(temp_n);
      free(beta_max);
      free(alpha);
      free(u);
      return;
    }
  }
  else {		
    max_lam += 1;
  }

  /* Initiate the path if needed using the input lambda_min_ratio and 
   * equally spaced points in log space. */
  if (!lam_flag) seq_logspace(max_lam,lambda_min_ratio,nlambda,lambda);

  /* Augmented Lagrangian parameter */
  rho = rho * pow((x[n-1] - x[0])/(double)(n-1), (double)k);
  
  /* Initiate alpha and u for a warm start */
  if (lambda[0] < max_lam * 1e-5)  
    for (i = 0; i < n - k; i++) alpha[i] = u[i] = 0;    
  else {
    /* beta_max */
    if (beta0 == NULL)
      calc_beta_max(y,w,n,Dt_qr,Dt,temp_n,beta_max);
    else
      memcpy(beta_max, beta0, n*sizeof(double));

    /* Check whether the constant fit beta = weighted mean(y) does better than beta_max */
    double yc = weighted_mean(y,w,n);
    for (i = 0; i < n; i++) temp_n[i] = yc;
    double obj1 = tf_obj(x,y,w,n,k,max_lam,family,beta_max,v);
    double obj2 = tf_obj(x,y,w,n,k,max_lam,family,temp_n,v);
    if(obj2 < obj1) memcpy(beta_max, temp_n, n*sizeof(double));

    /* alpha_max */

    if (tridiag && k>0)
    {
      tf_dx1(x, n, 1, beta_max, alpha + (n*k-n));
      for (j=k-1; j >= 1; j--)
        tf_dx1(x, n, k-j+1, alpha + (n*j), alpha + (n*j-n));      
    }
    else if (k>0)
      tf_dxtil(x, n, k, beta_max, alpha);

    /* u_max */    

    if (tridiag)
      for (j=0; j<k; j++) memset(u + (n*j), 0, (n-k+j) * sizeof(double)); 
    else {
      for (i = 0; i < n; i++) 
          u[i] = w[i] * (beta_max[i] - y[i]) / (rho * lambda[0]);

      if(family == FAMILY_LOGISTIC)
        for (i = 0; i < n; i++) u[i] *= logi_b2(beta_max[i]);
      else if(family == FAMILY_POISSON)
        for (i = 0; i < n; i++) u[i] *= pois_b2(beta_max[i]);
      glmgen_qrsol (Dkt_qr, u);
      // for (i = 0; i < n-k; i++) u[i] = 0;
    }
  }

  if (tridiag && k>0)
  {
    /* Setup tridiagonal systems */  
    A0 = (double*) malloc(n*k*sizeof(double));
    A1 = (double*) malloc(n*k*sizeof(double));

    for (j=2; j <= k; j++)
    {
      form_tridiag(x, n, k-j+2, 1, 1, A0+(n*j-n), A1+(n*j-n));
    }
  }  

  /* Iterate lower level functions over all lambda values;
   * the alpha and u vectors are carried over from one lambda
   * to the next to provide warm starts */
  for (i = 0; i < nlambda; i++)
  {    
    /* warm start */
    double *beta_init = (i == 0) ? beta_max : beta + (i-1)*n;
    for(j = 0; j < n; j++) beta[i*n + j] = beta_init[j];

    if (tridiag)
    {
      form_tridiag(x, n, 1, rho * lambda[i], 0, A0, A1);
      for (j=0; j < n; j++) A0[j] = A0[j] + w[j];
    }

    switch (family) {
      case FAMILY_GAUSSIAN:
        if (tridiag)        
          tf_admm_gauss_tri(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n,
              alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i],
              obj_tol, A0, A1, verbose);        
        else
          tf_admm_gauss(x, y, w, n, k, max_iter, lambda[i], df+i, beta+i*n,
              alpha, u, obj+i*(1+max_iter), iter+i, rho * lambda[i],
              obj_tol, DktDk, verbose);

        break;

      case FAMILY_LOGISTIC:
        tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n,
            alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol,
            obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
            DktDk, A0, A1, &logi_b, &logi_b1, &logi_b2, verbose);
        break;

      case FAMILY_POISSON:
        tf_admm_glm(x, y, w, n, k, max_iter, lambda[i], tridiag, df+i, beta+i*n,
            alpha, u, obj+i*(1+max_iter_newton), iter+i, rho * lambda[i], obj_tol,
            obj_tol_newton, alpha_ls, gamma_ls, max_iter_ls, max_iter_newton,
            DktDk, A0, A1, &pois_b, &pois_b1, &pois_b2, verbose);
        break;

      default:
        printf("Unknown family, stopping calculation.\n");
        status[i] = 2;
    }
    

    /* If there are any NaNs in beta: reset beta, alpha, u */
    if (has_nan(beta + i*n, n))
    {
      double yc = weighted_mean(y,w,n);
      switch(family) {
        case FAMILY_POISSON:
          yc = (yc > 0) ? log(yc) : -DBL_MAX;
          break;
        case FAMILY_LOGISTIC:
          yc = (yc > 0) ? ( yc < 1 ? log(yc/(1-yc)) : DBL_MAX) : -DBL_MAX;
          break;
        default: break;
      }
      for (j = 0; j < n; j++) beta[i*n + j] = yc;
      for (j = 0; j < n-k; j++) alpha[j] = 0;
      for (j = 0; j < n; j++) u[j] = w[j] * (beta[i*n+j] - y[j]) / (rho * lambda[i]);
      glmgen_qrsol (Dkt_qr, u);
      if (tridiag) for (j = 0; j < n*k; j++) alpha[j] = u[j] = 0;
      status[i] = 1;
    }
  }

  cs_spfree(D);
  cs_spfree(Dt);
  cs_spfree(Dk);
  cs_spfree(Dkt);
  cs_spfree(DktDk);
  glmgen_gqr_free(Dt_qr);
  glmgen_gqr_free(Dkt_qr);

  free(beta_max);
  free(temp_n);
  free(alpha);
  free(u);
  free(v);

  if (tridiag && k>0)
  {
    free(A0);
    free(A1);
  }
}
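Example #4 extends the interface with a warm-start vector (beta0), a tridiag switch, per-lambda degrees of freedom (df) and a separate prox-Newton tolerance. A hedged sketch of the extended call, reusing buffers allocated as in the previous sketch; the tuning values are placeholders, not library defaults, and note this version indexes obj with a stride of max_iter+1 per lambda.

void tf_admm_v2_usage_sketch(double * x, double * y, double * w, int n,
                             double * lambda, double * beta, double * obj,
                             int * iter, int * status, int nlambda, int max_iter)
{
  /* df must be allocated (nlambda ints); passing NULL for beta0 lets the
   * function compute its own starting point; tridiag = 0 keeps the generic
   * sparse-QR solver path. */
  int * df = (int *) malloc(nlambda * sizeof(int));

  tf_admm(x, y, w, n, 2 /* k */, 0 /* family: Gaussian */, max_iter,
          NULL /* beta0 */, 0 /* lam_flag */, lambda,
          nlambda, 1e-4 /* lambda_min_ratio */, 0 /* tridiag */, df,
          beta, obj, iter, status,
          1.0 /* rho */, 1e-6 /* obj_tol */, 1e-6 /* obj_tol_newton */,
          0.5 /* alpha_ls */, 0.9 /* gamma_ls */,
          30 /* max_iter_ls */, 50 /* max_iter_newton */, 0 /* verbose */);

  free(df);
}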
Example #5
// this is where the magic happens
SReal BaseSequentialSolver::step(vec& lambda,
                             vec& net, 
                             const system_type& sys,
                             const vec& rhs,
                             vec& error, vec& delta,
							 bool correct ) const {

	// TODO size asserts
	
	// error norm2 estimate (seems conservative and much cheaper to
	// compute anyway)
	real estimate = 0;

    SReal omega = this->omega.getValue();
		
	// inner loop
	for(unsigned i = 0, n = blocks.size(); i < n; ++i) {
			
		const block& b = blocks[i];
			 
        // data chunks
        chunk_type lambda_chunk(&lambda(b.offset), b.size);
        chunk_type delta_chunk(&delta(b.offset), b.size);

        // if the constraint is activated, solve it
        if( b.activated )
        {
            chunk_type error_chunk(&error(b.offset), b.size);

            // update rhs TODO track and remove possible allocs
            error_chunk.noalias() = rhs.segment(b.offset, b.size);
            error_chunk.noalias() -= JP.middleRows(b.offset, b.size) * net;
            error_chunk.noalias() -= sys.C.middleRows(b.offset, b.size) * lambda;

            // error estimate update, we sum current chunk errors
            // estimate += error_chunk.squaredNorm();

            // solve for lambda changes
            solve_block(delta_chunk, blocks_inv[i], error_chunk);

            // backup old lambdas
            error_chunk = lambda_chunk;

            // update lambdas
            lambda_chunk = lambda_chunk + omega * delta_chunk;

            // project new lambdas if needed
            if( b.projector ) {
                b.projector->project( lambda_chunk.data(), lambda_chunk.size(), i, correct );
                assert( !has_nan(lambda_chunk.eval()) );
            }

            // correct lambda differences based on projection
            delta_chunk = lambda_chunk - error_chunk;
        }
        else // deactivated constraint
        {
            // force lambda to be 0
            delta_chunk = -lambda_chunk;
            lambda_chunk.setZero();
        }

		// we estimate the total lambda change; since GS convergence
		// is linear, this gives an idea of the current precision.
		estimate += delta_chunk.squaredNorm();

		// incrementally update net forces; we only do a fresh full
		// computation after the loop to keep performance decent
        net.noalias() += mapping_response.middleCols(b.offset, b.size) * delta_chunk;
		// net.noalias() = mapping_response * lambda;

        // fix net to avoid error accumulations ?
	}

	// std::cerr << "sanity check: " << (net - mapping_response * lambda).norm() << std::endl;

	// TODO is this needed to avoid error accumulation ?
	// net = mapping_response * lambda;

	// TODO flag to return real residual estimate !! otherwise
	// convergence plots are not fair.
	return estimate;
}
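The loop in step() is a projected Gauss-Seidel sweep: for each block it forms the local residual, solves it with the cached block inverse, relaxes by omega, projects the new lambdas, and folds the change into the running net force. As a rough illustration only (not the solver's actual code path), here is the same residual / relaxed-update / estimate pattern written for scalar blocks of a dense SPD system using Eigen; the function name and signature are made up for the sketch.

#include <Eigen/Dense>

// One unprojected Gauss-Seidel (SOR) sweep over the rows of a dense SPD
// system A x = b, mirroring the per-block pattern of step() above.
double gauss_seidel_sweep(const Eigen::MatrixXd& A, const Eigen::VectorXd& b,
                          Eigen::VectorXd& x, double omega)
{
    double estimate = 0;                          // squared norm of the total change
    for (int i = 0; i < A.rows(); ++i) {
        double error = b(i) - A.row(i).dot(x);    // residual for row i with current x
        double delta = omega * error / A(i, i);   // relaxed local solve (1x1 "block")
        x(i) += delta;                            // in-place update: later rows see it
        estimate += delta * delta;                // same kind of estimate step() returns
    }
    return estimate;
}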