Example #1
0
/**
 * @brief Run the OWLQN training loop.
 *
 * Evaluates the objective (value and gradient) at the starting point, then for at
 * most _iters iterations: checks convergence, builds the search direction
 * (l1_direction, newton_direction, orthant_projection, check_direction), runs a
 * backtracking line search and shifts the k / k+1 state via shift_xk_xk1.
 * Any failing step prints a message and stops training; in particular a failed
 * objective evaluation (initial or inside the line search) aborts the loop
 * instead of being shifted into the current state.
 *
 * @return always 0; failures are only reported on stdout.
 */
int L1rlrOwlqnAlgorithm::train()
{
    // The main difference between this implementation and the OWLQN paper is that the
    // gradient is computed right after the value: the two computations are similar, so
    // the gradient can be obtained together with the value at little extra cost.
    double alpha = 0.0, backoff = 0.0;
    _iter = 0;

    // init _f_xk / _g_xk from the starting point; a failed evaluation skips training
    bool eval_ok = (lr_func_val(_x_k1, _f_xk1, _g_xk1) == 0);
    if (eval_ok)
    {
        _f_xk = _f_xk1;
        for (uint32_t i = 0; i < _data->feature_num(); ++i)
        {
            _g_xk[i] = _g_xk1[i];
        }
    }
    else
    {
        std::cout << "calc function value failed!" << std::endl;
    }

    for (_iter = 0; eval_ok && _iter < _iters; ++_iter )
    {
        // 1.0 check convergence
        if (check_avg_convergence())
        {
            std::cout << "OWLQN convergenced!" << std::endl;
            break;
        }
        // 2.0 get LBFGS Newton Direction
        if (l1_direction() != 0)
        {
            std::cout << "Calc L1 Direction failed!" << std::endl;
            break;
        }
        // 2.1 two-loop calc p_k = - H_k g_xk
        if (newton_direction() != 0)
        {
            std::cout << "Calc LBFGS Newton Direction failed!" << std::endl;
            break;
        }

        // 2.2 Orthant Projection
        if (orthant_projection() != 0)
        {
            std::cout << "Orthant projection failed!" << std::endl;
            break;
        }
        // 2.3 check search direction: a non-negative product is rejected
        double dir_grad_product = check_direction();
        if (dir_grad_product >= 0)
        {
            std::cout << "Check Search Direction failed! Please check gradient & search direction!" << std::endl;
            break;
        }

        // 3.0 line search for step length alpha_k
        // 3.1 set alpha & backoff init parameter
        alpha = 1.0;
        backoff = 0.5;
        if ( 0 == _iter )
        {
            // first iteration: start from a step normalized by the direction's L2 norm
            // and shrink more aggressively
            alpha = 1 / _dir.l2_norm();
            backoff = 0.1;
        }

        for (uint32_t line_search_idx = 0; line_search_idx < _line_search_steps; ++line_search_idx)
        {
            // 3.2 get Next Point: x_k1
            for (uint32_t i = 0; i < _data->feature_num(); ++i)
            {
                _x_k1[i] = _x_k[i] + _dir[i] * alpha;
                // 3.2.5 Point Orthant Constraint: a coordinate that changed sign is clamped to 0
                if (_x_k1[i] * _x_k[i] < -1e-15)
                {
                    _x_k1[i] = 0.0;
                }
            }

            // 3.3 calc f_xk1, g_xk1
            if (lr_func_val(_x_k1, _f_xk1, _g_xk1) != 0)
            {
                std::cout << "calc function value failed!" << std::endl;
                eval_ok = false;
                break;
            }
            // 3.4 check line search quit (sufficient decrease with constant 1e-4)
            printf("check line search quit\n _f_xk1:[%lf] _f_xk[%lf] orig [%lf] alpha [%lf]\n", _f_xk1, _f_xk, dir_grad_product, alpha);
            if (_f_xk1 <= _f_xk + 1e-4 * dir_grad_product * alpha)
            {
                break;
            }
            alpha *= backoff;
        }
        if (!eval_ok)
        {
            // the trial point was never evaluated successfully: do not shift it into x_k
            break;
        }
        // 4.0 shift k & k1
        if (shift_xk_xk1() != 0)
        {
            std::cout << "Shift xk xk1 state failed!" << std::endl;
            break;
        }
    }
    std::cout << "OWLQN train complete!" << std::endl;
    return 0;
}
Example #2
0
/* Compute a dogleg step into p for a region of radius delta.
 *
 * The Newton direction is computed first (newton_direction) and taken as-is
 * when its scaled norm does not exceed delta.  Otherwise the gradient
 * direction is computed; depending on its norm and on the length derived
 * from R*g, p becomes a scaled Newton step, a scaled gradient step, or a
 * weighted combination of both (scaled_addition).
 *
 * newton and gradient are overwritten as work vectors; p also serves as
 * temporary storage for R*g before the final step is written to it.
 *
 * Always returns GSL_SUCCESS.
 */
static int
dogleg (const gsl_matrix * r, const gsl_vector * qtf,
        const gsl_vector * diag, double delta,
        gsl_vector * newton, gsl_vector * gradient, gsl_vector * p)
{
  double newton_len, grad_len, rg_len, steepest_len, qtf_len;
  double bg, bq, dq, dq2, sd, sd2, t1, u, t2;
  double newton_coef, grad_coef;

  newton_direction (r, qtf, newton);

#ifdef DEBUG
  printf("newton = "); gsl_vector_fprintf(stdout, newton, "%g"); printf("\n");
#endif

  newton_len = scaled_enorm (diag, newton);

  /* Case 1: the Newton step already fits inside the region */
  if (newton_len <= delta)
    {
      gsl_vector_memcpy (p, newton);
#ifdef DEBUG
      printf("took newton (qnorm = %g  <=   delta = %g)\n", newton_len, delta);
#endif
      return GSL_SUCCESS;
    }

  gradient_direction (r, qtf, diag, gradient);

#ifdef DEBUG
  printf("grad = "); gsl_vector_fprintf(stdout, gradient, "%g"); printf("\n");
#endif

  grad_len = enorm (gradient);

  /* Case 2: zero gradient norm, so shrink the Newton step to length delta */
  if (grad_len == 0)
    {
      scaled_addition (delta / newton_len, newton, 0.0, gradient, p);
#ifdef DEBUG
      printf("took scaled newton because gnorm = 0\n");
#endif
      return GSL_SUCCESS;
    }

  minimum_step (grad_len, diag, gradient);

  /* p is only scratch space here: it receives R*g */
  compute_Rg (r, gradient, p);

#ifdef DEBUG
  printf("mingrad = "); gsl_vector_fprintf(stdout, gradient, "%g"); printf("\n");
  printf("Rg = "); gsl_vector_fprintf(stdout, p, "%g"); printf("\n");
#endif

  rg_len = enorm (p);
  steepest_len = (grad_len / rg_len) / rg_len;

  /* Case 3: the gradient step alone leaves the region, so take it at length delta */
  if (steepest_len > delta)
    {
      scaled_addition (0.0, newton, delta, gradient, p);
#ifdef DEBUG
      printf("took gradient\n");
#endif
      return GSL_SUCCESS;
    }

  /* Case 4: combine the Newton and gradient directions */
  qtf_len = enorm (qtf);

  bg = qtf_len / grad_len;
  bq = qtf_len / newton_len;
  dq = delta / newton_len;
  dq2 = dq * dq;
  sd = steepest_len / delta;
  sd2 = sd * sd;

  t1 = bg * bq * sd;
  u = t1 - dq;
  t2 = t1 - dq * sd2 + sqrt (u * u + (1-dq2) * (1 - sd2));

  newton_coef = dq * (1 - sd2) / t2;
  grad_coef = (1 - newton_coef) * steepest_len;

#ifdef DEBUG
  printf("bnorm = %g\n", qtf_len);
  printf("gnorm = %g\n", grad_len);
  printf("qnorm = %g\n", newton_len);
  printf("delta = %g\n", delta);
  printf("alpha = %g   beta = %g\n", newton_coef, grad_coef);
  printf("took scaled combination of newton and gradient\n");
#endif

  scaled_addition (newton_coef, newton, grad_coef, gradient, p);

  return GSL_SUCCESS;
}