int L1rlrOwlqnAlgorithm::train() { // the main difference between this implementation and OWLQN paper, is we calc gradient after value // this is because the calc procedure between value & gradient is similar, we canc calc gradient after // value with a little calc cost double alpha = 0.0, backoff = 0.0; _iter = 0; // init _g_xk lr_func_val(_x_k1, _f_xk1, _g_xk1); _f_xk = _f_xk1; for (uint32_t i = 0; i < _data->feature_num(); ++i) { _g_xk[i] = _g_xk1[i]; } for (_iter = 0; _iter < _iters; ++_iter ) { // 1.0 check convergence if (check_avg_convergence()) { std::cout << "OWLQN convergenced!" << std::endl; break; } // 2.0 get LBFGS Newton Direction if (l1_direction() != 0) { std::cout << "Calc L1 Direction failed!" << std::endl; break; } // 2.1 tow-loop calc p_k = - H_k g_xk if (newton_direction() != 0) { std::cout << "Calc LBFGS Newton Direction failed!" << std::endl; break; } // 2.2 Orthant Projection if (orthant_projection() != 0) { std::cout << "Orthant projection failed!" << std::endl; break; } // 2.3 check search direction double dir_grad_product = check_direction(); if (dir_grad_product >= 0) { std::cout << "Check Search Direction failed! Please check gradient & search direction!" << std::endl; break; } // 3.0 line search for step length alpha_k // 3.1 set alpha & backoff init parameter alpha = 1.0; backoff = 0.5; if ( 0 == _iter ) { alpha = 1 / _dir.l2_norm(); backoff = 0.1; } for (uint32_t line_search_idx = 0; line_search_idx < _line_search_steps; ++line_search_idx) { // 3.2 get Next Point: x_k1 for (uint32_t i = 0; i < _data->feature_num(); ++i) { _x_k1[i] = _x_k[i] + _dir[i] * alpha; // 3.2.5 Point Orthant Constraint if (_x_k1[i] * _x_k[i] < -1e-15) { _x_k1[i] = 0.0; } } // 3.3 calc f_xk1, g_xk1 if (lr_func_val(_x_k1, _f_xk1, _g_xk1) != 0) { std::cout << "calc function value failed!" << std::endl; break; } // 3.4 check line search quit printf("check line search quit\n _f_xk1:[%lf] _f_xk[%lf] orig [%lf] alpha [%lf]\n", _f_xk1, _f_xk, dir_grad_product, alpha); if (_f_xk1 <= _f_xk + 1e-4 * dir_grad_product * alpha) { break; } alpha *= backoff; } // 4.0 shift k & k1 if (shift_xk_xk1() != 0) { std::cout << "Shift xk xk1 state failed!" << std::endl; break; } } std::cout << "OWLQN train complete!" << std::endl; return 0; }
static int dogleg (const gsl_matrix * r, const gsl_vector * qtf, const gsl_vector * diag, double delta, gsl_vector * newton, gsl_vector * gradient, gsl_vector * p) { double qnorm, gnorm, sgnorm, bnorm, temp; newton_direction (r, qtf, newton); #ifdef DEBUG printf("newton = "); gsl_vector_fprintf(stdout, newton, "%g"); printf("\n"); #endif qnorm = scaled_enorm (diag, newton); if (qnorm <= delta) { gsl_vector_memcpy (p, newton); #ifdef DEBUG printf("took newton (qnorm = %g <= delta = %g)\n", qnorm, delta); #endif return GSL_SUCCESS; } gradient_direction (r, qtf, diag, gradient); #ifdef DEBUG printf("grad = "); gsl_vector_fprintf(stdout, gradient, "%g"); printf("\n"); #endif gnorm = enorm (gradient); if (gnorm == 0) { double alpha = delta / qnorm; double beta = 0; scaled_addition (alpha, newton, beta, gradient, p); #ifdef DEBUG printf("took scaled newton because gnorm = 0\n"); #endif return GSL_SUCCESS; } minimum_step (gnorm, diag, gradient); compute_Rg (r, gradient, p); /* Use p as temporary space to compute Rg */ #ifdef DEBUG printf("mingrad = "); gsl_vector_fprintf(stdout, gradient, "%g"); printf("\n"); printf("Rg = "); gsl_vector_fprintf(stdout, p, "%g"); printf("\n"); #endif temp = enorm (p); sgnorm = (gnorm / temp) / temp; if (sgnorm > delta) { double alpha = 0; double beta = delta; scaled_addition (alpha, newton, beta, gradient, p); #ifdef DEBUG printf("took gradient\n"); #endif return GSL_SUCCESS; } bnorm = enorm (qtf); { double bg = bnorm / gnorm; double bq = bnorm / qnorm; double dq = delta / qnorm; double dq2 = dq * dq; double sd = sgnorm / delta; double sd2 = sd * sd; double t1 = bg * bq * sd; double u = t1 - dq; double t2 = t1 - dq * sd2 + sqrt (u * u + (1-dq2) * (1 - sd2)); double alpha = dq * (1 - sd2) / t2; double beta = (1 - alpha) * sgnorm; #ifdef DEBUG printf("bnorm = %g\n", bnorm); printf("gnorm = %g\n", gnorm); printf("qnorm = %g\n", qnorm); printf("delta = %g\n", delta); printf("alpha = %g beta = %g\n", alpha, beta); printf("took scaled combination of newton and gradient\n"); #endif scaled_addition (alpha, newton, beta, gradient, p); } return GSL_SUCCESS; }