/*
 * add_ref_l_p_a
 *
 * Accumulates contributions derived from R13_a and S31_a into the "_a"
 * outputs (R12_a, T12_a, S12_a, R21_a, T21_a, S21_a, S32_a and *atran_a) and
 * then clears R13_a and S31_a.  The "_a" naming presumably marks adjoint
 * (sensitivity) quantities -- confirm against the matching forward routine.
 *
 * PXX, i1   matrix and integer array handed to dmat_getrs2('t', ...);
 *           presumably a factorization with its pivot indices -- TODO confirm
 * AXX, BXX, CXX, DXX, R23, S32
 *           read-only inputs
 * n         dimension passed to every kernel (all matrices are used as n x n
 *           and all vectors with length n)
 * atran     scale factor applied to v1 before it is added to S32_a
 * v1, v2    vector scratch space, overwritten
 * w1        matrix scratch space, overwritten
 * w2        not referenced by this implementation
 * work      work-space handle; one WORK_DXX matrix is taken from it
 *
 * The notes below give each fused call in terms of the transpose/multiply
 * sequence it replaced in the earlier version of this routine.
 */
static void add_ref_l_p_a(double **PXX, int *i1, double **AXX, double **BXX,
                          double *CXX, double **DXX, double **R23, double *S32,
                          double **R12_a, double **T12_a, double *S12_a,
                          double **R21_a, double **T21_a, double *S21_a,
                          double *S32_a, double **R13_a, double *S31_a,
                          int n, double atran, double *atran_a,
                          double *v1, double *v2, double **w1, double **w2,
                          work_data work)
{
     double **exx_a = get_work1(&work, WORK_DXX);

     /* R12_a <- R12_a + R13_a */
     dmat_add(R12_a, R13_a, R12_a, n, n);

     /* exx_a <- R13_a * BXX^T          (was: trans(BXX), mul) */
     dmat_gxgxmx(0, R13_a, 1, BXX, 1., exx_a, 0., n, n, n);

     /* T12_a <- T12_a + DXX^T * R13_a  (was: trans(DXX), mul, add) */
     dmat_gxgxmx(1, DXX, 0, R13_a, 1., T12_a, 1., n, n, n);

     /* S21_a <- S21_a + S31_a */
     dvec_add(S21_a, S31_a, S21_a, n);

     /* update of exx_a built from the vectors S31_a and CXX */
     dmat_vxvtmx(S31_a, CXX, 1., exx_a, 1., n, n);

     /* v1 <- AXX^T * S31_a             (was: trans(AXX), dm_v_mul) */
     dmat_gxvxmx(1, AXX, S31_a, 1., v1, 0., n, n);

     /* S32_a <- S32_a + atran * v1 */
     dvec_scale(atran, v1, v2, n);
     dvec_add(S32_a, v2, S32_a, n);

     /* contribution to the scalar atran_a */
     *atran_a += dvec_dot(S32, v1, n);

     /* S12_a <- S12_a + R23^T * v1     (was: trans(R23), dm_v_mul, add) */
     dmat_gxvxmx(1, R23, v1, 1., S12_a, 1., n, n);

     /* w1 <- exx_a, then processed in place by dmat_getrs2 with PXX / i1 */
     dmat_copy(w1, exx_a, n, n);
     dmat_getrs2('t', PXX, w1, n, n, i1);

     /* T21_a <- T21_a + w1 */
     dmat_add(T21_a, w1, T21_a, n, n);

     /* R21_a <- R21_a + DXX^T * w1     (was: trans(DXX), mul, add) */
     dmat_gxgxmx(1, DXX, 0, w1, 1., R21_a, 1., n, n, n);

     /* the consumed inputs are reset */
     dmat_zero(R13_a, n, n);
     dvec_zero(S31_a, n);
}
/** \brief Returns the maximum value of the regularization parameter lambda * that gives a non-zero solution. * * @param X feature matrix * @param b class vector * @param sflag standardization flag * - If sflag is 0, compute the maximum value of lambda * without standardization. * - If sflag is 1, given matrix is standardized first and * then the maximum value of lambda is computed. * * @return maximum value of lambda */ double find_lambdamax(const dmatrix *X, const double *b, const int sflag) { double ret; int i, m, n; int mp, mn; double r1, r2; double *ar, *ac; double *tmp_m, *tmp_n; double *avg_x, *std_x; dmatrix *A; m = X->m; n = X->n; dmat_duplicate(X, &A); dmat_copy(X, A); tmp_m = malloc(m*sizeof(double)); tmp_n = malloc(n*sizeof(double)); if (sflag == TRUE) { standardize_data(A, b, &avg_x, &std_x, &ac, &ar); } else { dmat_diagscale(A, b, FALSE, NULL, TRUE); avg_x = std_x = ac = ar = NULL; } /* number of positive class examples */ mp = 0; for (i = 0; i < m; i++) { mp += (b[i]>0 ? 1:0); } mn = m - mp; r1 = (double)mn/m; r2 = (double)mp/m; for (i = 0; i < m; i++) { tmp_m[i] = (b[i] > 0 ? r1 : r2); } dmat_yAmpqTx( A, ac, ar, tmp_m, tmp_n); ret = dmat_norminf(n, tmp_n) / m; free(tmp_n); free(tmp_m); dmat_free(A); if (avg_x) free(avg_x); if (std_x) free(std_x); if (ac) free(ac); if (ar) free(ar); return ret; }
/*
 * add_ref_p_l_a
 *
 * Accumulates contributions derived from R13_a and S31_a into the "_a"
 * outputs (S12_a, S21_a, R23_a and S32_a) and then clears R13_a and S31_a.
 * The "_a" naming presumably marks adjoint (sensitivity) quantities --
 * confirm against the matching forward routine.
 *
 * The explicit dmat_trans + dmat_mul / dm_v_mul + add sequences used
 * previously are expressed with the fused dmat_gxgxmx / dmat_gxvxmx kernels,
 * exactly as add_ref_l_p_a and add_all_u_l_up_a already do for the same
 * operations.  This avoids materialising transposes in w1/w2 and no longer
 * needs a third work matrix.
 *
 * PXX, i1   matrix and integer array handed to dmat_getrs2('t', ...);
 *           presumably a factorization with its pivot indices -- TODO confirm
 * AXX, BXX, CXX, T12, S12, R21, R23
 *           read-only inputs
 * n         dimension passed to every kernel
 * atran     scale factor applied to v1 before it is added to S32_a
 * v1, v2    vector scratch space, overwritten (on return v2 holds atran * v1)
 * w1        matrix scratch space, overwritten
 * w2        no longer referenced; kept for interface compatibility
 * work      work-space handle; two WORK_DXX matrices are taken from it
 */
static void add_ref_p_l_a(double **PXX, int *i1, double **AXX, double **BXX,
                          double *CXX, double **T12, double *S12,
                          double **R21, double **R23,
                          double *S12_a, double *S21_a, double **R23_a,
                          double *S32_a, double **R13_a, double *S31_a,
                          int n, double atran, double *v1, double *v2,
                          double **w1, double **w2, work_data work)
{
     double **DXX_a;
     double **EXX_a;

     DXX_a = get_work1(&work, WORK_DXX);
     EXX_a = get_work1(&work, WORK_DXX);

     /* EXX_a <- R13_a * BXX^T */
     dmat_gxgxmx(0, R13_a, 1, BXX, 1., EXX_a, 0., n, n, n);

     /* DXX_a <- R13_a * T12^T */
     dmat_gxgxmx(0, R13_a, 1, T12, 1., DXX_a, 0., n, n, n);

     /* S21_a <- S21_a + S31_a */
     dvec_add(S21_a, S31_a, S21_a, n);

     /* update of EXX_a built from the vectors S31_a and CXX */
     dmat_vxvtmx(S31_a, CXX, 1., EXX_a, 1., n, n);

     /* v1 <- AXX^T * S31_a */
     dmat_gxvxmx(1, AXX, S31_a, 1., v1, 0., n, n);

     /* S32_a <- S32_a + atran * v1 */
     dvec_scale(atran, v1, v2, n);
     dvec_add(S32_a, v2, S32_a, n);

     /* update of R23_a built from the vectors v1 and S12 */
     dmat_vxvtmx(v1, S12, 1., R23_a, 1., n, n);

     /* S12_a <- S12_a + R23^T * v1 */
     dmat_gxvxmx(1, R23, v1, 1., S12_a, 1., n, n);

     /* w1 <- EXX_a, then processed in place by dmat_getrs2 with PXX / i1 */
     dmat_copy(w1, EXX_a, n, n);
     dmat_getrs2('t', PXX, w1, n, n, i1);

     /* DXX_a <- DXX_a + w1 * R21^T */
     dmat_gxgxmx(0, w1, 1, R21, 1., DXX_a, 1., n, n, n);

     /* R23_a <- R23_a + AXX^T * DXX_a */
     dmat_gxgxmx(1, AXX, 0, DXX_a, 1., R23_a, 1., n, n, n);

     /* the consumed inputs are reset */
     dmat_zero(R13_a, n, n);
     dvec_zero(S31_a, n);
}
/** \brief  Compute search direction using cholesky method (m > n).
 *
 *  Reads the problem data and the current iterate, completes the gradient
 *  pieces gw and gu, fills d1, d2 and d3, and writes the search direction
 *  into dv, dw and du of the variable set.
 *
 *  @param  pdat    problem data (matrix, b, lambda)
 *  @param  vars    solver variables; gw, gu, d1, d2, dv, dw, du are updated
 *  @param  t       barrier parameter; only its reciprocal is used
 *  @param  B       work matrix, overwritten with D^{1/2}*A
 *  @param  BB      work matrix, overwritten with the system that dmat_posv
 *                  solves
 *  @param  tm1     work vector indexed over the m rows, overwritten
 *  @param  bDA     work vector indexed over the n columns, overwritten
 *  @param  d3      work vector indexed over the n columns, overwritten
 */
void compute_searchdir_chol_thin(problem_data_t *pdat, variables_t *vars,
                                 double t, dmatrix *B, dmatrix *BB,
                                 double *tm1, double *bDA, double *d3)
{
    dmatrix *matX1, *matX2;
    double  *ac, *ar, *b;
    double  lambda;
    double  *x, *v, *w, *u;
    double  *dx, *dv, *dw, *du;
    double  *gx, *gv, *gw, *gu;
    double  *g, *h, *z, *expz, *expmz;
    double  *d1, *d2, *Aw;
    double  inv_t, bDb, inv_bDb;
    int     rows, cols, k;

    get_problem_data(pdat, &matX1, &matX2, &ac, &ar, &b, &lambda);
    get_variables(vars, &x, &v, &w, &u, &dx, &dv, &dw, &du,
                  &gx, &gv, &gw, &gu, &g, &h, &z, &expz, &expmz,
                  &d1, &d2, &Aw);

    rows  = matX1->m;
    cols  = matX1->n;
    inv_t = 1.0 / t;

    /* tm1 = D*b and bDb = b'*D*b, where D = diag(h) */
    bDb = 0.0;
    for (k = 0; k < rows; k++)
    {
        tm1[k] = h[k] * b[k];
        bDb   += b[k] * tm1[k];
    }
    inv_bDb = 1.0 / bDb;

    /* bDA = A'*(D*b) */
    dmat_yATx(matX1, tm1, bDA);

    /* B = D^{1/2}*A, then BB = B'*B = A'*D*A */
    dmat_copy(matX1, B);
    dmat_ysqrtx(rows, h, tm1);                  /* tm1 = D^{1/2} */
    dmat_diagscale(B, tm1, FALSE, NULL, FALSE);
    dmat_B_ATA(B, BB);

    /* per-coordinate barrier terms and right-hand side of the dw system */
    for (k = 0; k < cols; k++)
    {
        const double uk    = u[k];
        const double wk    = w[k];
        const double rplus = 1.0 / (uk + wk);
        const double rminu = 1.0 / (uk - wk);
        const double sqsum = uk * uk + wk * wk;
        double reduced;

        gw[k] -= (rplus - rminu) * inv_t;           /* A'*g - (q1-q2)   */
        gu[k]  = lambda - (rplus + rminu) * inv_t;  /* lambda - (q1+q2) */

        d1[k]  = (rplus * rplus + rminu * rminu) * inv_t;
        d2[k]  = (rplus * rplus - rminu * rminu) * inv_t;
        d3[k]  = 2 / sqsum * inv_t;

        /* right-hand side: bDA'*(gv/bDb) - gr2 */
        reduced = gw[k] + 2 * gu[k] * uk * wk / sqsum;
        dw[k]   = bDA[k] * gv[0] * inv_bDb - reduced;
    }

    /* dw = (bDb*S - bDA'*bDA) \ (bDA'*gv - bDb*gr2)
          = (S - (1/bDb)*bDA'*bDA) \ (bDA'*(gv/bDb) - gr2),
       with S = BB + diag(d3) */
    dmat_diagadd(BB, d3);
    dmat_A_axxTpA(-inv_bDb, bDA, BB);
    dmat_posv(BB, dw);

    /* dv = (-bDA*dw - gv)/bDb */
    dv[0] = -(dmat_dot(cols, bDA, dw) + gv[0]) / bDb;

    /* du = -(gu + d2.*dw)./d1 */
    for (k = 0; k < cols; k++)
    {
        du[k] = -(gu[k] + d2[k] * dw[k]) / d1[k];
    }
}
/** \brief  Compute search direction using cholesky method (m < n).
 *
 *  Same role as the thin variant, but the m x m system
 *  D_inv + A*D3_inv*A' is factored instead (marked "SMW" in the code) and
 *  then applied to two right-hand sides.
 *
 *  @param  pdat    problem data (matrix, b, lambda)
 *  @param  vars    solver variables; gw, gu, d1, d2, dv, dw, du are updated
 *  @param  t       barrier parameter
 *  @param  B       work matrix, overwritten with the column-scaled copy of A
 *  @param  BB      work matrix, overwritten with the factored system
 *  @param  tm1     work vector indexed over the m rows, overwritten
 *  @param  bDA     work vector indexed over the n columns, overwritten
 *  @param  d3inv   work vector indexed over the n columns, overwritten
 *  @param  tmp31   work vector indexed over the n columns, overwritten
 *  @param  tmp32   work vector indexed over the n columns, overwritten
 */
void compute_searchdir_chol_fat(problem_data_t *pdat, variables_t *vars,
                                double t, dmatrix *B, dmatrix *BB,
                                double *tm1, double *bDA, double *d3inv,
                                double *tmp31, double *tmp32)
{
    dmatrix *matX1, *matX2;
    double  *ac, *ar, *b;
    double  lambda;
    double  *x, *v, *w, *u;
    double  *dx, *dv, *dw, *du;
    double  *gx, *gv, *gw, *gu;
    double  *g, *h, *z, *expz, *expmz;
    double  *d1, *d2, *Aw;
    double  inv_t, bDb;
    int     rows, cols, k;

    get_problem_data(pdat, &matX1, &matX2, &ac, &ar, &b, &lambda);
    get_variables(vars, &x, &v, &w, &u, &dx, &dv, &dw, &du,
                  &gx, &gv, &gw, &gu, &g, &h, &z, &expz, &expmz,
                  &d1, &d2, &Aw);

    rows  = matX1->m;
    cols  = matX1->n;
    inv_t = 1.0 / t;

    /* tm1 = D*b and bDb = b'*D*b, where D = diag(h) */
    bDb = 0.0;
    for (k = 0; k < rows; k++)
    {
        tm1[k] = h[k] * b[k];
        bDb   += b[k] * tm1[k];
    }

    /* bDA = A'*(D*b); afterwards tm1 is reused for D_inv */
    dmat_yATx(matX1, tm1, bDA);
    dmat_copy(matX1, B);            /* B = A */
    dmat_yinvx(rows, h, tm1);       /* tm1 = D_inv */

    for (k = 0; k < cols; k++)
    {
        const double uk    = u[k];
        const double wk    = w[k];
        const double rplus = 1.0 / (uk + wk);
        const double rminu = 1.0 / (uk - wk);
        const double sqsum = uk * uk + wk * wk;
        double reduced;

        gw[k] -= (rplus - rminu) * inv_t;           /* A'*g - (q1-q2)   */
        gu[k]  = lambda - (rplus + rminu) * inv_t;  /* lambda - (q1+q2) */

        d1[k]  = (rplus * rplus + rminu * rminu) * inv_t;
        d2[k]  = (rplus * rplus - rminu * rminu) * inv_t;

        reduced  = gw[k] + 2 * gu[k] * uk * wk / sqsum;
        d3inv[k] = t * sqsum / 2;                   /* d3^{-1} */

        /* tmp31 temporarily carries the column scaling sqrt(d3inv) */
        tmp31[k] = sqrt(d3inv[k]);

        /* dw and du temporarily carry the two scaled right-hand sides */
        dw[k] = d3inv[k] * bDA[k];                  /* dw := d3inv.*bDA */
        du[k] = d3inv[k] * reduced;                 /* du := d3inv.*gr2 */
    }

    /* B = A*diag(sqrt(d3inv)), so that B*B' = A*D3_inv*A' */
    dmat_diagscale(B, NULL, FALSE, tmp31, FALSE);
    dmat_B_AAT(B, BB);              /* BB = B*B' */

    /* BB = D_inv + A*D3_inv*A' */
    dmat_diagadd(BB, tm1);

    /* SMW, first right-hand side:
       tmp31 = dw - d3inv.*(A'*(BB \ (A*dw)))   (dmat_posv factors BB) */
    dmat_yAx(matX1, dw, tm1);
    dmat_posv(BB, tm1);
    dmat_yATx(matX1, tm1, tmp31);
    dmat_elemprod(cols, d3inv, tmp31, tmp31);
    dmat_waxpby(cols, -1, tmp31, 1, dw, tmp31);

    /* SMW, second right-hand side:
       tmp32 = du - d3inv.*(A'*(BB \ (A*du)))   (dmat_potrs is applied to
       the same BB after dmat_posv) */
    dmat_yAx(matX1, du, tm1);
    dmat_potrs(BB, tm1);
    dmat_yATx(matX1, tm1, tmp32);
    dmat_elemprod(cols, d3inv, tmp32, tmp32);
    dmat_waxpby(cols, -1, tmp32, 1, du, tmp32);

    dv[0] = (-gv[0] + dmat_dot(cols, bDA, tmp32))
          / (bDb - dmat_dot(cols, bDA, tmp31));

    /* dw = -dv*tmp31 - tmp32 */
    dmat_waxpby(cols, -dv[0], tmp31, -1, tmp32, dw);

    /* du = -(gu + d2.*dw)./d1 */
    for (k = 0; k < cols; k++)
    {
        du[k] = -(gu[k] + d2[k] * dw[k]) / d1[k];
    }
}
/** \brief  Trains an l1-regularized logistic regression model with a
 *          Newton-type iteration that stops on the duality gap.
 *
 *  Every iteration re-optimizes the intercept, builds a dual feasible point,
 *  evaluates the duality gap and, unless the gap is below the tolerance,
 *  computes a search direction (pcg when matX1->nz >= 0, otherwise one of
 *  the two cholesky variants) followed by a backtracking line search.
 *
 *  All exit paths share one epilogue that trims the solution, writes back
 *  the warm-start state, reports the iteration counts and releases every
 *  resource acquired here.
 *
 *  @param  X               feature matrix; it is copied, not modified
 *  @param  b               class vector (the working matrix is diag(b)*X,
 *                          or its standardized counterpart)
 *  @param  lambda          regularization parameter
 *  @param  to              training options; sflag, cflag, verbose_level,
 *                          tolerance and ktolerance are read here
 *  @param  initial_x       optional warm start of length 2n+1, laid out as
 *                          (v,w,u); when non-NULL it is also overwritten
 *                          with the final iterate.  NULL selects the
 *                          default starting point.
 *  @param  initial_t       initial value of t; read and written only when
 *                          initial_x is non-NULL
 *  @param  sol             optional output of length n+1 (intercept followed
 *                          by the trimmed coefficients); may be NULL
 *  @param  total_ntiter    optional output: number of Newton iterations
 *  @param  total_pcgiter   optional output: accumulated pcg iterations
 *
 *  @return STATUS_SOLUTION_FOUND when the duality gap drops below
 *          to.tolerance, STATUS_MAX_LS_ITER_EXCEEDED when the line search
 *          returns a negative step, STATUS_MAX_NT_ITER_EXCEEDED otherwise.
 */
int l1_logreg_train(dmatrix *X, double *b, double lambda, train_opts to,
                    double *initial_x, double *initial_t, double *sol,
                    int *total_ntiter, int *total_pcgiter)
{
    /* problem data */
    problem_data_t  prob;
    variables_t     vars;

    dmatrix *matX1;     /* matX1 = diag(b)*X_std */
    dmatrix *matX2;     /* matX2 = X_std.^2 (only for pcg) */
    double *ac, *ar;
    double *avg_x, *std_x;

    int m, n, ntiter, pcgiter, status;
    int converged;      /* TRUE once the duality gap meets the tolerance */
    double pobj, dobj, gap;
    double t, s, maxAnu;

    double *g,  *h,  *z,  *expz, *expmz;
    double *x,  *v,  *w,  *u;
    double *dx, *dv, *dw, *du;
    double *gv, *gw, *gu, *gx;
    double *d1, *d2, *Aw;

    /* pcg variables */
    pcg_status_t pcgstat;
    adata_t adata;
    mdata_t mdata;
    double *precond;

    /* temporary variables */
    double *tm1, *tn1, *tn2, *tn3, *tn4, *tx1;

    /* temporary variables for dense case (cholesky) */
    dmatrix *B;         /* m x n */
    dmatrix *BB;        /* n x n (or m x m) */

    char format_buf[PRINT_BUF_SIZE];

#if INTERNAL_PLOT
    dmatrix *internal_plot;
    dmat_new_dense(&internal_plot, 3, MAX_NT_ITER);
    memset(internal_plot->val,0,sizeof(double)*3*MAX_NT_ITER);
    /* row 1: cum_nt_iter, row 2: cum_pcg_iter, row 3: duality gap */
#endif

    p2func_progress print_progress = NULL;

    /*
     *  INITIALIZATION
     */
    s         = 1.0;
    pobj      =  DBL_MAX;
    dobj      = -DBL_MAX;
    pcgiter   = 0;
    matX1     = NULL;
    matX2     = NULL;
    converged = FALSE;

    init_pcg_status(&pcgstat);

    dmat_duplicate(X, &matX1);
    dmat_copy(X, matX1);

    m = matX1->m;
    n = matX1->n;

    if (to.sflag == TRUE)
    {
        /* standardize_data not only standardizes the data,
           but also multiplies diag(b). */
        standardize_data(matX1, b, &avg_x, &std_x, &ac, &ar);
    }
    else
    {
        /* matX1 = diag(b)*X */
        dmat_diagscale(matX1, b, FALSE, NULL, TRUE);
        avg_x = std_x = ac = ar = NULL;
    }

    if (matX1->nz >= 0)     /* only for pcg */
    {
        dmat_elemAA(matX1, &matX2);
    }
    else
    {
        matX2 = NULL;
    }

    set_problem_data(&prob, matX1, matX2, ac, ar, b, lambda, avg_x, std_x);

    create_variables(&vars, m, n);
    get_variables(&vars, &x, &v, &w, &u, &dx, &dv, &dw, &du,
                  &gx, &gv, &gw, &gu, &g, &h, &z, &expz, &expmz,
                  &d1, &d2, &Aw);

    allocate_temporaries(m, n, (matX1->nz >= 0),
                         &tm1, &tn1, &tn2, &tn3, &tn4, &tx1,
                         &precond, &B, &BB);

    if (initial_x == NULL)
    {
        /* default start: v = 0, w = 0, u = 1 */
        dmat_vset(1, 0.0, v);
        dmat_vset(n, 0.0, w);
        dmat_vset(n, 1.0, u);
        dmat_vset(n+n+1, 0, dx);
        t = min(max(1.0, 1.0 / lambda), 2.0 * n / ABSTOL);
    }
    else
    {
        /* warm start from the caller-supplied iterate and t */
        dmat_vcopy(n+n+1, initial_x, x);
        dmat_vset(n+n+1, 0, dx);
        t = *initial_t;
    }

    set_adata(&adata, matX1, ac, ar, b, h, d1, d2);
    set_mdata(&mdata, m, n, precond);

    /* select printing function and format according to
       verbose level and method type (pcg/direct) */
    if (to.verbose_level>=2)
    {
        init_progress((matX1->nz >= 0), to.verbose_level,
                      format_buf, &print_progress);
    }

    /*** MAIN LOOP ************************************************************/

    for (ntiter = 0; ntiter < MAX_NT_ITER; ntiter++)
    {
        /*
         *  Sets v as the optimal value of the intercept.
         */
        dmat_yAmpqx(matX1, ac, ar, w, Aw);
        optimize_intercept(v, z, expz, expmz, tm1, b, Aw, m);

        /*
         *  Constructs dual feasible point nu.
         */
        fprimes(m, expz, expmz, g, h);

        /* partially computes the gradient of phi.
           the rest part of the gradient will be completed while
           computing the search direction. */
        gv[0] = dmat_dot(m, b, g);                  /* gv = b'*g */
        dmat_yAmpqTx(matX1, ac, ar, g, gw);         /* gw = A'*g */

        dmat_waxpby(m, -1, g, 0, NULL, tm1);        /* nu = -g */
        maxAnu = dmat_norminf(n, gw);               /* max(A'*nu) */

        if (maxAnu > lambda)
        {
            dmat_waxpby(m, lambda / maxAnu, tm1, 0.0, NULL, tm1);
        }

        /*
         *  Evaluates duality gap.
         */
        pobj = logistic_loss2(m,z,expz,expmz)/m + lambda*dmat_norm1(n,w);
        dobj = max(nentropy(m, tm1) / m, dobj);
        gap  = pobj - dobj;

#if INTERNAL_PLOT
        internal_plot->val[0*MAX_NT_ITER+ntiter] = (double)ntiter;
        internal_plot->val[1*MAX_NT_ITER+ntiter] = (double)pcgiter;
        internal_plot->val[2*MAX_NT_ITER+ntiter] = gap;
#endif
        if (to.verbose_level>=2)
        {
            (*print_progress)(format_buf, ntiter, gap, pobj, dobj, s, t,
                              pcgstat.flag, pcgstat.relres, pcgstat.iter);
        }

        /*
         *  Quits if gap < tolerance.  gw still holds A'*g at this point,
         *  which is what the trimming step in the epilogue looks at.
         */
        if (gap < to.tolerance)
        {
            converged = TRUE;
            break;
        }

        /*
         *  Updates t
         */
        if (s >= 0.5)
        {
            t = max(min(2.0 * n * MU / gap, MU * t), t);
        }
        else if (s < 1e-5)
        {
            t = 1.1*t;
        }

        /*
         *  Computes search direction.
         */
        if (matX1->nz >= 0)
        {
            /* pcg */
            compute_searchdir_pcg(&prob, &vars, t, s, gap, &pcgstat,
                                  &adata, &mdata, precond, tm1, tn1, tx1);
            pcgiter += pcgstat.iter;
        }
        else
        {
            /* direct */
            if (n > m)
            {
                /* direct method for n > m, SMW */
                compute_searchdir_chol_fat(&prob, &vars, t, B, BB,
                                           tm1, tn1, tn2, tn3, tn4);
            }
            else
            {
                /* direct method for n <= m */
                compute_searchdir_chol_thin(&prob, &vars, t, B, BB,
                                            tm1, tn1, tn2);
            }
        }

        /*
         *  Backtracking linesearch & update x = (v,w,u) and z.
         */
        s = backtracking_linesearch(&prob, &vars, t, tm1, tx1);
        if (s < 0)
        {
            break;          /* BLS error */
        }
    }
    /*** END OF MAIN LOOP *****************************************************/

    /*
     *  Termination status.
     */
    if (converged)
    {
        status = STATUS_SOLUTION_FOUND;
    }
    else if (s < 0)
    {
        status = STATUS_MAX_LS_ITER_EXCEEDED;
    }
    else /* ntiter == MAX_NT_ITER */
    {
        status = STATUS_MAX_NT_ITER_EXCEEDED;
    }

    /*
     *  Shared epilogue for normal and abnormal termination.
     */
    if (sol != NULL)
    {
        /* trim solution: a coefficient is kept only where |gw| exceeds
           ktolerance*lambda */
        int i;
        double lambda_threshold;

        lambda_threshold = to.ktolerance*lambda;
        sol[0] = x[0];

        for (i = 0; i < n; i++)
        {
            sol[i+1] = (fabs(gw[i])>lambda_threshold)? x[i+1] : 0.0;
        }

        /* if standardized, sol = coeff/std */
        if (to.sflag == TRUE && to.cflag == FALSE)
        {
            dmat_elemdivi(n, sol+1, std_x, sol+1);
            sol[0] -= dmat_dot(n, avg_x, sol+1);
        }
    }

    if (initial_x != NULL)
    {
        /* hand the final iterate back for a later warm start */
        dmat_vcopy(n+n+1, x, initial_x);
        *initial_t = t;
    }

    if (total_pcgiter) *total_pcgiter = pcgiter;
    if (total_ntiter ) *total_ntiter  = ntiter;

    /* free memory */
    free_variables(&vars);
    free_temporaries(tm1, tn1, tn2, tn3, tn4, tx1, precond, B, BB);
    free_problem_data(&prob);

#if INTERNAL_PLOT
    write_mm_matrix("internal_plot",internal_plot,"",TYPE_G);
    dmat_free(internal_plot);
#endif
    return status;
}
/*
 * add_all_u_l_up_a
 *
 * Accumulates contributions derived from R13_a, T31_a and S31_a into the
 * "_a" outputs (R23_a, T32_a and S32_a) and then clears R13_a, T31_a and
 * S31_a.  The "_a" naming presumably marks adjoint (sensitivity) quantities
 * -- confirm against the matching forward routine.
 *
 * PXX, i1   matrix and integer array handed to dmat_getrs2('t', ...);
 *           presumably a factorization with its pivot indices -- TODO confirm
 * AXX, BXX, CXX, T12, S12, R21, T32
 *           read-only inputs
 * n         dimension passed to every kernel
 * atran     scale factor applied to v1 before it is added to S32_a
 * v1, v2    vector scratch space, overwritten
 * w1        matrix scratch space, overwritten
 * w2        not referenced by this implementation
 * work      work-space handle; two WORK_DXX matrices are taken from it
 *
 * The notes below give each fused call in terms of the transpose/multiply
 * sequence it replaced in the earlier version of this routine.
 */
static void add_all_u_l_up_a(double **PXX, int *i1, double **AXX, double **BXX,
                             double *CXX, double **T12, double *S12,
                             double **R21, double **T32,
                             double **R23_a, double **T32_a, double *S32_a,
                             double **R13_a, double **T31_a, double *S31_a,
                             int n, double atran, double *v1, double *v2,
                             double **w1, double **w2, work_data work)
{
     double **dxx_a = get_work1(&work, WORK_DXX);
     double **exx_a = get_work1(&work, WORK_DXX);

     /* exx_a <- T31_a * T32^T           (was: trans(T32), mul) */
     dmat_gxgxmx(0, T31_a, 1, T32, 1., exx_a, 0., n, n, n);

     /* T32_a <- T32_a + AXX^T * T31_a   (was: trans(AXX), mul, add) */
     dmat_gxgxmx(1, AXX, 0, T31_a, 1., T32_a, 1., n, n, n);

     /* exx_a <- exx_a + R13_a * BXX^T   (was: trans(BXX), mul, add) */
     dmat_gxgxmx(0, R13_a, 1, BXX, 1., exx_a, 1., n, n, n);

     /* dxx_a <- R13_a * T12^T           (was: trans(T12), mul) */
     dmat_gxgxmx(0, R13_a, 1, T12, 1., dxx_a, 0., n, n, n);

     /* update of exx_a built from the vectors S31_a and CXX */
     dmat_vxvtmx(S31_a, CXX, 1., exx_a, 1., n, n);

     /* v1 <- AXX^T * S31_a              (was: trans(AXX), dm_v_mul) */
     dmat_gxvxmx(1, AXX, S31_a, 1., v1, 0., n, n);

     /* S32_a <- S32_a + atran * v1 */
     dvec_scale(atran, v1, v2, n);
     dvec_add(S32_a, v2, S32_a, n);

     /* update of R23_a built from the vectors v1 and S12 */
     dmat_vxvtmx(v1, S12, 1., R23_a, 1., n, n);

     /* w1 <- exx_a, then processed in place by dmat_getrs2 with PXX / i1 */
     dmat_copy(w1, exx_a, n, n);
     dmat_getrs2('t', PXX, w1, n, n, i1);

     /* dxx_a <- dxx_a + w1 * R21^T      (was: trans(R21), mul, add) */
     dmat_gxgxmx(0, w1, 1, R21, 1., dxx_a, 1., n, n, n);

     /* R23_a <- R23_a + AXX^T * dxx_a   (was: trans(AXX), mul, add) */
     dmat_gxgxmx(1, AXX, 0, dxx_a, 1., R23_a, 1., n, n, n);

     /* the consumed inputs are reset */
     dmat_zero(R13_a, n, n);
     dmat_zero(T31_a, n, n);
     dvec_zero(S31_a, n);
}
// R=B-A*X void dsolve_residual(int n, int NRHS, double *R, int LDR, const double *A, int LDA, const double *X, int LDX, const double *B, int LDB) { dmat_copy(n,NRHS,R,LDR,B,LDB); // R=B dmat_sub_prod(n,n,NRHS,R,LDR,A,LDA,X,LDX); // R=R-A*X }