// Evaluate a triangle wave at x, shifted right by a quarter period, and
// pass the result through soft_threshold.
//
// The wave parameters are fixed inside this function:
//   period    = 8.0
//   amplitude = 1.0
//   cutoff    = 0.05 (threshold handed to soft_threshold)
vec triangle_slope(vec x)
{
    double period    = 8.0;
    double amplitude = 1.0;
    double cutoff    = 0.05;

    return soft_threshold(
        triangle_wave(x - period/4, period, amplitude),
        cutoff);
}
// Proximal operator of the l1 penalty, applied entry by entry:
//     P[i] = soft_threshold(W[i] - mu[i], lambda)
//
// W      : input matrix
// lambda : threshold passed to soft_threshold
// mu     : matrix subtracted from W before thresholding
// P      : output matrix, overwritten
//
// All three buffers are walked as flat arrays of Height()*Width() entries
// taken from W.
// NOTE(review): this presumes mu and P have the same size as W and that the
// buffers are stored contiguously -- confirm against the matrix type.
void l1::proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P)
{
    double *in    = W.Buffer();
    double *shift = mu.Buffer();
    double *out   = P.Buffer();

    const int count = W.Height()*W.Width();

    for (int idx = 0; idx < count; ++idx) {
        const double shifted = in[idx] - shift[idx];
        out[idx] = soft_threshold(shifted, lambda);
    }
}
/*** step size for the coordinate update of beta(j) ***
 *
 * With g(j) = cdescent_gradient (cd, j) and s(j) = cdescent_scale2 (cd, j):
 *
 *   eta(j) = S(g(j) / s(j) + beta(j), w(j) * lambda1 / s(j)) - beta(j)
 *
 * where S is soft_threshold. The weight w(j) is applied only when cd->w is
 * set; otherwise the threshold is lambda1 / s(j).
 */
double
cdescent_beta_stepsize (const cdescent *cd, const int j)
{
	const double	s_j = cdescent_scale2 (cd, j);
	const double	scaled_grad = cdescent_gradient (cd, j) / s_j;
	const double	beta_j = cd->beta->data[j];
	double			thresh = cd->lambda1 / s_j;

	if (cd->w) {
		thresh *= cd->w->data[j];
	}

	return soft_threshold (scaled_grad + beta_j, thresh) - beta_j;
}
/* Coordinate descent, multi-task
 *
 * This function will only work for quadratic functions like linear loss and
 * squared hinge loss, since it assumes that the Newton step for each variable
 * is exact. It's easy to modify it to loop over the Newton steps until some
 * convergence is achieved, e.g., for logistic loss.
 *
 * Parameters:
 *   g           model state; beta, active, loss, l1loss and floss are updated
 *   step_func   fills d1 and d2 for the current column/task; they are used
 *               here as the numerator and denominator of the Newton step
 *   maxepochs   maximum number of sweeps over all (task, variable) pairs
 *   maxiters    unused, kept for interface compatibility
 *   lambda1     soft-threshold (l1) penalty
 *   lambda2     l2 penalty, applied as a 1 / (1 + lambda2) shrinkage
 *   gamma       weight of the fusion penalty (only used if g->dofusion)
 *   trunc       unused, kept for interface compatibility
 *   numactiveK  output, length K: number of active variables per task
 *
 * Returns the total number of active variables on convergence, CDFAILURE if
 * maxepochs was exhausted without converging, or FAILURE if the sample could
 * not be initialised or a column could not be read.
 *
 * Convergence uses an active-set strategy: once the relative change in loss
 * drops below g->tol the active set is reset to every non-ignored variable;
 * if the loss is still converged one epoch later and the active set did not
 * change, the function terminates. Otherwise the active set is grown (the
 * per-task size doubles each time) from g->grad_array and iteration resumes.
 */
int cd_gmatrix(gmatrix *g,
      step step_func,
      const int maxepochs, const int maxiters,
      const double lambda1, const double lambda2, const double gamma,
      const double trunc, int *numactiveK)
{
   const int p1 = g->p + 1, K = g->K;
   int j, k, allconverged = 0, numactive = 0,
       epoch = 1, good = FALSE;
   int ret = CDFAILURE;
   double beta_new, delta;
   const double l2recip = 1 / (1 + lambda2);
   sample sm;
   double *beta_old = NULL;
   int *active_old = NULL;
   /* widen before multiplying so p1 * K cannot overflow int */
   long pkj, p1K1 = (long)p1 * K - 1, kK1;
   int v1, v2, e, l;
   int nE = K * (K - 1) / 2;
   double d1, d2, df1, df2, sv;
   double beta_pkj;
   double lossold = g->loss;
   int mult = 2, idx;
   int mx, i, m;
   double tmp;

   (void)maxiters;
   (void)trunc;

   if(!sample_init(&sm))
      return FAILURE;

   CALLOCTEST(beta_old, (long)p1 * K, sizeof(double));
   CALLOCTEST(active_old, (long)p1 * K, sizeof(int));

   while(epoch <= maxepochs)
   {
      numactive = 0;

      for(k = 0 ; k < K ; k++)
      {
         numactiveK[k] = 0;
         kK1 = k * (K - 1);

         for(j = 0 ; j < p1; j++)
         {
            pkj = (long)p1 * k + j;
            beta_pkj = g->beta[pkj];
            beta_old[pkj] = beta_pkj;

            if(g->active[pkj])
            {
               if(!g->nextcol(g, &sm, j, NA_ACTION_PROPORTIONAL))
               {
                  /* release the work buffers instead of leaking them */
                  ret = FAILURE;
                  goto cleanup;
               }

               step_func(&sm, g, k, &d1, &d2);

               /* don't penalise intercept */
               if(j > 0)
               {
                  /* fusion penalty */
                  df1 = 0;
                  df2 = 0;

                  if(g->dofusion)
                  {
                     /* for each task k, compute derivative over
                      * the K-1 other tasks, as each task has K-1 pairs */
                     for(l = K - 2 ; l >= 0 ; --l)
                     {
                        e = g->edges[l + kK1];
                        v1 = g->pairs[e];
                        v2 = g->pairs[e + nE];
                        sv = g->beta[j + v1 * p1] * g->C[e + v1 * nE]
                           + g->beta[j + v2 * p1] * g->C[e + v2 * nE];
                        df1 += sv * g->C[e + k * nE];
                     }

                     /* derivatives of fusion loss */
                     df1 *= gamma;
                     df2 = gamma * g->diagCC[k];
                  }

                  beta_new = soft_threshold(
                        beta_pkj - (d1 + df1) / (d2 + df2), lambda1)
                     * l2recip;
               }
               else
                  beta_new = beta_pkj - d1 / d2;

               /* numerically close enough to zero */
               if(fabs(beta_new) < ZERO_THRESH)
                  beta_new = 0;

               delta = beta_new - beta_pkj;

               if(delta != 0)
                  updateloss(g, beta_pkj, delta, sm.x, j, k,
                        lambda1, gamma, nE);

               /* intercept always deemed active */
               g->active[pkj] = (j == 0) || (g->beta[pkj] != 0);
            }

            numactiveK[k] += g->active[pkj];
            numactive += g->active[pkj];
         }
      }

      /* recompute the full loss: squared error over g->lp and g->y ... */
      g->loss = 0;
      for(i = 0 ; i < g->ncurr ; i++)
      {
         for(k = 0 ; k < g->K ; k++)
         {
            m = i + k * g->ncurr;
            tmp = (g->lp[m] - g->y[m]) * (g->lp[m] - g->y[m]);
            g->loss += tmp;
         }
      }
      g->loss /= (2.0 * g->ncurr);

      /* ... plus the l1 penalty (intercepts excluded) ... */
      g->l1loss = 0;
      for(j = 1 ; j < p1 ; j++)
         for(k = 0 ; k < K ; k++)
            g->l1loss += fabs(g->beta[j + k * p1]);
      g->loss += lambda1 * g->l1loss;

      /* ... plus the fusion penalty over all task pairs */
      if(g->dofusion)
      {
         g->floss = 0;
         for(j = p1 - 1 ; j >= 1 ; --j)
         {
            for(e = nE - 1 ; e >= 0 ; --e)
            {
               v1 = g->pairs[e];
               v2 = g->pairs[e + nE];
               sv = g->beta[j + v1 * p1] * g->C[e + v1 * nE]
                  + g->beta[j + v2 * p1] * g->C[e + v2 * nE];
               g->floss += sv * sv;
            }
         }
         g->loss += gamma * 0.5 * g->floss;
      }

      if(fabs(g->loss - lossold) / fabs(lossold) < g->tol)
         allconverged++;
      else
         allconverged = 0;

      if(allconverged == 1)
      {
         /* reset active-set to contain all (non monomorphic) coordinates, in
          * order to check whether non-active coordinates become active again
          * or vice-versa */
         for(j = p1K1 ; j >= 0 ; --j)
         {
            active_old[j] = g->active[j];
            g->active[j] = !g->ignore[j];
         }

         if(g->verbose)
         {
            timestamp();
            printf(" resetting activeset at epoch %d, loss: %.6f floss: %.6f\n",
                  epoch, g->loss, g->floss);
         }
         mult = 2;
      }
      else if(allconverged == 2)
      {
         for(j = p1K1 ; j >= 0 ; --j)
            if(g->active[j] != active_old[j])
               break;

         /* loop ran to completion: active set is unchanged, we are done */
         if(j < 0)
         {
            if(g->verbose)
            {
               timestamp();
               printf(" terminating at epoch %d with %d active vars\n",
                     epoch, numactive);
            }
            good = TRUE;
            break;
         }

         if(g->verbose)
         {
            timestamp();
            printf(" active set changed, %d active vars, mx:", numactive);
         }

         /* keep iterating over existing active set, keep track
          * of the current active set */
         for(j = p1K1 ; j >= 0 ; --j)
            active_old[j] = g->active[j];

         /* double the size of the active set */
         for(k = 0 ; k < K ; k++)
         {
            mx = mult * numactiveK[k];
            if(mx > p1)
               mx = p1;

            /* the per-task sizes belong to the verbose line printed above */
            if(g->verbose)
               printf("%d ", mx);

            for(j = mx - 1 ; j >= 0 ; --j)
            {
               idx = g->grad_array[j + k * p1].index + k * p1;
               g->active[idx] = !g->ignore[idx];
            }
         }

         if(g->verbose)
            printf("\n");

         allconverged = 0;
         mult *= 2;
      }

      epoch++;
      lossold = g->loss;
   }

   if(g->verbose)
      printf("\n");

   ret = good ? numactive : CDFAILURE;

cleanup:
   FREENULL(beta_old);
   FREENULL(active_old);

   return ret;
}
int main(int argc, char **argv) { const int MAX_ITER = 50; const double RELTOL = 1e-2; const double ABSTOL = 1e-4; /* * Some bookkeeping variables for MPI. The 'rank' of a process is its numeric id * in the process pool. For example, if we run a program via `mpirun -np 4 foo', then * the process ranks are 0 through 3. Here, N and size are the total number of processes * running (in this example, 4). */ int rank; int size; MPI_Init(&argc, &argv); // Initialize the MPI execution environment MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Determine current running process MPI_Comm_size(MPI_COMM_WORLD, &size); // Total number of processes double N = (double) size; // Number of subsystems/slaves for ADMM /* Read in local data */ int skinny; // A flag indicating whether the matrix A is fat or skinny FILE *f; int m, n; int row, col; double entry; /* * Subsystem n will look for files called An.dat and bn.dat * in the current directory; these are its local data and do not need to be * visible to any other processes. Note that * m and n here refer to the dimensions of the *local* coefficient matrix. 
*/ /* Read A */ char s[20]; sprintf(s, "data/A%d.dat", rank + 1); printf("[%d] reading %s\n", rank, s); f = fopen(s, "r"); if (f == NULL) { printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s); exit(EXIT_FAILURE); } mm_read_mtx_array_size(f, &m, &n); gsl_matrix *A = gsl_matrix_calloc(m, n); for (int i = 0; i < m*n; i++) { row = i % m; col = floor(i/m); fscanf(f, "%lf", &entry); gsl_matrix_set(A, row, col, entry); } fclose(f); /* Read b */ sprintf(s, "data/b%d.dat", rank + 1); printf("[%d] reading %s\n", rank, s); f = fopen(s, "r"); if (f == NULL) { printf("[%d] ERROR: %s does not exist, exiting.\n", rank, s); exit(EXIT_FAILURE); } mm_read_mtx_array_size(f, &m, &n); gsl_vector *b = gsl_vector_calloc(m); for (int i = 0; i < m; i++) { fscanf(f, "%lf", &entry); gsl_vector_set(b, i, entry); } fclose(f); m = A->size1; n = A->size2; skinny = (m >= n); /* * These are all variables related to ADMM itself. There are many * more variables than in the Matlab implementation because we also * require vectors and matrices to store various intermediate results. * The naming scheme follows the Matlab version of this solver. 
*/ double rho = 1.0; gsl_vector *x = gsl_vector_calloc(n); gsl_vector *u = gsl_vector_calloc(n); gsl_vector *z = gsl_vector_calloc(n); gsl_vector *y = gsl_vector_calloc(n); gsl_vector *r = gsl_vector_calloc(n); gsl_vector *zprev = gsl_vector_calloc(n); gsl_vector *zdiff = gsl_vector_calloc(n); gsl_vector *q = gsl_vector_calloc(n); gsl_vector *w = gsl_vector_calloc(n); gsl_vector *Aq = gsl_vector_calloc(m); gsl_vector *p = gsl_vector_calloc(m); gsl_vector *Atb = gsl_vector_calloc(n); double send[3]; // an array used to aggregate 3 scalars at once double recv[3]; // used to receive the results of these aggregations double nxstack = 0; double nystack = 0; double prires = 0; double dualres = 0; double eps_pri = 0; double eps_dual = 0; /* Precompute and cache factorizations */ gsl_blas_dgemv(CblasTrans, 1, A, b, 0, Atb); // Atb = A^T b /* * The lasso regularization parameter here is just hardcoded * to 0.5 for simplicity. Using the lambda_max heuristic would require * network communication, since it requires looking at the *global* A^T b. 
*/ double lambda = 0.5; if (rank == 0) { printf("using lambda: %.4f\n", lambda); } gsl_matrix *L; /* Use the matrix inversion lemma for efficiency; see section 4.2 of the paper */ if (skinny) { /* L = chol(AtA + rho*I) */ L = gsl_matrix_calloc(n,n); gsl_matrix *AtA = gsl_matrix_calloc(n,n); gsl_blas_dsyrk(CblasLower, CblasTrans, 1, A, 0, AtA); gsl_matrix *rhoI = gsl_matrix_calloc(n,n); gsl_matrix_set_identity(rhoI); gsl_matrix_scale(rhoI, rho); gsl_matrix_memcpy(L, AtA); gsl_matrix_add(L, rhoI); gsl_linalg_cholesky_decomp(L); gsl_matrix_free(AtA); gsl_matrix_free(rhoI); } else { /* L = chol(I + 1/rho*AAt) */ L = gsl_matrix_calloc(m,m); gsl_matrix *AAt = gsl_matrix_calloc(m,m); gsl_blas_dsyrk(CblasLower, CblasNoTrans, 1, A, 0, AAt); gsl_matrix_scale(AAt, 1/rho); gsl_matrix *eye = gsl_matrix_calloc(m,m); gsl_matrix_set_identity(eye); gsl_matrix_memcpy(L, AAt); gsl_matrix_add(L, eye); gsl_linalg_cholesky_decomp(L); gsl_matrix_free(AAt); gsl_matrix_free(eye); } /* Main ADMM solver loop */ int iter = 0; if (rank == 0) { printf("%3s %10s %10s %10s %10s %10s\n", "#", "r norm", "eps_pri", "s norm", "eps_dual", "objective"); } double startAllTime, endAllTime; startAllTime = MPI_Wtime(); while (iter < MAX_ITER) { /* u-update: u = u + x - z */ gsl_vector_sub(x, z); gsl_vector_add(u, x); /* x-update: x = (A^T A + rho I) \ (A^T b + rho z - y) */ gsl_vector_memcpy(q, z); gsl_vector_sub(q, u); gsl_vector_scale(q, rho); gsl_vector_add(q, Atb); // q = A^T b + rho*(z - u) double tmp, tmpq; gsl_blas_ddot(x, x, &tmp); gsl_blas_ddot(q, q, &tmpq); if (skinny) { /* x = U \ (L \ q) */ gsl_linalg_cholesky_solve(L, q, x); } else { /* x = q/rho - 1/rho^2 * A^T * (U \ (L \ (A*q))) */ gsl_blas_dgemv(CblasNoTrans, 1, A, q, 0, Aq); gsl_linalg_cholesky_solve(L, Aq, p); gsl_blas_dgemv(CblasTrans, 1, A, p, 0, x); /* now x = A^T * (U \ (L \ (A*q)) */ gsl_vector_scale(x, -1/(rho*rho)); gsl_vector_scale(q, 1/rho); gsl_vector_add(x, q); } /* * Message-passing: compute the global sum over all processors 
of the * contents of w and t. Also, update z. */ gsl_vector_memcpy(w, x); gsl_vector_add(w, u); // w = x + u gsl_blas_ddot(r, r, &send[0]); gsl_blas_ddot(x, x, &send[1]); gsl_blas_ddot(u, u, &send[2]); send[2] /= pow(rho, 2); gsl_vector_memcpy(zprev, z); // could be reduced to a single Allreduce call by concatenating send to w MPI_Allreduce(w->data, z->data, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(send, recv, 3, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); prires = sqrt(recv[0]); /* sqrt(sum ||r_i||_2^2) */ nxstack = sqrt(recv[1]); /* sqrt(sum ||x_i||_2^2) */ nystack = sqrt(recv[2]); /* sqrt(sum ||y_i||_2^2) */ gsl_vector_scale(z, 1/N); soft_threshold(z, lambda/(N*rho)); /* Termination checks */ /* dual residual */ gsl_vector_memcpy(zdiff, z); gsl_vector_sub(zdiff, zprev); dualres = sqrt(N) * rho * gsl_blas_dnrm2(zdiff); /* ||s^k||_2^2 = N rho^2 ||z - zprev||_2^2 */ /* compute primal and dual feasibility tolerances */ eps_pri = sqrt(n*N)*ABSTOL + RELTOL * fmax(nxstack, sqrt(N)*gsl_blas_dnrm2(z)); eps_dual = sqrt(n*N)*ABSTOL + RELTOL * nystack; if (rank == 0) { printf("%3d %10.4f %10.4f %10.4f %10.4f %10.4f\n", iter, prires, eps_pri, dualres, eps_dual, objective(A, b, lambda, z)); } if (prires <= eps_pri && dualres <= eps_dual) { break; } /* Compute residual: r = x - z */ gsl_vector_memcpy(r, x); gsl_vector_sub(r, z); iter++; } /* Have the master write out the results to disk */ if (rank == 0) { endAllTime = MPI_Wtime(); printf("Elapsed time is: %lf \n", endAllTime - startAllTime); f = fopen("data/solution.dat", "w"); gsl_vector_fprintf(f, z, "%lf"); fclose(f); } MPI_Finalize(); /* Shut down the MPI execution environment */ /* Clear memory */ gsl_matrix_free(A); gsl_matrix_free(L); gsl_vector_free(b); gsl_vector_free(x); gsl_vector_free(u); gsl_vector_free(z); gsl_vector_free(y); gsl_vector_free(r); gsl_vector_free(w); gsl_vector_free(zprev); gsl_vector_free(zdiff); gsl_vector_free(q); gsl_vector_free(Aq); gsl_vector_free(Atb); gsl_vector_free(p); return 
EXIT_SUCCESS; }