/* solveTV2d_DR.cpp Solves the 2 dimensional TV proximity problem by applying a Douglas-Rachford splitting algorithm. Parameters: - 0: bidimensional reference signal y. - 1: lambda penalties over the columns - 2: lambda penalties over the rows - 3: norm to apply over the columns - 4: norm to apply over the rows - 5: (optional) number of cores to use (default: as defined by environment variable OMP_NUM_THREADS) - 6: (optional) maximum number of iterations to run (default: as defined in TVopt.h) Outputs: - 0: solution x. - 1: array with optimizer information: + [0]: number of iterations run. + [1]: stopping tolerance. */ void mexFunction(int nlhs, mxArray *plhs[ ],int nrhs, const mxArray *prhs[ ]) { const mwSize *sDims; double *x=NULL,*y,*info=NULL,*dims; double lambda1, lambda2, norm1, norm2; int *ns=NULL; int nds,N,M,ncores,maxIters,i; #define FREE \ if(!nlhs) free(x); \ if(ns) free(ns); #define CANCEL(txt) \ printf("Error in solveTV2D_DR: %s\n",txt); \ if(x) free(x); \ if(info) free(info); \ return; /* Check input correctness */ if(nrhs < 5){CANCEL("not enought inputs");} if(!mxIsClass(prhs[0],"double")) {CANCEL("input signal must be in double format")} /* Find number of dimensions of the input signal */ nds=mxGetNumberOfDimensions(prhs[0]); /* Must be 2 */ if ( nds != 2 ) {CANCEL("input signal must be 2-dimensional")} M = mxGetM(prhs[0]); N = mxGetN(prhs[0]); /* Get dimensions of input signal */ sDims = mxGetDimensions(prhs[0]); /* Convert dimensions to C array */ ns = (int*)malloc(sizeof(int)*nds); if(!ns) {CANCEL("out of memory")} for(i=0;i<nds;i++) ns[i] = (int)sDims[i]; /* Get input signal */ y = mxGetPr(prhs[0]); /* Get rest of inputs */ lambda1 = mxGetScalar(prhs[1]); lambda2 = mxGetScalar(prhs[2]); norm1 = mxGetScalar(prhs[3]); norm2 = mxGetScalar(prhs[4]); if(nrhs >= 6) ncores = (int)(mxGetPr(prhs[5]))[0]; else ncores = 1; if(nrhs >= 7) maxIters = (int)(mxGetPr(prhs[6]))[0]; else maxIters = 0; /* Create output arrays */ if(nlhs >= 1){ 
plhs[0]=mxCreateNumericArray(nds,sDims,mxDOUBLE_CLASS,mxREAL); if(!plhs[0]){CANCEL("out of memory")} x=mxGetPr(plhs[0]); }
static void ChargeAdd(int pktlen) { struct timeval now; g_ctc_packets++; if (g_ctc_packets > TOPO_CTC_PACKETS_MAX) g_ctc_packets = TOPO_CTC_PACKETS_MAX; g_ctc_bytes += pktlen; if (g_ctc_bytes > TOPO_CTC_BYTES_MAX) g_ctc_bytes = TOPO_CTC_BYTES_MAX; #ifdef __DEBUG__ IF_TRACED(TRC_CHARGE) dbgprintf("ChargeAdd: CTC now has bytes=" FMT_UINT32 " & packets=" FMT_UINT32 "\n", g_ctc_bytes, g_ctc_packets); END_TRACE #endif /* Reset charge timer */ gettimeofday(&now, NULL); timeval_add_ms(&now, TOPO_CHARGE_TIMEOUT); CANCEL(g_charge_timer); g_charge_timer = event_add(&now, state_charge_timeout, /*state:*/NULL); }
static void ChargeConsume(int pktlen) { struct timeval now; if (g_ctc_packets) g_ctc_packets--; if (g_ctc_bytes) g_ctc_bytes -= pktlen; #ifdef __DEBUG__ IF_TRACED(TRC_CHARGE) dbgprintf("ChargeConsume: CTC now has bytes=" FMT_UINT32 " & packets=" FMT_UINT32 "\n", g_ctc_bytes, g_ctc_packets); END_TRACE #endif /* Reset charge timer */ gettimeofday(&now, NULL); timeval_add_ms(&now, TOPO_CHARGE_TIMEOUT); CANCEL(g_charge_timer); g_charge_timer = event_add(&now, state_charge_timeout, /*state:*/NULL); }
/* solveTV2_morec.cpp Solves the TV-L2 proximity problem by applying a More-Sorensen + Projected Gradient algorithm. Parameters: - 0: reference signal y. - 1: lambda penalty. Outputs: - 0: primal solution x. - 1: array with optimizer information: + [0]: number of iterations run. + [1]: dual gap. */ void mexFunction(int nlhs, mxArray *plhs[ ],int nrhs, const mxArray *prhs[ ]) { double *x=NULL,*y,*info=NULL; double lambda; int M,N,nn,i; #define FREE \ if(!nlhs) free(x); #define CANCEL(txt) \ printf("Error in solveTV2_morec2: %s\n",txt); \ if(x) free(x); \ if(info) free(info); \ return; /* Check input correctness */ if(nrhs < 2){CANCEL("not enought inputs");} if(!mxIsClass(prhs[0],"double")) {CANCEL("input signal must be in double format")} /* Create output arrays */ M = mxGetM(prhs[0]); N = mxGetN(prhs[0]); nn = (M > N) ? M : N; if(nlhs >= 1){ plhs[0] = mxCreateDoubleMatrix(nn,1,mxREAL); x = mxGetPr(plhs[0]); } else x = (double*)malloc(sizeof(double)*nn); if(nlhs >= 2){ plhs[1] = mxCreateDoubleMatrix(N_INFO,1,mxREAL); info = mxGetPr(plhs[1]); } /* Retrieve input data */ y = mxGetPr(prhs[0]); lambda = mxGetScalar(prhs[1]); /* Run Projected Newton */ morePG_TV2(y,lambda,x,info,nn,NULL); /* Free resources */ FREE return; }
/* PN_TV1 Given a reference signal y and a penalty parameter lambda, solves the proximity operator min_x 0.5 ||x-y||^2 + lambda sum_i |x_i - x_(i-1)| . To do so a Projected Newton algorithm is used to solve its dual problem. Inputs: - y: reference signal. - lambda: penalty parameter. - x: array in which to store the solution. - info: array in which to store optimizer information. - n: length of array y (and x). - sigma: tolerance for sufficient descent. - ws: workspace of allocated memory to use. If NULL, any needed memory is locally managed. */ int PN_TV1(double *y,double lambda,double *x,double *info,int n,double sigma,Workspace *ws){ int i,ind,nI,recomp,found,iters,maxIters,nn=n-1; double lambdaMax,tmp,fval0,fval1,gRd,delta,grad0,stop,stopPrev,improve,rhs,maxStep,prevDelta; double *w=NULL,*g=NULL,*d=NULL,*aux=NULL,*aux2=NULL; int *inactive=NULL; ptrdiff_t one=1,rc,nnp=nn,nIp; /* Macros */ #define GRAD2GAP(g,w,gap,i) \ gap = 0; \ for(i=0;i<nn;i++) \ gap += fabs(g[i]) * lambda + w[i] * g[i]; #define PRIMAL2VAL(x,val,i) \ val = 0; \ for(i=0;i<n;i++) \ val += x[i]*x[i]; \ val *= 0.5; #define PROJECTION(w) \ for(i=0;i<nn;i++) \ if(w[i] > lambda) w[i] = lambda; \ else if(w[i] < -lambda) w[i] = -lambda; #define CHECK_INACTIVE(w,g,inactive,nI,i) \ for(i=nI=0 ; i<nn ; i++) \ if( (w[i] > -lambda && w[i] < lambda) || (w[i] == -lambda && g[i] < -EPSILON) || (w[i] == lambda && g[i] > EPSILON) ) \ inactive[nI++] = i; #define FREE \ if(!ws){ \ if(w) free(w); \ if(g) free(g); \ if(d) free(d); \ if(aux) free(aux); \ if(aux2) free(aux2); \ if(inactive) free(inactive); \ } #define CANCEL(txt,info) \ printf("PN_TV1: %s\n"); \ FREE \ if(info) info[INFO_RC] = RC_ERROR;\ return 0; /* Alloc memory if no workspace available */ if(!ws){ w = (double*)malloc(sizeof(double)*nn); g = (double*)malloc(sizeof(double)*nn); d = (double*)malloc(sizeof(double)*nn); aux = (double*)malloc(sizeof(double)*nn); aux2 = (double*)malloc(sizeof(double)*nn); inactive = (int*)malloc(sizeof(int)*nn); if(!w || 
!g || ! d || !aux || !aux2 || !inactive){CANCEL("out of memory",info)} }
/* solveTV1_johnson.cpp Solves the TV-L1 proximity problem by applying Johnson's dynamic programming method. Parameters: - 0: reference signal y. - 1: lambda penalty. Outputs: - 0: primal solution x. */ void mexFunction(int nlhs, mxArray *plhs[ ],int nrhs, const mxArray *prhs[ ]) { double *x=NULL,*y; float lambda; int M,N,nn,i; #define FREE \ if(!nlhs) free(x); #define CANCEL(txt) \ printf("Error in solveTV1_condat: %s\n",txt); \ if(x) free(x); \ return; /* Check input correctness */ if(nrhs < 2){CANCEL("not enought inputs");} if(!mxIsClass(prhs[0],"double")) {CANCEL("input signal must be in double format")} /* Create output arrays */ M = mxGetM(prhs[0]); N = mxGetN(prhs[0]); nn = (M > N) ? M : N; if(nlhs >= 1){ plhs[0] = mxCreateDoubleMatrix(nn,1,mxREAL); x = mxGetPr(plhs[0]); } else x = (double*)malloc(sizeof(double)*nn); /* Retrieve input data */ y = mxGetPr(prhs[0]); lambda = mxGetScalar(prhs[1]); /* Run Johnson's method */ dp(nn, y, lambda, x); /* Free resources */ FREE return; #undef FREE #undef CANCEL }
/* Restart the inactivity timer for the session associated with the current
 * event (g_this_event.ssn).
 *
 * Does nothing when the current event has no session or the session is not
 * marked valid.  Otherwise the session's pending inactivity timer is cancelled
 * and a new state_inactivity_timeout event, carrying the session as its state,
 * is scheduled at the current time advanced by `timeout` (via timeval_add_ms).
 */
void
restart_inactivity_timer(uint32_t timeout)
{
    struct timeval deadline;

    if (g_this_event.ssn != NULL && g_this_event.ssn->ssn_is_valid == TRUE) {
        gettimeofday(&deadline, NULL);
        timeval_add_ms(&deadline, timeout);

        /* Drop the old timer before installing its replacement. */
        CANCEL(g_this_event.ssn->ssn_InactivityTimer);
        g_this_event.ssn->ssn_InactivityTimer =
            event_add(&deadline, state_inactivity_timeout, g_this_event.ssn);
    }
}
int DR2L1W_TV(size_t M, size_t N, double*unary, double*W1, double*W2, double*s, int nThreads, int maxit, double* info) { int i; double *t = NULL; double *tb = NULL; Workspace **ws = NULL; int maxDim; #define FREE \ if(t) free(t); \ if(tb) free(tb); \ if(ws) freeWorkspaces(ws,nThreads); #define CANCEL(txt,info) \ printf("DR2L1W_TV: %s\n",txt); \ FREE \ if(info) info[INFO_RC] = RC_ERROR;\ return 0; //printMatrix(M, N, unary, "unary"); if (nThreads < 1) nThreads = 1; omp_set_num_threads(nThreads); maxDim = (M > N) ? M : N; // Alloc memory for algorithm */ t = (double*) malloc(sizeof(double)*M*N); tb = (double*)malloc(sizeof(double)*M*N); ws = newWorkspaces(maxDim,nThreads); if (!t || !tb || !ws) {CANCEL("out of memory", info)} /* Set number of iterations */ if(maxit <= 0) maxit = MAX_ITERS_DR; // t = ones(size(unary)) * mean(unary(:)); double sum=0; for (i=0; i < M*N; i++) sum += unary[i]; sum = 2*sum / (M*N); for (i=0; i < M*N; i++) t[i]=sum; #ifdef DEBUG fprintf(DEBUG_FILE,"Starting Douglas-Rachford with size=[%d,%d], norms=[%lf,%lf], threads=%d\n",M,N,1,1,nThreads); fflush(DEBUG_FILE); #endif int iter = 0; /* MAIN LOOP */ while ( iter < maxit ) { iter++; // reflect for B_vertical //s = 2*dualprojLines(t, u1, W1) - t; #ifdef DEBUG fprintf(DEBUG_FILE,"Dual projection along columns\n"); fflush(DEBUG_FILE); #endif // Projection (prox) step DR_columnsPass(M, N, t, s, W1, ws); // Reflection for (i=0; i < M*N; i++) s[i] = 2*s[i] - t[i]; // reflect for -B_horizontal // t = 2*( -dualprojLines(-s', u2', W2')) - s'; #ifdef DEBUG fprintf(DEBUG_FILE,"Dual projection along rows\n"); fflush(DEBUG_FILE); #endif // Projection (prox) step, taking into account displacemente from reference unary signal DR_rowsPass(M, N, s, tb, unary, W2, ws); // Reflection for (i=0; i < M*N; i++) tb[i] = -2*tb[i] - s[i]; // Combiner step for (i=0; i < M*N; i++) t[i] = 0.5*(t[i]+tb[i]); } // DR is divergent, but with an additional projection we can recover valid solutions DR_columnsPass(M, N, t, s, W1, 
ws); DR_rowsPass(M, N, s, tb, unary, W2, ws); for (i = 0; i < M*N; i++) s[i] = - s[i] - tb[i]; /* Gather output information */ if(info){ info[INFO_ITERS] = iter; info[INFO_RC] = RC_OK; } // Free and return FREE return 0; #undef FREE #undef CANCEL }
/* PN_TV1_Weighted Given a reference signal y and a weight vector lambda, solves the proximity operator min_x 0.5 ||x-y||^2 + sum_i lambda_i |x_i - x_(i-1)| . To do so a Projected Newton algorithm is used to solve its dual problem. Inputs: - y: reference signal. - lambda: weight vector. - x: array in which to store the solution. - info: array in which to store optimizer information. - n: length of array y (and x). - sigma: tolerance for sufficient descent. - ws: workspace of allocated memory to use. If NULL, any needed memory is locally managed. */ int PN_TV1_Weighted(double *y,double *lambda,double *x,double *info,int n,double sigma,Workspace *ws){ int i,ind,nI,recomp,found,iters,nn=n-1; double lambdaMax,tmp,fval0,fval1,gRd,delta,grad0,stop,stopPrev,improve,rhs,maxStep,prevDelta; double *w=NULL,*g=NULL,*d=NULL,*aux=NULL,*aux2=NULL; int *inactive=NULL; lapack_int one=1,rc,nnp=nn,nIp; /* Macros */ #define GRAD2GAP(g,w,gap,i) \ gap = 0; \ for(i=0;i<nn;i++) \ gap += fabs(g[i]) * lambda[i] + w[i] * g[i]; #define PRIMAL2VAL(x,val,i) \ val = 0; \ for(i=0;i<n;i++) \ val += x[i]*x[i]; \ val *= 0.5; #define PROJECTION(w) \ for(i=0;i<nn;i++) \ if(w[i] > lambda[i]) w[i] = lambda[i]; \ else if(w[i] < -lambda[i]) w[i] = -lambda[i]; #define CHECK_INACTIVE(w,g,inactive,nI,i) \ for(i=nI=0 ; i<nn ; i++) \ if( (w[i] > -lambda[i] && w[i] < lambda[i]) || (w[i] == -lambda[i] && g[i] < -EPSILON) || (w[i] == lambda[i] && g[i] > EPSILON) ) \ inactive[nI++] = i; #define FREE \ if(!ws){ \ if(w) free(w); \ if(g) free(g); \ if(d) free(d); \ if(aux) free(aux); \ if(aux2) free(aux2); \ if(inactive) free(inactive); \ } #define CANCEL(txt,info) \ printf("PN_TV1: %s\n",txt); \ FREE \ if(info) info[INFO_RC] = RC_ERROR;\ return 0; /* Alloc memory if no workspace available */ if(!ws){ w = (double*)malloc(sizeof(double)*nn); g = (double*)malloc(sizeof(double)*nn); d = (double*)malloc(sizeof(double)*nn); aux = (double*)malloc(sizeof(double)*nn); aux2 = (double*)malloc(sizeof(double)*nn); inactive = 
(int*)malloc(sizeof(int)*nn); } /* If a workspace is available, request memory */ else{ w = getDoubleWorkspace(ws); g = getDoubleWorkspace(ws); d = getDoubleWorkspace(ws); aux = getDoubleWorkspace(ws); aux2 = getDoubleWorkspace(ws); inactive = getIntWorkspace(ws); } if(!w || !g || ! d || !aux || !aux2 || !inactive) {CANCEL("out of memory",info)} /* Precompute useful quantities */ for(i=0;i<nn;i++) w[i] = (y[i+1] - y[i]); /* Dy */ iters = 0; /* Factorize Hessian */ for(i=0;i<nn-1;i++){ aux[i] = 2; aux2[i] = -1; } aux[nn-1] = 2; dpttrf_(&nnp,aux,aux2,&rc); /* Solve Choleski-like linear system to obtain unconstrained solution */ dpttrs_(&nnp, &one, aux, aux2, w, &nnp, &rc); /* above assume we solved DD'u = Dy */ /* we wanted to solve DD'Wu = Dy; so now obtain u by dividing by W */ for(i=0;i<nn;i++) w[i]=w[i] / lambda[i]; /* Compute maximum effective penalty */ lambdaMax = 0; for(i=0;i<nn;i++) if((tmp = fabs(w[i])) > lambdaMax) lambdaMax = tmp; /* Check if the unconstrained solution is feasible for the given lambda */ #ifdef DEBUG fprintf(DEBUG_FILE,"lambda=%lf,lambdaMax=%lf\n",1.0,lambdaMax); #endif /* check if infnorm(u ./ w) <= 1 */ if(1.0 >= lambdaMax){ /* In this case all entries of the primal solution should be the same as the mean of y */ tmp = 0; for(i=0;i<n;i++) tmp += y[i]; tmp /= n; for(i=0;i<n;i++) x[i] = tmp; /* Gradient evaluation */ PRIMAL2GRAD(x,g,i) /* Compute dual gap */ GRAD2GAP(g,w,stop,i) if(info){ info[INFO_GAP] = fabs(stop); info[INFO_ITERS] = 0; info[INFO_RC] = RC_OK; } FREE return 1; }
/** Issue sync() and steal. Note that this may add sync requests to the xlb_pending_syncs list, which must be handled by the caller. @param stole_single true if stole single-worker task, else false @param stole_par true if stole parallel task, else false */ static adlb_code xlb_steal(int target, bool *stole_single, bool *stole_par) { adlb_code rc; *stole_single = false; *stole_par = false; MPI_Request request; MPI_Status status; TRACE_START; MPE_LOG(xlb_mpe_dmn_steal_start); DEBUG("[%i] stealing from %i", xlb_s.layout.rank, target); struct packed_steal_resp hdr; IRECV2(&hdr, sizeof(hdr), MPI_BYTE, target, ADLB_TAG_RESPONSE_STEAL_COUNT, &request); int max_memory = 1; int total_single = 0, total_par = 0; int response; rc = steal_sync(target, max_memory, &response); if (!response || rc == ADLB_SHUTDOWN) { CANCEL(&request); *stole_single = *stole_par = false; goto end; } ADLB_CHECK(rc); // Sender will stream work in groups, each with // header. while (true) { WAIT(&request, &status); if (hdr.count > 0) { int single, par; rc = steal_payloads(target, hdr.count, &single, &par, false); ADLB_CHECK(rc); total_single += single; total_par += par; } if (hdr.last) break; IRECV2(&hdr, sizeof(hdr), MPI_BYTE, target, ADLB_TAG_RESPONSE_STEAL_COUNT, &request); } DEBUG("[%i] steal result: stole %i tasks from %i", xlb_s.layout.rank, total_single + total_par, target); // MPE_INFO(xlb_mpe_svr_info, "STOLE: %i FROM: %i", hdr->count, target); *stole_single = (total_single > 0); *stole_par = (total_par > 0); // Record the time of this steal attempt xlb_steal_last = MPI_Wtime(); // Update failed steals if (hdr.count > 0) { xlb_failed_steals_since_backoff = 0; } else { xlb_failed_steals_since_backoff++; } end: TRACE_END; MPE_LOG(xlb_mpe_dmn_steal_end); return ADLB_SUCCESS; }