/* Local laplacian filter working on the first channel of a 4-float-per-pixel
 * buffer.
 *
 * Steps, as implemented below:
 *  1. pad the input (ll_pad_input) and build a gaussian pyramid of it; the
 *     coarsest level is written straight into the output pyramid.
 *  2. for each of num_gamma evenly spaced brightness samples, run the curve
 *     (apply_curve) on the padded input and build a gaussian pyramid of the
 *     result.
 *  3. assemble the output pyramid coarse to fine: expand the coarser output
 *     level and add a laplacian coefficient that is linearly interpolated
 *     between the two processed pyramids whose brightness samples bracket the
 *     pixel value found in the padded input pyramid.
 *  4. write channel 0 of out as 100 * result and copy channels 1 and 2 from
 *     input. Channel 3 of out is not written.
 *
 * All temporary buffers are released before returning.
 */
void local_laplacian_internal(
    const float *const input,   // input buffer in some Labx or yuvx format
    float *const out,           // output buffer with colour
    const int wd,               // width and
    const int ht,               // height of the input buffer
    const float sigma,          // user param: separate shadows/midtones/highlights
    const float shadows,        // user param: lift shadows
    const float highlights,     // user param: compress highlights
    const float clarity,        // user param: increase clarity/local contrast
    const int use_sse2)         // flag whether to use SSE version
{
#define max_levels 30
#define num_gamma 6
  // images with a side shorter than 4 pixels cannot form a two-level pyramid:
  // num_levels would be < 2, so padded[num_levels-2] below would be indexed
  // out of bounds (and __builtin_clz(0) is undefined for an empty image).
  // pass such images through instead of processing them.
  // NOTE(review): this assumes the first input channel is on the same scale as
  // the value written to out by the regular path -- confirm against
  // ll_pad_input.
  if(MIN(wd,ht) < 4)
  {
    for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
      for(int c=0;c<3;c++)
        out[4*(j*wd+i)+c] = input[4*(j*wd+i)+c];
    return;
  }

  // don't divide by 2 more often than we can:
  const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(wd,ht)));
  const int max_supp = 1<<(num_levels-1);
  int w, h;
  float *padded[max_levels] = {0};
  padded[0] = ll_pad_input(input, wd, ht, max_supp, &w, &h);

  // allocate pyramid pointers for padded input
  for(int l=1;l<num_levels;l++)
    padded[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // allocate pyramid pointers for output
  float *output[max_levels] = {0};
  for(int l=0;l<num_levels;l++)
    output[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // create gauss pyramid of padded input, write coarse directly to output
#if defined(__SSE2__)
  if(use_sse2)
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce_sse2(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce_sse2(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }
  else
#endif
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }

  // evenly sample brightness [0,1]:
  float gamma[num_gamma] = {0.0f};
  for(int k=0;k<num_gamma;k++) gamma[k] = (k+.5f)/(float)num_gamma;
  // for(int k=0;k<num_gamma;k++) gamma[k] = k/(num_gamma-1.0f);

  // allocate memory for intermediate laplacian pyramids
  float *buf[num_gamma][max_levels] = {{0}};
  for(int k=0;k<num_gamma;k++) for(int l=0;l<num_levels;l++)
    buf[k][l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // the paper says remapping only level 3 not 0 does the trick, too
  // (but i really like the additional octave of sharpness we get,
  // willing to pay the cost).
  for(int k=0;k<num_gamma;k++)
  { // process images
#if defined(__SSE2__)
    if(use_sse2)
      apply_curve_sse2(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);
    else // brackets in next line needed for silly gcc warning:
#endif
    {apply_curve(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);}

    // create gaussian pyramids
    for(int l=1;l<num_levels;l++)
#if defined(__SSE2__)
      if(use_sse2)
        gauss_reduce_sse2(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
      else
#endif
        gauss_reduce(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
  }

  // assemble output pyramid coarse to fine
  for(int l=num_levels-2;l >= 0; l--)
  {
    const int pw = dl(w,l), ph = dl(h,l);

    gauss_expand(output[l+1], output[l], pw, ph);
    // go through all coefficients in the upsampled gauss buffer:
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) collapse(2) shared(w,h,buf,output,l,gamma,padded)
#endif
    for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
    {
      const float v = padded[l][j*pw+i];
      // find the pair of brightness samples (lo, hi) bracketing v; values
      // outside the sampled range use the first or last pair, with the
      // interpolation weight clamped to [0,1].
      int hi = 1;
      for(;hi<num_gamma-1 && gamma[hi] <= v;hi++);
      int lo = hi-1;
      const float a = CLAMPS((v - gamma[lo])/(gamma[hi]-gamma[lo]), 0.0f, 1.0f);
      const float l0 = ll_laplacian(buf[lo][l+1], buf[lo][l], i, j, pw, ph);
      const float l1 = ll_laplacian(buf[hi][l+1], buf[hi][l], i, j, pw, ph);
      output[l][j*pw+i] += l0 * (1.0f-a) + l1 * a;
      // we could do this to save on memory (no need for finest buf[][]).
      // unfortunately it results in a quite noticeable loss of sharpness, i think
      // the extra level is worth it.
      // else if(l == 0) // use finest scale from input to not amplify noise (and use less memory)
      //   output[l][j*pw+i] += ll_laplacian(padded[l+1], padded[l], i, j, pw, ph);
    }
  }
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(dynamic) collapse(2) shared(w,output,buf)
#endif
  for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
  {
    // skip the max_supp wide padding border when reading back the result
    out[4*(j*wd+i)+0] = 100.0f * output[0][(j+max_supp)*w+max_supp+i]; // [0,1] -> L
    out[4*(j*wd+i)+1] = input[4*(j*wd+i)+1]; // copy original colour channels
    out[4*(j*wd+i)+2] = input[4*(j*wd+i)+2];
  }
  // free all buffers!
  // (levels >= num_levels were never allocated and are still null pointers)
  for(int l=0;l<max_levels;l++)
  {
    dt_free_align(padded[l]);
    dt_free_align(output[l]);
    for(int k = 0; k < num_gamma; k++)
      dt_free_align(buf[k][l]);
  }
  // undefine the two helper macros defined at the top of this function
  // (num_levels is a local variable, not a macro).
#undef max_levels
#undef num_gamma
}
void F4GB::do_spairs() { if (hilbert && hilbert->nRemainingExpected() == 0) { if (M2_gbTrace >= 1) fprintf(stderr, "-- skipping degree...no elements expected in this degree\n"); return; } reset_matrix(); reset_syz_matrix(); clock_t begin_time = clock(); n_lcmdups = 0; make_matrix(); if (M2_gbTrace >= 5) { fprintf(stderr, "---------\n"); show_matrix(); fprintf(stderr, "---------\n"); } clock_t end_time = clock(); clock_make_matrix += end_time - begin_time; double nsecs = static_cast<double>(end_time - begin_time); nsecs /= CLOCKS_PER_SEC; if (M2_gbTrace >= 2) fprintf(stderr, " make matrix time = %f\n", nsecs); if (M2_gbTrace >= 2) H.dump(); begin_time = clock(); gauss_reduce(true); end_time = clock(); clock_gauss += end_time - begin_time; // fprintf(stderr, "---------\n"); // show_matrix(); // fprintf(stderr, "---------\n"); nsecs = static_cast<double>(end_time - begin_time); nsecs /= CLOCKS_PER_SEC; if (M2_gbTrace >= 2) { fprintf(stderr, " gauss time = %f\n", nsecs); fprintf(stderr, " lcm dups = %ld\n", n_lcmdups); if (M2_gbTrace >= 5) { fprintf(stderr, "---------\n"); show_matrix(); fprintf(stderr, "---------\n"); show_syz_matrix(); // show_new_rows_matrix(); } } new_GB_elements(); int ngb = INTSIZE(gb); if (M2_gbTrace >= 1) { fprintf(stderr, " # GB elements = %d\n", ngb); if (M2_gbTrace >= 5) show_gb_array(gb); if (using_syz) fprintf(stderr, " # syzygies = %ld\n", static_cast<long>(syz_basis.size())); if (M2_gbTrace >= 5) show_syz_basis(); } clear_matrix(); clear_syz_matrix(); }