Exemplo n.º 1
0
void local_laplacian_internal(
    const float *const input,   // input buffer in some Labx or yuvx format
    float *const out,           // output buffer with colour
    const int wd,               // width and
    const int ht,               // height of the input buffer
    const float sigma,          // user param: separate shadows/midtones/highlights
    const float shadows,        // user param: lift shadows
    const float highlights,     // user param: compress highlights
    const float clarity,        // user param: increase clarity/local contrast
    const int use_sse2)         // flag whether to use SSE version
{
#define max_levels 30
#define num_gamma 6
  // don't divide by 2 more often than we can:
  const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(wd,ht)));
  const int max_supp = 1<<(num_levels-1);
  int w, h;
  float *padded[max_levels] = {0};
  padded[0] = ll_pad_input(input, wd, ht, max_supp, &w, &h);

  // allocate pyramid pointers for padded input
  for(int l=1;l<num_levels;l++)
    padded[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // allocate pyramid pointers for output
  float *output[max_levels] = {0};
  for(int l=0;l<num_levels;l++)
    output[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // create gauss pyramid of padded input, write coarse directly to output
#if defined(__SSE2__)
  if(use_sse2)
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce_sse2(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce_sse2(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }
  else
#endif
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }

  // evenly sample brightness [0,1]:
  float gamma[num_gamma] = {0.0f};
  for(int k=0;k<num_gamma;k++) gamma[k] = (k+.5f)/(float)num_gamma;
  // for(int k=0;k<num_gamma;k++) gamma[k] = k/(num_gamma-1.0f);

  // allocate memory for intermediate laplacian pyramids
  float *buf[num_gamma][max_levels] = {{0}};
  for(int k=0;k<num_gamma;k++) for(int l=0;l<num_levels;l++)
    buf[k][l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // the paper says remapping only level 3 not 0 does the trick, too
  // (but i really like the additional octave of sharpness we get,
  // willing to pay the cost).
  for(int k=0;k<num_gamma;k++)
  { // process images
#if defined(__SSE2__)
    if(use_sse2)
      apply_curve_sse2(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);
    else // brackets in next line needed for silly gcc warning:
#endif
    {apply_curve(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);}

    // create gaussian pyramids
    for(int l=1;l<num_levels;l++)
#if defined(__SSE2__)
      if(use_sse2)
        gauss_reduce_sse2(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
      else
#endif
        gauss_reduce(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
  }

  // assemble output pyramid coarse to fine
  for(int l=num_levels-2;l >= 0; l--)
  {
    const int pw = dl(w,l), ph = dl(h,l);

    gauss_expand(output[l+1], output[l], pw, ph);
    // go through all coefficients in the upsampled gauss buffer:
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) collapse(2) shared(w,h,buf,output,l,gamma,padded)
#endif
    for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
    {
      const float v = padded[l][j*pw+i];
      int hi = 1;
      for(;hi<num_gamma-1 && gamma[hi] <= v;hi++);
      int lo = hi-1;
      const float a = CLAMPS((v - gamma[lo])/(gamma[hi]-gamma[lo]), 0.0f, 1.0f);
      const float l0 = ll_laplacian(buf[lo][l+1], buf[lo][l], i, j, pw, ph);
      const float l1 = ll_laplacian(buf[hi][l+1], buf[hi][l], i, j, pw, ph);
      output[l][j*pw+i] += l0 * (1.0f-a) + l1 * a;
      // we could do this to save on memory (no need for finest buf[][]).
      // unfortunately it results in a quite noticable loss of sharpness, i think
      // the extra level is worth it.
      // else if(l == 0) // use finest scale from input to not amplify noise (and use less memory)
      //   output[l][j*pw+i] += ll_laplacian(padded[l+1], padded[l], i, j, pw, ph);
    }
  }
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(dynamic) collapse(2) shared(w,output,buf)
#endif
  for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
  {
    out[4*(j*wd+i)+0] = 100.0f * output[0][(j+max_supp)*w+max_supp+i]; // [0,1] -> L
    out[4*(j*wd+i)+1] = input[4*(j*wd+i)+1]; // copy original colour channels
    out[4*(j*wd+i)+2] = input[4*(j*wd+i)+2];
  }
  // free all buffers!
  for(int l=0;l<max_levels;l++)
  {
    dt_free_align(padded[l]);
    dt_free_align(output[l]);
    for(int k = 0; k < num_gamma; k++) dt_free_align(buf[k][l]);
  }
#undef num_levels
#undef num_gamma
}
Exemplo n.º 2
0
Arquivo: f4.cpp Projeto: pzinn/M2
void F4GB::do_spairs()
{
  if (hilbert && hilbert->nRemainingExpected() == 0)
    {
      if (M2_gbTrace >= 1)
        fprintf(stderr, "-- skipping degree...no elements expected in this degree\n");
      return;
    }
  reset_matrix();
  reset_syz_matrix();
  clock_t begin_time = clock();

  n_lcmdups = 0;
  make_matrix();

  if (M2_gbTrace >= 5) {
    fprintf(stderr, "---------\n");
    show_matrix();
    fprintf(stderr, "---------\n");
  }

  clock_t end_time = clock();
  clock_make_matrix += end_time - begin_time;
  double nsecs = static_cast<double>(end_time - begin_time);
  nsecs /= CLOCKS_PER_SEC;
  if (M2_gbTrace >= 2)
    fprintf(stderr, " make matrix time = %f\n", nsecs);

  if (M2_gbTrace >= 2)
    H.dump();

  begin_time = clock();
  gauss_reduce(true);
  end_time = clock();
  clock_gauss += end_time - begin_time;

  //  fprintf(stderr, "---------\n");
  //  show_matrix();
  //  fprintf(stderr, "---------\n");

  nsecs = static_cast<double>(end_time - begin_time);
  nsecs /= CLOCKS_PER_SEC;
  if (M2_gbTrace >= 2)
    {
      fprintf(stderr, " gauss time          = %f\n", nsecs);

      fprintf(stderr, " lcm dups            = %ld\n", n_lcmdups);
      if (M2_gbTrace >= 5)
        {
          fprintf(stderr, "---------\n");
          show_matrix();
          fprintf(stderr, "---------\n");
          show_syz_matrix();
          //  show_new_rows_matrix();
        }
    }
  new_GB_elements();
  int ngb = INTSIZE(gb);
  if (M2_gbTrace >= 1) {
    fprintf(stderr, " # GB elements   = %d\n", ngb);
    if (M2_gbTrace >= 5) show_gb_array(gb);
    if (using_syz)
      fprintf(stderr, " # syzygies      = %ld\n", static_cast<long>(syz_basis.size()));
    if (M2_gbTrace >= 5) show_syz_basis();
  }

  clear_matrix();
  clear_syz_matrix();
}