Exemplo n.º 1
0
int dt_dev_pixelpipe_cache_init(dt_dev_pixelpipe_cache_t *cache, int entries, size_t size)
{
  cache->entries = entries;
  cache->data = (void **)calloc(entries, sizeof(void *));
  cache->size = (size_t *)calloc(entries, sizeof(size_t));
  cache->hash = (uint64_t *)calloc(entries, sizeof(uint64_t));
  cache->used = (int32_t *)calloc(entries, sizeof(int32_t));
  for(int k = 0; k < entries; k++)
  {
    cache->data[k] = (void *)dt_alloc_align(16, size);
    if(!cache->data[k]) goto alloc_memory_fail;
    cache->size[k] = size;
#ifdef _DEBUG
    memset(cache->data[k], 0x5d, size);
#endif
    cache->hash[k] = -1;
    cache->used[k] = 0;
  }
  cache->queries = cache->misses = 0;
  return 1;

alloc_memory_fail:
  for(int k = 0; k < entries; k++)
  {
    if(cache->data[k]) dt_free_align(cache->data[k]);
  }

  free(cache->data);
  free(cache->size);
  free(cache->hash);
  free(cache->used);

  return 0;
}
Exemplo n.º 2
0
int write_image(dt_imageio_module_data_t *data, const char *filename, const void *ivoid, void *exif,
                int exif_len, int imgid, int num, int total)
{
  const dt_imageio_module_data_t *const pfm = data;
  int status = 0;
  FILE *f = fopen(filename, "wb");
  if(f)
  {
    // INFO: per-line fwrite call seems to perform best. LebedevRI, 18.04.2014
    (void)fprintf(f, "PF\n%d %d\n-1.0\n", pfm->width, pfm->height);
    void *buf_line = dt_alloc_align(16, 3 * sizeof(float) * pfm->width);
    for(int j = 0; j < pfm->height; j++)
    {
      // NOTE: pfm has rows in reverse order
      const int row_in = pfm->height - 1 - j;
      const float *in = (const float *)ivoid + 4 * (size_t)pfm->width * row_in;
      float *out = (float *)buf_line;
      for(int i = 0; i < pfm->width; i++, in += 4, out += 3)
      {
        memcpy(out, in, 3 * sizeof(float));
      }
      int cnt = fwrite(buf_line, 3 * sizeof(float), pfm->width, f);
      if(cnt != pfm->width)
        status = 1;
      else
        status = 0;
    }
    dt_free_align(buf_line);
    buf_line = NULL;
    fclose(f);
  }
  return status;
}
Exemplo n.º 3
0
void dt_cache_cleanup(dt_cache_t *cache)
{
  g_hash_table_destroy(cache->hashtable);
  GList *l = cache->lru;
  while(l)
  {
    dt_cache_entry_t *entry = (dt_cache_entry_t *)l->data;

    if(cache->cleanup)
    {
      assert(entry->data_size);
      ASAN_UNPOISON_MEMORY_REGION(entry->data, entry->data_size);

      cache->cleanup(cache->cleanup_data, entry);
    }
    else
      dt_free_align(entry->data);

    dt_pthread_rwlock_destroy(&entry->lock);
    g_slice_free1(sizeof(*entry), entry);
    l = g_list_next(l);
  }
  g_list_free(cache->lru);
  dt_pthread_mutex_destroy(&cache->lock);
}
Exemplo n.º 4
0
void dt_dev_pixelpipe_cache_cleanup(dt_dev_pixelpipe_cache_t *cache)
{
  for(int k = 0; k < cache->entries; k++) dt_free_align(cache->data[k]);
  free(cache->data);
  free(cache->hash);
  free(cache->used);
  free(cache->size);
}
Exemplo n.º 5
0
void dt_gaussian_free(dt_gaussian_t *g)
{
  if(!g) return;
  dt_free_align(g->buf);
  free(g->min);
  free(g->max);
  free(g);
}
Exemplo n.º 6
0
int dt_cache_remove(dt_cache_t *cache, const uint32_t key)
{
  gpointer orig_key, value;
  gboolean res;
  int result;
  dt_cache_entry_t *entry;
restart:
  dt_pthread_mutex_lock(&cache->lock);

  res = g_hash_table_lookup_extended(
      cache->hashtable, GINT_TO_POINTER(key), &orig_key, &value);
  entry = (dt_cache_entry_t *)value;
  if(!res)
  { // not found in cache, not deleting.
    dt_pthread_mutex_unlock(&cache->lock);
    return 1;
  }
  // need write lock to be able to delete:
  result = dt_pthread_rwlock_trywrlock(&entry->lock);
  if(result)
  {
    dt_pthread_mutex_unlock(&cache->lock);
    g_usleep(5);
    goto restart;
  }

  if(entry->_lock_demoting)
  {
    // oops, we are currently demoting (rw -> r) lock to this entry in some thread. do not touch!
    dt_pthread_rwlock_unlock(&entry->lock);
    dt_pthread_mutex_unlock(&cache->lock);
    g_usleep(5);
    goto restart;
  }

  gboolean removed = g_hash_table_remove(cache->hashtable, GINT_TO_POINTER(key));
  (void)removed; // make non-assert compile happy
  assert(removed);
  cache->lru = g_list_delete_link(cache->lru, entry->link);

  if(cache->cleanup)
  {
    assert(entry->data_size);
    ASAN_UNPOISON_MEMORY_REGION(entry->data, entry->data_size);

    cache->cleanup(cache->cleanup_data, entry);
  }
  else
    dt_free_align(entry->data);

  dt_pthread_rwlock_unlock(&entry->lock);
  dt_pthread_rwlock_destroy(&entry->lock);
  cache->cost -= entry->cost;
  g_slice_free1(sizeof(*entry), entry);

  dt_pthread_mutex_unlock(&cache->lock);
  return 0;
}
Exemplo n.º 7
0
void dt_mipmap_cache_cleanup(dt_mipmap_cache_t *cache)
{
  dt_mipmap_cache_serialize(cache);
  for(int k=0; k<DT_MIPMAP_F; k++)
  {
    dt_cache_cleanup(&cache->mip[k].cache);
    // now mem is actually freed, not during cache cleanup
    dt_free_align(cache->mip[k].buf);
  }
  dt_cache_cleanup(&cache->mip[DT_MIPMAP_FULL].cache);
  dt_cache_cleanup(&cache->mip[DT_MIPMAP_F].cache);

  // clean up temporary buffers for decompressed images, if any:
  if(cache->compression_type)
  {
    dt_cache_cleanup(&cache->scratchmem.cache);
    dt_free_align(cache->scratchmem.buf);
  }
}
Exemplo n.º 8
0
int dt_dev_pixelpipe_cache_get_weighted(dt_dev_pixelpipe_cache_t *cache, const uint64_t hash,
                                        const size_t size, void **data, int weight)
{
  cache->queries++;
  *data = NULL;
  int max_used = -1, max = 0;
  size_t sz = 0;
  for(int k = 0; k < cache->entries; k++)
  {
    // search for hash in cache
    if(cache->used[k] > max_used)
    {
      max_used = cache->used[k];
      max = k;
    }
    cache->used[k]++; // age all entries
    if(cache->hash[k] == hash)
    {
      *data = cache->data[k];
      sz = cache->size[k];
      cache->used[k] = weight; // this is the MRU entry
    }
  }

  if(!*data || sz < size)
  {
    // kill LRU entry
    // printf("[pixelpipe_cache_get] hash not found, returning slot %d/%d age %d\n", max, cache->entries,
    // weight);
    if(cache->size[max] < size)
    {
      dt_free_align(cache->data[max]);
      cache->data[max] = (void *)dt_alloc_align(16, size);
      cache->size[max] = size;
    }
    *data = cache->data[max];
    cache->hash[max] = hash;
    cache->used[max] = weight;
    cache->misses++;
    return 1;
  }
  else
    return 0;
}
Exemplo n.º 9
0
Arquivo: pfm.c Projeto: PkmX/darktable
int write_image(dt_imageio_module_data_t *data, const char *filename, const void *ivoid, void *exif,
                int exif_len, int imgid, int num, int total)
{
  const dt_imageio_module_data_t *const pfm = data;
  int status = 0;
  FILE *f = fopen(filename, "wb");
  if(f)
  {
    // align pfm header to sse, assuming the file will
    // be mmapped to page boundaries.
    char header[1024];
    snprintf(header, 1024, "PF\n%d %d\n-1.0", pfm->width, pfm->height);
    size_t len = strlen(header);
    fprintf(f, "PF\n%d %d\n-1.0", pfm->width, pfm->height);
    ssize_t off = 0;
    while((len + 1 + off) & 0xf) off++;
    while(off-- > 0) fprintf(f, "0");
    fprintf(f, "\n");
    void *buf_line = dt_alloc_align(16, 3 * sizeof(float) * pfm->width);
    for(int j = 0; j < pfm->height; j++)
    {
      // NOTE: pfm has rows in reverse order
      const int row_in = pfm->height - 1 - j;
      const float *in = (const float *)ivoid + 4 * (size_t)pfm->width * row_in;
      float *out = (float *)buf_line;
      for(int i = 0; i < pfm->width; i++, in += 4, out += 3)
      {
        memcpy(out, in, 3 * sizeof(float));
      }
      // INFO: per-line fwrite call seems to perform best. LebedevRI, 18.04.2014
      int cnt = fwrite(buf_line, 3 * sizeof(float), pfm->width, f);
      if(cnt != pfm->width)
        status = 1;
      else
        status = 0;
    }
    dt_free_align(buf_line);
    buf_line = NULL;
    fclose(f);
  }
  return status;
}
Exemplo n.º 10
0
// best-effort garbage collection. never blocks, never fails. well, sometimes it just doesn't free anything.
void dt_cache_gc(dt_cache_t *cache, const float fill_ratio)
{
  GList *l = cache->lru;
  int cnt = 0;
  while(l)
  {
    cnt++;
    dt_cache_entry_t *entry = (dt_cache_entry_t *)l->data;
    assert(entry->link->data == entry);
    l = g_list_next(l); // we might remove this element, so walk to the next one while we still have the pointer..
    if(cache->cost < cache->cost_quota * fill_ratio) break;

    // if still locked by anyone else give up:
    if(dt_pthread_rwlock_trywrlock(&entry->lock)) continue;

    if(entry->_lock_demoting)
    {
      // oops, we are currently demoting (rw -> r) lock to this entry in some thread. do not touch!
      dt_pthread_rwlock_unlock(&entry->lock);
      continue;
    }

    // delete!
    g_hash_table_remove(cache->hashtable, GINT_TO_POINTER(entry->key));
    cache->lru = g_list_delete_link(cache->lru, entry->link);
    cache->cost -= entry->cost;

    if(cache->cleanup)
    {
      assert(entry->data_size);
      ASAN_UNPOISON_MEMORY_REGION(entry->data, entry->data_size);

      cache->cleanup(cache->cleanup_data, entry);
    }
    else
      dt_free_align(entry->data);

    dt_pthread_rwlock_unlock(&entry->lock);
    dt_pthread_rwlock_destroy(&entry->lock);
    g_slice_free1(sizeof(*entry), entry);
  }
}
Exemplo n.º 11
0
// callback for the imageio core to allocate memory.
// only needed for _F and _FULL buffers, as they change size
// with the input image. will allocate img->width*img->height*img->bpp bytes.
void*
dt_mipmap_cache_alloc(dt_image_t *img, dt_mipmap_size_t size, dt_mipmap_cache_allocator_t a)
{
  assert(size == DT_MIPMAP_FULL);

  struct dt_mipmap_buffer_dsc** dsc = (struct dt_mipmap_buffer_dsc**)a;

  int32_t wd = img->width;
  int32_t ht = img->height;
  int32_t bpp = img->bpp;
  const uint32_t buffer_size =
    ((wd*ht*bpp) + sizeof(**dsc));

  // buf might have been alloc'ed before,
  // so only check size and re-alloc if necessary:
  if(!(*dsc) || ((*dsc)->size < buffer_size) || ((void *)*dsc == (void *)dt_mipmap_cache_static_dead_image))
  {
    if((void *)*dsc != (void *)dt_mipmap_cache_static_dead_image)
      dt_free_align(*dsc);
    *dsc = dt_alloc_align(64, buffer_size);
    // fprintf(stderr, "[mipmap cache] alloc for key %u %p\n", get_key(img->id, size), *buf);
    if(!(*dsc))
    {
      // return fallback: at least alloc size for a dead image:
      *dsc = (struct dt_mipmap_buffer_dsc *)dt_mipmap_cache_static_dead_image;
      // allocator holds the pointer. but imageio client is tricked to believe allocation failed:
      return NULL;
    }
    // set buffer size only if we're making it larger.
    (*dsc)->size = buffer_size;
  }
  (*dsc)->width = wd;
  (*dsc)->height = ht;
  (*dsc)->flags = DT_MIPMAP_BUFFER_DSC_FLAG_GENERATE;

  // fprintf(stderr, "full buffer allocating img %u %d x %d = %u bytes (%p)\n", img->id, img->width, img->height, buffer_size, *buf);

  // trick the user into using a pointer without the header:
  return (*dsc)+1;
}
Exemplo n.º 12
0
dt_gaussian_t *dt_gaussian_init(const int width,    // width of input image
                                const int height,   // height of input image
                                const int channels, // channels per pixel
                                const float *max,   // maximum allowed values per channel for clamping
                                const float *min,   // minimum allowed values per channel for clamping
                                const float sigma,  // gaussian sigma
                                const int order)    // order of gaussian blur
{
  dt_gaussian_t *g = (dt_gaussian_t *)malloc(sizeof(dt_gaussian_t));
  if(!g) return NULL;

  g->width = width;
  g->height = height;
  g->channels = channels;
  g->sigma = sigma;
  g->order = order;
  g->buf = NULL;
  g->max = (float *)calloc(channels, sizeof(float));
  g->min = (float *)calloc(channels, sizeof(float));

  if(!g->min || !g->max) goto error;

  for(int k = 0; k < channels; k++)
  {
    g->max[k] = max[k];
    g->min[k] = min[k];
  }

  g->buf = dt_alloc_align(64, (size_t)width * height * channels * sizeof(float));
  if(!g->buf) goto error;

  return g;

error:
  dt_free_align(g->buf);
  free(g->max);
  free(g->min);
  free(g);
  return NULL;
}
Exemplo n.º 13
0
// callback for the imageio core to allocate memory.
// only needed for _F and _FULL buffers, as they change size
// with the input image. will allocate img->width*img->height*img->bpp bytes.
void *dt_mipmap_cache_alloc(dt_mipmap_buffer_t *buf, const dt_image_t *img)
{
  assert(buf->size == DT_MIPMAP_FULL);

  const int wd = img->width;
  const int ht = img->height;
  struct dt_mipmap_buffer_dsc *dsc = (struct dt_mipmap_buffer_dsc *)buf->cache_entry->data;
  const size_t buffer_size = (size_t)wd*ht*img->bpp + sizeof(*dsc);

  // buf might have been alloc'ed before,
  // so only check size and re-alloc if necessary:
  if(!buf->buf || (dsc->size < buffer_size) || ((void *)dsc == (void *)dt_mipmap_cache_static_dead_image))
  {
    if((void *)dsc != (void *)dt_mipmap_cache_static_dead_image) dt_free_align(buf->cache_entry->data);
    buf->cache_entry->data = dt_alloc_align(64, buffer_size);
    if(!buf->cache_entry->data)
    {
      // return fallback: at least alloc size for a dead image:
      buf->cache_entry->data = (void*)dt_mipmap_cache_static_dead_image;
      // allocator holds the pointer. but let imageio client know that allocation failed:
      return NULL;
    }
    // set buffer size only if we're making it larger.
    dsc = (struct dt_mipmap_buffer_dsc *)buf->cache_entry->data;
    dsc->size = buffer_size;
  }
  dsc->width = wd;
  dsc->height = ht;
  dsc->color_space = DT_COLORSPACE_NONE;
  dsc->flags = DT_MIPMAP_BUFFER_DSC_FLAG_GENERATE;
  buf->buf = (uint8_t *)(dsc + 1);

  // fprintf(stderr, "full buffer allocating img %u %d x %d = %u bytes (%p)\n", img->id, img->width,
  // img->height, buffer_size, *buf);

  // return pointer to start of payload
  return dsc + 1;
}
Exemplo n.º 14
0
static void generate_thumbnail_cache()
{
  const int max_mip = DT_MIPMAP_2;
  fprintf(stderr, _("creating cache directories\n"));
  char filename[PATH_MAX] = {0};
  for(int k=DT_MIPMAP_0;k<=max_mip;k++)
  {
    snprintf(filename, sizeof(filename), "%s.d/%d", darktable.mipmap_cache->cachedir, k);
    fprintf(stderr, _("creating cache directory '%s'\n"), filename);
    int mkd = g_mkdir_with_parents(filename, 0750);
    if(mkd)
    {
      fprintf(stderr, _("could not create directory '%s'!\n"), filename);
      return;
    }
  }
  // some progress counter
  sqlite3_stmt *stmt;
  uint64_t image_count = 0, counter = 0;
  DT_DEBUG_SQLITE3_PREPARE_V2(dt_database_get(darktable.db), "select count(id) from images", -1, &stmt, 0);
  if(sqlite3_step(stmt) == SQLITE_ROW)
    image_count = sqlite3_column_int(stmt, 0);
  sqlite3_finalize(stmt);

  // go through all images:
  DT_DEBUG_SQLITE3_PREPARE_V2(dt_database_get(darktable.db), "select id from images", -1, &stmt, 0);
  // could only alloc max_mip-1, but would need to detect the special case that max==0.
  const size_t bufsize = (size_t)4 * darktable.mipmap_cache->max_width[max_mip]
                         * darktable.mipmap_cache->max_height[max_mip];
  uint8_t *tmp = (uint8_t *)dt_alloc_align(16, bufsize);
  if(!tmp)
  {
    fprintf(stderr, "couldn't allocate temporary memory!\n");
    sqlite3_finalize(stmt);
    return;
  }
  const int cache_quality = MIN(100, MAX(10, dt_conf_get_int("database_cache_quality")));
  while(sqlite3_step(stmt) == SQLITE_ROW)
  {
    const int32_t imgid = sqlite3_column_int(stmt, 0);
    // check whether all of these files are already there
    int all_exist = 1;
    for(int k=max_mip;k>=DT_MIPMAP_0;k--)
    {
      snprintf(filename, sizeof(filename), "%s.d/%d/%d.jpg", darktable.mipmap_cache->cachedir, k, imgid);
      all_exist &= !access(filename, R_OK); 
    }
    if(all_exist) goto next;
    dt_mipmap_buffer_t buf;
    // get largest thumbnail for this image
    // this one will take care of itself, we'll just write out the lower thumbs manually:
    dt_mipmap_cache_get(darktable.mipmap_cache, &buf, imgid, max_mip, DT_MIPMAP_BLOCKING, 'r');
    if(buf.width > 8 && buf.height > 8) // don't create for skulls
    for(int k=max_mip-1;k>=DT_MIPMAP_0;k--)
    {
      uint32_t width, height;
      const int wd = darktable.mipmap_cache->max_width[k];
      const int ht = darktable.mipmap_cache->max_height[k];
      // use exactly the same mechanism as the cache internally to rescale the thumbnail:
      dt_iop_flip_and_zoom_8(buf.buf, buf.width, buf.height, tmp, wd, ht, 0, &width, &height);

      snprintf(filename, sizeof(filename), "%s.d/%d/%d.jpg", darktable.mipmap_cache->cachedir, k, imgid);
      FILE *f = fopen(filename, "wb");
      if(f)
      {
        // allocate temp memory:
        uint8_t *blob = (uint8_t *)malloc(bufsize);
        if(!blob) goto write_error;
        const int32_t length
          = dt_imageio_jpeg_compress(tmp, blob, width, height, cache_quality);
        assert(length <= bufsize);
        int written = fwrite(blob, sizeof(uint8_t), length, f);
        if(written != length)
        {
write_error:
          unlink(filename);
        }
        free(blob);
        fclose(f);
      }
    }
    dt_mipmap_cache_release(darktable.mipmap_cache, &buf);
next:
    counter ++;
    fprintf(stderr, "\rimage %lu/%lu (%.02f%%)            ", counter, image_count, 100.0*counter/(float)image_count);
  }
  dt_free_align(tmp);
  sqlite3_finalize(stmt);
  fprintf(stderr, "done                     \n");
}
Exemplo n.º 15
0
dt_imageio_retval_t dt_imageio_open_png(dt_image_t *img, const char *filename, dt_mipmap_buffer_t *mbuf)
{
  const char *ext = filename + strlen(filename);
  while(*ext != '.' && ext > filename) ext--;
  if(strncmp(ext, ".png", 4) && strncmp(ext, ".PNG", 4)) return DT_IMAGEIO_FILE_CORRUPTED;
  if(!img->exif_inited) (void)dt_exif_read(img, filename);

  dt_imageio_png_t image;
  uint8_t *buf = NULL;
  uint32_t width, height;
  uint16_t bpp;


  if(read_header(filename, &image) != 0) return DT_IMAGEIO_FILE_CORRUPTED;

  width = img->width = image.width;
  height = img->height = image.height;
  bpp = image.bit_depth;

  img->bpp = 4 * sizeof(float);

  float *mipbuf = (float *)dt_mipmap_cache_alloc(mbuf, img);
  if(!mipbuf)
  {
    fclose(image.f);
    png_destroy_read_struct(&image.png_ptr, &image.info_ptr, NULL);
    fprintf(stderr, "[png_open] could not alloc full buffer for image `%s'\n", img->filename);
    return DT_IMAGEIO_CACHE_FULL;
  }

  buf = dt_alloc_align(16, (size_t)width * height * 3 * (bpp < 16 ? 1 : 2));
  if(!buf)
  {
    fclose(image.f);
    png_destroy_read_struct(&image.png_ptr, &image.info_ptr, NULL);
    fprintf(stderr, "[png_open] could not alloc intermediate buffer for image `%s'\n", img->filename);
    return DT_IMAGEIO_CACHE_FULL;
  }

  if(read_image(&image, (void *)buf) != 0)
  {
    dt_free_align(buf);
    fprintf(stderr, "[png_open] could not read image `%s'\n", img->filename);
    return DT_IMAGEIO_FILE_CORRUPTED;
  }

  for(size_t j = 0; j < height; j++)
  {
    if(bpp < 16)
      for(size_t i = 0; i < width; i++)
        for(int k = 0; k < 3; k++)
          mipbuf[4 * (j * width + i) + k] = buf[3 * (j * width + i) + k] * (1.0f / 255.0f);
    else
      for(size_t i = 0; i < width; i++)
        for(int k = 0; k < 3; k++)
          mipbuf[4 * (j * width + i) + k] = (256.0f * buf[2 * (3 * (j * width + i) + k)]
                                             + buf[2 * (3 * (j * width + i) + k) + 1]) * (1.0f / 65535.0f);
  }

  dt_free_align(buf);
  return DT_IMAGEIO_OK;
}
Exemplo n.º 16
0
void local_laplacian_internal(
    const float *const input,   // input buffer in some Labx or yuvx format
    float *const out,           // output buffer with colour
    const int wd,               // width and
    const int ht,               // height of the input buffer
    const float sigma,          // user param: separate shadows/midtones/highlights
    const float shadows,        // user param: lift shadows
    const float highlights,     // user param: compress highlights
    const float clarity,        // user param: increase clarity/local contrast
    const int use_sse2)         // flag whether to use SSE version
{
#define max_levels 30
#define num_gamma 6
  // don't divide by 2 more often than we can:
  const int num_levels = MIN(max_levels, 31-__builtin_clz(MIN(wd,ht)));
  const int max_supp = 1<<(num_levels-1);
  int w, h;
  float *padded[max_levels] = {0};
  padded[0] = ll_pad_input(input, wd, ht, max_supp, &w, &h);

  // allocate pyramid pointers for padded input
  for(int l=1;l<num_levels;l++)
    padded[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // allocate pyramid pointers for output
  float *output[max_levels] = {0};
  for(int l=0;l<num_levels;l++)
    output[l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // create gauss pyramid of padded input, write coarse directly to output
#if defined(__SSE2__)
  if(use_sse2)
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce_sse2(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce_sse2(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }
  else
#endif
  {
    for(int l=1;l<num_levels-1;l++)
      gauss_reduce(padded[l-1], padded[l], dl(w,l-1), dl(h,l-1));
    gauss_reduce(padded[num_levels-2], output[num_levels-1], dl(w,num_levels-2), dl(h,num_levels-2));
  }

  // evenly sample brightness [0,1]:
  float gamma[num_gamma] = {0.0f};
  for(int k=0;k<num_gamma;k++) gamma[k] = (k+.5f)/(float)num_gamma;
  // for(int k=0;k<num_gamma;k++) gamma[k] = k/(num_gamma-1.0f);

  // allocate memory for intermediate laplacian pyramids
  float *buf[num_gamma][max_levels] = {{0}};
  for(int k=0;k<num_gamma;k++) for(int l=0;l<num_levels;l++)
    buf[k][l] = dt_alloc_align(16, sizeof(float)*dl(w,l)*dl(h,l));

  // the paper says remapping only level 3 not 0 does the trick, too
  // (but i really like the additional octave of sharpness we get,
  // willing to pay the cost).
  for(int k=0;k<num_gamma;k++)
  { // process images
#if defined(__SSE2__)
    if(use_sse2)
      apply_curve_sse2(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);
    else // brackets in next line needed for silly gcc warning:
#endif
    {apply_curve(buf[k][0], padded[0], w, h, max_supp, gamma[k], sigma, shadows, highlights, clarity);}

    // create gaussian pyramids
    for(int l=1;l<num_levels;l++)
#if defined(__SSE2__)
      if(use_sse2)
        gauss_reduce_sse2(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
      else
#endif
        gauss_reduce(buf[k][l-1], buf[k][l], dl(w,l-1), dl(h,l-1));
  }

  // assemble output pyramid coarse to fine
  for(int l=num_levels-2;l >= 0; l--)
  {
    const int pw = dl(w,l), ph = dl(h,l);

    gauss_expand(output[l+1], output[l], pw, ph);
    // go through all coefficients in the upsampled gauss buffer:
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) collapse(2) shared(w,h,buf,output,l,gamma,padded)
#endif
    for(int j=0;j<ph;j++) for(int i=0;i<pw;i++)
    {
      const float v = padded[l][j*pw+i];
      int hi = 1;
      for(;hi<num_gamma-1 && gamma[hi] <= v;hi++);
      int lo = hi-1;
      const float a = CLAMPS((v - gamma[lo])/(gamma[hi]-gamma[lo]), 0.0f, 1.0f);
      const float l0 = ll_laplacian(buf[lo][l+1], buf[lo][l], i, j, pw, ph);
      const float l1 = ll_laplacian(buf[hi][l+1], buf[hi][l], i, j, pw, ph);
      output[l][j*pw+i] += l0 * (1.0f-a) + l1 * a;
      // we could do this to save on memory (no need for finest buf[][]).
      // unfortunately it results in a quite noticable loss of sharpness, i think
      // the extra level is worth it.
      // else if(l == 0) // use finest scale from input to not amplify noise (and use less memory)
      //   output[l][j*pw+i] += ll_laplacian(padded[l+1], padded[l], i, j, pw, ph);
    }
  }
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(dynamic) collapse(2) shared(w,output,buf)
#endif
  for(int j=0;j<ht;j++) for(int i=0;i<wd;i++)
  {
    out[4*(j*wd+i)+0] = 100.0f * output[0][(j+max_supp)*w+max_supp+i]; // [0,1] -> L
    out[4*(j*wd+i)+1] = input[4*(j*wd+i)+1]; // copy original colour channels
    out[4*(j*wd+i)+2] = input[4*(j*wd+i)+2];
  }
  // free all buffers!
  for(int l=0;l<max_levels;l++)
  {
    dt_free_align(padded[l]);
    dt_free_align(output[l]);
    for(int k = 0; k < num_gamma; k++) dt_free_align(buf[k][l]);
  }
#undef num_levels
#undef num_gamma
}
Exemplo n.º 17
0
static inline void gauss_reduce_sse2(
    const float *const input, // fine input buffer
    float *const coarse,      // coarse scale, blurred input buf
    const int wd,             // fine res
    const int ht)
{
  // blur, store only coarse res
  const int cw = (wd-1)/2+1, ch = (ht-1)/2+1;

  // this version is inspired by opencv's pyrDown_ :
  // - allocate 5 rows of ring buffer (aligned)
  // - for coarse res y
  //   - fill 5 coarse-res row buffers with 1 4 6 4 1 weights (reuse some from last time)
  //   - do vertical convolution via sse and write to coarse output buf

  const int stride = ((cw+8)&~7); // assure sse alignment of rows
  float *ringbuf = dt_alloc_align(16, sizeof(*ringbuf)*stride*5);
  float *rows[5] = {0};
  int rowj = 0; // we initialised this many rows so far

  for(int j=1;j<ch-1;j++)
  {
    // horizontal pass, convolve with 1 4 6 4 1 kernel and decimate
    for(;rowj<=2*j+2;rowj++)
    {
      float *const row = ringbuf + (rowj % 5)*stride;
      const float *const in = input + rowj*wd;
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
      for(int i=1;i<cw-1;i++)
        row[i] = 6*in[2*i] + 4*(in[2*i-1]+in[2*i+1]) + in[2*i-2] + in[2*i+2];
    }

    // init row pointers
    for(int k=0;k<5;k++)
      rows[k] = ringbuf + ((2*j-2+k)%5)*stride;

    // vertical pass, convolve and decimate using SIMD:
    // note that we're ignoring the (1..cw-1) buffer limit, we'll pull in
    // garbage and fix it later by border filling.
    float *const out = coarse + j*cw;
    const float *const row0 = rows[0], *const row1 = rows[1],
                *const row2 = rows[2], *const row3 = rows[3], *const row4 = rows[4];
    const __m128 four = _mm_set1_ps(4.f), scale = _mm_set1_ps(1.f/256.f);
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none)
#endif
    for(int i=0;i<=cw-8;i+=8)
    {
      __m128 r0, r1, r2, r3, r4, t0, t1;
      r0 = _mm_load_ps(row0 + i);
      r1 = _mm_load_ps(row1 + i);
      r2 = _mm_load_ps(row2 + i);
      r3 = _mm_load_ps(row3 + i);
      r4 = _mm_load_ps(row4 + i);
      r0 = _mm_add_ps(r0, r4);
      r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
      r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
      t0 = _mm_add_ps(r0, _mm_mul_ps(r1, four));

      r0 = _mm_load_ps(row0 + i + 4);
      r1 = _mm_load_ps(row1 + i + 4);
      r2 = _mm_load_ps(row2 + i + 4);
      r3 = _mm_load_ps(row3 + i + 4);
      r4 = _mm_load_ps(row4 + i + 4);
      r0 = _mm_add_ps(r0, r4);
      r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
      r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
      t1 = _mm_add_ps(r0, _mm_mul_ps(r1, four));

      t0 = _mm_mul_ps(t0, scale);
      t1 = _mm_mul_ps(t1, scale);

      _mm_storeu_ps(out + i, t0);
      _mm_storeu_ps(out + i + 4, t1);
    }
    // process the rest
    for(int i=cw&~7;i<cw-1;i++)
      out[i] = (6*row2[i] + 4*(row1[i] + row3[i]) + row0[i] + row4[i])*(1.0f/256.0f);
  }
  dt_free_align(ringbuf);
  ll_fill_boundary1(coarse, cw, ch);
}
Exemplo n.º 18
0
// internal function: to avoid exif blob reading + 8-bit byteorder flag + high-quality override
int dt_imageio_export_with_flags(
  const uint32_t              imgid,
  const char                 *filename,
  dt_imageio_module_format_t *format,
  dt_imageio_module_data_t   *format_params,
  const int32_t               ignore_exif,
  const int32_t               display_byteorder,
  const gboolean              high_quality,
  const int32_t               thumbnail_export,
  const char                 *filter,
  const gboolean              copy_metadata,
  dt_imageio_module_storage_t *storage,
  dt_imageio_module_data_t   *storage_params)
{
  dt_develop_t dev;
  dt_dev_init(&dev, 0);
  dt_mipmap_buffer_t buf;
  if(thumbnail_export && dt_conf_get_bool("plugins/lighttable/low_quality_thumbnails"))
    dt_mipmap_cache_read_get(darktable.mipmap_cache, &buf, imgid, DT_MIPMAP_F, DT_MIPMAP_BLOCKING);
  else
    dt_mipmap_cache_read_get(darktable.mipmap_cache, &buf, imgid, DT_MIPMAP_FULL, DT_MIPMAP_BLOCKING);
  dt_dev_load_image(&dev, imgid);
  const dt_image_t *img = &dev.image_storage;
  const int wd = img->width;
  const int ht = img->height;

  int res = 0;

  dt_times_t start;
  dt_get_times(&start);
  dt_dev_pixelpipe_t pipe;
  res = thumbnail_export ? dt_dev_pixelpipe_init_thumbnail(&pipe, wd, ht) : dt_dev_pixelpipe_init_export(&pipe, wd, ht, format->levels(format_params));
  if(!res)
  {
    dt_control_log(_("failed to allocate memory for %s, please lower the threads used for export or buy more memory."), thumbnail_export ? C_("noun", "thumbnail export") : C_("noun", "export"));
    dt_dev_cleanup(&dev);
    dt_mipmap_cache_read_release(darktable.mipmap_cache, &buf);
    return 1;
  }

  if(!buf.buf)
  {
    dt_control_log(_("image `%s' is not available!"), img->filename);
    dt_mipmap_cache_read_release(darktable.mipmap_cache, &buf);
    dt_dev_cleanup(&dev);
    return 1;
  }

  //  If a style is to be applied during export, add the iop params into the history
  if (!thumbnail_export && format_params->style[0] != '\0')
  {
    GList *stls;

    GList *modules = dev.iop;
    dt_iop_module_t *m = NULL;

    if ((stls=dt_styles_get_item_list(format_params->style, TRUE, -1)) == 0)
    {
      dt_control_log(_("cannot find the style '%s' to apply during export."), format_params->style);
      dt_dev_cleanup(&dev);
      dt_mipmap_cache_read_release(darktable.mipmap_cache, &buf);
      return 1;
    }

    //  Add each params
    while (stls)
    {
      dt_style_item_t *s = (dt_style_item_t *) stls->data;

      modules = dev.iop;
      while (modules)
      {
        m = (dt_iop_module_t *)modules->data;

        //  since the name in the style is returned with a possible multi-name, just check the start of the name
        if (strncmp(m->op, s->name, strlen(m->op)) == 0)
        {
          dt_dev_history_item_t *h = malloc(sizeof(dt_dev_history_item_t));

          h->params = s->params;
          h->blend_params = s->blendop_params;
          h->enabled = s->enabled;
          h->module = m;
          h->multi_priority = 1;
          g_strlcpy(h->multi_name, "", sizeof(h->multi_name));

          if(m->legacy_params && (s->module_version != m->version()))
          {
            void *new_params = malloc(m->params_size);
            m->legacy_params (m, h->params, s->module_version, new_params, labs(m->version()));

            free (h->params);
            h->params = new_params;
          }

          dev.history_end++;
          dev.history = g_list_append(dev.history, h);
          break;
        }
        modules = g_list_next(modules);
      }
      stls = g_list_next(stls);
    }
  }

  dt_dev_pixelpipe_set_input(&pipe, &dev, (float *)buf.buf, buf.width, buf.height, 1.0);
  dt_dev_pixelpipe_create_nodes(&pipe, &dev);
  dt_dev_pixelpipe_synch_all(&pipe, &dev);
  dt_dev_pixelpipe_get_dimensions(&pipe, &dev, pipe.iwidth, pipe.iheight, &pipe.processed_width, &pipe.processed_height);
  if(filter)
  {
    if(!strncmp(filter, "pre:", 4))
      dt_dev_pixelpipe_disable_after(&pipe, filter+4);
    if(!strncmp(filter, "post:", 5))
      dt_dev_pixelpipe_disable_before(&pipe, filter+5);
  }
  dt_show_times(&start, "[export] creating pixelpipe", NULL);

  // find output color profile for this image:
  int sRGB = 1;
  gchar *overprofile = dt_conf_get_string("plugins/lighttable/export/iccprofile");
  if(overprofile && !strcmp(overprofile, "sRGB"))
  {
    sRGB = 1;
  }
  else if(!overprofile || !strcmp(overprofile, "image"))
  {
    GList *modules = dev.iop;
    dt_iop_module_t *colorout = NULL;
    while (modules)
    {
      colorout = (dt_iop_module_t *)modules->data;
      if(colorout->get_p && strcmp(colorout->op, "colorout") == 0)
      {
        const char *iccprofile = colorout->get_p(colorout->params, "iccprofile");
        if(!strcmp(iccprofile, "sRGB")) sRGB = 1;
        else sRGB = 0;
      }
      modules = g_list_next(modules);
    }
  }
  else
  {
    sRGB = 0;
  }
  g_free(overprofile);

  // get only once at the beginning, in case the user changes it on the way:
  const gboolean high_quality_processing = ((format_params->max_width  == 0 || format_params->max_width  >= pipe.processed_width ) &&
      (format_params->max_height == 0 || format_params->max_height >= pipe.processed_height)) ? FALSE :
      high_quality;
  const int width  = high_quality_processing ? 0 : format_params->max_width;
  const int height = high_quality_processing ? 0 : format_params->max_height;
  const double scalex = width  > 0 ? fminf(width /(double)pipe.processed_width,  1.0) : 1.0;
  const double scaley = height > 0 ? fminf(height/(double)pipe.processed_height, 1.0) : 1.0;
  const double scale = fminf(scalex, scaley);
  int processed_width  = scale*pipe.processed_width  + .5f;
  int processed_height = scale*pipe.processed_height + .5f;
  const int bpp = format->bpp(format_params);

  // downsampling done last, if high quality processing was requested:
  uint8_t *outbuf = pipe.backbuf;
  uint8_t *moutbuf = NULL; // keep track of alloc'ed memory
  dt_get_times(&start);
  if(high_quality_processing)
  {
    dt_dev_pixelpipe_process_no_gamma(&pipe, &dev, 0, 0, processed_width, processed_height, scale);
    const double scalex = format_params->max_width  > 0 ? fminf(format_params->max_width /(double)pipe.processed_width,  1.0) : 1.0;
    const double scaley = format_params->max_height > 0 ? fminf(format_params->max_height/(double)pipe.processed_height, 1.0) : 1.0;
    const double scale = fminf(scalex, scaley);
    processed_width  = scale*pipe.processed_width  + .5f;
    processed_height = scale*pipe.processed_height + .5f;
    moutbuf = (uint8_t *)dt_alloc_align(64, (size_t)sizeof(float)*processed_width*processed_height*4);
    outbuf = moutbuf;
    // now downscale into the new buffer:
    dt_iop_roi_t roi_in, roi_out;
    roi_in.x = roi_in.y = roi_out.x = roi_out.y = 0;
    roi_in.scale = 1.0;
    roi_out.scale = scale;
    roi_in.width = pipe.processed_width;
    roi_in.height = pipe.processed_height;
    roi_out.width = processed_width;
    roi_out.height = processed_height;
    dt_iop_clip_and_zoom((float *)outbuf, (float *)pipe.backbuf, &roi_out, &roi_in, processed_width, pipe.processed_width);
  }
  else
  {
    // do the processing (8-bit with special treatment, to make sure we can use openmp further down):
    if(bpp == 8)
      dt_dev_pixelpipe_process(&pipe, &dev, 0, 0, processed_width, processed_height, scale);
    else
      dt_dev_pixelpipe_process_no_gamma(&pipe, &dev, 0, 0, processed_width, processed_height, scale);
    outbuf = pipe.backbuf;
  }
  dt_show_times(&start, thumbnail_export ? "[dev_process_thumbnail] pixel pipeline processing" : "[dev_process_export] pixel pipeline processing", NULL);

  // downconversion to low-precision formats:
  if(bpp == 8)
  {
    if(display_byteorder)
    {
      if(high_quality_processing)
      {
        const float *const inbuf = (float *)outbuf;
        for(size_t k=0; k<(size_t)processed_width*processed_height; k++)
        {
          // convert in place, this is unfortunately very serial..
          const uint8_t r = CLAMP(inbuf[4*k+2]*0xff, 0, 0xff);
          const uint8_t g = CLAMP(inbuf[4*k+1]*0xff, 0, 0xff);
          const uint8_t b = CLAMP(inbuf[4*k+0]*0xff, 0, 0xff);
          outbuf[4*k+0] = r;
          outbuf[4*k+1] = g;
          outbuf[4*k+2] = b;
        }
      }
      // else processing output was 8-bit already, and no need to swap order
    }
    else // need to flip 
    {
      // ldr output: char
      if(high_quality_processing)
      {
        const float *const inbuf = (float *)outbuf;
        for(size_t k=0; k<(size_t)processed_width*processed_height; k++)
        {
          // convert in place, this is unfortunately very serial..
          const uint8_t r = CLAMP(inbuf[4*k+0]*0xff, 0, 0xff);
          const uint8_t g = CLAMP(inbuf[4*k+1]*0xff, 0, 0xff);
          const uint8_t b = CLAMP(inbuf[4*k+2]*0xff, 0, 0xff);
          outbuf[4*k+0] = r;
          outbuf[4*k+1] = g;
          outbuf[4*k+2] = b;
        }
      }
      else
      { // !display_byteorder, need to swap:
        uint8_t *const buf8 = pipe.backbuf;
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(processed_width, processed_height) schedule(static)
#endif
        // just flip byte order
        for(size_t k=0; k<(size_t)processed_width*processed_height; k++)
        {
          uint8_t tmp = buf8[4*k+0];
          buf8[4*k+0] = buf8[4*k+2];
          buf8[4*k+2] = tmp;
        }
      }
    }
  }
  else if(bpp == 16)
  {
    // uint16_t per color channel
    float    *buff  = (float *)   outbuf;
    uint16_t *buf16 = (uint16_t *)outbuf;
    for(int y=0; y<processed_height; y++) for(int x=0; x<processed_width ; x++)
      {
        // convert in place
        const size_t k = (size_t)processed_width*y + x;
        for(int i=0; i<3; i++) buf16[4*k+i] = CLAMP(buff[4*k+i]*0x10000, 0, 0xffff);
      }
  }
  // else output float, no further harm done to the pixels :)

  format_params->width  = processed_width;
  format_params->height = processed_height;

  if(!ignore_exif)
  {
    int length;
    uint8_t exif_profile[65535]; // C++ alloc'ed buffer is uncool, so we waste some bits here.
    char pathname[PATH_MAX];
    gboolean from_cache = TRUE;
    dt_image_full_path(imgid, pathname, sizeof(pathname), &from_cache);
    // last param is dng mode, it's false here
    length = dt_exif_read_blob(exif_profile, pathname, imgid, sRGB, processed_width, processed_height, 0);

    res = format->write_image (format_params, filename, outbuf, exif_profile, length, imgid);
  }
  else
  {
    res = format->write_image (format_params, filename, outbuf, NULL, 0, imgid);
  }

  dt_dev_pixelpipe_cleanup(&pipe);
  dt_dev_cleanup(&dev);
  dt_mipmap_cache_read_release(darktable.mipmap_cache, &buf);
  dt_free_align(moutbuf);
  /* now write xmp into that container, if possible */
  if(copy_metadata && (format->flags(format_params) & FORMAT_FLAGS_SUPPORT_XMP)) {
    dt_exif_xmp_attach(imgid, filename);
    // no need to cancel the export if this fail
  }


  if(!thumbnail_export && strcmp(format->mime(format_params), "memory"))
  {
    dt_control_signal_raise(darktable.signals,DT_SIGNAL_IMAGE_EXPORT_TMPFILE,imgid,filename,format,format_params,storage,storage_params);
  }
  return res;
}
Exemplo n.º 19
0
static int dt_group_get_mask_roi(dt_iop_module_t *module, dt_dev_pixelpipe_iop_t *piece,
                                 dt_masks_form_t *form, const dt_iop_roi_t *roi, float *buffer)
{
  double start2 = dt_get_wtime();
  const guint nb = g_list_length(form->points);
  if(nb == 0) return 0;
  int nb_ok = 0;

  const int width = roi->width;
  const int height = roi->height;

  // we need to allocate a temporary buffer for intermediate creation of individual shapes
  float *bufs = dt_alloc_align(64, (size_t)width * height * sizeof(float));
  if(bufs == NULL) return 0;

  // empty the output buffer
  memset(buffer, 0, (size_t)width * height * sizeof(float));

  // and we get all masks
  GList *fpts = g_list_first(form->points);

  while(fpts)
  {
    dt_masks_point_group_t *fpt = (dt_masks_point_group_t *)fpts->data;
    dt_masks_form_t *sel = dt_masks_get_from_id(module->dev, fpt->formid);

    if(sel)
    {
      const int ok = dt_masks_get_mask_roi(module, piece, sel, roi, bufs);
      const float op = fpt->opacity;
      const int state = fpt->state;

      if(ok)
      {
        // first see if we need to invert this shape
        if(state & DT_MASKS_STATE_INVERSE)
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs)
#else
#pragma omp parallel for shared(bufs)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              bufs[index] = 1.0f - bufs[index];
            }
        }

        if(state & DT_MASKS_STATE_UNION)
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs, buffer)
#else
#pragma omp parallel for shared(bufs, buffer)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              buffer[index] = fmaxf(buffer[index], bufs[index] * op);
            }
        }
        else if(state & DT_MASKS_STATE_INTERSECTION)
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs, buffer)
#else
#pragma omp parallel for shared(bufs, buffer)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              float b1 = buffer[index];
              float b2 = b2 = bufs[index]; // FIXME: is this line correct? what it supposed to be doing?
              if(b1 > 0.0f && b2 > 0.0f)
                buffer[index] = fminf(b1, b2 * op);
              else
                buffer[index] = 0.0f;
            }
        }
        else if(state & DT_MASKS_STATE_DIFFERENCE)
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs, buffer)
#else
#pragma omp parallel for shared(bufs, buffer)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              float b1 = buffer[index];
              float b2 = bufs[index] * op;
              if(b1 > 0.0f && b2 > 0.0f) buffer[index] = b1 * (1.0f - b2);
            }
        }
        else if(state & DT_MASKS_STATE_EXCLUSION)
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs, buffer)
#else
#pragma omp parallel for shared(bufs, buffer)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              float b1 = buffer[index];
              float b2 = bufs[index] * op;
              if(b1 > 0.0f && b2 > 0.0f)
                buffer[index] = fmaxf((1.0f - b1) * b2, b1 * (1.0f - b2));
              else
                buffer[index] = fmaxf(b1, b2);
            }
        }
        else // if we are here, this mean that we just have to copy the shape and null other parts
        {
#ifdef _OPENMP
#if !defined(__SUNOS__) && !defined(__NetBSD__)
#pragma omp parallel for default(none) shared(bufs, buffer)
#else
#pragma omp parallel for shared(bufs, buffer)
#endif
#endif
          for(int y = 0; y < height; y++)
            for(int x = 0; x < width; x++)
            {
              size_t index = (size_t)y * width + x;
              buffer[index] = bufs[index] * op;
            }
        }

        if(darktable.unmuted & DT_DEBUG_PERF)
          dt_print(DT_DEBUG_MASKS, "[masks %d] combine took %0.04f sec\n", nb_ok, dt_get_wtime() - start2);
        start2 = dt_get_wtime();

        nb_ok++;
      }
    }
    fpts = g_list_next(fpts);
  }

  // and we free the intermediate buffer
  dt_free_align(bufs);

  return (nb_ok != 0);
}
Exemplo n.º 20
0
dt_imageio_retval_t
dt_imageio_open_gm(
  dt_image_t *img,
  const char *filename,
  dt_mipmap_cache_allocator_t a)
{
  int err = DT_IMAGEIO_FILE_CORRUPTED;
  float *buf = NULL;
  ExceptionInfo exception;
  Image *image = NULL;
  ImageInfo *image_info = NULL;
  uint32_t width, height, orientation;

  if(!_supported_image(filename)) return DT_IMAGEIO_FILE_CORRUPTED;

  if(!img->exif_inited)
    (void) dt_exif_read(img, filename);

  GetExceptionInfo(&exception);
  image_info=CloneImageInfo((ImageInfo *) NULL);

  g_strlcpy(image_info->filename,filename,sizeof(image_info->filename));

  image=ReadImage(image_info,&exception);
  if (exception.severity != UndefinedException)
    CatchException(&exception);
  if (!image)
  {
    fprintf(stderr, "[GraphicsMagick_open] image `%s' not found\n", img->filename);
    err = DT_IMAGEIO_FILE_NOT_FOUND;
    goto error;
  }

  width = image->columns;
  height = image->rows;
  orientation = image->orientation;

  if(orientation & 4)
  {
    img->width = height;
    img->height = width;
  }
  else
  {
    img->width = width;
    img->height = height;
  }

  img->bpp = 4*sizeof(float);

  float *mipbuf = (float *)dt_mipmap_cache_alloc(img, DT_MIPMAP_FULL, a);
  if(!mipbuf)
  {
    fprintf(stderr, "[GraphicsMagick_open] could not alloc full buffer for image `%s'\n", img->filename);
    err = DT_IMAGEIO_CACHE_FULL;
    goto error;
  }

  buf = (float *)dt_alloc_align(16, width*img->bpp);
  if(!buf) goto error;

  const int ht2 = orientation & 4 ? img->width  : img->height; // pretend unrotated, rotate in write_pos
  const int wd2 = orientation & 4 ? img->height : img->width;

  for (uint32_t row = 0; row < height; row++)
  {
    int ret = DispatchImage(image, 0, row, width, 1, "RGBP", FloatPixel, (void *)buf, &exception);
    if (exception.severity != UndefinedException)
      CatchException(&exception);
    if(ret != MagickPass)
    {
      fprintf(stderr, "[GraphicsMagick_open] error reading image `%s'\n", img->filename);
      err = DT_IMAGEIO_FILE_CORRUPTED;
      goto error;
    }

    for(uint32_t i=0; i<width; i++)
      for(int k=0; k<4; k++) mipbuf[4*dt_imageio_write_pos(i, row, wd2, ht2, wd2, ht2, orientation) + k] = buf[4*i + k];
  }

  if(buf) dt_free_align(buf);
  if(image) DestroyImage(image);
  if(image_info) DestroyImageInfo(image_info);
  DestroyExceptionInfo(&exception);

  img->filters = 0;
  img->flags &= ~DT_IMAGE_RAW;
  img->flags &= ~DT_IMAGE_HDR;
  img->flags |= DT_IMAGE_LDR;

  return DT_IMAGEIO_OK;

error:
  if(buf) dt_free_align(buf);
  if(image) DestroyImage(image);
  if(image_info) DestroyImageInfo(image_info);
  DestroyExceptionInfo(&exception);
  return err;
}
Exemplo n.º 21
0
int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out,
               const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out)
{
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;
  dt_iop_global_tonemap_global_data_t *gd = (dt_iop_global_tonemap_global_data_t *)self->data;
  dt_iop_global_tonemap_gui_data_t *g = (dt_iop_global_tonemap_gui_data_t *)self->gui_data;
  dt_bilateral_cl_t *b = NULL;

  cl_int err = -999;
  cl_mem dev_m = NULL;
  cl_mem dev_r = NULL;
  float *maximum = NULL;
  const int devid = piece->pipe->devid;
  int gtkernel = -1;

  const int width = roi_out->width;
  const int height = roi_out->height;
  float parameters[4] = { 0.0f };

  switch(d->operator)
  {
    case OPERATOR_REINHARD:
      gtkernel = gd->kernel_global_tonemap_reinhard;
      break;
    case OPERATOR_DRAGO:
      gtkernel = gd->kernel_global_tonemap_drago;
      break;
    case OPERATOR_FILMIC:
      gtkernel = gd->kernel_global_tonemap_filmic;
      break;
  }

  if(d->operator== OPERATOR_DRAGO)
  {
    const float eps = 0.0001f;
    float tmp_lwmax = NAN;

    // see comments in process() about lwmax value
    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_FULL)
    {
      dt_pthread_mutex_lock(&g->lock);
      const uint64_t hash = g->hash;
      dt_pthread_mutex_unlock(&g->lock);

      if(hash != 0 && !dt_dev_sync_pixelpipe_hash(self->dev, piece->pipe, 0, self->priority, &g->lock, &g->hash))
        dt_control_log(_("inconsistent output"));

      dt_pthread_mutex_lock(&g->lock);
      tmp_lwmax = g->lwmax;
      dt_pthread_mutex_unlock(&g->lock);
    }

    if(isnan(tmp_lwmax))
    {
      dt_opencl_local_buffer_t flocopt
        = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
                                      .cellsize = sizeof(float), .overhead = 0,
                                      .sizex = 1 << 4, .sizey = 1 << 4 };

      if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_first, &flocopt))
        goto error;

      const size_t bwidth = ROUNDUP(width, flocopt.sizex);
      const size_t bheight = ROUNDUP(height, flocopt.sizey);

      const int bufsize = (bwidth / flocopt.sizex) * (bheight / flocopt.sizey);

      dt_opencl_local_buffer_t slocopt
        = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
                                      .cellsize = sizeof(float), .overhead = 0,
                                      .sizex = 1 << 16, .sizey = 1 };

      if(!dt_opencl_local_buffer_opt(devid, gd->kernel_pixelmax_second, &slocopt))
        goto error;

      const int reducesize = MIN(REDUCESIZE, ROUNDUP(bufsize, slocopt.sizex) / slocopt.sizex);

      size_t sizes[3];
      size_t local[3];

      dev_m = dt_opencl_alloc_device_buffer(devid, (size_t)bufsize * sizeof(float));
      if(dev_m == NULL) goto error;

      dev_r = dt_opencl_alloc_device_buffer(devid, (size_t)reducesize * sizeof(float));
      if(dev_r == NULL) goto error;

      sizes[0] = bwidth;
      sizes[1] = bheight;
      sizes[2] = 1;
      local[0] = flocopt.sizex;
      local[1] = flocopt.sizey;
      local[2] = 1;
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 0, sizeof(cl_mem), &dev_in);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 1, sizeof(int), &width);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 2, sizeof(int), &height);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 3, sizeof(cl_mem), &dev_m);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_first, 4, flocopt.sizex * flocopt.sizey * sizeof(float), NULL);
      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_first, sizes, local);
      if(err != CL_SUCCESS) goto error;

      sizes[0] = reducesize * slocopt.sizex;
      sizes[1] = 1;
      sizes[2] = 1;
      local[0] = slocopt.sizex;
      local[1] = 1;
      local[2] = 1;
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 0, sizeof(cl_mem), &dev_m);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 1, sizeof(cl_mem), &dev_r);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 2, sizeof(int), &bufsize);
      dt_opencl_set_kernel_arg(devid, gd->kernel_pixelmax_second, 3, slocopt.sizex * sizeof(float), NULL);
      err = dt_opencl_enqueue_kernel_2d_with_local(devid, gd->kernel_pixelmax_second, sizes, local);
      if(err != CL_SUCCESS) goto error;

      maximum = dt_alloc_align(16, reducesize * sizeof(float));
      err = dt_opencl_read_buffer_from_device(devid, (void *)maximum, dev_r, 0,
                                            (size_t)reducesize * sizeof(float), CL_TRUE);
      if(err != CL_SUCCESS) goto error;

      dt_opencl_release_mem_object(dev_r);
      dt_opencl_release_mem_object(dev_m);
      dev_r = dev_m = NULL;

      for(int k = 1; k < reducesize; k++)
      {
        float mine = maximum[0];
        float other = maximum[k];
        maximum[0] = (other > mine) ? other : mine;
      }

      tmp_lwmax = MAX(eps, (maximum[0] * 0.01f));

      dt_free_align(maximum);
      maximum = NULL;
    }

    const float lwmax = tmp_lwmax;
    const float ldc = d->drago.max_light * 0.01f / log10f(lwmax + 1.0f);
    const float bl = logf(MAX(eps, d->drago.bias)) / logf(0.5f);

    parameters[0] = eps;
    parameters[1] = ldc;
    parameters[2] = bl;
    parameters[3] = lwmax;

    if(self->dev->gui_attached && g && piece->pipe->type == DT_DEV_PIXELPIPE_PREVIEW)
    {
      uint64_t hash = dt_dev_hash_plus(self->dev, piece->pipe, 0, self->priority);
      dt_pthread_mutex_lock(&g->lock);
      g->lwmax = lwmax;
      g->hash = hash;
      dt_pthread_mutex_unlock(&g->lock);
    }
  }

  const float scale = piece->iscale / roi_in->scale;
  const float sigma_r = 8.0f; // does not depend on scale
  const float iw = piece->buf_in.width / scale;
  const float ih = piece->buf_in.height / scale;
  const float sigma_s = fminf(iw, ih) * 0.03f;

  if(d->detail != 0.0f)
  {
    b = dt_bilateral_init_cl(devid, roi_in->width, roi_in->height, sigma_s, sigma_r);
    if(!b) goto error;
    // get detail from unchanged input buffer
    err = dt_bilateral_splat_cl(b, dev_in);
    if(err != CL_SUCCESS) goto error;
  }

  size_t sizes[2] = { ROUNDUPWD(width), ROUNDUPHT(height) };
  dt_opencl_set_kernel_arg(devid, gtkernel, 0, sizeof(cl_mem), &dev_in);
  dt_opencl_set_kernel_arg(devid, gtkernel, 1, sizeof(cl_mem), &dev_out);
  dt_opencl_set_kernel_arg(devid, gtkernel, 2, sizeof(int), &width);
  dt_opencl_set_kernel_arg(devid, gtkernel, 3, sizeof(int), &height);
  dt_opencl_set_kernel_arg(devid, gtkernel, 4, 4 * sizeof(float), &parameters);
  err = dt_opencl_enqueue_kernel_2d(devid, gtkernel, sizes);
  if(err != CL_SUCCESS) goto error;

  if(d->detail != 0.0f)
  {
    err = dt_bilateral_blur_cl(b);
    if(err != CL_SUCCESS) goto error;
    // and apply it to output buffer after logscale
    err = dt_bilateral_slice_to_output_cl(b, dev_in, dev_out, d->detail);
    if(err != CL_SUCCESS) goto error;
    dt_bilateral_free_cl(b);
  }

  return TRUE;

error:
  if(b) dt_bilateral_free_cl(b);
  dt_opencl_release_mem_object(dev_m);
  dt_opencl_release_mem_object(dev_r);
  dt_free_align(maximum);
  dt_print(DT_DEBUG_OPENCL, "[opencl_global_tonemap] couldn't enqueue kernel! %d\n", err);
  return FALSE;
}
#endif


void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece,
                     const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out,
                     struct dt_develop_tiling_t *tiling)
{
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;

  const float scale = piece->iscale / roi_in->scale;
  const float iw = piece->buf_in.width / scale;
  const float ih = piece->buf_in.height / scale;
  const float sigma_s = fminf(iw, ih) * 0.03f;
  const float sigma_r = 8.0f;
  const int detail = (d->detail != 0.0f);

  const int width = roi_in->width;
  const int height = roi_in->height;
  const int channels = piece->colors;

  const size_t basebuffer = width * height * channels * sizeof(float);

  tiling->factor = 2.0f + (detail ? (float)dt_bilateral_memory_use2(width, height, sigma_s, sigma_r) / basebuffer : 0.0f);
  tiling->maxbuf
      = (detail ? MAX(1.0f, (float)dt_bilateral_singlebuffer_size2(width, height, sigma_s, sigma_r) / basebuffer) : 1.0f);
  tiling->overhead = 0;
  tiling->overlap = (detail ? ceilf(4 * sigma_s) : 0);
  tiling->xalign = 1;
  tiling->yalign = 1;
  return;
}

void commit_params(struct dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_t *pipe,
                   dt_dev_pixelpipe_iop_t *piece)
{
  dt_iop_global_tonemap_params_t *p = (dt_iop_global_tonemap_params_t *)p1;
  dt_iop_global_tonemap_data_t *d = (dt_iop_global_tonemap_data_t *)piece->data;

  d->operator= p->operator;
  d->drago.bias = p->drago.bias;
  d->drago.max_light = p->drago.max_light;
  d->detail = p->detail;

  // drago needs the maximum L-value of the whole image so it must not use tiling
  if(d->operator == OPERATOR_DRAGO) piece->process_tiling_ready = 0;

#ifdef HAVE_OPENCL
  if(d->detail != 0.0f)
    piece->process_cl_ready = (piece->process_cl_ready && !(darktable.opencl->avoid_atomics));
#endif
}
Exemplo n.º 22
0
void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, void *ivoid, void *ovoid,
             const dt_iop_roi_t *roi_in, const dt_iop_roi_t *roi_out)
{
  const dt_iop_colorout_data_t *const d = (dt_iop_colorout_data_t *)piece->data;
  const int ch = piece->colors;
  const int gamutcheck = (d->mode == DT_PROFILE_GAMUTCHECK);

  if(!isnan(d->cmatrix[0]))
  {
// fprintf(stderr,"Using cmatrix codepath\n");
// convert to rgb using matrix
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none) shared(roi_in, roi_out, ivoid, ovoid)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {

      float *in = (float *)ivoid + (size_t)ch * roi_in->width * j;
      float *out = (float *)ovoid + (size_t)ch * roi_out->width * j;
      const __m128 m0 = _mm_set_ps(0.0f, d->cmatrix[6], d->cmatrix[3], d->cmatrix[0]);
      const __m128 m1 = _mm_set_ps(0.0f, d->cmatrix[7], d->cmatrix[4], d->cmatrix[1]);
      const __m128 m2 = _mm_set_ps(0.0f, d->cmatrix[8], d->cmatrix[5], d->cmatrix[2]);

      for(int i = 0; i < roi_out->width; i++, in += ch, out += ch)
      {
        const __m128 xyz = dt_Lab_to_XYZ_SSE(_mm_load_ps(in));
        const __m128 t
            = _mm_add_ps(_mm_mul_ps(m0, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(0, 0, 0, 0))),
                         _mm_add_ps(_mm_mul_ps(m1, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(1, 1, 1, 1))),
                                    _mm_mul_ps(m2, _mm_shuffle_ps(xyz, xyz, _MM_SHUFFLE(2, 2, 2, 2)))));

        _mm_stream_ps(out, t);
      }
    }
    _mm_sfence();
// apply profile
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none) shared(roi_in, roi_out, ivoid, ovoid)
#endif
    for(int j = 0; j < roi_out->height; j++)
    {

      float *in = (float *)ivoid + (size_t)ch * roi_in->width * j;
      float *out = (float *)ovoid + (size_t)ch * roi_out->width * j;

      for(int i = 0; i < roi_out->width; i++, in += ch, out += ch)
      {
        for(int i = 0; i < 3; i++)
          if(d->lut[i][0] >= 0.0f)
          {
            out[i] = (out[i] < 1.0f) ? lerp_lut(d->lut[i], out[i])
                                     : dt_iop_eval_exp(d->unbounded_coeffs[i], out[i]);
          }
      }
    }
  }
  else
  {
    // fprintf(stderr,"Using xform codepath\n");
    const __m128 outofgamutpixel = _mm_set_ps(0.0f, 1.0f, 1.0f, 0.0f);
#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none) shared(ivoid, ovoid, roi_out)
#endif
    for(int k = 0; k < roi_out->height; k++)
    {
      const float *in = ((float *)ivoid) + (size_t)ch * k * roi_out->width;
      float *out = ((float *)ovoid) + (size_t)ch * k * roi_out->width;

      if(!gamutcheck)
      {
        cmsDoTransform(d->xform, in, out, roi_out->width);
      }
      else
      {
        void *rgb = dt_alloc_align(16, 4 * sizeof(float) * roi_out->width);
        cmsDoTransform(d->xform, in, rgb, roi_out->width);
        float *rgbptr = (float *)rgb;
        for(int j = 0; j < roi_out->width; j++, rgbptr += 4, out += 4)
        {
          const __m128 pixel = _mm_load_ps(rgbptr);
          __m128 ingamut = _mm_cmplt_ps(pixel, _mm_set_ps(-FLT_MAX, 0.0f, 0.0f, 0.0f));

          ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut));
          ingamut = _mm_or_ps(_mm_unpacklo_ps(ingamut, ingamut), _mm_unpackhi_ps(ingamut, ingamut));

          const __m128 result
              = _mm_or_ps(_mm_and_ps(ingamut, outofgamutpixel), _mm_andnot_ps(ingamut, pixel));
          _mm_stream_ps(out, result);
        }
        dt_free_align(rgb);
      }
    }
    _mm_sfence();
  }

  if(piece->pipe->mask_display) dt_iop_alpha_copy(ivoid, ovoid, roi_out->width, roi_out->height);
}