Пример #1
0
void
dt_image_cache_init(dt_image_cache_t *cache)
{
  // the image cache does no serialization.
  // (unsafe. data should be in db/xmp, not in any other additional cache,
  // also, it should be relatively fast to get the image_t structs from sql.)
  // TODO: actually an independent conf var?
  //       too large: dangerous and wasteful?
  //       can we get away with a fixed size?
  const uint32_t max_mem = 50*1024*1024;
  uint32_t num = (uint32_t)(1.5f*max_mem/sizeof(dt_image_t));
  dt_cache_init(&cache->cache, num, 16, 64, max_mem);
  dt_cache_set_allocate_callback(&cache->cache, &dt_image_cache_allocate,   cache);
  dt_cache_set_cleanup_callback (&cache->cache, &dt_image_cache_deallocate, cache);

  // might have been rounded to power of two:
  num = dt_cache_capacity(&cache->cache);
  cache->images = dt_alloc_align(64, sizeof(dt_image_t)*num);
  dt_print(DT_DEBUG_CACHE, "[image_cache] has %d entries\n", num);
  // initialize first image as empty data:
  dt_image_init(cache->images);
  for(uint32_t k=1; k<num; k++)
  {
    // optimized initialization (avoid accessing conf):
    memcpy(cache->images + k, cache->images, sizeof(dt_image_t));
  }
}
Пример #2
0
void dt_image_cache_init(dt_image_cache_t *cache)
{
  // the image cache does no serialization.
  // (unsafe. data should be in db/xmp, not in any other additional cache,
  // also, it should be relatively fast to get the image_t structs from sql.)
  // TODO: actually an independent conf var?
  //       too large: dangerous and wasteful?
  //       can we get away with a fixed size?
  const uint32_t max_mem = 50 * 1024 * 1024;
  uint32_t num = (uint32_t)(1.5f * max_mem / sizeof(dt_image_t));
  dt_cache_init(&cache->cache, sizeof(dt_image_t), max_mem);
  dt_cache_set_allocate_callback(&cache->cache, &dt_image_cache_allocate, cache);
  dt_cache_set_cleanup_callback(&cache->cache, &dt_image_cache_deallocate, cache);

  dt_print(DT_DEBUG_CACHE, "[image_cache] has %d entries\n", num);
}
Пример #3
0
int main(int argc, char *arg[])
{
  dt_cache_t cache;
  // dt_cache_init(&cache, 110000, 16, 64, 100000);
  // really hammer it, make quota insanely low:
  dt_cache_init(&cache, 110000, 16, 64, 100);
  dt_cache_set_allocate_callback(&cache, alloc_dummy, NULL);

#ifdef _OPENMP
#  pragma omp parallel for default(none) schedule(guided) shared(cache, stderr) num_threads(16)
#endif
  for(int k=0;k<100000;k++)
  {
    void *data = (void *)(long int)k;
    const int size = 0;//dt_cache_size(&cache);
    const int con1 = dt_cache_contains(&cache, k);
    const int val1 = (int)(long int)dt_cache_read_get(&cache, k);
    const int val2 = (int)(long int)dt_cache_read_get(&cache, k);
    // fprintf(stderr, "\rinserted number %d, size %d, value %d - %d, contains %d - %d", k, size, val1, val2, con1, con2);
    const int con2 = dt_cache_contains(&cache, k);
    assert (con1 == 0);
    assert (con2 == 1);
    assert (val2 == k);
    dt_cache_read_release(&cache, k);
    dt_cache_read_release(&cache, k);
  }
  dt_cache_print_locked(&cache);
  // fprintf(stderr, "\n");
  fprintf(stderr, "[passed] inserting 100000 entries concurrently\n");

  const int size = dt_cache_size(&cache);
  const int lru_cnt   = lru_check_consistency(&cache);
  const int lru_cnt_r = lru_check_consistency_reverse(&cache);
  // fprintf(stderr, "lru list contains %d|%d/%d entries\n", lru_cnt, lru_cnt_r, size);
  assert(size == lru_cnt);
  assert(lru_cnt_r == lru_cnt);
  fprintf(stderr, "[passed] cache lru consistency after removals, have %d entries left.\n", size);

  dt_cache_cleanup(&cache);
  exit(0);
}
Пример #4
0
void dt_mipmap_cache_init(dt_mipmap_cache_t *cache)
{
  // make sure static memory is initialized
  struct dt_mipmap_buffer_dsc *dsc = (struct dt_mipmap_buffer_dsc *)dt_mipmap_cache_static_dead_image;
  dead_image_f((dt_mipmap_buffer_t *)(dsc+1));

  cache->compression_type = 0;
  gchar *compression = dt_conf_get_string("cache_compression");
  if(compression)
  {
    if(!strcmp(compression, "low quality (fast)"))
      cache->compression_type = 1;
    else if(!strcmp(compression, "high quality (slow)"))
      cache->compression_type = 2;
    g_free(compression);
  }

  dt_print(DT_DEBUG_CACHE, "[mipmap_cache_init] using %s\n", cache->compression_type == 0 ? "no compression" :
           (cache->compression_type == 1 ? "low quality compression" : "slow high quality compression"));

  // adjust numbers to be large enough to hold what mem limit suggests.
  // we want at least 100MB, and consider 8G just still reasonable.
  size_t max_mem = CLAMPS(dt_conf_get_int64("cache_memory"), 100u<<20, ((uint64_t)8)<<30);
  const uint32_t parallel = CLAMP(dt_conf_get_int ("worker_threads")*dt_conf_get_int("parallel_export"), 1, 8);
  const int32_t max_size = 2048, min_size = 32;
  int32_t wd = darktable.thumbnail_width;
  int32_t ht = darktable.thumbnail_height;
  wd = CLAMPS(wd, min_size, max_size);
  ht = CLAMPS(ht, min_size, max_size);
  // round up to a multiple of 8, so we can divide by two 3 times
  if(wd & 0xf) wd = (wd & ~0xf) + 0x10;
  if(ht & 0xf) ht = (ht & ~0xf) + 0x10;
  // cache these, can't change at runtime:
  cache->mip[DT_MIPMAP_F].max_width  = wd;
  cache->mip[DT_MIPMAP_F].max_height = ht;
  cache->mip[DT_MIPMAP_F-1].max_width  = wd;
  cache->mip[DT_MIPMAP_F-1].max_height = ht;
  for(int k=DT_MIPMAP_F-2; k>=DT_MIPMAP_0; k--)
  {
    cache->mip[k].max_width  = cache->mip[k+1].max_width  / 2;
    cache->mip[k].max_height = cache->mip[k+1].max_height / 2;
  }

  // initialize some per-thread cached scratchmem for uncompressed buffers during thumb creation:
  if(cache->compression_type)
  {
    cache->scratchmem.max_width = wd;
    cache->scratchmem.max_height = ht;
    cache->scratchmem.buffer_size = wd*ht*sizeof(uint32_t);
    cache->scratchmem.size = DT_MIPMAP_3; // at max.
    // TODO: use thread local storage instead (zero performance penalty on linux)
    dt_cache_init(&cache->scratchmem.cache, parallel, parallel, 64, 0.9f*parallel*wd*ht*sizeof(uint32_t));
    // might have been rounded to power of two:
    const int cnt = dt_cache_capacity(&cache->scratchmem.cache);
    cache->scratchmem.buf = dt_alloc_align(64, cnt * wd*ht*sizeof(uint32_t));
    dt_cache_static_allocation(&cache->scratchmem.cache, (uint8_t *)cache->scratchmem.buf, wd*ht*sizeof(uint32_t));
    dt_cache_set_allocate_callback(&cache->scratchmem.cache,
                                   scratchmem_allocate, &cache->scratchmem);
    dt_print(DT_DEBUG_CACHE,
             "[mipmap_cache_init] cache has % 5d entries for temporary compression buffers (% 4.02f MB).\n",
             cnt, cnt* wd*ht*sizeof(uint32_t)/(1024.0*1024.0));
  }

  for(int k=DT_MIPMAP_3; k>=0; k--)
  {
    // clear stats:
    cache->mip[k].stats_requests = 0;
    cache->mip[k].stats_near_match = 0;
    cache->mip[k].stats_misses = 0;
    cache->mip[k].stats_fetches = 0;
    cache->mip[k].stats_standin = 0;
    // buffer stores width and height + actual data
    const int width  = cache->mip[k].max_width;
    const int height = cache->mip[k].max_height;
    // header + adjusted for dxt compression:
    cache->mip[k].buffer_size = 4*sizeof(uint32_t) + compressed_buffer_size(cache->compression_type, width, height);
    cache->mip[k].size = k;
    // level of parallelism also gives minimum size (which is twice that)
    // is rounded to a power of two by the cache anyways, we might as well.
    // XXX this needs adjustment for video mode (more full-res thumbs for replay)
    // TODO: collect hit/miss stats and auto-adjust to user browsing behaviour
    // TODO: can #prefetches be collected this way, too?
    const size_t max_mem2 = MAX(0, (k == 0) ? (max_mem) : (max_mem/(k+4)));
    uint32_t thumbnails = MAX(2, nearest_power_of_two((uint32_t)((double)max_mem2/cache->mip[k].buffer_size)));
    while(thumbnails > parallel && (size_t)thumbnails * cache->mip[k].buffer_size > max_mem2) thumbnails /= 2;

    // try to utilize that memory well (use 90% quota), the hopscotch paper claims good scalability up to
    // even more than that.
    dt_cache_init(&cache->mip[k].cache, thumbnails,
                  parallel,
                  64, 0.9f*thumbnails*cache->mip[k].buffer_size);

    // might have been rounded to power of two:
    thumbnails = dt_cache_capacity(&cache->mip[k].cache);
    max_mem -= thumbnails * cache->mip[k].buffer_size;
    // dt_print(DT_DEBUG_CACHE, "[mipmap mem] %4.02f left\n", max_mem/(1024.0*1024.0));
    cache->mip[k].buf = dt_alloc_align(64, thumbnails * cache->mip[k].buffer_size);
    dt_cache_static_allocation(&cache->mip[k].cache, (uint8_t *)cache->mip[k].buf, cache->mip[k].buffer_size);
    dt_cache_set_allocate_callback(&cache->mip[k].cache,
                                   dt_mipmap_cache_allocate, &cache->mip[k]);
    // dt_cache_set_cleanup_callback(&cache->mip[k].cache,
    // &dt_mipmap_cache_deallocate, &cache->mip[k]);

    dt_print(DT_DEBUG_CACHE,
             "[mipmap_cache_init] cache has % 5d entries for mip %d (% 4.02f MB).\n",
             thumbnails, k, thumbnails * cache->mip[k].buffer_size/(1024.0*1024.0));
  }

  // full buffer needs dynamic alloc:
  const int full_entries = MAX(2, parallel); // even with one thread you want two buffers. one for dr one for thumbs.
  int32_t max_mem_bufs = nearest_power_of_two(full_entries);

  // for this buffer, because it can be very busy during import, we want the minimum
  // number of entries in the hashtable to be 16, but leave the quota as is. the dynamic
  // alloc/free properties of this cache take care that no more memory is required.
  dt_cache_init(&cache->mip[DT_MIPMAP_FULL].cache, max_mem_bufs, parallel, 64, max_mem_bufs);
  dt_cache_set_allocate_callback(&cache->mip[DT_MIPMAP_FULL].cache,
                                 dt_mipmap_cache_allocate_dynamic, &cache->mip[DT_MIPMAP_FULL]);
  // dt_cache_set_cleanup_callback(&cache->mip[DT_MIPMAP_FULL].cache,
  // &dt_mipmap_cache_deallocate_dynamic, &cache->mip[DT_MIPMAP_FULL]);
  cache->mip[DT_MIPMAP_FULL].buffer_size = 0;
  cache->mip[DT_MIPMAP_FULL].size = DT_MIPMAP_FULL;
  cache->mip[DT_MIPMAP_FULL].buf = NULL;

  // same for mipf:
  dt_cache_init(&cache->mip[DT_MIPMAP_F].cache, max_mem_bufs, parallel, 64, max_mem_bufs);
  dt_cache_set_allocate_callback(&cache->mip[DT_MIPMAP_F].cache,
                                 dt_mipmap_cache_allocate_dynamic, &cache->mip[DT_MIPMAP_F]);
  dt_cache_set_cleanup_callback(&cache->mip[DT_MIPMAP_F].cache,
                                dt_mipmap_cache_deallocate_dynamic, &cache->mip[DT_MIPMAP_F]);
  cache->mip[DT_MIPMAP_F].buffer_size = 4*sizeof(uint32_t) +
                                        4*sizeof(float) * cache->mip[DT_MIPMAP_F].max_width * cache->mip[DT_MIPMAP_F].max_height;
  cache->mip[DT_MIPMAP_F].size = DT_MIPMAP_F;
  cache->mip[DT_MIPMAP_F].buf = NULL;

  dt_mipmap_cache_deserialize(cache);
}