Example #1
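// print fill level and buffer usage for every mipmap cache level
// (plus the scratch buffer, if compression is enabled)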
void dt_mipmap_cache_print(dt_mipmap_cache_t *cache)
{
  for(int k=0; k<(int)DT_MIPMAP_F; k++)
  {
    printf("[mipmap_cache] level %d fill %.2f/%.2f MB (%.2f%% in %u/%u buffers)\n", k, cache->mip[k].cache.cost/(1024.0*1024.0),
           cache->mip[k].cache.cost_quota/(1024.0*1024.0),
           100.0f*(float)cache->mip[k].cache.cost/(float)cache->mip[k].cache.cost_quota,
           dt_cache_size(&cache->mip[k].cache),
           dt_cache_capacity(&cache->mip[k].cache));
  }
  for(int k=(int)DT_MIPMAP_F; k<=(int)DT_MIPMAP_FULL; k++)
  {
    printf("[mipmap_cache] level [f%d] fill %d/%d slots (%.2f%% in %u/%u buffers)\n", k, cache->mip[k].cache.cost,
           cache->mip[k].cache.cost_quota,
           100.0f*(float)cache->mip[k].cache.cost/(float)cache->mip[k].cache.cost_quota,
           dt_cache_size(&cache->mip[k].cache),
           dt_cache_capacity(&cache->mip[k].cache));
  }
  if(cache->compression_type)
  {
    printf("[mipmap_cache] scratch fill %.2f/%.2f MB (%.2f%% in %u/%u buffers)\n", cache->scratchmem.cache.cost/(1024.0*1024.0),
           cache->scratchmem.cache.cost_quota/(1024.0*1024.0),
           100.0f*(float)cache->scratchmem.cache.cost/(float)cache->scratchmem.cache.cost_quota,
           dt_cache_size(&cache->scratchmem.cache),
           dt_cache_capacity(&cache->scratchmem.cache));
  }
  printf("\n\n");
  // very verbose stats about locks/users
  //dt_cache_print(&cache->mip[DT_MIPMAP_3].cache);
}
Example #2
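// print fill level and buffer usage for every mipmap cache level,
// followed by per-level request/fetch/stand-in statistics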
void dt_mipmap_cache_print(dt_mipmap_cache_t *cache)
{
  for(int k=0; k<(int)DT_MIPMAP_F; k++)
  {
    printf("[mipmap_cache] level [i%d] (%4dx%4d) fill %.2f/%.2f MB (%.2f%% in %u/%u buffers)\n", k,
        cache->mip[k].max_width, cache->mip[k].max_height, cache->mip[k].cache.cost/(1024.0*1024.0),
           cache->mip[k].cache.cost_quota/(1024.0*1024.0),
           100.0f*(float)cache->mip[k].cache.cost/(float)cache->mip[k].cache.cost_quota,
           dt_cache_size(&cache->mip[k].cache),
           dt_cache_capacity(&cache->mip[k].cache));
  }
  for(int k=(int)DT_MIPMAP_F; k<=(int)DT_MIPMAP_FULL; k++)
  {
    printf("[mipmap_cache] level [f%d] fill %d/%d slots (%.2f%% in %u/%u buffers)\n", k,
           (uint32_t)cache->mip[k].cache.cost,
           (uint32_t)cache->mip[k].cache.cost_quota,
           100.0f*(float)cache->mip[k].cache.cost/(float)cache->mip[k].cache.cost_quota,
           dt_cache_size(&cache->mip[k].cache),
           dt_cache_capacity(&cache->mip[k].cache));
  }
  if(cache->compression_type)
  {
    printf("[mipmap_cache] scratch fill %.2f/%.2f MB (%.2f%% in %u/%u buffers)\n", cache->scratchmem.cache.cost/(1024.0*1024.0),
           cache->scratchmem.cache.cost_quota/(1024.0*1024.0),
           100.0f*(float)cache->scratchmem.cache.cost/(float)cache->scratchmem.cache.cost_quota,
           dt_cache_size(&cache->scratchmem.cache),
           dt_cache_capacity(&cache->scratchmem.cache));
  }
  uint64_t sum = 0;
  uint64_t sum_fetches = 0;
  uint64_t sum_standins = 0;
  for(int k=0; k<=(int)DT_MIPMAP_FULL; k++)
  {
    sum += cache->mip[k].stats_requests;
    sum_fetches += cache->mip[k].stats_fetches;
    sum_standins += cache->mip[k].stats_standin;
  }
  printf("[mipmap_cache] level | near match | miss | stand-in | fetches | total rq\n");
  for(int k=0; k<=(int)DT_MIPMAP_FULL; k++)
    printf("[mipmap_cache] %c%d    | %6.2f%% | %6.2f%% | %6.2f%%  | %6.2f%% | %6.2f%%\n", k > 3 ? 'f' : 'i', k,
        100.0*cache->mip[k].stats_near_match/(float)cache->mip[k].stats_requests,
        100.0*cache->mip[k].stats_misses/(float)cache->mip[k].stats_requests,
        100.0*cache->mip[k].stats_standin/(float)sum_standins,
        100.0*cache->mip[k].stats_fetches/(float)sum_fetches,
        100.0*cache->mip[k].stats_requests/(float)sum);
  printf("\n\n");
  // very verbose stats about locks/users
  //dt_cache_print(&cache->mip[DT_MIPMAP_3].cache);
}
Example #3
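// initialize the image (metadata) cache: a fixed-size pool of dt_image_t structs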
void
dt_image_cache_init(dt_image_cache_t *cache)
{
  // the image cache does no serialization.
  // (unsafe. data should be in db/xmp, not in any other additional cache,
  // also, it should be relatively fast to get the image_t structs from sql.)
  // TODO: actually an independent conf var?
  //       too large: dangerous and wasteful?
  //       can we get away with a fixed size?
  const uint32_t max_mem = 50*1024*1024;
  uint32_t num = (uint32_t)(1.5f*max_mem/sizeof(dt_image_t));
  dt_cache_init(&cache->cache, num, 16, 64, max_mem);
  dt_cache_set_allocate_callback(&cache->cache, &dt_image_cache_allocate,   cache);
  dt_cache_set_cleanup_callback (&cache->cache, &dt_image_cache_deallocate, cache);

  // might have been rounded to power of two:
  num = dt_cache_capacity(&cache->cache);
  cache->images = dt_alloc_align(64, sizeof(dt_image_t)*num);
  dt_print(DT_DEBUG_CACHE, "[image_cache] has %d entries\n", num);
  // initialize first image as empty data:
  dt_image_init(cache->images);
  for(uint32_t k=1; k<num; k++)
  {
    // optimized initialization (avoid accessing conf):
    memcpy(cache->images + k, cache->images, sizeof(dt_image_t));
  }
}
Example #4
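// initialize all mipmap cache levels: statically allocated thumbnail pools for the
// small levels, optional scratch buffers for compression, and dynamically allocated
// F and FULL levels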
void dt_mipmap_cache_init(dt_mipmap_cache_t *cache)
{
  // make sure static memory is initialized
  struct dt_mipmap_buffer_dsc *dsc = (struct dt_mipmap_buffer_dsc *)dt_mipmap_cache_static_dead_image;
  dead_image_f((dt_mipmap_buffer_t *)(dsc+1));

  cache->compression_type = 0;
  gchar *compression = dt_conf_get_string("cache_compression");
  if(compression)
  {
    if(!strcmp(compression, "low quality (fast)"))
      cache->compression_type = 1;
    else if(!strcmp(compression, "high quality (slow)"))
      cache->compression_type = 2;
    g_free(compression);
  }

  dt_print(DT_DEBUG_CACHE, "[mipmap_cache_init] using %s\n", cache->compression_type == 0 ? "no compression" :
           (cache->compression_type == 1 ? "low quality compression" : "slow high quality compression"));

  // adjust the numbers to be large enough to hold what the memory limit suggests.
  // we want at least 100MB, and consider 8GB the upper bound of what is still reasonable.
  size_t max_mem = CLAMPS(dt_conf_get_int64("cache_memory"), 100u<<20, ((uint64_t)8)<<30);
  const uint32_t parallel = CLAMP(dt_conf_get_int ("worker_threads")*dt_conf_get_int("parallel_export"), 1, 8);
  const int32_t max_size = 2048, min_size = 32;
  int32_t wd = darktable.thumbnail_width;
  int32_t ht = darktable.thumbnail_height;
  wd = CLAMPS(wd, min_size, max_size);
  ht = CLAMPS(ht, min_size, max_size);
  // round up to a multiple of 16, so we can divide by two 3 times and still get even sizes
  if(wd & 0xf) wd = (wd & ~0xf) + 0x10;
  if(ht & 0xf) ht = (ht & ~0xf) + 0x10;
  // cache these, can't change at runtime:
  cache->mip[DT_MIPMAP_F].max_width  = wd;
  cache->mip[DT_MIPMAP_F].max_height = ht;
  cache->mip[DT_MIPMAP_F-1].max_width  = wd;
  cache->mip[DT_MIPMAP_F-1].max_height = ht;
  for(int k=DT_MIPMAP_F-2; k>=DT_MIPMAP_0; k--)
  {
    cache->mip[k].max_width  = cache->mip[k+1].max_width  / 2;
    cache->mip[k].max_height = cache->mip[k+1].max_height / 2;
  }

  // initialize some per-thread cached scratchmem for uncompressed buffers during thumb creation:
  if(cache->compression_type)
  {
    cache->scratchmem.max_width = wd;
    cache->scratchmem.max_height = ht;
    cache->scratchmem.buffer_size = wd*ht*sizeof(uint32_t);
    cache->scratchmem.size = DT_MIPMAP_3; // at max.
    // TODO: use thread local storage instead (zero performance penalty on linux)
    dt_cache_init(&cache->scratchmem.cache, parallel, parallel, 64, 0.9f*parallel*wd*ht*sizeof(uint32_t));
    // might have been rounded to power of two:
    const int cnt = dt_cache_capacity(&cache->scratchmem.cache);
    cache->scratchmem.buf = dt_alloc_align(64, cnt * wd*ht*sizeof(uint32_t));
    dt_cache_static_allocation(&cache->scratchmem.cache, (uint8_t *)cache->scratchmem.buf, wd*ht*sizeof(uint32_t));
    dt_cache_set_allocate_callback(&cache->scratchmem.cache,
                                   scratchmem_allocate, &cache->scratchmem);
    dt_print(DT_DEBUG_CACHE,
             "[mipmap_cache_init] cache has % 5d entries for temporary compression buffers (% 4.02f MB).\n",
             cnt, cnt* wd*ht*sizeof(uint32_t)/(1024.0*1024.0));
  }

  for(int k=DT_MIPMAP_3; k>=0; k--)
  {
    // clear stats:
    cache->mip[k].stats_requests = 0;
    cache->mip[k].stats_near_match = 0;
    cache->mip[k].stats_misses = 0;
    cache->mip[k].stats_fetches = 0;
    cache->mip[k].stats_standin = 0;
    // buffer stores width and height + actual data
    const int width  = cache->mip[k].max_width;
    const int height = cache->mip[k].max_height;
    // header + adjusted for dxt compression:
    cache->mip[k].buffer_size = 4*sizeof(uint32_t) + compressed_buffer_size(cache->compression_type, width, height);
    cache->mip[k].size = k;
    // the level of parallelism also gives a minimum size (which is twice that number);
    // the count is rounded to a power of two by the cache anyway, so we might as well do it here.
    // XXX this needs adjustment for video mode (more full-res thumbs for replay)
    // TODO: collect hit/miss stats and auto-adjust to user browsing behaviour
    // TODO: can #prefetches be collected this way, too?
    const size_t max_mem2 = MAX(0, (k == 0) ? (max_mem) : (max_mem/(k+4)));
    uint32_t thumbnails = MAX(2, nearest_power_of_two((uint32_t)((double)max_mem2/cache->mip[k].buffer_size)));
    while(thumbnails > parallel && (size_t)thumbnails * cache->mip[k].buffer_size > max_mem2) thumbnails /= 2;

    // try to utilize that memory well (use a 90% quota); the hopscotch paper claims
    // good scalability even beyond that fill level.
    dt_cache_init(&cache->mip[k].cache, thumbnails,
                  parallel,
                  64, 0.9f*thumbnails*cache->mip[k].buffer_size);

    // might have been rounded to power of two:
    thumbnails = dt_cache_capacity(&cache->mip[k].cache);
    max_mem -= thumbnails * cache->mip[k].buffer_size;
    // dt_print(DT_DEBUG_CACHE, "[mipmap mem] %4.02f left\n", max_mem/(1024.0*1024.0));
    cache->mip[k].buf = dt_alloc_align(64, thumbnails * cache->mip[k].buffer_size);
    dt_cache_static_allocation(&cache->mip[k].cache, (uint8_t *)cache->mip[k].buf, cache->mip[k].buffer_size);
    dt_cache_set_allocate_callback(&cache->mip[k].cache,
                                   dt_mipmap_cache_allocate, &cache->mip[k]);
    // dt_cache_set_cleanup_callback(&cache->mip[k].cache,
    // &dt_mipmap_cache_deallocate, &cache->mip[k]);

    dt_print(DT_DEBUG_CACHE,
             "[mipmap_cache_init] cache has % 5d entries for mip %d (% 4.02f MB).\n",
             thumbnails, k, thumbnails * cache->mip[k].buffer_size/(1024.0*1024.0));
  }

  // full buffer needs dynamic alloc:
  const int full_entries = MAX(2, parallel); // even with one thread you want two buffers: one for darkroom, one for thumbnails.
  int32_t max_mem_bufs = nearest_power_of_two(full_entries);

  // because this buffer can be very busy during import, we want at least 16 entries
  // in the hashtable, but leave the quota as is. the dynamic alloc/free behaviour
  // of this cache ensures that no more memory than that is actually used.
  dt_cache_init(&cache->mip[DT_MIPMAP_FULL].cache, MAX(16, max_mem_bufs), parallel, 64, max_mem_bufs);
  dt_cache_set_allocate_callback(&cache->mip[DT_MIPMAP_FULL].cache,
                                 dt_mipmap_cache_allocate_dynamic, &cache->mip[DT_MIPMAP_FULL]);
  // dt_cache_set_cleanup_callback(&cache->mip[DT_MIPMAP_FULL].cache,
  // &dt_mipmap_cache_deallocate_dynamic, &cache->mip[DT_MIPMAP_FULL]);
  cache->mip[DT_MIPMAP_FULL].buffer_size = 0;
  cache->mip[DT_MIPMAP_FULL].size = DT_MIPMAP_FULL;
  cache->mip[DT_MIPMAP_FULL].buf = NULL;

  // same for mipf:
  dt_cache_init(&cache->mip[DT_MIPMAP_F].cache, max_mem_bufs, parallel, 64, max_mem_bufs);
  dt_cache_set_allocate_callback(&cache->mip[DT_MIPMAP_F].cache,
                                 dt_mipmap_cache_allocate_dynamic, &cache->mip[DT_MIPMAP_F]);
  dt_cache_set_cleanup_callback(&cache->mip[DT_MIPMAP_F].cache,
                                dt_mipmap_cache_deallocate_dynamic, &cache->mip[DT_MIPMAP_F]);
  cache->mip[DT_MIPMAP_F].buffer_size = 4*sizeof(uint32_t) +
                                        4*sizeof(float) * cache->mip[DT_MIPMAP_F].max_width * cache->mip[DT_MIPMAP_F].max_height;
  cache->mip[DT_MIPMAP_F].size = DT_MIPMAP_F;
  cache->mip[DT_MIPMAP_F].buf = NULL;

  dt_mipmap_cache_deserialize(cache);
}