int TileManager::get_neighbor_index(int index, int neighbor) { static const int dx[] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}, dy[] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); int tile_w = (tile_size.x >= image_w)? 1: divide_up(image_w, tile_size.x); int tile_h = (tile_size.y >= image_h)? 1: divide_up(image_h, tile_size.y); int nx = state.tiles[index].x/tile_size.x + dx[neighbor], ny = state.tiles[index].y/tile_size.y + dy[neighbor]; if(nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h) return -1; return ny*state.tile_stride + nx; }
void DenoisingTask::setup_denoising_buffer() { /* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */ rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w); rect = rect_expand(rect, radius); rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3])); buffer.passes = 14; buffer.width = rect.z - rect.x; buffer.stride = align_up(buffer.width, 4); buffer.h = rect.w - rect.y; int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float)); buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats); /* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */ int mem_size = align_up(buffer.pass_stride * buffer.passes + 4, alignment_floats); buffer.mem.alloc_to_device(mem_size, false); /* CPUs process shifts sequentially while GPUs process them in parallel. */ int num_layers; if(buffer.gpu_temporary_mem) { /* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */ int max_radius = max(radius, 6); int num_shifts = (2*max_radius + 1) * (2*max_radius + 1); num_layers = 2*num_shifts + 1; } else { num_layers = 3; } /* Allocate two layers per shift as well as one for the weight accumulation. */ buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride); }
/* Ceiling division for the hash tree sizing below.
 *
 * Written as quotient plus remainder test instead of (n + d - 1) / d so the
 * intermediate value cannot wrap for n close to UINT64_MAX. d must be non-zero. */
static inline uint64_t hash_tree_ceil_div(uint64_t n, uint64_t d)
{
	return n / d + ((n % d != 0) ? 1 : 0);
}

/** @brief Compute the number of hash blocks needed
 *
 * Does not include empty branches in computation
 *
 * Each level holds ceil(previous level / fanout) blocks, starting from the
 * data blocks, until a single top level block remains.
 *
 * @param data_blocks Number of data blocks, must be non zero
 * @param fanout Tree fanout, must be non zero; a fanout of 1 is only accepted
 *        for a single data block because the tree would never converge otherwise
 * @param levels[out] Number of tree levels (not including data level)
 * @param hash_blocks[out] Number of necessary hash blocks
 * @param blocks_per_level[out] Number of hash blocks on each level, starting
 *        with the level directly above the data blocks and ending with the top
 *        level (always 1). The caller must provide room for every level; with
 *        fanout >= 2 that is at most 33 entries. Entries may have been written
 *        even if an error is returned.
 *
 * @return Non zero value means error, else 0. On error levels and hash_blocks
 *         are both set to 0.
 */
int compute_hash_blocks(uint64_t data_blocks, uint32_t fanout,
		uint32_t *levels, uint32_t *hash_blocks, uint32_t *blocks_per_level){
	*levels = 0;
	*hash_blocks = 0;

	// No data or no fanout: there is no tree to describe (and the level
	// computation below would divide by zero or never terminate)
	if (data_blocks == 0 || fanout == 0) {
		return -1;
	}
	// A fanout of 1 never reduces the block count, so it can not converge
	if (fanout == 1 && data_blocks != 1) {
		return -1;
	}

	// Keep the count in 64 bits so an oversized first level is detected
	// instead of being silently narrowed
	uint64_t count = hash_tree_ceil_div(data_blocks, fanout);
	if (count > UINT32_MAX) {
		return -1;
	}

	uint32_t depth = 0;
	uint64_t total = 0;
	while (count != 1) {
		blocks_per_level[depth] = (uint32_t)count;
		total += count;
		depth += 1;
		count = hash_tree_ceil_div(count, fanout);
	}

	// Top level
	blocks_per_level[depth] = 1;
	depth += 1;
	total += 1;

	// The sum has to fit the 32 bit output
	if (total > UINT32_MAX) {
		return -1;
	}

	*levels = depth;
	*hash_blocks = (uint32_t)total;
	return 0;
}
/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device. * If sliced is true, slice image into as much pieces as how many devices are rendering this image. */ int TileManager::gen_tiles(bool sliced) { int resolution = state.resolution_divider; int image_w = max(1, params.width/resolution); int image_h = max(1, params.height/resolution); int2 center = make_int2(image_w/2, image_h/2); int num_logical_devices = preserve_tile_device? num_devices: 1; int num = min(image_h, num_logical_devices); int slice_num = sliced? num: 1; int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); state.tiles.clear(); state.render_tiles.clear(); state.denoising_tiles.clear(); state.render_tiles.resize(num); state.denoising_tiles.resize(num); state.tile_stride = tile_w; vector<list<int> >::iterator tile_list; tile_list = state.render_tiles.begin(); if(tile_order == TILE_HILBERT_SPIRAL) { assert(!sliced); int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); state.tiles.resize(tile_w*tile_h); /* Size of blocks in tiles, must be a power of 2 */ const int hilbert_size = (max(tile_size.x, tile_size.y) <= 12)? 8: 4; int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; int2 block_size = tile_size * make_int2(hilbert_size, hilbert_size); /* Number of blocks to fill the image */ int blocks_x = (block_size.x >= image_w)? 1: divide_up(image_w, block_size.x); int blocks_y = (block_size.y >= image_h)? 1: divide_up(image_h, block_size.y); int n = max(blocks_x, blocks_y) | 0x1; /* Side length of the spiral (must be odd) */ /* Offset of spiral (to keep it centered) */ int2 offset = make_int2((image_w - n*block_size.x)/2, (image_h - n*block_size.y)/2); offset = (offset / tile_size) * tile_size; /* Round to tile border. 
*/ int2 block = make_int2(0, 0); /* Current block */ SpiralDirection prev_dir = DIRECTION_UP, dir = DIRECTION_UP; for(int i = 0;;) { /* Generate the tiles in the current block. */ for(int hilbert_index = 0; hilbert_index < hilbert_size*hilbert_size; hilbert_index++) { int2 tile, hilbert_pos = hilbert_index_to_pos(hilbert_size, hilbert_index); /* Rotate block according to spiral direction. */ if(prev_dir == DIRECTION_UP && dir == DIRECTION_UP) { tile = make_int2(hilbert_pos.y, hilbert_pos.x); } else if(dir == DIRECTION_LEFT || prev_dir == DIRECTION_LEFT) { tile = hilbert_pos; } else if(dir == DIRECTION_DOWN) { tile = make_int2(hilbert_size-1-hilbert_pos.y, hilbert_size-1-hilbert_pos.x); } else { tile = make_int2(hilbert_size-1-hilbert_pos.x, hilbert_size-1-hilbert_pos.y); } int2 pos = block*block_size + tile*tile_size + offset; /* Only add tiles which are in the image (tiles outside of the image can be generated since the spiral is always square). */ if(pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { int w = min(tile_size.x, image_w - pos.x); int h = min(tile_size.y, image_h - pos.y); int2 ipos = pos / tile_size; int idx = ipos.y*tile_w + ipos.x; state.tiles[idx] = Tile(idx, pos.x, pos.y, w, h, cur_device, Tile::RENDER); tile_list->push_front(idx); cur_tiles++; if(cur_tiles == tiles_per_device) { tile_list++; cur_tiles = 0; cur_device++; } } } /* Stop as soon as the spiral has reached the center block. */ if(block.x == (n-1)/2 && block.y == (n-1)/2) break; /* Advance to next block. 
*/ prev_dir = dir; switch(dir) { case DIRECTION_UP: block.y++; if(block.y == (n-i-1)) { dir = DIRECTION_LEFT; } break; case DIRECTION_LEFT: block.x++; if(block.x == (n-i-1)) { dir = DIRECTION_DOWN; } break; case DIRECTION_DOWN: block.y--; if(block.y == i) { dir = DIRECTION_RIGHT; } break; case DIRECTION_RIGHT: block.x--; if(block.x == i+1) { dir = DIRECTION_UP; i++; } break; } } return tile_w*tile_h; } int idx = 0; for(int slice = 0; slice < slice_num; slice++) { int slice_y = (image_h/slice_num)*slice; int slice_h = (slice == slice_num-1)? image_h - slice*(image_h/slice_num): image_h/slice_num; int tile_h = (tile_size.y >= slice_h)? 1: divide_up(slice_h, tile_size.y); int tiles_per_device = divide_up(tile_w * tile_h, num); int cur_device = 0, cur_tiles = 0; for(int tile_y = 0; tile_y < tile_h; tile_y++) { for(int tile_x = 0; tile_x < tile_w; tile_x++, idx++) { int x = tile_x * tile_size.x; int y = tile_y * tile_size.y; int w = (tile_x == tile_w-1)? image_w - x: tile_size.x; int h = (tile_y == tile_h-1)? slice_h - y: tile_size.y; state.tiles.push_back(Tile(idx, x, y + slice_y, w, h, sliced? slice: cur_device, Tile::RENDER)); tile_list->push_back(idx); if(!sliced) { cur_tiles++; if(cur_tiles == tiles_per_device) { /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that case. */ if(tile_order != TILE_BOTTOM_TO_TOP) { tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); } tile_list++; cur_tiles = 0; cur_device++; } } } } if(sliced) { tile_list++; } } return idx; }
/** @brief Compute the optimal number of data blocks to fill disk * * @param blocks Total number of blocks to work with * @param fanout Number of hashes that fit in a hash block * @param data_blocks[out] Number of data blocks writeable * @param hash_blocks[out] Number of blocks needed for hashes * @param jb_blocks[out] Number of blocks needed for journal * @param pad_blocks[out] Number of blocks wasted * @param levels[out] Number of hash block levels (not including data level) * * @return 0 if ok else error */ int compute_block_numbers(uint64_t blocks, uint32_t block_size, uint32_t fanout, uint32_t journal_blocks, uint64_t *data_blocks, uint32_t *hash_blocks, uint32_t *jb_blocks, uint32_t *pad_blocks, uint32_t *levels, uint32_t *blocks_per_level, uint32_t hash_bytes){ if (blocks < 6) { exit_error_f("Not enough space! Need at least 6 blocks!"); return -1; } // Remove one for superblocks blocks = blocks - 1; *pad_blocks = blocks; uint64_t low = 0; uint64_t high = blocks; uint32_t *bpl = (uint32_t*)malloc(sizeof(uint32_t) * DM_MINTEGRITY_MAX_LEVELS); while (high >= low && high != 0) { uint64_t mid = low + divide_up((high - low), 2); // Non overflow method uint64_t db = mid, used = 0; uint32_t hb = 0, jb = 0, pb = 0; uint32_t lev; // Number of hash blocks, levels needed for this many data blocks if (compute_hash_blocks(db, fanout, &lev, &hb, bpl) != 0) { break; // Barf } // Number of jb blocks needed jb = journal_blocks; used = db + jb + hb; pb = blocks - used; // Result is better if (used <= blocks && pb < *pad_blocks) { *data_blocks = db; *hash_blocks = hb; *jb_blocks = jb; *pad_blocks = pb; *levels = lev; for (int i = 0; i < *levels; i++) { blocks_per_level[i] = bpl[i]; } } if (used > blocks) { // Too many - go down high = mid - 1; } else if (used < blocks) { // Not enough - go up low = mid + 1; } else { // Optimal! Wow! break; } } free(bpl); // Failed at first try if (*pad_blocks == blocks) { return -1; } else { return 0; } }