/*
 * Checks icetSparseImageCopyPixels on the pixel range [start, end) of image.
 *
 * The range is compressed directly out of image with icetCompressSubImage,
 * and is also copied out of a compressed version of the whole image with
 * icetSparseImageCopyPixels.  The two sparse images are then handed to
 * CompareSparseImages, whose result is returned unchanged.
 */
static int TrySparseImageCopyPixels(const IceTImage image,
                                    IceTSizeType start,
                                    IceTSizeType end)
{
    IceTSizeType img_width = icetImageGetWidth(image);
    IceTSizeType img_height = icetImageGetHeight(image);
    IceTSizeType num_pixels = end - start;

    /* Compressed copy of the entire image. */
    IceTVoid *whole_mem;
    IceTSparseImage whole;

    /* Reference: the range compressed straight from the source image. */
    IceTVoid *expected_mem;
    IceTSparseImage expected;

    /* Candidate: the range copied out of the compressed whole image. */
    IceTVoid *copied_mem;
    IceTSparseImage copied;

    int status;

    printf("Trying sparse image copy from %d to %d\n", start, end);

    whole_mem = malloc(icetSparseImageBufferSize(img_width, img_height));
    whole = icetSparseImageAssignBuffer(whole_mem, img_width, img_height);

    /* Both sub-range images are laid out as a single row of num_pixels. */
    expected_mem = malloc(icetSparseImageBufferSize(num_pixels, 1));
    expected = icetSparseImageAssignBuffer(expected_mem, num_pixels, 1);

    copied_mem = malloc(icetSparseImageBufferSize(num_pixels, 1));
    copied = icetSparseImageAssignBuffer(copied_mem, num_pixels, 1);

    icetCompressSubImage(image, start, num_pixels, expected);

    icetCompressImage(image, whole);
    icetSparseImageCopyPixels(whole, start, num_pixels, copied);

    status = CompareSparseImages(expected, copied);

    free(whole_mem);
    free(expected_mem);
    free(copied_mem);

    return status;
}
static int TestSparseImageSplit(const IceTImage image) { #define NUM_PARTITIONS 7 IceTVoid *full_sparse_buffer; IceTSparseImage full_sparse; IceTVoid *sparse_partition_buffer[NUM_PARTITIONS]; IceTSparseImage sparse_partition[NUM_PARTITIONS]; IceTSizeType offsets[NUM_PARTITIONS]; IceTVoid *compare_sparse_buffer; IceTSparseImage compare_sparse; IceTSizeType width; IceTSizeType height; IceTSizeType num_partition_pixels; IceTInt partition; width = icetImageGetWidth(image); height = icetImageGetHeight(image); num_partition_pixels = icetSparseImageSplitPartitionNumPixels(width*height, NUM_PARTITIONS, NUM_PARTITIONS); full_sparse_buffer = malloc(icetSparseImageBufferSize(width, height)); full_sparse = icetSparseImageAssignBuffer(full_sparse_buffer,width,height); for (partition = 0; partition < NUM_PARTITIONS; partition++) { sparse_partition_buffer[partition] = malloc(icetSparseImageBufferSize(num_partition_pixels, 1)); sparse_partition[partition] = icetSparseImageAssignBuffer(sparse_partition_buffer[partition], num_partition_pixels, 1); } compare_sparse_buffer = malloc(icetSparseImageBufferSize(num_partition_pixels, 1)); compare_sparse = icetSparseImageAssignBuffer(compare_sparse_buffer, num_partition_pixels, 1); icetCompressImage(image, full_sparse); printf("Spliting image %d times\n", NUM_PARTITIONS); icetSparseImageSplit(full_sparse, 0, NUM_PARTITIONS, NUM_PARTITIONS, sparse_partition, offsets); for (partition = 0; partition < NUM_PARTITIONS; partition++) { IceTInt result; icetCompressSubImage(image, offsets[partition], icetSparseImageGetNumPixels( sparse_partition[partition]), compare_sparse); printf(" Comparing partition %d\n", partition); result = CompareSparseImages(compare_sparse, sparse_partition[partition]); if (result != TEST_PASSED) return result; } printf("Spliting image %d times with first partition in place.\n", NUM_PARTITIONS); sparse_partition[0] = full_sparse; icetSparseImageSplit(full_sparse, 0, NUM_PARTITIONS, NUM_PARTITIONS, sparse_partition, 
offsets); for (partition = 0; partition < NUM_PARTITIONS; partition++) { IceTInt result; icetCompressSubImage(image, offsets[partition], icetSparseImageGetNumPixels( sparse_partition[partition]), compare_sparse); printf(" Comparing partition %d\n", partition); result = CompareSparseImages(compare_sparse, sparse_partition[partition]); if (result != TEST_PASSED) return result; } free(full_sparse_buffer); for (partition = 0; partition < NUM_PARTITIONS; partition++) { free(sparse_partition_buffer[partition]); } free(compare_sparse_buffer); return TEST_PASSED; #undef NUM_PARTITIONS }
static IceTImage splitStrategy(void) { int *tile_groups; int my_tile; int group_size; int fragment_size; GLint rank; GLint num_proc; GLint num_tiles; GLint max_pixels; GLint *tile_contribs; GLint total_image_count; GLint *display_nodes; GLint tile_displayed; GLenum output_buffers; GLint num_contained_tiles; GLint *contained_tiles_list; GLboolean *all_contained_tiles_masks; int tile, image, node; int num_allocated; IceTSparseImage *incoming; IceTSparseImage outgoing; IceTImage imageFragment; IceTImage fullImage; int num_requests; IceTCommRequest *requests; int first_incoming = 1; icetRaiseDebug("In splitStrategy"); icetGetIntegerv(ICET_RANK, &rank); icetGetIntegerv(ICET_NUM_PROCESSES, &num_proc); icetGetIntegerv(ICET_NUM_TILES, &num_tiles); icetGetIntegerv(ICET_TILE_MAX_PIXELS, &max_pixels); tile_contribs = icetUnsafeStateGet(ICET_TILE_CONTRIB_COUNTS); icetGetIntegerv(ICET_TOTAL_IMAGE_COUNT, &total_image_count); display_nodes = icetUnsafeStateGet(ICET_DISPLAY_NODES); icetGetIntegerv(ICET_TILE_DISPLAYED, &tile_displayed); icetGetIntegerv(ICET_NUM_CONTAINED_TILES, &num_contained_tiles); contained_tiles_list = icetUnsafeStateGet(ICET_CONTAINED_TILES_LIST); all_contained_tiles_masks = icetUnsafeStateGet(ICET_ALL_CONTAINED_TILES_MASKS); /* Special case: no images rendered whatsoever. */ if (total_image_count < 1) { icetRaiseDebug("Not rendering any images. Quit early."); if (tile_displayed >= 0) { icetResizeBuffer(icetFullImageSize(max_pixels)); fullImage = icetReserveBufferMem(icetFullImageSize(max_pixels)); icetInitializeImage(fullImage, max_pixels); icetClearImage(fullImage); } else { fullImage = NULL; } return fullImage; } tile_groups = malloc(sizeof(int)*(num_tiles+1)); num_allocated = 0; tile_groups[0] = 0; /* Set entry of tile_groups[i+1] to the number of processes to help compose the image in tile i. 
*/ for (tile = 0; tile < num_tiles; tile++) { int allocate = (tile_contribs[tile]*num_proc)/total_image_count; if ((allocate < 1) && (tile_contribs[tile] > 0)) { allocate = 1; } tile_groups[tile+1] = allocate; num_allocated += allocate; } /* Make the number of processes allocated equal exactly the number of processes available. */ while (num_allocated < num_proc) { /* Add processes to the tile with the lowest process:image ratio. */ int min_id = -1; float min_ratio = (float)num_proc; for (tile = 0; tile < num_tiles; tile++) { float ratio; /* Don't even consider tiles with no contributors. */ if (tile_contribs[tile] == 0) continue; ratio = (float)tile_groups[tile+1]/tile_contribs[tile]; if (ratio < min_ratio) { min_ratio = ratio; min_id = tile; } } #ifdef DEBUG if (min_id < 0) { icetRaiseError("Could not find candidate to add tile.", ICET_SANITY_CHECK_FAIL); } #endif tile_groups[min_id+1]++; num_allocated++; } while (num_allocated > num_proc) { /* Remove processes from the tile with the highest process:image ratio. */ int max_id = -1; float max_ratio = 0; for (tile = 0; tile < num_tiles; tile++) { float ratio; /* Don't even consider tiles with a minimum allocation. */ if (tile_groups[tile+1] <= 1) continue; ratio = (float)tile_groups[tile+1]/tile_contribs[tile]; if (ratio > max_ratio) { max_ratio = ratio; max_id = tile; } } #ifdef DEBUG if (max_id < 0) { icetRaiseError("Could not find candidate to remove tile.", ICET_SANITY_CHECK_FAIL); } #endif tile_groups[max_id+1]--; num_allocated--; } /* Processes are assigned sequentially from 0 to N to each tile as needed. Change each tile_groups[i] entry to be the lowest rank of the processes assigned to tile i. Thus the processes assigned to tile i are tile_groups[i] through tile_groups[i+1]-1. */ for (tile = 1; tile < num_tiles; tile++) { tile_groups[tile] += tile_groups[tile-1]; } tile_groups[num_tiles] = num_proc; /* Figure out which tile I am assigned to. 
*/ for (my_tile = 0; rank >= tile_groups[my_tile+1]; my_tile++); group_size = tile_groups[my_tile+1] - tile_groups[my_tile]; fragment_size = max_pixels/group_size; num_requests = tile_contribs[my_tile]; if (num_requests < 2) num_requests = 2; icetResizeBuffer( sizeof(IceTSparseImage)*tile_contribs[my_tile] + icetFullImageSize(fragment_size) + icetSparseImageSize(max_pixels) + icetFullImageSize(max_pixels) + icetSparseImageSize(fragment_size)*tile_contribs[my_tile] + sizeof(IceTCommRequest)*num_requests); incoming = icetReserveBufferMem(sizeof(IceTSparseImage)*tile_contribs[my_tile]); outgoing = icetReserveBufferMem(icetSparseImageSize(max_pixels)); imageFragment = icetReserveBufferMem(icetFullImageSize(fragment_size)); fullImage = icetReserveBufferMem(icetFullImageSize(max_pixels)); requests = icetReserveBufferMem(sizeof(IceTCommRequest)*num_requests); /* Set up asynchronous receives for all incoming image fragments. */ /* for (image = 0; image < tile_contribs[my_tile]; image++) { */ /* incoming[image] */ /* = icetReserveBufferMem(icetSparseImageSize(fragment_size)); */ /* MPI_Irecv(incoming[image], icetSparseImageSize(fragment_size), */ /* MPI_BYTE, MPI_ANY_SOURCE, IMAGE_DATA, */ /* icetGetCommunicator(), requests + image); */ /* } */ for (image = 0, node = 0; image < tile_contribs[my_tile]; node++) { if (all_contained_tiles_masks[node*num_tiles + my_tile]) { icetRaiseDebug1("Setting up receive from node %d", node); incoming[image] = icetReserveBufferMem(icetSparseImageSize(fragment_size)); requests[image] = ICET_COMM_IRECV(incoming[image], icetSparseImageSize(fragment_size), ICET_BYTE, node, IMAGE_DATA); image++; } } /* Render and send all tile images I am rendering. 
*/ for (image = 0; image < num_contained_tiles; image++) { int sending_frag_size; int compressedSize; GLuint offset; tile = contained_tiles_list[image]; icetGetTileImage(tile, fullImage); icetRaiseDebug1("Got image for tile %d", tile); offset = 0; sending_frag_size = max_pixels/(tile_groups[tile+1]-tile_groups[tile]); for (node = tile_groups[tile]; node < tile_groups[tile+1]; node++) { icetRaiseDebug2("Sending tile %d to node %d", tile, node); compressedSize = icetCompressSubImage(fullImage, offset, sending_frag_size, outgoing); icetAddSentBytes(compressedSize); ICET_COMM_SEND(outgoing, compressedSize, ICET_BYTE, node, IMAGE_DATA); offset += sending_frag_size; } } /* Wait for images to come in and Z compare them. */ for (image = 0; image < tile_contribs[my_tile]; image++) { int idx; idx = ICET_COMM_WAITANY(tile_contribs[my_tile], requests); if (first_incoming) { icetRaiseDebug1("Got first image (%d).", idx); icetDecompressImage(incoming[idx], imageFragment); first_incoming = 0; } else { icetRaiseDebug1("Got subsequent image (%d).", idx); icetCompressedComposite(imageFragment, incoming[idx], 1); } } /* Send composited fragment to display process. */ icetGetIntegerv(ICET_OUTPUT_BUFFERS, (GLint *)&output_buffers); if ((output_buffers & ICET_COLOR_BUFFER_BIT) != 0) { icetAddSentBytes(4*fragment_size); requests[0] = ICET_COMM_ISEND(icetGetImageColorBuffer(imageFragment), 4*fragment_size, ICET_BYTE, display_nodes[my_tile], COLOR_DATA); } if ((output_buffers & ICET_DEPTH_BUFFER_BIT) != 0) { icetAddSentBytes(4*fragment_size); requests[1] = ICET_COMM_ISEND(icetGetImageDepthBuffer(imageFragment), fragment_size, ICET_INT, display_nodes[my_tile], DEPTH_DATA); } /* If I am displaying a tile, receive image data. */ if (tile_displayed >= 0) { icetInitializeImage(fullImage, max_pixels); /* Check to make sure tile is not blank. 
*/ if (tile_groups[tile_displayed+1] > tile_groups[tile_displayed]) { int my_frag_size = max_pixels/( tile_groups[tile_displayed+1] - tile_groups[tile_displayed]); if ((output_buffers & ICET_COLOR_BUFFER_BIT) != 0) { GLubyte *cb = icetGetImageColorBuffer(fullImage); for (node = tile_groups[tile_displayed]; node < tile_groups[tile_displayed+1]; node++) { icetRaiseDebug1("Getting final color fragment from %d", node); ICET_COMM_RECV(cb, 4*my_frag_size, ICET_BYTE, node, COLOR_DATA); cb += 4*my_frag_size; } } if ((output_buffers & ICET_DEPTH_BUFFER_BIT) != 0) { GLuint *db = icetGetImageDepthBuffer(fullImage); for (node = tile_groups[tile_displayed]; node < tile_groups[tile_displayed+1]; node++) { icetRaiseDebug1("Getting final depth fragment from %d", node); ICET_COMM_RECV(db, my_frag_size, ICET_INT, node, DEPTH_DATA); db += my_frag_size; } } } else { icetClearImage(fullImage); } } if ((output_buffers & ICET_COLOR_BUFFER_BIT) != 0) { ICET_COMM_WAIT(requests); } if ((output_buffers & ICET_DEPTH_BUFFER_BIT) != 0) { ICET_COMM_WAIT(requests + 1); } free(tile_groups); return fullImage; }