/** Documented at declaration */ int gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, struct gpujpeg_encoder_input* input, uint8_t** image_compressed, int* image_compressed_size) { // Get coder struct gpujpeg_coder* coder = &encoder->coder; // Reset durations coder->duration_memory_to = 0.0; coder->duration_memory_from = 0.0; coder->duration_memory_map = 0.0; coder->duration_memory_unmap = 0.0; coder->duration_preprocessor = 0.0; coder->duration_dct_quantization = 0.0; coder->duration_huffman_coder = 0.0; coder->duration_stream = 0.0; coder->duration_in_gpu = 0.0; // Load input image if ( input->type == GPUJPEG_ENCODER_INPUT_IMAGE ) { GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Copy image to device memory if ( cudaSuccess != cudaMemcpy(coder->d_data_raw, input->image, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyHostToDevice) ) return -1; GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); } else if ( input->type == GPUJPEG_ENCODER_INPUT_OPENGL_TEXTURE ) { assert(input->texture != NULL); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Map texture to CUDA int data_size = 0; uint8_t* d_data = gpujpeg_opengl_texture_map(input->texture, &data_size); assert(data_size == (coder->data_raw_size)); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Copy image data from texture pixel buffer object to device data cudaMemcpy(coder->d_data_raw, d_data, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Unmap texture from CUDA gpujpeg_opengl_texture_unmap(input->texture); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); } else if(input->type == GPUJPEG_ENCODER_INPUT_IMAGE_ON_GPU) { GPUJPEG_CUSTOM_TIMER_START(encoder->def); coder->d_data_raw = input->image; GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); } else { // Unknown output type assert(0); } //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_LUMINANCE]); //gpujpeg_table_print(encoder->table[JPEG_COMPONENT_CHROMINANCE]); GPUJPEG_CUSTOM_TIMER_START(encoder->in_gpu); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Preprocessing if ( gpujpeg_preprocessor_encode(&encoder->coder) != 0 ) return -1; GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Perform DCT and quantization if ( gpujpeg_dct_gpu(encoder) != 0 ) return -1; // If restart interval is 0 then the GPU processing is in the end (even huffman coder will be performed on CPU) if ( coder->param.restart_interval == 0 ) { GPUJPEG_CUSTOM_TIMER_STOP(encoder->in_gpu); coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->in_gpu); } // Initialize writer output buffer current position encoder->writer->buffer_current = encoder->writer->buffer; // Write header gpujpeg_writer_write_header(encoder); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Perform huffman coding on CPU (when restart interval is not set) if ( coder->param.restart_interval == 0 ) { // Copy quantized data from device memory to cpu memory cudaMemcpy(coder->data_quantized, coder->d_data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyDeviceToHost); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Perform huffman coding if ( gpujpeg_huffman_cpu_encoder_encode(encoder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on CPU failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); } // Perform huffman coding on GPU (when restart interval is set) else { // Perform huffman coding unsigned int output_size; if ( gpujpeg_huffman_gpu_encoder_encode(encoder, &output_size) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman encoder on GPU failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(encoder->in_gpu); coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->in_gpu); GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); // Copy compressed data from device memory to cpu memory if ( cudaSuccess != cudaMemcpy(coder->data_compressed, coder->d_data_compressed, output_size, cudaMemcpyDeviceToHost) != 0 ) return -1; // Copy segments from device memory if ( cudaSuccess != cudaMemcpy(coder->segment, coder->d_segment, coder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyDeviceToHost) ) return -1; GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); GPUJPEG_CUSTOM_TIMER_START(encoder->def); if ( coder->param.interleaved == 1 ) { // Write scan header (only one scan is written, that contains all color components data) gpujpeg_writer_write_scan_header(encoder, 0); // Write scan data for ( int segment_index = 0; segment_index < coder->segment_count; segment_index++ ) { struct gpujpeg_segment* segment = &coder->segment[segment_index]; gpujpeg_writer_write_segment_info(encoder); // Copy compressed data to writer memcpy( encoder->writer->buffer_current, &coder->data_compressed[segment->data_compressed_index], segment->data_compressed_size ); encoder->writer->buffer_current += segment->data_compressed_size; //printf("Compressed data %d bytes\n", segment->data_compressed_size); } // Remove last restart marker in scan (is not needed) encoder->writer->buffer_current -= 2; gpujpeg_writer_write_segment_info(encoder); } else { // Write huffman coder results as one scan for each color component int segment_index = 0; for ( int comp = 0; comp < coder->param_image.comp_count; comp++ ) { // Write scan header gpujpeg_writer_write_scan_header(encoder, comp); // Write scan data for ( int index = 0; index < coder->component[comp].segment_count; index++ ) { struct gpujpeg_segment* segment = &coder->segment[segment_index]; gpujpeg_writer_write_segment_info(encoder); // Copy compressed data to writer memcpy( encoder->writer->buffer_current, &coder->data_compressed[segment->data_compressed_index], segment->data_compressed_size ); encoder->writer->buffer_current += segment->data_compressed_size; //printf("Compressed data %d bytes\n", segment->data_compressed_size); segment_index++; } // Remove last restart marker in scan (is not needed) encoder->writer->buffer_current -= 2; gpujpeg_writer_write_segment_info(encoder); } } GPUJPEG_CUSTOM_TIMER_STOP(encoder->def); coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(encoder->def); } gpujpeg_writer_emit_marker(encoder->writer, GPUJPEG_MARKER_EOI); // Set compressed image *image_compressed = encoder->writer->buffer; *image_compressed_size = encoder->writer->buffer_current - encoder->writer->buffer; return 0; }
/** Documented at declaration */ int gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output) { // Get coder struct gpujpeg_coder* coder = &decoder->coder; // Reset durations coder->duration_memory_to = 0.0; coder->duration_memory_from = 0.0; coder->duration_memory_map = 0.0; coder->duration_memory_unmap = 0.0; coder->duration_preprocessor = 0.0; coder->duration_dct_quantization = 0.0; coder->duration_huffman_coder = 0.0; coder->duration_stream = 0.0; coder->duration_in_gpu = 0.0; GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Read JPEG image data if ( gpujpeg_reader_read_image(decoder, image, image_size) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Decoder failed when decoding image data!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Perform huffman decoding on CPU (when restart interval is not set) if ( coder->param.restart_interval == 0 ) { if ( gpujpeg_huffman_cpu_decoder_decode(decoder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy quantized data to device memory from cpu memory cudaMemcpy(coder->d_data_quantized, coder->data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyHostToDevice); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); } // Perform huffman decoding on GPU (when restart interval is set) else { // Reset huffman output cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t)); // Copy scan data to device memory cudaMemcpy(coder->d_data_compressed, coder->data_compressed, decoder->data_compressed_size * sizeof(uint8_t), cudaMemcpyHostToDevice); gpujpeg_cuda_check_error("Decoder copy compressed data"); // Copy segments to device memory cudaMemcpy(coder->d_segment, coder->segment, decoder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice); gpujpeg_cuda_check_error("Decoder copy compressed data"); // Zero output memory cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t)); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); // Perform huffman decoding if ( gpujpeg_huffman_gpu_decoder_decode(decoder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); } // Perform IDCT and dequantization (own CUDA implementation) gpujpeg_idct_gpu(decoder); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Preprocessing if ( gpujpeg_preprocessor_decode(&decoder->coder) != 0 ) return -1; GPUJPEG_CUSTOM_TIMER_STOP(decoder->in_gpu); coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->in_gpu); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); // Set decompressed image size output->data_size = coder->data_raw_size * sizeof(uint8_t); // Set decompressed image if ( output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy decompressed image to host memory cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); // Set output to internal buffer output->data = coder->data_raw; } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); assert(output->data != NULL); // Copy decompressed image to host memory cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); } else if ( output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Map OpenGL texture int data_size = 0; uint8_t* d_data = gpujpeg_opengl_texture_map(output->texture, &data_size); assert(data_size == coder->data_raw_size); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy decompressed image to texture pixel buffer object device data cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Unmap OpenGL texture gpujpeg_opengl_texture_unmap(output->texture); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); } else { // Unknown output type assert(0); } return 0; }
/** Documented at declaration */ int gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output) { // Get coder struct gpujpeg_coder* coder = &decoder->coder; // Reset durations coder->duration_memory_to = 0.0; coder->duration_memory_from = 0.0; coder->duration_memory_map = 0.0; coder->duration_memory_unmap = 0.0; coder->duration_preprocessor = 0.0; coder->duration_dct_quantization = 0.0; coder->duration_huffman_coder = 0.0; coder->duration_stream = 0.0; coder->duration_in_gpu = 0.0; GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Read JPEG image data if ( gpujpeg_reader_read_image(decoder, image, image_size) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Decoder failed when decoding image data!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Perform huffman decoding on CPU (when restart interval is not set) if ( coder->param.restart_interval == 0 ) { if ( gpujpeg_huffman_cpu_decoder_decode(decoder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy quantized data to device memory from cpu memory cudaMemcpy(coder->d_data_quantized, coder->data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyHostToDevice); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); } // Perform huffman decoding on GPU (when restart interval is set) else { #ifdef GPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT // Copy huffman tables to constant memory for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) { for ( int huff_type = 0; huff_type < GPUJPEG_HUFFMAN_TYPE_COUNT; huff_type++ ) { int index = (comp_type * GPUJPEG_HUFFMAN_TYPE_COUNT + huff_type); cudaMemcpyToSymbol( (char*)gpujpeg_decoder_table_huffman, &decoder->table_huffman[comp_type][huff_type], sizeof(struct gpujpeg_table_huffman_decoder), index * sizeof(struct gpujpeg_table_huffman_decoder), cudaMemcpyHostToDevice ); } } gpujpeg_cuda_check_error("Decoder copy huffman tables to constant memory"); #endif // Reset huffman output cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t)); // Copy scan data to device memory cudaMemcpy(coder->d_data_compressed, coder->data_compressed, decoder->data_compressed_size * sizeof(uint8_t), cudaMemcpyHostToDevice); gpujpeg_cuda_check_error("Decoder copy compressed data"); // Copy segments to device memory cudaMemcpy(coder->d_segment, coder->segment, decoder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice); gpujpeg_cuda_check_error("Decoder copy compressed data"); // Zero output memory cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t)); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu); // Perform huffman decoding if ( gpujpeg_huffman_gpu_decoder_decode(decoder) != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n"); return -1; } GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); } #ifdef GPUJPEG_DCT_FROM_NPP // Perform IDCT and dequantization (implementation from NPP) for ( int comp = 0; comp < coder->param_image.comp_count; comp++ ) { // Get component struct gpujpeg_component* component = &coder->component[comp]; // Determine table type enum gpujpeg_component_type type = (comp == 0) ? GPUJPEG_COMPONENT_LUMINANCE : GPUJPEG_COMPONENT_CHROMINANCE; //gpujpeg_component_print16(component, component->d_data_quantized); cudaMemset(component->d_data, 0, component->data_size * sizeof(uint8_t)); //Perform inverse DCT NppiSize inv_roi; inv_roi.width = component->data_width * GPUJPEG_BLOCK_SIZE; inv_roi.height = component->data_height / GPUJPEG_BLOCK_SIZE; assert(GPUJPEG_BLOCK_SIZE == 8); NppStatus status = nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R( component->d_data_quantized, component->data_width * GPUJPEG_BLOCK_SIZE * sizeof(int16_t), component->d_data, component->data_width * sizeof(uint8_t), decoder->table_quantization[type].d_table, inv_roi ); if ( status != 0 ) { fprintf(stderr, "[GPUJPEG] [Error] Inverse DCT failed (error %d)!\n", status); } //gpujpeg_component_print8(component, component->d_data); } #else // Perform IDCT and dequantization (own CUDA implementation) gpujpeg_idct_gpu(decoder); // Perform IDCT and dequantization (own CPU implementation) // gpujpeg_idct_cpu(decoder); #endif GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Preprocessing if ( gpujpeg_preprocessor_decode(&decoder->coder) != 0 ) return -1; GPUJPEG_CUSTOM_TIMER_STOP(decoder->in_gpu); coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->in_gpu); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); // Set decompressed image size output->data_size = coder->data_raw_size * sizeof(uint8_t); // Set decompressed image if ( output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy decompressed image to host memory cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); // Set output to internal buffer output->data = coder->data_raw; } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); assert(output->data != NULL); // Copy decompressed image to host memory cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); } else if ( output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) { GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Map OpenGL texture int data_size = 0; uint8_t* d_data = gpujpeg_opengl_texture_map(output->texture, &data_size); assert(data_size == coder->data_raw_size); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Copy decompressed image to texture pixel buffer object device data cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); GPUJPEG_CUSTOM_TIMER_START(decoder->def); // Unmap OpenGL texture gpujpeg_opengl_texture_unmap(output->texture); GPUJPEG_CUSTOM_TIMER_STOP(decoder->def); coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def); } else { // Unknown output type assert(0); } return 0; }