Example #1
0
/** Documented at declaration */
void
gpujpeg_image_convert(const char* input, const char* output, struct gpujpeg_image_parameters param_image_from,
                      struct gpujpeg_image_parameters param_image_to)
{
    assert(param_image_from.width == param_image_to.width);
    assert(param_image_from.height == param_image_to.height);
    assert(param_image_from.comp_count == param_image_to.comp_count);

    // Load image
    int image_size = gpujpeg_image_calculate_size(&param_image_from);
    uint8_t* image = NULL;
    if ( gpujpeg_image_load_from_file(input, &image, &image_size) != 0 ) {
        fprintf(stderr, "[GPUJPEG] [Error] Failed to load image [%s]!\n", input);
        return;
    }

    struct gpujpeg_coder coder;
    gpujpeg_set_default_parameters(&coder.param);
    coder.param.color_space_internal = GPUJPEG_RGB;

    // Initialize coder and preprocessor
    coder.param_image = param_image_from;
    assert(gpujpeg_coder_init(&coder) == 0);
    assert(gpujpeg_preprocessor_encoder_init(&coder) == 0);
    // Perform preprocessor
    assert(cudaMemcpy(coder.d_data_raw, image, coder.data_raw_size * sizeof(uint8_t), cudaMemcpyHostToDevice) == cudaSuccess);
    assert(gpujpeg_preprocessor_encode(&coder) == 0);
    // Save preprocessor result
    uint8_t* buffer = NULL;
    assert(cudaMallocHost((void**)&buffer, coder.data_size * sizeof(uint8_t)) == cudaSuccess);
    assert(buffer != NULL);
    assert(cudaMemcpy(buffer, coder.d_data, coder.data_size * sizeof(uint8_t), cudaMemcpyDeviceToHost) == cudaSuccess);
    // Deinitialize decoder
    gpujpeg_coder_deinit(&coder);

    // Initialize coder and postprocessor
    coder.param_image = param_image_to;
    assert(gpujpeg_coder_init(&coder) == 0);
    assert(gpujpeg_preprocessor_decoder_init(&coder) == 0);
    // Perform postprocessor
    assert(cudaMemcpy(coder.d_data, buffer, coder.data_size * sizeof(uint8_t), cudaMemcpyHostToDevice) == cudaSuccess);
    assert(gpujpeg_preprocessor_decode(&coder) == 0);
    // Save preprocessor result
    assert(cudaMemcpy(coder.data_raw, coder.d_data_raw, coder.data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost) == cudaSuccess);
    if ( gpujpeg_image_save_to_file(output, coder.data_raw, coder.data_raw_size) != 0 ) {
        fprintf(stderr, "[GPUJPEG] [Error] Failed to save image [%s]!\n", output);
        return;
    }
    // Deinitialize decoder
    gpujpeg_coder_deinit(&coder);
}
/** Documented at declaration */
int
gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output)
{
    // Get coder
    struct gpujpeg_coder* coder = &decoder->coder;
    
    // Reset durations
    coder->duration_memory_to = 0.0;
    coder->duration_memory_from = 0.0;
    coder->duration_memory_map = 0.0;
    coder->duration_memory_unmap = 0.0;
    coder->duration_preprocessor = 0.0;
    coder->duration_dct_quantization = 0.0;
    coder->duration_huffman_coder = 0.0;
    coder->duration_stream = 0.0;
    coder->duration_in_gpu = 0.0;

    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Read JPEG image data
    if ( gpujpeg_reader_read_image(decoder, image, image_size) != 0 ) {
        fprintf(stderr, "[GPUJPEG] [Error] Decoder failed when decoding image data!\n");
        return -1;
    }
    
    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Perform huffman decoding on CPU (when restart interval is not set)
    if ( coder->param.restart_interval == 0 ) {
        if ( gpujpeg_huffman_cpu_decoder_decode(decoder) != 0 ) {
            fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder failed!\n");
            return -1;
        }
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Copy quantized data to device memory from cpu memory
        cudaMemcpy(coder->d_data_quantized, coder->data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyHostToDevice);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu);
    }
    // Perform huffman decoding on GPU (when restart interval is set)
    else {
        // Reset huffman output
        cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t));
        
        // Copy scan data to device memory
        cudaMemcpy(coder->d_data_compressed, coder->data_compressed, decoder->data_compressed_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
        gpujpeg_cuda_check_error("Decoder copy compressed data");
        
        // Copy segments to device memory
        cudaMemcpy(coder->d_segment, coder->segment, decoder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice);
        gpujpeg_cuda_check_error("Decoder copy compressed data");
        
        // Zero output memory
        cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t));
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu);

        // Perform huffman decoding
        if ( gpujpeg_huffman_gpu_decoder_decode(decoder) != 0 ) {
            fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n");
            return -1;
        }

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    }
    
    // Perform IDCT and dequantization (own CUDA implementation)
    gpujpeg_idct_gpu(decoder);
    
    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Preprocessing
    if ( gpujpeg_preprocessor_decode(&decoder->coder) != 0 )
        return -1;

    GPUJPEG_CUSTOM_TIMER_STOP(decoder->in_gpu);
    coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->in_gpu);

    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    
    // Set decompressed image size
    output->data_size = coder->data_raw_size * sizeof(uint8_t);
    
    // Set decompressed image
    if ( output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Copy decompressed image to host memory
        cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost);
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        // Set output to internal buffer
        output->data = coder->data_raw;
    } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
        
        assert(output->data != NULL);

        // Copy decompressed image to host memory
        cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost);
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    } else if ( output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Map OpenGL texture
        int data_size = 0;
        uint8_t* d_data = gpujpeg_opengl_texture_map(output->texture, &data_size);
        assert(data_size == coder->data_raw_size);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
            
        // Copy decompressed image to texture pixel buffer object device data
        cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
            
        // Unmap OpenGL texture
        gpujpeg_opengl_texture_unmap(output->texture);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    } else {
        // Unknown output type
        assert(0);
    }

    return 0;
}
Example #3
0
/** Documented at declaration */
int
gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, int image_size, struct gpujpeg_decoder_output* output)
{
    // Get coder
    struct gpujpeg_coder* coder = &decoder->coder;
    
    // Reset durations
    coder->duration_memory_to = 0.0;
    coder->duration_memory_from = 0.0;
    coder->duration_memory_map = 0.0;
    coder->duration_memory_unmap = 0.0;
    coder->duration_preprocessor = 0.0;
    coder->duration_dct_quantization = 0.0;
    coder->duration_huffman_coder = 0.0;
    coder->duration_stream = 0.0;
    coder->duration_in_gpu = 0.0;

    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Read JPEG image data
    if ( gpujpeg_reader_read_image(decoder, image, image_size) != 0 ) {
        fprintf(stderr, "[GPUJPEG] [Error] Decoder failed when decoding image data!\n");
        return -1;
    }
    
    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_stream = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Perform huffman decoding on CPU (when restart interval is not set)
    if ( coder->param.restart_interval == 0 ) {
        if ( gpujpeg_huffman_cpu_decoder_decode(decoder) != 0 ) {
            fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder failed!\n");
            return -1;
        }
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Copy quantized data to device memory from cpu memory
        cudaMemcpy(coder->d_data_quantized, coder->data_quantized, coder->data_size * sizeof(int16_t), cudaMemcpyHostToDevice);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu);
    }
    // Perform huffman decoding on GPU (when restart interval is set)
    else {
    #ifdef GPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT
        // Copy huffman tables to constant memory
        for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) {
            for ( int huff_type = 0; huff_type < GPUJPEG_HUFFMAN_TYPE_COUNT; huff_type++ ) {
                int index = (comp_type * GPUJPEG_HUFFMAN_TYPE_COUNT + huff_type);
                cudaMemcpyToSymbol(
                    (char*)gpujpeg_decoder_table_huffman, 
                    &decoder->table_huffman[comp_type][huff_type], 
                    sizeof(struct gpujpeg_table_huffman_decoder), 
                    index * sizeof(struct gpujpeg_table_huffman_decoder), 
                    cudaMemcpyHostToDevice
                );
            }
        }
        gpujpeg_cuda_check_error("Decoder copy huffman tables to constant memory");
    #endif
    
        // Reset huffman output
        cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t));
        
        // Copy scan data to device memory
        cudaMemcpy(coder->d_data_compressed, coder->data_compressed, decoder->data_compressed_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
        gpujpeg_cuda_check_error("Decoder copy compressed data");
        
        // Copy segments to device memory
        cudaMemcpy(coder->d_segment, coder->segment, decoder->segment_count * sizeof(struct gpujpeg_segment), cudaMemcpyHostToDevice);
        gpujpeg_cuda_check_error("Decoder copy compressed data");
        
        // Zero output memory
        cudaMemset(coder->d_data_quantized, 0, coder->data_size * sizeof(int16_t));
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_to = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->in_gpu);

        // Perform huffman decoding
        if ( gpujpeg_huffman_gpu_decoder_decode(decoder) != 0 ) {
            fprintf(stderr, "[GPUJPEG] [Error] Huffman decoder on GPU failed!\n");
            return -1;
        }

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_huffman_coder = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    }
    
#ifdef GPUJPEG_DCT_FROM_NPP
    // Perform IDCT and dequantization (implementation from NPP)
    for ( int comp = 0; comp < coder->param_image.comp_count; comp++ ) {
        // Get component
        struct gpujpeg_component* component = &coder->component[comp];
                
        // Determine table type
        enum gpujpeg_component_type type = (comp == 0) ? GPUJPEG_COMPONENT_LUMINANCE : GPUJPEG_COMPONENT_CHROMINANCE;
        
        //gpujpeg_component_print16(component, component->d_data_quantized);
        
        cudaMemset(component->d_data, 0, component->data_size * sizeof(uint8_t));
        
        //Perform inverse DCT
        NppiSize inv_roi;
        inv_roi.width = component->data_width * GPUJPEG_BLOCK_SIZE;
        inv_roi.height = component->data_height / GPUJPEG_BLOCK_SIZE;
        assert(GPUJPEG_BLOCK_SIZE == 8);
        NppStatus status = nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R(
            component->d_data_quantized, 
            component->data_width * GPUJPEG_BLOCK_SIZE * sizeof(int16_t), 
            component->d_data, 
            component->data_width * sizeof(uint8_t), 
            decoder->table_quantization[type].d_table, 
            inv_roi
        );
        if ( status != 0 ) {
            fprintf(stderr, "[GPUJPEG] [Error] Inverse DCT failed (error %d)!\n", status);
        }
            
        //gpujpeg_component_print8(component, component->d_data);
    }
#else
    // Perform IDCT and dequantization (own CUDA implementation)
    gpujpeg_idct_gpu(decoder);
    
    // Perform IDCT and dequantization (own CPU implementation)
    // gpujpeg_idct_cpu(decoder);
#endif
    
    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_dct_quantization = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    GPUJPEG_CUSTOM_TIMER_START(decoder->def);
    
    // Preprocessing
    if ( gpujpeg_preprocessor_decode(&decoder->coder) != 0 )
        return -1;

    GPUJPEG_CUSTOM_TIMER_STOP(decoder->in_gpu);
    coder->duration_in_gpu = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->in_gpu);

    GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
    coder->duration_preprocessor = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    
    // Set decompressed image size
    output->data_size = coder->data_raw_size * sizeof(uint8_t);
    
    // Set decompressed image
    if ( output->type == GPUJPEG_DECODER_OUTPUT_INTERNAL_BUFFER ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Copy decompressed image to host memory
        cudaMemcpy(coder->data_raw, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost);
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        // Set output to internal buffer
        output->data = coder->data_raw;
    } else if ( output->type == GPUJPEG_DECODER_OUTPUT_CUSTOM_BUFFER ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
        
        assert(output->data != NULL);

        // Copy decompressed image to host memory
        cudaMemcpy(output->data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToHost);
        
        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    } else if ( output->type == GPUJPEG_DECODER_OUTPUT_OPENGL_TEXTURE ) {
        GPUJPEG_CUSTOM_TIMER_START(decoder->def);

        // Map OpenGL texture
        int data_size = 0;
        uint8_t* d_data = gpujpeg_opengl_texture_map(output->texture, &data_size);
        assert(data_size == coder->data_raw_size);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_map = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
            
        // Copy decompressed image to texture pixel buffer object device data
        cudaMemcpy(d_data, coder->d_data_raw, coder->data_raw_size * sizeof(uint8_t), cudaMemcpyDeviceToDevice);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_from = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);

        GPUJPEG_CUSTOM_TIMER_START(decoder->def);
            
        // Unmap OpenGL texture
        gpujpeg_opengl_texture_unmap(output->texture);

        GPUJPEG_CUSTOM_TIMER_STOP(decoder->def);
        coder->duration_memory_unmap = GPUJPEG_CUSTOM_TIMER_DURATION(decoder->def);
    } else {
        // Unknown output type
        assert(0);
    }

    return 0;
}