예제 #1
0
    /// Enqueues a command to copy data from \p src_image to \p dst_image.
    ///
    /// \see_opencl_ref{clEnqueueCopyImage}
    event enqueue_copy_image(const image3d &src_image,
                             const image3d &dst_image,
                             const size_t src_origin[3],
                             const size_t dst_origin[3],
                             const size_t region[3],
                             const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);
        BOOST_ASSERT(src_image.get_context() == this->get_context());
        BOOST_ASSERT(dst_image.get_context() == this->get_context());
        BOOST_ASSERT_MSG(src_image.get_format() == dst_image.get_format(),
                         "Source and destination image formats must match.");

        event event_;

        cl_int ret = clEnqueueCopyImage(
            m_queue,
            src_image.get(),
            dst_image.get(),
            src_origin,
            dst_origin,
            region,
            events.size(),
            events.get_event_ptr(),
            &event_.get()
        );

        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        return event_;
    }
예제 #2
0
/**
 * \brief ocl::Image::copyTo Copies from this Image to the destination Image.
 *
 * The operation assumes that all data are valid and no synchronization is necessary (active Queue executes in-order).
 * The operation forces that all commands within the active Queue including this one are completed.
 *
 * \param src_origin is the 3D offset in bytes from which the Image is read.
 * \param region is the 3D region of the data. It is given with {image_width, image_height, image_depth}.
 * \param dest is the Image into which the data is going to be copied.
 * \param dest_origin is the 3D offset in bytes from which the destionation Image is read.
 */
void ocl::Image::copyTo(size_t *src_origin, const size_t *region, const Image & dest, size_t *dest_origin, const EventList & list ) const
{
    TRUE_ASSERT(this->context() == dest.context(), "Context of this and dest must be equal");
    TRUE_ASSERT(this->id() != dest.id(), "Images must not be equal this->id() " << this->id() << "; other.id " << dest.id());
    OPENCL_SAFE_CALL( clEnqueueCopyImage(this->activeQueue().id(), this->id(), dest.id(), src_origin, dest_origin, region, list.size(), list.events().data(), NULL) );
    OPENCL_SAFE_CALL( clFinish(this->activeQueue().id()) );
}
void timedImageCLCopy( cl_command_queue queue,
                       cl_mem srcImg,
                       cl_mem dstImg )
{
    CPerfCounter t1;
    cl_int ret;
    cl_event ev;

    t1.Start();

    ret = clEnqueueCopyImage( queue,
                              srcImg,
                              dstImg,
                              imageOrigin,
                              imageOrigin,
                              imageRegion,
                              0, NULL,
                              &ev );
    ASSERT_CL_RETURN( ret );

    clFlush( queue );
    spinForEventsComplete( 1, &ev );

    t1.Stop();

    tlog->Timer( "%32s  %lf s   %8.2lf GB/s\n", "clEnqueueCopyImage():", t1.GetElapsedTime(), nBytesRegion, 1 );
}
예제 #4
0
	void OpenCLImage::copyFrom(OpenCLImage &srcImage, size_t *pSrcOrigin, size_t *pDstOrigin, size_t *pRegion) {
		if(pSrcOrigin == NULL) pSrcOrigin = origin;
		if(pDstOrigin == NULL) pDstOrigin = origin;
		if(pRegion == NULL) pRegion = region;
		
		cl_int err = clEnqueueCopyImage(pOpenCL->getQueue(), srcImage.getCLMem(), clMemObject, pSrcOrigin, pDstOrigin, pRegion, 0, NULL, NULL);
		assert(err == CL_SUCCESS);
	}
예제 #5
0
파일: opencl.c 프로젝트: YongHaoWu/wine-hub
cl_int WINAPI wine_clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image,
                                      size_t * src_origin, size_t * dst_origin, size_t * region,
                                      cl_uint num_events_in_wait_list, cl_event * event_wait_list, cl_event * event)
{
    cl_int ret;
    TRACE("\n");
    ret = clEnqueueCopyImage(command_queue, src_image, dst_image, src_origin, dst_origin, region, num_events_in_wait_list, event_wait_list, event);
    return ret;
}
예제 #6
0
/**
 * \brief ocl::Image::copyToAsync Copies asynchronously from this Image to the destination Image.
 *
 * \param queue is a command queue on which the command is executed.
 * \param src_origin is the 3D offset in bytes from which the Image is read.
 * \param region is the 3D region of the data. It is given with {image_width, image_height, image_depth}.
 * \param dest is the Image into which the data is going to be copied.
 * \param dest_origin is the 3D offset in bytes from which the destionation Image is read.
 * \param list contains all events for which this command has to wait.
 * \return event which can be integrated into other EventList.
 */
ocl::Event ocl::Image::copyToAsync(const Queue &queue, size_t *src_origin, const size_t *region, const Image &dest, size_t *dest_origin, const EventList &list)
{
    TRUE_ASSERT(this->context() == dest.context(), "Context of this and dest must be equal");
    TRUE_ASSERT(queue.context() == *this->context(), "Context of queue and this must be equal");
    cl_event event_id;
    OPENCL_SAFE_CALL( clEnqueueCopyImage(queue.id(), this->id(), dest.id(),
                                         src_origin, dest_origin, region, list.size(),
                                         list.events().data(), &event_id) );
    return ocl::Event(event_id, this->context());
}
예제 #7
0
/**
 * \brief ocl::Image::copyToAsync Copies asynchronously from this Image to the destination Image.
 *
 * \param src_origin is the 3D offset in bytes from which the Image is read.
 * \param region is the 3D region of the data. It is given with {image_width, image_height, image_depth}.
 * \param dest is the Image into which the data is going to be copied.
 * \param dest_origin is the 3D offset in bytes from which the destionation Image is read.
 * \param list contains all events for which this command has to wait.
 * \return event which can be integrated into other EventList.
 */
ocl::Event ocl::Image::copyToAsync(size_t *src_origin, const size_t *region, const Image &dest, size_t *dest_origin, const EventList &list)
{
    TRUE_ASSERT(this->context() == dest.context(), "Context of this and dest must be equal");
    TRUE_ASSERT(this->id() != dest.id(), "Images must not be equal this->id() " << this->id() << "; other.id " << dest.id());
    cl_event event_id;
    OPENCL_SAFE_CALL( clEnqueueCopyImage(this->activeQueue().id(), this->id(), dest.id(),
                                         src_origin, dest_origin, region, list.size(),
                                         list.events().data(), &event_id) );
    return ocl::Event(event_id, this->context());
}
예제 #8
0
static int neighbor_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
{
    AVFilterContext *avctx = inlink->dst;
    AVFilterLink *outlink = avctx->outputs[0];
    NeighborOpenCLContext *ctx = avctx->priv;
    AVFrame *output = NULL;
    cl_int cle;
    size_t global_work[2];
    cl_mem src, dst;
    int err, p;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {0, 0, 1};

    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
           av_get_pix_fmt_name(input->format),
           input->width, input->height, input->pts);

    if (!input->hw_frames_ctx)
        return AVERROR(EINVAL);

    if (!ctx->initialised) {
        err = neighbor_opencl_init(avctx);
        if (err < 0)
            goto fail;

        err = neighbor_opencl_make_filter_params(avctx);
        if (err < 0)
            goto fail;

    }

    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!output) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
        src = (cl_mem) input->data[p];
        dst = (cl_mem)output->data[p];

        if (!dst)
            break;

        if (ctx->threshold[p] == 0) {
            err = ff_opencl_filter_work_size_from_image(avctx, region, output, p, 0);
            if (err < 0)
                goto fail;

            cle = clEnqueueCopyImage(ctx->command_queue, src, dst,
                                     origin, origin, region, 0, NULL, NULL);
            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to copy plane %d: %d.\n",
                             p, cle);
        } else {
            CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
            CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
            CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_float, &ctx->threshold[p]);
            CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->coord);

            err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
            if (err < 0)
                goto fail;

            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
                   p, global_work[0], global_work[1]);

            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
                                         global_work, NULL,
                                         0, NULL, NULL);
            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
                             "kernel: %d.\n", cle);
        }
    }

    cle = clFinish(ctx->command_queue);
    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);

    err = av_frame_copy_props(output, input);
    if (err < 0)
        goto fail;

    av_frame_free(&input);

    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
           av_get_pix_fmt_name(output->format),
           output->width, output->height, output->pts);

    return ff_filter_frame(outlink, output);

fail:
    clFinish(ctx->command_queue);
    av_frame_free(&input);
    av_frame_free(&output);
    return err;
}
예제 #9
0
END_TEST

START_TEST (test_read_write_image)
{
    cl_platform_id platform = 0;
    cl_device_id device;
    cl_context ctx;
    cl_command_queue queue;
    cl_mem image2d, part2d;
    cl_int result;

    unsigned char image2d_data_24bpp[3*3*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,       128, 128, 128, 0,
        0, 0, 255, 0,       255, 255, 0, 0,     0, 128, 0, 0,
        255, 128, 0, 0,     128, 0, 255, 0,     0, 0, 0, 0
    };

    unsigned char image2d_part_24bpp[2*2*4] = {
        255, 0, 0, 0,       0, 255, 0, 0,
        0, 0, 255, 0,       255, 255, 0, 0
    };

    unsigned char image2d_buffer[3*3*4];
    unsigned char image2d_part[2*2*4];

    cl_image_format fmt;

    fmt.image_channel_data_type = CL_UNORM_INT8;
    fmt.image_channel_order = CL_RGBA;

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {3, 3, 1};

    result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, 0);
    fail_if(
        result != CL_SUCCESS,
        "unable to get the default device"
    );

    ctx = clCreateContext(0, 1, &device, 0, 0, &result);
    fail_if(
        result != CL_SUCCESS || ctx == 0,
        "unable to create a valid context"
    );

    queue = clCreateCommandQueue(ctx, device, 0, &result);
    fail_if(
        result != CL_SUCCESS || queue == 0,
        "cannot create a command queue"
    );

    image2d = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt,
                              3, 3, 0, image2d_buffer, &result);
    fail_if(
        result != CL_SUCCESS || image2d == 0,
        "cannot create a valid 3x3 image2D"
    );

    part2d = clCreateImage2D(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &fmt,
                             2, 2, 0, image2d_part, &result);
    fail_if(
        result != CL_SUCCESS || image2d == 0,
        "cannot create a valid 2x2 image2D"
    );

    // Write data in buffer
    result = clEnqueueWriteImage(queue, image2d, 1, origin, region, 0, 0,
                                 image2d_data_24bpp, 0, 0, 0);
    fail_if(
        result != CL_SUCCESS,
        "cannot enqueue a blocking write image event"
    );

    // Read it back
    region[0] = 2;
    region[1] = 2;

    result = clEnqueueReadImage(queue, image2d, 1, origin, region, 0, 0,
                                image2d_part, 0, 0, 0);
    fail_if(
        result != CL_SUCCESS,
        "cannot enqueue a blocking read image event"
    );

    // Compare
    fail_if(
        std::memcmp(image2d_part, image2d_part_24bpp, sizeof(image2d_part)) != 0,
        "reading and writing images doesn't produce the correct result"
    );

    // Read it back using a buffer
    cl_event event;
    std::memset(image2d_part, 0, sizeof(image2d_part));

    result = clEnqueueCopyImage(queue, image2d, part2d, origin, origin,
                                region, 0, 0, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to enqueue a copy image event"
    );

    result = clWaitForEvents(1, &event);
    fail_if(
        result != CL_SUCCESS,
        "unable to wait for events"
    );

    // Compare
    fail_if(
        std::memcmp(image2d_part, image2d_part_24bpp, sizeof(image2d_part)) != 0,
        "copying images doesn't produce the correct result"
    );

    clReleaseEvent(event);
    clReleaseMemObject(part2d);
    clReleaseMemObject(image2d);
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);
}
/**
 * \related cl_Mem_Object_t
 *
 * This function copy content of one OpenCL Image memory object into another.
 * @param[in,out] self  pointer to structure, in which 'Copy' function pointer
 * is defined to point on this function.
 * @param[out] dest pointer to another Memory Object structure, where the data
 * from 'self' will be copied to.
 * @param[in] blocking_flag flag, that denotes, should operation be blocking or not.
 * @param[in] time_mode enumeration, that denotes how time measurement should be
 * performed.
 * @param[out] evt_to_generate pointer to OpenCL event that will be generated
 * at the end of operation.
 *
 * @return CL_SUCCESS in case of success, error code of type 'ret_code' otherwise.
 *
 * @see cl_err_codes.h for detailed error description.
 * @see 'cl_Error_t' structure for error handling.
 */
static ret_code Image_Copy(
    scow_Mem_Object         *self, 
    scow_Mem_Object         *dest,
    cl_bool                 blocking_flag, 
    TIME_STUDY_MODE         time_mode,
    cl_event                *evt_to_generate, 
    cl_command_queue        explicit_queue)
{
    cl_int ret = CL_SUCCESS;

    cl_event copy_ready, *p_copy_ready = (cl_event*) 0x0;

    const size_t origin[3] =
    { 0, 0, 0 }, region[3] =
    { self->width, self->height, 1 };

    if (self->obj_mem_type != dest->obj_mem_type)
    {
        return DISTINCT_MEM_OBJECTS;
    }

    // If src & dest are the same, no need to copy at all, just reset timer
    if (self == dest)
    {
        self->timer->current_time_device = 0;
        self->timer->total_time_device += self->timer->current_time_device;

        return CL_SUCCESS;
    }

    (evt_to_generate != NULL) ?
            (p_copy_ready = evt_to_generate) : (p_copy_ready = &copy_ready);

    OCL_CHECK_EXISTENCE(self, INVALID_BUFFER_GIVEN);
    OCL_CHECK_EXISTENCE(dest, INVALID_BUFFER_GIVEN);

    // Can't copy bigger image into smaller one
    if ((self->row_pitch > dest->row_pitch) || (self->height > dest->height)
            || (self->width > dest->width))
    {
        return INVALID_BUFFER_SIZE;
    }

    cl_command_queue q =
            (explicit_queue == NULL) ?
                    (self->parent_thread->q_data_dtod) : (explicit_queue);

    ret = clEnqueueCopyImage(q, self->cl_mem_object, dest->cl_mem_object,
            origin, origin, region, 0, NULL, p_copy_ready);

    OCL_DIE_ON_ERROR(ret, CL_SUCCESS, NULL, ret);

    switch (time_mode)
    {
    case MEASURE:
        self->timer->current_time_device = Gather_Time_uS(p_copy_ready);
        self->timer->total_time_device += self->timer->current_time_device;
        break;

    case DONT_MEASURE:
        break;

    default:
        break;
    }

    if (p_copy_ready != evt_to_generate){
        clReleaseEvent(*p_copy_ready);
    }

    return ret;
}