Example 1
    // ----------------------------------------------------------------------------
    //  Generates an opacity map from the transfer function specification
    // ----------------------------------------------------------------------------
    void mapOpacity(Image3D<float> & map, std::list<std::shared_ptr<Node>> transfer)
    {
        float samples = static_cast<float>(map.width()) - 1;
        auto buffer = map.data();

        memset(buffer, 0, map.size()*sizeof(float));

        // Guard against an empty transfer list before dereferencing begin()
        if (transfer.empty()) return;

        auto iter = transfer.begin();
        auto curr = *iter;
        iter++;

        while (iter != transfer.end())
        {
            auto next = *iter;

            size_t x1 = static_cast<size_t>(curr->density * samples);
            if (x1 >= map.width()) x1 = map.width()-1;
            size_t x2 = static_cast<size_t>(next->density * samples);
            if (x2 >= map.width()) x2 = map.width()-1;
            float  y1 = curr->material->opticalThickness;
            float  y2 = next->material->opticalThickness;

            for (size_t i = x1; i <= x2; i++)
            {
                float px = i / samples - curr->density;
                float py = next->density - curr->density;
                float part = low( high(px / py, 0.0f), 1.0f );
                buffer[i] = - logf( 1.f - (1.f - part) * y1 - part * y2 );
            }

            curr = next;
            iter++;
        }
    }
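
This mapper and the two that follow share the same structure and differ only in the
quantity interpolated between control points. The listing assumes a few declarations
that are not shown. A minimal sketch of those assumptions follows; the Node and
Material fields are inferred from the usage above, Vector3f and Vector<UInt8,4> come
from the project's math library, and low/high behave as min/max-style clamp helpers.
The real definitions may differ:

    #include <cmath>    // logf
    #include <cstring>  // memset
    #include <list>
    #include <memory>

    // Hypothetical sketches of the types the mappers reference; the real
    // definitions live elsewhere in the codebase.
    struct Material
    {
        float    opticalThickness;  // opacity at this control point, in [0,1)
        Vector3f specular;          // specular color, 0-255 per channel
        float    glossiness;        // normalized to [0,1]
        Vector3f emissive;          // emissive color, 0-255 per channel
        float    emissiveStrength;  // scale factor applied to emission
    };

    struct Node
    {
        float density;                      // normalized density in [0,1]
        std::shared_ptr<Material> material; // material at this control point
    };

    // Assumed clamp helpers: high() bounds from below, low() from above,
    // so low(high(x, 0.f), 1.f) == clamp(x, 0.f, 1.f).
    inline float high(float x, float lo) { return x < lo ? lo : x; }
    inline float low (float x, float hi) { return x > hi ? hi : x; }

The final -logf(1.f - alpha) in mapOpacity converts the interpolated opacity alpha
into an extinction (optical depth) value: with transmittance T = exp(-tau), we have
alpha = 1 - T, hence tau = -ln(1 - alpha).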
Example 2
    // ----------------------------------------------------------------------------
    //  Generates a specular map from the transfer function specification
    // ----------------------------------------------------------------------------
    void mapSpecular(Image3D<Vector<UInt8,4>> & map, std::list<std::shared_ptr<Node>> transfer)
    {
        float samples = static_cast<float>(map.width()) - 1;
        auto buffer = map.data();

        memset(buffer, 0, map.size()*sizeof(Vector<UInt8,4>));

        // Guard against an empty transfer list before dereferencing begin()
        if (transfer.empty()) return;

        auto iter = transfer.begin();
        auto curr = *iter;
        iter++;
        
        while (iter != transfer.end())
        {
            auto next = *iter;

            size_t x1 = static_cast<size_t>(curr->density * samples);
            if (x1 >= map.width()) x1 = map.width()-1;
            size_t x2 = static_cast<size_t>(next->density * samples);
            if (x2 >= map.width()) x2 = map.width()-1;                
            Vector3f s1(curr->material->specular);
            Vector3f s2(next->material->specular);
            Vector4f y1(s1[0], s1[1], s1[2], curr->material->glossiness*255.0f);
            Vector4f y2(s2[0], s2[1], s2[2], next->material->glossiness*255.0f);

            for (size_t i = x1; i <= x2; i++)
            {
                float px = i / samples - curr->density;
                float py = next->density - curr->density;
                float part = low( high(px / py, 0.0f), 1.0f );
                buffer[i] = static_cast<Vector<UInt8,4>>(y1*(1.f - part) + y2*part);
            }

            curr = next;
            iter++;
        }
    }
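
The final cast relies on Vector's component-wise float-to-UInt8 conversion; note
that specular is assumed to already be on a 0-255 scale, while glossiness is
normalized and scaled by 255. Written out per channel (a hypothetical expansion;
the sketch clamps first, since converting an out-of-range float to UInt8 is
undefined behavior, which the original cast does not guard against):

    // Hypothetical per-channel expansion of the component-wise cast,
    // using the low/high clamp helpers sketched above.
    Vector<UInt8,4> packLerp(const Vector4f &y1, const Vector4f &y2, float part)
    {
        Vector4f v = y1 * (1.f - part) + y2 * part;
        Vector<UInt8,4> out;
        for (size_t c = 0; c < 4; ++c)
            out[c] = static_cast<UInt8>(low(high(v[c], 0.f), 255.f));
        return out;
    }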
Example 3
    // ----------------------------------------------------------------------------
    //  Generates an emissive map from the transfer function specification
    // ----------------------------------------------------------------------------
    void mapEmissive(Image3D<Vector4f> & map, std::list<std::shared_ptr<Node>> transfer)
    {
        float samples = static_cast<float>(map.width()) - 1;
        auto buffer = map.data();

        memset(buffer, 0, map.size()*sizeof(Vector4f));

        // Guard against an empty transfer list before dereferencing begin()
        if (transfer.empty()) return;

        auto iter = transfer.begin();
        auto curr = *iter;
        iter++;
        
        while (iter != transfer.end())
        {
            auto next = *iter;

            size_t x1 = static_cast<size_t>(curr->density * samples);
            if (x1 >= map.width()) x1 = map.width()-1;
            size_t x2 = static_cast<size_t>(next->density * samples);
            if (x2 >= map.width()) x2 = map.width()-1;                
            Vector3f s1 = Vector3f(curr->material->emissive) / 255.0f * curr->material->emissiveStrength;
            Vector3f s2 = Vector3f(next->material->emissive) / 255.0f * next->material->emissiveStrength;
            Vector4f y1(s1[0], s1[1], s1[2], 0.0f);
            Vector4f y2(s2[0], s2[1], s2[2], 0.0f);

            for (size_t i = x1; i <= x2; i++)
            {
                float px = i / samples - curr->density;
                float py = next->density - curr->density;
                float part = low( high(px / py, 0.0f), 1.0f );
                buffer[i] = y1*(1.f - part) + y2*part;
            }

            curr = next;
            iter++;
        }
    }
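
For illustration, a minimal hypothetical caller that builds a two-node transfer
function and fills all three maps. The Image3D width/height/depth constructor is an
assumption based on the usage above; note that the mappers expect the node list to
be sorted by ascending density:

    void buildTransferMaps()
    {
        // Two control points: fully transparent at density 0,
        // dense and glossy at density 1.
        auto airMat = std::make_shared<Material>();
        airMat->opticalThickness = 0.0f;

        auto boneMat = std::make_shared<Material>();
        boneMat->opticalThickness = 0.95f;
        boneMat->glossiness       = 0.8f;

        auto n0 = std::make_shared<Node>();
        n0->density  = 0.0f;
        n0->material = airMat;

        auto n1 = std::make_shared<Node>();
        n1->density  = 1.0f;
        n1->material = boneMat;

        std::list<std::shared_ptr<Node>> transfer{ n0, n1 };

        // Hypothetical constructor: 256 samples wide, flat in y and z.
        Image3D<float>           opacityMap(256, 1, 1);
        Image3D<Vector<UInt8,4>> specularMap(256, 1, 1);
        Image3D<Vector4f>        emissiveMap(256, 1, 1);

        mapOpacity(opacityMap, transfer);
        mapSpecular(specularMap, transfer);
        mapEmissive(emissiveMap, transfer);
    }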
Example 4
/*
 * Kernel event
 */
KernelEvent::KernelEvent(CommandQueue *parent,
                         Kernel *kernel,
                         cl_uint work_dim,
                         const size_t *global_work_offset,
                         const size_t *global_work_size,
                         const size_t *local_work_size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_int *errcode_ret)
: Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
  p_work_dim(work_dim), p_kernel(kernel), p_timeout_ms(0)
{
    clRetainKernel(desc(p_kernel));

    // The Event base constructor reports failures through errcode_ret
    if (*errcode_ret != CL_SUCCESS) return;

    // Sanity checks
    if (!kernel)
    {
        *errcode_ret = CL_INVALID_KERNEL;
        return;
    }

    // Check that the kernel was built for parent's device.
    cl_device_id d_device = 0;
    cl_context k_ctx, q_ctx;
    size_t max_work_group_size;
    cl_uint max_dims = 0;

    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(cl_device_id), &d_device, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    auto device = pobj(d_device);
    *errcode_ret = parent->info(CL_QUEUE_CONTEXT, sizeof(cl_context), &q_ctx, 0);
    *errcode_ret |= kernel->info(CL_KERNEL_CONTEXT, sizeof(cl_context), &k_ctx, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
                                &max_work_group_size, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
                                &max_dims, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                max_dims * sizeof(size_t), p_max_work_item_sizes, 0);

    if (*errcode_ret != CL_SUCCESS)
        return;

    p_dev_kernel = kernel->deviceDependentKernel(device);

    if (!p_dev_kernel)
    {
        *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE;
        return;
    }

    // Check that contexts match
    if (k_ctx != q_ctx)
    {
        *errcode_ret = CL_INVALID_CONTEXT;
        return;
    }

    // Check args
    if (!kernel->argsSpecified())
    {
        *errcode_ret = CL_INVALID_KERNEL_ARGS;
        return;
    }

    // Check the dimension; max_dims must also fit the fixed-size per-dimension
    // arrays, which hold at most MAX_WORK_DIMS entries
    if ((work_dim == 0 || work_dim > max_dims) ||
            (max_dims > MAX_WORK_DIMS))
    {
        *errcode_ret = CL_INVALID_WORK_DIMENSION;
        return;
    }

    // Populate work_offset, work_size and local_work_size
    size_t work_group_size = 1;
    cl_uint dims[3];
    kernel->reqdWorkGroupSize(kernel->deviceDependentModule(device), dims);

    uint32_t reqd_x = dims[0];
    uint32_t reqd_y = dims[1];
    uint32_t reqd_z = dims[2];
    bool reqd_any = reqd_x > 0 || reqd_y > 0 || reqd_z > 0;

    if (reqd_any)
    {
        // if __attribute__((reqd_work_group_size(X, Y, Z))) is set and local size not specified
        if (!local_work_size)
        {
            *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
            return;
        }

        // if __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match
        else 
        {
            if ((                local_work_size[0] != reqd_x) ||
                (work_dim > 1 && local_work_size[1] != reqd_y) ||
                (work_dim > 2 && local_work_size[2] != reqd_z))
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }
        }
    }

    // Special case: a zero local work size means there is no work to set up
    if (local_work_size && local_work_size[0] == 0) return;

    cl_uint i;
    for (i=0; i<work_dim; ++i)
    {
        if (global_work_offset)
        {
            p_global_work_offset[i] = global_work_offset[i];
        }
        else
        {
            p_global_work_offset[i] = 0;
        }

        if (!global_work_size || !global_work_size[i])
        {
            *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE;
            return;
        }
        p_global_work_size[i] = global_work_size[i];

        if (!local_work_size)
        {
            // Guess the best value according to the device
            p_local_work_size[i] =
                p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]);
        }
        else
        {
            // Check divisibility
            if ((global_work_size[i] % local_work_size[i]) != 0)
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }

            // Not too big ?
            if (local_work_size[i] > p_max_work_item_sizes[i])
            {
                *errcode_ret = CL_INVALID_WORK_ITEM_SIZE;
                return;
            }

            p_local_work_size[i] = local_work_size[i];
            work_group_size *= local_work_size[i];
        }
    }
    // initialize missing dimensions
    for (; i < max_dims; i++)
    {
        p_global_work_offset[i]  = 0;
        p_global_work_size[i]    = 1;
        p_local_work_size[i]     = 1;
    }

    // Check that we don't ask too much of the device
    if (work_group_size > max_work_group_size)
    {
        *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
        return;
    }

    // Check arguments (buffer alignment, image size, ...)
    for (unsigned int i=0; i<kernel->numArgs(); ++i)
    {
        const Kernel::Arg &a = kernel->arg(i);

        if (a.kind() == Kernel::Arg::Buffer && a.file() != Kernel::Arg::Local)
        {
            MemObject *buffer = *(MemObject **)(a.value(0));

            if (!BufferEvent::isSubBufferAligned(buffer, device))
            {
                *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
                return;
            }

            clRetainMemObject(desc(buffer));
            p_mem_objects.push_back((MemObject *) buffer);
        }
        else if (a.kind() == Kernel::Arg::Image2D)
        {
            Image2D *image = *(Image2D **)(a.value(0));
            size_t maxWidth, maxHeight;

            *errcode_ret = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH,
                                        sizeof(size_t), &maxWidth, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT,
                                         sizeof(size_t), &maxHeight, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            if (image->width() > maxWidth || image->height() > maxHeight)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }

            clRetainMemObject(desc(image));
            p_mem_objects.push_back((MemObject *) image);
        }
        else if (a.kind() == Kernel::Arg::Image3D)
        {
            Image3D *image = *(Image3D **)a.value(0);
            size_t maxWidth, maxHeight, maxDepth;

            *errcode_ret = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH,
                                        sizeof(size_t), &maxWidth, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                                         sizeof(size_t), &maxHeight, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH,
                                         sizeof(size_t), &maxDepth, 0);

            if (*errcode_ret != CL_SUCCESS)
                return;

            if (image->width() > maxWidth || image->height() > maxHeight ||
                image->depth() > maxDepth)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }

            clRetainMemObject(desc(image));
            p_mem_objects.push_back((MemObject *) image);
        }
    }

    // Check whether the kernel has a timeout specified and the CommandQueue allows it
    if (kernel->getTimeout() > 0)
    {
        cl_command_queue_properties queue_props;
        *errcode_ret = parent->info(CL_QUEUE_PROPERTIES,
                                    sizeof(cl_command_queue_properties),
                                    &queue_props, 0);
        if (*errcode_ret != CL_SUCCESS)  return;
        if ((queue_props & CL_QUEUE_KERNEL_TIMEOUT_COMPUTE_UNIT_TI) != 0)
            p_timeout_ms = kernel->getTimeout();
    }
}
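
For context, this constructor performs the validation behind the standard
clEnqueueNDRangeKernel entry point (CL_QUEUE_KERNEL_TIMEOUT_COMPUTE_UNIT_TI is a TI
vendor extension). A minimal host-side call that exercises these checks, assuming a
valid queue and a built kernel with all arguments already set:

    #include <CL/cl.h>
    #include <cstdio>

    // Sketch: enqueue a 2-D kernel over a 512x512 range in 16x16 work-groups.
    cl_int enqueueExample(cl_command_queue queue, cl_kernel kernel)
    {
        const size_t global[2] = { 512, 512 };
        const size_t local[2]  = { 16, 16 }; // must divide global evenly, or the
                                             // event reports CL_INVALID_WORK_GROUP_SIZE
        cl_event ev;
        cl_int err = clEnqueueNDRangeKernel(queue, kernel,
                                            2,       // work_dim
                                            nullptr, // global_work_offset
                                            global, local,
                                            0, nullptr, &ev);
        if (err != CL_SUCCESS)
        {
            fprintf(stderr, "enqueue failed: %d\n", err);
            return err;
        }

        err = clWaitForEvents(1, &ev);
        clReleaseEvent(ev);
        return err;
    }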