/*
 * Bring up the OpenCL side of an NBodyState.
 *
 * The body table must already be present on the state. Steps, in order:
 * record the run flags, allocate the CL bookkeeping structures, set up the
 * CL device/context, check device capabilities, choose thread counts and
 * work sizes, load the kernels, create the buffers, set the initial tree
 * status, bind all kernel arguments and finally marshal the bodies.
 *
 * Returns:
 *   NBODY_CONSISTENCY_ERROR  - st->bodytab is not set
 *   NBODY_UNSUPPORTED        - the context uses a custom Lua potential
 *   NBODY_CAPABILITY_ERROR   - the device capability check failed
 *   NBODY_ERROR              - thread count / work size selection failed
 *   NBODY_CL_ERROR           - any of the CL setup steps failed
 *   NBODY_SUCCESS            - everything completed
 *
 * Structures allocated here stay attached to st on every return path.
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    cl_int clStatus;

    /* Nothing can be marshalled without bodies, so refuse early */
    if (!st->bodytab)
    {
        return NBODY_CONSISTENCY_ERROR;
    }

    /* A Lua-defined potential is rejected for the CL path */
    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    /* Run configuration flags copied from the context */
    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);
    st->usesCL = TRUE;

    /* Zero-initialised bookkeeping structures owned by the state */
    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    clStatus = mwSetupCL(st->ci, clr);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (!nbCheckDevCapabilities(&st->ci->di, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    /* Work sizes are only computed once thread counts were set successfully */
    if (nbSetThreadCounts(st->workSizes, &st->ci->di, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, &st->ci->di))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbCreateBuffers(ctx, st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetInitialTreeStatus(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetAllKernelArguments(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbMarshalBodies(st, CL_TRUE);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    return NBODY_SUCCESS;
}
/*
 * Minimal OpenCL initialisation for an NBodyState.
 *
 * Copies the run flags from the context and the checkpointing switch from
 * the CL request onto the state, allocates the zeroed CL bookkeeping
 * structures, and sets up the CL device/context through mwSetupCL().
 *
 * Returns NBODY_SUCCESS when mwSetupCL() reports CL_SUCCESS and
 * NBODY_CL_ERROR otherwise. The allocated structures stay attached to st
 * in both cases.
 */
NBodyStatus nbInitCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    cl_int setupStatus;

    /* Run configuration flags */
    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);
    st->usesCL = TRUE;
    st->useCLCheckpointing = clr->enableCheckpointing;

    /* Zero-initialised bookkeeping structures owned by the state */
    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    setupStatus = mwSetupCL(st->ci, clr);

    return (setupStatus == CL_SUCCESS) ? NBODY_SUCCESS : NBODY_CL_ERROR;
}
/*
 * Set up OpenCL for the separation integral: device/context, both programs
 * (integration and summarization) and their kernels.
 *
 * ci   - CL info filled in by mwSetupCL()
 * ap   - astronomy parameters, passed to the flag/IL/memory helpers
 * ias  - integral areas, used by the device memory check
 * clr  - user CL request; clr->verbose enables printing the compiler flags
 *
 * On success the file-level integrationProgram, summarizationProgram,
 * _separationKernel and _summarizationKernel are set.
 *
 * Returns CL_SUCCESS on success, the error from mwSetupCL() if device setup
 * failed, or MW_CL_ERROR for any later failure.
 */
cl_int setupSeparationCL(CLInfo* ci,
                         const AstronomyParameters* ap,
                         const IntegralArea* ias,
                         const CLRequest* clr)
{
    char* compileFlags;
    cl_bool useILKernel;
    cl_int err;
    const char* kernSrc = (const char*) probabilities_kernel_cl;
    size_t kernSrcLen = probabilities_kernel_cl_len;

    const char* summarizationKernSrc = (const char*) summarization_kernel_cl;
    size_t summarizationKernSrcLen = summarization_kernel_cl_len;


    err = mwSetupCL(ci, clr);
    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error getting device and context");
        return err;
    }

    if (!separationCheckDevCapabilities(&ci->di))
    {
        return MW_CL_ERROR;
    }

    useILKernel = usingILKernelIsAcceptable(ci, ap, clr);
    compileFlags = getCompilerFlags(ci, ap, useILKernel);
    if (!compileFlags)
    {
        mw_printf("Failed to get CL compiler flags\n");
        return MW_CL_ERROR;
    }

    if (clr->verbose)
    {
        mw_printf("\nCompiler flags:\n%s\n\n", compileFlags);
    }

    /* From here on compileFlags is owned locally; leave through setup_exit */
    integrationProgram = mwCreateProgramFromSrc(ci, 1, &kernSrc, &kernSrcLen, compileFlags);
    if (!integrationProgram)
    {
        mw_printf("Error creating integral program from source\n");
        err = MW_CL_ERROR;
        goto setup_exit;
    }

    summarizationProgram = mwCreateProgramFromSrc(ci, 1, &summarizationKernSrc, &summarizationKernSrcLen, compileFlags);
    if (!summarizationProgram)
    {
        mw_printf("Error creating summarization program from source\n");
        err = MW_CL_ERROR;
        goto setup_exit;
    }

    if (useILKernel)
    {
        mw_printf("Using AMD IL kernel\n");
        err = setProgramFromILKernel(ci, ap);
        if (err != CL_SUCCESS)
        {
            mw_printf("Failed to create IL kernel. Falling back to source kernel\n");

            /*
             * Actually perform the announced fallback. Previously err kept
             * the IL failure code, which skipped kernel creation below and
             * made the whole setup fail.
             *
             * NOTE(review): assumes setProgramFromILKernel() leaves
             * integrationProgram either still usable or NULL when it
             * fails -- confirm against its implementation.
             */
            if (!integrationProgram)
            {
                integrationProgram = mwCreateProgramFromSrc(ci, 1, &kernSrc, &kernSrcLen, compileFlags);
                if (!integrationProgram)
                {
                    mw_printf("Error creating integral program from source\n");
                    err = MW_CL_ERROR;
                    goto setup_exit;
                }
            }

            err = CL_SUCCESS;
        }
    }

    if (err == CL_SUCCESS)
    {
        _separationKernel = mwCreateKernel(integrationProgram, "probabilities");
        _summarizationKernel = mwCreateKernel(summarizationProgram, "summarization");
        if (   !_separationKernel
            || !_summarizationKernel
            || setSummarizationWorkgroupSize(ci)
            || !separationCheckDevMemory(&ci->di, ap, ias))
        {
            err = MW_CL_ERROR;
        }
    }


setup_exit:
    free(compileFlags);

    return err;
}
/*
 * Set up OpenCL for the separation integral and build the "mu_sum_kernel"
 * program from the kernel source.
 *
 * ci         - CL info filled in by mwSetupCL(); device info goes to ci->di
 * ap, ias    - passed to the capability check (ap also to the flag builder)
 * clr        - user CL request; clr->verbose selects the long or short
 *              device info printout
 * useImages  - in/out: on input whether images are requested; on output
 *              1 only if they were requested and ci->di.imgSupport is set,
 *              otherwise 0. Must not be NULL. Only written once the
 *              capability check has passed.
 *
 * Returns CL_SUCCESS on success, the CL error from the failing call where
 * one is available, or MW_CL_ERROR otherwise.
 */
cl_int setupSeparationCL(CLInfo* ci,
                         const AstronomyParameters* ap,
                         const IntegralArea* ias,
                         const CLRequest* clr,
                         cl_int* useImages)
{
    cl_int err;
    char* compileFlags;
    char* kernelSrc;

    err = mwSetupCL(ci, clr);
    if (err != CL_SUCCESS)
    {
        mwCLWarn("Error getting device and context", err);
        return err;
    }

    err = mwGetDevInfo(&ci->di, ci->dev);
    if (err != CL_SUCCESS)
    {
        warn("Failed to get device info\n");
        return err;
    }

    if (clr->verbose)
    {
        mwPrintDevInfo(&ci->di);
    }
    else
    {
        mwPrintDevInfoShort(&ci->di);
    }

    if (!separationCheckDevCapabilities(&ci->di, ap, ias))
    {
        warn("Device failed capability check\n");
        return MW_CL_ERROR;
    }

    /* Images are only used when both requested and supported by the device */
    *useImages = *useImages && ci->di.imgSupport;

    compileFlags = getCompilerFlags(ap, &ci->di, *useImages);
    if (!compileFlags)
    {
        warn("Failed to get compiler flags\n");
        return MW_CL_ERROR;
    }

    kernelSrc = findKernelSrc();
    if (!kernelSrc)
    {
        warn("Failed to read CL kernel source\n");
        /* compileFlags was already obtained; release it on this early exit */
        free(compileFlags);
        return MW_CL_ERROR;
    }

    warn("\nCompiler flags:\n%s\n\n", compileFlags);
    err = mwSetProgramFromSrc(ci, "mu_sum_kernel", (const char**) &kernelSrc, 1, compileFlags);

    /* Source text and flags are not used again after the build call */
    freeKernelSrc(kernelSrc);
    free(compileFlags);

    if (err != CL_SUCCESS)
    {
        mwCLWarn("Error creating program from source", err);
        return err;
    }

    return CL_SUCCESS;
}