/*
 * Prepare an NBodyState for running with OpenCL.
 *
 * Records the quadrupole / exact-criterion flags from ctx, marks the state
 * as using CL, allocates the CLInfo, NBodyBuffers, NBodyWorkSizes and
 * NBodyKernels structures hanging off st, and then runs the CL setup
 * sequence: mwSetupCL, device capability check, thread counts and work
 * sizes, effective body count, kernel loading, buffer creation, initial
 * tree status, kernel arguments, and finally marshaling of the bodies.
 *
 * Returns:
 *   NBODY_CONSISTENCY_ERROR  st->bodytab is not set
 *   NBODY_UNSUPPORTED        ctx uses the custom Lua external potential
 *   NBODY_CAPABILITY_ERROR   nbCheckDevCapabilities rejected the device
 *   NBODY_ERROR              thread count or work size setup failed
 *   NBODY_CL_ERROR           any other setup step failed
 *   NBODY_SUCCESS            everything succeeded
 *
 * The structures allocated here are left attached to st on every return
 * path; this function does not free them itself.
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    cl_int clStatus;

    /* Bodies must be set before trying to use this */
    if (!st->bodytab)
    {
        return NBODY_CONSISTENCY_ERROR;
    }

    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);
    st->usesCL = TRUE;

    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    clStatus = mwSetupCL(st->ci, clr);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (!nbCheckDevCapabilities(&st->ci->di, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    /* Thread counts first; work sizes are only attempted if that succeeded. */
    if (nbSetThreadCounts(st->workSizes, &st->ci->di, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, &st->ci->di))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbCreateBuffers(ctx, st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetInitialTreeStatus(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetAllKernelArguments(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbMarshalBodies(st, CL_TRUE);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    return NBODY_SUCCESS;
}
/*
 * Minimal OpenCL initialisation for an NBodyState.
 *
 * Copies the quadrupole / exact-criterion flags from ctx and the
 * checkpointing flag from clr into st, marks the state as using CL,
 * allocates the CLInfo, NBodyBuffers, NBodyWorkSizes and NBodyKernels
 * structures on st, and calls mwSetupCL.
 *
 * Returns NBODY_CL_ERROR if mwSetupCL does not report CL_SUCCESS,
 * otherwise NBODY_SUCCESS. The allocated structures stay attached to st
 * in both cases.
 */
NBodyStatus nbInitCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    cl_int setupStatus;

    st->usesQuad = ctx->useQuad;
    st->usesExact = (ctx->criterion == Exact);
    st->usesCL = TRUE;
    st->useCLCheckpointing = clr->enableCheckpointing;

    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    setupStatus = mwSetupCL(st->ci, clr);

    return (setupStatus == CL_SUCCESS) ? NBODY_SUCCESS : NBODY_CL_ERROR;
}
cl_int setupSeparationCL(CLInfo* ci, const AstronomyParameters* ap, const IntegralArea* ias, const CLRequest* clr) { char* compileFlags; cl_bool useILKernel; cl_int err = MW_CL_ERROR; const char* kernSrc = (const char*) probabilities_kernel_cl; size_t kernSrcLen = probabilities_kernel_cl_len; const char* summarizationKernSrc = (const char*) summarization_kernel_cl; size_t summarizationKernSrcLen = summarization_kernel_cl_len; err = mwSetupCL(ci, clr); if (err != CL_SUCCESS) { mwPerrorCL(err, "Error getting device and context"); return err; } if (!separationCheckDevCapabilities(&ci->di)) { return MW_CL_ERROR; } useILKernel = usingILKernelIsAcceptable(ci, ap, clr); compileFlags = getCompilerFlags(ci, ap, useILKernel); if (!compileFlags) { mw_printf("Failed to get CL compiler flags\n"); return MW_CL_ERROR; } if (clr->verbose) { mw_printf("\nCompiler flags:\n%s\n\n", compileFlags); } integrationProgram = mwCreateProgramFromSrc(ci, 1, &kernSrc, &kernSrcLen, compileFlags); if (!integrationProgram) { mw_printf("Error creating integral program from source\n"); err = MW_CL_ERROR; goto setup_exit; } summarizationProgram = mwCreateProgramFromSrc(ci, 1, &summarizationKernSrc, &summarizationKernSrcLen, compileFlags); if (!summarizationProgram) { mw_printf("Error creating summarization program from source\n"); err = MW_CL_ERROR; goto setup_exit; } if (useILKernel) { mw_printf("Using AMD IL kernel\n"); err = setProgramFromILKernel(ci, ap); if (err != CL_SUCCESS) { mw_printf("Failed to create IL kernel. Falling back to source kernel\n"); } } if (err == CL_SUCCESS) { _separationKernel = mwCreateKernel(integrationProgram, "probabilities"); _summarizationKernel = mwCreateKernel(summarizationProgram, "summarization"); if ( !_separationKernel || !_summarizationKernel || setSummarizationWorkgroupSize(ci) || !separationCheckDevMemory(&ci->di, ap, ias)) { err = MW_CL_ERROR; } } setup_exit: free(compileFlags); return err; }
/*
 * Set up OpenCL for separation and build the "mu_sum_kernel" program.
 *
 * Steps, in order:
 *   1. mwSetupCL to obtain the device and context.
 *   2. mwGetDevInfo to fill ci->di, then print the device info (full
 *      listing when clr->verbose is set, short form otherwise).
 *   3. separationCheckDevCapabilities against ap and ias.
 *   4. Narrow *useImages to "requested AND supported by the device".
 *   5. Obtain compiler flags and kernel source, then build the program
 *      with mwSetProgramFromSrc.
 *
 * Parameters:
 *   ci         CL info to initialise.
 *   ap, ias    passed to the capability check; ap also selects flags.
 *   clr        CL request; its verbose field picks the device printout.
 *   useImages  in/out: on input whether images are requested, on output
 *              cleared if ci->di.imgSupport is false.
 *
 * Returns CL_SUCCESS on success, the error code from mwSetupCL,
 * mwGetDevInfo or mwSetProgramFromSrc when those fail, and MW_CL_ERROR
 * for the remaining failures.
 *
 * The compiler flags and kernel source are released on every path that
 * obtained them, including the path where the kernel source cannot be
 * read.
 */
cl_int setupSeparationCL(CLInfo* ci,
                         const AstronomyParameters* ap,
                         const IntegralArea* ias,
                         const CLRequest* clr,
                         cl_int* useImages)
{
    cl_int err;
    char* compileFlags;
    char* kernelSrc;

    err = mwSetupCL(ci, clr);
    if (err != CL_SUCCESS)
    {
        mwCLWarn("Error getting device and context", err);
        return err;
    }

    err = mwGetDevInfo(&ci->di, ci->dev);
    if (err != CL_SUCCESS)
    {
        warn("Failed to get device info\n");
        return err;
    }

    if (clr->verbose)
    {
        mwPrintDevInfo(&ci->di);
    }
    else
    {
        mwPrintDevInfoShort(&ci->di);
    }

    if (!separationCheckDevCapabilities(&ci->di, ap, ias))
    {
        warn("Device failed capability check\n");
        return MW_CL_ERROR;
    }

    /* Images are used only when both requested and supported. */
    *useImages = *useImages && ci->di.imgSupport;

    compileFlags = getCompilerFlags(ap, &ci->di, *useImages);
    if (!compileFlags)
    {
        warn("Failed to get compiler flags\n");
        return MW_CL_ERROR;
    }

    kernelSrc = findKernelSrc();
    if (!kernelSrc)
    {
        warn("Failed to read CL kernel source\n");
        /* The flags were already obtained; release them before bailing out. */
        free(compileFlags);
        return MW_CL_ERROR;
    }

    warn("\nCompiler flags:\n%s\n\n", compileFlags);
    err = mwSetProgramFromSrc(ci, "mu_sum_kernel", (const char**) &kernelSrc, 1, compileFlags);

    /* Neither string is needed once the build attempt has finished. */
    freeKernelSrc(kernelSrc);
    free(compileFlags);

    if (err != CL_SUCCESS)
    {
        mwCLWarn("Error creating program from source", err);
        return err;
    }

    return CL_SUCCESS;
}