/*
 * Build the OpenCL-related parts of an NBodyState.
 *
 * Allocates the CLInfo, NBodyBuffers, NBodyWorkSizes and NBodyKernels
 * records hanging off of st, sets up CL through mwSetupCL() using the
 * request in clr, and then runs the remaining setup steps in order:
 * device capability check, thread counts / work sizes, effective body
 * count, kernel loading, buffer creation, initial tree status, kernel
 * arguments and finally marshalling of the bodies.
 *
 * st->bodytab must already be set before calling this.
 *
 * Returns:
 *   NBODY_SUCCESS            every step succeeded
 *   NBODY_CONSISTENCY_ERROR  st->bodytab is not set
 *   NBODY_UNSUPPORTED        ctx uses EXTERNAL_POTENTIAL_CUSTOM_LUA
 *   NBODY_CL_ERROR           mwSetupCL, nbLoadKernels, nbCreateBuffers,
 *                            nbSetInitialTreeStatus, nbSetAllKernelArguments
 *                            or nbMarshalBodies reported failure
 *   NBODY_CAPABILITY_ERROR   nbCheckDevCapabilities rejected the device
 *   NBODY_ERROR              nbSetThreadCounts or nbSetWorkSizes failed
 *
 * NOTE(review): none of the failure paths here release what was allocated
 * earlier in this function, and st->usesCL stays TRUE; presumably the
 * caller tears the state down -- confirm.
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx, const CLRequest* clr)
{
    cl_int clStatus;

    /* Bodies must be set before trying to use this */
    if (!st->bodytab)
    {
        return NBODY_CONSISTENCY_ERROR;
    }

    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    /* Record which options this state runs with */
    st->usesCL = TRUE;
    st->usesExact = (ctx->criterion == Exact);
    st->usesQuad = ctx->useQuad;

    /* Zero-filled bookkeeping records owned by the state */
    st->ci = mwCalloc(1, sizeof(CLInfo));
    st->nbb = mwCalloc(1, sizeof(NBodyBuffers));
    st->workSizes = mwCalloc(1, sizeof(NBodyWorkSizes));
    st->kernels = mwCalloc(1, sizeof(NBodyKernels));

    clStatus = mwSetupCL(st->ci, clr);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (!nbCheckDevCapabilities(&st->ci->di, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    /* Work sizes are only computed once the thread counts were accepted */
    if (nbSetThreadCounts(st->workSizes, &st->ci->di, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, &st->ci->di))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbCreateBuffers(ctx, st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetInitialTreeStatus(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetAllKernelArguments(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    /* Last step; its result alone decides between success and CL error */
    clStatus = nbMarshalBodies(st, CL_TRUE);

    return (clStatus == CL_SUCCESS) ? NBODY_SUCCESS : NBODY_CL_ERROR;
}
/*
 * Finish the OpenCL-related initialization of an NBodyState.
 *
 * This variant expects st->usesCL to be set already and uses the device
 * info found at st->ci->di. It requires st->bodytab to be set. It then
 * runs, in order: device capability check, thread counts / work sizes,
 * effective body count, maximum depth, the consistent-memory decision,
 * kernel loading, buffer creation, initial tree status, kernel arguments
 * and marshalling of the bodies.
 *
 * Returns:
 *   NBODY_SUCCESS            every step succeeded
 *   NBODY_CONSISTENCY_ERROR  st->usesCL is false or st->bodytab is not set
 *   NBODY_UNSUPPORTED        ctx uses EXTERNAL_POTENTIAL_CUSTOM_LUA
 *   NBODY_CAPABILITY_ERROR   nbCheckDevCapabilities rejected the device
 *   NBODY_ERROR              nbSetThreadCounts or nbSetWorkSizes failed
 *   NBODY_CL_ERROR           nbLoadKernels, nbCreateBuffers,
 *                            nbSetInitialTreeStatus, nbSetAllKernelArguments
 *                            or nbMarshalBodies reported failure
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx)
{
    cl_int clStatus;
    const DevInfo* di;
    int nvidiaWithInlinePTX;

    if (!st->usesCL)
    {
        mw_printf("CL not setup for CL state initialization\n");
        return NBODY_CONSISTENCY_ERROR;
    }

    /* Bodies must be set before trying to use this */
    if (!st->bodytab)
    {
        mw_printf("Bodies not set for CL state initialization\n");
        return NBODY_CONSISTENCY_ERROR;
    }

    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    di = &st->ci->di;

    if (!nbCheckDevCapabilities(di, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    /* Work sizes are only computed once the thread counts were accepted */
    if (nbSetThreadCounts(st->workSizes, di, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, di, st->nbody, st->ignoreResponsive))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);
    st->maxDepth = nbFindMaxDepthForDevice(di, st->workSizes, ctx->useQuad);

    /*
     * Consistent memory is assumed either for an Nvidia GPU whose platform
     * has inline PTX available, or when the device itself reports it. The
     * generic device query is only made when the Nvidia test fails.
     */
    nvidiaWithInlinePTX = mwIsNvidiaGPUDevice(di) && mwNvidiaInlinePTXAvailable(st->ci->plat);
    st->usesConsistentMemory = nvidiaWithInlinePTX || mwDeviceHasConsistentMemory(di);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbCreateBuffers(ctx, st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetInitialTreeStatus(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbSetAllKernelArguments(st);
    if (clStatus != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    clStatus = nbMarshalBodies(st, CL_TRUE);
    if (clStatus != CL_SUCCESS)
    {
        mw_printf("Error marshalling initial bodies\n");
        return NBODY_CL_ERROR;
    }

    return NBODY_SUCCESS;
}