/* Returns CL_TRUE on error */ cl_bool findRunSizes(RunSizes* sizes, const CLInfo* ci, const DevInfo* di, const AstronomyParameters* ap, const IntegralArea* ia, const CLRequest* clr) { WGInfo wgi; cl_int err; size_t nWavefrontPerCU; size_t blockSize; /* Size chunks should be multiples of */ cl_bool forceOneChunk = clr->nonResponsive || di->nonOutput || di->hasGraphicsQOS; /* I assume this is how this works for 1D limit */ const cl_ulong maxWorkDim = (cl_ulong) di->maxWorkItemSizes[0] * di->maxWorkItemSizes[1] * di->maxWorkItemSizes[2]; const cl_ulong r = (cl_ulong) ia->r_steps; const cl_ulong mu = (cl_ulong) ia->mu_steps; sizes->r = ia->r_steps; sizes->mu = ia->mu_steps; sizes->nu = ia->nu_steps; sizes->area = r * mu; if (r > CL_ULONG_MAX / mu) { mw_printf("Integral area overflows cl_ulong\n"); return CL_TRUE; } if (di->devType == CL_DEVICE_TYPE_CPU) { sizes->nChunk = sizes->nChunkEstimate = 1; sizes->chunkSize = sizes->effectiveArea = sizes->area; sizes->extra = 0; sizes->local[0] = 1; sizes->global[0] = sizes->area; return CL_FALSE; } err = mwGetWorkGroupInfo(_separationKernel, ci, &wgi); if (err != CL_SUCCESS) { mwPerrorCL(err, "Failed to get work group info"); return CL_TRUE; } if (clr->verbose) { mwPrintWorkGroupInfo(&wgi); } if (!mwDivisible(wgi.wgs, (size_t) di->warpSize)) { mw_printf("Kernel reported work group size ("ZU") not a multiple of warp size (%u)\n", wgi.wgs, di->warpSize); return CL_TRUE; } /* This should give a good occupancy. If the global size isn't a * multiple of this bad performance things happen. 
*/ nWavefrontPerCU = wgi.wgs / di->warpSize; /* Since we don't use any workgroup features, it makes sense to * use the wavefront size as the workgroup size */ sizes->local[0] = di->warpSize; /* For maximum efficiency, we want global work sizes to be multiples of * (warp size) * (number compute units) * (number of warps for good occupancy) * Then we throw in another factor since we can realistically do more work at once */ blockSize = nWavefrontPerCU * di->warpSize * di->maxCompUnits; { cl_uint nBlockPerChunk = 1; sizes->nChunkEstimate = findNChunk(ap, ia, di, clr, &sizes->initialWait); /* Make a guess appropriate for the hardware. */ /* m * b ~= area / n */ nBlockPerChunk = sizes->area / (sizes->nChunkEstimate * blockSize); if (nBlockPerChunk == 0) nBlockPerChunk = 1; sizes->chunkSize = nBlockPerChunk * blockSize; } //sizes->effectiveArea = sizes->chunkSize * mwDivRoundup(sizes->area, sizes->chunkSize); sizes->effectiveArea = di->warpSize * mwDivRoundup(sizes->area, di->warpSize); sizes->nChunk = forceOneChunk ? 
1 : mwDivRoundup(sizes->effectiveArea, sizes->chunkSize); sizes->extra = (cl_uint) (sizes->effectiveArea - sizes->area); if (sizes->nChunk == 1) /* BlockPerChunk factor probably too high or very small workunit, or nonresponsive */ { /* Like using nBlockPerChunk == 1 */ sizes->effectiveArea = blockSize * mwDivRoundup(sizes->area, blockSize); sizes->chunkSize = sizes->effectiveArea; sizes->extra = sizes->effectiveArea - sizes->area; } mw_printf("Using a target frequency of %.1f\n" "Using a block size of "ZU" with "ZU" blocks/chunk\n", clr->targetFrequency, blockSize, sizes->chunkSize / blockSize ); printPollMode(ci, sizes); sizes->chunkSize = sizes->effectiveArea / sizes->nChunk; /* We should be hitting memory size limits before we ever get to this */ if (sizes->chunkSize > maxWorkDim) { mw_printf("Warning: Area too large for one chunk (max size = "LLU")\n", maxWorkDim); while (sizes->chunkSize > maxWorkDim) { sizes->nChunk *= 2; sizes->chunkSize = sizes->effectiveArea / sizes->nChunk; } if (!mwDivisible(sizes->chunkSize, sizes->local[0])) { mw_printf("FIXME: I'm too lazy to handle very large workunits properly\n"); return CL_TRUE; } else if (!mwDivisible(sizes->chunkSize, blockSize)) { mw_printf("FIXME: Very large workunit potentially slower than it should be\n"); } } sizes->global[0] = sizes->chunkSize; printRunSizes(sizes, ia); if (sizes->effectiveArea < sizes->area) { mw_printf("Effective area less than actual area!\n"); return CL_TRUE; } return CL_FALSE; }
/* Set arguments using the same fit parameters as have been used.
 * Expects a flat table of fit parameters on top of the Lua stack:
 * the first NUMBER_FIT_BG_PARAMETERS entries are background values,
 * followed by NUMBER_FIT_STREAM_PARAMETERS entries per stream. */
static int luaDefaultSetBGStreamParametersFromArguments(lua_State* luaSt)
{
    /* Stream field names, in the order their values appear in the
       flat parameter list */
    static const char* const streamFields[] = { "epsilon", "mu", "r", "theta", "phi", "sigma" };
    const int nStreamFields = (int) (sizeof(streamFields) / sizeof(streamFields[0]));
    int i, j;
    int ptable, bgTable, streamTable, thisStream;
    int n, nStream, base;

    ptable = mw_lua_checktable(luaSt, lua_gettop(luaSt));
    n = luaL_getn(luaSt, ptable); /* Number parameters */

    /* Build the background table and publish it as a global */
    lua_newtable(luaSt);
    bgTable = lua_gettop(luaSt);
    lua_pushvalue(luaSt, bgTable);
    lua_setglobal(luaSt, BACKGROUND_NAME);
    bgTable = lua_gettop(luaSt);

    /* The first two fit parameters are the background q and r0 */
    lua_rawgeti(luaSt, ptable, 1);
    lua_setfield(luaSt, bgTable, "q");
    lua_rawgeti(luaSt, ptable, 2);
    lua_setfield(luaSt, bgTable, "r0");

    /* Fixed values, not included in the fit */
    lua_pushnumber(luaSt, 0.0);
    lua_setfield(luaSt, bgTable, "epsilon");
    lua_pushnumber(luaSt, 1.0);
    lua_setfield(luaSt, bgTable, "alpha");
    lua_pushnumber(luaSt, 1.0);
    lua_setfield(luaSt, bgTable, "delta");

    lua_pop(luaSt, 1); /* Done with the background table */

    /* Build the stream table and publish it as a global */
    lua_newtable(luaSt);
    streamTable = lua_gettop(luaSt);
    lua_pushvalue(luaSt, streamTable);
    lua_setglobal(luaSt, STREAMS_NAME);
    streamTable = lua_gettop(luaSt);

    if (!mwDivisible(n - NUMBER_FIT_BG_PARAMETERS, NUMBER_FIT_STREAM_PARAMETERS))
    {
        return luaL_error(luaSt, "Parameter count (%d) inconsistent with default argument mapping\n", n);
    }

    nStream = (n - NUMBER_FIT_BG_PARAMETERS) / NUMBER_FIT_STREAM_PARAMETERS;

    for (i = 0; i < nStream; ++i)
    {
        lua_newtable(luaSt);
        thisStream = lua_gettop(luaSt);

        /* Copy this stream's slice of the flat list into named fields */
        base = NUMBER_FIT_BG_PARAMETERS + i * NUMBER_FIT_STREAM_PARAMETERS;
        for (j = 0; j < nStreamFields; ++j)
        {
            lua_rawgeti(luaSt, ptable, base + j + 1);
            lua_setfield(luaSt, thisStream, streamFields[j]);
        }

        mw_lua_assert_top_type(luaSt, LUA_TTABLE);
        lua_rawseti(luaSt, streamTable, i + 1);
    }

    lua_pop(luaSt, 1); /* Done with the stream table */

    return 0;
}