Ejemplo n.º 1
0
/* Pick the probability kernel to use for this run: SSE3, SSE2 or x87.
 *
 * ap  - passed through unchanged to the chosen init function.
 * clr - supplies verbose, forceSSE2, forceSSE3, forceX87 and
 *       forceNoIntrinsics.
 *
 * When no force flag is set, the selection follows the values filled in
 * by getSSELevelSupport(). Otherwise the first set flag decides, tested
 * in the order forceSSE2, forceSSE3, forceX87. The chosen init function's
 * result is stored in probabilityFunc (declared outside this function);
 * if that ends up null, mw_panic() is called with a dump of the inputs.
 */
static void probabilityFunctionDispatch(const AstronomyParameters* ap, const CLRequest* clr)
{
    int cpuSSE2 = FALSE;
    int cpuSSE3 = FALSE;
    int pickSSE2 = FALSE;
    int pickSSE3 = FALSE;

    getSSELevelSupport(&cpuSSE2, &cpuSSE3);

    if (clr->verbose)
    {
        warn("CPU features: SSE2 = %d, SSE3 = %d\n"
             "Forcing: SSE2 = %d, SSE3 = %d, x87 = %d\n",
             cpuSSE2, cpuSSE3,
             clr->forceSSE2, clr->forceSSE3, clr->forceX87);
    }

    /* Force flags are tested first; with none set we fall through to the
       detected CPU capabilities. */
    if (clr->forceSSE2)
    {
        pickSSE2 = TRUE;
    }
    else if (clr->forceSSE3)
    {
        pickSSE3 = TRUE;
    }
    else if (clr->forceX87)
    {
      #if MW_IS_X86_32
        pickSSE2 = FALSE;
        pickSSE3 = FALSE;
      #elif MW_IS_X86_64
        /* The x87 request is ignored on x86_64; SSE2 is used instead */
        pickSSE2 = TRUE;
      #endif
    }
    else
    {
        /* Nothing forced: take whatever the CPU offers */
        pickSSE2 = cpuSSE2;
        pickSSE3 = cpuSSE3;
    }

    /* SSE3 wins over SSE2 when both are selected */
    if (pickSSE3)
    {
        warn("Using SSE3 path\n");
        probabilityFunc = initProbabilities_SSE3(ap, clr->forceNoIntrinsics);
    }
    else if (pickSSE2)
    {
        warn("Using SSE2 path\n");
        probabilityFunc = initProbabilities_SSE2(ap, clr->forceNoIntrinsics);
    }
    else
    {
      #if !MW_NO_X87_EVER
        warn("Using x87 path\n");
        probabilityFunc = initProbabilities(ap, clr->forceNoIntrinsics);
      #else
        mw_unreachable();
      #endif
    }

    if (probabilityFunc)
    {
        return;
    }

    mw_panic("Probability function not set!:\n"
             "  Has SSE2             = %d\n"
             "  Has SSE3             = %d\n"
             "  Forced SSE3          = %d\n"
             "  Forced SSE2          = %d\n"
             "  Forced x87           = %d\n"
             "  Forced no intrinsics = %d\n"
             "  Arch                 = %s\n",
             cpuSSE2, cpuSSE3,
             clr->forceSSE3, clr->forceSSE2, clr->forceX87, clr->forceNoIntrinsics,
             ARCH_STRING);
}
/* Produce the IL kernel source for the given number of streams with the
 * UAV id filled in. Different UAV IDs are used for different GPUs, and it
 * must match.
 *
 * nStream  - number of streams; must be 1..4, anything else hits
 *            mw_unreachable().
 * target   - GPU target; used to look up the UAV id when uavGuess is
 *            negative, and to decide whether the R700 workaround applies.
 * uavGuess - UAV id to use, or a negative value to derive it from target.
 * len      - forwarded to replaceUAVIds() (presumably receives the length
 *            of the returned source; confirm against replaceUAVIds()).
 *
 * Returns the processed source, or NULL if the UAV id is out of range or
 * replaceUAVIds() fails. Ownership of the returned buffer is whatever
 * replaceUAVIds() defines.
 */
static char* getILSrc(int nStream, MWCALtargetEnum target, cl_int uavGuess, size_t* len)
{
    char* ilSrc = NULL;
    cl_int u = (uavGuess >= 0) ? uavGuess : mwUAVIdFromMWCALtargetEnum(target);

    /* Should we be checking which UAV is used from the binary? */
    if (u < 0 || u > 99)
    {
        /* We rely on the size of a 2 digit UAV id being the same as the '%d' format,
           but there are only a few UAVs anyway. A negative id from the
           fallback lookup is just as invalid, and its sign would make the
           printed id wider than 2 characters.
         */
        mw_printf("UAV id %d is absurd\n", (int) u);
        return NULL;
    }

    /* This is pretty special: each kernel source takes a different number
       of UAV id arguments (count noted on each case), all the same id. */
    switch (nStream)
    {
        case 1:   /* 9 */
            ilSrc = replaceUAVIds(ilKernelSrc1, len, u, u, u, u, u, u, u, u, u);
            break;

        case 2:  /* 11 */
            ilSrc = replaceUAVIds(ilKernelSrc2, len, u, u, u, u, u, u, u, u, u, u, u);
            break;

        case 3:  /* 13 */
            ilSrc = replaceUAVIds(ilKernelSrc3, len, u, u, u, u, u, u, u, u, u, u, u, u, u);
            break;

        case 4:  /* 15 */
            ilSrc = replaceUAVIds(ilKernelSrc4, len, u, u, u, u, u, u, u, u, u, u, u, u, u, u, u);
            break;

        default:
            mw_unreachable();
    }

    if (!ilSrc)
    {
        mw_printf("Error getting processed IL kernel source\n");
    }
    else
    {
        if (target < MW_CAL_TARGET_CYPRESS)
        {
            char* dclPos;

            /* Stupid hack.

               The OpenCL compiler generates a dcl_arena_uav_id(8) on Evergreen and later,
               but it's then unused. This isn't supported on R700, and the build
               fails if it's there. On Evergreen, if we remove the declaration the build is OK,
               but then the kernel silently fails / runs instantly.
               There is probably a better way of dealing with this.

               We could probably avoid this and a lot of this other
               junk if we reverse engineered the .rodata section, but
               I'm too lazy to do that.

               Remove this declaration by finding the line and overwriting with spaces.
            */
            dclPos = strstr(ilSrc, "dcl_arena_uav_id(8)\n");
            if (dclPos)
            {
                /* The matched text ends in '\n', so this stops at that
                   newline and leaves the line structure intact. */
                while (*dclPos != '\n')
                {
                    *dclPos++ = ' ';
                }
            }
        }
    }

    return ilSrc;
}