Beispiel #1
0
/**
 * Compile the body of a constructor and store the generated byte code
 * on its ConstructorInfo.
 *
 * A fresh CodeState/FuncState pair is initialised for the file of the
 * class' compilation unit, the parameter list and local variables are
 * declared, the statements are visited, and the byte code generated from
 * the function state is handed to constructor->setByteCode().
 *
 * @param function     literal providing the constructor's parameters,
 *                     locals and statements
 * @param constructor  receives the generated byte code
 *
 * On return currentMethod is NULL; currentFunctionLiteral is left
 * pointing at `function`.
 */
void TypeCompiler::generateConstructor(FunctionLiteral *function,
                                       ConstructorInfo *constructor)
{
    // Record what is being compiled for the duration of this call.
    currentFunctionLiteral = function;
    currentMethod          = constructor;

    CompilationUnit *unit = cls->pkgDecl->compilationUnit;

    CodeState state;
    FuncState fstate;

    initCodeState(&state, &fstate, unit->filename);

    // NOTE(review): cs keeps pointing at this stack-local CodeState after
    // the function returns; it must be reassigned before any later use.
    cs = &state;

    // The constructor path always passes true here; generateMethod passes
    // !isStatic, so the flag presumably requests an implicit instance
    // parameter -- confirm against parList.
    parList(function, true);
    declareLocalVariables(function);

    visitStatementArray(function->statements);

    closeCodeState(&state);

    constructor->setByteCode(generateByteCode(fstate.f));

    currentMethod = NULL;
}
Beispiel #2
0
/**
 * Compile a nested function literal inside the current code state.
 *
 * Opens a new FuncState on `cs`, declares the literal's parameters and
 * locals, compiles its statements, closes the function again and finally
 * calls BC::pushClosure() with the new function state and `e`.
 *
 * @param e     expression descriptor passed on to BC::pushClosure()
 * @param flit  function literal to compile
 * @param line  stored as `linedefined` on the new function prototype
 */
void TypeCompiler::functionBody(ExpDesc *e, FunctionLiteral *flit, int line)
{
    FuncState new_fs;

    BC::openFunction(cs, &new_fs);

    new_fs.f->linedefined = line;

    // Nested literals never pass true here (constructors always do, and
    // methods pass !isStatic).
    parList(flit, false);

    // setup closure info here so it is captured as an upvalue
    char funcinfo[256];
    // Bound the write by the real buffer size instead of a second,
    // unrelated magic number.
    snprintf(funcinfo, sizeof(funcinfo), "__ls_funcinfo_numargs_%i",
             flit->childIndex);
    // finfo is not read afterwards; the call is made for its effect of
    // referencing the variable by name from inside the new function.
    ExpDesc finfo;
    BC::singleVar(cs, &finfo, funcinfo);

    declareLocalVariables(flit);

    chunk(flit->statements);

    new_fs.f->lastlinedefined = flit->lineNumber;

    BC::closeFunction(cs);

    BC::pushClosure(cs, &new_fs, e);
}
Beispiel #3
0
/**
 * Compile the body of a method and store the generated byte code on its
 * MethodInfo.
 *
 * A fresh CodeState/FuncState pair is initialised for the file of the
 * class' compilation unit, the parameter list and local variables are
 * declared, an initial yield is inserted for coroutines, the statements
 * are visited, and the byte code generated from the function state is
 * handed to method->setByteCode().
 *
 * @param function  literal providing the method's parameters, locals,
 *                  statements and the isStatic / isCoroutine flags
 * @param method    receives the generated byte code
 *
 * On return currentMethod is NULL and currentMethodCoroutine is false;
 * currentFunctionLiteral is left pointing at `function`.
 */
void TypeCompiler::generateMethod(FunctionLiteral *function,
                                  MethodInfo      *method)
{
    // Record what is being compiled for the duration of this call.
    currentFunctionLiteral = function;
    currentMethod          = method;
    currentMethodCoroutine = function->isCoroutine;

    CompilationUnit *unit = cls->pkgDecl->compilationUnit;

    CodeState state;
    FuncState fstate;

    initCodeState(&state, &fstate, unit->filename);

    // NOTE(review): cs keeps pointing at this stack-local CodeState after
    // the function returns; it must be reassigned before any later use.
    cs = &state;

    // Non-static methods pass true as the second argument.
    parList(function, !function->isStatic);
    declareLocalVariables(function);

    // we insert a yield to account for argument passing
    if (function->isCoroutine)
    {
        ExpDesc yieldExp;
        insertYield(&yieldExp);
    }

    visitStatementArray(function->statements);

    closeCodeState(&state);

    method->setByteCode(generateByteCode(fstate.f));

    // Leave the per-method state cleared.
    currentMethodCoroutine = false;
    currentMethod          = NULL;
}
Beispiel #4
0
/*
 * Generate the source text of a TRSM kernel into `buf`.
 *
 * The text is assembled through a KgenContext created over (buf, buflen).
 * The kernel is declared with declareTrxmKernel() for CLBLAS_TRSM using
 * the string "Cached". Its body is an outer loop over M ("m0") holding
 * two emitted stages:
 *   Stage 1 - multiply and update with large blocks (loop over K),
 *             optionally going through LDS and with tail handling;
 *   Stage 2 - a loop ("m1") that multiplies by the inverted diagonal
 *             tile, writes the result back and continues the tile
 *             update, with barriers between the steps.
 *
 * Parameters:
 *   buf, buflen - destination buffer and its size, handed to
 *                 createKgenContext()
 *   subdims     - subproblem dimensions; the first two entries are copied
 *                 into the local generator settings and may be modified
 *                 there
 *   pgran       - parallelism granularity; only wgDim == 1 is accepted
 *   extra       - CLBLASKernExtra describing data type and flags; its
 *                 solverPriv is read as TrsmExtraParams
 *
 * Returns:
 *   -EINVAL     if pgran->wgDim != 1
 *   -ENOMEM     if the fetch context or the kgen context cannot be created
 *   -EOVERFLOW  if kgenEndFuncBody() returns a negative value
 *   otherwise kgenSourceSize(ctx) + 1 when kgenEndFuncBody() returns 0
 */
static ssize_t
generator(
   char *buf,
   size_t buflen,
   const struct SubproblemDim *subdims,
   const struct PGranularity *pgran,
   void *extra)
{
    char tmp[1024];
    struct KgenContext *ctx;
    ssize_t ret;
    CLBLASKernExtra *kextra = (CLBLASKernExtra*)extra;
    DataType dtype = kextra->dtype;
    KernelExtraFlags kflags = kextra->flags;
    CLBLASKernExtra extraNew;
    BlasGenSettings gset;
    TileMulOpts mulOpts;
    const char *ptrName;
    UpdateResultFlags upFlags = 0;
    TilePostFetchPrivate pfPriv;
    unsigned int l1Pans;
    bool b;
    Tile parTile;
    TrsmExtraParams *extraParams = (TrsmExtraParams *)kextra->solverPriv;
    int ldsLarge, lds_diagonal;
    bool isInline;
    TileSet tileSet;
    char copy2LDSFuncName[FUNC_NAME_MAXLEN];
    TailStatus tailStatus = 0;
    FetchAddrMode addrMode = 0;
    bool tailM = ((kflags & KEXTRA_TAILS_M) != 0);
    bool tailN = ((kflags & KEXTRA_TAILS_N) != 0);
    size_t alignK;

    // reject any granularity whose wgDim is not 1
    if (pgran->wgDim != 1) {
        return -EINVAL;
    }

    // ratio of the x sizes of the two subproblem levels; used further down
    // as the modulus for "bid" and as the stride in the "tmpB[...]" target
    l1Pans = (unsigned int)(subdims[0].x / subdims[1].x);

    // work on a private copy of the two subdims entries (inside gset) so
    // that they can be adjusted without touching the caller's data
    memset(&gset, 0, sizeof(gset));
    gset.flags = BGF_WHOLE_A | BGF_EXPLICIT_INLINE | BGF_UPTRS;
    memcpy(gset.subdims, subdims, sizeof(SubproblemDim) * 2);
    // there is no need for block structure along K
    gset.subdims[0].bwidth = gset.subdims[1].bwidth;
    subdims = gset.subdims;

    /*
     * Since tiles are changed dynamically, e. g. in the main tilemul
     * loop they are rectangular, but at the second stage both A and B
     * tile storages are used for square tiles. One must adjust physical
     * vectorization accordingly, so as vector length might not be
     * greater than linear size of any tile
     */
    memcpy(&extraNew, kextra, sizeof(extraNew));
    extraNew.vecLenA = umin(kextra->vecLenA, (unsigned int)subdims[1].y);
    extraNew.vecLenB = umin(kextra->vecLenB, (unsigned int)subdims[1].y);

    // the generator settings refer to the adjusted copy, not to kextra
    gset.pgran = pgran;
    gset.kextra = &extraNew;
    initKernelVarNames(&gset.varNames);

    // multiplication options
    // NOTE(review): mulOpts is filled field by field without a preceding
    // memset; confirm that no other field is read before being set
    mulOpts.memA = CLMEM_GLOBAL_MEMORY;
    mulOpts.memB = CLMEM_GLOBAL_MEMORY;
    mulOpts.core = (kextra->flags & KEXTRA_ENABLE_MAD) ? TILEMUL_MAD :
                                                         TILEMUL_MULADD;
    mulOpts.postFetch = NULL;
    mulOpts.flags = kextraToTilemulFlags(CLBLAS_TRSM, kflags);
    mulOpts.flags |= TILEMUL_EXTERN_RDECL | TILEMUL_NOT_INC_K;
    mulOpts.fctx = createFetchContext();
    if (mulOpts.fctx == NULL) {
        return -ENOMEM;
    }

    disableFetchOptLevels(mulOpts.fctx, FOPTLEV_TMP_COORD_PRECOMPUTING);

    // NOTE(review): BGF_EXPLICIT_INLINE is set unconditionally above, so
    // this looks always true (making the !isInline branch below dead) --
    // confirm
    isInline = (gset.flags & BGF_EXPLICIT_INLINE);

    initTiles(&gset, &tileSet, subdims, kflags, dtype,
              PRIV_STORAGE_VARIABLE_SET);

    ctx = createKgenContext(buf, buflen, true);
    if (ctx == NULL) {
        // release the fetch context created above before bailing out
        destroyFetchContext(mulOpts.fctx);
        return -ENOMEM;
    }

    // NOTE(review): this enables cl_amd_printf in every generated kernel;
    // looks like a debugging leftover -- confirm it is still wanted
    kgenAddStmt(ctx, "#pragma OPENCL EXTENSION cl_amd_printf : enable\n\n");

    b = isDoubleBasedType(dtype);
    kgenDeclareUptrs(ctx, b);
    if (isComplexType(dtype)) {
        genComplexMathOperators(ctx, dtype);
    }
    if(!isInline) {
        genTileInverting(ctx, &gset, &tileSet);
    }

    // When LDS is in use, generate a global-to-local block copy function
    // ahead of the kernel and remember its name for genPreloadedTileMul()
    if ( extraParams->ldsUse != LDS_NO_USE ) {
        SubproblemDim sdims;
        DBlockCopyFlags flags;
        unsigned int vecLen;

        // the x/y extents of the copied block are swapped depending on
        // whether matrix B is accessed column-major
        if (!isMatrixAccessColMaj(CLBLAS_TRSM, kflags, MATRIX_B)) {
            sdims.x = gset.subdims[1].bwidth * extraParams->unrollingFactor;
            sdims.y = gset.subdims[0].x;
        }
        else {
            sdims.x = gset.subdims[0].x;
            sdims.y = gset.subdims[1].bwidth * extraParams->unrollingFactor;
        }

        vecLen = getVecLen(&gset, CLBLAS_TRSM, MATRIX_B);
        // a vector length below 4 requests DBLOCK_COPY_NOT_VECTORIZE
        flags = (vecLen < 4) ? DBLOCK_COPY_NOT_VECTORIZE : 0;
        copyDataBlockGen(ctx, &sdims, gset.pgran, dtype,
                         DBLOCK_GLOBAL_TO_LOCAL, flags);
        kgenAddBlankLine(ctx);
        kgenGetLastFuncName(copy2LDSFuncName, FUNC_NAME_MAXLEN, ctx);
    }

    declareTrxmKernel(ctx, dtype, pgran, kflags, CLBLAS_TRSM, "Cached", false,
                      true);
    kgenBeginFuncBody(ctx);

    declareLocalVariables(ctx, &gset, &parTile, extraParams);
    if (kflags & KEXTRA_A_OFF_NOT_ZERO) {
        kgenAddStmt(ctx, "A += offA;\n");
    }
    genTrxmBMatrShift(ctx, kflags, false);

    ptrName = dtypeUPtrField(dtype);

    sprintf(tmp, "uB.%s = B;\n\n", ptrName);
    kgenAddStmt(ctx, tmp);

    // external loop
    // NOTE(review): "%lu" here and in the other sprintf calls below assumes
    // the subdims fields have the width of unsigned long -- confirm against
    // the SubproblemDim declaration
    sprintf(tmp, "for (m0 = 0; m0 < M; m0 += %lu)", subdims[0].y);
    kgenBeginBranch(ctx, tmp);
    genZeroTile(ctx, &gset.tileCY);
    genSetupCoords(ctx, &gset, BLOCK_UPDATE);

    kgenAddStmt(ctx, "// Stage 1. Multiply and update with large blocks\n");

    gset.tileA = tileSet.rectA;
    gset.tileBX = tileSet.origB;

    if (!isMatrixUpper(kflags) && tailM) {
        addrMode |= FETCH_ADDR_A_CYCLICAL;
        setFetchAddrMode(mulOpts.fctx, addrMode);
    }

    // alignK starts as the inner block width and is multiplied by the
    // unrolling factor when LDS_USE_LARGE is set
    ldsLarge = ((extraParams->ldsUse & LDS_USE_LARGE) != 0);
    alignK = subdims[1].bwidth;
    if (ldsLarge) {
        alignK *= extraParams->unrollingFactor;
    }

    // Stage 1, LDS_USE_LARGE path: K loop built on genPreloadedTileMul()
    if (ldsLarge) {
        const char *oldCoordB;
        FetchAddrMode bamode = addrMode | FETCH_ADDR_K_RELATIVE;
        bool withSkew;

        withSkew = useSkewedFetchB(&gset);
        if (!withSkew) {
            bamode |= FETCH_ADDR_B_RELATIVE;
        }
        else {
            bamode |= FETCH_ADDR_B_CYCLICAL;
        }

        setFetchAddrMode(mulOpts.fctx, bamode);

        if (tailN) {
            /*
             * Conditional branch for those items which hit into
             * matrix B with their matrix coordinates
             */
            sprintf(tmp, "if ((gid + 1) * %lu < N)", subdims[0].x);
            kgenBeginBranch(ctx, tmp);
        }

        if (isMatrixAccessColMaj(CLBLAS_TRSM, kflags, MATRIX_A)) {
            kgenPrintf(ctx, "uA.%s = A + k0 * lda;\n", ptrName);
        }
        else {
            kgenPrintf(ctx, "uA.%s = A + k0;\n", ptrName);
        }

        if (withSkew) {
            unsigned int bwidthOld;

            // coordB is switched to "skewX" until the K loop below is done
            oldCoordB = gset.varNames.coordB;
            gset.varNames.coordB = "skewX";
            // NOTE(review): bwidth is overwritten and then restored with
            // nothing in between, so the temporary value is never observed;
            // a call may be missing between the two assignments -- confirm
            bwidthOld = gset.subdims[0].bwidth;
            gset.subdims[0].bwidth = (parTile.trans) ? parTile.nrRows :
                                                       parTile.nrCols;
            gset.subdims[0].bwidth = bwidthOld;
        }

        genInternalLoopCtl(ctx, subdims, kflags, alignK, alignK);
        genPreloadedTileMul(ctx, &gset, &mulOpts, &parTile, copy2LDSFuncName);
        genInternalLoopEnd(ctx);                             // loop over K

        if (withSkew) {
            gset.varNames.coordB = oldCoordB;
            setFetchAddrMode(mulOpts.fctx, bamode & ~FETCH_ADDR_B_CYCLICAL);
            // deliver from skew in the result before proceed to the next stage
            genTileCyclicalShift(ctx, &gset);
        }

        if (tailN) {
            // close the "if" opened above and open its "else"; the else
            // body is produced by the generic path right below
            kgenEndBranch(ctx, NULL);
            kgenBeginBranch(ctx, "else");
        }

        setFetchAddrMode(mulOpts.fctx, addrMode);
    }

    // Stage 1, generic path: taken when LDS_USE_LARGE is off, or as the
    // "else" part of the tailN branch opened in the LDS path
    if (!ldsLarge || tailN) {
        genCheckShiftTailB(ctx, &gset, 0, &tailStatus);
        if ((kflags & KEXTRA_TAILS_N_LOWER) && !tailStatus) {
            addrMode |= FETCH_ADDR_B_CYCLICAL;
            setFetchAddrMode(mulOpts.fctx, addrMode);
        }

        if (tailN) {
            sprintfHitMatrixCond(tmp, MATRIX_B, "if (", ")");
            kgenBeginBranch(ctx, tmp);
        }

        genInternalLoopCtl(ctx, subdims, kflags, subdims[1].bwidth, alignK);
        tileMulGen(ctx, &gset, &mulOpts);
        genInternalLoopEnd(ctx);                             // loop over K

        if (tailN) {
            kgenEndBranch(ctx, NULL);
        }

        // closes the "else" opened in the LDS path (inside this block
        // LDS_USE_LARGE can only be set together with tailN)
        if (extraParams->ldsUse & LDS_USE_LARGE) {
            kgenEndBranch(ctx, NULL);
        }
    }

    sprintf(tmp, "uA.%s = A;\n\n", ptrName);
    kgenAddStmt(ctx, tmp);

    // processing tails along update dimension
    if (isMatrixUpper(kflags) &&
        ((kflags & KEXTRA_TAILS_K_LOWER) ||
          (ldsLarge && extraParams->unrolledTail))) {

        unsigned int tailChunks;

        // with LDS_USE_LARGE the tail is handled in unrolledTail chunks,
        // otherwise in a single one
        tailChunks = (extraParams->ldsUse & LDS_USE_LARGE) ?
            extraParams->unrolledTail : 1;

        if (tailN) {
            char hitCond[1024];

            sprintfHitMatrixCond(hitCond, MATRIX_B, "(", ")");
            sprintf(tmp, "if ((currM + %lu < M) && %s)",
                    subdims[0].y, hitCond);
        }
        else {
            sprintf(tmp, "if (currM + %lu < M)", subdims[0].y);
        }
        kgenBeginBranch(ctx, tmp);

        if (kflags & KEXTRA_TAILS_K_LOWER) {
            setFetchAddrMode(mulOpts.fctx, addrMode | FETCH_ADDR_K_CYCLICAL);
            setFetchHandler(&mulOpts, &gset, defaultTilePostFetch, &pfPriv);
        }
        if (tailChunks > 1) {
            // TILEMUL_NOT_INC_K is cleared only for the emitted k1 loop
            // and set again right after it
            mulOpts.flags &= ~TILEMUL_NOT_INC_K;
            sprintf(tmp, "for (uint k1 = 0; k1 < %u; k1++)", tailChunks);
            kgenBeginBranch(ctx, tmp);
        }

		addrMode |= FETCH_ADDR_B_CYCLICAL;
        setFetchAddrMode(mulOpts.fctx, addrMode);
        tileMulGen(ctx, &gset, &mulOpts);
        if (tailChunks > 1) {
            kgenEndBranch(ctx, NULL);
            mulOpts.flags |= TILEMUL_NOT_INC_K;
        }

        kgenEndBranch(ctx, NULL);
    }

    gset.tileA = tileSet.squareA;

    kgenAddStmt(ctx, "\n/*\n"
                     " * Stage 2. A part of work items multiply got result on "
                     "a respective\n"
                     " * inverted diagonal block, and the remaining ones wait. "
                     "Then they perform\n"
                     " * one step of further intermediate result evaluation as "
                     "multiplying tile by tile.\n"
                     " * It continues until the whole panel of the "
                     "matrix A is processed\n"
                     " */\n");

    // one must deal further with square blocks strictly
    gset.subdims[0].bwidth = gset.subdims[1].bwidth = gset.subdims[1].y;

    sprintf(tmp, "for (m1 = 0; m1 < %lu; m1++)", subdims[0].y / subdims[1].y);
    kgenBeginBranch(ctx, tmp);

    if (extraParams->ldsUse & LDS_USE_DIAGONAL) {
        sprintf(tmp, "const int bid = lid %% %u;\n\n",
                l1Pans);
        kgenAddStmt(ctx, tmp);
    }

    /*
     * Update the intermediate result multiply on the inverted diagonal tile,
     * and write back
     */
    genSetupCoords(ctx, &gset, TILE_UPDATE);

    sprintfStage2Condition(tmp, &gset, 0);
    // NOTE(review): this return value is never checked; ret is overwritten
    // by kgenEndFuncBody() at the end of the function
    ret = kgenBeginBranch(ctx, tmp);

    upFlags = kextraToUpresFlags(CLBLAS_TRSM, kflags);
    upFlags |= tailStatusToUpresFlags(tailStatus);
    upFlags |= UPRES_PRIV_DEST | UPRES_WITH_BETA;
    genUpdateIntermResult(ctx, &gset, false, upFlags);

    kgenAddBlankLine(ctx);

    // the "tmpB[...]" write-back target is used only when LDS_USE_DIAGONAL
    // is set, KEXTRA_COLUMN_MAJOR is not set, there are no M/N tails,
    // vectorization is not disabled and the data type is not complex
    lds_diagonal = ((extraParams->ldsUse & LDS_USE_DIAGONAL) &&
                    (kflags & (KEXTRA_COLUMN_MAJOR)) == 0 &&
                    !(tailM || tailN) &&
                    !(upFlags & UPRES_NO_VECTORIZATION) &&
                    !isComplexType(kextra->dtype));

    /*
     * it's needed now to adjust addressing mode of A so as to don't
     * exceed the bound of A
     */
    if (tailM) {
        setFetchAddrMode(mulOpts.fctx,
                         addrMode | FETCH_ADDR_A_CYCLICAL |
                         FETCH_ADDR_K_CYCLICAL);
        // changes the local copy that gset.kextra points to
        extraNew.flags |= KEXTRA_TAILS_K_LOWER;
    }

    genMulOnDiagonalTile(ctx, &gset, &tileSet, &mulOpts);
    gset.tileBX = tileSet.bStage2;
    if (tailM) {
        setFetchHandler(&mulOpts, &gset, defaultTilePostFetch, &pfPriv);
    }

    kgenAddStmt(ctx, "// Write back the given result\n");

    upFlags = kextraToUpresFlags(CLBLAS_TRSM, kflags);
    upFlags |= tailStatusToUpresFlags(tailStatus);

    // tmp keeps a literal "%u" placeholder (written as %%u), presumably
    // filled in later by genResultUpdateWithFlags()
    if (lds_diagonal) {
       sprintf(tmp, "tmpB[%%u * %u + bid]", l1Pans);
    }

    genResultUpdateWithFlags(ctx, CLBLAS_TRSM, &gset, upFlags,
                                 NULL, NULL, lds_diagonal ? tmp : NULL);

    kgenEndBranch(ctx, NULL);   // multiply on the inverted tile path
    kgenAddBarrier(ctx, CLK_GLOBAL_MEM_FENCE);

    // continue the tile update
    kgenAddBlankLine(ctx);
    sprintfStage2Condition(tmp, &gset, 1);
    kgenBeginBranch(ctx, tmp);
    genCheckShiftTailB(ctx, &gset, 0, &tailStatus);
    if (lds_diagonal) {
        // TODO: add here storing to LDS as well
    }
    else {
		addrMode |= FETCH_ADDR_B_CYCLICAL;
        setFetchAddrMode(mulOpts.fctx, addrMode);
        tileMulGen(ctx, &gset, &mulOpts);
    }
    kgenEndBranch(ctx, NULL);           // tile update path
    kgenAddBarrier(ctx, CLK_GLOBAL_MEM_FENCE);

    kgenEndBranch(ctx, NULL);           // second stage loop

    if (isMatrixUpper(kflags)) {
        sprintf(tmp, "currM -= %lu;\n", subdims[0].y);
        kgenAddStmt(ctx, tmp);
    }

    kgenEndBranch(ctx, NULL);           // loop over M

    ret = kgenEndFuncBody(ctx);

    // on a zero status report the generated source size plus one
    // (presumably to account for the terminating NUL -- confirm)
    if (!ret) {
        ret = (ssize_t)kgenSourceSize(ctx) + 1;
    }

    destroyFetchContext(mulOpts.fctx);
    destroyKgenContext(ctx);

    // any negative status is reported to the caller as -EOVERFLOW
    return (ret < 0) ? -EOVERFLOW : ret;
}