예제 #1
0
/*
 * Post-unroll callback of the image copy generator.
 *
 * Adds, in this order:
 *   - the tail copy code, when the work descriptor reports a tail;
 *   - a blank line;
 *   - the statement advancing the "src" pointer by the leading dimension
 *     (the global LD variable or the constant local LD, depending on the
 *     copy direction);
 *   - the statement stepping to the next destination line: "index++" for
 *     packed images, "y++" otherwise.
 *
 * @ctx: generator context the statements are added to
 * @priv: pointer to the GenPriv describing the copy being generated
 *
 * Returns the result of the last kgenAddStmt() call.
 */
static int
copyImgPostUnroll(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    GenPriv *gpriv = (GenPriv*)priv;
    const char *vfield = dtypeUPtrField(gpriv->dtype);

    if (gpriv->work && gpriv->work->tail) {
        addCopyTailCode(ctx, gpriv);
    }

    kgenAddBlankLine(ctx);

    /*
     * The pointer step is emitted only for the directions handled here.
     * For any other direction nothing is formatted, so nothing is emitted
     * either; passing the never-written buffer on would read
     * indeterminate memory.
     */
    if (gpriv->dir == DBLOCK_GLOBAL_TO_IMAGE) {
        sprintf(tmp, "src.%s += %s;\n", vfield, gpriv->globLDName);
        kgenAddStmt(ctx, tmp);
    }
    else if (gpriv->dir == DBLOCK_LOCAL_TO_IMAGE) {
        /* the cast keeps the argument matched to the %lu conversion */
        sprintf(tmp, "src.%s += %lu;\n", vfield,
                (unsigned long)gpriv->lmemLD);
        kgenAddStmt(ctx, tmp);
    }

    return kgenAddStmt(ctx, gpriv->packed ? "index++;\n" : "y++;\n");
}
예제 #2
0
static int
copyMemVec(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    char vec[64];
    GenPriv *gpriv = (GenPriv*)priv;

    if (gpriv->vecLen == 1)
        sprintf(vec,"f");
    else
        sprintf(vec,"f%dv", gpriv->vecLen);

    if (gpriv->conjugate) {
        sprintf(tmp, "tmp = *%s.%s++;\n", gpriv->srcName, vec);
        kgenAddStmt(ctx, tmp);
        if (gpriv->dtype == TYPE_COMPLEX_FLOAT) {
            kgenAddStmt(ctx, "tmp.y = -tmp.y;\n"
                             "tmp.w = -tmp.w;\n");
        }
        else {
            kgenAddStmt(ctx, "tmp.y = -tmp.y;\n");
        }
        sprintf(tmp, "*%s.%s++ = tmp;\n",
                gpriv->dstName, vec);
    }
    else {
        sprintf(tmp, "*%s.%s++ = *%s.%s++;\n", gpriv->dstName, vec,
                gpriv->srcName, vec);
    }

    return kgenAddStmt(ctx, tmp);
}
예제 #3
0
static int
copyMemSingle(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    GenPriv *gpriv = (GenPriv*)priv;
    const char *vfield;

    vfield = dtypeUPtrField(gpriv->dtype);

    if (gpriv->conjugate) {
        sprintf(tmp, "*%s.%s = *%s.%s++;\n",
                gpriv->dstName, vfield, gpriv->srcName, vfield);
        kgenAddStmt(ctx, tmp);
        sprintf(tmp, "(*%s.%s).y = -(*%s.%s).y;\n",
                gpriv->dstName, vfield, gpriv->dstName, vfield);
        kgenAddStmt(ctx, tmp);
        sprintf(tmp, "%s.%s++;\n", gpriv->dstName, vfield);
    }
    else {
        sprintf(tmp, "*%s.%s++ = *%s.%s++;\n",
                gpriv->dstName, vfield, gpriv->srcName, vfield);
    }

    return kgenAddStmt(ctx, tmp);
}
예제 #4
0
/*
 * Set up the outer loop of the generated code.
 *
 * Always declares the loop counter "n" and records its name in the loop
 * control structure. When work->nrItems is zero the outer bound is the
 * constant work->nrRows. Otherwise a bound variable is declared and a
 * statement computing it is added: the setLoopBoundStmt template is used
 * when the work has a block tail, and a comparison of the local id against
 * work->nrItems is used when it has not.
 *
 * @ctx: generator context the statements are added to
 * @work: work distribution among the work items
 * @loopCtl: loop control structure to fill
 */
static void
prepareLoop(struct KgenContext *ctx, ItemWork *work, LoopCtl *loopCtl)
{
    char stmt[1024];

    kgenAddStmt(ctx, "size_t n;\n");
    loopCtl->ocName = "n";

    if (!work->nrItems) {
        /* every work item processes the same, constant number of rows */
        loopCtl->outBound.val = (unsigned long)work->nrRows;
        loopCtl->obConst = true;
        return;
    }

    sprintf(stmt, "size_t %s;\n\n", lboundVarName);
    kgenAddStmt(ctx, stmt);

    /*
     * The number of rows processed by the work item is not a constant,
     * so it is evaluated in the generated code
     */
    if (!work->blockTail) {
        sprintf(stmt, "nrows = (%s >= %u) ? 0 : %lu;\n", lidVarName,
                work->nrItems, work->nrRows);
    }
    else {
        sprintf(stmt, setLoopBoundStmt, work->nrItems - 1, work->nrItems - 1,
                work->blockTail, work->nrRows);
    }
    kgenAddStmt(ctx, stmt);

    loopCtl->outBound.name = lboundVarName;
}
예제 #5
0
/*
 * Pre-unroll callback of the memory copy generator: adds the statements
 * that load the working pointers "src1" and "dst1" from "src" and "dst".
 *
 * @ctx: generator context the statements are added to
 * @priv: not used
 *
 * Returns the result of the last kgenAddStmt() call.
 */
static int
copyMemPreUnroll(struct KgenContext *ctx, void *priv)
{
    int ret;

    DUMMY_ARG_USAGE(priv);

    kgenAddStmt(ctx, "src1 = src;\n");
    ret = kgenAddStmt(ctx, "dst1 = dst;\n\n");

    return ret;
}
예제 #6
0
static int
copyMemSingleTransp(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    GenPriv *gpriv = (GenPriv*)priv;
    const char *vfield;

    vfield = dtypeUPtrField(gpriv->dtype);
    kgenAddBlankLine(ctx);

    if (gpriv->dir == DBLOCK_GLOBAL_TO_LOCAL) {
        if (gpriv->locLDName) {
            sprintf(tmp, "*%s.%s = *%s.%s++;\n",
                    gpriv->dstName, vfield,
                    gpriv->srcName, vfield);
            kgenAddStmt(ctx, tmp);

            if (gpriv->conjugate) {
                sprintf(tmp, "(*%s.%s).y = -(*%s.%s).y;\n",
                        gpriv->dstName, vfield, gpriv->dstName,
                        vfield);
                kgenAddStmt(ctx, tmp);
            }
            sprintf(tmp, "%s.%s += %s;\n",
                    gpriv->dstName, vfield, gpriv->locLDName);
        }
        else {
            sprintf(tmp, "%s.%s[%lu] = *%s.%s++;\n",
                    gpriv->dstName, vfield,
                    gpriv->lmemLD * gpriv->cnt, gpriv->srcName,
                    vfield);
            if (gpriv->conjugate) {
                kgenAddStmt(ctx, tmp);
                sprintf(tmp, "%s.%s[%lu].y = -%s.%s[%lu].y;\n",
                        gpriv->dstName, vfield, gpriv->lmemLD * gpriv->cnt,
                        gpriv->dstName, vfield, gpriv->lmemLD * gpriv->cnt);
            }
        }
    }
    else {
        if (gpriv->locLDName) {
            sprintf(tmp, "*%s.%s++ = *%s.%s;\n"
                         "%s.%s += %s;\n",
                    gpriv->dstName, vfield,
                    gpriv->srcName, vfield,
                    gpriv->srcName, vfield, gpriv->locLDName);
        }
        else {
            sprintf(tmp, "*%s.%s++ = %s.%s[%lu];\n",
                    gpriv->dstName, vfield, gpriv->srcName, vfield,
                    gpriv->lmemLD * gpriv->cnt);
        }
    }
    gpriv->cnt++;

    return kgenAddStmt(ctx, tmp);
}
예제 #7
0
파일: trsm.c 프로젝트: AndreasMiller/clBLAS
/*
 * Generate the code clearing the rows of a tile that lie at or beyond the
 * matrix boundary.
 *
 * The generated code evaluates "bound", the number of valid rows counted
 * from coordA, and replaces every tile segment of a row with index not
 * less than "bound" with zero. For a tile of A, the diagonal elements of
 * such rows are then set to one.
 *
 * For MATRIX_A the code is surrounded with blank lines; for any other role
 * it is enclosed in a branch opened with kgenBeginBranch().
 *
 * @ctx: generator context the statements are added to
 * @gset: generator settings; subdims[1] and kextra->dtype are used
 * @mrole: role of the matrix the tile belongs to
 * @tile: tile to clean up
 */
static void
genZeroTileTrash(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    MatrixRole mrole,
    Tile* tile)
{
    const SubproblemDim *subdim = &gset->subdims[1];
    const CLBLASKernExtra *extra = gset->kextra;
    const int forA = (mrole == MATRIX_A);
    unsigned int row, col, diag;
    unsigned int seg;
    Kstring el;
    char stmt[1024];

    if (forA) {
        kgenAddBlankLine(ctx);
    }
    else {
        kgenBeginBranch(ctx, NULL);
    }

    sprintf(stmt, "const int bound = (coordA + %lu > M) ? (M - coordA) : %lu;\n",
            subdim->y, subdim->y);
    kgenAddStmt(ctx, stmt);

    /* a transposed tile is handled element by element */
    seg = tileLineSegmentLen(tile);
    if (tile->trans) {
        seg = 1;
    }

    for (row = 0; row < tile->nrRows; ++row) {
        for (col = 0; col < tile->nrCols; col += seg) {
            sprintfTileElement(&el, tile, row, col, seg);
            sprintf(stmt, "%s = (bound <= %u) ? 0 : %s;\n", el.buf, row, el.buf);
            kgenAddStmt(ctx, stmt);
        }
    }

    if (forA) {
        // Put units on the diagonal of the trash rows of the A tile
        for (diag = 0; diag < (unsigned int)subdim->y; diag++) {
            sprintfTileElement(&el, tile, diag, diag, 1);
            sprintf(stmt, "%s = (bound <= %d) ? %s : %s;\n",
                    el.buf, (int)diag, strOne(extra->dtype), el.buf);
            kgenAddStmt(ctx, stmt);
        }

        kgenAddBlankLine(ctx);
    }
    else {
        kgenEndBranch(ctx, NULL);
    }
}
예제 #8
0
/*
 * Add the statements setting the initial image coordinates for the work
 * item.
 *
 * For a packed image the coordinates are derived from a line index: the
 * statements evaluate "pLine", "index", "x" and "y" (these names are fixed
 * in the generated code). Otherwise the variables named by xName and yName
 * are set relative to "startX" and "startY".
 * In both cases the formulas depend on whether the block has fewer rows
 * than there are work items in the group.
 *
 * @ctx: generator context the statements are added to
 * @xName: name of the x coordinate variable (used in the unpacked case)
 * @yName: name of the y coordinate variable (used in the unpacked case)
 * @pgran: granularity; the product of wgSize[0] and wgSize[1] is taken as
 *         the work group size
 * @gpriv: description of the copy being generated
 */
static void
addSettingImageXYCode(
    struct KgenContext *ctx,
    const char *xName,
    const char *yName,
    const PGranularity *pgran,
    GenPriv *gpriv)
{
    char stmt[4096];
    const ItemWork *iw = gpriv->work;
    size_t gsize = pgran->wgSize[0] * pgran->wgSize[1];
    /* several work items share one row of the block */
    const int rowShared = (gpriv->dim->y < gsize);

    if (!gpriv->packed) {
        if (rowShared) {
            sprintf(stmt, "%s = startX + %s %% %u * %lu / %d;\n",
                    xName, lidVarName, iw->itemsPerRow, iw->nrCols,
                    FLOAT4_VECLEN/gpriv->nfloats);
            kgenAddStmt(ctx, stmt);
            sprintf(stmt, "%s = startY + %s / %u;\n", yName, lidVarName,
                    iw->itemsPerRow);
            kgenAddStmt(ctx, stmt);
        }
        else {
            sprintf(stmt, "%s = startX;\n", xName);
            kgenAddStmt(ctx, stmt);
            sprintf(stmt, "%s = startY + %s * %lu;\n", yName, lidVarName,
                    gpriv->work->nrRows);
            kgenAddStmt(ctx, stmt);
        }
    }
    else {
        sprintf(stmt, "pLine = ((get_image_width(dst) - startX) * %d / %lu) * %lu;\n",
                FLOAT4_VECLEN / gpriv->nfloats, gpriv->dim->x, gpriv->lmemLD);
        kgenAddStmt(ctx, stmt);

        if (rowShared) {
            sprintf(stmt, "index = %s / %u;\n", lidVarName,
                    iw->itemsPerRow);
        }
        else {
            sprintf(stmt, "index = %s * %lu;\n", lidVarName,
                    iw->nrRows);
        }
        kgenAddStmt(ctx, stmt);

        sprintf(stmt, "x = startX + (index * %lu) %% pLine / %u;\n", gpriv->dim->x,
                FLOAT4_VECLEN / gpriv->nfloats);
        kgenAddStmt(ctx, stmt);

        if (rowShared) {
            sprintf(stmt, "x += (%s %% %u) * (%lu / %u / %u);\n", lidVarName,
                    iw->itemsPerRow, gpriv->dim->x,
                    (FLOAT4_VECLEN / gpriv->nfloats), iw->itemsPerRow);
            kgenAddStmt(ctx, stmt);
        }

        sprintf(stmt, "y = startY + (index * %lu) / pLine;\n", gpriv->dim->x);
        kgenAddStmt(ctx, stmt);
    }

    kgenAddBlankLine(ctx);
}
예제 #9
0
/*
 * Add the statement updating an element of C with the product of an
 * element of A and an element of B, for real data.
 *
 * With TILEMUL_MAD the update is expressed with mad(), otherwise with
 * the "+=" and "*" operators.
 *
 * @ctx: generator context the statement is added to
 * @elA: expression naming the element of A
 * @elB: expression naming the element of B
 * @elC: expression naming the element of C
 * @transC: if set, the A element goes first in the product, otherwise
 *          the B element does
 * @core: multiplication core to use
 */
static void
genRealMulUpdate(
    struct KgenContext *ctx,
    const Kstring *elA,
    const Kstring *elB,
    const Kstring *elC,
    bool transC,
    TileMulCore core)
{
    char stmt[MAX_LENGTH];
    const char *first;
    const char *second;

    /*
     * The operand order matters because the type of the 'mad' result is
     * determined by its first operand
     */
    if (transC) {
        first = elA->buf;
        second = elB->buf;
    }
    else {
        first = elB->buf;
        second = elA->buf;
    }

    if (core != TILEMUL_MAD) {
        sprintf(stmt, "%s += %s * %s;\n", elC->buf, first, second);
    }
    else {
        sprintf(stmt, "%s = mad(%s, %s, %s);\n",
                elC->buf, first, second, elC->buf);
    }

    kgenAddStmt(ctx, stmt);
}
예제 #10
0
파일: tile.c 프로젝트: AndreasMiller/clBLAS
/*
 * Add the declaration of the private storage backing a tile.
 *
 * For PRIV_STORAGE_ARRAY a single array "<type> <baseName>[<size>];" is
 * declared; for any other storage type a list of separate variables
 * "<type> <baseName>0, <baseName>1, ...;" is declared. The type name is
 * obtained from getVectorTypeName() and the number of vectors from
 * tileVectorsNum().
 *
 * The declaration is assembled in a fixed-size buffer. Every write is
 * bounded; if the declaration does not fit, nothing is added to the
 * context and -EOVERFLOW is returned instead of writing past the buffer.
 *
 * @ctx: generator context the declaration is added to
 * @tile: tile whose storage is declared
 *
 * Returns 0 on success, -EOVERFLOW if the declaration does not fit in the
 * local buffer or kgenAddStmt() reports a failure.
 */
int
declareOneTileStorage(struct KgenContext *ctx, const Tile *tile)
{
    char tmp[1024];
    const char *tname;
    size_t size;
    int n;

    getVectorTypeName(tile->dtype, tile->vecLen, &tname, NULL);
    size = tileVectorsNum(tile);

    if (tile->storType == PRIV_STORAGE_ARRAY) {
        /* size_t is cast so that the argument matches %lu everywhere */
        n = snprintf(tmp, sizeof(tmp), "%s %s[%lu];\n", tname,
                     tile->baseName, (unsigned long)size);
        if (n < 0 || (size_t)n >= sizeof(tmp)) {
            return -EOVERFLOW;
        }
    }
    else {
        size_t used;
        size_t i;

        n = snprintf(tmp, sizeof(tmp), "%s %s0", tname, tile->baseName);
        if (n < 0 || (size_t)n >= sizeof(tmp)) {
            return -EOVERFLOW;
        }
        used = (size_t)n;

        /* the list grows with the tile size, hence the check per item */
        for (i = 1; i < size; i++) {
            n = snprintf(tmp + used, sizeof(tmp) - used, ", %s%lu",
                         tile->baseName, (unsigned long)i);
            if (n < 0 || (size_t)n >= sizeof(tmp) - used) {
                return -EOVERFLOW;
            }
            used += (size_t)n;
        }

        n = snprintf(tmp + used, sizeof(tmp) - used, ";\n");
        if (n < 0 || (size_t)n >= sizeof(tmp) - used) {
            return -EOVERFLOW;
        }
    }

    return kgenAddStmt(ctx, tmp) ? -EOVERFLOW : 0;
}
예제 #11
0
/*
 * Add the call of the function named funcName with the destination
 * dstName, the tile "c", a unit scaling factor and the position of the
 * work item's tile derived from "lid".
 *
 * The unit factor literal is chosen by the data type: a float2 or double2
 * constructor for the complex types, "1." for the others.
 *
 * @ctx: generator context the statement is added to
 * @gset: generator settings; kextra->dtype and subdims[0..1] are used
 * @funcName: name of the function to call in the generated code
 * @dstName: name of the destination passed as the first call argument
 */
void
genHeapTrsmResultToLDS(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    const char *funcName,
    const char *dstName)
{
    const SubproblemDim *subdims = gset->subdims;
    DataType dtype = gset->kextra->dtype;
    const char *unit;
    unsigned int pans;
    char stmt[1024];

    if (!isComplexType(dtype)) {
        unit = "1.";
    }
    else if (dtype == TYPE_COMPLEX_FLOAT) {
        unit = "(float2)(1.f, 0)";
    }
    else {
        unit = "(double2)(1., 0)";
    }

    /* ratio of the x sizes of the two subproblem levels */
    pans = (unsigned int)subdims[0].x / (unsigned int)subdims[1].x;

    sprintf(stmt, "%s(%s, c, %s, (lid / %u * %lu), (lid %% %u * %lu), %lu);\n",
            funcName, dstName, unit, pans, subdims[1].y, pans, subdims[1].x,
            subdims[0].bwidth);
    kgenAddStmt(ctx, stmt);
}
예제 #12
0
/*
 * Add the statements assigning NAN to every segment of a tile, followed by
 * a blank line.
 *
 * The tile is walked in segments of min(vecLen, nrCols) elements along the
 * rows for a non-transposed tile, and of min(vecLen, nrRows) elements
 * along the columns for a transposed one.
 *
 * @ctx: generator context the statements are added to
 * @tile: tile to fill
 */
void
genFillTileWithNAN(struct KgenContext *ctx, const Tile *tile)
{
    unsigned int rowStep, colStep, seg;
    unsigned int row, col;
    Kstring el;
    char stmt[1024];

    if (tile->trans) {
        seg = umin(tile->vecLen, tile->nrRows);
        rowStep = seg;
        colStep = 1;
    }
    else {
        seg = umin(tile->vecLen, tile->nrCols);
        rowStep = 1;
        colStep = seg;
    }

    for (row = 0; row < tile->nrRows; row += rowStep) {
        for (col = 0; col < tile->nrCols; col += colStep) {
            sprintfTileElement(&el, tile, row, col, seg);
            sprintf(stmt, "%s = NAN;\n", el.buf);
            kgenAddStmt(ctx, stmt);
        }
    }

    kgenAddBlankLine(ctx);
}
예제 #13
0
파일: tile.c 프로젝트: AndreasMiller/clBLAS
/*
 * Add the statements assigning zero to every segment of a tile, followed
 * by a blank line.
 *
 * The segment length is taken from tileLineSegmentLen(); segments are laid
 * along the rows for a non-transposed tile and along the columns for a
 * transposed one.
 *
 * @ctx: generator context the statements are added to
 * @tile: tile to clear
 */
void
genZeroTile(struct KgenContext *ctx, const Tile *tile)
{
    const unsigned int seg = tileLineSegmentLen(tile);
    unsigned int rowStep = 1;
    unsigned int colStep = 1;
    unsigned int row, col;
    Kstring el;
    char stmt[1024];

    if (tile->trans) {
        rowStep = seg;
    }
    else {
        colStep = seg;
    }

    for (row = 0; row < tile->nrRows; row += rowStep) {
        for (col = 0; col < tile->nrCols; col += colStep) {
            sprintfTileElement(&el, tile, row, col, seg);
            sprintf(stmt, "%s = 0;\n", el.buf);
            kgenAddStmt(ctx, stmt);
        }
    }

    kgenAddBlankLine(ctx);
}
예제 #14
0
/*
 * Unrolling callback for the f4zero function: adds the statement storing
 * zero through the "data" pointer and post-incrementing it.
 *
 * @ctx: generator context the statement is added to
 * @priv: not used
 *
 * Returns the result of kgenAddStmt().
 */
static int
f4zeroSingle(struct KgenContext *ctx, void *priv)
{
    int ret;

    DUMMY_ARG_USAGE(priv);

    ret = kgenAddStmt(ctx, "*data++ = 0;\n");

    return ret;
}
예제 #15
0
/*
 * Add the declaration of "kBegin", the starting position of the K loop.
 *
 * For a lower triangular matrix the loop runs from 0 till the diagonal,
 * so kBegin is 0. For an upper triangular one it runs from the diagonal
 * till M: kBegin is currM, rounded down to a multiple of dim->bwidth when
 * the kernel is generated with KEXTRA_TAILS_M and not in subgroup mode.
 *
 * @ctx: generator context the statement is added to
 * @dim: subproblem dimensions; only bwidth is used
 * @kflags: kernel extra flags
 * @subgMode: subgroup mode flag
 */
static void
genStartPosK(
    struct KgenContext *ctx,
    const SubproblemDim *dim,
    KernelExtraFlags kflags,
    bool subgMode)
{
    char stmt[1024];

    if (!isMatrixUpper(kflags)) {
        // K loop - from 0 till diagonal
        kgenAddStmt(ctx, "uint kBegin = 0;\n");
        return;
    }

    // K loop - from diagonal till M
    if (subgMode || !(kflags & KEXTRA_TAILS_M)) {
        kgenAddStmt(ctx, "uint kBegin = currM;\n");
        return;
    }

    sprintf(stmt, "uint kBegin = currM / %lu * %lu;\n",
            dim->bwidth, dim->bwidth);
    kgenAddStmt(ctx, stmt);
}
예제 #16
0
파일: trsm.c 프로젝트: AndreasMiller/clBLAS
/*
 * Generate a tile multiplication with the B operand taken from "lB".
 *
 * The generated code points "lB" at "tmpB", calls the function named
 * copy2LDSFuncName between two barriers, resets "lB" to "lBMain" and then
 * contains the multiplication produced by tileMulGen().
 *
 * While tileMulGen() runs, the B variable name, subdims[0].bwidth and the
 * memB option are temporarily overridden; the name and the width are
 * restored afterwards and memB is set back to CLMEM_GLOBAL_MEMORY.
 *
 * @ctx: generator context the code is added to
 * @gset: generator settings, modified only for the tileMulGen() call
 * @mulOpts: tile multiplication options; memB is modified as described
 * @parTile: tile giving the vector length and the temporary block width
 * @copy2LDSFuncName: name of the copying function in the generated code
 */
static void
genPreloadedTileMul(
    struct KgenContext *ctx,
    BlasGenSettings *gset,
    TileMulOpts *mulOpts,
    const Tile *parTile,
    const char* copy2LDSFuncName)
{
    KernelExtraFlags flags = gset->kextra->flags;
    const char *savedNameB;
    unsigned int savedBwidth;
    const char *ptrField;
    char stmt[1024];

    getVectorTypeName(gset->kextra->dtype, parTile->vecLen, NULL, &ptrField);
    kgenPrintf(ctx, "lB.%s = tmpB;\n", ptrField);
    kgenAddBarrier(ctx, CLK_LOCAL_MEM_FENCE);

    /* the two coordinate arguments swap places with the B access order */
    if (isMatrixAccessColMaj(CLBLAS_TRSM, flags, MATRIX_B)) {
        sprintf(stmt, "%s(lB, uB, k0, gid * %lu, ldb);\n",
            copy2LDSFuncName, gset->subdims[0].x);
    }
    else {
        sprintf(stmt, "%s(lB, uB, gid * %lu, k0, ldb);\n",
            copy2LDSFuncName, gset->subdims[0].x);
    }
    kgenAddStmt(ctx, stmt);

    kgenAddBarrier(ctx, CLK_LOCAL_MEM_FENCE);
    kgenAddBlankLine(ctx);

    kgenAddStmt(ctx, "lB = lBMain;\n\n");

    /* override the settings for the multiplication ... */
    mulOpts->memB = CLMEM_LOCAL_MEMORY;
    savedNameB = gset->varNames.B;
    savedBwidth = (unsigned int)gset->subdims[0].bwidth;
    gset->varNames.B = "lB";
    if (parTile->trans) {
        gset->subdims[0].bwidth = parTile->nrRows;
    }
    else {
        gset->subdims[0].bwidth = parTile->nrCols;
    }

    tileMulGen(ctx, gset, mulOpts);

    /* ... and put them back */
    gset->varNames.B = savedNameB;
    gset->subdims[0].bwidth = savedBwidth;
    mulOpts->memB = CLMEM_GLOBAL_MEMORY;
}
예제 #17
0
/*
 * Generate a complete vector-vector product: the element (m, n) of tile c
 * is updated with the product of a row of tile a and the column n of
 * tile b.
 *
 * The product is accumulated over k in steps of vlen: the common segment
 * length of a and b for the TILEMUL_DOT core on real data, 1 otherwise.
 *
 * @ctx: generator context the statements are added to
 * @m: row of the updated element of c
 * @n: column of the updated element of c
 * @a: tile A
 * @b: tile B
 * @c: tile C; its data type selects the real or the complex code path
 * @conjA: conjugation flag of A, passed to the complex update
 * @conjB: conjugation flag of B, passed to the complex update
 * @core: multiplication core
 * @wholeA: if false, row 0 of tile a is used whatever m is
 */
static void
genVecMul(
    struct KgenContext *ctx,
    unsigned int m,
    unsigned int n,
    const Tile *a,
    const Tile *b,
    const Tile *c,
    bool conjA,
    bool conjB,
    TileMulCore core,
    bool wholeA)
{
    unsigned int k;
    char tmp[MAX_LENGTH];
    Kstring elA, elB, elC;
    unsigned int vlen = 0;
    bool isComplex;
    bool isDouble;

    isDouble = isDoubleBasedType(c->dtype);
    isComplex = isComplexType(c->dtype);
    if ((core == TILEMUL_DOT) && !isComplex) {
        vlen = commonTileSegmentLen(a, b);
    }
    else {
        vlen = 1;
    }

    /* the C element is addressed with the original m */
    sprintfTileElement(&elC, c, m, n, 1);
    if (!wholeA) {
        m = 0;
    }

    for (k = 0; k < a->nrCols; k += vlen) {
        sprintfTileElement(&elA, a, m, k, vlen);
        sprintfTileElement(&elB, b, k, n, vlen);

        /*
         * Using 'dot' is not valid for complex, and replaced with '*' operator
         * for unvectorized real data
         */
        if ((core == TILEMUL_DOT) && (vlen > 1)) {
            sprintf(tmp, "%s += dot(%s, %s);\n",
                    elC.buf, elA.buf, elB.buf);
            /*
             * The statement must be emitted like in the other branches;
             * without it the vectorized product would be formatted and
             * then silently dropped
             */
            kgenAddStmt(ctx, tmp);
        }
        else if (isComplex) {
            Kstring expr;

            sprintfComplexMulUpdate(&expr, &elC, &elA, &elB, &elC, isDouble,
                                    conjA, conjB, core);
            kgenAddStmt(ctx, expr.buf);
        }
        else {
            genRealMulUpdate(ctx, &elA, &elB, &elC, c->trans, core);
        }
    }
}
예제 #18
0
파일: trsm.c 프로젝트: AndreasMiller/clBLAS
/*
 * Setup coordinates before beginning a trsm stage
 * A caller must ensure the strict stage sequence:
 * BLOCK_UPDATE -> TILE_UPDATE
 *
 * Three statements are added, setting "coordA", "k0" and "coordB", followed
 * by a blank line. The "k0" formula depends on the stage and on whether the
 * matrix is upper triangular. For a stage value other than BLOCK_UPDATE and
 * TILE_UPDATE no "k0" statement is added.
 *
 * @ctx: generator context the statements are added to
 * @gset: generator settings; kextra->flags and subdims[0..1] are used
 * @stage: trsm stage the coordinates are set up for
 */
static void
genSetupCoords(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    enum TrsmStage stage)
{
    char tmp[1024];
    KernelExtraFlags kflags = gset->kextra->flags;
    const SubproblemDim *dims = gset->subdims;
    unsigned int l1Pans = (unsigned int)(dims[0].x / dims[1].x);
    const char *s;
    int haveK0 = 1;

    s = isMatrixUpper(kflags) ? "currM" : "m0";
    sprintf(tmp, "coordA = %s + (lid / %u * %lu);\n",
            s, l1Pans, dims[1].y);
    kgenAddStmt(ctx, tmp);

    switch (stage) {
    case BLOCK_UPDATE:
        if (isMatrixUpper(kflags)) {
            sprintf(tmp, "k0 = currM + %lu;\n", dims[0].y);
        }
        else {
            sprintf(tmp, "k0 = 0;\n");
        }
        break;
    case TILE_UPDATE:
        if (isMatrixUpper(kflags)) {
            sprintf(tmp, "k0 = currM + %lu - m1 * %lu;\n",
                    dims[0].y - dims[1].y, dims[1].y);
        }
        else {
            sprintf(tmp, "k0 = m0 + m1 * %lu;\n", dims[1].y);
        }
        break;
    default:
        /*
         * Unknown stage: the buffer still holds the coordA statement,
         * which must not be added for the second time
         */
        haveK0 = 0;
        break;
    }

    if (haveK0) {
        kgenAddStmt(ctx, tmp);
    }

    sprintf(tmp, "coordB = gid * %lu + (lid %% %u * %lu);\n",
            dims[0].x, l1Pans, dims[1].x);

    kgenAddStmt(ctx, tmp);
    kgenAddBlankLine(ctx);
}
예제 #19
0
파일: symv.c 프로젝트: AndreasMiller/clBLAS
/*
 * Post-fetch callback adding the statements that conditionally zero the
 * elements of tile A.
 *
 * For every element a statement of the form
 *   <elem> = [Ktail <= y || ]<name><x-offset> <cmp> n<y-offset> ? 0 : <elem>;
 * is added, where <name>/<cmp> are "k"/">=" when the "diag" flag of the
 * private data is set and "coordA"/">" otherwise. The "Ktail" term appears
 * only if TILEMUL_SKEW_B is set in the multiplication options.
 * The offsets are produced by genAdd().
 *
 * fetchNumA of the post-fetch private data is incremented once per outer
 * loop iteration.
 *
 * @ctx: generator context the statements are added to
 * @mrole: not used
 * @priv: pointer to struct symvPrivate
 *
 * Always returns 0.
 */
static int
genPostFetchVertDiag(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    TileMulOpts *mulOpts = sp->mulOpts;
    Tile *tile = (Tile *)&pfPriv->gset->tileA;
    bool diag = sp->diag;
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    /* neither of these depends on the element being processed */
    const char *cmp = diag ? ">=" : ">";
    const char *name = diag ? "k" : "coordA";
    char stmt[1024], ktailCond[128] = "";
    char offs[2][128];
    size_t width, height;
    unsigned int x, y;
    (void)mrole;

    // zero triangular part of tile a
    // either a single row of tile a or the whole tile has been fetched
    if (diag) {
        height = dims->y;
        width = dims->bwidth;
    }
    else {
        height = dims->bwidth;
        width = dims->y;
    }

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            Kstring el;

            /* the tile is addressed transposed in the non-diagonal case */
            if (diag) {
                sprintfTileElement(&el, tile, y, x, 1);
            }
            else {
                sprintfTileElement(&el, tile, x, y, 1);
            }
            genAdd(offs[0], x);
            genAdd(offs[1], y);
            if (mulOpts->flags & TILEMUL_SKEW_B) {
                sprintf(ktailCond, "Ktail <= %i || ", y);
            }
            sprintf(stmt, "%s = %s%s%s %s n%s ? 0 : %s;\n",
                    el.buf, ktailCond, name, offs[0], cmp, offs[1],
                    el.buf);
            kgenAddStmt(ctx, stmt);
        }
        pfPriv->fetchNumA++;
    }

    return 0;
}
예제 #20
0
/*
 * Add statement setting initial local pointer for the work item
 *
 * @ctx: generator context the statement is added to
 * @ptrName: name of the pointer variable to advance
 * @ld: lead dimension for the local block in float words;
 *       if it's zero, the "ld" argument of a generated function is
 *       used instead
 * @transpose: if set (and @ld is not zero), consecutive rows are 1 element
 *       apart and consecutive elements of a row are @ld apart; otherwise
 *       it is the other way round
 * @pgran: granularity; the product of wgSize[0] and wgSize[1] is taken as
 *       the work group size
 * @gpriv: description of the copy being generated
 *
 * The statement is followed by a blank line. Its formula depends on
 * whether the block has fewer rows than there are work items in the group.
 */
static void
addSettingPtrCode(
    struct KgenContext *ctx,
    const char *ptrName,
    size_t ld,
    bool transpose,
    const PGranularity *pgran,
    GenPriv *gpriv)
{
    char tmp[4096];
    const char *vfield;
    const SubproblemDim *dim = gpriv->dim;
    const ItemWork *work = gpriv->work;
    size_t gsize;

    vfield = dtypeUPtrField(gpriv->dtype);
    gsize = pgran->wgSize[0] * pgran->wgSize[1];

    /*
     * Every argument printed with %lu is cast to unsigned long: size_t is
     * not that type on all platforms, and a mismatched printf argument is
     * undefined behavior
     */
    if (ld) {
        // offset between two rows and two elements in each row
        size_t roff, eoff;

        if (transpose) {
            roff = 1;
            eoff = ld;
        }
        else {
            roff = ld;
            eoff = 1;
        }

        if (dim->y < gsize) {
            sprintf(tmp, "%s.%s += (%s / %u) * %lu + (%s %% %u * %lu) * %lu;\n",
                    ptrName, vfield, lidVarName, work->itemsPerRow,
                    (unsigned long)roff, lidVarName, work->itemsPerRow,
                    (unsigned long)work->nrCols, (unsigned long)eoff);
        }
        else {
            sprintf(tmp, "%s.%s += %s * %lu * %lu;\n",
                    ptrName, vfield, lidVarName,
                    (unsigned long)work->nrRows, (unsigned long)roff);
        }
    }
    else {
        if (dim->y < gsize) {
            sprintf(tmp, "%s.%s += (startRow + %s / %u) * %s + "
                                   "startCol + %s %% %u * %lu;\n",
                    ptrName, vfield, lidVarName, work->itemsPerRow,
                    gpriv->globLDName, lidVarName, work->itemsPerRow,
                    (unsigned long)work->nrCols);
        }
        else {
            sprintf(tmp, "%s.%s += (startRow + %s * %lu) * %s + startCol;\n",
                    ptrName, vfield, lidVarName,
                    (unsigned long)work->nrRows, gpriv->globLDName);
        }
    }

    kgenAddStmt(ctx, tmp);
    kgenAddBlankLine(ctx);
}
예제 #21
0
파일: symv.c 프로젝트: AndreasMiller/clBLAS
/*
 * Post-fetch callback adding the statements that conditionally zero the
 * elements of tile A.
 *
 * For every element a statement of the form
 *   <elem> = Ktail <= y || <coord><x-offset> <cmp> n<y-offset> ? 0 : <elem>;
 * is added. <coord> is the coordA variable name when the "coord" flag of
 * the private data is set and the k variable name otherwise; <cmp> is ">="
 * when the "diag" flag is set and ">" otherwise. The offsets are produced
 * by genAdd().
 *
 * fetchNumA of the post-fetch private data is incremented once per outer
 * loop iteration.
 *
 * @ctx: generator context the statements are added to
 * @mrole: not used
 * @priv: pointer to struct symvPrivate
 *
 * Always returns 0.
 */
static int
genPostFetchDiag(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    Tile *tile = (Tile *)&pfPriv->gset->tileA;
    bool diag = sp->diag;
    bool byCoordA = sp->coord;
    const KernelVarNames *vnames = &pfPriv->gset->varNames;
    const char *coord = byCoordA ? vnames->coordA : vnames->k;
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    /* the comparison does not depend on the element being processed */
    const char *cmp = diag ? ">=" : ">";
    char stmt[1024];
    char offs[2][128];
    size_t width, height;
    unsigned int x, y;
    (void)mrole;

    // zero triangular part of tile a
    // either a single row of tile a or the whole tile has been fetched
    if (byCoordA) {
        height = dims->bwidth;
        width = dims->y;
    }
    else {
        height = dims->y;
        width = dims->bwidth;
    }

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            Kstring el;

            /* the tile is addressed transposed in the diagonal case */
            if (diag) {
                sprintfTileElement(&el, tile, x, y, 1);
            }
            else {
                sprintfTileElement(&el, tile, y, x, 1);
            }
            genAdd(offs[0], x);
            genAdd(offs[1], y);
            sprintf(stmt, "%s = Ktail <= %i || %s%s %s n%s ? 0 : %s;\n",
                    el.buf, y, coord, offs[0], cmp, offs[1],
                    el.buf);
            kgenAddStmt(ctx, stmt);
        }
        pfPriv->fetchNumA++;
    }

    return 0;
}
예제 #22
0
/*
 * Generate the function zeroing a block of memory.
 *
 * The function declaration is built from the f4zeroDecl template with a
 * one-letter memory prefix ('l' or 'g'), dim->x and memPrefix. The body
 * declares the local ID variable, offsets "data" by the local ID times the
 * number of columns assigned to a work item, and then contains the
 * unrolled zeroing loop built from f4zeroSingle, plus the tail code when
 * the work distribution reports a tail.
 *
 * @ctx: generator context the function is added to
 * @dim: block dimensions; dim->y must be 1
 * @pgran: granularity the work is distributed with
 * @memPrefix: memory qualifier, either "__local" or "__global"
 *
 * Returns -EINVAL if memPrefix is not one of the two supported qualifiers
 * or if dim->y is not 1; otherwise the result of kgenEndFuncBody().
 */
int
f4zeroBlockGen(
    struct KgenContext *ctx,
    const SubproblemDim *dim,
    const PGranularity *pgran,
    const char *memPrefix)
{
    char tmp[1024];
    ItemWork work;
    LoopCtl loopCtl;
    GenPriv priv;
    char pref;
    LoopUnrollers unrollers;

    /* map the memory qualifier to the letter used in the declaration */
    if (!strcmp(memPrefix, "__local")) {
        pref = 'l';
    }
    else if (!strcmp(memPrefix, "__global")) {
        pref = 'g';
    }
    else {
        return -EINVAL;
    }

    /* only single-row blocks are supported */
    if (dim->y != 1) {
        return -EINVAL;
    }

    memset(&loopCtl, 0, sizeof(loopCtl));
    memset(&unrollers, 0, sizeof(unrollers));
    memset(&priv, 0, sizeof(GenPriv));
    /*
     * NOTE(review): 'work' is handed to initGenPriv() before getItemWork()
     * fills it in; presumably initGenPriv() only stores the pointer -
     * confirm. getItemWork() in turn relies on priv.nfloats and
     * priv.vecLen, so the order of these two calls must be kept.
     */
    initGenPriv(&priv, TYPE_COMPLEX_DOUBLE, FLOAT4_VECLEN * sizeof(cl_float),
                dim, 0, (const ItemWork*)&work, pgran);
    getItemWork(&work, dim, pgran, priv.nfloats, priv.vecLen);

    sprintf(tmp, f4zeroDecl, pref, dim->x, memPrefix);
    kgenDeclareFunction(ctx, tmp);
    kgenBeginFuncBody(ctx);

    // declare local ID variable and set data offset
    kgenDeclareLocalID(ctx, lidVarName, pgran);
    sprintf(tmp, "\ndata += %s * %lu;\n\n",
            lidVarName, work.nrCols);
    kgenAddStmt(ctx, tmp);

    /* the inner loop covers the columns assigned to one work item */
    unrollers.genSingle = f4zeroSingle;
    loopCtl.inBound = (unsigned int)work.nrCols;
    unrollers.getVecLen = getVecLen;

    kgenLoopUnroll(ctx, &loopCtl, TYPE_COMPLEX_DOUBLE, &unrollers, &priv);
    if (work.tail) {
        addTailCode(ctx, &priv, NULL, f4zeroSingle);
    }

    return kgenEndFuncBody(ctx);
}
예제 #23
0
파일: symv.c 프로젝트: AndreasMiller/clBLAS
/*
 * Post-fetch callback adding the statements that build the tile stored in
 * the private data ("tilea") from the tile A of the generator settings.
 *
 * For every element a statement of the form
 *   <dst(y,x)> = k<x-offset> > n<y-offset> ? <src(x,y)> : <src(y,x)>;
 * is added, i.e. the element is taken either from the transposed or from
 * the same position of the source tile. The offsets are produced by
 * genAdd().
 *
 * fetchNumA of the post-fetch private data is incremented once per outer
 * loop iteration. Finally the tile A descriptor of the generator settings
 * is overwritten with the descriptor of the private tile.
 *
 * @ctx: generator context the statements are added to
 * @mrole: not used
 * @priv: pointer to struct symvPrivate
 *
 * Always returns 0.
 */
static int
genPostFetchMirror(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    TileMulOpts *mulOpts = sp->mulOpts;
    Tile *srcTile = (Tile *)&pfPriv->gset->tileA;
    Tile *dstTile = &sp->tilea;
    bool tra = ((mulOpts->flags & TILEMUL_TRA) != 0);
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    const char *cmp = ">";
    char stmt[1024];
    char offs[2][128];
    size_t width, height;
    unsigned int x, y;
    (void)mrole;

    // either a single row of tile a or the whole tile has been fetched
    if (tra) {
        height = dims->bwidth;
        width = dims->y;
    }
    else {
        height = dims->y;
        width = dims->bwidth;
    }

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            Kstring mirrored, direct, dst;

            sprintfTileElement(&mirrored, srcTile, x, y, 1);
            sprintfTileElement(&direct, srcTile, y, x, 1);
            sprintfTileElement(&dst, dstTile, y, x, 1);
            genAdd(offs[0], x);
            genAdd(offs[1], y);
            sprintf(stmt, "%s = k%s %s n%s ? %s : %s;\n",
                    dst.buf, offs[0], cmp, offs[1],
                    mirrored.buf, direct.buf);
            kgenAddStmt(ctx, stmt);
        }
        pfPriv->fetchNumA++;
    }

    /* from now on the settings refer to the newly built tile */
    *srcTile = *dstTile;

    return 0;
}
예제 #24
0
/*
 * Generate the multiplication of the tiles, line by line.
 *
 * After the input is validated with checkInput(), a "sum" variable of the
 * kernel's vector type is declared when A is not transposed, B is, and
 * either the effective core is TILEMUL_MULADD or the data type is complex.
 * Then genMulLineOnTile() is invoked for every line: subdims[1].bwidth
 * lines if A is transposed, subdims[1].y lines otherwise.
 *
 * @ctx: generator context the code is added to
 * @gset: generator settings
 * @mulOpts: tile multiplication options
 *
 * Returns the checkInput() error if there is one, -EOVERFLOW if the
 * context reports a failure after the generation, 0 otherwise.
 */
int
genMulTiles(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    const TileMulOpts *mulOpts)
{
    const CLBLASKernExtra *extra = gset->kextra;
    const bool tra = ((mulOpts->flags & TILEMUL_TRA) != 0);
    const bool trb = ((mulOpts->flags & TILEMUL_TRB) != 0);
    const char *vecType;
    TileMulCore core;
    unsigned int line;
    unsigned int nrLines;
    char decl[32];
    int ret;

    ret = checkInput(gset, mulOpts);
    if (ret) {
        return ret;
    }

    getVectorTypeName(extra->dtype, extra->vecLen, &vecType, NULL);
    core = checkReplaceCore(gset, mulOpts->core, tra, trb);

    if ((core == TILEMUL_MULADD || isComplexType(extra->dtype)) &&
        !tra && trb) {
        sprintf(decl,"%s sum;\n", vecType);
        kgenAddStmt(ctx, decl);
    }

    if (tra) {
        nrLines = (unsigned int)gset->subdims[1].bwidth;
    }
    else {
        nrLines = (unsigned int)gset->subdims[1].y;
    }

    for (line = 0; line < nrLines; line++) {
        genMulLineOnTile(ctx, gset, mulOpts, line, true);
    }

    // a NULL statement is added just to get the context state
    ret = kgenAddStmt(ctx, NULL);
    if (ret) {
        return -EOVERFLOW;
    }

    return 0;
}
예제 #25
0
static int
copyImgVec(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    GenPriv *gpriv = (GenPriv*)priv;

    dtypeUPtrField(gpriv->dtype);
    sprintf(tmp, "write_imageui(%s, (int2)(%s++,%s), as_uint4(*%s.f4v++));\n",
            gpriv->dstName, gpriv->imgXName, gpriv->imgYName, gpriv->srcName);

    return kgenAddStmt(ctx, tmp);
}
예제 #26
0
static int
copyMemPostUnroll(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    const char *s[2] = {"src", "dst"};
    GenPriv *gpriv = (GenPriv*)priv;
    int gdir;
    const char *vfield;

    gdir = (gpriv->dir == DBLOCK_GLOBAL_TO_LOCAL) ? 0 : 1;

    if (gpriv->work && gpriv->work->tail) {
        addCopyTailCode(ctx, gpriv);
    }

    if (!gpriv->transp) {
        kgenAddBlankLine(ctx);
    }

    // modify pointers
    vfield = dtypeUPtrField(gpriv->dtype);
    sprintf(tmp, "%s.%s += %s;\n", s[gdir], vfield, gpriv->globLDName);
    kgenAddStmt(ctx, tmp);

    if (gpriv->transp) {
        sprintf(tmp, "%s.%s++;\n", s[1 - gdir], vfield);
    }
    else {
        if (gpriv->locLDName) {
            sprintf(tmp, "%s.%s += %s;\n", s[1 - gdir],
                    vfield, gpriv->locLDName);
        }
        else {
            sprintf(tmp, "%s.%s += %lu;\n", s[1 - gdir],
                    vfield, gpriv->lmemLD);
        }
    }

    return kgenAddStmt(ctx, tmp);
}
예제 #27
0
파일: tile.c 프로젝트: AndreasMiller/clBLAS
/*
 * Add the statement assigning zero to a segment of a tile.
 *
 * @ctx: generator context the statement is added to
 * @tile: tile the segment belongs to
 * @row: row of the segment start
 * @col: column of the segment start
 * @len: segment length, passed to sprintfTileElement()
 */
void
genSetZeroInTile(
    struct KgenContext *ctx,
    const Tile *tile,
    unsigned int row,
    unsigned int col,
    unsigned int len)
{
    Kstring target;
    char stmt[1024];

    sprintfTileElement(&target, tile, row, col, len);

    sprintf(stmt, "%s = 0;\n", target.buf);
    kgenAddStmt(ctx, stmt);
}
예제 #28
0
파일: tile.c 프로젝트: AndreasMiller/clBLAS
/*
 * Add the statement assigning one to a single element of a tile. The
 * literal used for "one" is returned by strOne() for the tile data type.
 *
 * @ctx: generator context the statement is added to
 * @tile: tile the element belongs to
 * @row: row of the element
 * @col: column of the element
 */
void
genSetUnitInTile(
    struct KgenContext *ctx,
    const Tile *tile,
    unsigned int row,
    unsigned int col)
{
    Kstring target;
    const char *unit;
    char stmt[1024];

    sprintfTileElement(&target, tile, row, col, 1);
    unit = strOne(tile->dtype);

    sprintf(stmt, "%s = %s;\n", target.buf, unit);
    kgenAddStmt(ctx, stmt);
}
예제 #29
0
static int
copyImgPreUnroll(struct KgenContext *ctx, void *priv)
{
    char tmp[1024];
    GenPriv *gpriv = (GenPriv*)priv;
    if (gpriv->packed) {
        sprintf(tmp, "%s = startX + (index * %lu) %% pLine / %u;\n"
                "%s = startY + (index * %lu) / pLine;\n" "%s = src;\n\n",
                gpriv->imgXName, gpriv->dim->x, FLOAT4_VECLEN / gpriv->nfloats,
                gpriv->imgYName, gpriv->dim->x, gpriv->srcName);
    }
    else {
        sprintf(tmp, "%s = x;\n" "%s = y;\n" "%s = src;\n\n", gpriv->imgXName,
                gpriv->imgYName, gpriv->srcName);
    }
    return kgenAddStmt(ctx, tmp);
}
예제 #30
0
/*
 * Add the statement initializing "currM", followed by a blank line.
 *
 * For an upper triangular matrix currM starts at 0; otherwise it starts at
 * (M - 1) rounded down to a multiple of dim->y.
 *
 * @ctx: generator context the statement is added to
 * @dim: subproblem dimensions; only y is used
 * @kflags: kernel extra flags
 */
static void
genInitCurrM(
    struct KgenContext *ctx,
    const SubproblemDim *dim,
    KernelExtraFlags kflags)
{
    char stmt[1024];

    if (!isMatrixUpper(kflags)) {
        sprintf(stmt, "currM = (M - 1) / %lu * %lu;\n", dim->y, dim->y);
        kgenAddStmt(ctx, stmt);
    }
    else {
        kgenAddStmt(ctx, "currM = 0;\n");
    }

    kgenAddBlankLine(ctx);
}