static int copyImgPostUnroll(struct KgenContext *ctx, void *priv) { char tmp[1024]; GenPriv *gpriv = (GenPriv*)priv; const char *vfield = dtypeUPtrField(gpriv->dtype); if (gpriv->work && gpriv->work->tail) { addCopyTailCode(ctx, gpriv); } kgenAddBlankLine(ctx); if (gpriv->dir == DBLOCK_GLOBAL_TO_IMAGE) { sprintf(tmp, "src.%s += %s;\n", vfield, gpriv->globLDName); } else if (gpriv->dir == DBLOCK_LOCAL_TO_IMAGE) { sprintf(tmp, "src.%s += %lu;\n", vfield, gpriv->lmemLD); } kgenAddStmt(ctx, tmp); if(gpriv->packed) { sprintf(tmp, "index++;\n"); } else { sprintf(tmp, "y++;\n"); } return kgenAddStmt(ctx, tmp); }
static int copyMemVec(struct KgenContext *ctx, void *priv) { char tmp[1024]; char vec[64]; GenPriv *gpriv = (GenPriv*)priv; if (gpriv->vecLen == 1) sprintf(vec,"f"); else sprintf(vec,"f%dv", gpriv->vecLen); if (gpriv->conjugate) { sprintf(tmp, "tmp = *%s.%s++;\n", gpriv->srcName, vec); kgenAddStmt(ctx, tmp); if (gpriv->dtype == TYPE_COMPLEX_FLOAT) { kgenAddStmt(ctx, "tmp.y = -tmp.y;\n" "tmp.w = -tmp.w;\n"); } else { kgenAddStmt(ctx, "tmp.y = -tmp.y;\n"); } sprintf(tmp, "*%s.%s++ = tmp;\n", gpriv->dstName, vec); } else { sprintf(tmp, "*%s.%s++ = *%s.%s++;\n", gpriv->dstName, vec, gpriv->srcName, vec); } return kgenAddStmt(ctx, tmp); }
static int copyMemSingle(struct KgenContext *ctx, void *priv) { char tmp[1024]; GenPriv *gpriv = (GenPriv*)priv; const char *vfield; vfield = dtypeUPtrField(gpriv->dtype); if (gpriv->conjugate) { sprintf(tmp, "*%s.%s = *%s.%s++;\n", gpriv->dstName, vfield, gpriv->srcName, vfield); kgenAddStmt(ctx, tmp); sprintf(tmp, "(*%s.%s).y = -(*%s.%s).y;\n", gpriv->dstName, vfield, gpriv->dstName, vfield); kgenAddStmt(ctx, tmp); sprintf(tmp, "%s.%s++;\n", gpriv->dstName, vfield); } else { sprintf(tmp, "*%s.%s++ = *%s.%s++;\n", gpriv->dstName, vfield, gpriv->srcName, vfield); } return kgenAddStmt(ctx, tmp); }
/*
 * Prepare generator outer loop
 *
 * Declares the outer loop counter "n" in the generated code and fills
 * 'loopCtl' with the counter name and the outer loop bound.
 *
 * When work->nrItems is zero the bound is the constant work->nrRows.
 * Otherwise a variable named by lboundVarName is declared and assigned in
 * the generated code, and the loop is bounded by that variable.
 *
 * @ctx: generator context the statements are appended to
 * @work: description of the work assigned to a work item
 * @loopCtl: loop control structure to fill in
 */
static void
prepareLoop(struct KgenContext *ctx, ItemWork *work, LoopCtl *loopCtl)
{
    char tmp[1024];

    kgenAddStmt(ctx, "size_t n;\n");
    loopCtl->ocName = "n";

    if (!work->nrItems) {
        /* every work item processes the same, constant number of rows */
        loopCtl->outBound.val = (unsigned long)work->nrRows;
        loopCtl->obConst = true;
        return;
    }

    sprintf(tmp, "size_t %s;\n\n", lboundVarName);
    kgenAddStmt(ctx, tmp);

    /*
     * set number of rows to be processed by the work item;
     * in the case it is not a constant
     */
    if (work->blockTail) {
        /*
         * NOTE(review): setLoopBoundStmt is defined outside this function;
         * confirm that it assigns the variable named by lboundVarName.
         */
        sprintf(tmp, setLoopBoundStmt, work->nrItems - 1,
                work->nrItems - 1, work->blockTail, work->nrRows);
    }
    else {
        /*
         * Assign the variable that was declared above through
         * lboundVarName instead of a hard-coded name, so the declaration
         * and the assignment can never refer to different variables.
         */
        sprintf(tmp, "%s = (%s >= %u) ? 0 : %lu;\n",
                lboundVarName, lidVarName, work->nrItems, work->nrRows);
    }
    kgenAddStmt(ctx, tmp);

    loopCtl->outBound.name = lboundVarName;
}
/*
 * Pre-unroll callback for memory copy generators: emits the statements
 * initializing the generated working pointers "src1" and "dst1" from
 * "src" and "dst".
 *
 * @ctx: generator context the statements are appended to
 * @priv: not used
 *
 * Returns the result of the second kgenAddStmt() call; the result of the
 * first one is not checked.
 */
static int
copyMemPreUnroll(struct KgenContext *ctx, void *priv)
{
    int ret;

    DUMMY_ARG_USAGE(priv);

    kgenAddStmt(ctx, "src1 = src;\n");
    ret = kgenAddStmt(ctx, "dst1 = dst;\n\n");

    return ret;
}
static int copyMemSingleTransp(struct KgenContext *ctx, void *priv) { char tmp[1024]; GenPriv *gpriv = (GenPriv*)priv; const char *vfield; vfield = dtypeUPtrField(gpriv->dtype); kgenAddBlankLine(ctx); if (gpriv->dir == DBLOCK_GLOBAL_TO_LOCAL) { if (gpriv->locLDName) { sprintf(tmp, "*%s.%s = *%s.%s++;\n", gpriv->dstName, vfield, gpriv->srcName, vfield); kgenAddStmt(ctx, tmp); if (gpriv->conjugate) { sprintf(tmp, "(*%s.%s).y = -(*%s.%s).y;\n", gpriv->dstName, vfield, gpriv->dstName, vfield); kgenAddStmt(ctx, tmp); } sprintf(tmp, "%s.%s += %s;\n", gpriv->dstName, vfield, gpriv->locLDName); } else { sprintf(tmp, "%s.%s[%lu] = *%s.%s++;\n", gpriv->dstName, vfield, gpriv->lmemLD * gpriv->cnt, gpriv->srcName, vfield); if (gpriv->conjugate) { kgenAddStmt(ctx, tmp); sprintf(tmp, "%s.%s[%lu].y = -%s.%s[%lu].y;\n", gpriv->dstName, vfield, gpriv->lmemLD * gpriv->cnt, gpriv->dstName, vfield, gpriv->lmemLD * gpriv->cnt); } } } else { if (gpriv->locLDName) { sprintf(tmp, "*%s.%s++ = *%s.%s;\n" "%s.%s += %s;\n", gpriv->dstName, vfield, gpriv->srcName, vfield, gpriv->srcName, vfield, gpriv->locLDName); } else { sprintf(tmp, "*%s.%s++ = %s.%s[%lu];\n", gpriv->dstName, vfield, gpriv->srcName, vfield, gpriv->lmemLD * gpriv->cnt); } } gpriv->cnt++; return kgenAddStmt(ctx, tmp); }
/*
 * Generate code clearing the tile rows lying beyond the matrix bound.
 *
 * A generated constant "bound" holds the number of valid rows starting at
 * coordA, limited by the second-level subproblem height. Every tile element
 * whose row index is not below "bound" is replaced with 0. For a tile of
 * matrix A the diagonal elements of such rows are then set to the unit
 * value of the kernel data type.
 *
 * For MATRIX_A the statements are surrounded by blank lines; for any other
 * role they are enclosed in a branch opened with kgenBeginBranch().
 *
 * @ctx: generator context the statements are appended to
 * @gset: generator settings (subproblem dimensions and kernel extra info)
 * @mrole: role of the matrix the tile belongs to
 * @tile: tile to be processed
 */
static void
genZeroTileTrash(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    MatrixRole mrole,
    Tile* tile)
{
    char stmt[1024];
    const SubproblemDim *subdim = &gset->subdims[1];
    const CLBLASKernExtra *kextra = gset->kextra;
    unsigned int row, col, d;
    unsigned int seg;
    Kstring el;

    if (mrole == MATRIX_A) {
        kgenAddBlankLine(ctx);
    }
    else {
        kgenBeginBranch(ctx, NULL);
    }

    sprintf(stmt, "const int bound = (coordA + %lu > M) ? (M - coordA) : %lu;\n",
            subdim->y, subdim->y);
    kgenAddStmt(ctx, stmt);

    /* transposed tiles are processed element by element */
    seg = tileLineSegmentLen(tile);
    if (tile->trans) {
        seg = 1;
    }

    for (row = 0; row < tile->nrRows; ++row) {
        for (col = 0; col < tile->nrCols; col += seg) {
            sprintfTileElement(&el, tile, row, col, seg);
            sprintf(stmt, "%s = (bound <= %u) ? 0 : %s;\n",
                    el.buf, row, el.buf);
            kgenAddStmt(ctx, stmt);
        }
    }

    if (mrole != MATRIX_A) {
        kgenEndBranch(ctx, NULL);
        return;
    }

    // Set units in the trash diagonal elements for a tile of A
    for (d = 0; d < (unsigned int)subdim->y; d++) {
        sprintfTileElement(&el, tile, d, d, 1);
        sprintf(stmt, "%s = (bound <= %d) ? %s : %s;\n",
                el.buf, (int)d, strOne(kextra->dtype), el.buf);
        kgenAddStmt(ctx, stmt);
    }

    kgenAddBlankLine(ctx);
}
/*
 * Add statements setting the initial image coordinates for the work item.
 *
 * For packed images the generated variables "pLine", "index", "x" and "y"
 * are assigned directly (xName and yName are not used on that path). For
 * unpacked images the variables named by xName and yName are assigned.
 * In both cases the formulas depend on whether the block height is smaller
 * than the total work group size. A blank line is appended at the end.
 *
 * @ctx: generator context the statements are appended to
 * @xName: name of the X coordinate variable (unpacked case)
 * @yName: name of the Y coordinate variable (unpacked case)
 * @pgran: work group granularity
 * @gpriv: private generator data describing the copy
 */
static void
addSettingImageXYCode(
    struct KgenContext *ctx,
    const char *xName,
    const char *yName,
    const PGranularity *pgran,
    GenPriv *gpriv)
{
    char stmt[4096];
    const ItemWork *work = gpriv->work;
    size_t gsize = pgran->wgSize[0] * pgran->wgSize[1];

    if (!gpriv->packed) {
        if (gpriv->dim->y < gsize) {
            /* several work items share one row */
            sprintf(stmt, "%s = startX + %s %% %u * %lu / %d;\n",
                    xName, lidVarName, work->itemsPerRow,
                    work->nrCols, FLOAT4_VECLEN/gpriv->nfloats);
            kgenAddStmt(ctx, stmt);

            sprintf(stmt, "%s = startY + %s / %u;\n",
                    yName, lidVarName, work->itemsPerRow);
            kgenAddStmt(ctx, stmt);
        }
        else {
            /* every work item handles whole rows */
            sprintf(stmt, "%s = startX;\n", xName);
            kgenAddStmt(ctx, stmt);

            sprintf(stmt, "%s = startY + %s * %lu;\n",
                    yName, lidVarName, gpriv->work->nrRows);
            kgenAddStmt(ctx, stmt);
        }

        kgenAddBlankLine(ctx);
        return;
    }

    /* packed case */
    sprintf(stmt,
            "pLine = ((get_image_width(dst) - startX) * %d / %lu) * %lu;\n",
            FLOAT4_VECLEN / gpriv->nfloats, gpriv->dim->x, gpriv->lmemLD);
    kgenAddStmt(ctx, stmt);

    if (gpriv->dim->y < gsize) {
        sprintf(stmt, "index = %s / %u;\n", lidVarName, work->itemsPerRow);
    }
    else {
        sprintf(stmt, "index = %s * %lu;\n", lidVarName, work->nrRows);
    }
    kgenAddStmt(ctx, stmt);

    sprintf(stmt, "x = startX + (index * %lu) %% pLine / %u;\n",
            gpriv->dim->x, FLOAT4_VECLEN / gpriv->nfloats);
    kgenAddStmt(ctx, stmt);

    if (gpriv->dim->y < gsize) {
        /* offset of the work item within its row */
        sprintf(stmt, "x += (%s %% %u) * (%lu / %u / %u);\n",
                lidVarName, work->itemsPerRow, gpriv->dim->x,
                (FLOAT4_VECLEN / gpriv->nfloats), work->itemsPerRow);
        kgenAddStmt(ctx, stmt);
    }

    sprintf(stmt, "y = startY + (index * %lu) / pLine;\n", gpriv->dim->x);
    kgenAddStmt(ctx, stmt);

    kgenAddBlankLine(ctx);
}
/*
 * Generate a multiply-and-accumulate statement for real data:
 * either "C = mad(op1, op2, C)" for the TILEMUL_MAD core or
 * "C += op1 * op2" for any other core.
 *
 * @ctx: generator context the statement is appended to
 * @elA: string with the element of A
 * @elB: string with the element of B
 * @elC: string with the element of C being updated
 * @transC: when set, A goes first in the product, otherwise B goes first
 * @core: multiplication core selecting the statement form
 */
static void
genRealMulUpdate(
    struct KgenContext *ctx,
    const Kstring *elA,
    const Kstring *elB,
    const Kstring *elC,
    bool transC,
    TileMulCore core)
{
    char stmt[MAX_LENGTH];
    const char *first = elB->buf;
    const char *second = elA->buf;

    /*
     * Select order of source operands because type of 'mad' result is
     * determined by the first operand
     */
    if (transC) {
        first = elA->buf;
        second = elB->buf;
    }

    if (core != TILEMUL_MAD) {
        sprintf(stmt, "%s += %s * %s;\n", elC->buf, first, second);
    }
    else {
        sprintf(stmt, "%s = mad(%s, %s, %s);\n",
                elC->buf, first, second, elC->buf);
    }

    kgenAddStmt(ctx, stmt);
}
/*
 * Generate the declaration of the private storage backing a tile.
 *
 * For PRIV_STORAGE_ARRAY storage a single array "<type> <base>[<size>];" is
 * declared. Otherwise a comma separated list of variables
 * "<type> <base>0, <base>1, ...;" is declared, one per tile vector.
 *
 * All formatting is bounds-checked: a declaration that does not fit into
 * the local buffer is reported as an error instead of overrunning the
 * buffer.
 *
 * @ctx: generator context the declaration is appended to
 * @tile: tile to declare the storage for
 *
 * Returns 0 on success, -EOVERFLOW if the declaration does not fit into the
 * local buffer or if kgenAddStmt() reports a failure.
 */
int
declareOneTileStorage(struct KgenContext *ctx, const Tile *tile)
{
    char tmp[1024];
    const char *tname;
    size_t size;
    size_t used;
    size_t i;
    int n;

    getVectorTypeName(tile->dtype, tile->vecLen, &tname, NULL);
    size = tileVectorsNum(tile);

    if (tile->storType == PRIV_STORAGE_ARRAY) {
        /* size_t is cast explicitly to match the %lu conversion */
        n = snprintf(tmp, sizeof(tmp), "%s %s[%lu];\n",
                     tname, tile->baseName, (unsigned long)size);
        if (n < 0 || (size_t)n >= sizeof(tmp)) {
            return -EOVERFLOW;
        }
    }
    else {
        n = snprintf(tmp, sizeof(tmp), "%s %s0", tname, tile->baseName);
        if (n < 0 || (size_t)n >= sizeof(tmp)) {
            return -EOVERFLOW;
        }
        used = (size_t)n;

        /* append the remaining variables, tracking the space left */
        for (i = 1; i < size; i++) {
            n = snprintf(tmp + used, sizeof(tmp) - used, ", %s%lu",
                         tile->baseName, (unsigned long)i);
            if (n < 0 || (size_t)n >= sizeof(tmp) - used) {
                return -EOVERFLOW;
            }
            used += (size_t)n;
        }

        n = snprintf(tmp + used, sizeof(tmp) - used, ";\n");
        if (n < 0 || (size_t)n >= sizeof(tmp) - used) {
            return -EOVERFLOW;
        }
    }

    return kgenAddStmt(ctx, tmp) ? -EOVERFLOW : 0;
}
void genHeapTrsmResultToLDS( struct KgenContext *ctx, const BlasGenSettings *gset, const char *funcName, const char *dstName) { char tmp[1024]; char *alp; unsigned int l1Pans; DataType dtype = gset->kextra->dtype; const SubproblemDim *dims = gset->subdims; if(isComplexType(dtype)) { if (dtype == TYPE_COMPLEX_FLOAT) { alp = "(float2)(1.f, 0)"; } else { alp = "(double2)(1., 0)"; } } else { alp = "1."; } l1Pans = (unsigned int)dims[0].x / (unsigned int)dims[1].x; sprintf(tmp, "%s(%s, c, %s, (lid / %u * %lu), (lid %% %u * %lu), %lu);\n", funcName, dstName, alp, l1Pans, dims[1].y, l1Pans, dims[1].x, dims[0].bwidth); kgenAddStmt(ctx, tmp); }
void genFillTileWithNAN(struct KgenContext *ctx, const Tile *tile) { char tmp[1024]; Kstring elem; unsigned int incRows, incCols; unsigned int i, j, v; if (!tile->trans) { incRows = 1; v = incCols = umin(tile->vecLen, tile->nrCols); } else { v = incRows = umin(tile->vecLen, tile->nrRows); incCols = 1; } for (i = 0; i < tile->nrRows; i += incRows) { for (j = 0; j < tile->nrCols; j += incCols) { sprintfTileElement(&elem, tile, i, j, v); sprintf(tmp, "%s = NAN;\n", elem.buf); kgenAddStmt(ctx, tmp); } } kgenAddBlankLine(ctx); }
/*
 * Generate statements assigning 0 to every line segment of a tile,
 * followed by a blank line.
 *
 * The segment length is taken from tileLineSegmentLen(); segments are
 * stepped along columns for a non-transposed tile and along rows for a
 * transposed one.
 *
 * @ctx: generator context the statements are appended to
 * @tile: tile to be zeroed
 */
void
genZeroTile(struct KgenContext *ctx, const Tile *tile)
{
    char stmt[1024];
    Kstring el;
    unsigned int seg = tileLineSegmentLen(tile);
    unsigned int rowStep = tile->trans ? seg : 1;
    unsigned int colStep = tile->trans ? 1 : seg;
    unsigned int r, c;

    for (r = 0; r < tile->nrRows; r += rowStep) {
        for (c = 0; c < tile->nrCols; c += colStep) {
            sprintfTileElement(&el, tile, r, c, seg);
            sprintf(stmt, "%s = 0;\n", el.buf);
            kgenAddStmt(ctx, stmt);
        }
    }

    kgenAddBlankLine(ctx);
}
// unrolling generator for the f4zero function
/*
 * Emits the statement storing 0 through the generated "data" pointer and
 * advancing it.
 *
 * @ctx: generator context the statement is appended to
 * @priv: not used
 *
 * Returns the result of kgenAddStmt().
 */
static int
f4zeroSingle(struct KgenContext *ctx, void *priv)
{
    int ret;

    DUMMY_ARG_USAGE(priv);

    ret = kgenAddStmt(ctx, "*data++ = 0;\n");

    return ret;
}
/*
 * Generate the declaration of "kBegin", the starting position of the
 * K loop.
 *
 * For a non-upper matrix the loop starts at 0. For an upper matrix it
 * starts at currM; only when not in subgroup mode and with the
 * KEXTRA_TAILS_M flag set is currM rounded down to a multiple of the
 * block width.
 *
 * @ctx: generator context the statement is appended to
 * @dim: subproblem dimensions supplying the block width
 * @kflags: kernel extra flags
 * @subgMode: true when generating for the subgroup mode
 */
static void
genStartPosK(
    struct KgenContext *ctx,
    const SubproblemDim *dim,
    KernelExtraFlags kflags,
    bool subgMode)
{
    char stmt[1024];

    if (!isMatrixUpper(kflags)) {
        // K loop - from 0 till diagonal
        sprintf(stmt, "uint kBegin = 0;\n");
    }
    else if (subgMode || !(kflags & KEXTRA_TAILS_M)) {
        // K loop - from diagonal till M
        sprintf(stmt, "uint kBegin = currM;\n");
    }
    else {
        // K loop - from the block width aligned diagonal till M
        sprintf(stmt, "uint kBegin = currM / %lu * %lu;\n",
                dim->bwidth, dim->bwidth);
    }

    kgenAddStmt(ctx, stmt);
}
/*
 * Generate a tile multiplication in which the block of B is first copied
 * with the function 'copy2LDSFuncName' and then read from local memory.
 *
 * The copy call is surrounded by local memory barriers. For the duration
 * of tileMulGen() the name of B in the settings is replaced with "lB" and
 * the first-level block width with the tile extent (rows for a transposed
 * tile, columns otherwise); both are restored afterwards. mulOpts->memB is
 * switched to CLMEM_LOCAL_MEMORY before and set to CLMEM_GLOBAL_MEMORY
 * after the multiplication.
 *
 * @ctx: generator context the statements are appended to
 * @gset: generator settings; temporarily modified and restored
 * @mulOpts: tile multiplication options; memB is modified
 * @parTile: tile supplying the vector length and the block width
 * @copy2LDSFuncName: name of the generated copying function
 */
static void
genPreloadedTileMul(
    struct KgenContext *ctx,
    BlasGenSettings *gset,
    TileMulOpts *mulOpts,
    const Tile *parTile,
    const char* copy2LDSFuncName)
{
    char stmt[1024];
    KernelExtraFlags kflags = gset->kextra->flags;
    const char *savedNameB;
    unsigned int savedBwidth;
    const char *ptrName;

    getVectorTypeName(gset->kextra->dtype, parTile->vecLen, NULL, &ptrName);
    kgenPrintf(ctx, "lB.%s = tmpB;\n", ptrName);

    kgenAddBarrier(ctx, CLK_LOCAL_MEM_FENCE);

    /* argument order of the copy call depends on the layout of B */
    if (isMatrixAccessColMaj(CLBLAS_TRSM, kflags, MATRIX_B)) {
        sprintf(stmt, "%s(lB, uB, k0, gid * %lu, ldb);\n",
                copy2LDSFuncName, gset->subdims[0].x);
    }
    else {
        sprintf(stmt, "%s(lB, uB, gid * %lu, k0, ldb);\n",
                copy2LDSFuncName, gset->subdims[0].x);
    }
    kgenAddStmt(ctx, stmt);

    kgenAddBarrier(ctx, CLK_LOCAL_MEM_FENCE);
    kgenAddBlankLine(ctx);

    kgenAddStmt(ctx, "lB = lBMain;\n\n");

    /* remember the settings overridden for the multiplication */
    savedNameB = gset->varNames.B;
    savedBwidth = (unsigned int)gset->subdims[0].bwidth;

    mulOpts->memB = CLMEM_LOCAL_MEMORY;
    gset->varNames.B = "lB";
    if (parTile->trans) {
        gset->subdims[0].bwidth = parTile->nrRows;
    }
    else {
        gset->subdims[0].bwidth = parTile->nrCols;
    }

    tileMulGen(ctx, gset, mulOpts);

    gset->varNames.B = savedNameB;
    gset->subdims[0].bwidth = savedBwidth;
    mulOpts->memB = CLMEM_GLOBAL_MEMORY;
}
// Generate complete vector-vector product static void genVecMul( struct KgenContext *ctx, unsigned int m, unsigned int n, const Tile *a, const Tile *b, const Tile *c, bool conjA, bool conjB, TileMulCore core, bool wholeA) { unsigned int k; char tmp[MAX_LENGTH]; Kstring elA, elB, elC; unsigned int vlen = 0; bool isComplex; bool isDouble; isDouble = isDoubleBasedType(c->dtype); isComplex = isComplexType(c->dtype); if ((core == TILEMUL_DOT) && !isComplex) { vlen = commonTileSegmentLen(a, b); } else { vlen = 1; } sprintfTileElement(&elC, c, m, n, 1); if (!wholeA) { m = 0; } for (k = 0; k < a->nrCols; k += vlen) { sprintfTileElement(&elA, a, m, k, vlen); sprintfTileElement(&elB, b, k, n, vlen); /* * Using 'dot' is not valid for complex, and replaced with '*' operator * for unvectorized real data */ if ((core == TILEMUL_DOT) && (vlen > 1)) { sprintf(tmp, "%s += dot(%s, %s);\n", elC.buf, elA.buf, elB.buf); } else if (isComplex) { Kstring expr; sprintfComplexMulUpdate(&expr, &elC, &elA, &elB, &elC, isDouble, conjA, conjB, core); kgenAddStmt(ctx, expr.buf); } else { genRealMulUpdate(ctx, &elA, &elB, &elC, c->trans, core); } } }
/* * Setup coordinates before beginning a trsm stage * A caller must ensure the strict stage sequence: * BLOCK_UPDATE -> TILE_UPDATE */ static void genSetupCoords( struct KgenContext *ctx, const BlasGenSettings *gset, enum TrsmStage stage) { char tmp[1024]; KernelExtraFlags kflags = gset->kextra->flags; const SubproblemDim *dims = gset->subdims; unsigned int l1Pans = (unsigned int)(dims[0].x / dims[1].x); const char *s; s = isMatrixUpper(kflags) ? "currM" : "m0"; sprintf(tmp, "coordA = %s + (lid / %u * %lu);\n", s, l1Pans, dims[1].y); kgenAddStmt(ctx, tmp); switch (stage) { case BLOCK_UPDATE: if (isMatrixUpper(kflags)) { sprintf(tmp, "k0 = currM + %lu;\n", dims[0].y); } else { sprintf(tmp, "k0 = 0;\n"); } break; case TILE_UPDATE: if (isMatrixUpper(kflags)) { sprintf(tmp, "k0 = currM + %lu - m1 * %lu;\n", dims[0].y - dims[1].y, dims[1].y); } else { sprintf(tmp, "k0 = m0 + m1 * %lu;\n", dims[1].y); } break; } kgenAddStmt(ctx, tmp); sprintf(tmp, "coordB = gid * %lu + (lid %% %u * %lu);\n", dims[0].x, l1Pans, dims[1].x); kgenAddStmt(ctx, tmp); kgenAddBlankLine(ctx); }
/*
 * Post-fetch callback zeroing a triangular part of the fetched tile A.
 *
 * For every element of the block a conditional assignment is generated
 * that replaces the element with 0 when the comparison of its position
 * against "n" holds. The 'diag' flag selects the comparison (">=" versus
 * ">"), the coordinate name ("k" versus "coordA"), the block orientation
 * and the order of the tile indices. With TILEMUL_SKEW_B set, an extra
 * "Ktail <= <row>" condition is prepended.
 *
 * pfPriv->fetchNumA is incremented once per block row.
 *
 * @ctx: generator context the statements are appended to
 * @mrole: not used
 * @priv: pointer to the struct symvPrivate with the callback data
 *
 * Always returns 0.
 */
static int
genPostFetchVertDiag(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    TileMulOpts *mulOpts = sp->mulOpts;
    Tile *tile = (Tile *)&pfPriv->gset->tileA;
    bool diag = sp->diag;
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    const char *cmp = diag ? ">=" : ">";
    const char *name = diag ? "k" : "coordA";
    char stmt[1024], tailCond[128] = "";
    char offs[2][128];
    size_t width, height;
    unsigned int col, row;

    (void)mrole;

    // zero triangular part of tile a
    // either single row of tile a either the whole tile have been fetched
    if (diag) {
        height = dims->y;
        width = dims->bwidth;
    }
    else {
        height = dims->bwidth;
        width = dims->y;
    }

    // loop through block rows
    for (row = 0; row < height; row++) {
        // loop through all elements of block row
        for (col = 0; col < width; col++) {
            Kstring el;

            if (diag) {
                sprintfTileElement(&el, tile, row, col, 1);
            }
            else {
                sprintfTileElement(&el, tile, col, row, 1);
            }

            genAdd(offs[0], col);
            genAdd(offs[1], row);

            if (mulOpts->flags & TILEMUL_SKEW_B) {
                sprintf(tailCond, "Ktail <= %i || ", row);
            }

            sprintf(stmt, "%s = %s%s%s %s n%s ? 0 : %s;\n",
                    el.buf, tailCond, name, offs[0], cmp, offs[1], el.buf);
            kgenAddStmt(ctx, stmt);
        }

        pfPriv->fetchNumA++;
    }

    return 0;
}
/*
 * Add statement setting initial local pointer for the work item
 *
 * @ctx: generator context the statement is appended to
 * @ptrName: name of the pointer variable to advance
 * @ld: lead dimension for the local block in float words;
 *      if it's zero, the "ld" argument of a generated function is
 *      used instead
 * @transpose: swaps the row offset and the element offset when 'ld' is set
 * @pgran: work group granularity
 * @gpriv: private generator data describing the copy
 *
 * The statement form depends on whether the block height is smaller than
 * the total work group size. A blank line follows the statement.
 */
static void
addSettingPtrCode(
    struct KgenContext *ctx,
    const char *ptrName,
    size_t ld,
    bool transpose,
    const PGranularity *pgran,
    GenPriv *gpriv)
{
    char stmt[4096];
    const SubproblemDim *dim = gpriv->dim;
    const ItemWork *work = gpriv->work;
    const char *fld = dtypeUPtrField(gpriv->dtype);
    size_t gsize = pgran->wgSize[0] * pgran->wgSize[1];

    if (!ld) {
        /* the leading dimension comes from a generated variable */
        if (dim->y < gsize) {
            sprintf(stmt, "%s.%s += (startRow + %s / %u) * %s + "
                          "startCol + %s %% %u * %lu;\n",
                    ptrName, fld, lidVarName, work->itemsPerRow,
                    gpriv->globLDName, lidVarName, work->itemsPerRow,
                    work->nrCols);
        }
        else {
            sprintf(stmt, "%s.%s += (startRow + %s * %lu) * %s + startCol;\n",
                    ptrName, fld, lidVarName, work->nrRows,
                    gpriv->globLDName);
        }
    }
    else {
        // offset between two rows and two elements in each row
        size_t rowOff = transpose ? 1 : ld;
        size_t elemOff = transpose ? ld : 1;

        if (dim->y < gsize) {
            sprintf(stmt, "%s.%s += (%s / %u) * %lu + (%s %% %u * %lu) * %lu;\n",
                    ptrName, fld, lidVarName, work->itemsPerRow,
                    rowOff, lidVarName, work->itemsPerRow, work->nrCols,
                    elemOff);
        }
        else {
            sprintf(stmt, "%s.%s += %s * %lu * %lu;\n",
                    ptrName, fld, lidVarName, work->nrRows, rowOff);
        }
    }

    kgenAddStmt(ctx, stmt);
    kgenAddBlankLine(ctx);
}
/*
 * Post-fetch callback zeroing a triangular part of the fetched tile A.
 *
 * For every element of the block a conditional assignment is generated
 * that replaces the element with 0 when "Ktail <= <row>" holds or when the
 * comparison of its position against "n" holds. The 'diag' flag selects
 * the comparison (">=" versus ">") and the order of the tile indices; the
 * 'coord' flag selects the coordinate variable (coordA versus k) and the
 * block orientation.
 *
 * pfPriv->fetchNumA is incremented once per block row.
 *
 * @ctx: generator context the statements are appended to
 * @mrole: not used
 * @priv: pointer to the struct symvPrivate with the callback data
 *
 * Always returns 0.
 */
static int
genPostFetchDiag(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    Tile *tile = (Tile *)&pfPriv->gset->tileA;
    bool diag = sp->diag;
    bool tra = sp->coord;
    const KernelVarNames *vnames = &pfPriv->gset->varNames;
    const char *coord = tra ? vnames->coordA : vnames->k;
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    const char *cmp = diag ? ">=" : ">";
    char stmt[1024];
    char offs[2][128];
    size_t width, height;
    unsigned int col, row;

    (void)mrole;

    // zero triangular part of tile a
    // either single row of tile a either the whole tile have been fetched
    if (!tra) {
        height = dims->y;
        width = dims->bwidth;
    }
    else {
        height = dims->bwidth;
        width = dims->y;
    }

    // loop through block rows
    for (row = 0; row < height; row++) {
        // loop through all elements of block row
        for (col = 0; col < width; col++) {
            Kstring el;

            if (diag) {
                sprintfTileElement(&el, tile, col, row, 1);
            }
            else {
                sprintfTileElement(&el, tile, row, col, 1);
            }

            genAdd(offs[0], col);
            genAdd(offs[1], row);

            sprintf(stmt, "%s = Ktail <= %i || %s%s %s n%s ? 0 : %s;\n",
                    el.buf, row, coord, offs[0], cmp, offs[1], el.buf);
            kgenAddStmt(ctx, stmt);
        }

        pfPriv->fetchNumA++;
    }

    return 0;
}
int f4zeroBlockGen( struct KgenContext *ctx, const SubproblemDim *dim, const PGranularity *pgran, const char *memPrefix) { char tmp[1024]; ItemWork work; LoopCtl loopCtl; GenPriv priv; char pref; LoopUnrollers unrollers; if (!strcmp(memPrefix, "__local")) { pref = 'l'; } else if (!strcmp(memPrefix, "__global")) { pref = 'g'; } else { return -EINVAL; } if (dim->y != 1) { return -EINVAL; } memset(&loopCtl, 0, sizeof(loopCtl)); memset(&unrollers, 0, sizeof(unrollers)); memset(&priv, 0, sizeof(GenPriv)); initGenPriv(&priv, TYPE_COMPLEX_DOUBLE, FLOAT4_VECLEN * sizeof(cl_float), dim, 0, (const ItemWork*)&work, pgran); getItemWork(&work, dim, pgran, priv.nfloats, priv.vecLen); sprintf(tmp, f4zeroDecl, pref, dim->x, memPrefix); kgenDeclareFunction(ctx, tmp); kgenBeginFuncBody(ctx); // declare local ID variable and set data offset kgenDeclareLocalID(ctx, lidVarName, pgran); sprintf(tmp, "\ndata += %s * %lu;\n\n", lidVarName, work.nrCols); kgenAddStmt(ctx, tmp); unrollers.genSingle = f4zeroSingle; loopCtl.inBound = (unsigned int)work.nrCols; unrollers.getVecLen = getVecLen; kgenLoopUnroll(ctx, &loopCtl, TYPE_COMPLEX_DOUBLE, &unrollers, &priv); if (work.tail) { addTailCode(ctx, &priv, NULL, f4zeroSingle); } return kgenEndFuncBody(ctx); }
/*
 * Post-fetch callback building a mirrored tile.
 *
 * Every element (row, col) of the private tile 'tilea' is assigned either
 * the element (col, row) or the element (row, col) of the fetched tile,
 * selected by a generated comparison of "k" and "n" with the respective
 * offsets. The block orientation follows the TILEMUL_TRA flag. After all
 * statements are generated the fetched tile descriptor is overwritten with
 * the descriptor of 'tilea'.
 *
 * pfPriv->fetchNumA is incremented once per block row.
 *
 * @ctx: generator context the statements are appended to
 * @mrole: not used
 * @priv: pointer to the struct symvPrivate with the callback data
 *
 * Always returns 0.
 */
static int
genPostFetchMirror(
    struct KgenContext *ctx,
    MatrixRole mrole,
    void *priv)
{
    struct symvPrivate *sp = (struct symvPrivate *)priv;
    TilePostFetchPrivate *pfPriv = sp->pfPriv;
    TileMulOpts *mulOpts = sp->mulOpts;
    Tile *fetched = (Tile *)&pfPriv->gset->tileA;
    Tile *mirrored = &sp->tilea;
    bool tra = ((mulOpts->flags & TILEMUL_TRA) != 0);
    const struct SubproblemDim *dims = &pfPriv->gset->subdims[1];
    const char *cmp = ">";
    char stmt[1024];
    char offs[2][128];
    size_t width, height;
    unsigned int col, row;

    (void)mrole;

    // zero triangular part of tile a
    // either single row of tile a either the whole tile have been fetched
    if (!tra) {
        height = dims->y;
        width = dims->bwidth;
    }
    else {
        height = dims->bwidth;
        width = dims->y;
    }

    // loop through block rows
    for (row = 0; row < height; row++) {
        // loop through all elements of block row
        for (col = 0; col < width; col++) {
            Kstring swapped, straight, target;

            sprintfTileElement(&swapped, fetched, col, row, 1);
            sprintfTileElement(&straight, fetched, row, col, 1);
            sprintfTileElement(&target, mirrored, row, col, 1);

            genAdd(offs[0], col);
            genAdd(offs[1], row);

            sprintf(stmt, "%s = k%s %s n%s ? %s : %s;\n",
                    target.buf, offs[0], cmp, offs[1],
                    swapped.buf, straight.buf);
            kgenAddStmt(ctx, stmt);
        }

        pfPriv->fetchNumA++;
    }

    *fetched = *mirrored;

    return 0;
}
int genMulTiles( struct KgenContext *ctx, const BlasGenSettings *gset, const TileMulOpts *mulOpts) { char s[32]; const CLBLASKernExtra *kextra = gset->kextra; const char *tNameIn; unsigned int i; unsigned int iend; bool tra = ((mulOpts->flags & TILEMUL_TRA) != 0); bool trb = ((mulOpts->flags & TILEMUL_TRB) != 0); TileMulCore core; int ret; ret = checkInput(gset, mulOpts); if (ret) { return ret; } getVectorTypeName(kextra->dtype, kextra->vecLen, &tNameIn, NULL); core = checkReplaceCore(gset, mulOpts->core, tra, trb); if (((core == TILEMUL_MULADD || isComplexType(kextra->dtype)) && !tra && trb)) { sprintf(s,"%s sum;\n", tNameIn); kgenAddStmt(ctx, s); } iend = (unsigned int)((mulOpts->flags & TILEMUL_TRA) ? gset->subdims[1].bwidth : gset->subdims[1].y); for (i = 0; i < iend; i++) { genMulLineOnTile(ctx, gset, mulOpts, i, true); } // just to get state ret = kgenAddStmt(ctx, NULL); return (ret) ? -EOVERFLOW : 0; }
static int copyImgVec(struct KgenContext *ctx, void *priv) { char tmp[1024]; GenPriv *gpriv = (GenPriv*)priv; dtypeUPtrField(gpriv->dtype); sprintf(tmp, "write_imageui(%s, (int2)(%s++,%s), as_uint4(*%s.f4v++));\n", gpriv->dstName, gpriv->imgXName, gpriv->imgYName, gpriv->srcName); return kgenAddStmt(ctx, tmp); }
static int copyMemPostUnroll(struct KgenContext *ctx, void *priv) { char tmp[1024]; const char *s[2] = {"src", "dst"}; GenPriv *gpriv = (GenPriv*)priv; int gdir; const char *vfield; gdir = (gpriv->dir == DBLOCK_GLOBAL_TO_LOCAL) ? 0 : 1; if (gpriv->work && gpriv->work->tail) { addCopyTailCode(ctx, gpriv); } if (!gpriv->transp) { kgenAddBlankLine(ctx); } // modify pointers vfield = dtypeUPtrField(gpriv->dtype); sprintf(tmp, "%s.%s += %s;\n", s[gdir], vfield, gpriv->globLDName); kgenAddStmt(ctx, tmp); if (gpriv->transp) { sprintf(tmp, "%s.%s++;\n", s[1 - gdir], vfield); } else { if (gpriv->locLDName) { sprintf(tmp, "%s.%s += %s;\n", s[1 - gdir], vfield, gpriv->locLDName); } else { sprintf(tmp, "%s.%s += %lu;\n", s[1 - gdir], vfield, gpriv->lmemLD); } } return kgenAddStmt(ctx, tmp); }
/*
 * Generate a statement assigning 0 to a segment of a tile.
 *
 * @ctx: generator context the statement is appended to
 * @tile: tile containing the segment
 * @row: row of the first element of the segment
 * @col: column of the first element of the segment
 * @len: segment length passed to sprintfTileElement()
 */
void
genSetZeroInTile(
    struct KgenContext *ctx,
    const Tile *tile,
    unsigned int row,
    unsigned int col,
    unsigned int len)
{
    Kstring target;
    char stmt[1024];

    sprintfTileElement(&target, tile, row, col, len);

    sprintf(stmt, "%s = 0;\n", target.buf);
    kgenAddStmt(ctx, stmt);
}
/*
 * Generate a statement assigning the unit value of the tile data type, as
 * returned by strOne(), to a single tile element.
 *
 * @ctx: generator context the statement is appended to
 * @tile: tile containing the element
 * @row: row of the element
 * @col: column of the element
 */
void
genSetUnitInTile(
    struct KgenContext *ctx,
    const Tile *tile,
    unsigned int row,
    unsigned int col)
{
    Kstring target;
    char stmt[1024];

    sprintfTileElement(&target, tile, row, col, 1);

    sprintf(stmt, "%s = %s;\n", target.buf, strOne(tile->dtype));
    kgenAddStmt(ctx, stmt);
}
static int copyImgPreUnroll(struct KgenContext *ctx, void *priv) { char tmp[1024]; GenPriv *gpriv = (GenPriv*)priv; if (gpriv->packed) { sprintf(tmp, "%s = startX + (index * %lu) %% pLine / %u;\n" "%s = startY + (index * %lu) / pLine;\n" "%s = src;\n\n", gpriv->imgXName, gpriv->dim->x, FLOAT4_VECLEN / gpriv->nfloats, gpriv->imgYName, gpriv->dim->x, gpriv->srcName); } else { sprintf(tmp, "%s = x;\n" "%s = y;\n" "%s = src;\n\n", gpriv->imgXName, gpriv->imgYName, gpriv->srcName); } return kgenAddStmt(ctx, tmp); }
/*
 * Generate the initialization of "currM" followed by a blank line.
 *
 * For an upper triangular matrix currM starts at 0; otherwise it starts at
 * (M - 1) rounded down to a multiple of the block height.
 *
 * @ctx: generator context the statement is appended to
 * @dim: subproblem dimensions supplying the block height
 * @kflags: kernel extra flags
 */
static void
genInitCurrM(
    struct KgenContext *ctx,
    const SubproblemDim *dim,
    KernelExtraFlags kflags)
{
    char stmt[1024];

    if (!isMatrixUpper(kflags)) {
        sprintf(stmt, "currM = (M - 1) / %lu * %lu;\n", dim->y, dim->y);
    }
    else {
        strcpy(stmt, "currM = 0;\n");
    }

    kgenAddStmt(ctx, stmt);
    kgenAddBlankLine(ctx);
}