// // FIXME: Report correct return value - Needs change in KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { DUMMY_ARGS_USAGE_2(subdims, pgran); CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; char tempTemplate[32*1024]; if ( buf == NULL) // return buffer size { buflen = (32 * 1024 * sizeof(char)); return (ssize_t)buflen; } #ifdef DEBUG_ROTMG printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif strcpy( tempTemplate, (char*)rotmg_kernel ); kprintf kobj( Prefix[extraFlags->dtype], 1, false, false); kobj.spit((char*)buf, tempTemplate); return (32 * 1024 * sizeof(char)); }
// Stores a copy of *val under `key` in this dictionary object, replacing any
// entry previously stored under the same key.
//
// Returns a pointer to the copy held by the dictionary, or NULL when this
// object is not a dictionary or `val` is NULL. In both failure cases the
// dictionary is left untouched.
BencodeObject* BencodeObject::setValueForKey(const char* key, BencodeObject* val)
{
    // Reject a NULL value up front: it is dereferenced below, and checking
    // only after removeValueForKey() would already have dropped the old entry.
    if (_type != BencodeTypeDictionary || val == NULL)
    {
        return NULL;
    }

    // insert() does not overwrite an existing key, so erase it first.
    removeValueForKey(key);

    BencodeObject kobj(key, BencodeModeCopy);
    return const_cast<BencodeObject*>(
        &_dictValue->insert(BencodeDictStorage::value_type(kobj, *val)).first->second);
}
// // FIXME: Report correct return value - Needs change in KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { DUMMY_ARG_USAGE(subdims); size_t BLOCKSIZE = pgran->wgSize[0]; char tempTemplate[32*1024]; if ( buf == NULL) // return buffer size { buflen = (32 * 1024 * sizeof(char)); return (ssize_t)buflen; } CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; #ifdef DEBUG_ASUM printf("ASUM GENERATOR called....\n"); printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif unsigned int vecLenA = extraFlags->vecLenA; #ifdef DEBUG_ASUM printf("Vector length used : %d\n\n", vecLenA); #endif bool doVLOAD = false; if( extraFlags->flags & KEXTRA_NO_COPY_VEC_A ) { doVLOAD = true; #ifdef DEBUG_ASUM printf("DOing VLOAD as Aligned Data Pointer not Availabe\n"); #endif } else { #ifdef DEBUG_ASUM printf("Using Aligned Data Pointer \n"); #endif } strcpy( tempTemplate, (char*)asum_kernel ); kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD, BLOCKSIZE); kobj.spit((char*)buf, tempTemplate); return (32 * 1024 * sizeof(char)); }
// // FIXME: Report correct return value - Needs change in KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { size_t BLOCKSIZE = pgran->wgSize[0]; char tempTemplate[32*1024]; SolutionStep *step = container_of(subdims, subdims, SolutionStep); if ( buf == NULL) // return buffer size { buflen = (32 * 1024 * sizeof(char)); return (ssize_t)buflen; } CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; unsigned int vecLenA = extraFlags->vecLenA; bool doVLOAD = false; if( extraFlags->flags & KEXTRA_NO_COPY_VEC_A ) { doVLOAD = true; } const char *kernName; if(step->args.redctnType == REDUCE_BY_HYPOT) { kernName = nrm2_hypot_kernel; } else if (step->args.redctnType == REDUCE_BY_SSQ) { kernName = nrm2_ssq_kernel; } else { printf(" Error in selecting kernel!\n"); return 0; } strcpy( tempTemplate, kernName ); kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD, BLOCKSIZE); kobj.spit((char*)buf, tempTemplate); return (32 * 1024 * sizeof(char)); }
static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { CLBLASKernExtra *kextra = (CLBLASKernExtra*)extra; KernelExtraFlags kflags = kextra->flags; DataType dtype = kextra->dtype; char tempTemplate[32*1024]; char itemx[10], itemy[10], width[10], itemy_by_width[10]; size_t Y, X, BLOCKSIZE, ITEMX, ITEMY; if (buf == NULL) { buflen = 32*1024*sizeof(char); return (ssize_t)buflen; } // // Row-major is implemented in terms of column major routines // if ((kflags & KEXTRA_COLUMN_MAJOR) == 0) { return 0; } kprintf kobj(Prefix[dtype], kextra->vecLenA, true, true); BLOCKSIZE = pgran->wgSize[0]; #ifdef DEBUG_SYMM printf("SYMM- generator(): Blocksize passed = %lu, subdimy = %lu, subdimx = %lu, veclen = %lu \n", BLOCKSIZE, subdims->y, subdims->x, kextra->vecLenA); #endif Y = 16; while (Y*(kextra->vecLenA) > subdims->y) { Y /= 2; } X = BLOCKSIZE/Y; ITEMY = (subdims->y) / Y; ITEMX = (subdims->x) / X; if (ITEMX == 0) { ITEMX = 1; } if ((BLOCKSIZE % Y) || ((subdims->y) % Y) || ((subdims->x)%X) || (ITEMY % kextra->vecLenA)) { printf("WARNING: SYMM- generator: subdim and blocksize in-compatible.\n"); } sprintf(width, "%" SPREFIX "u", Y); sprintf(itemy, "%" SPREFIX "u", ITEMY); sprintf(itemx, "%" SPREFIX "u", ITEMX); sprintf(itemy_by_width, "%" SPREFIX "u", (size_t) ITEMY/kextra->vecLenA); kobj.put("%WIDTH", width); kobj.put("%ITEMX", itemx); kobj.put("%ITEMY", itemy); kobj.put("%ITEMY_BY_V", itemy_by_width); #ifdef DEBUG_SYMM printf("ColMajor SYMM - WIDTH = %s, ITEMX = %s, ITEMY = %s\n", width, itemx, itemy); #endif strcpy(tempTemplate, SYMM_C_KERNEL); kobj.spit(buf, tempTemplate); #ifdef DEBUG_SYMM printf("Kernel = \n%s\n", buf); #endif size_t tail = strlen(buf) + 1; while(tail < 32*1024) { buf[tail++] = 0; } return 32*1024*sizeof(char); }
void BencodeObject::removeValueForKey(const char* key) { BencodeObject kobj(key, BencodeModeCopy); _dictValue->erase(kobj); }
// // FIXME: Report correct return value when "buf" is NULL - Needs change in KPRINTF // FIXME: Return correct return value when "buf" is NON NULL - Needs change in KPRINTF // FIXME: "buflen" check needs to be more accurate. Relies on above changes to KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; unsigned int vecLenA = extraFlags->vecLenA; char tempTemplate[32*1024]; char TARGETROWS_S[10], NLOOPS_S[10], TARGETWIDTH_S[10]; size_t TARGETROWS, NLOOPS, TARGETWIDTH; char TARGETHEIGHT_S[10], BLOCKSIZE_S[10], TRIANGLE_HEIGHT_S[10]; size_t TARGETHEIGHT; bool doVLOAD = false; int BLOCKSIZE = pgran->wgSize[0] * pgran->wgSize[1]; // [1] will always be 1 since we are a 1D implementation if (buf == NULL) // PENDING: Return correct buffer size { return (32 * 1024 * sizeof(char)); } if (buflen > 32*1024) { #ifdef DEBUG_TRSV_GEMV printf("TRSV GEMV: generator(): WARNING: Returning 0 as buflen is > 32K\n"); #endif return 0; } if( extraFlags->flags & KEXTRA_NO_COPY_VEC_A ) { doVLOAD = true; #ifdef DEBUG_TRSV_GEMV printf("DOing VLOAD as Aligned Data Pointer not Availabe\n"); #endif } else { #ifdef DEBUG_TRSV_GEMV printf("Using Aligned Data Pointer .........................\n"); #endif } kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD); #ifdef DEBUG_TRSV_GEMV printf("TRSV GEMV GENERATOR called....\n"); #endif clblasUplo uplo = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower; clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor; clblasTranspose trans = (extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? 
clblasConjTrans: clblasNoTrans); bool unit = (((extraFlags->flags) & KEXTRA_UNIT_DIAGONAL) != 0); // unity and doConj handled in setKernelArgs if ( order == clblasRowMajor ) { order = clblasColumnMajor; if ( trans == clblasNoTrans) { trans = clblasTrans; } else if ( trans == clblasTrans ) { trans = clblasNoTrans; } else // clblasConjTrans { trans = clblasNoTrans; } uplo = ( uplo == clblasUpper)? clblasLower : clblasUpper; } // // Check Feasibility and then generate the code. // if ( trans != clblasNoTrans) { if (isTransposeFeasible(subdims->y, BLOCKSIZE, vecLenA, TARGETHEIGHT) == false) { return 0; } sprintf( TARGETHEIGHT_S, "%" SPREFIX "u", TARGETHEIGHT ); sprintf( BLOCKSIZE_S, "%d", BLOCKSIZE ); sprintf( TRIANGLE_HEIGHT_S, "%" SPREFIX "u", subdims->y ); kobj.put("%TARGET_HEIGHT", TARGETHEIGHT_S); kobj.put("%BLOCKSIZE", BLOCKSIZE_S); kobj.put("%TRIANGLE_HEIGHT", TRIANGLE_HEIGHT_S); ( uplo == clblasLower )? (strcpy(tempTemplate, (char*)trsv_CLT_ComputeRectangle_kernel)) : (strcpy(tempTemplate, (char*)trsv_CUT_ComputeRectangle_kernel)); } else // No-Transpose cases... { if (isNoTransposeFeasible(subdims->y, BLOCKSIZE, vecLenA, TARGETROWS, TARGETWIDTH, NLOOPS) == false) { return 0; } sprintf( TARGETROWS_S, "%" SPREFIX "u", TARGETROWS ); sprintf( TARGETWIDTH_S, "%" SPREFIX "u", TARGETWIDTH ); sprintf( NLOOPS_S, "%" SPREFIX "u", NLOOPS ); kobj.put("%TARGET_ROWS", TARGETROWS_S); kobj.put("%TARGET_WIDTH", TARGETWIDTH_S); kobj.put("%NLOOPS", NLOOPS_S); if (unit) { ( uplo == clblasLower )? (strcpy(tempTemplate, (char*)trsv_CL_ComputeRectangle_kernel)) : (strcpy(tempTemplate, (char*)trsv_CU_ComputeRectangle_kernel)); } else { ( uplo == clblasLower )? (strcpy(tempTemplate, (char*)trsv_CL_ComputeRectangle_NonUnity_kernel)) : (strcpy(tempTemplate, (char*)trsv_CU_ComputeRectangle_NonUnity_kernel)); } } #ifdef DEBUG_TRSV_GEMV printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif // FIXME: VECTORSIZE HARD CODED // FIXME : SetKernelArgs.. 
sends offa, offx, and lda should be received as uint #ifdef DEBUG_TRSV_GEMV printf("Vector length used : %d\n\n", vecLenA); #endif kobj.spit((char*)buf, tempTemplate); return (32 * 1024 * sizeof(char)); }
// // FIXME: Report correct return value - Needs change in KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { int BLOCKSIZE = pgran->wgSize[0]; char tempTemplate[64*1024]; char targetRows[10], blockSize[10]; if ( buf == NULL) // return buffer size { buflen = (64 * 1024 * sizeof(char)); return (ssize_t)buflen; } CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; #ifdef DEBUG_HER2 printf("HER2 GENERATOR called....\n"); #endif clblasUplo uplo = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower; if ((subdims->y % extraFlags->vecLenA) != 0) { printf("WARNING: HER2: generator: TARGETROWS must be divisible by Vector Length\n"); return 0; } size_t TARGETROWS = 0; ( uplo == clblasLower )? (strcpy(tempTemplate, (char*)syr2_her2_CL_kernel)) : (strcpy(tempTemplate, (char*)syr2_her2_CU_kernel)); TARGETROWS = subdims->y; if ((BLOCKSIZE % TARGETROWS) != 0) { printf("WARNING: HER2: generator: Invalid Block Size\n"); return 0; } #ifdef DEBUG_HER2 printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif // FIXME: VECTORSIZE HARD CODED // FIXME : SetKernelArgs.. 
sends offa, offx, and lda should be received as uint unsigned int vecLenA = extraFlags->vecLenA; #ifdef DEBUG_HER2 printf("Vector length used : %d\n\n", vecLenA); #endif bool doVLOAD = false; if( extraFlags->flags & KEXTRA_NO_COPY_VEC_A ) { doVLOAD = true; #ifdef DEBUG_HER2 printf("DOing VLOAD as Aligned Data Pointer not Availabe\n"); #endif } else { #ifdef DEBUG_HER2 printf("Using Aligned Data Pointer .........................\n"); #endif } kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD, doVLOAD); sprintf( targetRows, "%" SPREFIX "u", TARGETROWS ); sprintf( blockSize, "%d", BLOCKSIZE ); #ifdef DEBUG_HER2 printf("TARGET ROWS = %s\n", targetRows); printf("BLOCK SIZE = %s\n", blockSize); #endif kobj.put("%TARGET_ROWS", (const char *)targetRows); kobj.put("%BLOCKSIZE", (const char *) blockSize); kobj.spit((char*)buf, tempTemplate); return (64 * 1024 * sizeof(char)); // return 0;//(ret < 0) ? -EOVERFLOW : ret; }
// // FIXME: Report correct return value - Needs change in KPRINTF // static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { size_t BLOCKSIZE = pgran->wgSize[0]; char tempTemplate[32*1024]; char targetRows[10], blockSize[10]; if ( buf == NULL) // return buffer size { buflen = (64 * 1024 * sizeof(char)); return (ssize_t)buflen; } CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; #ifdef DEBUG_TRMV printf("TRMV GENERATOR called....\n"); #endif if((( extraFlags->flags & KEXTRA_TRANS_A) || ( extraFlags ->flags & KEXTRA_CONJUGATE_A ))) { #ifdef DEBUG_TRMV printf("A is trans or CONJ-TRANS\n"); #endif } else { #ifdef DEBUG_TRMV printf("A is noTrans...\n"); #endif } clblasUplo uplo = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower; clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor; clblasTranspose trans = ( extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? clblasConjTrans: clblasNoTrans); // unity and doConj handled in setKernelArgs if ( order == clblasRowMajor ) { order = clblasColumnMajor; if ( trans == clblasNoTrans) { trans = clblasTrans; } else if ( trans == clblasTrans ) { trans = clblasNoTrans; } else // clblasConjTrans { trans = clblasNoTrans; } uplo = ( uplo == clblasUpper)? clblasLower : clblasUpper; } if ((subdims->y % extraFlags->vecLenA) != 0) { printf("WARNING: TRMV: generator: TARGETROWS must be divisible by Vector Length\n"); return 0; } size_t TARGETROWS = 0; if ( trans == clblasNoTrans) { #ifdef DEBUG_TRMV printf("clblasNoTrans....%s\n", ( uplo == clblasLower )?"LOWER":"UPPER"); #endif ( uplo == clblasLower )? 
(strcpy(tempTemplate, (char*)trmv_CL_kernel)) : (strcpy(tempTemplate, (char*)trmv_CU_kernel)); TARGETROWS = subdims->y; if ((BLOCKSIZE % TARGETROWS) != 0) { printf("WARNING: TRMV: generator: Invalid Block Size\n"); return 0; } } else // Transpose cases... { #ifdef DEBUG_TRMV printf("clblasTrans....%s\n", ( uplo == clblasLower )?"LOWER":"UPPER"); #endif ( uplo == clblasLower )? (strcpy(tempTemplate, (char*)trmv_CLT_kernel)) : (strcpy(tempTemplate, (char*)trmv_CUT_kernel)); if ((BLOCKSIZE % (subdims->y / extraFlags->vecLenA)) != 0) { printf("WARNING: TRMV: generator: Invalid Block Size\n"); return 0; } TARGETROWS = BLOCKSIZE/(subdims->y / extraFlags->vecLenA); } #ifdef DEBUG_TRMV printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif // FIXME: VECTORSIZE HARD CODED // FIXME : SetKernelArgs.. sends offa, offx, and lda should be received as uint unsigned int vecLenA = extraFlags->vecLenA; #ifdef DEBUG_TRMV printf("Vector length used : %d\n\n", vecLenA); #endif bool doVLOAD = false; if( extraFlags->flags & KEXTRA_NO_COPY_VEC_A ) { doVLOAD = true; #ifdef DEBUG_TRMV printf("DOing VLOAD as Aligned Data Pointer not Availabe\n"); #endif } else { #ifdef DEBUG_TRMV printf("Using Aligned Data Pointer .........................\n"); #endif } kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD); sprintf( targetRows, "%" SPREFIX "u", TARGETROWS ); sprintf( blockSize, "%" SPREFIX "u", BLOCKSIZE ); #ifdef DEBUG_TRMV printf("TARGET ROWS = %s\n", targetRows); printf("BLOCK SIZE = %s\n", blockSize); #endif kobj.put("%TARGET_ROWS", (const char *)targetRows); kobj.put("%BLOCKSIZE", (const char *) blockSize); kobj.spit((char*)buf, tempTemplate); return (64 * 1024 * sizeof(char)); // return 0;//(ret < 0) ? -EOVERFLOW : ret; }
int main(int argc, char** argv) { int ppw=10; // Point per wavelength std::string filename="kitenormcond10.txt"; std::vector<double> freqs; freqs.push_back(5); freqs.push_back(10); freqs.push_back(20); freqs.push_back(40); freqs.push_back(80); freqs.push_back(160); freqs.push_back(320); freqs.push_back(640); std::vector<double> norm_sl(freqs.size()); std::vector<double> norm_dl(freqs.size()); std::vector<double> norm_combined1(freqs.size()); std::vector<double> norm_combined2(freqs.size()); std::vector<double> cond_sl(freqs.size()); std::vector<double> cond_dl(freqs.size()); std::vector<double> cond_combined1(freqs.size()); std::vector<double> cond_combined2(freqs.size()); clock_t start, finish; double time; start=clock(); #ifdef BEM2DMPI MPI_Init(&argc, &argv); int nprow=4; // Number of rows in process grid int npcol=2; // Number of columns in process grid int mb=24; // Row Block size int nb=24; // Column Block size bem2d::BlacsSystem* b=bem2d::BlacsSystem::Initialize(nprow,npcol,mb,nb); // Exit if Context could not be created or process does not belong to context if (!b) { std::cout << "Could not create Blacs context" << std::endl; MPI_Finalize(); exit(1); } if ((b->get_myrow()==-1)&&(b->get_mycol()==-1)) { MPI_Finalize(); exit(0); } #endif for (int j=0; j<freqs.size(); j++) { bem2d::freqtype k= {(double)freqs[j],0}; double eta1=k.re; // Coupling between conj. double and single layer pot. 
double eta2=cbrt(k.re*k.re); bem2d::pCurve kobj(new bem2d::Kite); int n=(int)(kobj->Length()*k.re*ppw/2.0/bem2d::PI); bem2d::AnalyticCurve kite(n,kobj); bem2d::pGeometry pgeom=kite.GetGeometry(); bem2d::PolBasis::AddBasis(0,pgeom); // Add constant basis functions // Discretize the single and double layer potential bem2d::SingleLayer sl(k); bem2d::ConjDoubleLayer cdl(k); bem2d::DoubleLayer dl(k); bem2d::QuadOption quadopts; quadopts.L=3; quadopts.N=5; quadopts.sigma=0.15; #ifdef BEM2DMPI if (b->IsRoot()) { std::cout << "Discretize Kernels with n=" << n << std::endl; } #else std::cout << "Discretize Kernels with n=" << n << std::endl; #endif bem2d::Matrix dsl=*(DiscreteKernel(*pgeom,quadopts,sl)); bem2d::Matrix ddl=(*DiscreteKernel(*pgeom,quadopts,dl)); bem2d::Matrix dcdl=*(DiscreteKernel(*pgeom,quadopts,cdl)); bem2d::Matrix Id=*(EvalIdent(*pgeom, quadopts)); bem2d::Matrix combined1=Id+2.0*dcdl-bem2d::complex(0,2.0)*eta1*dsl; bem2d::Matrix combined2=Id+2.0*dcdl-bem2d::complex(0,2.0)*eta2*dsl; dsl=2.0*bem2d::ChangeBasis(dsl,Id); ddl=2.0*bem2d::ChangeBasis(ddl,Id); dcdl=2.0*bem2d::ChangeBasis(dcdl,Id); combined1=bem2d::ChangeBasis(combined1,Id); combined2=bem2d::ChangeBasis(combined2,Id); #ifdef BEM2DMPI if (b->IsRoot()) { std::cout << "Compute norms and condition numbers" << std::endl; } #else std::cout << "Compute norms and condition numbers" << std::endl; #endif bem2d::L2NormCond(dsl,norm_sl[j],cond_sl[j]); bem2d::L2NormCond(ddl,norm_dl[j],cond_dl[j]); bem2d::L2NormCond(combined1,norm_combined1[j],cond_combined1[j]); bem2d::L2NormCond(combined2,norm_combined2[j],cond_combined2[j]); } finish=clock(); time=(double(finish)-double(start))/CLOCKS_PER_SEC/60; #ifdef BEM2DMPI if (b->IsRoot()) { #endif std::ofstream out(filename.c_str()); out << "Single Layer" << std::endl; for (int j=0; j<freqs.size(); j++) { out << "k=" << freqs[j] << " Norm: " << norm_sl[j] << " Norm of Inverse: " << cond_sl[j]/norm_sl[j] << " Condition Nr.: " << cond_sl[j] << std::endl; } out << "Double 
Layer" << std::endl; for (int j=0; j<freqs.size(); j++) { out << "k=" << freqs[j] << " Norm: " << norm_dl[j] << " Norm of Inverse: " << cond_dl[j]/norm_dl[j] << " Condition Nr.: " << cond_dl[j] << std::endl; } out << "Combined Layer eta=k" << std::endl; for (int j=0; j<freqs.size(); j++) { out << "k=" << freqs[j] << " Norm: " << norm_combined1[j] << " Norm of Inverse: " << cond_combined1[j]/norm_combined1[j] << " Condition Nr.: " << cond_combined1[j] << std::endl; } out << "Combined Layer eta=k^(2/3)" << std::endl; for (int j=0; j<freqs.size(); j++) { out << "k=" << freqs[j] << " Norm: " << norm_combined2[j] << " Norm of Inverse: " << cond_combined2[j]/norm_combined2[j] << " Condition Nr.: " << cond_combined2[j] << std::endl; } out << "Overalll time (minutes): " << time << std::endl; std::cout << "Overall time (minutes): " << time << std::endl; out.close(); #ifdef BEM2DMPI } #endif #ifdef BEM2DMPI bem2d::BlacsSystem::Release(); MPI_Finalize(); #endif }
static ssize_t generator( char *buf, size_t buflen, const struct SubproblemDim *subdims, const struct PGranularity *pgran, void *extra) { size_t BLOCKSIZE = pgran->wgSize[0]; size_t H = subdims->x; char tempTemplate[64*1024]; char def_target_rows[10], def_h[10]; SolutionStep *step = container_of( pgran , pgran, SolutionStep); // NOTE: using container_of() to get pigFuncID CLBlasKargs* kargs = (CLBlasKargs*) &(step->args); if ( buf == NULL) // return buffer size { buflen = (64 * 1024 * sizeof(char)); return (ssize_t)buflen; } CLBLASKernExtra *extraFlags = ( CLBLASKernExtra *)extra; //clblasUplo uplo = ( extraFlags->flags & KEXTRA_UPPER_TRIANG) ? clblasUpper : clblasLower; clblasOrder order = ( extraFlags->flags & KEXTRA_COLUMN_MAJOR) ? clblasColumnMajor: clblasRowMajor; clblasTranspose trans = ( extraFlags->flags & KEXTRA_TRANS_A) ? clblasTrans : (( extraFlags->flags & KEXTRA_CONJUGATE_A) ? clblasConjTrans: clblasNoTrans); if ( order == clblasColumnMajor ) { order = clblasRowMajor; if ( trans == clblasNoTrans) { trans = clblasTrans; } else if ( trans == clblasTrans ) { trans = clblasNoTrans; } else // clblasConjTrans { trans = clblasNoTrans; } } if( (kargs->pigFuncID == CLBLAS_SBMV) || (kargs->pigFuncID == CLBLAS_HBMV) ) // Only NT kernel is used { trans = clblasNoTrans; } if ((BLOCKSIZE % H) != 0) { printf("WARNING: GBMV: generator: Invalid Block Size\n"); return 0; } size_t TARGET_ROWS = BLOCKSIZE / H; if ( trans == clblasNoTrans) { strcpy(tempTemplate, (char*)gbmv_RNT_kernel); } else // Transpose cases... 
{ strcpy(tempTemplate, (char*)gbmv_RT_kernel);; } unsigned int vecLenA = extraFlags->vecLenA; bool doVLOAD = false; // Always scalar load for banded matrices kprintf kobj( Prefix[extraFlags->dtype], vecLenA, doVLOAD); sprintf( def_target_rows, "%d", (int)TARGET_ROWS ); sprintf( def_h, "%d", (int)H ); #ifdef DEBUG_GBMV printf("GBMV GENERATOR called....\n"); if((( extraFlags->flags & KEXTRA_TRANS_A) || ( extraFlags ->flags & KEXTRA_CONJUGATE_A ))) { printf("A is trans or CONJ-TRANS\n"); } else { printf("A is noTrans...\n"); } printf("TARGET ROWS = %s\n", def_target_rows); printf("H = %s\n", def_h); printf("dataType : %c\n", Prefix[extraFlags->dtype]); #endif kobj.put("%DEF_H", (const char *)def_h); kobj.put("%DEF_TARGET_ROWS", (const char *)def_target_rows); kobj.spit((char*)buf, tempTemplate); return (64 * 1024 * sizeof(char)); }