// Runs GrDrawingManager::ProgramUnitTest against each GL context type the local context factory
// can create, while a locale that uses ',' as its decimal separator is active.
DEF_GPUTEST(GLPrograms, reporter, factory) {
    // skbug 3330: a locale with ',' as decimal separator would cause shader compilation to fail,
    // so the whole test runs under such a locale.
#ifdef SK_BUILD_FOR_WIN
    static const char kCommaDecimalLocale[] = "sv-SE";
#else
    static const char kCommaDecimalLocale[] = "sv_SE.UTF-8";
#endif
    GrAutoLocaleSetter localeGuard(kCommaDecimalLocale);

    // Prints are suppressed to avoid spew.
    GrContextOptions quietOptions;
    quietOptions.fSuppressPrints = true;
    GrContextFactory debugFactory(quietOptions);

    for (int typeIndex = 0; typeIndex < GrContextFactory::kLastGLContextType; ++typeIndex) {
        const GrContextFactory::GLContextType contextType =
                static_cast<GrContextFactory::GLContextType>(typeIndex);
        GrContext* context = debugFactory.get(contextType);
        if (!context) {
            // The factory could not provide this context type; move on to the next one.
            continue;
        }
        GrGLGpu* glGpu = static_cast<GrGLGpu*>(context->getGpu());

        /*
         * For the time being, the test is only supported with desktop GL or for android on
         * ARM platforms.
         * TODO When we run ES 3.00 GLSL in more places, test again
         */
        int maxStages;
        if (kGL_GrGLStandard == glGpu->glStandard() ||
            kARM_GrGLVendor == glGpu->ctxInfo().vendor()) {
            maxStages = 6;
        } else if (kTegra3_GrGLRenderer == glGpu->ctxInfo().renderer() ||
                   kOther_GrGLRenderer == glGpu->ctxInfo().renderer()) {
            maxStages = 1;
        } else {
            // NOTE(review): this leaves the entire test, so any remaining context types are
            // not exercised -- confirm that this is intended rather than a `continue`.
            return;
        }

#if SK_ANGLE
        // Some long shaders run out of temporary registers in the D3D compiler on ANGLE.
        if (contextType == GrContextFactory::kANGLE_GLContextType) {
            maxStages = 2;
        }
#endif
#if SK_COMMAND_BUFFER
        // Some long shaders run out of temporary registers in the D3D compiler on ANGLE.
        // TODO(hendrikw): This only needs to happen with the ANGLE command buffer backend.
        if (contextType == GrContextFactory::kCommandBuffer_GLContextType) {
            maxStages = 2;
        }
#endif

        GrTestTarget testTarget;
        context->getTestTarget(&testTarget);
        REPORTER_ASSERT(reporter, GrDrawingManager::ProgramUnitTest(
                                          context, testTarget.target(), maxStages));
    }
}
// Runs test_glprograms on every rendering GL context type while a locale that uses ',' as its
// decimal separator is active.
DEF_GPUTEST(GLPrograms, reporter, options) {
    // skbug 3330: a locale with ',' as decimal separator would cause shader compilation to fail,
    // so the whole test runs under such a locale.
#ifdef SK_BUILD_FOR_WIN
    static const char kCommaDecimalLocale[] = "sv-SE";
#else
    static const char kCommaDecimalLocale[] = "sv_SE.UTF-8";
#endif
    GrAutoLocaleSetter localeGuard(kCommaDecimalLocale);

    // Start from the caller's options and only switch off printing to avoid spew.
    GrContextOptions quietOptions = options;
    quietOptions.fSuppressPrints = true;

    // NOTE(review): this factory is not referenced again in this test body -- confirm whether
    // it is still needed.
    sk_gpu_test::GrContextFactory debugFactory(quietOptions);

    skiatest::RunWithGPUTestContexts(test_glprograms,
                                     &skiatest::IsRenderingGLContextType,
                                     reporter,
                                     quietOptions);
}
// Builds a GrGLProgram for the given pipeline / primitive processor / descriptor.
// Returns nullptr when emitAndInstallProcs() fails (after cleaning up the fragment processors),
// otherwise whatever finalize() returns.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const GrPipeline& pipeline,
                                               const GrPrimitiveProcessor& primProc,
                                               GrProgramDesc* desc,
                                               GrGLGpu* gpu) {
#ifdef SK_DEBUG
    GrResourceProvider* resourceProvider = gpu->getContext()->contextPriv().resourceProvider();
    SkASSERT(!pipeline.isBad() && primProc.instantiate(resourceProvider));
#endif

    ATRACE_ANDROID_FRAMEWORK("Shader Compile");
    // Shader generation runs under the "C" locale for the lifetime of this guard.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, pipeline, primProc, desc);

    auto persistentCache = gpu->getContext()->contextPriv().getPersistentCache();
    const bool canUseBinaryCache = persistentCache && gpu->glCaps().programBinarySupport();
    if (canUseBinaryCache) {
        // Look the program up by its descriptor key; the key data is referenced, not copied.
        sk_sp<SkData> cacheKey = SkData::MakeWithoutCopy(desc->asKey(), desc->keyLength());
        programBuilder.fCached = persistentCache->load(*cacheKey);
        // The eventual end goal is to completely skip emitAndInstallProcs on a cache hit, but
        // it does necessary setup in addition to generating the SkSL code. Currently only the
        // SkSL->GLSL step can be skipped on a cache hit.
    }

    if (programBuilder.emitAndInstallProcs()) {
        return programBuilder.finalize();
    }

    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
/*
 * Writes a "<name>/<arity>:" header for the functor `wdid` to current_output_,
 * passes the address of `code` to als(), and flushes current_output_.
 * The return value of als() is ignored.
 * NOTE(review): als() is presumably the abstract-code lister -- confirm.
 */
void
print_procedure(dident wdid, vmcode *code)
{
    extern int als(word addr);
    const word code_addr = (word) code;

    p_fprintf(current_output_, "\n%s/", DidName(wdid));
    p_fprintf(current_output_, "%d:\n", DidArity(wdid));
    (void) als(code_addr);
    ec_flush(current_output_);
}
// Builds a GrGLProgram for the given draw arguments.
// Returns nullptr when emitAndInstallProcs() fails, otherwise whatever finalize() returns.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const DrawArgs& args, GrGLGpu* gpu) {
    // Shader generation runs under the "C" locale for the lifetime of this guard.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc. It is owned by `ownedBuilder` and released when this function returns.
    SkAutoTDelete<GrGLProgramBuilder> ownedBuilder(CreateProgramBuilder(args, gpu));
    GrGLProgramBuilder* programBuilder = ownedBuilder.get();

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    if (programBuilder->emitAndInstallProcs(&colorExpr, &coverageExpr)) {
        return programBuilder->finalize();
    }
    return nullptr;
}
/**
 * Compares the expected levels (members levels / levelsCount) with the levels ICU produced.
 *
 * A mismatch is only considered at positions whose expected level is below UBIDI_DEFAULT_LTR.
 * The first such mismatch ends the scan: it is tolerated when the text is unidirectional and
 * the actual levels have the same direction bits, otherwise it is reported as an error.
 * On failure the error line plus the expected and actual level lists are logged.
 *
 * @param actualLevels  levels produced by ICU
 * @param actualCount   number of entries in actualLevels
 * @param paraLevelName passed to printErrorLine() when reporting a failure
 * @return TRUE when the levels are acceptable, FALSE otherwise
 */
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
                                       const char *paraLevelName) {
    UBool levelsMatch=TRUE;
    if(levelsCount==actualCount) {
        for(int32_t idx=0; idx<actualCount; ++idx) {
            if(levels[idx]==actualLevels[idx] || levels[idx]>=UBIDI_DEFAULT_LTR) {
                continue;  // nothing to check at this position
            }
            if(directionBits==3 || directionBits!=getDirectionBits(actualLevels, actualCount)) {
                errln("Wrong level value at index %d; expected %d actual %d",
                      (int)idx, levels[idx], actualLevels[idx]);
                levelsMatch=FALSE;
            }
            // Otherwise ICU used a shortcut: since the text is unidirectional, it did not
            // store the resolved levels but just returns all levels as the paragraph level
            // 0 or 1. The reordering result is the same, so this is fine.
            // Either way only the first differing position is examined.
            break;
        }
    } else {
        errln("Wrong number of level values; expected %d actual %d",
              (int)levelsCount, (int)actualCount);
        levelsMatch=FALSE;
    }

    if(levelsMatch) {
        return TRUE;
    }

    // Failure: log the context line followed by both level sequences.
    printErrorLine(paraLevelName);

    UnicodeString expectedText("Expected levels: ");
    for(int32_t e=0; e<levelsCount; ++e) {
        expectedText.append((UChar)0x20);
        expectedText.append(printLevel(levels[e]));
    }

    UnicodeString actualText("Actual levels: ");
    for(int32_t a=0; a<actualCount; ++a) {
        actualText.append((UChar)0x20);
        actualText.append(printLevel(actualLevels[a]));
    }

    errln(expectedText);
    errln(actualText);
    return FALSE;
}
// Builds a GrGLProgram for the given draw arguments.
// Returns nullptr when emitAndInstallProcs() fails (after cleaning up the fragment processors),
// otherwise whatever finalize() returns.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const DrawArgs& args, GrGLGpu* gpu) {
    // Shader generation runs under the "C" locale for the lifetime of this guard.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, args);

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    if (programBuilder.emitAndInstallProcs(&colorExpr,
                                           &coverageExpr,
                                           gpu->glCaps().maxFragmentTextureUnits())) {
        return programBuilder.finalize();
    }

    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
// Builds a GrGLProgram for the given pipeline / primitive processor / descriptor.
// Returns nullptr when emitAndInstallProcs() fails (after cleaning up the fragment processors),
// otherwise whatever finalize() returns.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const GrPipeline& pipeline,
                                               const GrPrimitiveProcessor& primProc,
                                               const GrGLProgramDesc& desc,
                                               GrGLGpu* gpu) {
    // Shader generation runs under the "C" locale for the lifetime of this guard.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, pipeline, primProc, desc);

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    if (programBuilder.emitAndInstallProcs(&colorExpr, &coverageExpr)) {
        return programBuilder.finalize();
    }

    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
// Command-line driver: alternating-least-squares (ALS) search for rank-`prm.rank` factorizations
// of the matrix-multiplication tensor, starting from one or more random seeds.
//
// Flow, as implemented below:
//   1. Calls usage() and returns 0 when fewer than two arguments are given or --help is set.
//   2. Reads options into `prm` and locals (defaults in parentheses): --m/--k/--n (2 each),
//      --rank (7), --method ("als"; "sparsify" and "round" are also handled, any other value
//      reaches die("Invalid method\n") inside the iteration loop), --maxiters (1000),
//      --maxsecs (1000), --tol (1e-8), --printitn (0), --printtol (1.0), --seed (0),
//      --numseeds (1), --verbose (false), --maxval (1.0), --pwrof2 (0), --rndfin (false),
//      --alpha (0.1), --M (0; when nonzero it sets prm.M[0..2], otherwise --M0/--M1/--M2 with
//      default -1 are read), --input (NULL), --output (NULL).
//   3. With --verbose, disables stdout buffering and prints the parameters.
//   4. Builds the three matricizations prm.X[0..2] of the matmul tensor and allocates factor
//      matrices, workspaces and normal-equation buffers with malloc/calloc. The allocation
//      results are not checked.
//   5. For each of numSeeds consecutive seeds starting at --seed: fills U0 from RandomMT, copies
//      it into prm.U, calls read_input() when --input is given, then iterates the chosen method
//      until maxIters is reached, maxSecs has elapsed, or (after the first iteration) the
//      residual change drops below tol. "sparsify" and "round" set numIters = maxIters so they
//      run once. With --rndfin the result is normalized, capped and rounded and the residual is
//      recomputed. A per-seed report is printed and write_output() is called when --output is
//      given.
//   6. Prints summary statistics (method "als" only), frees every buffer and returns 0.
//
// NOTE(review): the verbose parameter dump labels prm.rnd_maxVal as "maval"; this looks like a
//   typo for "maxval" -- confirm before changing the output.
// NOTE(review): "average time per iteration" divides by numIters, which is 0 when --maxiters is
//   0 -- confirm whether that case needs handling.
int main(int argc, char* argv[]) { // Print help if necessary bool help = read_bool(argc, argv, "--help", false); if ((argc < 2) || (help)) { usage(argv); return 0; } // Use parameters struct for passing parameters to kernels efficiently parameters prm; // Parse inputs prm.matDims[0] = read_int(argc, argv, "--m", 2); prm.matDims[1] = read_int(argc, argv, "--k", 2); prm.matDims[2] = read_int(argc, argv, "--n", 2); prm.rank = read_int(argc, argv, "--rank", 7); prm.method = read_string(argc, argv, "--method", (char *)"als"); int maxIters = read_int(argc, argv, "--maxiters", 1000); int maxSecs = read_int(argc, argv, "--maxsecs", 1000); double tol = read_double(argc, argv, "--tol", 1e-8); int printItn = read_int(argc, argv, "--printitn", 0); double printTol = read_double(argc, argv, "--printtol", 1.0); int seed = read_int(argc, argv, "--seed", 0); int numSeeds = read_int(argc, argv, "--numseeds", 1); bool verbose = read_bool(argc, argv, "--verbose", false); prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0); prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0); bool roundFinal = read_bool(argc, argv, "--rndfin",false); prm.alpha = read_double(argc,argv, "--alpha", 0.1); int M = read_int(argc,argv, "--M", 0); if (M) { prm.M[0] = M; prm.M[1] = M; prm.M[2] = M; } else { prm.M[0] = read_int(argc, argv, "--M0", -1); prm.M[1] = read_int(argc, argv, "--M1", -1); prm.M[2] = read_int(argc, argv, "--M2", -1); } char * infile = read_string(argc, argv, "--input", NULL); char * outfile = read_string(argc, argv, "--output", NULL); if (verbose) { setbuf(stdout, NULL); printf("\n\n---------------------------------------------------------\n"); printf("PARAMETERS\n"); printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]); printf("rank = %d\n",prm.rank); printf("method = %s\n",prm.method); if (infile) printf("input = %s\n",infile); else { if (numSeeds == 1) printf("input = seed %d\n",seed); else printf("inputs = seeds %d-%d\n",seed,seed+numSeeds-1); } if 
(outfile) printf("output = %s\n",outfile); else printf("output = none\n"); if (!strcmp(prm.method,"als")) { printf("tol = %1.2e\n",tol); printf("alpha = %1.2e\n",prm.alpha); printf("maval = %1.2e\n",prm.rnd_maxVal); printf("M's = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]); printf("maxiters = %d\n",maxIters); printf("maxsecs = %d\n",maxSecs); printf("printitn = %d\n",printItn); printf("printtol = %1.2e\n",printTol); } printf("---------------------------------------------------------\n"); } // Initialize other variables int i, j, k, numIters, mkn, tidx[3]; double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold; // Compute tensor dimensions prm.dims[0] = prm.matDims[0]*prm.matDims[1]; prm.dims[1] = prm.matDims[1]*prm.matDims[2]; prm.dims[2] = prm.matDims[0]*prm.matDims[2]; // Compute tensor's nnz, total number of entries, and Frobenius norm mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2]; prm.mkn2 = mkn*mkn; prm.xNorm = sqrt(mkn); // Compute number of columns in matricized tensors for (i = 0; i < 3; i++) prm.mtCols[i] = prm.mkn2 / prm.dims[i]; // Construct three matricizations of matmul tensor prm.X = (double**) malloc( 3 * sizeof(double*) ); for (i = 0; i < 3; i++) prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) ); for (int mm = 0; mm < prm.matDims[0]; mm++) for (int kk = 0; kk < prm.matDims[1]; kk++) for (int nn = 0; nn < prm.matDims[2]; nn++) { tidx[0] = mm + kk*prm.matDims[0]; tidx[1] = kk + nn*prm.matDims[1]; tidx[2] = mm + nn*prm.matDims[0]; prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1; prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1; prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1; } // Allocate factor weights and matrices: working, initial, and model prm.lambda = (double*) malloc( prm.rank * sizeof(double) ); prm.U = (double**) malloc( 3 * sizeof(double*) ); double** U0 = (double**) malloc( 3 * sizeof(double*) ); prm.model = (double**) malloc( 3 * sizeof(double*) ); for (i = 
0; i < 3; i++) { prm.U[i] = (double*) calloc( prm.mkn2, sizeof(double) ); U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) ); } // Allocate coefficient matrix within ALS (Khatri-Rao product) int maxMatDim = prm.matDims[0]; if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1]; if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2]; prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) ); // Allocate workspaces prm.tau = (double*) malloc( mkn * sizeof(double) ); prm.lwork = maxMatDim*mkn*prm.rank; prm.work = (double*) malloc( prm.lwork * sizeof(double) ); prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) ); // Allocate matrices for normal equations int maxDim = prm.dims[0]; if (maxDim < prm.dims[1]) maxDim = prm.dims[1]; if (maxDim < prm.dims[2]) maxDim = prm.dims[2]; prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) ); prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) ); prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) ); //-------------------------------------------------- // Search Loop //-------------------------------------------------- int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1; start_search = wall_time(); for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt) { // Set starting point from random seed (match Matlab Tensor Toolbox) RandomMT cRMT(mySeed); for (i = 0; i < 3; i++) for (j = 0; j < prm.dims[i]; j++) for (k = 0; k < prm.rank; k++) U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT(); for (i = 0; i < prm.rank; i++) prm.lambda[i] = 1.0; // Copy starting point for (i = 0; i < 3; i++) cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1); // read from file if input is given if( infile ) read_input( infile, prm ); if (verbose) { printf("\nSTARTING POINT...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } printf("\n"); } 
//-------------------------------------------------- // Main ALS Loop //-------------------------------------------------- start_als = wall_time(); err = 1.0; threshold = 1e-4; for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++) { errOld = err; if (!strcmp(prm.method,"als")) { // Perform an iteration of ALS using NE with Smirnov's penalty term err = als( prm ); } else if (!strcmp(prm.method,"sparsify")) { // print stats before sparsifying printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // sparsify and return printf("\nSparsifying...\n\n"); sparsify( prm ); numIters = maxIters; // print stats after sparsifying printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else if (!strcmp(prm.method,"round")) { // print stats before rounding printf("Old residual: %1.2e\n",compute_residual(prm,2,true)); printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); // round and return for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } numIters = maxIters; // print stats after rounding printf("New residual: %1.2e\n",compute_residual(prm,2,true)); printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) ); } else die("Invalid method\n"); // Compute change in 
relative residual norm errChange = fabs(err - errOld); // Print info at current iteration if ((printItn > 0) && (((numIters + 1) % printItn) == 0)) { // print info printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange); } // Check for convergence if ( numIters > 0 && errChange < tol ) break; } // If rounding, round final solution and re-compute residual if(roundFinal) { // normalize columns in A and B factors, put arbitrary weights into C normalize_model( prm, 2 ); // cap large values and round to nearest power of 2 for (i = 0; i < 3; i++) { capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal); rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo); } err = compute_residual(prm,0,true); } // Print status if searching over many seeds statusCnt++; if (numSeeds > 1000 && statusCnt == numSeeds/10) { printf("...%d%% complete...\n",10*status); status++; statusCnt = 0; } // Print final info elapsed = wall_time() - start_als; if ((printItn > 0 || verbose) && !strcmp(prm.method,"als")) { if (infile) printf("\nInput %s ",infile); else printf("\nInitial seed %d ",mySeed); printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, elapsed/numIters); } if (verbose) { printf("\nSOLUTION...\n"); for (i = 0; i < 3; i++) { printf("Factor matrix %d:\n",i); if (roundFinal || !strcmp(prm.method,"round")) print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); else print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]); } if (err < printTol) numGoodSeeds++; } else if (err < printTol) { numGoodSeeds++; printf("\n\n***************************************\n"); if (infile) printf("Input %s: ",infile); else printf("Initial seed %d: ",mySeed); printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange); if (roundFinal) { for (i = 0; i < 3; i++) { 
printf("Factor matrix %d:\n",i); print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo); } int count = 0; for (i = 0; i < 3; i++) count += nnz(prm.U[i],prm.dims[i]*prm.rank); printf("\ttotal nnz in solution: %d\n",count); printf("\tnaive adds/subs: %d\n",count - prm.dims[2] - 2*prm.rank); } printf("***************************************\n\n\n"); } // write to output if( outfile ) write_output( outfile, prm ); mySeed++; } // Final report of processor statistics elapsed = wall_time()-start_search; // Print stats if (!strcmp(prm.method,"als")) { printf("\n\n------------------------------------------------------------\n"); printf("Time elapsed: \t%1.1e\tseconds\n",elapsed); printf("Total number of seeds tried: \t%d\n",numSeeds); printf("Total number of good seeds: \t%d",numGoodSeeds); printf("\t(residual < %2.1e)\n",printTol); printf("------------------------------------------------------------\n"); } // free allocated memory for (i = 0; i < 3; i++) { free( prm.X[i] ); free( prm.U[i] ); free( U0[i] ); free( prm.model[i] ); } free( prm.X ); free( prm.U ); free( U0 ); free( prm.model ); free( prm.lambda ); free( prm.A ); free( prm.NE_coeff ); free( prm.NE_rhs ); free( prm.residual ); free( prm.tau ); free( prm.work ); free( prm.iwork ); return 0; }