コード例 #1
0
ファイル: GLProgramsTest.cpp プロジェクト: Liuxiaochuan/skia
// GPU unit test: for every GL context type the local factory can create, runs
// GrDrawingManager::ProgramUnitTest with a stage limit chosen per platform.
DEF_GPUTEST(GLPrograms, reporter, factory) {
    // skbug 3330: run under a locale whose decimal separator is ',' because such a locale
    // would cause shader compilation to fail.
#ifdef SK_BUILD_FOR_WIN
    GrAutoLocaleSetter localeGuard("sv-SE");
#else
    GrAutoLocaleSetter localeGuard("sv_SE.UTF-8");
#endif

    // Prints are suppressed so the test does not spew.
    GrContextOptions quietOptions;
    quietOptions.fSuppressPrints = true;
    GrContextFactory debugFactory(quietOptions);

    for (int ctxType = 0; ctxType < GrContextFactory::kLastGLContextType; ++ctxType) {
        GrContext* ctx =
                debugFactory.get(static_cast<GrContextFactory::GLContextType>(ctxType));
        if (!ctx) {
            // This context type is not available here; try the next one.
            continue;
        }

        GrGLGpu* glGpu = static_cast<GrGLGpu*>(ctx->getGpu());

        /*
         * For the time being the test is only supported with desktop GL or for android on
         * ARM platforms.
         * TODO When we run ES 3.00 GLSL in more places, test again
         */
        int maxStages;
        if (glGpu->glStandard() == kGL_GrGLStandard ||
            glGpu->ctxInfo().vendor() == kARM_GrGLVendor) {
            maxStages = 6;
        } else if (glGpu->ctxInfo().renderer() == kTegra3_GrGLRenderer ||
                   glGpu->ctxInfo().renderer() == kOther_GrGLRenderer) {
            maxStages = 1;
        } else {
            // Unsupported configuration: this ends the whole test, not just this context type.
            return;
        }
#if SK_ANGLE
        // Some long shaders run out of temporary registers in the D3D compiler on ANGLE.
        if (GrContextFactory::kANGLE_GLContextType == ctxType) {
            maxStages = 2;
        }
#endif
#if SK_COMMAND_BUFFER
        // Some long shaders run out of temporary registers in the D3D compiler on ANGLE.
        // TODO(hendrikw): This only needs to happen with the ANGLE comand buffer backend.
        if (GrContextFactory::kCommandBuffer_GLContextType == ctxType) {
            maxStages = 2;
        }
#endif
        GrTestTarget target;
        ctx->getTestTarget(&target);
        REPORTER_ASSERT(reporter,
                        GrDrawingManager::ProgramUnitTest(ctx, target.target(), maxStages));
    }
}
コード例 #2
0
ファイル: GLProgramsTest.cpp プロジェクト: molikto/Skia
// GPU unit test entry point: hands test_glprograms to RunWithGPUTestContexts, filtered by
// IsRenderingGLContextType, using a copy of the incoming options with prints suppressed.
DEF_GPUTEST(GLPrograms, reporter, options) {
    // skbug 3330: run under a locale whose decimal separator is ',' because such a locale
    // would cause shader compilation to fail.
#ifdef SK_BUILD_FOR_WIN
    GrAutoLocaleSetter localeGuard("sv-SE");
#else
    GrAutoLocaleSetter localeGuard("sv_SE.UTF-8");
#endif

    // Work on a copy of the caller's options so prints can be silenced.
    GrContextOptions quietOptions = options;
    quietOptions.fSuppressPrints = true;

    // NOTE(review): this factory is constructed but never referenced afterwards in this block.
    sk_gpu_test::GrContextFactory debugFactory(quietOptions);

    skiatest::RunWithGPUTestContexts(test_glprograms,
                                     &skiatest::IsRenderingGLContextType,
                                     reporter,
                                     quietOptions);
}
コード例 #3
0
ファイル: GrGLProgramBuilder.cpp プロジェクト: vschs007/skia
// Builds a GrGLProgram for the given pipeline / primitive processor / descriptor.
// Returns the finalized program, or nullptr when emitting the processors fails.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const GrPipeline& pipeline,
                                               const GrPrimitiveProcessor& primProc,
                                               GrProgramDesc* desc,
                                               GrGLGpu* gpu) {
#ifdef SK_DEBUG
    // Debug-only sanity check: the pipeline must be usable and the primitive processor
    // must instantiate against the context's resource provider.
    GrResourceProvider* provider = gpu->getContext()->contextPriv().resourceProvider();

    SkASSERT(!pipeline.isBad() && primProc.instantiate(provider));
#endif

    ATRACE_ANDROID_FRAMEWORK("Shader Compile");

    // Use the "C" locale while this guard object is alive.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, pipeline, primProc, desc);

    auto cache = gpu->getContext()->contextPriv().getPersistentCache();
    if (cache && gpu->glCaps().programBinarySupport()) {
        // Look the program up in the persistent cache, keyed by the descriptor bytes.
        sk_sp<SkData> cacheKey = SkData::MakeWithoutCopy(desc->asKey(), desc->keyLength());
        programBuilder.fCached = cache->load(*cacheKey);
        // The eventual end goal is to completely skip emitAndInstallProcs on a cache hit, but
        // it does necessary setup in addition to generating the SkSL code. Currently only the
        // SkSL->GLSL step can be skipped on a cache hit.
    }

    if (programBuilder.emitAndInstallProcs()) {
        return programBuilder.finalize();
    }

    // Emission failed: release the fragment processors before bailing out.
    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
コード例 #4
0
/*
 * Write a "<name>/<arity>:" header for the procedure identified by wdid to
 * current_output_, pass the code address to als(), then flush the stream.
 */
void
print_procedure(dident wdid, vmcode *code)
{
	extern int	als(word addr);

	/* header line, preceded by a blank line */
	p_fprintf(current_output_, "\n%s/%d:\n", DidName(wdid), DidArity(wdid));

	/* the return value of als() is deliberately ignored */
	(void) als((word) code);

	ec_flush(current_output_);
}
コード例 #5
0
// Builds a GrGLProgram from the draw arguments.
// Returns the finalized program, or nullptr when emitting the processors fails.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const DrawArgs& args, GrGLGpu* gpu) {
    // Use the "C" locale while this guard object is alive.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc. The auto-delete wrapper holds it for the rest of this function.
    SkAutoTDelete<GrGLProgramBuilder> builderOwner(CreateProgramBuilder(args, gpu));
    GrGLProgramBuilder* programBuilder = builderOwner.get();

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    const bool emitted = programBuilder->emitAndInstallProcs(&colorExpr, &coverageExpr);
    if (emitted) {
        return programBuilder->finalize();
    }
    return nullptr;
}
コード例 #6
0
// Compares the resolved levels in actualLevels[0..actualCount) against the expected
// `levels` / `levelsCount` members.
// Reports a mismatch through errln(), followed by the error line and both level lists.
// Returns TRUE when the levels are acceptable, FALSE otherwise.
UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
                                       const char *paraLevelName) {
    UBool ok=TRUE;
    if(levelsCount==actualCount) {
        for(int32_t idx=0; idx<actualCount; ++idx) {
            // Positions that match, or whose expected value is not below
            // UBIDI_DEFAULT_LTR, are not checked.
            if(levels[idx]==actualLevels[idx] || levels[idx]>=UBIDI_DEFAULT_LTR) {
                continue;
            }
            // ICU used a shortcut:
            // Since the text is unidirectional, it did not store the resolved
            // levels but just returns all levels as the paragraph level 0 or 1.
            // The reordering result is the same, so this is fine.
            const UBool unidirectionalShortcut=
                directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount);
            if(!unidirectionalShortcut) {
                errln("Wrong level value at index %d; expected %d actual %d",
                      (int)idx, levels[idx], actualLevels[idx]);
                ok=FALSE;
            }
            // Only the first differing position is examined.
            break;
        }
    } else {
        errln("Wrong number of level values; expected %d actual %d",
              (int)levelsCount, (int)actualCount);
        ok=FALSE;
    }

    if(ok) {
        return ok;
    }

    // Failure: print the error line, then the expected and actual level lists.
    printErrorLine(paraLevelName);

    UnicodeString expectedLine("Expected levels:   ");
    for(int32_t e=0; e<levelsCount; ++e) {
        expectedLine.append((UChar)0x20);
        expectedLine.append(printLevel(levels[e]));
    }

    UnicodeString actualLine("Actual   levels:   ");
    for(int32_t a=0; a<actualCount; ++a) {
        actualLine.append((UChar)0x20);
        actualLine.append(printLevel(actualLevels[a]));
    }

    errln(expectedLine);
    errln(actualLine);
    return ok;
}
コード例 #7
0
// Builds a GrGLProgram from the draw arguments.
// Returns the finalized program, or nullptr when emitting the processors fails.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const DrawArgs& args, GrGLGpu* gpu) {
    // Use the "C" locale while this guard object is alive.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, args);

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    // The GL caps' fragment texture unit maximum is passed along to the emit step.
    const bool emitted = programBuilder.emitAndInstallProcs(
            &colorExpr, &coverageExpr, gpu->glCaps().maxFragmentTextureUnits());
    if (emitted) {
        return programBuilder.finalize();
    }

    // Emission failed: release the fragment processors before bailing out.
    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
コード例 #8
0
// Builds a GrGLProgram for the given pipeline / primitive processor / descriptor.
// Returns the finalized program, or nullptr when emitting the processors fails.
GrGLProgram* GrGLProgramBuilder::CreateProgram(const GrPipeline& pipeline,
                                               const GrPrimitiveProcessor& primProc,
                                               const GrGLProgramDesc& desc,
                                               GrGLGpu* gpu) {
    // Use the "C" locale while this guard object is alive.
    GrAutoLocaleSetter localeGuard("C");

    // The builder is handed off to effects so they can use it to add uniforms, varyings,
    // textures, etc.
    GrGLProgramBuilder programBuilder(gpu, pipeline, primProc, desc);

    // TODO: Once all stages can handle taking a float or vec4 and correctly handling them we
    // can seed correctly here
    GrGLSLExpr4 colorExpr;
    GrGLSLExpr4 coverageExpr;

    const bool emitted = programBuilder.emitAndInstallProcs(&colorExpr, &coverageExpr);
    if (emitted) {
        return programBuilder.finalize();
    }

    // Emission failed: release the fragment processors before bailing out.
    programBuilder.cleanupFragmentProcessors();
    return nullptr;
}
コード例 #9
0
/**
 * Command-line driver for the matmul-tensor factor search.
 *
 * In order, this function:
 *   1. parses the command-line options (prints usage and exits when none are given or
 *      --help is set);
 *   2. builds the three matricizations of the matmul tensor for the requested dimensions;
 *   3. allocates the factor matrices and the workspaces stored in `prm`;
 *   4. for each of --numseeds consecutive seeds, fills the starting factors from the random
 *      generator (or from --input when given), runs the selected --method ("als",
 *      "sparsify" or "round"), optionally rounds the result, prints it, and optionally
 *      writes it to --output;
 *   5. prints a summary (for "als") and frees everything it allocated.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return 0 on every path that returns from this function
 */
int main(int argc, char* argv[])
{

	// Print help if necessary
	bool help = read_bool(argc, argv, "--help", false);
	if ((argc < 2) || (help)) {
		usage(argv);
		return 0;
	}

	// Use parameters struct for passing parameters to kernels efficiently
	parameters prm;

	// Parse inputs (the last argument of each read_* call is the value used by default)
	prm.matDims[0] = read_int(argc, argv, "--m", 2);
	prm.matDims[1] = read_int(argc, argv, "--k", 2);
	prm.matDims[2] = read_int(argc, argv, "--n", 2);
	prm.rank = read_int(argc, argv, "--rank", 7);
	prm.method = read_string(argc, argv, "--method", (char *)"als");
	int maxIters = read_int(argc, argv, "--maxiters", 1000);
	int maxSecs = read_int(argc, argv, "--maxsecs", 1000);
	double tol = read_double(argc, argv, "--tol", 1e-8);
	int printItn = read_int(argc, argv, "--printitn", 0);
	double printTol = read_double(argc, argv, "--printtol", 1.0);
	int seed = read_int(argc, argv, "--seed", 0);
	int numSeeds = read_int(argc, argv, "--numseeds", 1);
	bool verbose = read_bool(argc, argv, "--verbose", false);
	prm.rnd_maxVal = read_double(argc,argv,"--maxval",1.0);
	prm.rnd_pwrOfTwo = read_int(argc,argv,"--pwrof2",0);
	bool roundFinal = read_bool(argc, argv, "--rndfin",false);
	prm.alpha = read_double(argc,argv, "--alpha", 0.1);

	// A non-zero --M sets all three M values at once; otherwise each is read separately.
	int M = read_int(argc,argv, "--M", 0);
	if (M)
	{
		prm.M[0] = M;
		prm.M[1] = M;
		prm.M[2] = M;
	} else {
		prm.M[0] = read_int(argc, argv, "--M0", -1);
		prm.M[1] = read_int(argc, argv, "--M1", -1);
		prm.M[2] = read_int(argc, argv, "--M2", -1);
	}
	char * infile = read_string(argc, argv, "--input", NULL);
	char * outfile = read_string(argc, argv, "--output", NULL);

	// Echo the parameters when running verbosely
	if (verbose) {
		// Disable stdout buffering
		setbuf(stdout, NULL);
		printf("\n\n---------------------------------------------------------\n");
		printf("PARAMETERS\n");
		printf("dimensions = %d %d %d\n",prm.matDims[0],prm.matDims[1],prm.matDims[2]);
		printf("rank       = %d\n",prm.rank);
		printf("method     = %s\n",prm.method);
		if (infile)
			printf("input      = %s\n",infile);
		else
		{
			if (numSeeds == 1)
				printf("input      = seed %d\n",seed);
			else
				printf("inputs     = seeds %d-%d\n",seed,seed+numSeeds-1);
		}
		if (outfile)
			printf("output     = %s\n",outfile);
		else
			printf("output     = none\n");
		if (!strcmp(prm.method,"als"))
		{
			printf("tol        = %1.2e\n",tol);
			printf("alpha      = %1.2e\n",prm.alpha);
			// Label fixed: this value comes from --maxval (was misspelled "maval")
			printf("maxval     = %1.2e\n",prm.rnd_maxVal);
			printf("M's        = (%d,%d,%d)\n",prm.M[0],prm.M[1],prm.M[2]);
			printf("maxiters   = %d\n",maxIters);
			printf("maxsecs    = %d\n",maxSecs);
			printf("printitn   = %d\n",printItn);
			printf("printtol   = %1.2e\n",printTol);
		}
		printf("---------------------------------------------------------\n");
	}

	// Initialize other variables
	int i, j, k, numIters, mkn, tidx[3];
	double err, errOld, errChange = 0.0, start_als, start_search, elapsed, threshold;

	// Compute tensor dimensions
	prm.dims[0] = prm.matDims[0]*prm.matDims[1];
	prm.dims[1] = prm.matDims[1]*prm.matDims[2];
	prm.dims[2] = prm.matDims[0]*prm.matDims[2];

	// Compute tensor's nnz, total number of entries, and Frobenius norm
	// NOTE(review): mkn*mkn is computed in int and may overflow for large dimensions.
	mkn = prm.matDims[0]*prm.matDims[1]*prm.matDims[2];
	prm.mkn2 = mkn*mkn;
	prm.xNorm = sqrt(mkn);

	// Compute number of columns in matricized tensors
	for (i = 0; i < 3; i++)
		prm.mtCols[i] = prm.mkn2 / prm.dims[i];

	// Construct three matricizations of matmul tensor.
	// Each X[i] is zero-initialized and gets a 1 for every (mm,kk,nn) triple; the three
	// arrays differ only in which tensor index is used as the fastest-varying one.
	// NOTE(review): allocation results are not checked anywhere in this function.
	prm.X = (double**) malloc( 3 * sizeof(double*) );
	for (i = 0; i < 3; i++)
		prm.X[i] = (double*) calloc( prm.mkn2, sizeof(double) );
	for (int mm = 0; mm < prm.matDims[0]; mm++)
		for (int kk = 0; kk < prm.matDims[1]; kk++)
			for (int nn = 0; nn < prm.matDims[2]; nn++)
			{
				tidx[0] = mm + kk*prm.matDims[0];
				tidx[1] = kk + nn*prm.matDims[1];
				tidx[2] = mm + nn*prm.matDims[0];
				prm.X[0][tidx[0]+prm.dims[0]*(tidx[1]+prm.dims[1]*tidx[2])] = 1;
				prm.X[1][tidx[1]+prm.dims[1]*(tidx[0]+prm.dims[0]*tidx[2])] = 1;
				prm.X[2][tidx[2]+prm.dims[2]*(tidx[0]+prm.dims[0]*tidx[1])] = 1;
			}

	// Allocate factor weights and matrices: working, initial, and model
	prm.lambda = (double*) malloc( prm.rank * sizeof(double) );
	prm.U  = (double**) malloc( 3 * sizeof(double*) );
	double** U0 = (double**) malloc( 3 * sizeof(double*) );
	prm.model = (double**) malloc( 3 * sizeof(double*) );
	for (i = 0; i < 3; i++)
	{
		// NOTE(review): U[i] is sized mkn2 while U0[i]/model[i] are sized dims[i]*rank;
		// only dims[i]*rank entries of U[i] are copied into below -- confirm intent.
		prm.U[i] =  (double*) calloc( prm.mkn2, sizeof(double) );
		U0[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
		prm.model[i] = (double*) calloc( prm.dims[i]*prm.rank, sizeof(double) );
	}

	// Allocate coefficient matrix within ALS (Khatri-Rao product)
	int maxMatDim = prm.matDims[0];
	if (maxMatDim < prm.matDims[1]) maxMatDim = prm.matDims[1];
	if (maxMatDim < prm.matDims[2]) maxMatDim = prm.matDims[2];
	prm.A = (double*) malloc( maxMatDim*mkn*prm.rank * sizeof(double) );

	// Allocate workspaces
	prm.tau = (double*) malloc( mkn * sizeof(double) );
	prm.lwork = maxMatDim*mkn*prm.rank;
	prm.work = (double*) malloc( prm.lwork * sizeof(double) );
	prm.iwork = (int*) malloc( prm.mkn2 * sizeof(int) );

	// Allocate matrices for normal equations
	int maxDim = prm.dims[0];
	if (maxDim < prm.dims[1]) maxDim = prm.dims[1];
	if (maxDim < prm.dims[2]) maxDim = prm.dims[2];
	prm.NE_coeff = (double*) malloc( prm.rank*prm.rank * sizeof(double) );
	prm.NE_rhs = (double*) malloc( maxDim*prm.rank * sizeof(double) );
	prm.residual = (double*) malloc( prm.mkn2 * sizeof(double) );

	//--------------------------------------------------
	// Search Loop
	//--------------------------------------------------
	// mySeed advances by one per pass; statusCnt/status drive the 10%-step progress message.
	int mySeed = seed, numGoodSeeds = 0, statusCnt = 0, status = 1;
	start_search = wall_time();
	for (int seed_cnt = 0; seed_cnt < numSeeds; ++seed_cnt)
	{
		// Set starting point from random seed (match Matlab Tensor Toolbox)
		RandomMT cRMT(mySeed);
		for (i = 0; i < 3; i++)
			for (j = 0; j < prm.dims[i]; j++)
				for (k = 0; k < prm.rank; k++)
					U0[i][j+k*prm.dims[i]] = cRMT.genMatlabMT();
		for (i = 0; i < prm.rank; i++)
			prm.lambda[i] = 1.0;

		// Copy starting point
		for (i = 0; i < 3; i++)
			cblas_dcopy(prm.dims[i]*prm.rank,U0[i],1,prm.U[i],1);

		// read from file if input is given
		if( infile )
			read_input( infile, prm );

		if (verbose)
		{
			printf("\nSTARTING POINT...\n");
			for (i = 0; i < 3; i++)
			{
				printf("Factor matrix %d:\n",i);
				print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
			}
			printf("\n");
		}

		//--------------------------------------------------
		// Main ALS Loop
		//--------------------------------------------------
		// Runs until maxIters passes, maxSecs of wall time, or the convergence test below.
		start_als = wall_time();
		err = 1.0;
		threshold = 1e-4;
		for (numIters = 0; numIters < maxIters && (wall_time()-start_als) < maxSecs; numIters++)
		{
			errOld = err;

			if (!strcmp(prm.method,"als"))
			{
				// Perform an iteration of ALS using NE with Smirnov's penalty term
				err = als( prm );
			}
			else if (!strcmp(prm.method,"sparsify"))
			{
				// print stats before sparsifying
				printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
				printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );

				// sparsify and return
				printf("\nSparsifying...\n\n");
				sparsify( prm );
				// One-shot method: make the loop condition fail after this pass
				numIters = maxIters;

				// print stats after sparsifying
				printf("New residual: %1.2e\n",compute_residual(prm,2,true));
				printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
			}
			else if (!strcmp(prm.method,"round"))
			{
				// print stats before rounding
				printf("Old residual: %1.2e\n",compute_residual(prm,2,true));
				printf("Old nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
				// round and return
				for (i = 0; i < 3; i++)
				{
					capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
					rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
				}
				// One-shot method: make the loop condition fail after this pass
				numIters = maxIters;

				// print stats after rounding
				printf("New residual: %1.2e\n",compute_residual(prm,2,true));
				printf("New nnz (larger than %1.1e): %d %d %d\n", threshold, nnz(prm.U[0],prm.dims[0]*prm.rank,threshold), nnz(prm.U[1],prm.dims[1]*prm.rank,threshold), nnz(prm.U[2],prm.dims[2]*prm.rank,threshold) );
			}
			else
				die("Invalid method\n");

			// Compute change in relative residual norm
			errChange = fabs(err - errOld);

			// Print info at current iteration
			if ((printItn > 0) && (((numIters + 1) % printItn) == 0))
			{
				// print info
				printf ("Iter %d: residual = %1.5e change = %1.5e\n", numIters + 1, err, errChange);
			}

			// Check for convergence
			// NOTE(review): on this break numIters is the 0-based index of the pass that just
			// ran, so the counts printed below are one lower than the passes executed.
			if ( numIters > 0 && errChange < tol )
				break;

		}

		// If rounding, round final solution and re-compute residual
		if(roundFinal)
		{
			// normalize columns in A and B factors, put arbitrary weights into C
			normalize_model( prm, 2 );

			// cap large values and round to nearest power of 2
			for (i = 0; i < 3; i++)
			{
				capping(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_maxVal);
				rounding(prm.U[i],prm.dims[i]*prm.rank,prm.rnd_pwrOfTwo);
			}

			err = compute_residual(prm,0,true);
		}

		// Print status if searching over many seeds
		statusCnt++;
		if (numSeeds > 1000 && statusCnt == numSeeds/10)
		{
			printf("...%d%% complete...\n",10*status);
			status++;
			statusCnt = 0;
		}

		// Print final info
		elapsed = wall_time() - start_als;
		if ((printItn > 0 || verbose) && !strcmp(prm.method,"als"))
		{
			// Guard the average: numIters is 0 when the loop body never ran
			// (e.g. --maxiters 0), which previously printed inf/nan.
			double avgIterTime = (numIters > 0) ? elapsed/numIters : 0.0;
			if (infile)
				printf("\nInput %s ",infile);
			else
				printf("\nInitial seed %d ",mySeed);
			printf("achieved residual %1.3e in %d iterations and %1.3e seconds\n \t final residual change: %1.3e\n \t average time per iteration: %1.3e s\n", err, numIters, elapsed, errChange, avgIterTime);
		}

		if (verbose)
		{
			printf("\nSOLUTION...\n");
			for (i = 0; i < 3; i++)
			{
				printf("Factor matrix %d:\n",i);
				if (roundFinal || !strcmp(prm.method,"round"))
					print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
				else
					print_matrix(prm.U[i],prm.dims[i],prm.rank,prm.dims[i]);
			}

			// In verbose mode a good seed is only counted; the banner below is not printed
			if (err < printTol)
				numGoodSeeds++;
		}
		else if (err < printTol)
		{
			// Non-verbose: announce every seed whose residual is below printTol
			numGoodSeeds++;

			printf("\n\n***************************************\n");
			if (infile)
				printf("Input %s: ",infile);
			else
				printf("Initial seed %d: ",mySeed);
			printf("after %d iterations, achieved residual %1.3e with final residual change of %1.3e\n", numIters, err, errChange);
			if (roundFinal)
			{

				for (i = 0; i < 3; i++)
				{
					printf("Factor matrix %d:\n",i);
					print_int_matrix(prm.U[i], prm.dims[i], prm.rank, prm.dims[i], prm.rnd_pwrOfTwo);
				}

				// Total nonzeros over the three factor matrices
				int count = 0;
				for (i = 0; i < 3; i++)
					count += nnz(prm.U[i],prm.dims[i]*prm.rank);
				printf("\ttotal nnz in solution: %d\n",count);
				printf("\tnaive adds/subs:       %d\n",count - prm.dims[2] - 2*prm.rank);
			}
			printf("***************************************\n\n\n");
		}

		// write to output
		if( outfile )
			write_output( outfile, prm );

		mySeed++;
	}

	// Final report of processor statistics
	elapsed = wall_time()-start_search;

	// Print stats
	if (!strcmp(prm.method,"als"))
	{
		printf("\n\n------------------------------------------------------------\n");
		printf("Time elapsed:                \t%1.1e\tseconds\n",elapsed);
		printf("Total number of seeds tried: \t%d\n",numSeeds);
		printf("Total number of good seeds:  \t%d",numGoodSeeds);
		printf("\t(residual < %2.1e)\n",printTol);
		printf("------------------------------------------------------------\n");
	}


	// free allocated memory
	for (i = 0; i < 3; i++)
	{
		free( prm.X[i] );
		free( prm.U[i] );
		free( U0[i] );
		free( prm.model[i] );
	}
	free( prm.X );
	free( prm.U );
	free( U0 );
	free( prm.model );
	free( prm.lambda );
	free( prm.A );
	free( prm.NE_coeff );
	free( prm.NE_rhs );
	free( prm.residual );
	free( prm.tau );
	free( prm.work );
	free( prm.iwork );

	return 0;

}