Пример #1
0
/* ============================================================================
Function:     =        Debugger
Purpose:      =        Print debug information about the current phase and,
              =        once a stage has finished, gather the list.
==============================================================================
Input arg:  =        1. flag: Selects the output. 1 or 2 announce the butterfly
            =           that is about to execute; any other value reports the
            =           completed stage (on rank 0) and calls Gather.
            =        2. my_rank: The processor's rank.
            =        3. phase: The current phase of the algorithm.
            =        4. partner_size: The size of the pairing among processors.
            =        5. my_list: Local list for each processor.
            =        6. global_size: The size of the input array.
            =        7. list_size: The size of the processor's array.
=========================================================================== */
void Debugger(int flag, int my_rank, int phase, int partner_size, int my_list[],
                int global_size, int list_size)
{
    /* Flags 1 and 2 produce exactly the same message. */
    const int announcing_butterfly = (flag == 1 || flag == 2);

    if (!announcing_butterfly)
    {
        /* Only rank 0 prints the banner; every rank takes part in Gather. */
        if (my_rank == 0)
            printf(" \nStage %d completed. \n", phase);

        Gather(my_list, global_size, my_rank, list_size);
    }
    else
    {
        printf("Processor %d's #%d %d-element butterfly now executing. \n",
                my_rank, phase, partner_size);
    }

    /* Every path flushes stdout before returning. */
    fflush(stdout);
}
Пример #2
0
// Runs both gather passes for the given AVPL sets: the antiradiance pass into
// m_gatherAntiradianceRenderTarget and the shadow-map radiance pass into
// m_gatherShadowmapRenderTarget. Does nothing when debug mode is enabled and
// m_FinishedDebug is set.
void Renderer::Gather(std::vector<Avpl>& avpls_shadowmap, std::vector<Avpl>& avpls_antiradiance)
{
	const bool debugRunFinished = m_confManager->GetConfVars()->UseDebugMode && m_FinishedDebug;

	if(!debugRunFinished)
	{
		Gather(avpls_antiradiance, m_gatherAntiradianceRenderTarget.get());
		GatherRadianceWithShadowMap(avpls_shadowmap, m_gatherShadowmapRenderTarget.get());
	}
}
Пример #3
0
// Tests this bonus against the player's rectangle (x, y, w, h).
// On a hit the bonus is gathered and destroyed and true is returned;
// otherwise nothing happens and false is returned.
bool GameBonus::CollidesWith(GamePlayer Player) {
    if (!CollisionRecRec({ Player.x, Player.y, Player.w, Player.h }))
        return false;

    Gather();
    Destroy();
    return true;
}
Пример #4
0
// Builds the set of configuration flags for workspace package `package`.
//
// The set starts from `mainparam` plus the RELEASE_FLAGS / DEBUG_FLAGS entry of
// the build method `bm`, then receives the builder name, the target-mode and
// link-mode flags, the debug level and BLITZ (package-specific settings override
// the defaults when they are >= 0), and finally every package flag whose `when`
// condition matches. The flags are also pushed to `host` and `b`.
//
// If `target` is not null, it receives the result of Gather() on the package's
// target entries for the final configuration.
Index<String> MakeBuild::PackageConfig(const Workspace& wspc, int package,
                                 const VectorMap<String, String>& bm, String mainparam,
                                 Host& host, Builder& b, String *target)
{
	String packagepath = PackagePath(wspc[package]);
	const Package& pkg = wspc.package[package];

	// Seed the configuration from the main parameters and the method's mode flags.
	mainparam << ' ' << bm.Get(targetmode ? "RELEASE_FLAGS" : "DEBUG_FLAGS", NULL);
	Index<String> cfg;
	cfg = SplitFlags(mainparam, package == 0, wspc.GetAllAccepts(package));
	cfg.FindAdd(bm.Get("BUILDER", "GCC"));

	const TargetMode& m = GetTargetMode();
	if(targetmode == 0)
		cfg.FindAdd("DEBUG");

	// Link mode 2 adds both SO and SHARED; link mode 1 adds SHARED only.
	if(m.linkmode == 2)
		cfg.FindAdd("SO");
	if(m.linkmode == 1 || m.linkmode == 2)
		cfg.FindAdd("SHARED");

	switch(targetmode) {
	case 2:  cfg.FindAdd("FORCE_SPEED"); break;
	case 3:  cfg.FindAdd("FORCE_SIZE"); break;
	}

	// Start from the defaults and let a per-package entry override them
	// wherever its value is non-negative.
	int debug_level = m.def.debug;
	bool use_blitz = m.def.blitz;
	int q = m.package.Find(wspc[package]);
	if(q >= 0) {
		const PackageMode& p = m.package[q];
		if(p.debug >= 0)
			debug_level = p.debug;
		if(p.blitz >= 0)
			use_blitz = p.blitz;
	}

	if(debug_level == 1)
		cfg.FindAdd("DEBUG_MINIMAL");
	else if(debug_level == 2)
		cfg.FindAdd("DEBUG_FULL");

	if(!pkg.noblitz && use_blitz)
		cfg.FindAdd("BLITZ");

	host.AddFlags(cfg);
	b.AddFlags(cfg);

	// Conditional package flags are evaluated against the configuration as it
	// grows, so an earlier flag can enable a later one.
	const int flag_count = pkg.flag.GetCount();
	for(int n = 0; n < flag_count; n++) {
		if(!MatchWhen(pkg.flag[n].when, cfg.GetKeys()))
			continue;
		cfg.Add(pkg.flag[n].text);
	}

	if(target)
		*target = Gather(pkg.target, cfg.GetKeys(), true);
	return cfg;
}
Пример #5
0
// Command-line entry point.
//
// Accepted forms (as parsed below):
//   <program> gather  <arg1> <arg2>
//   <program> compile <databasePath> <cpuArchName> <imageFormatName> <outputPath>
//
// Returns 0 on success. Returns -1 (after printing the usage text) when too few
// arguments are given or the command is not recognised, and -1 when Compile()
// throws a std::exception.
int main(int argc, char** argv)
{
	if(argc <= 2)
	{
		PrintUsage();
		return -1;
	}

	const char* command = argv[1];

	if(!strcmp(command, "gather"))
	{
		if(argc < 4)
		{
			PrintUsage();
			return -1;
		}

		Gather(argv[2], argv[3]);
	}
	else if(!strcmp(command, "compile"))
	{
		if(argc < 6)
		{
			PrintUsage();
			return -1;
		}

		try
		{
			const char* databasePath = argv[2];
			const char* cpuArchName = argv[3];
			const char* imageFormatName = argv[4];
			const char* outputPath = argv[5];
			Compile(databasePath, cpuArchName, imageFormatName, outputPath);
		}
		catch(const std::exception& exception)
		{
			printf("Failed to compile: %s\r\n", exception.what());
			return -1;
		}
	}
	else
	{
		// An unknown command used to fall through and report success without
		// doing anything; treat it as a usage error instead.
		PrintUsage();
		return -1;
	}

	return 0;
}
Пример #6
0
// Collects the library names of package `index` and of every package returned by
// GetAllUses() for it. For each such package found in the workspace, its
// configuration is computed with PackageConfig() and the space-separated result
// of Gather() on the package's library entries is merged into the output,
// without duplicates.
Vector<String> MakeBuild::GetAllLibraries(const Workspace& wspc, int index,
	const VectorMap<String, String>& bm, String mainparam,
	Host& host, Builder& builder)
{ // Warning: This does not seem to do what it is supposed to do...
	Vector<String> packages = GetAllUses(wspc, index);
	packages.Add(wspc[index]);

	Index<String> collected;
	const int package_count = packages.GetCount();
	for(int n = 0; n < package_count; n++) {
		int pos = wspc.package.Find(UnixPath(packages[n]));
		if(pos < 0)
			continue; // not part of the workspace - nothing to collect

		const Package& pk = wspc.package[pos];
		Index<String> config = PackageConfig(wspc, pos, bm, mainparam, host, builder);
		Vector<String> pklibs = Split(Gather(pk.library, config.GetKeys()), ' ');
		FindAppend(collected, pklibs);
	}
	return collected.PickKeys();
}
Пример #7
0
/* ============================================================================
Function:     =        Mpi_Bitonic_sort
Purpose:      =        The root iteration logic for the bitonic sequence sort.
              =        partner_size doubles from 2 while it is <= p; in each
              =        phase the rank bit selected by and_bit decides whether
              =        the increasing or the decreasing sort step is run.
==============================================================================
Input arg:  =        1. my_rank: The rank of the calling processor.
            =        2. p: Upper bound for partner_size (presumably the number
            =           of processors); p*list_size is passed on as the
            =           global list size.
            =        3. my_list[]: A pointer to an array.
            =        4. neighbors_list[]: A pointer to an array.
            =        5. list_size: The size of the processor's array.
            =        6. comm: The MPI communicator channel.
=========================================================================== */
void Mpi_Bitonic_sort(int my_rank, int p, int my_list[], int neighbors_list[],
                      int list_size, MPI_Comm comm)
{
    int phase = 1;
    int partner_size = 2;
    unsigned and_bit = 2;

    while (partner_size <= p)
    {
        /* A clear bit selects the increasing step, a set bit the decreasing one. */
        const int increasing = ((my_rank & and_bit) == 0);

        #ifdef DEBUG
        Debugger(increasing ? 1 : 2, my_rank, phase, partner_size, my_list,
                 p*list_size, list_size);
        #endif

        if (increasing)
        {
            Bitonic_sort_incr(my_rank, my_list, neighbors_list, list_size,
                              partner_size, comm);
        }
        else
        {
            Bitonic_sort_decr(my_rank, my_list, neighbors_list, list_size,
                              partner_size, comm);
        }

        #ifdef DEBUG
        Debugger(3, my_rank, phase, partner_size, my_list, p*list_size,
                 list_size);
        #endif

        /* Advance to the next phase: both the group size and the tested bit double. */
        phase++;
        partner_size *= 2;
        and_bit <<= 1;
    }

    if (my_rank == 0)
        printf("The list is now sorted. \n");

    Gather(my_list, p*list_size, my_rank, list_size);
}
Пример #8
0
//--------------------------------------------------------------------------------
// Single-threaded binning of screen-space transformed triangles into tiles.
// Every triangle index in [start, end] (both inclusive) is gathered, culled if
// it has non-positive area, lies outside the screen or has a vertex with w <= 0,
// and otherwise appended to the bin of each tile its bounding box overlaps.
// pBin / pBinModel / pBinMesh receive the triangle, model and mesh ids and
// pNumTrisInBin holds the per-bin fill counts.
//--------------------------------------------------------------------------------
void SoftOccluderMeshScalar::BinTransformedTrianglesST(UINT taskId,
        UINT modelId,
        UINT meshId,
        UINT start,
        UINT end,
        UINT* pBin,
        USHORT* pBinModel,
        USHORT* pBinMesh,
        USHORT* pNumTrisInBin,
        UINT idx)
{
    // One triangle per iteration
    for(UINT tri = start; tri <= end; tri++)
    {
        float4 verts[3];
        Gather(verts, tri, idx);

        // Integer pixel coordinates: add 0.5 and truncate
        int px[3], py[3];
        for(int v = 0; v < 3; v++)
        {
            px[v] = (int)(verts[v].x + 0.5);
            py[v] = (int)(verts[v].y + 0.5);
        }

        // Signed area; zero or negative triangles are dropped
        int signedArea = (px[1] - px[0]) * (py[2] - py[0]) - (px[0] - px[2]) * (py[0] - py[1]);
        if(signedArea <= 0)
        {
            continue;
        }

        // Pixel bounding box, clamped to the screen
        int minX = max(min(min(px[0], px[1]), px[2]), 0);
        int maxX = min(max(max(px[0], px[1]), px[2]), mRasterData->mScreenWidth - 1);

        int minY = max(min(min(py[0], py[1]), py[2]), 0 );
        int maxY = min(max(max(py[0], py[1]), py[2]), mRasterData->mScreenHeight - 1);

        // An empty clamped box means the triangle is entirely off screen
        if(maxX < minX || maxY < minY)
        {
            continue;
        }

        // All three vertices must have w > 0, otherwise the triangle is rejected
        if(!(verts[0].w > 0.0f && verts[1].w > 0.0f && verts[2].w > 0.0f))
        {
            continue;
        }

        // Pixel bounding box -> tile bounding box
        int firstTileX = max(minX/mRasterData->mTileWidthInPixels, 0);
        int lastTileX  = min(maxX/mRasterData->mTileWidthInPixels, mRasterData->mScreenWidthInTiles-1);

        int firstTileY = max(minY/mRasterData->mTileHeightInPixels, 0);
        int lastTileY  = min(maxY/mRasterData->mTileHeightInPixels, mRasterData->mScreenHeightInTiles-1);

        // Append the triangle to every covered tile's bin
        for(int tileY = firstTileY; tileY <= lastTileY; tileY++)
        {
            int countRowBase = mRasterData->mYOffset1_ST * tileY;
            int binRowBase   = mRasterData->mYOffset2_ST * tileY;
            for(int tileX = firstTileX; tileX <= lastTileX; tileX++)
            {
                int countSlot = countRowBase + (mRasterData->mXOffset1_ST * tileX) + taskId;
                int binSlot   = binRowBase + (mRasterData->mXOffset2_ST * tileX) + (taskId * MAX_TRIS_IN_BIN_ST) + pNumTrisInBin[countSlot];
                pBin[binSlot] = tri;
                pBinModel[binSlot] = modelId;
                pBinMesh[binSlot] = meshId;
                pNumTrisInBin[countSlot] += 1;
            }
        }
    }
}
//-----------------------------------------------------------------------------------------
// Rasterize the occludee AABB and depth test it against the CPU rasterized depth buffer
// If any of the rasterized AABB pixels passes the depth test exit early and mark the occludee
// as visible. If all rasterized AABB pixels are occluded then the occludee is culled
//
// pRenderTargetPixels: depth buffer, reinterpreted below as an array of floats.
// Result: *mVisible is set to true on the early-exit paths. This function never writes
// false to *mVisible (presumably the caller initialises it - confirm).
//-----------------------------------------------------------------------------------------
void TransformedAABBoxSSE::RasterizeAndDepthTestAABBox(UINT *pRenderTargetPixels)
{
	// Set DAZ and FZ MXCSR bits to flush denormals to zero (i.e., make it faster)
	// Denormal are zero (DAZ) is bit 6 and Flush to zero (FZ) is bit 15. 
	// so to enable the two to have to set bits 6 and 15 which 1000 0000 0100 0000 = 0x8040
	_mm_setcsr( _mm_getcsr() | 0x8040 );

	// Per-lane offsets of the four pixels of a 2x2 quad relative to its start column/row
	__m128i colOffset = _mm_set_epi32(0, 1, 0, 1);
	__m128i rowOffset = _mm_set_epi32(0, 0, 1, 1);

	__m128i fxptZero = _mm_setzero_si128();
	float* pDepthBuffer = (float*)pRenderTargetPixels; 
	
	// Rasterize the AABB triangles 4 at a time
	for(UINT i = 0; i < AABB_TRIANGLES; i += SSE)
	{
		vFloat4 xformedPos[3];
		Gather(xformedPos, i);

		// Convert the gathered positions to integers.
		// NOTE(review): all four components are converted here, but only X and Y of
		// the fixed-point copy are read below; Z and W are taken from xformedPos.
        vFxPt4 xFormedFxPtPos[3];
		for(int m = 0; m < 3; m++)
		{
			xFormedFxPtPos[m].X = _mm_cvtps_epi32(xformedPos[m].X);
			xFormedFxPtPos[m].Y = _mm_cvtps_epi32(xformedPos[m].Y);
			xFormedFxPtPos[m].Z = _mm_cvtps_epi32(xformedPos[m].Z);
			xFormedFxPtPos[m].W = _mm_cvtps_epi32(xformedPos[m].W);
		}

		// Fab(x, y) =     Ax       +       By     +      C              = 0
		// Fab(x, y) = (ya - yb)x   +   (xb - xa)y + (xa * yb - xb * ya) = 0
		// Compute A = (ya - yb) for the 3 line segments that make up each triangle
		__m128i A0 = _mm_sub_epi32(xFormedFxPtPos[1].Y, xFormedFxPtPos[2].Y);
		__m128i A1 = _mm_sub_epi32(xFormedFxPtPos[2].Y, xFormedFxPtPos[0].Y);
		__m128i A2 = _mm_sub_epi32(xFormedFxPtPos[0].Y, xFormedFxPtPos[1].Y);

		// Compute B = (xb - xa) for the 3 line segments that make up each triangle
		__m128i B0 = _mm_sub_epi32(xFormedFxPtPos[2].X, xFormedFxPtPos[1].X);
		__m128i B1 = _mm_sub_epi32(xFormedFxPtPos[0].X, xFormedFxPtPos[2].X);
		__m128i B2 = _mm_sub_epi32(xFormedFxPtPos[1].X, xFormedFxPtPos[0].X);

		// Compute C = (xa * yb - xb * ya) for the 3 line segments that make up each triangle
		__m128i C0 = _mm_sub_epi32(_mm_mullo_epi32(xFormedFxPtPos[1].X, xFormedFxPtPos[2].Y), _mm_mullo_epi32(xFormedFxPtPos[2].X, xFormedFxPtPos[1].Y));
		__m128i C1 = _mm_sub_epi32(_mm_mullo_epi32(xFormedFxPtPos[2].X, xFormedFxPtPos[0].Y), _mm_mullo_epi32(xFormedFxPtPos[0].X, xFormedFxPtPos[2].Y));
		__m128i C2 = _mm_sub_epi32(_mm_mullo_epi32(xFormedFxPtPos[0].X, xFormedFxPtPos[1].Y), _mm_mullo_epi32(xFormedFxPtPos[1].X, xFormedFxPtPos[0].Y));

		// Compute triangle area: edge function 0 evaluated at vertex 0 (A0*x0 + B0*y0 + C0)
		__m128i triArea = _mm_mullo_epi32(A0, xFormedFxPtPos[0].X);
		triArea = _mm_add_epi32(triArea, _mm_mullo_epi32(B0, xFormedFxPtPos[0].Y));
		triArea = _mm_add_epi32(triArea, C0);

		__m128 oneOverTriArea = _mm_div_ps(_mm_set1_ps(1.0f), _mm_cvtepi32_ps(triArea));

		// Use bounding box traversal strategy to determine which pixels to rasterize 
		// start values are clamped to >= 0 and rounded down to an even pixel (mask 0xFFFFFFFE);
		// end values are max + 1, clamped to SCREENW / SCREENH
		__m128i startX = _mm_and_si128(Max(Min(Min(xFormedFxPtPos[0].X, xFormedFxPtPos[1].X), xFormedFxPtPos[2].X), _mm_set1_epi32(0)), _mm_set1_epi32(0xFFFFFFFE));
		__m128i endX   = Min(_mm_add_epi32(Max(Max(xFormedFxPtPos[0].X, xFormedFxPtPos[1].X), xFormedFxPtPos[2].X), _mm_set1_epi32(1)), _mm_set1_epi32(SCREENW));

		__m128i startY = _mm_and_si128(Max(Min(Min(xFormedFxPtPos[0].Y, xFormedFxPtPos[1].Y), xFormedFxPtPos[2].Y), _mm_set1_epi32(0)), _mm_set1_epi32(0xFFFFFFFE));
		__m128i endY   = Min(_mm_add_epi32(Max(Max(xFormedFxPtPos[0].Y, xFormedFxPtPos[1].Y), xFormedFxPtPos[2].Y), _mm_set1_epi32(1)), _mm_set1_epi32(SCREENH));

		for(int vv = 0; vv < 3; vv++) 
		{
            // If W (holding 1/w in our case) is not between 0 and 1,
            // then vertex is behind near clip plane (1.0 in our case.
            // If W < 1, then verify 1/W > 1 (for W>0), and 1/W < 0 (for W < 0).
		    __m128 nearClipMask0 = _mm_cmple_ps(xformedPos[vv].W, _mm_set1_ps(0.0f));
		    __m128 nearClipMask1 = _mm_cmpge_ps(xformedPos[vv].W, _mm_set1_ps(1.0f));
            __m128 nearClipMask  = _mm_or_ps(nearClipMask0, nearClipMask1);

			if(!_mm_test_all_zeros(*(__m128i*)&nearClipMask, *(__m128i*)&nearClipMask))
			{
                // At least one of the four triangles processed together has this vertex
                // with W <= 0 or W >= 1: give up on culling and report the box as visible
                *mVisible = true;
                return;
			}
		}

		// Now we have 4 triangles set up.  Rasterize them each individually.
        for(int lane=0; lane < SSE; lane++)
        {
			// Skip triangle if area is zero or negative
			if(triArea.m128i_i32[lane] <= 0)
			{
				continue;
			}

			// Extract this triangle's properties from the SIMD versions
			// (oneOverW is filled in here but not read again in this function)
            __m128 zz[3], oneOverW[3];
			for(int vv = 0; vv < 3; vv++)
			{
				zz[vv] = _mm_set1_ps(xformedPos[vv].Z.m128_f32[lane]);
				oneOverW[vv] = _mm_set1_ps(xformedPos[vv].W.m128_f32[lane]);
			}

			// Pre-scale the vertex depths by 1/area so that the integer edge values can be
			// used directly as interpolation weights below
			__m128 oneOverTotalArea = _mm_set1_ps(oneOverTriArea.m128_f32[lane]);
			zz[0] *= oneOverTotalArea;
			zz[1] *= oneOverTotalArea;
			zz[2] *= oneOverTotalArea;
			
			int startXx = startX.m128i_i32[lane];
			int endXx	= endX.m128i_i32[lane];
			int startYy = startY.m128i_i32[lane];
			int endYy	= endY.m128i_i32[lane];
		
			// Broadcast this triangle's edge equation coefficients to all four lanes
			__m128i aa0 = _mm_set1_epi32(A0.m128i_i32[lane]);
			__m128i aa1 = _mm_set1_epi32(A1.m128i_i32[lane]);
			__m128i aa2 = _mm_set1_epi32(A2.m128i_i32[lane]);

			__m128i bb0 = _mm_set1_epi32(B0.m128i_i32[lane]);
			__m128i bb1 = _mm_set1_epi32(B1.m128i_i32[lane]);
			__m128i bb2 = _mm_set1_epi32(B2.m128i_i32[lane]);

			__m128i cc0 = _mm_set1_epi32(C0.m128i_i32[lane]);
			__m128i cc1 = _mm_set1_epi32(C1.m128i_i32[lane]);
			__m128i cc2 = _mm_set1_epi32(C2.m128i_i32[lane]);

			// Per-step increments: the traversal advances two pixels at a time, hence A*2
			__m128i aa0Inc = _mm_slli_epi32(aa0, 1);
			__m128i aa1Inc = _mm_slli_epi32(aa1, 1);
			__m128i aa2Inc = _mm_slli_epi32(aa2, 1);

			__m128i row, col;

			int rowIdx;
			// To avoid this branching, choose one method to traverse and store the pixel depth
			if(gVisualizeDepthBuffer)
			{
				// Sequentially traverse and store pixel depths contiguously
				rowIdx = (startYy * SCREENW + startXx);
			}
			else
			{
				// Tranverse pixels in 2x2 blocks and store 2x2 pixel quad depths contiguously in memory ==> 2*X
				// This method provides better perfromance
				rowIdx = (startYy * SCREENW + 2 * startXx);
			}

			// A*x term for the four pixels of the first quad
			col = _mm_add_epi32(colOffset, _mm_set1_epi32(startXx));
			__m128i aa0Col = _mm_mullo_epi32(aa0, col);
			__m128i aa1Col = _mm_mullo_epi32(aa1, col);
			__m128i aa2Col = _mm_mullo_epi32(aa2, col);

			// B*y + C term for the four pixels of the first quad
			row = _mm_add_epi32(rowOffset, _mm_set1_epi32(startYy));
			__m128i bb0Row = _mm_add_epi32(_mm_mullo_epi32(bb0, row), cc0);
			__m128i bb1Row = _mm_add_epi32(_mm_mullo_epi32(bb1, row), cc1);
			__m128i bb2Row = _mm_add_epi32(_mm_mullo_epi32(bb2, row), cc2);

			// Rows also advance two at a time, hence B*2
			__m128i bb0Inc = _mm_slli_epi32(bb0, 1);
			__m128i bb1Inc = _mm_slli_epi32(bb1, 1);
			__m128i bb2Inc = _mm_slli_epi32(bb2, 1);

			// Incrementally compute Fab(x, y) for all the pixels inside the bounding box formed by (startX, endX) and (startY, endY)
			for(int r = startYy; r < endYy; r += 2,
											row  = _mm_add_epi32(row, _mm_set1_epi32(2)),
											rowIdx = rowIdx + 2 * SCREENW,
											bb0Row = _mm_add_epi32(bb0Row, bb0Inc),
											bb1Row = _mm_add_epi32(bb1Row, bb1Inc),
											bb2Row = _mm_add_epi32(bb2Row, bb2Inc))
			{
				// Compute barycentric coordinates 
				int idx = rowIdx;
				__m128i alpha = _mm_add_epi32(aa0Col, bb0Row);
				__m128i beta = _mm_add_epi32(aa1Col, bb1Row);
				__m128i gama = _mm_add_epi32(aa2Col, bb2Row);

				// Depth buffer index step per quad; matches the layout chosen for rowIdx above
				int idxIncr;
				if(gVisualizeDepthBuffer)
				{ 
					idxIncr = 2;
				}
				else
				{
					idxIncr = 4;
				}

				for(int c = startXx; c < endXx; c += 2,
												idx = idx + idxIncr,
												alpha = _mm_add_epi32(alpha, aa0Inc),
												beta  = _mm_add_epi32(beta, aa1Inc),
												gama  = _mm_add_epi32(gama, aa2Inc))
				{
					//Test Pixel inside triangle
					// A lane is set when the OR of the three edge values is greater than zero,
					// i.e. none of them is negative and they are not all zero
					__m128i mask = _mm_cmplt_epi32(fxptZero, _mm_or_si128(_mm_or_si128(alpha, beta), gama));
					
					// Early out if all of this quad's pixels are outside the triangle.
					if(_mm_test_all_zeros(mask, mask))
					{
						continue;
					}

					// Compute barycentric-interpolated depth
			        __m128 depth = _mm_mul_ps(_mm_cvtepi32_ps(alpha), zz[0]);
					depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(beta), zz[1]));
					depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(gama), zz[2]));

					// Fetch the four stored depths of this quad using the active buffer layout
					__m128 previousDepthValue;
					if(gVisualizeDepthBuffer)
					{
						previousDepthValue = _mm_set_ps(pDepthBuffer[idx], pDepthBuffer[idx + 1], pDepthBuffer[idx + SCREENW], pDepthBuffer[idx + SCREENW + 1]);
					}
					else
					{
						previousDepthValue = *(__m128*)&pDepthBuffer[idx];
					}

					// A covered pixel whose depth is >= the stored depth passes the test
					__m128 depthMask  = _mm_cmpge_ps( depth, previousDepthValue);
					__m128i finalMask = _mm_and_si128( mask, _mm_castps_si128(depthMask));
					if(!_mm_test_all_zeros(finalMask, finalMask))
					{
						*mVisible = true;
						return; //early exit
					}
				}//for each column											
			}// for each row
		}// for each triangle
	}// for each set of SIMD# triangles
}
Пример #10
0
// Reads the next token from the input and returns it.
//
// Token kinds produced (ret.id):
//   TINTEGER / TREAL  number starting with a digit or '.'; value in ret.v.nval / ret.v.fval
//   reserved word     the matching rwords[] value when the identifier is listed there
//   TSYMBOL           any other identifier; ret.v.szval is a strdup() copy of the text
//   TSTRING           double-quoted string; ret.v.szval is a strdup() copy of the text
//   TEQUAL, TSECTION, TUNARY for '=', '!' and '-'
//   TEOF              end of input
//   TINVALID          anything else, a malformed number, or a string that is not
//                     closed before the end of the line or the end of the input
//
// Blanks and tabs are skipped. On '\n' or ';' ReadLine() is called and scanning
// restarts (presumably ';' starts a comment running to the end of the line).
// The strdup() copies are not freed here; presumably the caller owns them.
CFGTOKEN CfgFile::Scan()
{
	CFGTOKEN ret;
	int i;
	char c;

	ret.id = TINVALID;

loop:
	c = Input();
	if(c=='\t' || c==' '){
		goto loop;
	} else if(c=='\n' || c==';'){
		ReadLine();
		goto loop;
	}

	// NUMBER
	else if(isdigit(c) || c=='.'){
		m_nTextLen = 0;

		// GetDigit() is expected to collect the number text into m_szText;
		// a positive result marks the number as real.
		if( GetDigit(c) > 0 ) ret.id = TREAL; else ret.id = TINTEGER;

		// A lone '.' or an empty text is not a number.
		if(m_nTextLen == 1 && m_szText[0]=='.') goto reterr;
		if(m_nTextLen == 0) goto reterr;

		Gather('\0');

		if( ret.id == TINTEGER ){
			ret.v.nval = atoi(m_szText);
		} else if( ret.id == TREAL ){
			ret.v.fval = (float)atof(m_szText);
		}
		return ret;
reterr:
		ret.id = TINVALID;
		return ret;
	}

	// SYMBOL

	else if( isalpha(c) ){
		m_nTextLen=0;
		Gather(c);
		for( c = Input(); isalpha(c) || isdigit(c); c=Input() ) Gather(c);
		Unput(c);	// the character that ended the identifier belongs to the next token
		Gather('\0');

		// Look the identifier up in the reserved word table (terminated by TINVALID).
		for( i=0; rwords[i].val != TINVALID; i++ ){
			if( strcmp(m_szText, rwords[i].szText) == 0 ){
				break;
			}
		}

		if( rwords[i].val != TINVALID ){
			ret.id = rwords[i].val;
			return ret;
		} else {
			ret.id = TSYMBOL;
			ret.v.szval = strdup(m_szText);
			return ret;
		}
	}

	// STRING CONSTANT
	else if(c == '\"'){
		m_nTextLen = 0;
		for( c = Input(); ; c=Input() ){
			if(c == '\"'){
				// A doubled quote is stored as backslash + quote; a single
				// quote closes the string.
				if((c=Input()) == '\"'){
					Gather('\\');
					Gather('\"');
				} else {
					break;
				}
			} else if(c == '\\'){
				// The character after a backslash is stored as is.
				c=Input();
				Gather(c);
				if(c == '\n') ReadLine();
			} else if(c == '\n'){
				// Unterminated string: the line ended before the closing quote.
				ret.id = TINVALID;
				ReadLine();
				return ret;
			} else if(c == EOF){
				// Unterminated string: the input ended before the closing quote.
				// Without this check the loop would keep gathering EOF forever.
				ret.id = TINVALID;
				return ret;
			} else {
				Gather(c);
			}
		}
		Unput(c);	// the character after the closing quote belongs to the next token
		Gather('\0');

		ret.id = TSTRING;
		ret.v.szval = strdup(m_szText);
		return ret;
	} else if(c == '='){
		ret.id = TEQUAL;
	} else if(c == '!'){
		ret.id = TSECTION;
	} else if(c == '-'){
		ret.id = TUNARY;
	} else if(c == EOF){
		ret.id = TEOF;
	}

	return ret;
}
Пример #11
0
// Appends the makefile fragments for one package to `makefile`.
//
// makefile      - accumulator; its outdir, outfile, output, config, install,
//                 rules, linkdep, linkfiles and linkfileend members are written.
// package       - name of the package to emit.
// all_uses      - not used by the active code.
// all_libraries - only referenced by the disabled block at the end.
// common_config - flags shared by all packages; flags of the current
//                 configuration that are not in it are added as -Dflag<NAME>
//                 to the package macro.
// exporting     - forwarded to MakeSourcePath() when source and dependency
//                 paths are built.
//
// For the package built with the MAIN flag, the compiler/linker settings and
// the start of the link command are emitted as well.
void CppBuilder::AddMakeFile(MakeFile& makefile, String package,
	const Vector<String>& all_uses, const Vector<String>& all_libraries,
	const Index<String>& common_config, bool exporting)
{
	String packagepath = PackagePath(package);
	Package pkg;
	pkg.Load(packagepath);
	String packagedir = GetFileFolder(packagepath);
	Vector<String> src = GetUppDirs();
	for(int i = 0; i < src.GetCount(); i++)
		src[i] = UnixPath(src[i]);

	bool main = HasFlag("MAIN");
	bool is_shared = HasFlag("SO");
	bool libout = !main && !HasFlag("NOLIB");
	bool win32 = HasFlag("WIN32");

	String pack_ident = MakeIdent(package);
	String outdir = "OutDir_" + pack_ident;
	String macros = "Macro_" + pack_ident;
	String macdef = "$(Macro)";
	String objext = (HasFlag("MSC") || HasFlag("EVC") ? ".obj" : ".o");

	// Sorted copy of the configuration flags: flags that are not common to all
	// packages become -Dflag... defines, and the capitalised names are later
	// joined into the output directory name.
	Vector<String> x(config.GetKeys(), 1);
	Sort(x);
	for(int i = 0; i < x.GetCount(); i++) {
		if(common_config.Find(x[i]) < 0)
			macdef << " -Dflag" << x[i];
		x[i] = InitCaps(x[i]);
	}

	// Output file name: executable for main, shared or static library otherwise.
	makefile.outdir << "$(" << outdir << ")";
	makefile.outfile << AdjustMakePath(GetFileTitle(NativePath(package)));
	if(main)
		makefile.outfile << GetTargetExt();
	else if(is_shared)
		makefile.outfile << (win32 ? ".dll" : ".so");
	else
		makefile.outfile << (win32 && HasFlag("MSC") ? ".lib" : ".a");
	makefile.output << (main ? String("$(OutDir)") : makefile.outdir) << makefile.outfile;

	if(main) {
		makefile.config << "CXX = c++\n"
			"LINKER = $(CXX)\n";
		String flags;
		if(HasFlag("DEBUG"))
			flags << " -D_DEBUG " << debug_options;
		else
			flags << ' ' << release_options;
		if(HasFlag("DEBUG_MINIMAL"))
			flags << " -ggdb -g1";
		if(HasFlag("DEBUG_FULL"))
			flags << " -ggdb -g2";
		if(is_shared && !win32)
			flags << " -fPIC ";
		flags << ' ' << Gather(pkg.option, config.GetKeys());
		makefile.config << "CFLAGS =" << flags << "\n"
			"CXXFLAGS =" << flags << "\n"
			"LDFLAGS = " << (HasFlag("DEBUG") ? debug_link : release_link) << " $(LINKOPTIONS)\n"
			"LIBPATH =";
		for(int i = 0; i < libpath.GetCount(); i++)
			makefile.config << " -L" << GetMakePath(AdjustMakePath(GetHostPathQ(libpath[i])));
		makefile.config << "\n"
			"AR = ar -sr\n\n";
		makefile.install << "\t-mkdir -p $(OutDir)\n";
		Vector<String> lib;
		String lnk;
		lnk << "$(LINKER)";
		if(!HasFlag("SHARED"))
			lnk << " -static";
		if(HasFlag("WIN32")) {
			lnk << " -mwindows";
			// Append to lnk, not to makefile.linkfiles: linkfiles is replaced
			// by lnk below, so anything appended to it here would be lost.
			if(!HasFlag("GUI"))
				lnk << " -mconsole";
		}
		lnk << " -o $(OutFile)";
		if(HasFlag("DEBUG") || HasFlag("DEBUG_MINIMAL") || HasFlag("DEBUG_FULL"))
			lnk << " -ggdb";
		else
			lnk << (!HasFlag("OSX11") ? " -Wl,-s" : "");

		lnk << " $(LIBPATH)";
		if (!HasFlag("OSX11"))
			lnk << " -Wl,-O,2";
		lnk << " $(LDFLAGS) -Wl,--start-group ";

		makefile.linkfiles = lnk;
	}

	// Per-package output directory and macro definitions.
	makefile.config << outdir << " = $(UPPOUT)"
		<< GetMakePath(AdjustMakePath(String().Cat() << package << '/' << method << '-' << Join(x, "-") << '/')) << "\n"
		<< macros << " = " << macdef << "\n";

	makefile.install << "\t-mkdir -p $(" << outdir << ")\n";

	// libdep is the dependency line and libfiles the archive/link command of
	// the package library rule emitted at the end when libout is set.
	String libdep, libfiles;

	libdep << makefile.output << ":";
	if(is_shared)
	{
		libfiles = "c++ -shared -fPIC"; // -v";
		Point p = ExtractVersion();
		if(!IsNull(p.x)) {
			libfiles << " -Xlinker --major-image-version -Xlinker " << p.x;
			if(!IsNull(p.y))
				libfiles << " -Xlinker --minor-image-version -Xlinker " << p.y;
		}
		libfiles << " -o ";
	}
	else
		libfiles = "$(AR) ";
	libfiles << makefile.output;

	// Libraries of this package: entries with a library file extension are
	// looked up in libpath, everything else is passed as -l<name>.
	Vector<String> libs = Split(Gather(pkg.library, config.GetKeys()), ' ');
	for(int i = 0; i < libs.GetCount(); i++) {
		String ln = libs[i];
		String ext = ToLower(GetFileExt(ln));
		if(ext == ".a" || ext == ".so" || ext == ".dll")
			makefile.linkfileend << " \\\n\t\t\t" << GetHostPathQ(FindInDirs(libpath, ln));
		else
			makefile.linkfileend << " \\\n\t\t\t-l" << ln;
	}
	
	// One compile rule per source file of the package (separators are skipped).
	for(int i = 0; i < pkg.GetCount(); i++)
		if(!pkg[i].separator) {
			String gop = Gather(pkg[i].option, config.GetKeys());
			String fn = SourcePath(package, pkg[i]);
			String ext = ToLower(GetFileExt(fn));
			bool isc = ext == ".c";
			bool isrc = (ext == ".rc" && HasFlag("WIN32"));
			bool iscpp = (ext == ".cpp" || ext == ".cc" || ext == ".cxx");
			bool isicpp = (ext == ".icpp");
			// A .brc file is compiled as C from the file named "<name>.brcc".
			if(ext == ".brc") {
				isc = true;
				fn << "c";
			}
			if(isc || isrc || iscpp || isicpp) {
				String outfile;
				outfile << makefile.outdir << AdjustMakePath(GetFileTitle(fn)) << (isrc ? "_rc" : "") << objext;
				String srcfile = GetMakePath(MakeSourcePath(src, fn, false, exporting));
				makefile.rules << outfile << ": " << srcfile;
				Vector<String> dep = HdependGetDependencies(fn);
				Sort(dep, GetLanguageInfo());
				for(int d = 0; d < dep.GetCount(); d++) {
					String dfn = MakeSourcePath(src, dep[d], true, exporting);
					if(!IsNull(dfn))
						makefile.rules << " \\\n\t" << GetMakePath(dfn);
				}
				makefile.rules << "\n"
					"\t$(CXX) -c " << (isc ? "-x c $(CFLAGS)" : "-x c++ $(CXXFLAGS)") << " $(CINC) $(" << macros << ") "
						<< gop << " " << srcfile << " -o " << outfile << "\n\n";
				// Objects go straight to the final link when no library is
				// built for this package or the source is an .icpp file;
				// otherwise they go into the package library.
				if(!libout || isicpp) {
					makefile.linkdep << " \\\n\t" << outfile;
					makefile.linkfiles << " \\\n\t\t" << outfile;
				}
				else {
					libdep << " \\\n\t" << outfile;
					libfiles << " \\\n\t\t" << outfile;
				}
			}
			else
			if(ext == ".o" || ext == ".obj" || ext == ".a" || ext == ".so" || ext == ".lib" || ext == ".dll") {
				// Prebuilt objects and libraries are linked as they are.
				makefile.linkdep << " \\\n\t" << fn;
				makefile.linkfiles << ' ' << fn;
			}
		}

	if(libout) {
		makefile.rules << libdep << "\n\t" << libfiles << "\n\n";
		makefile.linkdep << " \\\n\t" << makefile.output;
		makefile.linkfiles << " \\\n\t\t\t" << makefile.output;
	}
/*
	if(main) {
		if(!HasFlag("SOLARIS")&&!HasFlag("OSX11"))
			makefile.linkfiles << " \\\n\t\t-Wl,--start-group ";
		DDUMPC(all_libraries);
		for(int i = 0; i < all_libraries.GetCount(); i++) {
			String ln = all_libraries[i];
			String ext = ToLower(GetFileExt(ln));
			if(ext == ".a" || ext == ".so" || ext == ".dll")
				makefile.linkfileend << " \\\n\t\t\t" << GetHostPathQ(FindInDirs(libpath, ln));
			else
				makefile.linkfileend << " \\\n\t\t\t-l" << ln;
		}
		if(!HasFlag("SOLARIS")&&!HasFlag("OSX11"))
			makefile.linkfileend << " \\\n\t\t-Wl,--end-group\n\n";
	}
*/
}
//-----------------------------------------------------------------------------------------
// Rasterize the occludee AABB and depth test it against the CPU rasterized depth buffer
// If any of the rasterized AABB pixels passes the depth test exit early and mark the occludee
// as visible. If all rasterized AABB pixels are occluded then the occludee is culled
//
// pRenderTargetPixels: depth buffer, reinterpreted below as an array of floats.
// pXformedPos, idx:    passed through to Gather() to fetch the triangle vertices.
// Returns true as soon as a row contains a pixel that passes the test, false otherwise.
// NOTE(review): the ssp_* calls look like wrappers around the _mm_* intrinsics of the
// same name - confirm against the header that declares them.
//-----------------------------------------------------------------------------------------
bool TransformedAABBoxSSE::RasterizeAndDepthTestAABBox(UINT *pRenderTargetPixels, const __m128 pXformedPos[], UINT idx)
{
	// Set DAZ and FZ MXCSR bits to flush denormals to zero (i.e., make it faster)
	// Denormal are zero (DAZ) is bit 6 and Flush to zero (FZ) is bit 15. 
	// so to enable the two to have to set bits 6 and 15 which 1000 0000 0100 0000 = 0x8040
	ssp_setcsr( ssp_getcsr() | 0x8040 );

	// Per-lane offsets of the four pixels of a 2x2 quad relative to its start column/row
	__m128i colOffset = ssp_setr_epi32(0, 1, 0, 1);
	__m128i rowOffset = ssp_setr_epi32(0, 0, 1, 1);

	float* pDepthBuffer = (float*)pRenderTargetPixels; 
	
	// Rasterize the AABB triangles 4 at a time
	for(UINT i = 0; i < AABB_TRIANGLES; i += SSE)
	{
		vFloat4 xformedPos[3];
		Gather(xformedPos, i, pXformedPos, idx);

		// use fixed-point only for X and Y.  Avoid work for Z and W.
        __m128i fxPtX[3], fxPtY[3];
		for(int m = 0; m < 3; m++)
		{
			fxPtX[m] = ssp_cvtps_epi32(xformedPos[m].X);
			fxPtY[m] = ssp_cvtps_epi32(xformedPos[m].Y);
		}

		// Fab(x, y) =     Ax       +       By     +      C              = 0
		// Fab(x, y) = (ya - yb)x   +   (xb - xa)y + (xa * yb - xb * ya) = 0
		// Compute A = (ya - yb) for the 3 line segments that make up each triangle
		__m128i A0 = ssp_sub_epi32(fxPtY[1], fxPtY[2]);
		__m128i A1 = ssp_sub_epi32(fxPtY[2], fxPtY[0]);
		__m128i A2 = ssp_sub_epi32(fxPtY[0], fxPtY[1]);

		// Compute B = (xb - xa) for the 3 line segments that make up each triangle
		__m128i B0 = ssp_sub_epi32(fxPtX[2], fxPtX[1]);
		__m128i B1 = ssp_sub_epi32(fxPtX[0], fxPtX[2]);
		__m128i B2 = ssp_sub_epi32(fxPtX[1], fxPtX[0]);

		// Compute C = (xa * yb - xb * ya) for the 3 line segments that make up each triangle
		__m128i C0 = ssp_sub_epi32(ssp_mullo_epi32(fxPtX[1], fxPtY[2]), ssp_mullo_epi32(fxPtX[2], fxPtY[1]));
		__m128i C1 = ssp_sub_epi32(ssp_mullo_epi32(fxPtX[2], fxPtY[0]), ssp_mullo_epi32(fxPtX[0], fxPtY[2]));
		__m128i C2 = ssp_sub_epi32(ssp_mullo_epi32(fxPtX[0], fxPtY[1]), ssp_mullo_epi32(fxPtX[1], fxPtY[0]));

		// Compute triangle area as B2 * A1 - B1 * A2
		__m128i triArea = ssp_mullo_epi32(B2, A1);
		triArea = ssp_sub_epi32(triArea, ssp_mullo_epi32(B1, A2));
		__m128 oneOverTriArea = ssp_div_ps(ssp_set1_ps(1.0f), ssp_cvtepi32_ps(triArea));

		// Depth plane setup: Z[0] is the depth at vertex 0, Z[1] and Z[2] are the depth
		// differences to vertices 1 and 2 scaled by 1/area, so that
		// depth = Z[0] + beta * Z[1] + gama * Z[2] with the integer edge values as weights
		__m128 Z[3];
		Z[0] = xformedPos[0].Z;
		Z[1] = ssp_mul_ps(ssp_sub_ps(xformedPos[1].Z, Z[0]), oneOverTriArea);
		Z[2] = ssp_mul_ps(ssp_sub_ps(xformedPos[2].Z, Z[0]), oneOverTriArea);
		
		// Use bounding box traversal strategy to determine which pixels to rasterize 
		// start values are clamped to >= 0 and rounded down to an even pixel (mask ~1);
		// end values are clamped to SCREENW - 1 / SCREENH - 1
		__m128i startX =  ssp_and_si128(Max(Min(Min(fxPtX[0], fxPtX[1]), fxPtX[2]),  ssp_set1_epi32(0)), ssp_set1_epi32(~1));
		__m128i endX   = Min(Max(Max(fxPtX[0], fxPtX[1]), fxPtX[2]), ssp_set1_epi32(SCREENW - 1));

		__m128i startY = ssp_and_si128(Max(Min(Min(fxPtY[0], fxPtY[1]), fxPtY[2]), ssp_set1_epi32(0)), ssp_set1_epi32(~1));
		__m128i endY   = Min(Max(Max(fxPtY[0], fxPtY[1]), fxPtY[2]), ssp_set1_epi32(SCREENH - 1));

		// Now we have 4 triangles set up.  Rasterize them each individually.
        for(int lane=0; lane < SSE; lane++)
        {
			// Skip triangle if area is zero or negative
			if(triArea.m128i_i32[lane] <= 0)
			{
				continue;
			}

			// Extract this triangle's properties from the SIMD versions
            __m128 zz[3];
			for(int vv = 0; vv < 3; vv++)
			{
				zz[vv] = ssp_set1_ps(Z[vv].m128_f32[lane]);
			}

			int startXx = startX.m128i_i32[lane];
			int endXx	= endX.m128i_i32[lane];
			int startYy = startY.m128i_i32[lane];
			int endYy	= endY.m128i_i32[lane];
		
			// Broadcast this triangle's edge equation coefficients to all four lanes
			__m128i aa0 = ssp_set1_epi32(A0.m128i_i32[lane]);
			__m128i aa1 = ssp_set1_epi32(A1.m128i_i32[lane]);
			__m128i aa2 = ssp_set1_epi32(A2.m128i_i32[lane]);

			__m128i bb0 = ssp_set1_epi32(B0.m128i_i32[lane]);
			__m128i bb1 = ssp_set1_epi32(B1.m128i_i32[lane]);
			__m128i bb2 = ssp_set1_epi32(B2.m128i_i32[lane]);

			// Per-step increments: columns and rows both advance two pixels at a time
			__m128i aa0Inc = ssp_slli_epi32(aa0, 1);
			__m128i aa1Inc = ssp_slli_epi32(aa1, 1);
			__m128i aa2Inc = ssp_slli_epi32(aa2, 1);

			__m128i bb0Inc = ssp_slli_epi32(bb0, 1);
			__m128i bb1Inc = ssp_slli_epi32(bb1, 1);
			__m128i bb2Inc = ssp_slli_epi32(bb2, 1);

			__m128i row, col;

			// Tranverse pixels in 2x2 blocks and store 2x2 pixel quad depths contiguously in memory ==> 2*X
			// This method provides better perfromance
			int	rowIdx = (startYy * SCREENW + 2 * startXx);

			// A*x term for the four pixels of the first quad
			col = ssp_add_epi32(colOffset, ssp_set1_epi32(startXx));
			__m128i aa0Col = ssp_mullo_epi32(aa0, col);
			__m128i aa1Col = ssp_mullo_epi32(aa1, col);
			__m128i aa2Col = ssp_mullo_epi32(aa2, col);

			// B*y + C term for the four pixels of the first quad
			row = ssp_add_epi32(rowOffset, ssp_set1_epi32(startYy));
			__m128i bb0Row = ssp_add_epi32(ssp_mullo_epi32(bb0, row), ssp_set1_epi32(C0.m128i_i32[lane]));
			__m128i bb1Row = ssp_add_epi32(ssp_mullo_epi32(bb1, row), ssp_set1_epi32(C1.m128i_i32[lane]));
			__m128i bb2Row = ssp_add_epi32(ssp_mullo_epi32(bb2, row), ssp_set1_epi32(C2.m128i_i32[lane]));

			// Full edge function values at the first quad of the first row
			__m128i sum0Row = ssp_add_epi32(aa0Col, bb0Row);
			__m128i sum1Row = ssp_add_epi32(aa1Col, bb1Row);
			__m128i sum2Row = ssp_add_epi32(aa2Col, bb2Row);

			// Depth change for one column step (two pixels in x)
			__m128 zx = ssp_mul_ps(ssp_cvtepi32_ps(aa1Inc), zz[1]);
			zx = ssp_add_ps(zx, ssp_mul_ps(ssp_cvtepi32_ps(aa2Inc), zz[2]));

			// Incrementally compute Fab(x, y) for all the pixels inside the bounding box formed by (startX, endX) and (startY, endY)
			for(int r = startYy; r < endYy; r += 2,
											rowIdx += 2 * SCREENW,
											sum0Row = ssp_add_epi32(sum0Row, bb0Inc),
											sum1Row = ssp_add_epi32(sum1Row, bb1Inc),
											sum2Row = ssp_add_epi32(sum2Row, bb2Inc))
			{
				// Compute barycentric coordinates 
				int index = rowIdx;
				__m128i alpha = sum0Row;
				__m128i beta = sum1Row;
				__m128i gama = sum2Row;

				//Compute barycentric-interpolated depth
				__m128 depth = zz[0];
				depth = ssp_add_ps(depth, ssp_mul_ps(ssp_cvtepi32_ps(beta), zz[1]));
				depth = ssp_add_ps(depth, ssp_mul_ps(ssp_cvtepi32_ps(gama), zz[2]));
				// Accumulates the pass masks of every quad in this row; tested once per row
				__m128i anyOut = ssp_setzero_si128();

				for(int c = startXx; c < endXx; c += 2,
												index += 4,
												alpha = ssp_add_epi32(alpha, aa0Inc),
												beta  = ssp_add_epi32(beta, aa1Inc),
												gama  = ssp_add_epi32(gama, aa2Inc),
												depth = ssp_add_ps(depth, zx))
				{
					//Test Pixel inside triangle
					// The sign bit of mask is set when any of the three edge values is negative
					__m128i mask = ssp_or_si128(ssp_or_si128(alpha, beta), gama);
					
					__m128 previousDepthValue = ssp_load_ps(&pDepthBuffer[index]);
					__m128 depthMask  = ssp_cmpge_ps(depth, previousDepthValue);
					// ~mask & depthMask: the sign bit survives only where no edge value is
					// negative and depth >= stored depth
					__m128i finalMask = ssp_andnot_si128(mask, ssp_castps_si128(depthMask));
					anyOut = ssp_or_si128(anyOut, finalMask);
				}//for each column	
				
				// Only the sign bits are examined: one set bit means a pixel passed the test
				if(!ssp_testz_si128(anyOut, ssp_set1_epi32(0x80000000)))
				{
					return true; //early exit
				}
			}// for each row
		}// for each triangle
	}// for each set of SIMD# triangles

	return false;
}
Пример #13
0
void Renderer::Render() 
{	
	if(m_ClearLighting)
		ClearLighting();
	if(m_ClearAccumulationBuffer)
		ClearAccumulationBuffer();
	
	CTimer frameTimer(CTimer::OGL);
	CTimer timer(CTimer::OGL);

	if(m_ProfileFrame)
	{	
		std::cout << std::endl;
		std::cout << "Profile frame --------------- " << std::endl;
		std::cout << std::endl;
		frameTimer.Start();
		timer.Start();
	}

	SetUpRender();

	if(m_ProfileFrame) timer.Stop("set up render");
	if(m_ProfileFrame) timer.Start();

	UpdateUniformBuffers();

	if(m_ProfileFrame) timer.Stop("update ubs");
	
	if(m_CurrentPathAntiradiance == 0 && m_CurrentPathShadowmap == 0)
	{
		m_experimentData->Init("test", "nois.data");
		m_experimentData->MaxTime(450);

		m_globalTimer->Start();
		m_resultTimer->Start();
		m_glTimer->Start();
		CreateGBuffer();

		m_cudaGather->rebuildVisiblePointsBvh();
	}
	
	if (m_confManager->GetConfVars()->drawGBufferTextures) {
		int border = 10;
		int width = (m_camera->GetWidth() - 4 * border) / 2;
		int height = (m_camera->GetHeight() - 4 * border) / 2;
		m_textureViewer->drawTexture(m_gbuffer->GetNormalTexture(),  border, border, width, height);
		m_textureViewer->drawTexture(m_gbuffer->GetPositionTextureWS(),  3 * border + width, border, width, height);
		m_textureViewer->drawTexture(m_normalizeAntiradianceRenderTarget->GetTarget(2),  border, 3 * border + height, width, height);
		m_textureViewer->drawTexture(m_depthBuffer.get(),  3 * border + width, 3 * border + height, width, height);
		return;
	}
	
	std::vector<Avpl> avpls_shadowmap;
	std::vector<Avpl> avpls_antiradiance;

	if(m_ProfileFrame) timer.Start();

	//GetAVPLs(avpls_shadowmap, avpls_antiradiance);

	m_avplShooter->shoot(avpls_shadowmap, avpls_antiradiance, m_confManager->GetConfVars()->NumAVPLsPerFrame);
	m_CurrentPathAntiradiance += m_confManager->GetConfVars()->NumAVPLsPerFrame;
	
	if(m_ProfileFrame) timer.Stop("get avpls");
	if(m_ProfileFrame) timer.Start();

	if (m_confManager->GetConfVars()->gatherWithCuda) 
	{
		if (avpls_antiradiance.size() > 0) {
			m_cudaGather->run(avpls_antiradiance, m_camera->GetPosition(), 
				m_sceneProbe.get(), m_scene->getSceneExtent(), m_ProfileFrame);
			
			Add(m_gatherAntiradianceRenderTarget.get(), m_cudaRenderTarget.get());
		}
		
		if(m_ProfileFrame) timer.Stop("gather");
		if(m_ProfileFrame) timer.Start();
	}
	else
	{
		Gather(avpls_shadowmap, avpls_antiradiance);
		
		if(m_ProfileFrame) timer.Stop("gather");
		if(m_ProfileFrame) timer.Start();
	}

	Normalize(m_normalizeShadowmapRenderTarget.get(), m_gatherShadowmapRenderTarget.get(), m_CurrentPathShadowmap);
	Normalize(m_normalizeAntiradianceRenderTarget.get(), m_gatherAntiradianceRenderTarget.get(), m_CurrentPathAntiradiance);
	
	if(m_ProfileFrame) timer.Stop("normalize");
	if(m_ProfileFrame) timer.Start();

	if(m_confManager->GetConfVars()->LightingMode == 2)
	{
		drawAreaLight(m_normalizeShadowmapRenderTarget.get(), glm::vec3(0.f, 0.f, 0.f));
		drawAreaLight(m_normalizeAntiradianceRenderTarget.get(), glm::vec3(0.f, 0.f, 0.f));
	}
	else
	{
		drawAreaLight(m_normalizeShadowmapRenderTarget.get(), m_scene->getAreaLight()->getRadiance());
	}
	
	SetTransformToCamera();
	
	Add(m_resultRenderTarget.get(), m_normalizeAntiradianceRenderTarget.get(), m_normalizeShadowmapRenderTarget.get());

	if (m_confManager->GetConfVars()->UseDebugMode)
	{
		if (m_confManager->GetConfVars()->DrawClusterLights) {
			CRenderTargetLock lock(m_resultRenderTarget.get());
			PointCloud pc(m_cudaGather->getVisiblePointsBvh()->centerPositions,
				m_cudaGather->getVisiblePointsBvh()->colors, m_ubTransform.get(),
				m_confManager->GetConfVars()->lightRadiusScale * m_scene->getSceneExtent() / 100.f);
			pc.Draw();
			//m_cudaGather->getPointCloud()->Draw();
		}
		if (m_confManager->GetConfVars()->DrawClusterAABBs) {
			CRenderTargetLock lock(m_resultRenderTarget.get());
			AABBCloud aabb(m_cudaGather->getVisiblePointsBvh()->clusterMin, 
				m_cudaGather->getVisiblePointsBvh()->clusterMax, m_ubTransform.get());
			aabb.Draw();
			//m_cudaGather->getAABBCloud()->Draw();
		}
		
		if (m_confManager->GetConfVars()->DrawLights) {
			CRenderTargetLock lock(m_resultRenderTarget.get());
			m_pointCloud->Draw();
		}

		if (m_sceneProbe) {
			m_sceneProbe->draw(m_resultRenderTarget.get(), m_debugProgram.get(), 
					m_ubTransform.get(), m_camera);
			m_pointCloud->Draw();
		}
	}
	DrawDebug();

	if(m_ProfileFrame) timer.Stop("draw debug");
		
	m_postProcess->postprocess(m_resultRenderTarget->GetTarget(0), m_postProcessRenderTarget.get());
	m_textureViewer->drawTexture(m_postProcessRenderTarget->GetTarget(0), 0, 0, m_camera->GetWidth(), m_camera->GetHeight());	
	
	m_NumAVPLs += (int)avpls_antiradiance.size();
	m_NumAVPLs += (int)avpls_shadowmap.size();

	if(m_ProfileFrame) timer.Start();

	avpls_antiradiance.clear();
	avpls_shadowmap.clear();

	if(m_ProfileFrame) timer.Stop("clear avpls");
	
	CheckExport();

	m_Frame++;

	if(m_ProfileFrame) frameTimer.Stop("frame time");
		
	m_ProfileFrame = false;
	m_FinishedDebug = true;
}
//--------------------------------------------------------------------------------
// Bin the screen space transformed triangles into tiles. For single threaded version
//
// Every triangle with index in the inclusive range [start, end] is fetched with
// Gather(), rejected if it fails the w test or has a non-positive signed area,
// and otherwise appended to each tile overlapped by its screen space bounding box.
//
//   taskId        - selects the per-task slice of every bin that is written to
//   modelId       - stored (narrowed to USHORT) in pBinModel next to the triangle
//   meshId        - stored (narrowed to USHORT) in pBinMesh next to the triangle
//   start, end    - first and last triangle index to process (both inclusive)
//   pBin          - out: triangle index per bin entry
//   pBinModel     - out: model id per bin entry
//   pBinMesh      - out: mesh id per bin entry
//   pNumTrisInBin - in/out: number of entries currently stored per (tile, task)
//
// A (tile, task) slice that already holds MAX_TRIS_IN_BIN_ST entries is left
// untouched: the triangle is not recorded for that tile instead of being written
// outside the slice.
//--------------------------------------------------------------------------------
void TransformedMeshScalar::BinTransformedTrianglesST(UINT taskId,
													  UINT modelId,
													  UINT meshId,
													  UINT start,
													  UINT end,
													  UINT* pBin,
												      USHORT* pBinModel,
													  USHORT* pBinMesh,
													  USHORT* pNumTrisInBin)
{
	// working on one triangle at a time
	for(UINT index = start; index <= end; index++)
	{
		float4 xformedPos[3];
		Gather(xformedPos, index);

		// Reject the triangle if any of its verts is behind the nearclip plane
		// (tested first so that rejected triangles skip all of the work below)
		if(xformedPos[0].w > 1.0f || xformedPos[1].w > 1.0f || xformedPos[2].w > 1.0f) continue;

		// TODO: Maybe convert to Fixed pt and store it once so that dont have to convert to fixedPt again during rasterization
		int4 xFormedFxPtPos[3] = {
			int4(xformedPos[0]),
			int4(xformedPos[1]),
			int4(xformedPos[2]),
		};

		// Compute the signed triangle area term (edge function of vertex 0
		// against the edge from vertex 1 to vertex 2)
		int A0 = xFormedFxPtPos[1].y - xFormedFxPtPos[2].y;
		int B0 = xFormedFxPtPos[2].x - xFormedFxPtPos[1].x;
		int C0 = (xFormedFxPtPos[1].x * xFormedFxPtPos[2].y) - (xFormedFxPtPos[2].x * xFormedFxPtPos[1].y);
		int triArea = A0 * xFormedFxPtPos[0].x + B0 * xFormedFxPtPos[0].y + C0;

		// Skip triangle if the area is zero or negative
		if(triArea <= 0) continue;

		// Find bounding box for screen space triangle in terms of pixels
		int startX = max(min(min(xFormedFxPtPos[0].x, xFormedFxPtPos[1].x), xFormedFxPtPos[2].x), 0);
		int endX   = min(max(max(xFormedFxPtPos[0].x, xFormedFxPtPos[1].x), xFormedFxPtPos[2].x) + 1, SCREENW);

		int startY = max(min(min(xFormedFxPtPos[0].y, xFormedFxPtPos[1].y), xFormedFxPtPos[2].y), 0 );
		int endY   = min(max(max(xFormedFxPtPos[0].y, xFormedFxPtPos[1].y), xFormedFxPtPos[2].y) + 1, SCREENH);

		// Convert bounding box in terms of pixels to bounding box in terms of tiles
		int startXx = max(startX/TILE_WIDTH_IN_PIXELS, 0);
		int endXx   = min(endX/TILE_WIDTH_IN_PIXELS, SCREENW_IN_TILES-1);

		int startYy = max(startY/TILE_HEIGHT_IN_PIXELS, 0);
		int endYy   = min(endY/TILE_HEIGHT_IN_PIXELS, SCREENH_IN_TILES-1);

		// Add triangle to the tiles or bins that the bounding box covers
		for(int row = startYy; row <= endYy; row++)
		{
			int offset1 = YOFFSET1_ST * row;
			int offset2 = YOFFSET2_ST * row;
			for(int col = startXx; col <= endXx; col++)
			{
				int idx1 = offset1 + (XOFFSET1_ST * col) + taskId;

				// The entry index below is taskId * MAX_TRIS_IN_BIN_ST + count, so a
				// count of MAX_TRIS_IN_BIN_ST or more would land in the neighbouring
				// slice (or past the arrays). Skip the write when the slice is full.
				if(pNumTrisInBin[idx1] >= MAX_TRIS_IN_BIN_ST) continue;

				int idx2 = offset2 + (XOFFSET2_ST * col) + (taskId * MAX_TRIS_IN_BIN_ST) + pNumTrisInBin[idx1];
				pBin[idx2] = index;
				pBinModel[idx2] = static_cast<USHORT>(modelId);
				pBinMesh[idx2] = static_cast<USHORT>(meshId);
				pNumTrisInBin[idx1] += 1;
			}
		}
	}
}