Esempio n. 1
0
//Binning
// Converts the vertices of four triangles from array-of-structures to
// structure-of-arrays form.
//   vertices : 12 entries; entry (3*t + c) is read as corner c of triangle t,
//              holding (x, y, z, w) in its four SIMD lanes.
//   dest     : 3 entries, one per corner; after the call dest[c].x/.y/.z/.w each
//              hold that component for the four triangles, one triangle per lane.
static void gather4Simd(VecF32Soa dest[3],VecF32 vertices[12]){
	const uint32 kCorners = 3;
	for(uint32 corner = 0;corner<kCorners;++corner){
		VecF32* src = vertices + corner;
		__m128 row0 = src[0].simd;          // triangle 0: x, y, z, w
		__m128 row1 = src[kCorners].simd;   // triangle 1: x, y, z, w
		__m128 row2 = src[2*kCorners].simd; // triangle 2: x, y, z, w
		__m128 row3 = src[3*kCorners].simd; // triangle 3: x, y, z, w

		// In-place 4x4 transpose: row0 becomes all x, row1 all y, and so on.
		_MM_TRANSPOSE4_PS(row0, row1, row2, row3);

		VecF32Soa& out = dest[corner];
		out.x = VecF32(row0);
		out.y = VecF32(row1);
		out.z = VecF32(row2);
		out.w = VecF32(row3);
	}
}
// Computes the real eigenvalues of a 2x2 matrix from its trace T and
// determinant D, as the roots T/2 +/- sqrt(T*T/4 - D) of the characteristic
// polynomial.
//   m       : the 2x2 matrix.
//   returns : a 2-element vector (larger eigenvalue first), or a
//             default-constructed VecF32 when the eigenvalues are not real.
VecF32 LinearAlgebra::eigenValue(const Mat2x<F32,2,2> &m){
    const F32 T = m.trace();
    const F32 D = m.determinant();
    const F32 discriminant = T*T/4 -D;

    // A negative discriminant means a complex-conjugate pair. The negated
    // comparison also rejects NaN, which keeps the empty result for NaN input.
    if(!(discriminant>=0))
        return VecF32();

    // discriminant == 0 is a valid repeated real eigenvalue T/2 (e.g. the
    // identity matrix), so it must not be treated as "no eigenvalue".
    const F32 root = std::sqrt(discriminant);
    VecF32 eigen_value(2);
    eigen_value(0) = T/2 + root;
    eigen_value(1) = T/2 - root;
    return eigen_value;
}
// Computes a QR decomposition of m.
//   m : input matrix.
//   Q : output; overwritten with orthogonalGramSchmidt(m).
//   R : output; cleared, resized to m.sizeI() x m.sizeJ(), then its upper
//       triangle (j >= i) is filled with R(i,j) = <Q.col(i), m.col(j)>.
//       Entries below the diagonal are not written here and keep whatever
//       value resize() gives them (presumably zero - confirm against Mat2F32).
void LinearAlgebra::QRDecomposition(const Mat2F32 &m, Mat2F32 &Q, Mat2F32 &R){
    Q = LinearAlgebra::orthogonalGramSchmidt(m);
    R.clear();
    R.resize(m.sizeI(),m.sizeJ());

    // Cache the columns of m once. There is one entry per column, so the
    // container is sized by sizeJ(); sizing it by sizeI() would be indexed out
    // of bounds when m has more columns than rows.
    std::vector<VecF32> columns(m.sizeJ(),VecF32(m.sizeI()));
    for(unsigned int j =0;j<m.sizeJ();j++)
        columns[j]=m.getCol(j);

    for(unsigned int i =0;i<m.sizeI();i++){
        VecF32 e = Q.getCol(i);
        for(unsigned int j =i;j<m.sizeJ();j++){
            R(i,j)=productInner(e,columns[j]);
        }
    }
}
// Draws one sample from the tabulated distribution.
// A value is drawn from `uni` and located in `_repartition` with
// std::upper_bound; the resulting flat index is split into a (row, column)
// pair using _mat2d.sizeJ() and mapped to coordinates as index*_step + _xmin.
// Only the two-variate case is handled: for any other _xmin size an error is
// written to std::cerr and a default-constructed VecF32 is returned.
VecF32 DistributionMultiVariateRegularStep::randomVariable()const {
    // Draw before the dimension check so `uni` is advanced on every call.
    const F32 draw = this->uni.randomVariable();

    if(_xmin.size()!=2){
        std::cerr<<"work only for two variates";
        return VecF32();
    }

    // Index of the first repartition entry strictly greater than the draw.
    const std::vector<F32>::const_iterator upper = std::upper_bound(_repartition.begin(), _repartition.end(), draw);
    const I32 cell = I32(upper - _repartition.begin());

    // Split the flat index into row (0) and column (1).
    Vec2I32 grid;
    grid(0) = cell/_mat2d.sizeJ();
    grid(1) = cell-grid(0)*_mat2d.sizeJ();

    VecF32 sample(2);
    sample(0) = grid(0)*_step+_xmin(0);
    sample(1) = grid(1)*_step+_xmin(1);
    return sample;
}
// Orthonormalizes the columns of m with the classical Gram-Schmidt process.
//   m       : input matrix, expected to be square; a non-square matrix only
//             triggers a message on std::cerr and processing continues.
//   returns : an m.sizeI() x m.sizeI() matrix whose k-th column is the k-th
//             column of m minus its projections onto the previous orthogonal
//             vectors, divided by its norm.
// No guard is applied against a zero norm (linearly dependent columns).
Mat2F32 LinearAlgebra::orthogonalGramSchmidt(const Mat2F32& m)
{
    // Compare rows against columns (the previous check compared sizeI() with
    // itself and therefore never fired).
    if(m.sizeI()!=m.sizeJ())
        std::cerr<<"In linearAlgebra::orthogonalGramSchmidt, Mat2F32 must be square";

    // u(k) holds the k-th orthogonal (not yet normalized) vector.
    Vec<VecF32> u(m.sizeI(),VecF32(m.sizeI()));
    for(unsigned int k=0;k<m.sizeI();k++){
        VecF32 v_k = m.getCol(k);
        // Sum of the projections of v_k onto u(0..k-1); assumes VecF32(n)
        // starts out zero-filled - TODO confirm.
        VecF32 projection(m.sizeI());
        for(unsigned int p=0;p<k;p++){
            projection+=productInner(u(p),v_k)/productInner(u(p),u(p))*u(p);
        }
        u(k)=v_k-projection;
    }

    // Normalize each vector and store it as a column of the result.
    Mat2F32 out(m.sizeI(),m.sizeI());
    for(unsigned int k=0;k<m.sizeI();k++){
        u(k)/=u(k).norm();
        out.setCol(k,u(k));
    }
    return out;
}
// Sampling is not implemented for this distribution: the call only reports
// the problem on std::cerr and returns a default-constructed VecF32.
VecF32 DistributionMultiVariateExpression::randomVariable()const {
    static const char kNotSupported[] = "In distributionMultiVariateArithmetic::randomVariable(), no  probability distribution, you have to use pop::Statistics::toProbabilityDistribution";
    std::cerr<<kNotSupported;
    return VecF32();
}
Esempio n. 7
0
//Rasterize 4 pixels at once
// Rasterizes every triangle binned into tile (x, y) into data_, testing and
// writing one 2x2 pixel quad (4 SIMD lanes) per inner-loop step.
//   x, y : tile coordinates (tile index is x + y*tileCount_.x).
//   pass : not used by this function.
// The tile's triangle count is reset to zero as a side effect, so the bin is
// empty afterwards. When ARPHEG_ARCH_X86 is not defined this reset is the
// only thing the function does.
// NOTE(review): the index arithmetic (idx += 4 per quad, 2*size_.x floats per
// quad row) implies data_ stores each 2x2 quad as 4 consecutive floats -
// confirm against the buffer allocation.
void DepthBuffer::rasterizeTile2x2(int32 x,int32 y,uint32 pass) {

	auto tileIndex = x + y*tileCount_.x;
	auto count = tileTriangleCount_[tileIndex];
	// Consume the bin: the count is read once and then cleared.
	tileTriangleCount_[tileIndex] = 0;
	// First binned triangle of this tile; each tile owns kMaxTrianglesPerTile slots.
	auto faces = triangleBins_ + x*kMaxTrianglesPerTile + y*tileCount_.x*kMaxTrianglesPerTile;
	vec2i tilePos(x*tileSize_.x,y*tileSize_.y);
	// tileEnd is not referenced again in this function.
	vec2i tileEnd(tilePos + tileSize_);
#ifdef ARPHEG_ARCH_X86
	enum { kNumLanes = 4 };

	//Flush denormals to zero
	// 0x8040 sets the FTZ (0x8000) and DAZ (0x0040) bits of MXCSR. The previous
	// register value is not restored by this function.
	_mm_setcsr( _mm_getcsr() | 0x8040 );

	// Per-lane pixel offsets inside a 2x2 quad: lane k covers (col+colOffset[k], row+rowOffset[k]).
	VecS32 colOffset(0, 1, 0, 1);
	VecS32 rowOffset(0, 0, 1, 1);

	//Process the 4 binned triangles at a time
	VecS32 vertexX[3];
	VecS32 vertexY[3];
	VecF32  vertexZ[4];
	// Clamp limits for quad origins: the max is tile start + tile size - 2, i.e.
	// the origin of the last 2x2 quad that still lies inside the tile.
	VecS32 tileMinXSimd(tilePos.x);
	VecS32 tileMaxXSimd(tilePos.x+tileSize_.x-2);
	VecS32 tileMinYSimd(tilePos.y);
	VecS32 tileMaxYSimd(tilePos.y+tileSize_.y-2);

	for(uint32 i = 0;i<count;i += kNumLanes){

		// The last batch may hold fewer than 4 triangles; only lanes below
		// numSimdTris are filled here and read back further down.
		uint32 numSimdTris = std::min(uint32(kNumLanes),count-i);
		auto f = faces+i;
		for(uint32 ii = 0;ii< numSimdTris;++ii){
			vertexX[0].lane[ii] = f[ii].v[0].x;
			vertexY[0].lane[ii] = f[ii].v[0].y;
			vertexX[1].lane[ii] = f[ii].v[1].x;
			vertexY[1].lane[ii] = f[ii].v[1].y;
			vertexX[2].lane[ii] = f[ii].v[2].x;
			vertexY[2].lane[ii] = f[ii].v[2].y;
			// One vector per triangle: its three z terms, fourth lane padded with 0.
			vertexZ[ii] = VecF32(f[ii].z[0],f[ii].z[1],f[ii].z[2],0.0f);
		}

		// Fab(x, y) =     Ax       +       By     +      C              = 0
		// Fab(x, y) = (ya - yb)x   +   (xb - xa)y + (xa * yb - xb * ya) = 0
		// Compute A = (ya - yb) for the 3 line segments that make up each triangle
		VecS32 A0 = vertexY[1] - vertexY[2];
		VecS32 A1 = vertexY[2] - vertexY[0];
		VecS32 A2 = vertexY[0] - vertexY[1];

		// Compute B = (xb - xa) for the 3 line segments that make up each triangle
		VecS32 B0 = vertexX[2] - vertexX[1];
		VecS32 B1 = vertexX[0] - vertexX[2];
		VecS32 B2 = vertexX[1] - vertexX[0];

		// Compute C = (xa * yb - xb * ya) for the 3 line segments that make up each triangle
		VecS32 C0 = vertexX[1] * vertexY[2] - vertexX[2] * vertexY[1];
		VecS32 C1 = vertexX[2] * vertexY[0] - vertexX[0] * vertexY[2];
		VecS32 C2 = vertexX[0] * vertexY[1] - vertexX[1] * vertexY[0];

		// Use bounding box traversal strategy to determine which pixels to rasterize 
		// The minimum corner is clamped to the tile and then rounded down to an
		// even coordinate (& ~1) so it lands on a quad origin.
		VecS32 minX = vmax(vmin(vmin(vertexX[0], vertexX[1]), vertexX[2]), tileMinXSimd) & VecS32(~1);
		VecS32 maxX   = vmin(vmax(vmax(vertexX[0], vertexX[1]), vertexX[2]), tileMaxXSimd);

		VecS32 minY = vmax(vmin(vmin(vertexY[0], vertexY[1]), vertexY[2]), tileMinYSimd) & VecS32(~1);
		VecS32 maxY = vmin(vmax(vmax(vertexY[0], vertexY[1]), vertexY[2]), tileMaxYSimd);

		//Rasterize each triangle individually
		for(uint32 lane = 0;lane < numSimdTris;++lane){
			//Rasterize in 2x2 quads.
			// Broadcast this triangle's three z terms across all four lanes.
			VecF32 zz[3];
			zz[0] = VecF32(vertexZ[lane].lane[0]);
			zz[1] = VecF32(vertexZ[lane].lane[1]);
			zz[2] = VecF32(vertexZ[lane].lane[2]);

			// Broadcast the edge-function coefficients of this triangle.
			VecS32 a0(A0.lane[lane]);
			VecS32 a1(A1.lane[lane]);
			VecS32 a2(A2.lane[lane]);
			VecS32 b0(B0.lane[lane]);
			VecS32 b1(B1.lane[lane]);
			VecS32 b2(B2.lane[lane]);

			int32 minx = minX.lane[lane];
			int32 maxx = maxX.lane[lane];
			int32 miny = minY.lane[lane];
			int32 maxy = maxY.lane[lane];

			// Pixel coordinates of the four pixels of the first quad.
			VecS32 col = VecS32(minx) + colOffset;
			VecS32 row = VecS32(miny) + rowOffset;
			// Offset of the first quad in data_ (see the layout note in the header comment).
			auto rowIdx = miny*size_.x + 2 * minx;
			// Edge functions evaluated at the four pixels of the first quad.
			VecS32 w0_row  = a0 * col + b0 * row + VecS32(C0.lane[lane]);
			VecS32 w1_row  = a1 * col + b1 * row + VecS32(C1.lane[lane]);
			VecS32 w2_row  = a2 * col + b2 * row + VecS32(C2.lane[lane]);

			//Multiply each weight by two(rasterize 2x2 quad at once).
			a0 = shiftl<1>(a0);
			a1 = shiftl<1>(a1);
			a2 = shiftl<1>(a2);
			b0 = shiftl<1>(b0);
			b1 = shiftl<1>(b1);
			b2 = shiftl<1>(b2);

			// Depth change for one quad step in x (a1, a2 are already doubled).
			VecF32 zInc = itof(a1)*zz[1] + itof(a2)*zz[2];
	
			for(int32 y = miny;y<=maxy;y+=2,rowIdx += 2 * size_.x){
				auto w0 = w0_row;
				auto w1 = w1_row;
				auto w2 = w2_row;

				// Depth at the first quad of this row: z0 + w1*z1 + w2*z2.
				VecF32 depth = zz[0] + itof(w1)*zz[1] + itof(w2)*zz[2];
				auto idx = rowIdx;
				
				for(int32 x = minx;x<=maxx;x+=2,idx+=4){
					// The OR of the three edge values is negative in a lane when
					// any one of them is negative.
					auto mask = w0|w1|w2;
					VecF32 previousDepth = VecF32::load(data_+idx);
					VecF32 mergedDepth = vmin(depth,previousDepth);
					// NOTE(review): presumably keeps previousDepth in lanes where mask
					// is negative (pixel outside the triangle) and takes mergedDepth
					// elsewhere - confirm against select()'s definition.
					previousDepth = select(mergedDepth,previousDepth,mask);
					previousDepth.store(data_+idx);
	
					// Step one quad to the right.
					w0+=a0;
					w1+=a1;
					w2+=a2;
					depth+=zInc;
				}
				// Step one quad row down.
				w0_row += b0;
				w1_row += b1;
				w2_row += b2;
			}
		}
	}
#endif
}
Esempio n. 8
0
// Rasterizes every triangle binned into tile (x, y) into the depth buffer,
// keeping the minimum depth per pixel.
//   x, y : tile coordinates (tile index is x + y*tileCount_.x).
//   pass : only forwarded to rasterizeTile2x2; the pass == 0 branch below is
//          currently empty.
// In kModeDepthPackedQuads mode the work is delegated to rasterizeTile2x2.
// Otherwise the tile's triangle count is reset to zero and the triangles are
// drawn either with the scalar incremental rasterizer below (ARPHEG_ARCH_X86)
// or with drawTriangle().
void DepthBuffer::rasterizeTile(int32 x,int32 y,uint32 pass) {
	if(pass == 0){
		// Tile clearing is disabled: only commented-out code remains here.
		//init tile(clear depth).	
		//auto tilePixels = data_ + x*tileSize_.x*tileSize_.y + (y*tileSize_.x*tileSize_.y)*tileCount_.x;
		//clearDepth(tilePixels,tileSize_.x*tileSize_.y,1.0f);
	}
	if(mode_ == kModeDepthPackedQuads){
		rasterizeTile2x2(x,y,pass);
		return;
	}
	auto tileIndex = x + y*tileCount_.x;
	auto count = tileTriangleCount_[tileIndex];
	// Consume the bin: the count is read once and then cleared.
	tileTriangleCount_[tileIndex] = 0;
	// First binned triangle of this tile; each tile owns kMaxTrianglesPerTile slots.
	auto faces = triangleBins_ + x*kMaxTrianglesPerTile + y*tileCount_.x*kMaxTrianglesPerTile;
	vec2i tilePos(x*tileSize_.x,y*tileSize_.y);
	// tileEnd is not referenced again in this function.
	vec2i tileEnd(tilePos + tileSize_);
#ifdef ARPHEG_ARCH_X86
	enum { kNumLanes = 4 };

	//Flush denormals to zero
	//_mm_setcsr( _mm_getcsr() | 0x8040 );

	// colOffset / rowOffset are not referenced again in this function.
	VecS32 colOffset(0, 1, 0, 1);
	VecS32 rowOffset(0, 0, 1, 1);

	//Process the 4 binned triangles at a time
	VecS32 vertexX[3];
	VecS32 vertexY[3];
	VecF32  vertexZ[4];
	// Inclusive pixel bounds of this tile, broadcast to all lanes.
	VecS32 tileMinXSimd(tilePos.x);
	VecS32 tileMaxXSimd(tilePos.x+tileSize_.x-1);
	VecS32 tileMinYSimd(tilePos.y);
	VecS32 tileMaxYSimd(tilePos.y+tileSize_.y-1);

	for(uint32 i = 0;i<count;i += kNumLanes){

		// The last batch may hold fewer than 4 triangles; only lanes below
		// numSimdTris are filled here and read back further down.
		uint32 numSimdTris = std::min(uint32(kNumLanes),count-i);
		auto f = faces+i;
		for(uint32 ii = 0;ii< numSimdTris;++ii){
			vertexX[0].lane[ii] = f[ii].v[0].x;
			vertexY[0].lane[ii] = f[ii].v[0].y;
			vertexX[1].lane[ii] = f[ii].v[1].x;
			vertexY[1].lane[ii] = f[ii].v[1].y;
			vertexX[2].lane[ii] = f[ii].v[2].x;
			vertexY[2].lane[ii] = f[ii].v[2].y;
			// One vector per triangle: its three z terms, fourth lane padded with 0.
			vertexZ[ii] = VecF32(f[ii].z[0],f[ii].z[1],f[ii].z[2],0.0f);
		}

		// Fab(x, y) =     Ax       +       By     +      C              = 0
		// Fab(x, y) = (ya - yb)x   +   (xb - xa)y + (xa * yb - xb * ya) = 0
		// Compute A = (ya - yb) for the 3 line segments that make up each triangle
		VecS32 A0 = vertexY[1] - vertexY[2];
		VecS32 A1 = vertexY[2] - vertexY[0];
		VecS32 A2 = vertexY[0] - vertexY[1];

		// Compute B = (xb - xa) for the 3 line segments that make up each triangle
		VecS32 B0 = vertexX[2] - vertexX[1];
		VecS32 B1 = vertexX[0] - vertexX[2];
		VecS32 B2 = vertexX[1] - vertexX[0];

		// Compute C = (xa * yb - xb * ya) for the 3 line segments that make up each triangle
		VecS32 C0 = vertexX[1] * vertexY[2] - vertexX[2] * vertexY[1];
		VecS32 C1 = vertexX[2] * vertexY[0] - vertexX[0] * vertexY[2];
		VecS32 C2 = vertexX[0] * vertexY[1] - vertexX[1] * vertexY[0];

		// Use bounding box traversal strategy to determine which pixels to rasterize 
		// Each triangle's bounding box is clamped to the tile's pixel bounds.
		VecS32 minX = vmax(vmin(vmin(vertexX[0], vertexX[1]), vertexX[2]), tileMinXSimd); 
		VecS32 maxX   = vmin(vmax(vmax(vertexX[0], vertexX[1]), vertexX[2]), tileMaxXSimd);

		VecS32 minY = vmax(vmin(vmin(vertexY[0], vertexY[1]), vertexY[2]), tileMinYSimd); 
		VecS32 maxY   = vmin(vmax(vmax(vertexY[0], vertexY[1]), vertexY[2]), tileMaxYSimd);

		//Rasterize each triangle individually
		for(uint32 lane = 0;lane < numSimdTris;++lane){
			// The three z terms of this triangle (depth = zz[0] + w1*zz[1] + w2*zz[2]).
			float zz[3] = { vertexZ[lane].lane[0],vertexZ[lane].lane[1],vertexZ[lane].lane[2] };

			// Scalar edge-function coefficients of this triangle.
			int32 a0 = A0.lane[lane]; 
			int32 a1 = A1.lane[lane]; 
			int32 a2 = A2.lane[lane]; 
			int32 b0 = B0.lane[lane];
			int32 b1 = B1.lane[lane];
			int32 b2 = B2.lane[lane];

			int32 minx = minX.lane[lane];
			int32 maxx = maxX.lane[lane];
			int32 miny = minY.lane[lane];
			int32 maxy = maxY.lane[lane];

			// Edge functions evaluated at the top-left pixel of the bounding box.
			auto w0_row = a0 * minx + b0 * miny + C0.lane[lane];
			auto w1_row = a1 * minx + b1 * miny + C1.lane[lane];
			auto w2_row = a2 * minx + b2 * miny + C2.lane[lane];

			// Start of this tile's pixels in data_. NOTE(review): the offset equals
			// (x + y*tileCount_.x) * tileSize_.x * tileSize_.y, which implies each
			// tile's pixels are stored contiguously - confirm against the allocation.
			float* tilePixels = data_ + tilePos.x*tileSize_.y + (tilePos.y*tileSize_.x)*tileCount_.x;
	
			// Tile-local index of the first pixel of the box (row stride tileSize_.x)
			// and the number of x steps per row.
			int32 idx2  = minx-tilePos.x + (miny - tilePos.y)*tileSize_.x;
			int32 spanx = maxx-minx;

	
			// One iteration per pixel row of the bounding box (inclusive bounds).
			for(int32 endIdx2 = idx2+(tileSize_.x)*(maxy-miny);idx2<=endIdx2;idx2+=tileSize_.x){
				auto w0 = w0_row;
				auto w1 = w1_row;
				auto w2 = w2_row;

				auto idx = idx2;
				for(int32 endIdx = idx+spanx;idx<=endIdx;++idx){
					// The OR is non-negative only when all three edge values are
					// non-negative, i.e. the pixel is inside or on the triangle.
					auto mask = w0|w1|w2;
					if(mask >= 0){
						float betaf = float(w1);
						float gamaf = float(w2);
						float depth = zz[0] + betaf*zz[1] + gamaf*zz[2];

						// Keep the smaller of the new and the stored depth.
						auto d = tilePixels[idx];
						d = depth<d?depth:d;
						tilePixels[idx] = d;
					}

					// Step one pixel to the right.
					w0+=a0;
					w1+=a1;
					w2+=a2;
				}
				// Step one pixel row down.
				w0_row += b0;
				w1_row += b1;
				w2_row += b2;
			}
		}
	}
#else
	// Fallback without the x86 path: draw the binned triangles one by one.
	for(uint32 i = 0;i<count;i ++){
		drawTriangle(faces[i],tilePos);
	}
#endif
}
Esempio n. 9
0
// Sets up as many as four screen-space triangles at once and appends each one
// to the bin of every tile its clamped bounding box touches.
//   vertices : 12 vertices, reinterpreted as VecF32 and regrouped by
//              gather4Simd (3 corners for each of 4 triangles).
//   count    : number of valid triangles in this batch; at most 4 are used.
// A triangle is skipped when its integer area is <= 0 or when any of its
// corners has w == 0. A tile whose bin already holds kMaxTrianglesPerTile
// triangles silently drops further triangles.
void DepthBuffer::binTriangles4Simd(vec4f vertices[12],uint32 count) {
	enum { kNumLanes = 4 };

	// soa[c] holds corner c of all four triangles, one triangle per lane.
	VecF32Soa soa[3];
	gather4Simd(soa,(VecF32*)vertices);

	VecS32 px[3],py[3];
	VecF32 pz[3];
	for(int corner = 0;corner<3;corner++){
		// Float to integer screen coordinates (the original note says: truncate).
		px[corner] = ftoi(soa[corner].x);
		py[corner] = ftoi(soa[corner].y);

		pz[corner] = soa[corner].z;
	}

	// Integer area term of each triangle; its sign is used for culling below.
	VecS32 area = (px[1] - px[0]) * (py[2] - py[0]) - (px[0] - px[2]) * (py[0] - py[1]);
	VecF32 oneOverArea = VecF32(1.0f)/itof(area);

	// Turn z1 and z2 into deltas against z0, pre-divided by the area, so depth
	// can later be evaluated as z0 + w1*z1 + w2*z2.
	pz[1] = (pz[1] - pz[0]) * oneOverArea;
	pz[2] = (pz[2] - pz[0]) * oneOverArea;

	// Bounding box of each triangle, clamped to [0, size_ - 1].
	VecS32 zero = VecS32(0);
	VecS32 minX = vmax( vmin(vmin(px[0],px[1]),px[2]), zero);
	VecS32 maxX = vmin( vmax(vmax(px[0],px[1]),px[2]), VecS32(size_.x-1) );
	VecS32 minY = vmax( vmin(vmin(py[0],py[1]),py[2]), zero);
	VecS32 maxY = vmin( vmax(vmax(py[0],py[1]),py[2]), VecS32(size_.y-1) );

	const uint32 activeLanes = std::min(count,uint32(kNumLanes));
	for(uint32 lane = 0;lane<activeLanes;++lane){
		// Skip triangles whose area is zero or negative.
		if(area.lane[lane] <= 0) continue;

		// Reject the triangle if any corner carries w == 0 (the original comment
		// describes this as "behind the near clip plane").
		bool rejected = false;
		for(int corner = 0;corner<3;++corner){
			const float w = soa[corner].w.lane[lane];
			if(w == 0.0f) rejected = true;
		}
		if(rejected) continue;

		// Pixel bounding box -> inclusive range of tiles.
		const int32 firstTileX = minX.lane[lane]/tileSize_.x;
		const int32 lastTileX  = maxX.lane[lane]/tileSize_.x;
		const int32 firstTileY = minY.lane[lane]/tileSize_.y;
		const int32 lastTileY  = maxY.lane[lane]/tileSize_.y;

		for(int32 ty = firstTileY;ty <= lastTileY;++ty){
			auto tileIndex = firstTileX + ty*tileCount_.x;
			for(auto tx = firstTileX;tx <= lastTileX;++tx,++tileIndex){
				// Full bins drop the triangle.
				auto binned = tileTriangleCount_[tileIndex];
				if(binned >= kMaxTrianglesPerTile) continue;
				tileTriangleCount_[tileIndex]++;

				// Next free slot in this tile's bin.
				BinnedTriangle& triangle =*( triangleBins_ + binned + tx*kMaxTrianglesPerTile + ty*tileCount_.x*kMaxTrianglesPerTile);
				for(int corner = 0;corner<3;++corner){
					triangle.v[corner].x = px[corner].lane[lane];
					triangle.v[corner].y = py[corner].lane[lane];
					triangle.z[corner] = pz[corner].lane[lane];
				}
			}
		}
	}
}
// Sampling is not implemented for the division of two distributions: the call
// only reports this on std::cerr and returns a default-constructed VecF32.
VecF32 DistributionMultiVariateArithmeticDivision::randomVariable()const{
    // The message names the operation of this class (it previously said
    // "addition", copied from a sibling class).
    std::cerr<<"No random variable for division"<<std::endl;
    return VecF32();
}