PhysicsHingeConstraint::PhysicsHingeConstraint(PhysicsRigidBody* a, const Quaternion& rotationOffsetA, const Vector3& translationOffsetA,
    PhysicsRigidBody* b, const Quaternion& rotationOffsetB, const Vector3& translationOffsetB)
    : PhysicsConstraint(a, b)
{
    GP_ASSERT(a && a->_body && a->getNode());

    // Take scale into account for the first node's translation offset.
    Vector3 sA;
    a->getNode()->getWorldMatrix().getScale(&sA);
    Vector3 tA(translationOffsetA.x * sA.x, translationOffsetA.y * sA.y, translationOffsetA.z * sA.z);

    if (b)
    {
        GP_ASSERT(b->_body && b->getNode());

        // Take scale into account for the second node's translation offset.
        Vector3 sB;
        b->getNode()->getWorldMatrix().getScale(&sB);
        Vector3 tB(translationOffsetB.x * sB.x, translationOffsetB.y * sB.y, translationOffsetB.z * sB.z);

        btTransform frameInA(BQ(rotationOffsetA), BV(tA));
        btTransform frameInB(BQ(rotationOffsetB), BV(tB));
        _constraint = new btHingeConstraint(*a->_body, *b->_body, frameInA, frameInB);
    }
    else
    {
        btTransform frameInA(BQ(rotationOffsetA), BV(tA));
        _constraint = new btHingeConstraint(*a->_body, frameInA);
    }
}
示例#2
0
文件: libMatrix.c 项目: cran/MixSim
void XAXt2(double **X, int p, double **A, double ***Res, int k){

	double **Res1, **Res2;

	MAKE_MATRIX(Res1, p, p);	
	MAKE_MATRIX(Res2, p, p);

	tA(X, p, p, Res2);

	multiply(X, p, p, A, p, p, Res1);
	multiply2(Res1, p, p, Res2, p, p, Res, k);

	FREE_MATRIX(Res1);
 	FREE_MATRIX(Res2);

}
//Script that takes two matrices, performs bootstrapped correlation, and returns the median
// [[Rcpp::export]]
arma::mat BeQTL(const arma::mat & A, const arma::mat & B, const arma::umat & Bootmat){
  int bsi= Bootmat.n_rows;
  Rcpp::Rcout<<"Starting Bootstrap!"<<std::endl;
  arma::mat C(A.n_cols*B.n_cols,Bootmat.n_rows);
  arma::mat tA(A.n_rows,A.n_cols);
  arma::mat tB(B.n_rows,B.n_cols);
  arma::mat tC(A.n_rows,B.n_rows);
  for(int i=0; i<bsi; i++){
    tA = A.rows(Bootmat.row(i));
    tB = B.rows(Bootmat.row(i));
    tC = cor(tA,tB);
    C.col(i) = vectorise(tC,0);
  }
  C.elem(find_nonfinite(C)).zeros();

 return reshape(median(C,1),A.n_cols,B.n_cols);
}
void Scene::AddTexturedObject(const std::string fname, Material* material, Shapes &objects, const std::string textName, const Point &ofs) const
{
	size_t index = Shape::GetUniqueID();

	// —читывание меша из файла
	L3DS *l3ds = new L3DS(fname.c_str());
	if(!l3ds || !l3ds->GetMeshCount())
		throw Error("Error in loading extern files");

	for(int i = 0; i<l3ds->GetMeshCount(); i++) {
		LMesh *mesh = l3ds->GetMesh(i);

		Texture *texture = new Texture(textName.c_str());

		for(int j = 0; j<mesh->GetTriangleCount(); j++) {
			LTriangle tr = mesh->GetTriangle(j);

			Point a(mesh->GetVertex(tr.a).x, mesh->GetVertex(tr.a).y, mesh->GetVertex(tr.a).z);
			Point b(mesh->GetVertex(tr.b).x, mesh->GetVertex(tr.b).y, mesh->GetVertex(tr.b).z);
			Point c(mesh->GetVertex(tr.c).x, mesh->GetVertex(tr.c).y, mesh->GetVertex(tr.c).z);	

			TextureCoords tA(mesh->GetUV(tr.a).u, mesh->GetUV(tr.a).v);
			TextureCoords tB(mesh->GetUV(tr.b).u, mesh->GetUV(tr.b).v);
			TextureCoords tC(mesh->GetUV(tr.c).u, mesh->GetUV(tr.c).v);

			tA.InvertU();
			tB.InvertU();
			tC.InvertU();

			tA*=8;
			tB*=8;
			tC*=8;

			if (Triangle::IsValidTrangle(a,b,c))
				objects.push_back(new Triangle(a-ofs, b-ofs, c-ofs, material, index, texture, tA, tB, tC));
		}
	}
}
	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
	{
		//skip self-collisions
		if ((m_partIdA == partId) && (m_triangleIndexA == triangleIndex))
			return;

		//skip duplicates (disabled for now)
		//if ((m_partIdA <= partId) && (m_triangleIndexA <= triangleIndex))
		//	return;

		//search for shared vertices and edges
		int numshared = 0;
		int sharedVertsA[3]={-1,-1,-1};
		int sharedVertsB[3]={-1,-1,-1};

		///skip degenerate triangles
		btScalar crossBSqr = ((triangle[1]-triangle[0]).cross(triangle[2]-triangle[0])).length2();
		if (crossBSqr < m_triangleInfoMap->m_equalVertexThreshold)
			return;


		btScalar crossASqr = ((m_triangleVerticesA[1]-m_triangleVerticesA[0]).cross(m_triangleVerticesA[2]-m_triangleVerticesA[0])).length2();
		///skip degenerate triangles
		if (crossASqr< m_triangleInfoMap->m_equalVertexThreshold)
			return;

#if 0
		printf("triangle A[0]	=	(%f,%f,%f)\ntriangle A[1]	=	(%f,%f,%f)\ntriangle A[2]	=	(%f,%f,%f)\n",
			m_triangleVerticesA[0].getX(),m_triangleVerticesA[0].getY(),m_triangleVerticesA[0].getZ(),
			m_triangleVerticesA[1].getX(),m_triangleVerticesA[1].getY(),m_triangleVerticesA[1].getZ(),
			m_triangleVerticesA[2].getX(),m_triangleVerticesA[2].getY(),m_triangleVerticesA[2].getZ());

		printf("partId=%d, triangleIndex=%d\n",partId,triangleIndex);
		printf("triangle B[0]	=	(%f,%f,%f)\ntriangle B[1]	=	(%f,%f,%f)\ntriangle B[2]	=	(%f,%f,%f)\n",
			triangle[0].getX(),triangle[0].getY(),triangle[0].getZ(),
			triangle[1].getX(),triangle[1].getY(),triangle[1].getZ(),
			triangle[2].getX(),triangle[2].getY(),triangle[2].getZ());
#endif

		for (int i=0;i<3;i++)
		{
			for (int j=0;j<3;j++)
			{
				if ( (m_triangleVerticesA[i]-triangle[j]).length2() < m_triangleInfoMap->m_equalVertexThreshold)
				{
					sharedVertsA[numshared] = i;
					sharedVertsB[numshared] = j;
					numshared++;
					///degenerate case
					if(numshared >= 3)
						return;
				}
			}
			///degenerate case
			if(numshared >= 3)
				return;
		}
		switch (numshared)
		{
		case 0:
			{
				break;
			}
		case 1:
			{
				//shared vertex
				break;
			}
		case 2:
			{
				//shared edge
				//we need to make sure the edge is in the order V2V0 and not V0V2 so that the signs are correct
				if (sharedVertsA[0] == 0 && sharedVertsA[1] == 2)
				{
					sharedVertsA[0] = 2;
					sharedVertsA[1] = 0;
					int tmp = sharedVertsB[1];
					sharedVertsB[1] = sharedVertsB[0];
					sharedVertsB[0] = tmp;
				}

				int hash = btGetHash(m_partIdA,m_triangleIndexA);

				btTriangleInfo* info = m_triangleInfoMap->find(hash);
				if (!info)
				{
					btTriangleInfo tmp;
					m_triangleInfoMap->insert(hash,tmp);
					info = m_triangleInfoMap->find(hash);
				}

				int sumvertsA = sharedVertsA[0]+sharedVertsA[1];
				int otherIndexA = 3-sumvertsA;

				
				btVector3 edge(m_triangleVerticesA[sharedVertsA[1]]-m_triangleVerticesA[sharedVertsA[0]]);

				btTriangleShape tA(m_triangleVerticesA[0],m_triangleVerticesA[1],m_triangleVerticesA[2]);
				int otherIndexB = 3-(sharedVertsB[0]+sharedVertsB[1]);

				btTriangleShape tB(triangle[sharedVertsB[1]],triangle[sharedVertsB[0]],triangle[otherIndexB]);
				//btTriangleShape tB(triangle[0],triangle[1],triangle[2]);

				btVector3 normalA;
				btVector3 normalB;
				tA.calcNormal(normalA);
				tB.calcNormal(normalB);
				edge.normalize();
				btVector3 edgeCrossA = edge.cross(normalA).normalize();

				{
					btVector3 tmp = m_triangleVerticesA[otherIndexA]-m_triangleVerticesA[sharedVertsA[0]];
					if (edgeCrossA.dot(tmp) < 0)
					{
						edgeCrossA*=-1;
					}
				}

				btVector3 edgeCrossB = edge.cross(normalB).normalize();

				{
					btVector3 tmp = triangle[otherIndexB]-triangle[sharedVertsB[0]];
					if (edgeCrossB.dot(tmp) < 0)
					{
						edgeCrossB*=-1;
					}
				}

				btScalar	angle2 = 0;
				btScalar	ang4 = 0.f;


				btVector3 calculatedEdge = edgeCrossA.cross(edgeCrossB);
				btScalar len2 = calculatedEdge.length2();

				btScalar correctedAngle(0);
				btVector3 calculatedNormalB = normalA;
				bool isConvex = false;

				if (len2<m_triangleInfoMap->m_planarEpsilon)
				{
					angle2 = 0.f;
					ang4 = 0.f;
				} else
				{

					calculatedEdge.normalize();
					btVector3 calculatedNormalA = calculatedEdge.cross(edgeCrossA);
					calculatedNormalA.normalize();
					angle2 = btGetAngle(calculatedNormalA,edgeCrossA,edgeCrossB);
					ang4 = SIMD_PI-angle2;
					btScalar dotA = normalA.dot(edgeCrossB);
					///@todo: check if we need some epsilon, due to floating point imprecision
					isConvex = (dotA<0.);

					correctedAngle = isConvex ? ang4 : -ang4;
					btQuaternion orn2(calculatedEdge,-correctedAngle);
					calculatedNormalB = btMatrix3x3(orn2)*normalA;


				}

				

				
							
				//alternatively use 
				//btVector3 calculatedNormalB2 = quatRotate(orn,normalA);


				switch (sumvertsA)
				{
				case 1:
					{
						btVector3 edge = m_triangleVerticesA[0]-m_triangleVerticesA[1];
						btQuaternion orn(edge,-correctedAngle);
						btVector3 computedNormalB = quatRotate(orn,normalA);
						btScalar bla = computedNormalB.dot(normalB);
						if (bla<0)
						{
							computedNormalB*=-1;
							info->m_flags |= TRI_INFO_V0V1_SWAP_NORMALB;
						}
#ifdef DEBUG_INTERNAL_EDGE
						if ((computedNormalB-normalB).length()>0.0001)
						{
							printf("warning: normals not identical\n");
						}
#endif//DEBUG_INTERNAL_EDGE

						info->m_edgeV0V1Angle = -correctedAngle;

						if (isConvex)
							info->m_flags |= TRI_INFO_V0V1_CONVEX;
						break;
					}
				case 2:
					{
						btVector3 edge = m_triangleVerticesA[2]-m_triangleVerticesA[0];
						btQuaternion orn(edge,-correctedAngle);
						btVector3 computedNormalB = quatRotate(orn,normalA);
						if (computedNormalB.dot(normalB)<0)
						{
							computedNormalB*=-1;
							info->m_flags |= TRI_INFO_V2V0_SWAP_NORMALB;
						}

#ifdef DEBUG_INTERNAL_EDGE
						if ((computedNormalB-normalB).length()>0.0001)
						{
							printf("warning: normals not identical\n");
						}
#endif //DEBUG_INTERNAL_EDGE
						info->m_edgeV2V0Angle = -correctedAngle;
						if (isConvex)
							info->m_flags |= TRI_INFO_V2V0_CONVEX;
						break;	
					}
				case 3:
					{
						btVector3 edge = m_triangleVerticesA[1]-m_triangleVerticesA[2];
						btQuaternion orn(edge,-correctedAngle);
						btVector3 computedNormalB = quatRotate(orn,normalA);
						if (computedNormalB.dot(normalB)<0)
						{
							info->m_flags |= TRI_INFO_V1V2_SWAP_NORMALB;
							computedNormalB*=-1;
						}
#ifdef DEBUG_INTERNAL_EDGE
						if ((computedNormalB-normalB).length()>0.0001)
						{
							printf("warning: normals not identical\n");
						}
#endif //DEBUG_INTERNAL_EDGE
						info->m_edgeV1V2Angle = -correctedAngle;

						if (isConvex)
							info->m_flags |= TRI_INFO_V1V2_CONVEX;
						break;
					}
				}

				break;
			}
		default:
			{
				//				printf("warning: duplicate triangle\n");
			}

		}
	}
示例#6
0
void DiffEq::setupABmatrices(Grid& thegrid, Modes& lmmodes)
{
  double Omega, Omegap, H, Hp, eL, eLp, fT, fTp, fTpp,rm2M;
  for(int i = 0; i < thegrid.gridNodeLocations().GFvecDim(); i++){
    for(int j = 0; j < thegrid.gridNodeLocations().GFarrDim(); j++){
      //regular wave equation
      if(params.metric.flatspacetime){
        Array2D<double> A(3, 3, 0.0);
        A[1][2] = -pow(params.waveeq.speed, 2.0);
        A[2][1] = -1.0;
        Amatrices.set(i, j, A);
        Array2D<double> tA(2,2,0.0);
        tA[0][1] = -pow(params.waveeq.speed, 2.0);
        tA[1][0] = -1.0;
        trimmedAmatrices.set(i, j, tA);
        //vector dimension of B is actually number of modes
        for(int k = 0; k < Bmatrices.VGFdim(); k++) {
          Array2D<double> B(3, 3, 0.0);
          B[0][1] = -1.0;
          Bmatrices.set(k, i, j, B);
        }
	
       } else if (params.metric.schwarschild) {
        
        int region;
        if (thegrid.gridNodeLocations().get(i,j)==Sminus) {region = 0;}
        else if ((thegrid.gridNodeLocations().get(i,j)>Sminus)&&(thegrid.gridNodeLocations().get(i,j)<Rminus)) {region=1;}
        else if ((thegrid.gridNodeLocations().get(i,j)>=Rminus)&&(thegrid.gridNodeLocations().get(i,j)<=Rplus)) {region=2;}
        else if ((thegrid.gridNodeLocations().get(i,j)>Rplus)&&(thegrid.gridNodeLocations().get(i,j)<Splus)){region=3;}
        else if (thegrid.gridNodeLocations().get(i,j)==Splus) {region=4;}
        
        
        double Omega, Omegap, eL, eLp, H, Hp, term1, term2;
        //thegrid.gridNodeLocations() = rho
        //horizon
        switch (region){
        case 0:
          {
	    Omega = 0.0;
	    Omegap = 0.0;
	    eL = 1.0;
	    eLp = 0.0;
	    H = -1.0;
	    Hp = 0.0;
	    thegrid.rstar.set(i,j,DBL_MAX);
	    thegrid.rschw.set(i,j,2.0*params.schw.mass); 
	    term1 = 0.0;
	    term2 = 1.0;
	    Array2D<double> A(3, 3, 0.0);
	    A[1][2] = -1.0;
	    A[2][1] = 0.0;
	    A[2][2] = -1.0;
	    Amatrices.set(i, j, A);
	    Array2D<double> tA(2, 2, 0.0);
	    tA[0][1] = -1.0;
	    tA[1][0] = 0.0;
	    tA[1][1] = -1.0;
	    trimmedAmatrices.set(i, j, tA);
	    for(int k = 0; k < lmmodes.ntotal; k++) {
            Array2D<double> B(3, 3, 0.0);
            B[0][2] = -1.0;
            Bmatrices.set(k, i, j, B);
	    }
	    break;
          }
	case 1:
	  {
	    //inner hyperboloidal layer
	    transition(thegrid.gridNodeLocations().get(i, j), Rminus, 
		       Sminus, fT, fTp, fTpp);
	    Omega = 1.0 - thegrid.gridNodeLocations().get(i, j) / Sminus * fT;
	    Omegap = -(fT + thegrid.gridNodeLocations().get(i, j) * fTp) / Sminus;
	    eL = 1.0 + pow(thegrid.gridNodeLocations().get(i, j), 2.0) * fTp 
	      / Sminus;
	    eLp = thegrid.gridNodeLocations().get(i, j) 
	      * (2.0 * fTp + thegrid.gridNodeLocations().get(i, j) * fTpp)
            / Sminus;
	    H = -1.0 + pow(Omega, 2.0) / eL;
	    Hp = (2.0 * Omega * Omegap * eL - pow(Omega, 2.0) * eLp) 
	      / pow(eL, 2.0);
	    thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i,j) / Omega);
	    rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass);
	    thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M);
	    term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i,j),3.0));
	    term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i,j);
	    Array2D<double> A(3, 3, 0.0);
	    A[1][2] = -1.0;
	    A[2][1] = -(1.0 + H) / (1.0 - H);
	    A[2][2] = 2.0 * H / (1.0 - H);
	    Amatrices.set(i,j,A);
	    Array2D<double> tA(2, 2, 0.0);
	    tA[0][1] = -1.0;
	    tA[1][0] = -(1.0 + H) / (1.0 - H);
	    tA[1][1] = 2.0 * H / (1.0 - H);
	    trimmedAmatrices.set(i,j,tA);
	    for(int k = 0; k < lmmodes.ntotal; k++) {
	      Array2D<double> B(3, 3, 0.0);
	      B[0][2] = -1.0;
	      B[2][1] = -Hp / (1.0 - H);
	      B[2][2] = Hp / (1.0 - H);
	      B[2][0] = 1.0 / (1.0 - pow(H,2.0)) * pow(Omega, 2.0) * term1
               *( lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2);
	      Bmatrices.set(k, i, j, B);
	    }
	    
	    break;
	  }
         case 2:
           {
	     
           //central tortoise region
	     Omega = 1.0;
	     Omegap = 0.0;
	     eL = 1.0;
	     eLp = 0.0;
	     H = 0.0;
	     Hp = 0.0;
	     thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i, j));
	     rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass);
	     thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M);
	     term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i, j), 3.0));
	     term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i, j);
	     Array2D<double> A(3, 3, 0.0);
	     A[1][2] = -1.0;
	     A[2][1] = -1.0;
	     Amatrices.set(i, j, A);
	     Array2D<double> tA(2, 2, 0.0);
	     tA[0][1] = -1.0;
	     tA[1][0] = -1.0;
	     trimmedAmatrices.set(i, j, tA);
	     for(int k = 0; k < lmmodes.ntotal; k++) {
	       Array2D<double> B(3, 3, 0.0);
	       B[0][2] = -1.0;
	       B[2][0] = 1.0 / (1.0 - pow(H, 2.0)) * pow(Omega, 2.0) * term1
		 * (lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2);
	       Bmatrices.set(k, i, j, B);
	     }
	     break;
           }
	case 3:
	  {
	    
	    //outer hyperboloidal region
	    transition(thegrid.gridNodeLocations().get(i,j), Rplus, Splus, fT, fTp, fTpp);
	    Omega = 1.0 - thegrid.gridNodeLocations().get(i, j) / Splus * fT;
	    Omegap = -(fT + thegrid.gridNodeLocations().get(i, j) * fTp) / Splus;
	    eL = 1.0 + pow(thegrid.gridNodeLocations().get(i, j), 2.0) * fTp / Splus; 
	    eLp = thegrid.gridNodeLocations().get(i, j) 
            * (2.0 * fTp + thegrid.gridNodeLocations().get(i, j) * fTpp) / Splus;
	    H = 1.0 - pow(Omega, 2.0) / eL;
	    Hp = -(2.0 * Omega * Omegap * eL - pow(Omega, 2.0) * eLp) 
	      / pow(eL, 2.0);
	    thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i, j) / Omega);
	    rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass);
	    thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M);
	    term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i, j),3.0));
	    term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i, j);
	    Array2D<double> A(3, 3, 0.0);
	    A[1][2] = -1.0;
	    A[2][1] = -(1.0 - H) / (1.0 + H);
	    A[2][2] = 2.0 * H / (1.0 + H);
	    Amatrices.set(i, j, A);
	    Array2D<double> tA(2, 2, 0.0);
	    tA[0][1] = -1.0;
	    tA[1][0] = -(1.0 - H) / (1.0 + H);
	    tA[1][1] = 2.0 * H / (1.0 + H);
	    trimmedAmatrices.set(i, j, tA);
	    for(int k = 0; k < lmmodes.ntotal; k++) {
	      Array2D<double> B(3, 3, 0.0);
	      B[0][2] = -1.0;
	      B[2][1] = Hp / (1.0 + H);
	      B[2][2] = Hp / (1.0 + H);
	      B[2][0] = 1.0 / (1.0 - pow(H, 2.0)) * pow(Omega, 2.0) * term1
		* (lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2);
	      Bmatrices.set(k, i, j, B);
	    }
	    break;
             }
        case 4:
          {
	    
	    //scri-plus
	    Omega = 0.0;
	    Omegap = 0.0;
	    eL = 1.0;
	    eLp = 0.0;
	    H = 1.0;
	    Hp = 0.0;
	    thegrid.rstar.set(i, j, -DBL_MAX);
	    thegrid.rschw.set(i,j, -DBL_MAX);
	    term1 = 1.0/ pow(thegrid.gridNodeLocations().get(i,j),2.0);
	    term2 = 0.0;
	    Array2D<double> A(3,3,0.0);
	    A[1][2]=-1.0;
	    A[2][2]=1.0;
	    Amatrices.set(i,j,A);
	    Array2D<double> tA(2,2,0.0);
	    tA[0][1]=-1.0;
	    tA[1][1]=1.0;
	    trimmedAmatrices.set(i,j,tA);
	    for(int k= 0; k < lmmodes.ntotal; k++) {
            Array2D<double> B(3,3,0.0);
            B[0][2]=-1.0;
            B[2][0]=lmmodes.ll[k]*(lmmodes.ll[k]+1.0)/(2.0*pow(Splus,2.0));
            Bmatrices.set(k,i,j,B);
	    }
            break;
          }
	default:
	  {
              throw logic_error("AB matrix region not defined");
	      break;
	  }
        }//end switch case
      }//end inner for            
    }//end outer for
  }//end if schw
}//end function setab
示例#7
0
int main(int, char**)
{
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_1;
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_2;

#if SYNTHETIC_INPUT
    Halide::Buffer<uint8_t> im1(10, 10);
    Halide::Buffer<uint8_t> im2(10, 10);

    for (int i = 0; i < 10; i++)
	    for (int j = 0; j < 10; j++)
	    {
		    im1(i, j) = (uint8_t) i*i+j*j;
		    im2(i, j) = (uint8_t) i*i+j*j;
	    }
#else
    Halide::Buffer<uint8_t> im1 = Halide::Tools::load_image("./utils/images/rgb.png");
    Halide::Buffer<uint8_t> im2 = Halide::Tools::load_image("./utils/images/rgb.png");
#endif

    Halide::Buffer<float> Ix_m(im1.width(), im1.height());
    Halide::Buffer<float> Iy_m(im1.width(), im1.height());
    Halide::Buffer<float> It_m(im1.width(), im1.height());
    Halide::Buffer<int> C1(_NC);
    Halide::Buffer<int> C2(_NC);
    Halide::Buffer<int> SIZES(2);
    Halide::Buffer<int> u(_NC);
    Halide::Buffer<int> v(_NC);
    Halide::Buffer<float> A(2, 4*w*w);
    Halide::Buffer<float> tA(4*w*w, 2);
    Halide::Buffer<double> pinvA(4*w*w, 2);
    Halide::Buffer<double> det(1);
    Halide::Buffer<float> tAA(2, 2);
    Halide::Buffer<double> X(2, 2);

    SIZES(0) = im1.height();
    SIZES(1) = im1.width();
    C1(0) = 500; C2(0) = 400;
    C1(1) = 800; C2(1) = 900;
    C1(2) = 200; C2(2) = 400;
    C1(3) = 400; C2(3) = 200;
    C1(4) = 400; C2(4) = 500;
    C1(5) = 800; C2(5) = 200;
    C1(6) = 200; C2(6) = 900;
    C1(7) = 900; C2(7) = 200;

    det(0) = 0;
    init_buffer(Ix_m, (float) 0);
    init_buffer(Iy_m, (float) 0);
    init_buffer(It_m, (float) 0);
    init_buffer(A, (float) 0);
    init_buffer(tA, (float) 0);
    init_buffer(pinvA, (double) 0);
    init_buffer(tAA, (float) 0);
    init_buffer(X, (double) 0);

    // Warm up
    optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());

    // Tiramisu
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start1 = std::chrono::high_resolution_clock::now();
        optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(),
			  Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(),
			  C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer());
        auto end1 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration1 = end1 - start1;
        duration_vector_1.push_back(duration1);
    }

    std::cout << "Time: " << median(duration_vector_1) << std::endl;

#if SYNTHETIC_INPUT
    print_buffer(im1);
    print_buffer(im2);

    print_buffer(Ix_m);
    print_buffer(Iy_m);
    print_buffer(It_m);

    print_buffer(A);
    print_buffer(tA);
    print_buffer(tAA);
    print_buffer(det);
    print_buffer(X);
    print_buffer(pinvA);
#endif

    std::cout << "Output" << std::endl;

    print_buffer(u);
    print_buffer(v);

    return 0;
}
示例#8
0
Foam::solverPerformance Foam::PBiCGStab::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    const label nCells = psi.size();

    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField yA(nCells);
    scalar* __restrict__ yAPtr = yA.begin();

    // --- Calculate A.psi
    matrix_.Amul(yA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual field
    scalarField rA(source - yA);
    scalar* __restrict__ rAPtr = rA.begin();

    // --- Calculate normalisation factor
    const scalar normFactor = this->normFactor(psi, source, yA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< "   Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() =
        gSumMag(rA, matrix().mesh().comm())
       /normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if
    (
        minIter_ > 0
     || !solverPerf.checkConvergence(tolerance_, relTol_)
    )
    {
        scalarField AyA(nCells);
        scalar* __restrict__ AyAPtr = AyA.begin();

        scalarField sA(nCells);
        scalar* __restrict__ sAPtr = sA.begin();

        scalarField zA(nCells);
        scalar* __restrict__ zAPtr = zA.begin();

        scalarField tA(nCells);
        scalar* __restrict__ tAPtr = tA.begin();

        // --- Store initial residual
        const scalarField rA0(rA);

        // --- Initial values not used
        scalar rA0rA = 0;
        scalar alpha = 0;
        scalar omega = 0;

        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
        lduMatrix::preconditioner::New
        (
            *this,
            controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous rA0rA
            const scalar rA0rAold = rA0rA;

            rA0rA = gSumProd(rA0, rA, matrix().mesh().comm());

            // --- Test for singularity
            if (solverPerf.checkSingularity(mag(rA0rA)))
            {
                break;
            }

            // --- Update pA
            if (solverPerf.nIterations() == 0)
            {
                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = rAPtr[cell];
                }
            }
            else
            {
                // --- Test for singularity
                if (solverPerf.checkSingularity(mag(omega)))
                {
                    break;
                }

                const scalar beta = (rA0rA/rA0rAold)*(alpha/omega);

                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] =
                        rAPtr[cell] + beta*(pAPtr[cell] - omega*AyAPtr[cell]);
                }
            }

            // --- Precondition pA
            preconPtr->precondition(yA, pA, cmpt);

            // --- Calculate AyA
            matrix_.Amul(AyA, yA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar rA0AyA = gSumProd(rA0, AyA, matrix().mesh().comm());

            alpha = rA0rA/rA0AyA;

            // --- Calculate sA
            for (label cell=0; cell<nCells; cell++)
            {
                sAPtr[cell] = rAPtr[cell] - alpha*AyAPtr[cell];
            }

            // --- Test sA for convergence
            solverPerf.finalResidual() =
                gSumMag(sA, matrix().mesh().comm())/normFactor;

            if (solverPerf.checkConvergence(tolerance_, relTol_))
            {
                for (label cell=0; cell<nCells; cell++)
                {
                    psiPtr[cell] += alpha*yAPtr[cell];
                }

                solverPerf.nIterations()++;

                return solverPerf;
            }

            // --- Precondition sA
            preconPtr->precondition(zA, sA, cmpt);

            // --- Calculate tA
            matrix_.Amul(tA, zA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar tAtA = gSumSqr(tA, matrix().mesh().comm());

            // --- Calculate omega from tA and sA
            //     (cheaper than using zA with preconditioned tA)
            omega = gSumProd(tA, sA, matrix().mesh().comm())/tAtA;

            // --- Update solution and residual
            for (label cell=0; cell<nCells; cell++)
            {
                psiPtr[cell] += alpha*yAPtr[cell] + omega*zAPtr[cell];
                rAPtr[cell] = sAPtr[cell] - omega*tAPtr[cell];
            }

            solverPerf.finalResidual() =
                gSumMag(rA, matrix().mesh().comm())
               /normFactor;
        } while
        (
            (
                solverPerf.nIterations()++ < maxIter_
            && !solverPerf.checkConvergence(tolerance_, relTol_)
            )
         || solverPerf.nIterations() < minIter_
        );
    }

    return solverPerf;
}