/**
 * Builds a Bullet hinge constraint for rigid body a, optionally linked to rigid body b.
 *
 * Each translation offset is multiplied component-wise by the world scale of the
 * corresponding body's node before it is handed to Bullet.
 *
 * @param a The first rigid body; must be non-null with a valid body and node.
 * @param rotationOffsetA Rotation part of the constraint frame for body a.
 * @param translationOffsetA Unscaled translation part of the constraint frame for body a.
 * @param b The second rigid body, or NULL to create a single-body hinge.
 * @param rotationOffsetB Rotation part of the constraint frame for body b (ignored when b is NULL).
 * @param translationOffsetB Unscaled translation part of the constraint frame for body b (ignored when b is NULL).
 */
PhysicsHingeConstraint::PhysicsHingeConstraint(PhysicsRigidBody* a, const Quaternion& rotationOffsetA, const Vector3& translationOffsetA,
    PhysicsRigidBody* b, const Quaternion& rotationOffsetB, const Vector3& translationOffsetB)
    : PhysicsConstraint(a, b)
{
    GP_ASSERT(a && a->_body && a->getNode());

    // Scale the first body's translation offset by its node's world scale.
    Vector3 scaleA;
    a->getNode()->getWorldMatrix().getScale(&scaleA);
    Vector3 scaledOffsetA(translationOffsetA.x * scaleA.x,
                          translationOffsetA.y * scaleA.y,
                          translationOffsetA.z * scaleA.z);

    // The frame for body a is the same whether or not a second body is supplied.
    btTransform frameInA(BQ(rotationOffsetA), BV(scaledOffsetA));

    if (!b)
    {
        // Single-body hinge.
        _constraint = new btHingeConstraint(*a->_body, frameInA);
        return;
    }

    GP_ASSERT(b->_body && b->getNode());

    // Scale the second body's translation offset by its node's world scale.
    Vector3 scaleB;
    b->getNode()->getWorldMatrix().getScale(&scaleB);
    Vector3 scaledOffsetB(translationOffsetB.x * scaleB.x,
                          translationOffsetB.y * scaleB.y,
                          translationOffsetB.z * scaleB.z);

    btTransform frameInB(BQ(rotationOffsetB), BV(scaledOffsetB));
    _constraint = new btHingeConstraint(*a->_body, *b->_body, frameInA, frameInB);
}
/*
 * Chains three matrix helpers on p-by-p inputs:
 *   Res1 = multiply(X, A), Res2 = tA(X), then multiply2(Res1, Res2) into Res with index k.
 * Presumably this stores X * A * X^T in slice k of Res (tA looks like a
 * transpose and multiply2 like a product written into a 3-D array) -- confirm
 * against the helper definitions, which are not visible here.
 *
 * X   : p-by-p matrix.
 * p   : dimension passed for every row/column count.
 * A   : p-by-p matrix.
 * Res : output array handed to multiply2.
 * k   : index handed to multiply2.
 */
void XAXt2(double **X, int p, double **A, double ***Res, int k){

	double **Res1;	/* receives the result of multiply(X, A) */
	double **Res2;	/* receives the result of tA(X) */

	MAKE_MATRIX(Res1, p, p);
	MAKE_MATRIX(Res2, p, p);

	/* The two intermediate results are independent of each other. */
	multiply(X, p, p, A, p, p, Res1);
	tA(X, p, p, Res2);

	multiply2(Res1, p, p, Res2, p, p, Res, k);

	FREE_MATRIX(Res2);
	FREE_MATRIX(Res1);

}
//Script that takes two matrices, performs bootstrapped correlation, and returns the median // [[Rcpp::export]] arma::mat BeQTL(const arma::mat & A, const arma::mat & B, const arma::umat & Bootmat){ int bsi= Bootmat.n_rows; Rcpp::Rcout<<"Starting Bootstrap!"<<std::endl; arma::mat C(A.n_cols*B.n_cols,Bootmat.n_rows); arma::mat tA(A.n_rows,A.n_cols); arma::mat tB(B.n_rows,B.n_cols); arma::mat tC(A.n_rows,B.n_rows); for(int i=0; i<bsi; i++){ tA = A.rows(Bootmat.row(i)); tB = B.rows(Bootmat.row(i)); tC = cor(tA,tB); C.col(i) = vectorise(tC,0); } C.elem(find_nonfinite(C)).zeros(); return reshape(median(C,1),A.n_cols,B.n_cols); }
void Scene::AddTexturedObject(const std::string fname, Material* material, Shapes &objects, const std::string textName, const Point &ofs) const { size_t index = Shape::GetUniqueID(); // —читывание меша из файла L3DS *l3ds = new L3DS(fname.c_str()); if(!l3ds || !l3ds->GetMeshCount()) throw Error("Error in loading extern files"); for(int i = 0; i<l3ds->GetMeshCount(); i++) { LMesh *mesh = l3ds->GetMesh(i); Texture *texture = new Texture(textName.c_str()); for(int j = 0; j<mesh->GetTriangleCount(); j++) { LTriangle tr = mesh->GetTriangle(j); Point a(mesh->GetVertex(tr.a).x, mesh->GetVertex(tr.a).y, mesh->GetVertex(tr.a).z); Point b(mesh->GetVertex(tr.b).x, mesh->GetVertex(tr.b).y, mesh->GetVertex(tr.b).z); Point c(mesh->GetVertex(tr.c).x, mesh->GetVertex(tr.c).y, mesh->GetVertex(tr.c).z); TextureCoords tA(mesh->GetUV(tr.a).u, mesh->GetUV(tr.a).v); TextureCoords tB(mesh->GetUV(tr.b).u, mesh->GetUV(tr.b).v); TextureCoords tC(mesh->GetUV(tr.c).u, mesh->GetUV(tr.c).v); tA.InvertU(); tB.InvertU(); tC.InvertU(); tA*=8; tB*=8; tC*=8; if (Triangle::IsValidTrangle(a,b,c)) objects.push_back(new Triangle(a-ofs, b-ofs, c-ofs, material, index, texture, tA, tB, tC)); } } }
/// Callback invoked for one candidate triangle ("triangle B"), compared against the
/// stored reference triangle A (m_triangleVerticesA, m_partIdA, m_triangleIndexA).
///
/// If A and B share exactly two vertices (an edge), the signed angle across that
/// edge and its convexity/normal-swap flags are written into A's entry of
/// m_triangleInfoMap (the entry is created on demand). With zero or one shared
/// vertex nothing is recorded; three shared vertices are treated as degenerate.
///
/// @param triangle      The three vertices of triangle B.
/// @param partId        Part id of triangle B.
/// @param triangleIndex Index of triangle B within its part.
virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
{
	//skip self-collisions
	if ((m_partIdA == partId) && (m_triangleIndexA == triangleIndex))
		return;

	//skip duplicates (disabled for now)
	//if ((m_partIdA <= partId) && (m_triangleIndexA <= triangleIndex))
	//	return;

	//search for shared vertices and edges
	int numshared = 0;
	int sharedVertsA[3]={-1,-1,-1};
	int sharedVertsB[3]={-1,-1,-1};

	///skip degenerate triangles
	// The squared length of the edge cross product is compared against
	// m_equalVertexThreshold, the same threshold used for vertex matching below.
	btScalar crossBSqr = ((triangle[1]-triangle[0]).cross(triangle[2]-triangle[0])).length2();
	if (crossBSqr < m_triangleInfoMap->m_equalVertexThreshold)
		return;

	btScalar crossASqr = ((m_triangleVerticesA[1]-m_triangleVerticesA[0]).cross(m_triangleVerticesA[2]-m_triangleVerticesA[0])).length2();
	///skip degenerate triangles
	if (crossASqr< m_triangleInfoMap->m_equalVertexThreshold)
		return;

#if 0
	printf("triangle A[0] = (%f,%f,%f)\ntriangle A[1] = (%f,%f,%f)\ntriangle A[2] = (%f,%f,%f)\n",
		m_triangleVerticesA[0].getX(),m_triangleVerticesA[0].getY(),m_triangleVerticesA[0].getZ(),
		m_triangleVerticesA[1].getX(),m_triangleVerticesA[1].getY(),m_triangleVerticesA[1].getZ(),
		m_triangleVerticesA[2].getX(),m_triangleVerticesA[2].getY(),m_triangleVerticesA[2].getZ());

	printf("partId=%d, triangleIndex=%d\n",partId,triangleIndex);
	printf("triangle B[0] = (%f,%f,%f)\ntriangle B[1] = (%f,%f,%f)\ntriangle B[2] = (%f,%f,%f)\n",
		triangle[0].getX(),triangle[0].getY(),triangle[0].getZ(),
		triangle[1].getX(),triangle[1].getY(),triangle[1].getZ(),
		triangle[2].getX(),triangle[2].getY(),triangle[2].getZ());
#endif

	// Pair up vertices of A and B whose squared distance is below the threshold.
	// i ascends in the outer loop, so sharedVertsA is filled in ascending order of A's vertex index.
	for (int i=0;i<3;i++)
	{
		for (int j=0;j<3;j++)
		{
			if ( (m_triangleVerticesA[i]-triangle[j]).length2() < m_triangleInfoMap->m_equalVertexThreshold)
			{
				sharedVertsA[numshared] = i;
				sharedVertsB[numshared] = j;
				numshared++;
				///degenerate case
				// Returning here also keeps numshared from indexing past the 3-element arrays.
				if(numshared >= 3)
					return;
			}
		}
		///degenerate case
		if(numshared >= 3)
			return;
	}
	switch (numshared)
	{
	case 0:
		{
			break;
		}
	case 1:
		{
			//shared vertex
			break;
		}
	case 2:
		{
			//shared edge
			//we need to make sure the edge is in the order V2V0 and not V0V2 so that the signs are correct
			if (sharedVertsA[0] == 0 && sharedVertsA[1] == 2)
			{
				sharedVertsA[0] = 2;
				sharedVertsA[1] = 0;
				// Swap B's entries as well so the A/B vertex pairing is preserved.
				int tmp = sharedVertsB[1];
				sharedVertsB[1] = sharedVertsB[0];
				sharedVertsB[0] = tmp;
			}

			int hash = btGetHash(m_partIdA,m_triangleIndexA);

			// Look up A's info record, inserting a default-constructed one if it is missing.
			btTriangleInfo* info = m_triangleInfoMap->find(hash);
			if (!info)
			{
				btTriangleInfo tmp;
				m_triangleInfoMap->insert(hash,tmp);
				info = m_triangleInfoMap->find(hash);
			}

			// Vertex indices are 0,1,2 (sum 3): the sum of the two shared indices
			// identifies the edge (1 -> V0V1, 2 -> V2V0, 3 -> V1V2, see the switch below)
			// and 3 minus that sum is the index of the remaining vertex.
			int sumvertsA = sharedVertsA[0]+sharedVertsA[1];
			int otherIndexA = 3-sumvertsA;


			btVector3 edge(m_triangleVerticesA[sharedVertsA[1]]-m_triangleVerticesA[sharedVertsA[0]]);

			btTriangleShape tA(m_triangleVerticesA[0],m_triangleVerticesA[1],m_triangleVerticesA[2]);
			int otherIndexB = 3-(sharedVertsB[0]+sharedVertsB[1]);

			// B is rebuilt with its two shared vertices in reversed order followed by its unshared vertex.
			btTriangleShape tB(triangle[sharedVertsB[1]],triangle[sharedVertsB[0]],triangle[otherIndexB]);
			//btTriangleShape tB(triangle[0],triangle[1],triangle[2]);

			btVector3 normalA;
			btVector3 normalB;
			tA.calcNormal(normalA);
			tB.calcNormal(normalB);
			edge.normalize();
			// edgeCrossA / edgeCrossB: normalized cross product of the shared edge with each
			// triangle's normal, sign-flipped below if needed so that its dot product with the
			// vector from the first shared vertex to that triangle's unshared vertex is non-negative.
			btVector3 edgeCrossA = edge.cross(normalA).normalize();

			{
				btVector3 tmp = m_triangleVerticesA[otherIndexA]-m_triangleVerticesA[sharedVertsA[0]];
				if (edgeCrossA.dot(tmp) < 0)
				{
					edgeCrossA*=-1;
				}
			}

			btVector3 edgeCrossB = edge.cross(normalB).normalize();

			{
				btVector3 tmp = triangle[otherIndexB]-triangle[sharedVertsB[0]];
				if (edgeCrossB.dot(tmp) < 0)
				{
					edgeCrossB*=-1;
				}
			}

			btScalar	angle2 = 0;
			btScalar	ang4 = 0.f;


			btVector3 calculatedEdge = edgeCrossA.cross(edgeCrossB);
			btScalar len2 = calculatedEdge.length2();

			btScalar correctedAngle(0);
			// calculatedNormalB is assigned here and in the else-branch below but is not read afterwards in this function.
			btVector3 calculatedNormalB = normalA;
			bool isConvex = false;

			// When edgeCrossA and edgeCrossB are (anti)parallel their cross product is ~0:
			// the angle stays 0 and isConvex stays false.
			if (len2<m_triangleInfoMap->m_planarEpsilon)
			{
				angle2 = 0.f;
				ang4 = 0.f;
			} else
			{

				calculatedEdge.normalize();
				btVector3 calculatedNormalA = calculatedEdge.cross(edgeCrossA);
				calculatedNormalA.normalize();
				angle2 = btGetAngle(calculatedNormalA,edgeCrossA,edgeCrossB);
				ang4 = SIMD_PI-angle2;
				btScalar dotA = normalA.dot(edgeCrossB);
				///@todo: check if we need some epsilon, due to floating point imprecision
				isConvex = (dotA<0.);

				// The stored angle is positive for convex edges and negative otherwise.
				correctedAngle = isConvex ? ang4 : -ang4;
				btQuaternion orn2(calculatedEdge,-correctedAngle);
				calculatedNormalB = btMatrix3x3(orn2)*normalA;


			}





			//alternatively use
			//btVector3 calculatedNormalB2 = quatRotate(orn,normalA);


			// Store the result in the slot of A's edge identified by sumvertsA.
			// In every case normalA is rotated about the edge by -correctedAngle; if the
			// result points away from normalB (negative dot product) the matching
			// SWAP_NORMALB flag is set.
			switch (sumvertsA)
			{
			case 1:
				{
					// Edge between vertices 0 and 1.
					btVector3 edge = m_triangleVerticesA[0]-m_triangleVerticesA[1];
					btQuaternion orn(edge,-correctedAngle);
					btVector3 computedNormalB = quatRotate(orn,normalA);
					btScalar bla = computedNormalB.dot(normalB);
					if (bla<0)
					{
						computedNormalB*=-1;
						info->m_flags |= TRI_INFO_V0V1_SWAP_NORMALB;
					}
#ifdef DEBUG_INTERNAL_EDGE
					if ((computedNormalB-normalB).length()>0.0001)
					{
						printf("warning: normals not identical\n");
					}
#endif//DEBUG_INTERNAL_EDGE

					info->m_edgeV0V1Angle = -correctedAngle;

					if (isConvex)
						info->m_flags |= TRI_INFO_V0V1_CONVEX;
					break;
				}
			case 2:
				{
					// Edge between vertices 2 and 0.
					btVector3 edge = m_triangleVerticesA[2]-m_triangleVerticesA[0];
					btQuaternion orn(edge,-correctedAngle);
					btVector3 computedNormalB = quatRotate(orn,normalA);
					if (computedNormalB.dot(normalB)<0)
					{
						computedNormalB*=-1;
						info->m_flags |= TRI_INFO_V2V0_SWAP_NORMALB;
					}

#ifdef DEBUG_INTERNAL_EDGE
					if ((computedNormalB-normalB).length()>0.0001)
					{
						printf("warning: normals not identical\n");
					}
#endif //DEBUG_INTERNAL_EDGE
					info->m_edgeV2V0Angle = -correctedAngle;
					if (isConvex)
						info->m_flags |= TRI_INFO_V2V0_CONVEX;
					break;
				}
			case 3:
				{
					// Edge between vertices 1 and 2.
					btVector3 edge = m_triangleVerticesA[1]-m_triangleVerticesA[2];
					btQuaternion orn(edge,-correctedAngle);
					btVector3 computedNormalB = quatRotate(orn,normalA);
					if (computedNormalB.dot(normalB)<0)
					{
						info->m_flags |= TRI_INFO_V1V2_SWAP_NORMALB;
						computedNormalB*=-1;
					}
#ifdef DEBUG_INTERNAL_EDGE
					if ((computedNormalB-normalB).length()>0.0001)
					{
						printf("warning: normals not identical\n");
					}
#endif //DEBUG_INTERNAL_EDGE
					info->m_edgeV1V2Angle = -correctedAngle;

					if (isConvex)
						info->m_flags |= TRI_INFO_V1V2_CONVEX;
					break;
				}
			}

			break;
		}
	default:
		{
			//				printf("warning: duplicate triangle\n");
		}

	}
}
void DiffEq::setupABmatrices(Grid& thegrid, Modes& lmmodes) { double Omega, Omegap, H, Hp, eL, eLp, fT, fTp, fTpp,rm2M; for(int i = 0; i < thegrid.gridNodeLocations().GFvecDim(); i++){ for(int j = 0; j < thegrid.gridNodeLocations().GFarrDim(); j++){ //regular wave equation if(params.metric.flatspacetime){ Array2D<double> A(3, 3, 0.0); A[1][2] = -pow(params.waveeq.speed, 2.0); A[2][1] = -1.0; Amatrices.set(i, j, A); Array2D<double> tA(2,2,0.0); tA[0][1] = -pow(params.waveeq.speed, 2.0); tA[1][0] = -1.0; trimmedAmatrices.set(i, j, tA); //vector dimension of B is actually number of modes for(int k = 0; k < Bmatrices.VGFdim(); k++) { Array2D<double> B(3, 3, 0.0); B[0][1] = -1.0; Bmatrices.set(k, i, j, B); } } else if (params.metric.schwarschild) { int region; if (thegrid.gridNodeLocations().get(i,j)==Sminus) {region = 0;} else if ((thegrid.gridNodeLocations().get(i,j)>Sminus)&&(thegrid.gridNodeLocations().get(i,j)<Rminus)) {region=1;} else if ((thegrid.gridNodeLocations().get(i,j)>=Rminus)&&(thegrid.gridNodeLocations().get(i,j)<=Rplus)) {region=2;} else if ((thegrid.gridNodeLocations().get(i,j)>Rplus)&&(thegrid.gridNodeLocations().get(i,j)<Splus)){region=3;} else if (thegrid.gridNodeLocations().get(i,j)==Splus) {region=4;} double Omega, Omegap, eL, eLp, H, Hp, term1, term2; //thegrid.gridNodeLocations() = rho //horizon switch (region){ case 0: { Omega = 0.0; Omegap = 0.0; eL = 1.0; eLp = 0.0; H = -1.0; Hp = 0.0; thegrid.rstar.set(i,j,DBL_MAX); thegrid.rschw.set(i,j,2.0*params.schw.mass); term1 = 0.0; term2 = 1.0; Array2D<double> A(3, 3, 0.0); A[1][2] = -1.0; A[2][1] = 0.0; A[2][2] = -1.0; Amatrices.set(i, j, A); Array2D<double> tA(2, 2, 0.0); tA[0][1] = -1.0; tA[1][0] = 0.0; tA[1][1] = -1.0; trimmedAmatrices.set(i, j, tA); for(int k = 0; k < lmmodes.ntotal; k++) { Array2D<double> B(3, 3, 0.0); B[0][2] = -1.0; Bmatrices.set(k, i, j, B); } break; } case 1: { //inner hyperboloidal layer transition(thegrid.gridNodeLocations().get(i, j), Rminus, Sminus, fT, fTp, fTpp); 
Omega = 1.0 - thegrid.gridNodeLocations().get(i, j) / Sminus * fT; Omegap = -(fT + thegrid.gridNodeLocations().get(i, j) * fTp) / Sminus; eL = 1.0 + pow(thegrid.gridNodeLocations().get(i, j), 2.0) * fTp / Sminus; eLp = thegrid.gridNodeLocations().get(i, j) * (2.0 * fTp + thegrid.gridNodeLocations().get(i, j) * fTpp) / Sminus; H = -1.0 + pow(Omega, 2.0) / eL; Hp = (2.0 * Omega * Omegap * eL - pow(Omega, 2.0) * eLp) / pow(eL, 2.0); thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i,j) / Omega); rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass); thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M); term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i,j),3.0)); term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i,j); Array2D<double> A(3, 3, 0.0); A[1][2] = -1.0; A[2][1] = -(1.0 + H) / (1.0 - H); A[2][2] = 2.0 * H / (1.0 - H); Amatrices.set(i,j,A); Array2D<double> tA(2, 2, 0.0); tA[0][1] = -1.0; tA[1][0] = -(1.0 + H) / (1.0 - H); tA[1][1] = 2.0 * H / (1.0 - H); trimmedAmatrices.set(i,j,tA); for(int k = 0; k < lmmodes.ntotal; k++) { Array2D<double> B(3, 3, 0.0); B[0][2] = -1.0; B[2][1] = -Hp / (1.0 - H); B[2][2] = Hp / (1.0 - H); B[2][0] = 1.0 / (1.0 - pow(H,2.0)) * pow(Omega, 2.0) * term1 *( lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2); Bmatrices.set(k, i, j, B); } break; } case 2: { //central tortoise region Omega = 1.0; Omegap = 0.0; eL = 1.0; eLp = 0.0; H = 0.0; Hp = 0.0; thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i, j)); rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass); thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M); term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i, j), 3.0)); term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i, j); Array2D<double> A(3, 3, 0.0); A[1][2] = -1.0; A[2][1] = -1.0; Amatrices.set(i, j, A); Array2D<double> tA(2, 2, 0.0); tA[0][1] = -1.0; tA[1][0] = -1.0; trimmedAmatrices.set(i, j, tA); for(int k = 0; k < lmmodes.ntotal; k++) { Array2D<double> B(3, 3, 0.0); 
B[0][2] = -1.0; B[2][0] = 1.0 / (1.0 - pow(H, 2.0)) * pow(Omega, 2.0) * term1 * (lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2); Bmatrices.set(k, i, j, B); } break; } case 3: { //outer hyperboloidal region transition(thegrid.gridNodeLocations().get(i,j), Rplus, Splus, fT, fTp, fTpp); Omega = 1.0 - thegrid.gridNodeLocations().get(i, j) / Splus * fT; Omegap = -(fT + thegrid.gridNodeLocations().get(i, j) * fTp) / Splus; eL = 1.0 + pow(thegrid.gridNodeLocations().get(i, j), 2.0) * fTp / Splus; eLp = thegrid.gridNodeLocations().get(i, j) * (2.0 * fTp + thegrid.gridNodeLocations().get(i, j) * fTpp) / Splus; H = 1.0 - pow(Omega, 2.0) / eL; Hp = -(2.0 * Omega * Omegap * eL - pow(Omega, 2.0) * eLp) / pow(eL, 2.0); thegrid.rstar.set(i, j, thegrid.gridNodeLocations().get(i, j) / Omega); rm2M = invert_tortoise(thegrid.rstar.get(i, j), params.schw.mass); thegrid.rschw.set(i, j, 2.0 * params.schw.mass + rm2M); term1 = rm2M / (pow(Omega, 2.0) * pow(thegrid.rschw.get(i, j),3.0)); term2 = 2.0 * params.schw.mass / thegrid.rschw.get(i, j); Array2D<double> A(3, 3, 0.0); A[1][2] = -1.0; A[2][1] = -(1.0 - H) / (1.0 + H); A[2][2] = 2.0 * H / (1.0 + H); Amatrices.set(i, j, A); Array2D<double> tA(2, 2, 0.0); tA[0][1] = -1.0; tA[1][0] = -(1.0 - H) / (1.0 + H); tA[1][1] = 2.0 * H / (1.0 + H); trimmedAmatrices.set(i, j, tA); for(int k = 0; k < lmmodes.ntotal; k++) { Array2D<double> B(3, 3, 0.0); B[0][2] = -1.0; B[2][1] = Hp / (1.0 + H); B[2][2] = Hp / (1.0 + H); B[2][0] = 1.0 / (1.0 - pow(H, 2.0)) * pow(Omega, 2.0) * term1 * (lmmodes.ll[k] * (lmmodes.ll[k] + 1.0) + term2); Bmatrices.set(k, i, j, B); } break; } case 4: { //scri-plus Omega = 0.0; Omegap = 0.0; eL = 1.0; eLp = 0.0; H = 1.0; Hp = 0.0; thegrid.rstar.set(i, j, -DBL_MAX); thegrid.rschw.set(i,j, -DBL_MAX); term1 = 1.0/ pow(thegrid.gridNodeLocations().get(i,j),2.0); term2 = 0.0; Array2D<double> A(3,3,0.0); A[1][2]=-1.0; A[2][2]=1.0; Amatrices.set(i,j,A); Array2D<double> tA(2,2,0.0); tA[0][1]=-1.0; tA[1][1]=1.0; 
trimmedAmatrices.set(i,j,tA); for(int k= 0; k < lmmodes.ntotal; k++) { Array2D<double> B(3,3,0.0); B[0][2]=-1.0; B[2][0]=lmmodes.ll[k]*(lmmodes.ll[k]+1.0)/(2.0*pow(Splus,2.0)); Bmatrices.set(k,i,j,B); } break; } default: { throw logic_error("AB matrix region not defined"); break; } }//end switch case }//end inner for }//end outer for }//end if schw }//end function setab
int main(int, char**) { std::vector<std::chrono::duration<double,std::milli>> duration_vector_1; std::vector<std::chrono::duration<double,std::milli>> duration_vector_2; #if SYNTHETIC_INPUT Halide::Buffer<uint8_t> im1(10, 10); Halide::Buffer<uint8_t> im2(10, 10); for (int i = 0; i < 10; i++) for (int j = 0; j < 10; j++) { im1(i, j) = (uint8_t) i*i+j*j; im2(i, j) = (uint8_t) i*i+j*j; } #else Halide::Buffer<uint8_t> im1 = Halide::Tools::load_image("./utils/images/rgb.png"); Halide::Buffer<uint8_t> im2 = Halide::Tools::load_image("./utils/images/rgb.png"); #endif Halide::Buffer<float> Ix_m(im1.width(), im1.height()); Halide::Buffer<float> Iy_m(im1.width(), im1.height()); Halide::Buffer<float> It_m(im1.width(), im1.height()); Halide::Buffer<int> C1(_NC); Halide::Buffer<int> C2(_NC); Halide::Buffer<int> SIZES(2); Halide::Buffer<int> u(_NC); Halide::Buffer<int> v(_NC); Halide::Buffer<float> A(2, 4*w*w); Halide::Buffer<float> tA(4*w*w, 2); Halide::Buffer<double> pinvA(4*w*w, 2); Halide::Buffer<double> det(1); Halide::Buffer<float> tAA(2, 2); Halide::Buffer<double> X(2, 2); SIZES(0) = im1.height(); SIZES(1) = im1.width(); C1(0) = 500; C2(0) = 400; C1(1) = 800; C2(1) = 900; C1(2) = 200; C2(2) = 400; C1(3) = 400; C2(3) = 200; C1(4) = 400; C2(4) = 500; C1(5) = 800; C2(5) = 200; C1(6) = 200; C2(6) = 900; C1(7) = 900; C2(7) = 200; det(0) = 0; init_buffer(Ix_m, (float) 0); init_buffer(Iy_m, (float) 0); init_buffer(It_m, (float) 0); init_buffer(A, (float) 0); init_buffer(tA, (float) 0); init_buffer(pinvA, (double) 0); init_buffer(tAA, (float) 0); init_buffer(X, (double) 0); // Warm up optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(), Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(), C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer()); // Tiramisu for (int i=0; i<NB_TESTS; i++) { auto start1 = 
std::chrono::high_resolution_clock::now(); optical_flow_tiramisu(SIZES.raw_buffer(), im1.raw_buffer(), im2.raw_buffer(), Ix_m.raw_buffer(), Iy_m.raw_buffer(), It_m.raw_buffer(), C1.raw_buffer(), C2.raw_buffer(), u.raw_buffer(), v.raw_buffer(), A.raw_buffer(), pinvA.raw_buffer(), det.raw_buffer(), tAA.raw_buffer(), tA.raw_buffer(), X.raw_buffer()); auto end1 = std::chrono::high_resolution_clock::now(); std::chrono::duration<double,std::milli> duration1 = end1 - start1; duration_vector_1.push_back(duration1); } std::cout << "Time: " << median(duration_vector_1) << std::endl; #if SYNTHETIC_INPUT print_buffer(im1); print_buffer(im2); print_buffer(Ix_m); print_buffer(Iy_m); print_buffer(It_m); print_buffer(A); print_buffer(tA); print_buffer(tAA); print_buffer(det); print_buffer(X); print_buffer(pinvA); #endif std::cout << "Output" << std::endl; print_buffer(u); print_buffer(v); return 0; }
// Solves the linear system for component cmpt with a preconditioned
// stabilised bi-conjugate-gradient iteration, updating psi in place.
//
// psi     Initial guess on entry; solution estimate on exit.
// source  Right-hand side.
// cmpt    Component passed through to Amul and the preconditioner.
//
// Returns the solverPerformance record holding the initial/final normalised
// residuals and the iteration count.
//
// Field naming used below:
//   rA  residual, rA0 copy of the initial residual, pA search direction,
//   yA  preconditioned pA, AyA = A.yA, sA intermediate residual,
//   zA  preconditioned sA, tA = A.zA.
Foam::solverPerformance Foam::PBiCGStab::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    const label nCells = psi.size();

    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField yA(nCells);
    scalar* __restrict__ yAPtr = yA.begin();

    // --- Calculate A.psi
    matrix_.Amul(yA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual field
    scalarField rA(source - yA);
    scalar* __restrict__ rAPtr = rA.begin();

    // --- Calculate normalisation factor
    // NOTE(review): pA has not been assigned yet at this point; presumably
    // normFactor() only uses it as scratch storage -- confirm.
    const scalar normFactor = this->normFactor(psi, source, yA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() =
        gSumMag(rA, matrix().mesh().comm())
       /normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if
    (
        minIter_ > 0
     || !solverPerf.checkConvergence(tolerance_, relTol_)
    )
    {
        scalarField AyA(nCells);
        scalar* __restrict__ AyAPtr = AyA.begin();

        scalarField sA(nCells);
        scalar* __restrict__ sAPtr = sA.begin();

        scalarField zA(nCells);
        scalar* __restrict__ zAPtr = zA.begin();

        scalarField tA(nCells);
        scalar* __restrict__ tAPtr = tA.begin();

        // --- Store initial residual
        const scalarField rA0(rA);

        // --- Initial values not used
        // (the first pass takes the nIterations() == 0 branch, which does not
        // read rA0rAold, alpha or omega)
        scalar rA0rA = 0;
        scalar alpha = 0;
        scalar omega = 0;

        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
        lduMatrix::preconditioner::New
        (
            *this,
            controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous rA0rA
            const scalar rA0rAold = rA0rA;

            rA0rA = gSumProd(rA0, rA, matrix().mesh().comm());

            // --- Test for singularity
            if (solverPerf.checkSingularity(mag(rA0rA)))
            {
                break;
            }

            // --- Update pA
            if (solverPerf.nIterations() == 0)
            {
                // First pass: the search direction is the residual itself
                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = rAPtr[cell];
                }
            }
            else
            {
                // --- Test for singularity
                // (omega is a divisor in beta below)
                if (solverPerf.checkSingularity(mag(omega)))
                {
                    break;
                }

                const scalar beta = (rA0rA/rA0rAold)*(alpha/omega);

                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] =
                        rAPtr[cell] + beta*(pAPtr[cell] - omega*AyAPtr[cell]);
                }
            }

            // --- Precondition pA
            preconPtr->precondition(yA, pA, cmpt);

            // --- Calculate AyA
            matrix_.Amul(AyA, yA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar rA0AyA = gSumProd(rA0, AyA, matrix().mesh().comm());

            // NOTE(review): rA0AyA is not tested before the division
            alpha = rA0rA/rA0AyA;

            // --- Calculate sA
            for (label cell=0; cell<nCells; cell++)
            {
                sAPtr[cell] = rAPtr[cell] - alpha*AyAPtr[cell];
            }

            // --- Test sA for convergence
            solverPerf.finalResidual() =
                gSumMag(sA, matrix().mesh().comm())/normFactor;

            if (solverPerf.checkConvergence(tolerance_, relTol_))
            {
                // Converged after the first half-step: apply only the alpha
                // update and return. This path counts the iteration itself
                // and does not consult minIter_.
                for (label cell=0; cell<nCells; cell++)
                {
                    psiPtr[cell] += alpha*yAPtr[cell];
                }

                solverPerf.nIterations()++;

                return solverPerf;
            }

            // --- Precondition sA
            preconPtr->precondition(zA, sA, cmpt);

            // --- Calculate tA
            matrix_.Amul(tA, zA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar tAtA = gSumSqr(tA, matrix().mesh().comm());

            // --- Calculate omega from tA and sA
            //     (cheaper than using zA with preconditioned tA)
            // NOTE(review): tAtA is not tested before the division
            omega = gSumProd(tA, sA, matrix().mesh().comm())/tAtA;

            // --- Update solution and residual
            for (label cell=0; cell<nCells; cell++)
            {
                psiPtr[cell] += alpha*yAPtr[cell] + omega*zAPtr[cell];
                rAPtr[cell] = sAPtr[cell] - omega*tAPtr[cell];
            }

            solverPerf.finalResidual() =
                gSumMag(rA, matrix().mesh().comm())
               /normFactor;

        // The post-increment counts this pass as part of evaluating the
        // condition: continue while under maxIter_ and not converged, or
        // while fewer than minIter_ iterations have been done.
        } while
        (
            (
                solverPerf.nIterations()++ < maxIter_
            && !solverPerf.checkConvergence(tolerance_, relTol_)
            )
         || solverPerf.nIterations() < minIter_
        );
    }

    return solverPerf;
}