void Selector::filter_photons(){ for(int phoInd = 0; phoInd < tree->nPho_; ++phoInd){ double eta = tree->phoEta_[phoInd]; double et = tree->phoEt_[phoInd]; Pho03ChHadIso.push_back( tree->phoPFChIso_[phoInd] - tree->rho2012_ * phoEffArea03ChHad(eta) ); Pho03ChHadSCRIso.push_back( tree->phoSCRChIso_[phoInd] - tree->rho2012_ * phoEffArea03ChHad(eta) ); Pho03NeuHadIso.push_back( tree->phoPFNeuIso_[phoInd] - tree->rho2012_ * phoEffArea03NeuHad(eta) ); Pho03PhoIso.push_back( tree->phoPFPhoIso_[phoInd] - tree->rho2012_ * phoEffArea03Pho(eta) ); Pho03PhoSCRIso.push_back( tree->phoSCRPhoIso_[phoInd] - tree->rho2012_ * phoEffArea03Pho(eta) ); // manual spike cleaning if (dR(tree->phoEta_[phoInd], tree->phoPhi_[phoInd], -1.76, 1.37) < 0.05) continue; if (dR(tree->phoEta_[phoInd], tree->phoPhi_[phoInd], 2.37, 2.69) < 0.05) continue; int region = 0; //barrel if(TMath::Abs( eta )>1.5) region = 1; //endcap bool phoPresel = fidEtaPass( eta ) && et > pho_Et_cut && ( pho_noPixelSeed_cut || tree->phohasPixelSeed_[phoInd] == 0 ) && ( pho_noEleVeto_cut || tree->phoEleVeto_[phoInd] == 0 ) && tree->phoIsConv_[phoInd] == photonID_IsConv[region][pho_ID_ind] && tree->phoHoverE_[phoInd] < photonID_HoverE[region][pho_ID_ind] && Pho03NeuHadIso[phoInd] < (photonID_RhoCorrR03NeuHadIso_0[region][pho_ID_ind] + et * photonID_RhoCorrR03NeuHadIso_1[region][pho_ID_ind]); if(phoPresel){ PhotonsPresel.push_back(phoInd); PhoPassSih.push_back( tree->phoSigmaIEtaIEta_[phoInd] < photonID_SigmaIEtaIEta[region][pho_ID_ind] ); PhoPassChHadIso.push_back( Pho03ChHadIso[phoInd] < photonID_RhoCorrR03ChHadIso[region][pho_ID_ind] ); PhoPassPhoIso.push_back( Pho03PhoIso[phoInd] < photonID_RhoCorrR03PhoIso_0[region][pho_ID_ind] + et * photonID_RhoCorrR03PhoIso_1[region][pho_ID_ind] ); } } }
/**
 * Recomputes the temporary triad tempTc from the stored triad tc for the
 * solution state of the given time step.
 *
 * The incremental unknowns of the element are fetched, the spin at the
 * element centre is taken as the mean of entries 4-6 and 10-12 of that
 * vector, a rotation matrix is built from this pseudovector and applied to
 * tc. The result is cached: the call returns immediately when tempTcCounter
 * already equals the step's solution state counter.
 *
 * @param tStep time step whose incremental solution is used
 */
void
LIBeam3dNL :: updateTempTriad(TimeStep *tStep)
{
    // Nothing to do if tempTc is already valid for this solution state.
    if ( tStep->giveSolutionStateCounter() == tempTcCounter ) {
        return;
    }

    FloatArray dofIncrements, midSpin(3);
    FloatMatrix spinRotation(3, 3);

    // Incremental values of the element unknowns.
    this->computeVectorOf(EID_MomentumBalance, VM_Incremental, tStep, dofIncrements);

    // Spin at the centre = average of the two end values, component by component.
    for ( int axis = 1; axis <= 3; axis++ ) {
        midSpin.at(axis) = 0.5 * ( dofIncrements.at(3 + axis) + dofIncrements.at(9 + axis) );
    }

    // Rotation matrix of the centre spin, applied to the stored triad.
    this->computeRotMtrx(spinRotation, midSpin);
    tempTc.beProductOf(spinRotation, tc);

    // Remember for which solution state tempTc is valid.
    tempTcCounter = tStep->giveSolutionStateCounter();
}
void ProcessEvent(Everything &ev, Everything &evout) { int Nmuons = ev.GetInt("Glb_nptl"); auto muonpT = ev["Glb_pt"]; auto muonphi = ev["Glb_phi"]; auto muoneta = ev["Glb_eta"]; auto muonchi2ndof = ev["Glb_glbChi2_ndof"]; auto muonnhits = ev.GetVInt("Glb_nValMuHits"); auto muonpxhits = ev.GetVInt("Glb_nValPixHits"); auto muonnMatchedStations = ev.GetVInt("Glb_nMatchedStations"); auto muontrkDxy = ev["Glb_trkDxy"]; auto muontrkDz = ev["Glb_trkDz"]; auto muontrkLayerWM = ev.GetVInt("Glb_trkLayerWMeas"); //Glb_pt>7 && fabs(Glb_eta)<2 && Glb_nMatchedStations>1 && Glb_glbChi2_ndof<10 && Glb_nValMuHits>0 && Glb_nValPixHits>2 && Glb_trkDxy<0.2 && Glb_trkDz<0.5 && Glb_trkLayerWMeas>5) int njets = ev.GetInt("nref"); auto jtpt = ev["jtpt"]; auto jtphi = ev["jtphi"]; auto jteta = ev["jteta"]; for (int j=0;j<njets;j++) { float jeteta = jteta[j], jetpt = jtpt[j], jetphi = jtphi[j]; int indexClosest = -1; double dRclosest = 999; double jetmuonpt=0, jetmuonphi=0, jetmuoneta=0, jetmuonptrel = 0, jetmuonip3d = 0; for (int m=0;m<Nmuons;m++) if (fabs(muoneta[m])<2 && muonnMatchedStations[m]>1 && muonchi2ndof[m]<10 && muonnhits[m]>0 && muonpxhits[m]>2 && muontrkDxy[m]<0.2 && muontrkDz[m]<0.5) { double dR_jm = dR(muonphi[m], muoneta[m], jetphi,jeteta); if (dR_jm<0.4 && dR_jm<dRclosest) { indexClosest = m; dRclosest = dR_jm; } } if (indexClosest>=0) { jetmuonpt = muonpT[indexClosest]; jetmuonphi = muonphi[indexClosest]; jetmuoneta = muoneta[indexClosest]; jetmuonptrel=getPtRel(jetpt, jeteta, jetphi, jetmuonpt, jetmuoneta, jetmuonphi); jetmuonip3d = sqrt(muontrkDxy[indexClosest]*muontrkDxy[indexClosest] + muontrkDz[indexClosest]*muontrkDz[indexClosest]); } evout["jetmuonpt"].push_back(jetmuonpt); evout["jetmuonphi"].push_back(jetmuonphi); evout["jetmuoneta"].push_back(jetmuoneta); evout["jetmuonptrel"].push_back(jetmuonptrel); evout["jetmuonip3d"].push_back(jetmuonip3d); //copy other jet stuff evout.AddRow(ev,"nref",j); } }
double Histogrammer::minDrPhoB(int PhoInd, EventTree* tree){ // find the closest b-jet TLorentzVector b; TLorentzVector bBar; int phoGen=-1; double mindr = 999.0; for( int mcI = 0; mcI < tree->nMC_; ++mcI){ if( tree->mcIndex->at(mcI) == tree->phoGenIndex_->at(PhoInd) ) phoGen=mcI; if( tree->mcPID->at(mcI) == 5) b.SetPtEtaPhiM(tree->mcPt->at(mcI), tree->mcEta->at(mcI), tree->mcPhi->at(mcI), tree->mcMass->at(mcI)); if( tree->mcPID->at(mcI) == -5) bBar.SetPtEtaPhiM(tree->mcPt->at(mcI), tree->mcEta->at(mcI), tree->mcPhi->at(mcI), tree->mcMass->at(mcI)); } if( phoGen > 0 && b.Pt() > 0.0001 && bBar.Pt() > 0.0001 ) { mindr = std::min(dR(tree->mcEta->at(phoGen), tree->mcPhi->at(phoGen), b.Eta(), b.Phi()), dR(tree->mcEta->at(phoGen), tree->mcPhi->at(phoGen), bBar.Eta(), bBar.Phi())); } return mindr; }
/**
 * Index of the candidate closest in dR to a reference direction.
 *
 * @param myEta  eta of the reference direction
 * @param myPhi  phi of the reference direction
 * @param etas   candidate eta values; must not be null
 * @param phis   candidate phi values, read at the same indices as etas
 *               (std::vector::at throws std::out_of_range if it is shorter)
 * @return index of the first candidate with the smallest dR, or -1 when etas
 *         is empty or no candidate has dR below 999.0
 */
int minDrIndex(double myEta, double myPhi, std::vector<float> *etas, std::vector<float> *phis){
	double mindr = 999.0;
	int bestInd = -1;
	// Iterate with the container's own unsigned size type; the previous
	// `int` counter was compared against the unsigned size().
	const std::vector<float>::size_type nCand = etas->size();
	for( std::vector<float>::size_type oind = 0; oind < nCand; ++oind){
		const double dr = dR(myEta, myPhi, etas->at(oind), phis->at(oind));
		// Strict comparison: on ties the earliest candidate is kept.
		if( mindr > dr ) {
			mindr = dr;
			bestInd = static_cast<int>(oind);
		}
	}
	return bestInd;
}
int Histogrammer::minDrIndex(double myEta, double myPhi, std::vector<int> Inds, std::vector<float> *etas, std::vector<float> *phis){ double mindr = 999.0; double dr; int bestInd = -1; for( std::vector<int>::iterator it = Inds.begin(); it != Inds.end(); ++it){ dr = dR(myEta, myPhi, etas->at(*it), phis->at(*it)); if( mindr > dr ) { mindr = dr; bestInd = *it; } } return bestInd; }
/**
 * Returns the derivative as a 4x4 matrix laid out like a homogeneous
 * transform:
 *
 *     [ rotDerivative  posDerivative ]
 *     [ 0   0   0            0       ]
 */
Matrix4x4 TransformDerivative::asHomogeneousTransformDerivative() const
{
    typedef Eigen::Matrix<double,4,4,Eigen::RowMajor> RowMajor4x4;
    typedef Eigen::Matrix<double,3,3,Eigen::RowMajor> RowMajor3x3;

    Matrix4x4 ret;
    Eigen::Map<RowMajor4x4> retEigen(ret.data());

    // Read-only views on the stored derivatives.
    Eigen::Map<const Eigen::Vector3d> dp(this->posDerivative.data());
    Eigen::Map<const RowMajor3x3> dR(this->rotDerivative.data());

    retEigen.topLeftCorner<3,3>() = dR;
    retEigen.topRightCorner<3,1>() = dp;
    // The last row is entirely zero, including the corner element.
    retEigen.row(3).setZero();

    return ret;
}
/**
 * Applies this transform derivative to a spatial force vector.
 *
 * With (R, p) the rotation / position of @p transform, (dR, dp) the stored
 * derivatives and (f, m) the linear / angular part of @p other:
 *
 *     linear  = dR * f
 *     angular = dR * m + p x (dR * f) + dp x (R * f)
 *
 * @param transform transform providing R and p
 * @param other     spatial force vector providing f and m
 * @return the resulting spatial force vector
 */
SpatialForceVector TransformDerivative::transform(const Transform& transform,
                                                  SpatialForceVector& other)
{
    typedef Eigen::Matrix<double,3,3,Eigen::RowMajor> RowMajor3x3;

    // Views on the transform and on the stored derivatives.
    Eigen::Map<const Eigen::Vector3d> p(transform.getPosition().data());
    Eigen::Map<const RowMajor3x3> R(transform.getRotation().data());
    Eigen::Map<const Eigen::Vector3d> dp(this->posDerivative.data());
    Eigen::Map<const RowMajor3x3> dR(this->rotDerivative.data());

    SpatialForceVector ret;

    // The linear part is written first because the angular part reuses it.
    toEigen(ret.getLinearVec3()) = dR*toEigen(other.getLinearVec3());

    toEigen(ret.getAngularVec3()) = dR*toEigen(other.getAngularVec3())
                                  + p.cross(toEigen(ret.getLinearVec3()))
                                  + dp.cross(R*toEigen(other.getLinearVec3()));

    return ret;
}
/**
 * Right-composition of this derivative with a transform.
 *
 * With (dR, dp) the stored derivatives and (R2, p2) the rotation / position
 * of @p otherTransform, the result holds
 *
 *     rotDerivative = dR * R2
 *     posDerivative = dR * p2 + dp
 *
 * @param otherTransform transform on the right-hand side of the product
 * @return the composed derivative
 */
TransformDerivative TransformDerivative::operator*(const Transform& otherTransform) const
{
    typedef Eigen::Matrix<double,3,3,Eigen::RowMajor> RowMajor3x3;

    // Inputs: stored derivatives and the right-hand transform.
    Eigen::Map<const RowMajor3x3> dR(this->rotDerivative.data());
    Eigen::Map<const Eigen::Vector3d> dp(this->posDerivative.data());
    Eigen::Map<const RowMajor3x3> other_R(otherTransform.getRotation().data());
    Eigen::Map<const Eigen::Vector3d> other_p(otherTransform.getPosition().data());

    // Output, written through mutable views.
    TransformDerivative ret;
    Eigen::Map<RowMajor3x3> ret_dR(ret.rotDerivative.data());
    Eigen::Map<Eigen::Vector3d> ret_dp(ret.posDerivative.data());

    ret_dR = dR*other_R;
    ret_dp = dR*other_p+dp;

    return ret;
}
/**
 * Returns the derivative in 6x6 adjoint layout.
 *
 * With (R, p) the rotation / position of @p transform, (dR, dp) the stored
 * derivatives and S(.) the matrix returned by mySkeww:
 *
 *     [ dR   S(dp) * R + S(p) * dR ]
 *     [ 0              dR          ]
 *
 * @param transform transform providing R and p
 */
Matrix6x6 TransformDerivative::asAdjointTransformDerivative(const Transform& transform) const
{
    typedef Eigen::Matrix<double,6,6,Eigen::RowMajor> RowMajor6x6;
    typedef Eigen::Matrix<double,3,3,Eigen::RowMajor> RowMajor3x3;

    Matrix6x6 ret;
    Eigen::Map<RowMajor6x6> retEigen(ret.data());

    Eigen::Map<const Eigen::Vector3d> dp(this->posDerivative.data());
    Eigen::Map<const RowMajor3x3> dR(this->rotDerivative.data());
    Eigen::Map<const Eigen::Vector3d> p(transform.getPosition().data());
    Eigen::Map<const RowMajor3x3> R(transform.getRotation().data());

    // Fill the four 3x3 quadrants.
    retEigen.topLeftCorner<3,3>() = dR;
    retEigen.topRightCorner<3,3>() = mySkeww(dp)*R+mySkeww(p)*dR;
    retEigen.bottomLeftCorner<3,3>().setZero();
    retEigen.bottomRightCorner<3,3>() = dR;

    return ret;
}
/**
 * Classifies a reconstructed photon by matching it to generator-level
 * photons.
 *
 * Candidates are MC entries with PID 22 and pt not below 20; a candidate
 * matches when its dR to the photon's supercluster (eta, phi) is below 0.05.
 * The first match in list order is used.
 *
 * @param jpho index of the reconstructed photon
 * @return 1 if a match exists and bit 4 of its mcParentage is clear,
 *         2 if a match exists and that bit is set,
 *         3 if no generator photon matches
 */
int makeDiPhotonMass::MCTruthMatch(int jpho){
	int firstMatch = -1;

	for(int imc = 0; imc < nMC; ++imc){
		// Only generator photons with pt of at least 20 are considered.
		const bool isCandidate = mcPID->at(imc) == 22 && !(mcPt->at(imc) < 20);
		if( !isCandidate ) continue;

		const bool closeEnough = dR((*mcEta)[imc], (*mcPhi)[imc], (*phoSCEta)[jpho], (*phoSCPhi)[jpho]) < 0.05;
		// Keep only the earliest match.
		if( firstMatch < 0 && closeEnough ) firstMatch = imc;
	}

	if( firstMatch < 0 ) return 3;
	return ( ((*mcParentage)[firstMatch] & 4) == 0 ) ? 1 : 2;
}
int secondMinDrIndex(int myInd, EventTree* tree){ double myEta = tree->mcEta->at(myInd); double myPhi = tree->mcPhi->at(myInd); int myPID = tree->mcPID->at(myInd); double mindr = 999.0; double dr; int bestInd = -1; for( int oind = 0; oind < tree->nMC_; oind++){ if(oind == myInd) continue; if(tree->mcMass->at(oind) > 10.0) continue; int opid = abs(tree->mcPID->at(oind)); if(opid == 12 || opid == 14 || opid == 16) continue; dr = dR(myEta, myPhi, tree->mcEta->at(oind), tree->mcPhi->at(oind)); if( mindr > dr ) { mindr = dr; bestInd = oind; } } return bestInd; }
void registerDerivativeTest() { FunctionParserAD R; std::string func = "x*a"; // Parse the input expression into bytecode R.Parse(func, "x,a"); // add a new variable y and map it to the da/dx derivative R.AddVariable("y"); R.RegisterDerivative("a", "x", "y"); // parameter vector double p[3]; double & x = p[0]; double & a = p[1]; double & y = p[2]; FunctionParserAD dR(R); CPPUNIT_ASSERT_EQUAL (dR.AutoDiff("x"), -1); dR.Optimize(); // dR = a+x*y FunctionParserAD d2R(dR); CPPUNIT_ASSERT_EQUAL (d2R.AutoDiff("x"), -1); d2R.Optimize(); // d2R = 2*y // we probe the parsers and check if they agree with the reference solution for (x = -1.0; x < 1.0; x+=0.3726) for (a = -1.0; a < 1.0; a+=0.2642) for (y = -1.0; y < 1.0; y+=0.3156) { CPPUNIT_ASSERT_DOUBLES_EQUAL(R.Eval(p), x*a, 1.e-12); CPPUNIT_ASSERT_DOUBLES_EQUAL(dR.Eval(p), a+x*y, 1.e-12); CPPUNIT_ASSERT_DOUBLES_EQUAL(d2R.Eval(p), 2*y, 1.e-12); } }
/** Purpose ------- DSPOSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. DSPOSV first attempts to factorize the matrix in real SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with real DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a real DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio real SINGLE PRECISION performance over real DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] dA DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix A. 
If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if iterative refinement has been successfully used (INFO.EQ.0 and ITER.GE.0, see description below), then A is unchanged, if double factorization has been used (INFO.EQ.0 and ITER.LT.0, see description below), then the array dA contains the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T. @param[in] ldda INTEGER The leading dimension of the array dA. LDDA >= max(1,N). @param[in] dB DOUBLE PRECISION array on the GPU, dimension (LDDB,NRHS) The N-by-NRHS right hand side matrix B. @param[in] lddb INTEGER The leading dimension of the array dB. LDDB >= max(1,N). @param[out] dX DOUBLE PRECISION array on the GPU, dimension (LDDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. @param[in] lddx INTEGER The leading dimension of the array dX. LDDX >= max(1,N). @param dworkd (workspace) DOUBLE PRECISION array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. @param dworks (workspace) SINGLE PRECISION array on the GPU, dimension (N*(N+NRHS)) This array is used to store the real single precision matrix and the right-hand sides or solutions in single precision. @param[out] iter INTEGER - < 0: iterative refinement has failed, double precision factorization has been performed + -1 : the routine fell back to full precision for implementation- or machine-specific reasons + -2 : narrowing the precision induced an overflow, the routine fell back to full precision + -3 : failure of SPOTRF + -31: stop the iterative refinement after the 30th iteration - > 0: iterative refinement has been successfully used. 
Returns the number of iterations @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i of (DOUBLE PRECISION) A is not positive definite, so the factorization could not be completed, and the solution has not been computed. @ingroup magma_dposv_driver ********************************************************************/ extern "C" magma_int_t magma_dsposv_gpu( magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb, magmaDouble_ptr dX, magma_int_t lddx, magmaDouble_ptr dworkd, magmaFloat_ptr dworks, magma_int_t *iter, magma_int_t *info) { #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) #define dSX(i,j) (dSX + (i) + (j)*lddsx) // Constants const double BWDMAX = 1.0; const magma_int_t ITERMAX = 30; const double c_neg_one = MAGMA_D_NEG_ONE; const double c_one = MAGMA_D_ONE; const magma_int_t ione = 1; // Local variables magmaDouble_ptr dR; magmaFloat_ptr dSA, dSX; double Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddsx, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -7; else if ( lddx < max(1,n)) *info = -9; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddsx = n; lddr = n; dSA = dworks; dSX = dSA + lddsa*n; dR = dworkd; magma_queue_t queue; magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &queue ); eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_dlansy( MagmaInfNorm, uplo, n, dA, ldda, (double*)dworkd, n*nrhs, queue ); cte = Anrm * eps * magma_dsqrt( n ) * BWDMAX; /* * Convert to single precision */ magmablas_dlag2s( n, nrhs, dB, lddb, dSX, lddsx, queue, info ); 
if (*info != 0) { *iter = -2; goto fallback; } magmablas_dlat2s( uplo, n, dA, ldda, dSA, lddsa, queue, info ); if (*info != 0) { *iter = -2; goto fallback; } // factor dSA in single precision magma_spotrf_gpu( uplo, n, dSA, lddsa, info ); if (*info != 0) { *iter = -3; goto fallback; } // solve dSA*dSX = dB in single precision magma_spotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // residual dR = dB - dA*dX in double precision magmablas_slag2d( n, nrhs, dSX, lddsx, dX, lddx, queue, info ); magmablas_dlacpy( MagmaFull, n, nrhs, dB, lddb, dR, lddr, queue ); if ( nrhs == 1 ) { magma_dsymv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1, queue ); } else { magma_dsymm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr, queue ); } // TODO: use MAGMA_D_ABS( dX(i,j) ) instead of dlange? for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax( n, dR(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto refinement; } } *iter = 0; goto cleanup; //return *info; refinement: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX magmablas_dlag2s( n, nrhs, dR, lddr, dSX, lddsx, queue, info ); if (*info != 0) { *iter = -2; goto fallback; } // solve dSA*dSX = R in single precision magma_spotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // Add correction and setup residual // dX += dSX [including conversion] --and-- // dR = dB for( j=0; j < nrhs; j++ ) { magmablas_dsaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j), queue ); } // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_dsymv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1, queue ); } else { magma_dsymm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, 
ldda, dX, lddx, c_one, dR, lddr, queue ); } // TODO: use MAGMA_D_ABS( dX(i,j) ) instead of dlange? /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER > 0 and return. */ for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax( n, dR(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; goto cleanup; //return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; fallback: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. */ magma_dpotrf_gpu( uplo, n, dA, ldda, info ); if (*info == 0) { magmablas_dlacpy( MagmaFull, n, nrhs, dB, lddb, dX, lddx, queue ); magma_dpotrs_gpu( uplo, n, nrhs, dA, ldda, dX, lddx, info ); } cleanup: magma_queue_destroy( queue ); return *info; }
int main(int argc, char* argv[]) { Teuchos::GlobalMPISession mpiSession(&argc, &argv); Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing if (iprint > 0 && Teuchos::rank<int>(*comm)==0) outStream = Teuchos::rcp(&std::cout, false); else outStream = Teuchos::rcp(&bhs, false); int errorFlag = 0; try { /**********************************************************************************************/ /************************* CONSTRUCT ROL ALGORITHM ********************************************/ /**********************************************************************************************/ // Get ROL parameterlist std::string filename = "input.xml"; Teuchos::RCP<Teuchos::ParameterList> parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, parlist.ptr() ); RealT initZ = parlist->sublist("Problem Description").get("Initial Control Guess", 0.0); RealT cvarLevel = parlist->sublist("Problem Description").get("CVaR Level", 0.8); RealT pfuncSmoothing = parlist->sublist("Problem Description").get("Plus Function Smoothing Parameter", 1e-2); /**********************************************************************************************/ /************************* CONSTRUCT VECTORS **************************************************/ /**********************************************************************************************/ // Build control vectors int nx = 256; Teuchos::RCP<std::vector<RealT> > x1_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); //ROL::StdVector<RealT> x1(x1_rcp); Teuchos::RCP<ROL::StdVector<RealT> > x1 = Teuchos::rcp(new ROL::StdVector<RealT>(x1_rcp)); Teuchos::RCP<std::vector<RealT> > x2_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); 
ROL::StdVector<RealT> x2(x2_rcp); Teuchos::RCP<std::vector<RealT> > x3_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); ROL::StdVector<RealT> x3(x3_rcp); Teuchos::RCP<std::vector<RealT> > z_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); //ROL::StdVector<RealT> z(z_rcp); Teuchos::RCP<ROL::StdVector<RealT> > z = Teuchos::rcp(new ROL::StdVector<RealT>(z_rcp)); Teuchos::RCP<std::vector<RealT> > xr_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); ROL::StdVector<RealT> xr(xr_rcp); Teuchos::RCP<std::vector<RealT> > d_rcp = Teuchos::rcp( new std::vector<RealT>(nx+2,0.0) ); //ROL::StdVector<RealT> d(d_rcp); Teuchos::RCP<ROL::StdVector<RealT> > d = Teuchos::rcp(new ROL::StdVector<RealT>(d_rcp)); for ( int i = 0; i < nx+2; i++ ) { (*xr_rcp)[i] = random<RealT>(comm); (*d_rcp)[i] = random<RealT>(comm); (*z_rcp)[i] = initZ; } ROL::RiskVector<RealT> zR(z,true), x1R(x1,true), dR(d,true); // Build state and adjoint vectors Teuchos::RCP<std::vector<RealT> > u_rcp = Teuchos::rcp( new std::vector<RealT>(nx,1.0) ); ROL::StdVector<RealT> u(u_rcp); Teuchos::RCP<std::vector<RealT> > p_rcp = Teuchos::rcp( new std::vector<RealT>(nx,0.0) ); ROL::StdVector<RealT> p(p_rcp); Teuchos::RCP<ROL::Vector<RealT> > up = Teuchos::rcp(&u,false); Teuchos::RCP<ROL::Vector<RealT> > pp = Teuchos::rcp(&p,false); /**********************************************************************************************/ /************************* CONSTRUCT SOL COMPONENTS *******************************************/ /**********************************************************************************************/ // Build samplers int dim = 4; int nSamp = parlist->sublist("Problem Description").get("Number of Samples", 20); std::vector<RealT> tmp(2,0.0); tmp[0] = -1.0; tmp[1] = 1.0; std::vector<std::vector<RealT> > bounds(dim,tmp); Teuchos::RCP<ROL::BatchManager<RealT> > bman = Teuchos::rcp(new ROL::StdTeuchosBatchManager<RealT,int>(comm)); Teuchos::RCP<ROL::SampleGenerator<RealT> > sampler = 
Teuchos::rcp(new ROL::MonteCarloGenerator<RealT>(nSamp,bounds,bman,false,false,100)); /**********************************************************************************************/ /************************* CONSTRUCT OBJECTIVE FUNCTION ***************************************/ /**********************************************************************************************/ // Build risk-averse objective function RealT alpha = 1.e-3; Teuchos::RCP<ROL::ParametrizedObjective_SimOpt<RealT> > pobjSimOpt = Teuchos::rcp(new Objective_BurgersControl<RealT>(alpha,nx)); Teuchos::RCP<ROL::ParametrizedEqualityConstraint_SimOpt<RealT> > pconSimOpt = Teuchos::rcp(new EqualityConstraint_BurgersControl<RealT>(nx)); Teuchos::RCP<ROL::ParametrizedObjective<RealT> > pObj = Teuchos::rcp(new ROL::Reduced_ParametrizedObjective_SimOpt<RealT>(pobjSimOpt,pconSimOpt,up,pp)); //Teuchos::RCP<ROL::Objective<RealT> > obj = Teuchos::rcp(new ROL::RiskNeutralObjective<RealT>(pObj, sampler, true)); Teuchos::RCP<ROL::Distribution<RealT> > dist = Teuchos::rcp(new ROL::Parabolic<RealT>(-0.5, 0.5)); Teuchos::RCP<ROL::PlusFunction<RealT> > pfunc = Teuchos::rcp(new ROL::PlusFunction<RealT>(dist, pfuncSmoothing)); Teuchos::RCP<ROL::RiskMeasure<RealT> > rmeas = Teuchos::rcp(new ROL::CVaR<RealT>(cvarLevel, 1.0, pfunc)); Teuchos::RCP<ROL::Objective<RealT> > obj = Teuchos::rcp(new ROL::RiskAverseObjective<RealT>(pObj, rmeas, sampler)); // Test parametrized objective functions *outStream << "Check Derivatives of Parametrized Objective Function\n"; //x1.set(xr); x1->set(xr); pObj->setParameter(sampler->getMyPoint(0)); //pObj->checkGradient(x1,d,true,*outStream); pObj->checkGradient(*x1,*d,true,*outStream); //pObj->checkHessVec(x1,d,true,*outStream); pObj->checkHessVec(*x1,*d,true,*outStream); //obj->checkGradient(x1,d,true,*outStream); obj->checkGradient(x1R,dR,true,*outStream); //obj->checkHessVec(x1,d,true,*outStream); obj->checkHessVec(x1R,dR,true,*outStream); ROL::Algorithm<RealT> algors("Trust Region", 
*parlist); //algors.run(z, *obj, true, *outStream); algors.run(zR, *obj, true, *outStream); /**********************************************************************************************/ /****************** CONSTRUCT SIMULATED CONSTRAINT AND VECTORS ********************************/ /**********************************************************************************************/ // Construct SimulatedEqualityConstraint. int useW = parlist->sublist("Problem Description").get("Use Constraint Weights", true); ROL::SimulatedEqualityConstraint<RealT> simcon(sampler, pconSimOpt, useW); // Construct SimulatedObjective. ROL::SimulatedObjectiveCVaR<RealT> simobj(sampler, pobjSimOpt, pfunc, cvarLevel); // Simulated vectors. std::vector<Teuchos::RCP<ROL::Vector<RealT> > > xu_rcp; std::vector<Teuchos::RCP<ROL::Vector<RealT> > > vu_rcp; int nvecloc = sampler->numMySamples(); RealT right = 1, left = 0; for( int k=0; k<nvecloc; ++k ) { Teuchos::RCP<std::vector<RealT> > xuk_rcp = Teuchos::rcp( new std::vector<RealT>(nx,1.0) ); Teuchos::RCP<std::vector<RealT> > vuk_rcp = Teuchos::rcp( new std::vector<RealT>(nx,1.0) ); Teuchos::RCP<ROL::Vector<RealT> > xuk = Teuchos::rcp( new ROL::StdVector<RealT>( xuk_rcp ) ); Teuchos::RCP<ROL::Vector<RealT> > vuk = Teuchos::rcp( new ROL::StdVector<RealT>( vuk_rcp ) ); for( int i=0; i<nx; ++i ) { (*xuk_rcp)[i] = ( (RealT)rand() / (RealT)RAND_MAX ) * (right - left) + left; (*vuk_rcp)[i] = ( (RealT)rand() / (RealT)RAND_MAX ) * (right - left) + left; } xu_rcp.push_back(xuk); vu_rcp.push_back(vuk); } Teuchos::RCP<ROL::SimulatedVector<RealT> > xu = Teuchos::rcp(new ROL::SimulatedVector<RealT>(xu_rcp, bman)); Teuchos::RCP<ROL::SimulatedVector<RealT> > vu = Teuchos::rcp(new ROL::SimulatedVector<RealT>(vu_rcp, bman)); // SimOpt vectors. 
Teuchos::RCP<std::vector<RealT> > zvec_rcp = Teuchos::rcp(new std::vector<RealT>(nx+2,0.0)); Teuchos::RCP<ROL::StdVector<RealT> > zvec = Teuchos::rcp(new ROL::StdVector<RealT>(zvec_rcp)); Teuchos::RCP<std::vector<RealT> > dvec_rcp = Teuchos::rcp(new std::vector<RealT>(nx+2,0.0)); Teuchos::RCP<ROL::StdVector<RealT> > dvec = Teuchos::rcp(new ROL::StdVector<RealT>(dvec_rcp)); for ( int i = 0; i < nx+2; i++ ) { (*zvec_rcp)[i] = random<RealT>(comm); (*dvec_rcp)[i] = random<RealT>(comm); } Teuchos::RCP<ROL::RiskVector<RealT> > rz = Teuchos::rcp(new ROL::RiskVector<RealT>(zvec, true)); Teuchos::RCP<ROL::RiskVector<RealT> > rd = Teuchos::rcp(new ROL::RiskVector<RealT>(dvec, true)); ROL::Vector_SimOpt<RealT> x(xu, rz); ROL::Vector_SimOpt<RealT> v(vu, rd); *outStream << std::endl << "TESTING SimulatedEqualityConstraint" << std::endl; simcon.checkApplyJacobian(x, v, *vu, true, *outStream); simcon.checkAdjointConsistencyJacobian(*vu, v, x, *vu, x, true, *outStream); simcon.checkApplyAdjointHessian(x, *vu, v, x, true, *outStream); *outStream << std::endl << "TESTING SimulatedObjective" << std::endl; RealT tol = 1e-8; simobj.value(x, tol); simobj.checkGradient(x, v, true, *outStream); simobj.checkHessVec(x, v, true, *outStream); ROL::Algorithm<RealT> algo("Composite Step", *parlist); ROL::Algorithm<RealT> algo2("Composite Step", *parlist); ROL::Algorithm<RealT> algo3("Composite Step", *parlist); ROL::Algorithm<RealT> algo4("Composite Step", *parlist); ROL::Algorithm<RealT> algo5("Composite Step", *parlist); vu->zero(); for ( int i = 0; i < nx+2; i++ ) { (*zvec_rcp)[i] = initZ; } ROL::SimulatedObjectiveCVaR<RealT> simobjExpval(sampler, pobjSimOpt, pfunc, 0.0); ROL::SimulatedObjectiveCVaR<RealT> simobjCVaR3(sampler, pobjSimOpt, pfunc, 0.3); ROL::SimulatedObjectiveCVaR<RealT> simobjCVaR6(sampler, pobjSimOpt, pfunc, 0.6); ROL::SimulatedObjectiveCVaR<RealT> simobjCVaR7(sampler, pobjSimOpt, pfunc, 0.6); algo2.run(x, *vu, simobjExpval, simcon, true, *outStream); algo3.run(x, *vu, 
simobjCVaR3, simcon, true, *outStream); algo4.run(x, *vu, simobjCVaR6, simcon, true, *outStream); algo5.run(x, *vu, simobjCVaR7, simcon, true, *outStream); algo.run(x, *vu, simobj, simcon, true, *outStream); // Output control to file. if (Teuchos::rank<int>(*comm)==0) { std::ofstream file; file.open("control-fs-cvar.txt"); for ( int i = 0; i < nx+2; ++i ) { file << (*zvec_rcp)[i] << "\n"; } file.close(); } ROL::RiskVector<RealT> &rxfz = Teuchos::dyn_cast<ROL::RiskVector<RealT> >(*(x.get_2())); Teuchos::RCP<ROL::Vector<RealT> > rfz = rxfz.getVector(); ROL::StdVector<RealT> &rfz_std = Teuchos::dyn_cast<ROL::StdVector<RealT> >(*rfz); z->set(rfz_std); ROL::Algorithm<RealT> algors2("Trust Region", *parlist); algors2.run(zR, *obj, true, *outStream); } catch (std::logic_error err) { *outStream << err.what() << "\n"; errorFlag = -1000; }; // end try if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; return 0; }
int MaxwellCorrectionGadget:: process(GadgetContainerMessage<ISMRMRD::ImageHeader>* m1, GadgetContainerMessage< hoNDArray< std::complex<float> > >* m2) { if (maxwell_coefficients_present_) { //GDEBUG("Got coefficients\n"); int Nx = m2->getObjectPtr()->get_size(0); int Ny = m2->getObjectPtr()->get_size(1); int Nz = m2->getObjectPtr()->get_size(2); float dx = m1->getObjectPtr()->field_of_view[0] / Nx; float dy = m1->getObjectPtr()->field_of_view[1] / Ny; float dz = m1->getObjectPtr()->field_of_view[2] / Nz; /* GDEBUG("Nx = %d, Ny = %d, Nz = %d\n", Nx, Ny, Nz); GDEBUG("dx = %f, dy = %f, dz = %f\n", dx, dy, dz); GDEBUG("img_pos_x = %f, img_pos_y = %f, img_pos_z = %f\n", m1->getObjectPtr()->position[0], m1->getObjectPtr()->position[1], m1->getObjectPtr()->position[2]); */ std::vector<float> dR(3,0); std::vector<float> dP(3,0); std::vector<float> dS(3,0); std::vector<float> p(3,0); for (int z = 0; z < Nz; z++) { for (int y = 0; y < Ny; y++) { for (int x = 0; x < Nx; x++) { dR[0] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[0]; dR[1] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[1]; dR[2] = (x-Nx/2+0.5) * dx * m1->getObjectPtr()->read_dir[2]; dP[0] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[0]; dP[1] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[1]; dP[2] = (y-Ny/2+0.5) * dy * m1->getObjectPtr()->phase_dir[2]; if (Nz > 1) { dS[0] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[0]; dS[1] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[1]; dS[2] = (z-Nz/2+0.5) * dz * m1->getObjectPtr()->slice_dir[2]; } p[0] = m1->getObjectPtr()->position[0] + dP[0] + dR[0] + dS[0]; p[1] = m1->getObjectPtr()->position[1] + dP[1] + dR[1] + dS[1]; p[2] = m1->getObjectPtr()->position[2] + dP[2] + dR[2] + dS[2]; //Convert to centimeters p[0] = p[0]/1000.0; p[1] = p[1]/1000.0; p[2] = p[2]/1000.0; float delta_phi = maxwell_coefficients_[0]*p[2]*p[2] + maxwell_coefficients_[1]*(p[0]*p[0] + p[1]*p[1]) + maxwell_coefficients_[2]*p[0]*p[2] + 
maxwell_coefficients_[3]*p[1]*p[2]; long index = z*Ny*Nx+y*Nx+x; std::complex<float>* data_ptr = m2->getObjectPtr()->get_data_ptr(); std::complex<float> correction = std::polar(1.0f,static_cast<float>(2*M_PI*delta_phi)); data_ptr[index] *= correction; } } } } if (this->next()->putq(m1) < 0) { GDEBUG("Unable to put data on next Gadgets Q\n"); return GADGET_FAIL; } return GADGET_OK; }
extern "C" magma_int_t magma_zcposv_gpu(char uplo, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaDoubleComplex *dworkd, magmaFloatComplex *dworks, magma_int_t *iter, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= ZCPOSV computes the solution to a complex system of linear equations A * X = B, where A is an N-by-N Hermitian positive definite matrix and X and B are N-by-NRHS matrices. ZCPOSV first attempts to factorize the matrix in complex SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with complex DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a complex DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments ========= UPLO (input) CHARACTER = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. 
N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. dA (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the Hermitian matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if iterative refinement has been successfully used (INFO.EQ.0 and ITER.GE.0, see description below), then A is unchanged, if double factorization has been used (INFO.EQ.0 and ITER.LT.0, see description below), then the array dA contains the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T. LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,N). dB (input) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS) The N-by-NRHS right hand side matrix B. LDDB (input) INTEGER The leading dimension of the array dB. LDDB >= max(1,N). dX (output) COMPLEX_16 array on the GPU, dimension (LDDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. LDDX (input) INTEGER The leading dimension of the array dX. LDDX >= max(1,N). dworkd (workspace) COMPLEX_16 array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. dworks (workspace) COMPLEX array on the GPU, dimension (N*(N+NRHS)) This array is used to store the complex single precision matrix and the right-hand sides or solutions in single precision. 
ITER (output) INTEGER < 0: iterative refinement has failed, double precision factorization has been performed -1 : the routine fell back to full precision for implementation- or machine-specific reasons -2 : narrowing the precision induced an overflow, the routine fell back to full precision -3 : failure of SPOTRF -31: stop the iterative refinement after the 30th iteration > 0: iterative refinement has been successfully used. Returns the number of iterations INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i of (DOUBLE PRECISION) A is not positive definite, so the factorization could not be completed, and the solution has not been computed. ===================================================================== */ #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) #define dSX(i,j) (dSX + (i) + (j)*lddsx) magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex c_one = MAGMA_Z_ONE; magma_int_t ione = 1; magmaDoubleComplex *dR; magmaFloatComplex *dSA, *dSX; magmaDoubleComplex Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddsx, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -7; else if ( lddx < max(1,n)) *info = -9; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddsx = n; lddr = n; dSA = dworks; dSX = dSA + lddsa*n; dR = dworkd; eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_zlanhe('I', uplo, n, dA, ldda, (double*)dworkd ); cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX; /* * Convert to single precision */ magmablas_zlag2c( n, nrhs, dB, lddb, dSX, lddsx, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } magmablas_zlat2c( uplo, n, dA, ldda, dSA, 
lddsa, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // factor dSA in single precision magma_cpotrf_gpu( uplo, n, dSA, lddsa, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // solve dSA*dSX = dB in single precision magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // residual dR = dB - dA*dX in double precision magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info ); magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr ); if ( nrhs == 1 ) { magma_zhemv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_zhemm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange? for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( n, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto REFINEMENT; } } *iter = 0; return *info; REFINEMENT: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX magmablas_zlag2c( n, nrhs, dR, lddr, dSX, lddsx, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // solve dSA*dSX = R in single precision magma_cpotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // Add correction and setup residual // dX += dSX [including conversion] --and-- // dR = dB for( j=0; j < nrhs; j++ ) { magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) ); } // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_zhemv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_zhemm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER>0 and return. 
*/ for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( n, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; FALLBACK: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. */ magma_zpotrf_gpu( uplo, n, dA, ldda, info ); if (*info == 0) { magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx ); magma_zpotrs_gpu( uplo, n, nrhs, dA, ldda, dX, lddx, info ); } return *info; }
extern "C" magma_int_t magma_dsgesv_gpu(char trans, magma_int_t n, magma_int_t nrhs, double *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *dipiv, double *dB, magma_int_t lddb, double *dX, magma_int_t lddx, double *dworkd, float *dworks, magma_int_t *iter, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= DSGESV computes the solution to a real system of linear equations A * X = B or A' * X = B where A is an N-by-N matrix and X and B are N-by-NRHS matrices. DSGESV first attempts to factorize the matrix in real SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with real DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a real DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio real SINGLE PRECISION performance over real DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. 
Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. dA (input or input/output) DOUBLE PRECISION array on the GPU, dimension (ldda,N) On entry, the N-by-N coefficient matrix A. On exit, if iterative refinement has been successfully used (info.EQ.0 and ITER.GE.0, see description below), A is unchanged. If double precision factorization has been used (info.EQ.0 and ITER.LT.0, see description below), then the array dA contains the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. ldda (input) INTEGER The leading dimension of the array dA. ldda >= max(1,N). IPIV (output) INTEGER array, dimension (N) The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). Corresponds either to the single precision factorization (if info.EQ.0 and ITER.GE.0) or the double precision factorization (if info.EQ.0 and ITER.LT.0). dIPIV (output) INTEGER array on the GPU, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was moved to row IPIV(i). dB (input) DOUBLE PRECISION array on the GPU, dimension (lddb,NRHS) The N-by-NRHS right hand side matrix B. lddb (input) INTEGER The leading dimension of the array dB. lddb >= max(1,N). dX (output) DOUBLE PRECISION array on the GPU, dimension (lddx,NRHS) If info = 0, the N-by-NRHS solution matrix X. lddx (input) INTEGER The leading dimension of the array dX. lddx >= max(1,N). dworkd (workspace) DOUBLE PRECISION array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. 
dworks (workspace) SINGLE PRECISION array on the GPU, dimension (N*(N+NRHS)) This array is used to store the real single precision matrix and the right-hand sides or solutions in single precision. iter (output) INTEGER < 0: iterative refinement has failed, double precision factorization has been performed -1 : the routine fell back to full precision for implementation- or machine-specific reasons -2 : narrowing the precision induced an overflow, the routine fell back to full precision -3 : failure of SGETRF -31: stop the iterative refinement after the 30th iteration > 0: iterative refinement has been successfully used. Returns the number of iterations info (output) INTEGER = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) computed in DOUBLE PRECISION is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. ===================================================================== */ #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) double c_neg_one = MAGMA_D_NEG_ONE; double c_one = MAGMA_D_ONE; magma_int_t ione = 1; double *dR; float *dSA, *dSX; double Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -8; else if ( lddx < max(1,n)) *info = -10; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddr = n; dSA = dworks; dSX = dSA + lddsa*n; dR = dworkd; eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_dlange('I', n, n, dA, ldda, (double*)dworkd ); cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX; /* * Convert to single precision */ //magmablas_dlag2s( n, nrhs, dB, lddb, dSX, lddsx, info ); // done 
inside dsgetrs with pivots if (*info != 0) { *iter = -2; goto FALLBACK; } magmablas_dlag2s( n, n, dA, ldda, dSA, lddsa, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // factor dSA in single precision magma_sgetrf_gpu( n, n, dSA, lddsa, ipiv, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Generate parallel pivots { magma_int_t *newipiv; magma_imalloc_cpu( &newipiv, n ); if ( newipiv == NULL ) { *iter = -3; goto FALLBACK; } swp2pswp( trans, n, ipiv, newipiv ); magma_setvector( n, sizeof(magma_int_t), newipiv, 1, dipiv, 1 ); magma_free_cpu( newipiv ); } // solve dSA*dSX = dB in single precision // converts dB to dSX and applies pivots, solves, then converts result back to dX magma_dsgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dB, lddb, dX, lddx, dSX, info ); // residual dR = dB - dA*dX in double precision magmablas_dlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr ); if ( nrhs == 1 ) { magma_dgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_dgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } // TODO: use MAGMA_D_ABS( dX(i,j) ) instead of dlange? for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax ( n, dR(0,j), 1 ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto REFINEMENT; } } *iter = 0; return *info; REFINEMENT: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX // solve dSA*dSX = R in single precision // convert result back to double precision dR // it's okay that dR is used for both dB input and dX output. 
magma_dsgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dR, lddr, dR, lddr, dSX, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Add correction and setup residual // dX += dR --and-- // dR = dB // This saves going through dR a second time (if done with one more kernel). // -- not really: first time is read, second time is write. for( j=0; j < nrhs; j++ ) { magmablas_daxpycp( n, dR(0,j), dX(0,j), dB(0,j) ); } // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_dgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_dgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER>0 and return. */ for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax ( n, dR(0,j), 1 ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; FALLBACK: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. */ magma_dgetrf_gpu( n, n, dA, ldda, ipiv, info ); if (*info == 0) { magmablas_dlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx ); magma_dgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dX, lddx, info ); } return *info; }
/** Purpose ------- CGERFS improve the computed solution to a system of linear equations. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(n)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by SLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments --------- @param[in] trans magma_trans_t Specifies the form of the system of equations: - = MagmaNoTrans: A * X = B (No transpose) - = MagmaTrans: A**T * X = B (Transpose) - = MagmaConjTrans: A**H * X = B (Conjugate transpose) @param[in] n INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in] dA COMPLEX array on the GPU, dimension (ldda,N) the N-by-N coefficient matrix A. @param[in] ldda INTEGER The leading dimension of the array dA. ldda >= max(1,N). @param[in] dB COMPLEX array on the GPU, dimension (lddb,NRHS) The N-by-NRHS right hand side matrix B. @param[in] lddb INTEGER The leading dimension of the array dB. lddb >= max(1,N). @param[in, out] dX COMPLEX array on the GPU, dimension (lddx,NRHS) On entry, the solution matrix X, as computed by CGETRS_NOPIV. On exit, the improved solution matrix X. @param[in] lddx INTEGER The leading dimension of the array dX. lddx >= max(1,N). @param dworkd (workspace) COMPLEX array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. @param dAF COMPLEX array on the GPU, dimension (ldda,n) The factors L and U from the factorization A = L*U as computed by CGETRF_NOPIV. 
@param[out] iter INTEGER - < 0: iterative refinement has failed, real factorization has been performed + -1 : the routine fell back to full precision for implementation- or machine-specific reasons + -2 : narrowing the precision induced an overflow, the routine fell back to full precision + -3 : failure of SGETRF + -31: stop the iterative refinement after the 30th iteration - > 0: iterative refinement has been successfully used. Returns the number of iterations @param[out] info INTEGER - = 0: successful exit - < 0: if info = -i, the i-th argument had an illegal value - > 0: if info = i, U(i,i) computed in REAL is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. @ingroup magma_cgesv_driver ********************************************************************/ extern "C" magma_int_t magma_cgerfs_nopiv_gpu( magma_trans_t trans, magma_int_t n, magma_int_t nrhs, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magmaFloatComplex_ptr dX, magma_int_t lddx, magmaFloatComplex_ptr dworkd, magmaFloatComplex_ptr dAF, magma_int_t *iter, magma_int_t *info) { #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex c_one = MAGMA_C_ONE; magma_int_t ione = 1; magmaFloatComplex_ptr dR; magmaFloatComplex Xnrmv, Rnrmv; float Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -8; else if ( lddx < max(1,n)) *info = -10; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddr = n; dR = dworkd; eps = lapackf77_slamch("Epsilon"); Anrm = magmablas_clange(MagmaInfNorm, n, n, dA, ldda, (float*)dworkd 
); cte = Anrm * eps * pow( (float)n, (float)0.5 ) * BWDMAX; // residual dR = dB - dA*dX in real magmablas_clacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr ); if ( nrhs == 1 ) { magma_cgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_cgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } // TODO: use MAGMA_C_ABS( dX(i,j) ) instead of clange? for( j=0; j < nrhs; j++ ) { i = magma_icamax( n, dX(0,j), 1) - 1; magma_cgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_clange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_icamax ( n, dR(0,j), 1 ) - 1; magma_cgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_clange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); // printf("Rnrm : %e, Xnrm*cte : %e\n", Rnrm, Xnrm*cte); if ( Rnrm > Xnrm*cte ) { goto REFINEMENT; } } *iter = 0; return *info; REFINEMENT: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // solve dAF*dX = dR // it's okay that dR is used for both dB input and dX output. magma_cgetrs_nopiv_gpu( trans, n, nrhs, dAF, lddsa, dR, lddr, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Add correction and setup residual // dX += dR --and-- // dR = dB // This saves going through dR a second time (if done with one more kernel). // -- not really: first time is read, second time is write. for( j=0; j < nrhs; j++ ) { magmablas_caxpycp2( n, dR(0,j), dX(0,j), dB(0,j) ); } // residual dR = dB - dA*dX in real if ( nrhs == 1 ) { magma_cgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_cgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER > 0 and return. 
*/ for( j=0; j < nrhs; j++ ) { i = magma_icamax( n, dX(0,j), 1) - 1; magma_cgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_clange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_icamax ( n, dR(0,j), 1 ) - 1; magma_cgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_clange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly. */ *iter = -ITERMAX - 1; FALLBACK: /* Iterative refinement failed to converge to a * satisfactory solution. */ return *info; }
/** Purpose ------- ZCGESV computes the solution to a complex system of linear equations A * X = B, A**T * X = B, or A**H * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. ZCGESV first attempts to factorize the matrix in complex SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with complex DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a complex DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments --------- @param[in] trans magma_trans_t Specifies the form of the system of equations: - = MagmaNoTrans: A * X = B (No transpose) - = MagmaTrans: A**T * X = B (Transpose) - = MagmaConjTrans: A**H * X = B (Conjugate transpose) @param[in] n INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] dA COMPLEX_16 array on the GPU, dimension (ldda,N) On entry, the N-by-N coefficient matrix A. 
On exit, if iterative refinement has been successfully used (info.EQ.0 and ITER.GE.0, see description below), A is unchanged. If double precision factorization has been used (info.EQ.0 and ITER.LT.0, see description below), then the array dA contains the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. @param[in] ldda INTEGER The leading dimension of the array dA. ldda >= max(1,N). @param[out] ipiv INTEGER array, dimension (N) The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). Corresponds either to the single precision factorization (if info.EQ.0 and ITER.GE.0) or the double precision factorization (if info.EQ.0 and ITER.LT.0). @param[out] dipiv INTEGER array on the GPU, dimension (N) The pivot indices; for 1 <= i <= N, after permuting, row i of the matrix was moved to row dIPIV(i). Note this is different than IPIV, where interchanges are applied one-after-another. @param[in] dB COMPLEX_16 array on the GPU, dimension (lddb,NRHS) The N-by-NRHS right hand side matrix B. @param[in] lddb INTEGER The leading dimension of the array dB. lddb >= max(1,N). @param[out] dX COMPLEX_16 array on the GPU, dimension (lddx,NRHS) If info = 0, the N-by-NRHS solution matrix X. @param[in] lddx INTEGER The leading dimension of the array dX. lddx >= max(1,N). @param dworkd (workspace) COMPLEX_16 array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. @param dworks (workspace) COMPLEX array on the GPU, dimension (N*(N+NRHS)) This array is used to store the complex single precision matrix and the right-hand sides or solutions in single precision. 
@param[out] iter INTEGER - < 0: iterative refinement has failed, double precision factorization has been performed + -1 : the routine fell back to full precision for implementation- or machine-specific reasons + -2 : narrowing the precision induced an overflow, the routine fell back to full precision + -3 : failure of SGETRF + -31: stop the iterative refinement after the 30th iteration - > 0: iterative refinement has been successfully used. Returns the number of iterations @param[out] info INTEGER - = 0: successful exit - < 0: if info = -i, the i-th argument had an illegal value - > 0: if info = i, U(i,i) computed in DOUBLE PRECISION is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. @ingroup magma_zgesv_driver ********************************************************************/ extern "C" magma_int_t magma_zcgesv_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *ipiv, magma_int_t *dipiv, magmaDoubleComplex *dB, magma_int_t lddb, magmaDoubleComplex *dX, magma_int_t lddx, magmaDoubleComplex *dworkd, magmaFloatComplex *dworks, magma_int_t *iter, magma_int_t *info) { #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex c_one = MAGMA_Z_ONE; magma_int_t ione = 1; magmaDoubleComplex *dR; magmaFloatComplex *dSA, *dSX; magmaDoubleComplex Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -8; else if ( lddx < max(1,n)) *info = -10; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddr = n; dSA = dworks; dSX = dSA + lddsa*n; dR 
= dworkd; eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_zlange(MagmaInfNorm, n, n, dA, ldda, (double*)dworkd ); cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX; /* * Convert to single precision */ //magmablas_zlag2c( n, nrhs, dB, lddb, dSX, lddsx, info ); // done inside zcgetrs with pivots if (*info != 0) { *iter = -2; goto FALLBACK; } magmablas_zlag2c( n, n, dA, ldda, dSA, lddsa, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // factor dSA in single precision magma_cgetrf_gpu( n, n, dSA, lddsa, ipiv, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Generate parallel pivots { magma_int_t *newipiv; magma_imalloc_cpu( &newipiv, n ); if ( newipiv == NULL ) { *iter = -3; goto FALLBACK; } swp2pswp( trans, n, ipiv, newipiv ); magma_setvector( n, sizeof(magma_int_t), newipiv, 1, dipiv, 1 ); magma_free_cpu( newipiv ); } // solve dSA*dSX = dB in single precision // converts dB to dSX and applies pivots, solves, then converts result back to dX magma_zcgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dB, lddb, dX, lddx, dSX, info ); // residual dR = dB - dA*dX in double precision magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dR, lddr ); if ( nrhs == 1 ) { magma_zgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_zgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange? 
for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( n, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto REFINEMENT; } } *iter = 0; return *info; REFINEMENT: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX // solve dSA*dSX = R in single precision // convert result back to double precision dR // it's okay that dR is used for both dB input and dX output. magma_zcgetrs_gpu( trans, n, nrhs, dSA, lddsa, dipiv, dR, lddr, dR, lddr, dSX, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Add correction and setup residual // dX += dR --and-- // dR = dB // This saves going through dR a second time (if done with one more kernel). // -- not really: first time is read, second time is write. for( j=0; j < nrhs; j++ ) { magmablas_zaxpycp( n, dR(0,j), dX(0,j), dB(0,j) ); } // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_zgemv( trans, n, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_zgemm( trans, MagmaNoTrans, n, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER > 0 and return. */ for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( n, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. 
*/ *iter = iiter; return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; FALLBACK: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. */ magma_zgetrf_gpu( n, n, dA, ldda, ipiv, info ); if (*info == 0) { magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx ); magma_zgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dX, lddx, info ); } return *info; }
/** Purpose ------- CGEQRF3 computes a QR factorization of a complex M-by-N matrix A: A = Q * R. This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are stored separately in dT. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. N >= 0. @param[in,out] dA COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). @param[in] ldda INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16. @param[out] tau COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). @param[out] dT (workspace) COMPLEX array on the GPU, dimension (2*MIN(M, N) + ceil(N/32)*32 )*NB, where NB can be obtained through magma_get_cgeqrf_nb( M, N ). It starts with a MIN(M,N)*NB block that stores the triangular T matrices, followed by a MIN(M,N)*NB block that stores the diagonal blocks of the R matrix. The rest of the array is used as workspace. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. Further Details --------------- The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). 
Each H(i) has the form H(i) = I - tau * v * v^H where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). @ingroup magma_cgeqrf_comp ********************************************************************/ extern "C" magma_int_t magma_cgeqrf3_gpu( magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex_ptr dT, magma_int_t *info ) { #ifdef HAVE_clBLAS #define dA(i_, j_) dA, (dA_offset + (i_) + (j_)*(ldda)) #define dT(i_) dT, (dT_offset + (i_)*nb) #define dR(i_) dT, (dT_offset + ( minmn + (i_))*nb) #define dwork(i_) dT, (dT_offset + (2*minmn + (i_))*nb) #else #define dA(i_, j_) (dA + (i_) + (j_)*(ldda)) #define dT(i_) (dT + (i_)*nb) #define dR(i_) (dT + ( minmn + (i_))*nb) #define dwork(i_) (dT + (2*minmn + (i_))*nb) #endif magmaFloatComplex *work, *hwork, *R; magma_int_t cols, i, ib, ldwork, lddwork, lhwork, lwork, minmn, nb, old_i, old_ib, rows; // check arguments *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ldda < max(1,m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } minmn = min( m, n ); if (minmn == 0) return *info; // TODO: use min(m,n), but that affects dT nb = magma_get_cgeqrf_nb( m, n ); // dT contains 3 blocks: // dT is minmn*nb // dR is minmn*nb // dwork is n*nb lddwork = n; // work is m*nb for panel // hwork is n*nb, and at least nb*nb for T in larft // R is nb*nb ldwork = m; lhwork = max( n*nb, nb*nb ); lwork = ldwork*nb + lhwork + nb*nb; // last block needs rows*cols for matrix and prefers cols*nb for work // worst case is n > m*nb, m a small multiple of nb: // needs n*nb + n > (m+n)*nb // prefers 2*n*nb, about twice above (m+n)*nb. 
i = ((minmn-1)/nb)*nb; lwork = max( lwork, (m-i)*(n-i) + (n-i)*nb ); if (MAGMA_SUCCESS != magma_cmalloc_pinned( &work, lwork )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } hwork = work + ldwork*nb; R = work + ldwork*nb + lhwork; memset( R, 0, nb*nb*sizeof(magmaFloatComplex) ); magma_queue_t queues[2]; magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &queues[0] ); magma_queue_create( cdev, &queues[1] ); if ( nb > 1 && nb < minmn ) { // need nb*nb for T in larft assert( lhwork >= nb*nb ); // Use blocked code initially old_i = 0; old_ib = nb; for (i = 0; i < minmn-nb; i += nb) { ib = min( minmn-i, nb ); rows = m - i; // get i-th panel from device magma_cgetmatrix_async( rows, ib, dA(i,i), ldda, work, ldwork, queues[1] ); if (i > 0) { // Apply H^H to A(i:m,i+2*ib:n) from the left cols = n - old_i - 2*old_ib; magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, m-old_i, cols, old_ib, dA(old_i, old_i ), ldda, dT(old_i), nb, dA(old_i, old_i+2*old_ib), ldda, dwork(0), lddwork, queues[0] ); // Fix the diagonal block magma_csetmatrix_async( old_ib, old_ib, R, old_ib, dR(old_i), old_ib, queues[0] ); } magma_queue_sync( queues[1] ); // wait to get work(i) lapackf77_cgeqrf( &rows, &ib, work, &ldwork, &tau[i], hwork, &lhwork, info ); // Form the triangular factor of the block reflector in hwork // H = H(i) H(i+1) . . . 
H(i+ib-1) lapackf77_clarft( MagmaForwardStr, MagmaColumnwiseStr, &rows, &ib, work, &ldwork, &tau[i], hwork, &ib ); // wait for previous trailing matrix update (above) to finish with R magma_queue_sync( queues[0] ); // copy the upper triangle of panel to R and invert it, and // set the upper triangle of panel (V) to identity csplit_diag_block( ib, work, ldwork, R ); // send i-th V matrix to device magma_csetmatrix( rows, ib, work, ldwork, dA(i,i), ldda, queues[1] ); if (i + ib < n) { // send T matrix to device magma_csetmatrix( ib, ib, hwork, ib, dT(i), nb, queues[1] ); if (i+nb < minmn-nb) { // Apply H^H to A(i:m,i+ib:i+2*ib) from the left magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, ib, ib, dA(i, i ), ldda, dT(i), nb, dA(i, i+ib), ldda, dwork(0), lddwork, queues[1] ); // wait for larfb to finish with dwork before larfb in next iteration starts magma_queue_sync( queues[1] ); } else { // Apply H^H to A(i:m,i+ib:n) from the left magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, n-i-ib, ib, dA(i, i ), ldda, dT(i), nb, dA(i, i+ib), ldda, dwork(0), lddwork, queues[1] ); // Fix the diagonal block magma_csetmatrix( ib, ib, R, ib, dR(i), ib, queues[1] ); } old_i = i; old_ib = ib; } } } else { i = 0; } // Use unblocked code to factor the last or only block. if (i < minmn) { rows = m-i; cols = n-i; magma_cgetmatrix( rows, cols, dA(i, i), ldda, work, rows, queues[1] ); // see comments for lwork above lhwork = lwork - rows*cols; lapackf77_cgeqrf( &rows, &cols, work, &rows, &tau[i], &work[rows*cols], &lhwork, info ); magma_csetmatrix( rows, cols, work, rows, dA(i, i), ldda, queues[1] ); } magma_queue_destroy( queues[0] ); magma_queue_destroy( queues[1] ); magma_free_pinned( work ); return *info; } // magma_cgeqrf_gpu
int main(int ac, char** av){ if(ac < 2){ std::cout << "usage: ./signalAcceptance inputFile[s]" << std::endl; return -1; } TH1F* allCategory = new TH1F("allCategory","all Category",11,0.5,11.5); TH1F* preselCategory = new TH1F("preselCategory","presel Category",11,0.5,11.5); TH1F* photonCategory = new TH1F("photonCategory","reco photon Category",11,0.5,11.5); TH1F* VisAllCategory = new TH1F("VisAllCategory","all Category, Vis",11,0.5,11.5); TH1F* VisPreselCategory = new TH1F("VisPreselCategory","presel Category, Vis",11,0.5,11.5); TH1F* VisPhotonCategory = new TH1F("VisPhotonCategory","reco photon Category, Vis",11,0.5,11.5); TH1F* dROtherGen = new TH1F("dROtherGen", "dROtherGen", 800, 0.0, 4.0); TH1F* parentage = new TH1F("parentage","parentage",30, 0, 30); TH1F* dptOverpt = new TH1F("dptOverpt","dptOverpt", 400, -2.0, 2.0); TH1F* dRrecoGen = new TH1F("dRrecoGen","dRrecoGen", 200, 0.0, 0.2); TH1F* dPhiRecoGen = new TH1F("dPhiRecoGen","dPhiRecoGen", 400, 0.0, 0.2); TH1F* dEtaRecoGen = new TH1F("dEtaRecoGen","dEtaRecoGen", 800, -0.2, 0.2); TH1F* dRGenNearJet = new TH1F("dRGenNearJet","dRGenNearJet", 200, 0.0, 1.0); TH1F* dPhiGenNearJet = new TH1F("dPhiGenNearJet","dPhiGenNearJet", 100, 0.0, 0.5); TH1F* dEtaGenNearJet = new TH1F("dEtaGenNearJet","dEtaGenNearJet", 200, -0.5, 0.5); //TH1F* dRGenNextNearJet = new TH1F("dRGenNextNearJet","dRGenNextNearJet", 600, 0.0, 6.0); //TH1F* dPhiGenNextNearJet = new TH1F("dPhiGenNextNearJet","dPhiGenNextNearJet", 300, 0.0, 3.0); //TH1F* dEtaGenNextNearJet = new TH1F("dEtaGenNextNearJet","dEtaGenNextNearJet", 600, -3.0, 3.0); // object selector Selector* selectorLoose = new Selector(); // create event selectors here EventPick* evtPickLoose = new EventPick("LoosePhotonID"); // do not do jet to photon dR cleaning evtPickLoose->veto_pho_jet_dR = 0.0; EventTree* tree = new EventTree(ac-1, av+1); double PUweight = 1.0; Long64_t nEntr = tree->GetEntries(); for(Long64_t entry=0; entry<nEntr; entry++){ if(entry%10000 == 0) std::cout << 
"processing entry " << entry << " out of " << nEntr << std::endl; tree->GetEntry(entry); doJER(tree); selectorLoose->process_objects(tree); evtPickLoose->process_event(tree, selectorLoose, PUweight); // fill the histograms fillCategory(tree, allCategory, PUweight); if(evtPickLoose->passPreSel) fillCategory(tree, preselCategory, PUweight); if(evtPickLoose->passAll) fillCategory(tree, photonCategory, PUweight); // fill histograms for gen photon passing the acceptance cuts defined in analysis bool inAcc = false; for(int mcInd=0; mcInd<tree->nMC_; ++mcInd){ if(tree->mcPID->at(mcInd) == 22 && (tree->mcParentage->at(mcInd)==2 || tree->mcParentage->at(mcInd)==10 || tree->mcParentage->at(mcInd)==26) && tree->mcPt->at(mcInd) > 25 && fabs(tree->mcEta->at(mcInd)) < 1.4442){ inAcc = true; } } if(inAcc){ fillCategory(tree, VisAllCategory, PUweight); if(evtPickLoose->passPreSel) fillCategory(tree, VisPreselCategory, PUweight); if(evtPickLoose->passAll) fillCategory(tree, VisPhotonCategory, PUweight); } // have at least one good photon if(!evtPickLoose->passAll) continue; // test //if(overlapMadGraph(tree)) continue; int phoInd = evtPickLoose->Photons.at(0); // experiment with delta R cuts for photons for(int mcInd=0; mcInd<tree->nMC_; ++mcInd){ bool etetamatch = dR(tree->mcEta->at(mcInd),tree->mcPhi->at(mcInd),tree->phoEta_->at(phoInd),tree->phoPhi_->at(phoInd)) < 0.2 && (fabs(tree->phoEt_->at(phoInd) - tree->mcPt->at(mcInd)) / tree->mcPt->at(mcInd)) < 1.0; if( etetamatch && tree->mcPID->at(mcInd) == 22){ // test if(!(tree->mcParentage->at(mcInd)==2 || tree->mcParentage->at(mcInd)==10 || tree->mcParentage->at(mcInd)==26)) continue; // fill histograms for mathced photon candidate parentage->Fill( tree->mcParentage->at(mcInd) ); dptOverpt->Fill( (tree->phoEt_->at(phoInd) - tree->mcPt->at(mcInd)) / tree->mcPt->at(mcInd)); dRrecoGen->Fill( dR(tree->mcEta->at(mcInd),tree->mcPhi->at(mcInd),tree->phoEta_->at(phoInd),tree->phoPhi_->at(phoInd)) ); dPhiRecoGen->Fill( dPhi( 
tree->phoPhi_->at(phoInd) - tree->mcPhi->at(mcInd) ) ); dEtaRecoGen->Fill( tree->phoEta_->at(phoInd) - tree->mcEta->at(mcInd) ); int closestGenInd = secondMinDrIndex( mcInd, tree ); if(dR(tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->mcEta->at(closestGenInd), tree->mcPhi->at(closestGenInd)) < 0.01){ std::cout << "closest PID " << tree->mcPID->at(closestGenInd) << " MomPID " << tree->mcMomPID->at(closestGenInd) << std::endl; std::cout << "photon mother PID " << tree->mcMomPID->at(mcInd) << std::endl; } dROtherGen->Fill( dR(tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->mcEta->at(closestGenInd), tree->mcPhi->at(closestGenInd)) ); int closestJetInd = minDrIndex( tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->jetEta_, tree->jetPhi_ ); dRGenNearJet->Fill( dR(tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->jetEta_->at(closestJetInd), tree->jetPhi_->at(closestJetInd) ) ); dPhiGenNearJet->Fill( dPhi( tree->jetPhi_->at(closestJetInd) - tree->mcPhi->at(mcInd) ) ); dEtaGenNearJet->Fill( tree->jetEta_->at(closestJetInd) - tree->mcEta->at(mcInd) ); //closestJetInd = secondMinDrIndex( tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->jetEta_, tree->jetPhi_ ); //dRGenNextNearJet->Fill( dR(tree->mcEta->at(mcInd), tree->mcPhi->at(mcInd), tree->jetEta_->at(closestJetInd), tree->jetPhi_->at(closestJetInd) ) ); //dPhiGenNextNearJet->Fill( dPhi( tree->jetPhi_->at(closestJetInd) - tree->mcPhi->at(mcInd) ) ); //dEtaGenNextNearJet->Fill( tree->jetEta_->at(closestJetInd) - tree->mcEta->at(mcInd) ); } } } evtPickLoose->print_cutflow(); // write histograms TFile outFile("signalAcc.root","RECREATE"); saveHist(allCategory, &outFile); saveHist(preselCategory, &outFile); saveHist(photonCategory, &outFile); saveHist(VisAllCategory, &outFile); saveHist(VisPreselCategory, &outFile); saveHist(VisPhotonCategory, &outFile); saveHist(dROtherGen, &outFile); saveHist(parentage, &outFile); saveHist(dptOverpt, &outFile); saveHist(dRrecoGen, &outFile); saveHist(dPhiRecoGen, 
&outFile); saveHist(dEtaRecoGen, &outFile); saveHist(dRGenNearJet, &outFile); saveHist(dPhiGenNearJet, &outFile); saveHist(dEtaGenNearJet, &outFile); //saveHist(dRGenNextNearJet, &outFile); //saveHist(dPhiGenNextNearJet, &outFile); //saveHist(dEtaGenNextNearJet, &outFile); outFile.Close(); delete tree; return 0; }
/**
 * Construct the Bader integration grid for density matrix P.
 *
 * Radial shells are generated for every nucleus of the basis set and the
 * points of every shell are assigned to a region: either wholesale to the
 * region of the shell's own nucleus, or point by point by tracking the
 * density to its maximum. Maxima that are not yet known are appended to
 * the list of maxima together with a new region.
 *
 * @param P      density matrix, handed on to the density, gradient and
 *               tracking helpers
 * @param otoler tolerance; it sets the number of radial points and the
 *               pruning and Becke construction thresholds of the shells
 *
 * Overwrites the members maxima, reggrid and Nnuc and uses wrk as the
 * working grid. Prints a summary when verbose is set.
 */
void BaderGrid::construct_bader(const arma::mat & P, double otoler) {
  // Amount of radial shells on the atoms
  std::vector<size_t> nrad(basp->get_Nnuc());

  Timer t;
  size_t nd=0, ng=0;

  // Form radial shells
  std::vector<angshell_t> grids;
  for(size_t iat=0;iat<basp->get_Nnuc();iat++) {
    angshell_t sh;
    sh.atind=iat;
    sh.cen=basp->get_nuclear_coords(iat);
    sh.tol=otoler*PRUNETHR;

    // Compute necessary number of radial points for atom
    size_t nr=std::max(20,(int) round(-5*(3*log10(otoler)+8-element_row[basp->get_Z(iat)])));

    // Get Chebyshev nodes and weights for radial part
    std::vector<double> rad, wrad;
    radial_chebyshev_jac(nr,rad,wrad);
    nr=rad.size(); // Sanity check
    nrad[iat]=nr;

    // Loop over radii
    for(size_t irad=0;irad<nr;irad++) {
      sh.R=rad[irad];
      sh.w=wrad[irad];
      grids.push_back(sh);
    }
  }

  // Initialize list of maxima
  maxima.clear();
  reggrid.clear();

  // Region index of every nucleus. Regions exist only for nuclei that are
  // not flagged bsse, so the region index differs from the atom index as
  // soon as a bsse nucleus precedes an ordinary one.
  const size_t NOREGION=(size_t) -1;
  std::vector<size_t> atomregion(basp->get_Nnuc(),NOREGION);
  for(size_t i=0;i<basp->get_Nnuc();i++) {
    nucleus_t nuc(basp->get_nucleus(i));
    if(!nuc.bsse) {
      // Add to list
      atomregion[i]=maxima.size();
      maxima.push_back(nuc.r);
      std::vector<gridpoint_t> ghlp;
      reggrid.push_back(ghlp);
    }
  }
  Nnuc=maxima.size();

  // Block inside classification? Indexed by atom index, so one flag per
  // nucleus of the basis set is needed.
  std::vector<bool> block(basp->get_Nnuc(),false);

  // Index of last treated atom
  size_t oldatom=-1;

  for(size_t ig=0;ig<grids.size();ig++) {
    // Construct the shell
    wrk.set_grid(grids[ig]);
    grids[ig]=wrk.construct_becke(otoler/nrad[grids[ig].atind]);
    // Form the grid again
    wrk.form_grid();

    // Extract the points on the shell
    std::vector<gridpoint_t> shellpoints(wrk.get_grid());
    if(!shellpoints.size())
      continue;

    // Are we inside an established trust radius, or are we close enough to a real nucleus?
    bool inside=false;
    if(grids[ig].R<=TRUSTRAD && !(basp->get_nucleus(grids[ig].atind).bsse))
      inside=true;

    else if(!block[grids[ig].atind] && oldatom==grids[ig].atind) {
      // Compute projection of density gradient of points on shell
      arma::vec proj(shellpoints.size());
      coords_t nuccoord(basp->get_nuclear_coords(grids[ig].atind));

#ifdef _OPENMP
#pragma omp parallel for
#endif
      for(size_t ip=0;ip<shellpoints.size();ip++) {
        // Compute density gradient
        double d;
        arma::vec g;
        compute_density_gradient(P,*basp,shellpoints[ip].r,d,g);

        // Vector pointing to nucleus
        coords_t dRc=nuccoord-shellpoints[ip].r;
        arma::vec dR(3);
        dR(0)=dRc.x;
        dR(1)=dRc.y;
        dR(2)=dRc.z;

        // Compute dot product with gradient
        proj(ip)=arma::norm_dot(dR,g);
      }
      // Increment amount of gradient evaluations
      ng+=shellpoints.size();

      // Check if all points are inside
      const double cthcrit=cos(M_PI/4.0);
      inside=(arma::min(proj) >= cthcrit);
    }

    // If we are not inside, we need to run a point by point classification.
    if(!inside) {
      // Reset the trust atom
      oldatom=-1;
      // and the current atom
      block[grids[ig].atind]=true;

      // Loop over points
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
      for(size_t ip=0;ip<shellpoints.size();ip++) {
        if(compute_density(P,*basp,shellpoints[ip].r)<=SMALLDENSITY) {
          // Zero density - skip point
          continue;
        }

        // Track the density to its maximum. The evaluation counters are
        // collected per point and merged under the critical section below,
        // so that threads never update the shared totals concurrently.
        // NOTE(review): assumes track_to_maximum adds to the counters it is
        // handed - confirm against its declaration.
        size_t ndloc=0, ngloc=0;
        coords_t r=track_to_maximum(*basp,P,shellpoints[ip].r,ndloc,ngloc);

#ifdef _OPENMP
#pragma omp critical
#endif
        {
          nd+=ndloc;
          ng+=ngloc;

          // Now that we have the maximum, check if it is on the list of known maxima
          bool found=false;
          for(size_t im=0;im<maxima.size();im++)
            if(norm(r-maxima[im])<=SAMEMAXIMUM) {
              found=true;
              reggrid[im].push_back(shellpoints[ip]);
              break;
            }

          // Maximum was not found, add it to the list
          if(!found) {
            maxima.push_back(r);
            std::vector<gridpoint_t> ghlp;
            ghlp.push_back(shellpoints[ip]);
            reggrid.push_back(ghlp);
          }
        }
      }

      // Continue with the next radial shell
      continue;

    } else {
      // If we are here, then all points belong to this nuclear maximum.
      // A shell can only be classified as inside for a nucleus that is not
      // flagged bsse (the first inside shell of an atom has to pass the
      // trust radius test above), so the nucleus has a region of its own.
      oldatom=grids[ig].atind;
      std::vector<gridpoint_t> & region=reggrid[atomregion[grids[ig].atind]];
      region.insert(region.end(), shellpoints.begin(), shellpoints.end());
    }
  }

  if(verbose) {
    printf("Bader grid constructed in %s, taking %i density and %i gradient evaluations.\n",t.elapsed().c_str(),(int) nd, (int) ng);
    print_maxima();

    // Amount of integration points
    arma::uvec np(basp->get_Nnuc());
    np.zeros();
    // Amount of function values
    arma::uvec nf(basp->get_Nnuc());
    nf.zeros();

    for(size_t i=0;i<grids.size();i++) {
      np(grids[i].atind)+=grids[i].np;
      nf(grids[i].atind)+=grids[i].nfunc;
    }

    printf("Composition of atomic integration grid:\n %7s %7s %10s\n","atom","Npoints","Nfuncs");
    for(size_t i=0;i<basp->get_Nnuc();i++)
      printf(" %4i %-2s %7i %10i\n",(int) i+1, basp->get_symbol(i).c_str(), (int) np(i), (int) nf(i));

    printf("\nAmount of grid points in the regions:\n %7s %7s\n","region","Npoints");
    for(size_t i=0;i<reggrid.size();i++)
      printf(" %4i %7i\n",(int) i+1, (int) reggrid[i].size());
    fflush(stdout);
  }
}
// Checks whether the linear approximation stored in this point reproduces the
// supplied mapping Rphiq of the composition phiq: the scaled difference
// between the actual mapping change and the change predicted through A() is
// accumulated over the leading completeSpaceSize()-nAdditionalEqns_ entries
// and its norm is compared with tolerance().
// Returns true unless that norm exceeds tolerance().
// NOTE(review): the template<...> header of this out-of-class definition is
// not visible next to this block.
bool Foam::chemPointISAT<CompType, ThermoType>::checkSolution
(
    const scalarField& phiq,
    const scalarField& Rphiq
)
{
    // Differences with respect to the stored mapping and composition
    const scalarField deltaR(Rphiq - Rphi());
    const scalarField deltaPhi(phiq - phi());

    const scalarField& scaling(scaleFactor());
    const scalarSquareMatrix& gradient(A());

    const bool reduced = chemistry_.mechRed()->active();

    // Number of species columns used in the reduced representation
    const label nCols = reduced ? nActiveSpecies_ : completeSpaceSize()-2;

    // Since we build only the solution for the species, the additional
    // equations are not included
    const label nChecked = completeSpaceSize()-nAdditionalEqns_;

    scalar sumSqr = 0;

    for (label i=0; i<nChecked; i++)
    {
        // Change of entry i predicted by the linear approximation
        scalar predicted = 0;

        if (!reduced)
        {
            for (label j=0; j<completeSpaceSize(); j++)
            {
                predicted += gradient(i, j)*deltaPhi[j];
            }
        }
        else
        {
            const label si = completeToSimplifiedIndex_[i];

            if (si == -1)
            {
                // Inactive species: its own composition change is used
                predicted = deltaPhi[i];
            }
            else
            {
                for (label j=0; j<nCols; j++)
                {
                    const label sj = simplifiedToCompleteIndex_[j];
                    predicted += gradient(si, j)*deltaPhi[sj];
                }
                predicted += gradient(si, nActiveSpecies_)*deltaPhi[idT_];
                predicted += gradient(si, nActiveSpecies_+1)*deltaPhi[idp_];
                if (variableTimeStep())
                {
                    predicted +=
                        gradient(si, nActiveSpecies_+2)*deltaPhi[iddeltaT_];
                }
            }
        }

        sumSqr += sqr((deltaR[i]-predicted)/scaling[i]);
    }

    // The solution is accepted when it lies in the ellipsoid of accuracy
    const scalar error = sqrt(sumSqr);
    return !(error > tolerance());
}
/** Purpose ------- ZCGEQRSV solves the least squares problem min || A*X - B ||, where A is an M-by-N matrix and X and B are M-by-NRHS matrices. ZCGEQRSV first attempts to factorize the matrix in complex SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with complex DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a complex DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. M >= N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] dA COMPLEX_16 array on the GPU, dimension (LDDA,N) On entry, the M-by-N coefficient matrix A. On exit, if iterative refinement has been successfully used (info.EQ.0 and ITER.GE.0, see description below), A is unchanged. 
If double precision factorization has been used (info.EQ.0 and ITER.LT.0, see description below), then the array dA contains the QR factorization of A as returned by function DGEQRF_GPU. @param[in] ldda INTEGER The leading dimension of the array dA. LDDA >= max(1,M). @param[in,out] dB COMPLEX_16 array on the GPU, dimension (LDDB,NRHS) The M-by-NRHS right hand side matrix B. May be overwritten (e.g., if refinement fails). @param[in] lddb INTEGER The leading dimension of the array dB. LDDB >= max(1,M). @param[out] dX COMPLEX_16 array on the GPU, dimension (LDDX,NRHS) If info = 0, the N-by-NRHS solution matrix X. @param[in] lddx INTEGER The leading dimension of the array dX. LDDX >= max(1,N). @param[out] iter INTEGER - < 0: iterative refinement has failed, double precision factorization has been performed + -1 : the routine fell back to full precision for implementation- or machine-specific reasons + -2 : narrowing the precision induced an overflow, the routine fell back to full precision + -3 : failure of SGEQRF + -31: stop the iterative refinement after the 30th iteration - > 0: iterative refinement has been successfully used. 
Returns the number of iterations @param[out] info INTEGER - = 0: successful exit - < 0: if info = -i, the i-th argument had an illegal value @ingroup magma_zgels_driver ********************************************************************/ extern "C" magma_int_t magma_zcgeqrsv_gpu( magma_int_t m, magma_int_t n, magma_int_t nrhs, magmaDoubleComplex_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb, magmaDoubleComplex_ptr dX, magma_int_t lddx, magma_int_t *iter, magma_int_t *info) { #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) #define dSX(i,j) (dSX + (i) + (j)*lddsx) magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex c_one = MAGMA_Z_ONE; magma_int_t ione = 1; magmaDoubleComplex *hworkd; magmaFloatComplex *hworks; magmaDoubleComplex *tau; magmaFloatComplex *stau; magmaDoubleComplex_ptr dworkd; magmaFloatComplex_ptr dworks; magmaDoubleComplex_ptr dR, dT; magmaFloatComplex_ptr dSA, dSX, dST; magmaDoubleComplex Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddsx, lddr, nb, lhwork, minmn, size, ldworkd; /* Check arguments */ *iter = 0; *info = 0; if ( m < 0 ) *info = -1; else if ( n < 0 || n > m ) *info = -2; else if ( nrhs < 0 ) *info = -3; else if ( ldda < max(1,m)) *info = -5; else if ( lddb < max(1,m)) *info = -7; else if ( lddx < max(1,n)) *info = -9; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( m == 0 || n == 0 || nrhs == 0 ) return *info; nb = magma_get_cgeqrf_nb(m); minmn= min(m, n); /* dSX contains both B and X, so must be max(m or lddb,n). 
*/ lddsa = ldda; lddsx = max(lddb,n); lddr = lddb; /* * Allocate temporary buffers */ /* dworks(dSA + dSX + dST) */ size = lddsa*n + lddsx*nrhs + ( 2*minmn + ((n+31)/32)*32 )*nb; if (MAGMA_SUCCESS != magma_cmalloc( &dworks, size )) { fprintf(stderr, "Allocation of dworks failed (%d)\n", (int) size); *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } dSA = dworks; dSX = dSA + lddsa*n; dST = dSX + lddsx*nrhs; /* dworkd(dR) = lddr*nrhs */ ldworkd = lddr*nrhs; if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, ldworkd )) { magma_free( dworks ); fprintf(stderr, "Allocation of dworkd failed\n"); *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } dR = dworkd; /* hworks(workspace for cgeqrs + stau) = min(m,n) + lhworks */ lhwork = (m - n + nb)*(nrhs + nb) + nrhs*nb; size = lhwork + minmn; magma_cmalloc_cpu( &hworks, size ); if ( hworks == NULL ) { magma_free( dworks ); magma_free( dworkd ); fprintf(stderr, "Allocation of hworks failed\n"); *info = MAGMA_ERR_HOST_ALLOC; return *info; } stau = hworks + lhwork; eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_zlange(MagmaInfNorm, m, n, dA, ldda, (double*)dworkd ); cte = Anrm * eps * pow((double)n, 0.5) * BWDMAX; /* * Convert to single precision */ magmablas_zlag2c( m, nrhs, dB, lddb, dSX, lddsx, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } magmablas_zlag2c( m, n, dA, ldda, dSA, lddsa, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // factor dSA in single precision magma_cgeqrf_gpu( m, n, dSA, lddsa, stau, dST, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // solve dSA*dSX = dB in single precision magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // residual dR = dB - dA*dX in double precision magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info ); magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr ); if ( nrhs == 1 ) { magma_zgemv( MagmaNoTrans, m, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { 
magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange? for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( m, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto REFINEMENT; } } *iter = 0; /* Free workspaces */ magma_free( dworks ); magma_free( dworkd ); magma_free_cpu( hworks ); return *info; REFINEMENT: /* TODO: this iterative refinement algorithm works only for compatibile * systems (B in colspan of A). * See Matrix Computations (3rd ed) p. 267 for correct algorithm. */ for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX magmablas_zlag2c( m, nrhs, dR, lddr, dSX, lddsx, info ); if (*info != 0) { *iter = -2; goto FALLBACK; } // solve dSA*dSX = R in single precision magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info ); if (*info != 0) { *iter = -3; goto FALLBACK; } // Add correction and setup residual // dX += dSX [including conversion] --and-- // dR[1:n] = dB[1:n] (only n rows, not whole m rows! -- useless if m > n) for( j=0; j < nrhs; j++ ) { magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) ); } // dR = dB (whole m rows) magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr ); // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_zgemv( MagmaNoTrans, m, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1 ); } else { magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr ); } /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER > 0 and return. 
*/ for( j=0; j < nrhs; j++ ) { i = magma_izamax( n, dX(0,j), 1) - 1; magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 ); Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_izamax ( m, dR(0,j), 1 ) - 1; magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 ); Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; /* Free workspaces */ magma_free( dworks ); magma_free( dworkd ); magma_free_cpu( hworks ); return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; FALLBACK: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. 
*/ magma_free( dworks ); magma_free_cpu( hworks ); /* * Allocate temporary buffers */ /* dworkd = dT for zgeqrf */ nb = magma_get_zgeqrf_nb( m ); size = (2*min(m, n) + (n+31)/32*32 )*nb; if ( size > ldworkd ) { magma_free( dworkd ); if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, size )) { fprintf(stderr, "Allocation of dworkd2 failed\n"); *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } } dT = dworkd; /* hworkd(dtau + workspace for zgeqrs) = min(m,n) + lhwork */ size = lhwork + minmn; magma_zmalloc_cpu( &hworkd, size ); if ( hworkd == NULL ) { magma_free( dworkd ); fprintf(stderr, "Allocation of hworkd2 failed\n"); *info = MAGMA_ERR_HOST_ALLOC; return *info; } tau = hworkd + lhwork; magma_zgeqrf_gpu( m, n, dA, ldda, tau, dT, info ); if (*info == 0) { // if m > n, then dB won't fit in dX, so solve with dB and copy n rows to dX magma_zgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hworkd, lhwork, info ); magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx ); } magma_free( dworkd ); magma_free_cpu( hworkd ); return *info; }
// Estimates the ground point observed at the given image points by iterating
// a weighted least-squares intersection of the sensor models.
//
// obs     image observations; obs[i] is handed to getGroundObsEqComponents
//         together with index i, and obs[0] seeds the initial estimate.
// pt      receives the final estimate as an ECEF point (written whenever obs
//         is non-empty).
// covMat  receives the inverse of the normal matrix from the last iteration,
//         but only when every getGroundObsEqComponents call returned true.
//
// Returns OP_FAIL when obs is empty (pt and covMat are left untouched),
// OP_SUCCESS when every getGroundObsEqComponents call returned true, and
// ERROR_PROP_FAIL otherwise (pt is still written, covMat is not).
rspfSensorModelTuple::IntersectStatus rspfSensorModelTuple::
intersect(const DptSet_t   obs,
          rspfEcefPoint&   pt,
          NEWMAT::Matrix&  covMat) const
{
   const rspf_int32 nImages = (rspf_int32)obs.size();

   // Without at least one observation there is nothing to seed the estimate
   // with; obs[0] below would read past the end of an empty set.
   if (nImages < 1)
   {
      return OP_FAIL;
   }

   // Stays true only while every observation-equation evaluation succeeds.
   bool covOK = true;

   // Normal-equation accumulators and per-image work matrices.
   NEWMAT::SymmetricMatrix N(3);
   NEWMAT::SymmetricMatrix BtWB(3);
   NEWMAT::Matrix          Ni(3,3);
   NEWMAT::ColumnVector    C(3);
   NEWMAT::ColumnVector    BtWF(3);
   NEWMAT::ColumnVector    F(2);
   NEWMAT::ColumnVector    dR(3);
   NEWMAT::Matrix          B(2,3);
   NEWMAT::SymmetricMatrix W(2);

   // Initial ground estimate: project the first observation with a NaN height.
   rspfGpt estG;
   theImages[0]->lineSampleHeightToWorld(obs[0], rspf::nan(), estG);

   // Fixed number of correction passes (no convergence test).
   for (int iter=0; iter<3; iter++)
   {
      N = 0.0;
      C = 0.0;

      // Accumulate B'WB and B'WF over all images.
      for (int i=0; i<nImages; i++)
      {
         rspfDpt resid;
         if (!getGroundObsEqComponents(i, iter, obs[i], estG, resid, B, W))
         {
            covOK = false;
         }

         F[0] = resid.x;
         F[1] = resid.y;

         BtWF << B.t() * W * F;
         BtWB << B.t() * W * B;
         C += BtWF;
         N += BtWB;
      }

      // Solve for the correction and apply it in ECEF space.
      Ni = invert(N);
      dR = Ni * C;

      rspfEcefPoint estECF(estG);
      for (rspf_int32 i=0; i<3; i++)
      {
         estECF[i] += dR[i];
      }
      rspfGpt upd(estECF);
      estG = upd;

      if (traceDebug())
      {
         rspfNotify(rspfNotifyLevel_DEBUG)
            << "DEBUG: intersect:\n"
            << " iteration:\n" << iter
            << " C:\n" << C
            << " Ni:\n" << Ni
            << " dR:\n" << dR <<std::endl;
      }
   } // iterative loop

   // The point estimate is reported even when error propagation failed.
   rspfEcefPoint finalEst(estG);
   pt = finalEst;

   if (!covOK)
   {
      return ERROR_PROP_FAIL;
   }

   covMat = Ni;
   return OP_SUCCESS;
}
extern "C" magma_int_t
magma_zcgeqrsv_gpu(magma_int_t m, magma_int_t n, magma_int_t nrhs,
                   magmaDoubleComplex *dA,  magma_int_t ldda,
                   magmaDoubleComplex *dB,  magma_int_t lddb,
                   magmaDoubleComplex *dX,  magma_int_t lddx,
                   magma_int_t *iter, magma_int_t *info)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    ZCGEQRSV solves the least squares problem
       min || A*X - B ||,
    where A is an M-by-N matrix, B is an M-by-NRHS matrix and X is an
    N-by-NRHS matrix.

    ZCGEQRSV first attempts to factorize the matrix in complex SINGLE PRECISION
    and use this factorization within an iterative refinement procedure
    to produce a solution with complex DOUBLE PRECISION norm-wise backward error
    quality (see below). If the approach fails the method switches to a
    complex DOUBLE PRECISION factorization and solve.

    The iterative refinement is not going to be a winning strategy if
    the ratio complex SINGLE PRECISION performance over complex DOUBLE PRECISION
    performance is too small. A reasonable strategy should take the
    number of right-hand sides and the size of the matrix into account.
    This might be done with a call to ILAENV in the future. Up to now, we
    always try iterative refinement.

    The iterative refinement process is stopped if
        ITER > ITERMAX
    or for all the RHS we have:
        RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
    where
        o ITER is the number of the current iteration in the iterative
          refinement process
        o RNRM is the infinity-norm of the residual
        o XNRM is the infinity-norm of the solution
        o ANRM is the infinity-operator-norm of the matrix A
        o EPS is the machine epsilon returned by DLAMCH('Epsilon')
    The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A. M >= N >= 0.

    NRHS    (input) INTEGER
            The number of right hand sides, i.e., the number of columns
            of the matrix B.  NRHS >= 0.

    dA      (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
            On entry, the M-by-N coefficient matrix A.
            On exit, if iterative refinement has been successfully used
            (info.EQ.0 and ITER.GE.0, see description below), A is
            unchanged. If double precision factorization has been used
            (info.EQ.0 and ITER.LT.0, see description below), then the
            array dA contains the QR factorization of A as returned by
            function ZGEQRF_GPU.

    LDDA    (input) INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).

    dB      (input or input/output) COMPLEX_16 array on the GPU, dimension (LDDB,NRHS)
            The M-by-NRHS right hand side matrix B.
            May be overwritten (e.g., if refinement fails).

    LDDB    (input) INTEGER
            The leading dimension of the array dB.  LDDB >= max(1,M).

    dX      (output) COMPLEX_16 array on the GPU, dimension (LDDX,NRHS)
            If info = 0, the N-by-NRHS solution matrix X.

    LDDX    (input) INTEGER
            The leading dimension of the array dX.  LDDX >= max(1,N).

    ITER    (output) INTEGER
            < 0: iterative refinement has failed, double precision
                 factorization has been performed
                 -1 : the routine fell back to full precision for
                      implementation- or machine-specific reasons
                 -2 : narrowing the precision induced an overflow,
                      the routine fell back to full precision
                 -3 : failure of the single precision factorization
                      (CGEQRF) or solve (CGEQRS)
                 -31: stop the iterative refinement after the 30th
                      iteration
            = 0: the initial single precision solve already satisfied
                 the stopping criterion; no refinement step was needed
            > 0: iterative refinement has been successfully used.
                 Returns the number of iterations

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if info = -i, the i-th argument had an illegal value;
                  MAGMA_ERR_DEVICE_ALLOC or MAGMA_ERR_HOST_ALLOC if a
                  workspace allocation failed
    =====================================================================    */

    /* Column-major element addressing helpers; undefined again after the
     * function so they do not leak into the rest of the translation unit. */
    #define dB(i,j)     (dB + (i) + (j)*lddb)
    #define dX(i,j)     (dX + (i) + (j)*lddx)
    #define dR(i,j)     (dR + (i) + (j)*lddr)
    #define dSX(i,j)    (dSX + (i) + (j)*lddsx)

    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magma_int_t     ione  = 1;
    magmaDoubleComplex *dworkd, *hworkd;
    magmaFloatComplex  *dworks, *hworks;
    magmaDoubleComplex *dR, *tau, *dT;
    magmaFloatComplex  *dSA, *dSX, *dST, *stau;
    magmaDoubleComplex Xnrmv, Rnrmv;
    double          Anrm, Xnrm, Rnrm, cte, eps;
    magma_int_t     i, j, iiter, lddsa, lddsx, lddr, nb, lhwork, minmn, size, ldworkd;

    /* Check arguments */
    *iter = 0;
    *info = 0;
    if ( m < 0 )
        *info = -1;
    else if ( n < 0 || n > m )
        *info = -2;
    else if ( nrhs < 0 )
        *info = -3;
    else if ( ldda < max(1,m))
        *info = -5;
    else if ( lddb < max(1,m))
        *info = -7;
    else if ( lddx < max(1,n))
        *info = -9;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return: nothing to solve. */
    if ( m == 0 || n == 0 || nrhs == 0 )
        return *info;

    nb   = magma_get_cgeqrf_nb(m);
    minmn= min(m, n);

    /* dSX contains both B and X, so must be max(m or lddb,n). */
    lddsa = ldda;
    lddsx = max(lddb,n);
    lddr  = lddb;

    /*
     * Allocate temporary buffers
     */
    /* dworks(dSA + dSX + dST) */
    size = lddsa*n + lddsx*nrhs + ( 2*minmn + ((n+31)/32)*32 )*nb;
    if (MAGMA_SUCCESS != magma_cmalloc( &dworks, size )) {
        fprintf(stderr, "Allocation of dworks failed (%d)\n", (int) size);
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    dSA = dworks;
    dSX = dSA + lddsa*n;
    dST = dSX + lddsx*nrhs;

    /* dworkd(dR) = lddr*nrhs */
    ldworkd = lddr*nrhs;
    if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, ldworkd )) {
        magma_free( dworks );
        fprintf(stderr, "Allocation of dworkd failed\n");
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    dR = dworkd;

    /* hworks(workspace for cgeqrs + stau) = min(m,n) + lhworks */
    lhwork = (m - n + nb)*(nrhs + nb) + nrhs*nb;
    size = lhwork + minmn;
    magma_cmalloc_cpu( &hworks, size );
    if ( hworks == NULL ) {
        magma_free( dworks );
        magma_free( dworkd );
        fprintf(stderr, "Allocation of hworks failed\n");
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    stau = hworks + lhwork;

    /* Stopping threshold factor: cte = ANRM * EPS * sqrt(N) * BWDMAX.
     * dworkd doubles as the scratch buffer for zlange here. */
    eps  = lapackf77_dlamch("Epsilon");
    Anrm = magmablas_zlange('I', m, n, dA, ldda, (double*)dworkd );
    cte  = Anrm * eps * pow((double)n, 0.5) * BWDMAX;

    /*
     * Convert to single precision
     */
    magmablas_zlag2c( m, nrhs, dB, lddb, dSX, lddsx, info );
    if (*info != 0) {
        *iter = -2;
        goto FALLBACK;
    }

    magmablas_zlag2c( m, n, dA, ldda, dSA, lddsa, info );
    if (*info != 0) {
        *iter = -2;
        goto FALLBACK;
    }

    // factor dSA in single precision
    magma_cgeqrf_gpu( m, n, dSA, lddsa, stau, dST, info );
    if (*info != 0) {
        *iter = -3;
        goto FALLBACK;
    }

    // solve dSA*dSX = dB in single precision
    magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info );
    if (*info != 0) {
        *iter = -3;
        goto FALLBACK;
    }

    // residual dR = dB - dA*dX in double precision
    magmablas_clag2z( n, nrhs, dSX, lddsx, dX, lddx, info );
    magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr );
    if ( nrhs == 1 ) {
        magma_zgemv( MagmaNoTrans, m, n,
                     c_neg_one, dA, ldda,
                                dX, 1,
                     c_one,     dR, 1 );
    }
    else {
        magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n,
                     c_neg_one, dA, ldda,
                                dX, lddx,
                     c_one,     dR, lddr );
    }

    // Per right-hand side: compare the largest residual entry against the
    // largest solution entry scaled by cte.
    // TODO: use MAGMA_Z_ABS( dX(i,j) ) instead of zlange?
    for( j=0; j < nrhs; j++ ) {
        i = magma_izamax( n, dX(0,j), 1) - 1;
        magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
        Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );

        i = magma_izamax ( m, dR(0,j), 1 ) - 1;
        magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
        Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );

        if ( Rnrm > Xnrm*cte ) {
            goto REFINEMENT;
        }
    }

    /* The single precision solution is already good enough. */
    *iter = 0;

    /* Free workspaces */
    magma_free( dworks );
    magma_free( dworkd );
    magma_free_cpu( hworks );
    return *info;

REFINEMENT:
    /* TODO: this iterative refinement algorithm works only for compatible
     * systems (B in colspan of A).
     * See Matrix Computations (3rd ed) p. 267 for correct algorithm. */
    for( iiter=1; iiter < ITERMAX; ) {
        *info = 0;
        // convert residual dR to single precision dSX
        magmablas_zlag2c( m, nrhs, dR, lddr, dSX, lddsx, info );
        if (*info != 0) {
            *iter = -2;
            goto FALLBACK;
        }
        // solve dSA*dSX = R in single precision
        magma_cgeqrs_gpu( m, n, nrhs, dSA, lddsa, stau, dST, dSX, lddsx, hworks, lhwork, info );
        if (*info != 0) {
            *iter = -3;
            goto FALLBACK;
        }

        // Add correction and setup residual
        // dX += dSX [including conversion]  --and--
        // dR[1:n] = dB[1:n]   (only n rows, not whole m rows! -- useless if m > n)
        for( j=0; j < nrhs; j++ ) {
            magmablas_zcaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j) );
        }
        // dR = dB  (whole m rows)
        magmablas_zlacpy( MagmaUpperLower, m, nrhs, dB, lddb, dR, lddr );

        // residual dR = dB - dA*dX in double precision
        if ( nrhs == 1 ) {
            magma_zgemv( MagmaNoTrans, m, n,
                         c_neg_one, dA, ldda,
                                    dX, 1,
                         c_one,     dR, 1 );
        }
        else {
            magma_zgemm( MagmaNoTrans, MagmaNoTrans, m, nrhs, n,
                         c_neg_one, dA, ldda,
                                    dX, lddx,
                         c_one,     dR, lddr );
        }

        /* Check whether the nrhs normwise backward errors satisfy the
         * stopping criterion. If yes, set ITER=IITER>0 and return. */
        for( j=0; j < nrhs; j++ ) {
            i = magma_izamax( n, dX(0,j), 1) - 1;
            magma_zgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1 );
            Xnrm = lapackf77_zlange( "F", &ione, &ione, &Xnrmv, &ione, NULL );

            i = magma_izamax ( m, dR(0,j), 1 ) - 1;
            magma_zgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1 );
            Rnrm = lapackf77_zlange( "F", &ione, &ione, &Rnrmv, &ione, NULL );

            if ( Rnrm > Xnrm*cte ) {
                goto L20;
            }
        }

        /* If we are here, the nrhs normwise backward errors satisfy
         * the stopping criterion, we are good to exit. */
        *iter = iiter;

        /* Free workspaces */
        magma_free( dworks );
        magma_free( dworkd );
        magma_free_cpu( hworks );
        return *info;

      L20:
        iiter++;
    }

    /* If we are at this place of the code, this is because we have
     * performed ITER=ITERMAX iterations and never satisfied the
     * stopping criterion. Set up the ITER flag accordingly and follow
     * up on double precision routine. */
    *iter = -ITERMAX - 1;

FALLBACK:
    /* Single-precision iterative refinement failed to converge to a
     * satisfactory solution, so we resort to double precision. */
    magma_free( dworks );
    magma_free_cpu( hworks );

    /*
     * Allocate temporary buffers
     */
    /* dworkd = dT for zgeqrf */
    nb   = magma_get_zgeqrf_nb( m );
    size = (2*min(m, n) + (n+31)/32*32 )*nb;
    if ( size > ldworkd ) {
        magma_free( dworkd );
        if (MAGMA_SUCCESS != magma_zmalloc( &dworkd, size )) {
            fprintf(stderr, "Allocation of dworkd2 failed\n");
            *info = MAGMA_ERR_DEVICE_ALLOC;
            return *info;
        }
    }
    dT = dworkd;

    /* hworkd(dtau + workspace for zgeqrs) = min(m,n) + lhwork.
     * lhwork depends on the block size, and nb was just switched from the
     * cgeqrf to the zgeqrf value, so it has to be recomputed; otherwise the
     * workspace passed to zgeqrs could be sized for the wrong block size. */
    lhwork = (m - n + nb)*(nrhs + nb) + nrhs*nb;
    size = lhwork + minmn;
    magma_zmalloc_cpu( &hworkd, size );
    if ( hworkd == NULL ) {
        magma_free( dworkd );
        fprintf(stderr, "Allocation of hworkd2 failed\n");
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    tau = hworkd + lhwork;

    magma_zgeqrf_gpu( m, n, dA, ldda, tau, dT, info );
    if (*info == 0) {
        // if m > n, then dB won't fit in dX, so solve with dB and copy n rows to dX
        magma_zgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hworkd, lhwork, info );
        magmablas_zlacpy( MagmaUpperLower, n, nrhs, dB, lddb, dX, lddx );
    }

    magma_free( dworkd );
    magma_free_cpu( hworkd );
    return *info;
}

#undef dB
#undef dX
#undef dR
#undef dSX
void Histogrammer::fill(Selector* selector, EventPick* selEvent, EventTree* tree, double weight){ // sanity check: PU weight hists["PUweight"]->Fill(weight); // 2d photon candidate histograms //std::cout << "here0" << std::endl; if(selEvent->PhotonsPresel.size()>0){ int candArrInd = -1; int candInd = -1; for(int phoItmp = 0; phoItmp < selEvent->PhotonsPresel.size(); phoItmp++){ if((int)selEvent->PhoPassChHadIso[phoItmp] + (int)selEvent->PhoPassPhoIso[phoItmp]+ (int)selEvent->PhoPassSih[phoItmp] >= 1){ // at least 1 cut passed. candArrInd = selEvent->PhotonsPresel[phoItmp]; candInd = phoItmp; break; } } //std::cout << "here01" << std::endl; if(candInd >= 0 && selEvent->PhoPassPhoIso[candInd]){ hists2d["photon1_Sigma_ChIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadIso[candArrInd], weight); hists2d["photon1_Sigma_ChSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadSCRIso[candArrInd], weight); hists2d["photon1_Sigma_Et"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd), tree->phoEt_->at(candArrInd), weight); double phoEt = tree->phoEt_->at(candArrInd); if(phoEt>=25 && phoEt<35){ hists2d["photon1_25_35_Sigma_ChSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadSCRIso[candArrInd], weight); } if(phoEt>=35 && phoEt<45){ hists2d["photon1_35_45_Sigma_ChSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadSCRIso[candArrInd], weight); } if(phoEt>=45 && phoEt<60){ hists2d["photon1_45_60_Sigma_ChSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadSCRIso[candArrInd], weight); } if(phoEt>=60){ hists2d["photon1_60_up_Sigma_ChSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03ChHadSCRIso[candArrInd], weight); } } if(candInd >= 0 && selEvent->PhoPassChHadIso[candInd]){ hists2d["photon1_Sigma_PhoIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03PhoIso[candArrInd], weight); 
hists2d["photon1_Sigma_PhoSCRIso"]->Fill(tree->phoSigmaIEtaIEta_->at(candArrInd),selector->Pho03PhoSCRIso[candArrInd], weight); } } //std::cout << "here1" << std::endl; // full event selection histograms if(!selEvent->passAll) return; // mc category if( tree->isData_ == 0 ){ int EleP = 0; int EleM = 0; int MuP = 0; int MuM = 0; int TauP = 0; int TauM = 0; for( int mcI = 0; mcI < tree->nMC_; ++mcI){ if(abs(tree->mcMomPID->at(mcI))==24 && tree->mcParentage->at(mcI)==10){ if( tree->mcPID->at(mcI) == 11 ) EleP = 1; if( tree->mcPID->at(mcI) == -11 ) EleM = 1; if( tree->mcPID->at(mcI) == 13 ) MuP = 1; if( tree->mcPID->at(mcI) == -13 ) MuM = 1; if( tree->mcPID->at(mcI) == 15) TauP = 1; if( tree->mcPID->at(mcI) == -15) TauM = 1; } } hists["MCcategory"]->Fill(1.0, weight); // Total int nEle = EleP + EleM; int nMu = MuP + MuM; int nTau = TauP + TauM; if( nEle + nMu + nTau == 0) hists["MCcategory"]->Fill(2.0, weight); // All Had if( nEle + nMu + nTau == 1) hists["MCcategory"]->Fill(3.0, weight); // Single Lepton if( nEle + nMu + nTau == 2) hists["MCcategory"]->Fill(4.0, weight); // Di Lepton if(nEle==1 && nMu==0 && nTau==0) hists["MCcategory"]->Fill(6.0, weight); // 1 e if(nEle==2 && nMu==0 && nTau==0) hists["MCcategory"]->Fill(7.0, weight); // 2 e if(nEle==0 && nMu==1 && nTau==0) hists["MCcategory"]->Fill(8.0, weight); // 1 mu if(nEle==0 && nMu==2 && nTau==0) hists["MCcategory"]->Fill(9.0, weight); // 2 mu if(nEle==0 && nMu==0 && nTau==1) hists["MCcategory"]->Fill(10.0, weight); // 1 tau if(nEle==0 && nMu==0 && nTau==2) hists["MCcategory"]->Fill(11.0, weight); // 2 tau //std::cout << "EleP " << EleP << " EleM " << EleM << " MuP " << MuP << " MuM " << MuM << " TauP " << TauP << " TauM " << TauM << std::endl; } double MTW = 0.0; // muons if( selEvent->Muons.size() > 0 ){ int ind = selEvent->Muons[0]; hists["mu1Pt"]->Fill( tree->muPt_->at(ind), weight ); hists["mu1Eta"]->Fill( tree->muEta_->at(ind), weight ); hists["mu1RelIso"]->Fill( selector->Mu04RelIso[ind], weight ); MTW = 
TMath::Sqrt(2*(tree->muPt_->at(ind))*(tree->pfMET_)*( 1.0 - TMath::Cos(dR(0.0,tree->muPhi_->at(ind),0.0,tree->pfMETPhi_)) )); hists["WtransMass"]->Fill( MTW, weight ); } // electrons if( selEvent->Electrons.size() > 0 ){ int ind = selEvent->Electrons[0]; hists["ele1Pt"]->Fill( tree->elePt_->at(ind), weight ); hists["ele1Eta"]->Fill( tree->eleSCEta_->at(ind), weight ); hists["ele1RelIso"]->Fill( selector->Ele03RelIso[ind], weight ); hists["ele1MVA"]->Fill( tree->eleIDMVATrig_->at(ind), weight ); hists["ele1D0"]->Fill( tree->eleD0_->at(ind), weight ); hists["ele1Dz"]->Fill( tree->eleDz_->at(ind), weight ); hists["ele1EoverP"]->Fill( tree->eleEoverP_->at(ind), weight ); hists["ele1sigmaIetaIeta"]->Fill( tree->eleSigmaIEtaIEta_->at(ind), weight ); hists["ele1MissHits"]->Fill( tree->eleMissHits_->at(ind), weight ); hists["ele1DrJet"]->Fill( minDr(tree->eleSCEta_->at(ind), tree->elePhi_->at(ind), selEvent->Jets, tree->jetEta_, tree->jetPhi_), weight ); if( tree->isData_ == 0 ){ if( tree->eleGenIndex_->at(ind) >= 0 ){ hists["ele1MotherID"]->Fill( fabs(tree->eleGenMomPID_->at(ind)), weight ); //if( TMath::Abs(tree->eleGenMomPID_->at(ind)) == 11 ) hists["ele1GMotherID"]->Fill( fabs(tree->eleGenGMomPID_->at(ind)), weight ); } else hists["ele1MotherID"]->Fill( 0.0, weight ); } if( selEvent->Electrons.size() > 1 ){ int ind2 = selEvent->Electrons[1]; hists["ele2Pt"]->Fill( tree->elePt_->at(ind2), weight ); hists["ele2RelIso"]->Fill( selector->Ele03RelIso[ind2], weight ); TLorentzVector ele1; TLorentzVector ele2; ele1.SetPtEtaPhiM(tree->elePt_->at(ind), tree->eleSCEta_->at(ind), tree->elePhi_->at(ind), 0.0); ele2.SetPtEtaPhiM(tree->elePt_->at(ind2), tree->eleSCEta_->at(ind2), tree->elePhi_->at(ind2), 0.0); hists["ele1ele2Mass"]->Fill( (ele1+ele2).M(), weight); } if(selEvent->Photons.size() > 0){ TLorentzVector ele; TLorentzVector pho; int phoi = selEvent->Photons[0]; ele.SetPtEtaPhiM(tree->elePt_->at(ind), tree->eleSCEta_->at(ind), tree->elePhi_->at(ind), 0.0); 
pho.SetPtEtaPhiM(tree->phoEt_->at(phoi), tree->phoEta_->at(phoi), tree->phoPhi_->at(phoi), 0.0); hists["ele1pho1Mass"]->Fill( (ele+pho).M(), weight); } } //std::cout << "here2" << std::endl; // Loose Electrons (if any) if(selEvent->ElectronsLoose.size() > 0 && tree->isData_ == 0){ int eleInd = selEvent->ElectronsLoose[0]; double mindr = 999; for( int mcI = 0; mcI < tree->nMC_; ++mcI){ if( tree->mcPID->at(mcI) == 22 ){ double thisdr = dR(tree->mcEta->at(mcI), tree->mcPhi->at(mcI), tree->eleSCEta_->at(eleInd), tree->elePhi_->at(eleInd)); if( mindr > thisdr ) mindr = thisdr; } } hists["looseEleDrGenPho"]->Fill(mindr, weight); } //std::cout << "here3" << std::endl; // photons hists["nPhotons"]->Fill(selEvent->Photons.size(), weight); if( selEvent->Photons.size() > 0 ){ int ind = selEvent->Photons[0]; hists["photon1Et"]->Fill( tree->phoEt_->at(ind), weight ); hists["photon1Eta"]->Fill( tree->phoEta_->at(ind), weight ); hists["photon1IsConv"]->Fill( tree->phoIsConv_->at(ind), weight ); hists["photon1ChHadIso"]->Fill( selector->Pho03ChHadIso[ind], weight ); hists["photon1ChHadSCRIso"]->Fill( selector->Pho03ChHadSCRIso[ind], weight ); hists["photon1ChHadRandIso"]->Fill( selector->Pho03RandChHadIso[ind], weight ); double phoEt = tree->phoEt_->at(ind); if(phoEt>=25 && phoEt<35){ hists["photon1_25_35_ChHadRandIso"]->Fill( selector->Pho03RandChHadIso[ind], weight ); } if(phoEt>=35 && phoEt<45){ hists["photon1_35_45_ChHadRandIso"]->Fill( selector->Pho03RandChHadIso[ind], weight ); } if(phoEt>=45 && phoEt<60){ hists["photon1_45_60_ChHadRandIso"]->Fill( selector->Pho03RandChHadIso[ind], weight ); } if(phoEt>=60){ hists["photon1_60_up_ChHadRandIso"]->Fill( selector->Pho03RandChHadIso[ind], weight ); } hists["photon1NeuHadIso"]->Fill( selector->Pho03NeuHadIso[ind], weight ); hists["photon1PhoIso"]->Fill( selector->Pho03PhoIso[ind], weight ); hists["photon1PhoSCRIso"]->Fill( selector->Pho03PhoSCRIso[ind], weight ); hists["photon1PhoRandIso"]->Fill( selector->Pho03RandPhoIso[ind], 
weight ); hists["photon1HoverE"]->Fill( tree->phoHoverE_->at(ind), weight ); hists["photon1SigmaIEtaIEta"]->Fill( tree->phoSigmaIEtaIEta_->at(ind), weight ); hists["photon1DrElectron"]->Fill( minDr(tree->phoEta_->at(ind), tree->phoPhi_->at(ind), selEvent->Electrons, tree->eleSCEta_, tree->elePhi_), weight ); hists["photon1DrJet"]->Fill( minDr(tree->phoEta_->at(ind), tree->phoPhi_->at(ind), selector->Jets, tree->jetEta_, tree->jetPhi_), weight ); if( tree->isData_ == 0 ){ if( tree->phoGenIndex_->at(ind) >= 0 ){ hists["photon1MotherID"]->Fill( fabs(tree->phoGenMomPID_->at(ind)), weight ); //if( TMath::Abs(tree->phoGenMomPID_->at(ind)) == 22 ) hists["photon1GMotherID"]->Fill( fabs(tree->phoGenGMomPID_->at(ind)), weight ); } else { hists["photon1MotherID"]->Fill( 0.0, weight ); } // find the closest b-jet double mindr = minDrPhoB(ind, tree); int phoGen=-1; for( int mcI = 0; mcI < tree->nMC_; ++mcI){ if( tree->mcIndex->at(mcI) == tree->phoGenIndex_->at(ind) ) phoGen=mcI; } if( phoGen > 0) hists["GenPhotonEt"]->Fill(tree->mcPt->at(phoGen), weight); if(mindr<999) { hists["photon1DrMCbquark"]->Fill( mindr, weight ); } } } //std::cout << "here4" << std::endl; hists["Ht"]->Fill( calc_ht(selEvent, tree), weight ); hists["MET"]->Fill( tree->pfMET_, weight ); hists["nVtx"]->Fill( tree->nVtx_, weight ); hists["nJets"]->Fill( selEvent->Jets.size(), weight ); //std::cout << "here5" << std::endl; // jets if(selEvent->Jets.size()>=3){ TLorentzVector j1,j2,j3; int jetI; double minM3 = 99999.9; double M3maxPt = 99999.9; double M3minPt = 99999.9; double minPt = 99999.9; double M4maxPt = 99999.9; double max4Pt = 0.0; double maxPt = 0.0; double M3first = 0.0; TLorentzVector maxPtsystem; TLorentzVector phovec; phovec.SetPtEtaPhiM(0.00001,0.0,0.0,0.0); if( selEvent->Photons.size() > 0 ){ int phoi = selEvent->Photons[0]; phovec.SetPtEtaPhiM(tree->phoEt_->at(phoi), tree->phoEta_->at(phoi), tree->phoPhi_->at(phoi), 0.0); } for(int jet1I=0; jet1I < selEvent->Jets.size()-2; jet1I++){ jetI = 
selEvent->Jets[jet1I]; j1.SetPtEtaPhiM(tree->jetPt_->at(jetI), tree->jetEta_->at(jetI), tree->jetPhi_->at(jetI), 0.0); for(int jet2I=jet1I+1; jet2I < selEvent->Jets.size()-1; jet2I++){ jetI = selEvent->Jets[jet2I]; j2.SetPtEtaPhiM(tree->jetPt_->at(jetI), tree->jetEta_->at(jetI), tree->jetPhi_->at(jetI), 0.0); for(int jet3I=jet2I+1; jet3I < selEvent->Jets.size(); jet3I++){ jetI = selEvent->Jets[jet3I]; j3.SetPtEtaPhiM(tree->jetPt_->at(jetI), tree->jetEta_->at(jetI), tree->jetPhi_->at(jetI), 0.0); double m3 = (j1+j2+j3).M(); double totalPt = (j1+j2+j3).Pt(); if(jet1I==0 && jet2I==1 && jet3I==2) M3first = m3; if(m3 < minM3) minM3 = m3; if(minPt > totalPt){ minPt = totalPt; M3minPt = m3; } if(maxPt < totalPt){ maxPt = totalPt; M3maxPt = m3; maxPtsystem = (j1+j2+j3); } if( phovec.DrEtaPhi(j1) < 0.3 ) j1 = j1 - phovec; if( phovec.DrEtaPhi(j2) < 0.3 ) j2 = j2 - phovec; if( phovec.DrEtaPhi(j3) < 0.3) j3 = j3 - phovec; double m4 = (phovec+j1+j2+j3).M(); double total4Pt = (phovec+j1+j2+j3).Pt(); hists["M3phoMulti"]->Fill(m4, weight); if(max4Pt < total4Pt ){ max4Pt=total4Pt; M4maxPt=m4; } } } } double toppt=0.0; double antitoppt=0.0; for(int mcInd=0; mcInd<tree->nMC_; ++mcInd){ if(tree->mcPID->at(mcInd)==6) toppt = tree->mcPt->at(mcInd); if(tree->mcPID->at(mcInd)==-6) antitoppt = tree->mcPt->at(mcInd); } double maxtoppt = std::max(toppt,antitoppt); if( maxtoppt < 30 ) hists["M3_0_30"]->Fill(M3maxPt, weight); else if( maxtoppt < 100) hists["M3_30_100"]->Fill(M3maxPt, weight); else if( maxtoppt < 200) hists["M3_100_200"]->Fill(M3maxPt, weight); else if( maxtoppt < 300) hists["M3_200_300"]->Fill(M3maxPt, weight); else hists["M3_300_up"]->Fill(M3maxPt, weight); hists["M3first"]->Fill(M3first, weight); hists["M3"]->Fill(M3maxPt, weight); hists["M3minPt"]->Fill(M3minPt, weight); if( selEvent->Photons.size() > 0 ) { hists["M3pho"]->Fill(M4maxPt, weight); hists["dRpho3j"]->Fill(phovec.DrEtaPhi(maxPtsystem), weight); } hists["minM3"]->Fill(minM3, weight); hists2d["MTW_M3"]->Fill( MTW, 
M3maxPt, weight); } if( selEvent->Jets.size() > 0 ){ int ind = selEvent->Jets[0]; hists["jet1Pt"]->Fill( tree->jetPt_->at(ind), weight ); hists["jet1Eta"]->Fill( tree->jetEta_->at(ind), weight ); } if( selEvent->Jets.size() > 1 ){ int ind = selEvent->Jets[1]; hists["jet2Pt"]->Fill( tree->jetPt_->at(ind), weight ); hists["jet2Eta"]->Fill( tree->jetEta_->at(ind), weight ); } if( selEvent->Jets.size() > 2 ){ int ind = selEvent->Jets[2]; hists["jet3Pt"]->Fill( tree->jetPt_->at(ind), weight ); hists["jet3Eta"]->Fill( tree->jetEta_->at(ind), weight ); } if( selEvent->Jets.size() > 3 ){ int ind = selEvent->Jets[3]; hists["jet4Pt"]->Fill( tree->jetPt_->at(ind), weight ); hists["jet4Eta"]->Fill( tree->jetEta_->at(ind), weight ); } }
// Returns dR between (myEta, myPhi) and the entry of etas/phis at the index
// chosen by minDrIndex for the candidate list Inds. When minDrIndex reports
// no candidate (negative index) the sentinel 999.0 is returned instead.
double Histogrammer::minDr(double myEta, double myPhi, std::vector<int> Inds, std::vector<float> *etas, std::vector<float> *phis){
    const int chosen = minDrIndex(myEta, myPhi, Inds, etas, phis);
    if(chosen < 0){
        return 999.0;
    }
    return dR(myEta, myPhi, etas->at(chosen), phis->at(chosen));
}