void Foam::sixDoFSolvers::CrankNicolson::solve ( bool firstIter, const vector& fGlobal, const vector& tauGlobal, scalar deltaT, scalar deltaT0 ) { // Update the linear acceleration and torque updateAcceleration(fGlobal, tauGlobal); // Correct linear velocity v() = tConstraints() & (v0() + aDamp()*deltaT*(aoc_*a() + (1 - aoc_)*a0())); // Correct angular momentum pi() = rConstraints() & (pi0() + aDamp()*deltaT*(aoc_*tau() + (1 - aoc_)*tau0())); // Correct position centreOfRotation() = centreOfRotation0() + deltaT*(voc_*v() + (1 - voc_)*v0()); // Correct orientation Tuple2<tensor, vector> Qpi = rotate(Q0(), (voc_*pi() + (1 - voc_)*pi0()), deltaT); Q() = Qpi.first(); }
// First part of the leapfrog step: half-step velocity/momentum update followed
// by the position and orientation move. Done on the master process only; the
// resulting motionState_ is then scattered with Pstream::scatter().
//
//   deltaT   step used for the position/orientation move
//   deltaT0  step used for the half-step velocity/momentum update
void Foam::sixDoFRigidBodyMotion::updatePosition
(
    scalar deltaT,
    scalar deltaT0
)
{
    if (Pstream::master())
    {
        const scalar halfDeltaT0 = 0.5*deltaT0;

        // Half-step velocity and angular momentum, damped and constrained
        v() = tConstraints_ & aDamp_*(v0() + halfDeltaT0*a());
        pi() = rConstraints_ & aDamp_*(pi0() + halfDeltaT0*tau());

        // Move the centre of rotation with the updated velocity
        centreOfRotation() = centreOfRotation0() + deltaT*v();

        // Rotate the orientation; rotate() returns both the new orientation
        // and the rotated angular momentum
        const Tuple2<tensor, vector> rotated = rotate(Q0(), pi(), deltaT);
        Q() = rotated.first();
        pi() = rConstraints_ & rotated.second();
    }

    Pstream::scatter(motionState_);
}
/*
 * QR factorisation by Givens rotations.
 *
 * Q (r x r, accessed through Q0) is first set to the identity; then every
 * entry below the diagonal in the first c columns of R (accessed through R0)
 * is zeroed by a plane rotation, and the same rotation is accumulated into Q.
 * Both matrices are modified in place.
 */
void GQR(int r, int c, double **Q, double **R){
  int piv, low, idx;
  double norm, cs, sn;
  double upper, lower;

  /* Q := identity */
  for (piv = 0; piv < r; piv++) {
    for (low = 0; low < r; low++) {
      Q0(piv,low) = 0.0;
    }
    Q0(piv,piv) = 1.0;
  }

  for (piv = 0; piv < c; piv++) {
    for (low = piv + 1; low < r; low++) {
      /* nothing to eliminate if the entry is already zero */
      if (R0(low,piv) == 0) {
        continue;
      }

      /* rotation that sends (R[piv][piv], R[low][piv]) to (norm, 0) */
      norm = sqrt(R0(piv,piv)*R0(piv,piv) + R0(low,piv)*R0(low,piv));
      cs = R0(piv,piv)/norm;
      sn = R0(low,piv)/norm;

      /* rotate rows piv and low of R */
      for (idx = 0; idx < c; idx++) {
        upper = R0(piv,idx);
        lower = R0(low,idx);
        R0(piv,idx) = cs*upper + sn*lower;
        R0(low,idx) = -sn*upper + cs*lower;
      }

      /* accumulate the rotation into Q (indices piv and low of each Q0(idx,.)) */
      for (idx = 0; idx < r; idx++) {
        upper = Q0(idx,piv);
        lower = Q0(idx,low);
        Q0(idx,piv) = cs*upper + sn*lower;
        Q0(idx,low) = -sn*upper + cs*lower;
      }
    }
  }
}
/*********************************************************************************************** * 函数名称:void sha256_ProChunk() * 功 能:处理一个数据块(512位) ***********************************************************************************************/ void sha256_ProChunk() { short i; unsigned long t1,t2; //步骤一 for(i=0;i<64;i++) { if(0<=i&&i<=15) { } if(16<=i&&i<=63) { sha256_w[i]=Q1(sha256_w[i-2])+sha256_w[i-7]+Q0(sha256_w[i-15])+sha256_w[i-16]; } } //步骤二 sha256_a=sha256_hh[0]; sha256_b=sha256_hh[1]; sha256_c=sha256_hh[2]; sha256_d=sha256_hh[3]; sha256_e=sha256_hh[4]; sha256_f=sha256_hh[5]; sha256_g=sha256_hh[6]; sha256_h=sha256_hh[7]; //步骤三 for(i=0;i<64;i++) { t1=sha256_h+E1(sha256_e)+CH(sha256_e,sha256_f,sha256_g)+sha256_K[i]+sha256_w[i]; t2=E0(sha256_a)+MAJ(sha256_a,sha256_b,sha256_c); sha256_h=sha256_g; sha256_g=sha256_f; sha256_f=sha256_e; sha256_e=sha256_d+t1; sha256_d=sha256_c; sha256_c=sha256_b; sha256_b=sha256_a; sha256_a=t1+t2; } //步骤四 sha256_hh[0] += sha256_a; sha256_hh[1] += sha256_b; sha256_hh[2] += sha256_c; sha256_hh[3] += sha256_d; sha256_hh[4] += sha256_e; sha256_hh[5] += sha256_f; sha256_hh[6] += sha256_g; sha256_hh[7] += sha256_h; }
void ChLoadCustom::ComputeJacobian(ChState* state_x, // state position to evaluate jacobians ChStateDelta* state_w, // state speed to evaluate jacobians ChMatrix<>& mK, // result dQ/dx ChMatrix<>& mR, // result dQ/dv ChMatrix<>& mM) // result dQ/da { double Delta = 1e-8; int mrows_w = this->LoadGet_ndof_w(); int mrows_x = this->LoadGet_ndof_x(); // compute Q at current speed & position, x_0, v_0 ChVectorDynamic<> Q0(mrows_w); this->ComputeQ(state_x, state_w); // Q0 = Q(x, v) Q0 = this->load_Q; ChVectorDynamic<> Q1(mrows_w); ChVectorDynamic<> Jcolumn(mrows_w); ChState state_x_inc(mrows_x, nullptr); ChStateDelta state_delta(mrows_w, nullptr); // Compute K=-dQ(x,v)/dx by backward differentiation for (int i = 0; i < mrows_w; ++i) { state_delta(i) += Delta; this->LoadStateIncrement(*state_x, state_delta, state_x_inc); // exponential, usually state_x_inc(i) = state_x(i) + Delta; this->ComputeQ(&state_x_inc, state_w); // Q1 = Q(x+Dx, v) Q1 = this->load_Q; state_delta(i) -= Delta; Jcolumn = (Q1 - Q0) * (-1.0 / Delta); // - sign because K=-dQ/dx this->jacobians->K.PasteMatrix(Jcolumn, 0, i); } // Compute R=-dQ(x,v)/dv by backward differentiation for (int i = 0; i < mrows_w; ++i) { (*state_w)(i) += Delta; this->ComputeQ(state_x, state_w); // Q1 = Q(x, v+Dv) Q1 = this->load_Q; (*state_w)(i) -= Delta; Jcolumn = (Q1 - Q0) * (-1.0 / Delta); // - sign because R=-dQ/dv this->jacobians->R.PasteMatrix(Jcolumn, 0, i); } }
void Foam::sixDoFSolvers::Newmark::solve ( bool firstIter, const vector& fGlobal, const vector& tauGlobal, scalar deltaT, scalar deltaT0 ) { // Update the linear acceleration and torque updateAcceleration(fGlobal, tauGlobal); // Correct linear velocity v() = tConstraints() & (v0() + aDamp()*deltaT*(gamma_*a() + (1 - gamma_)*a0())); // Correct angular momentum pi() = rConstraints() & (pi0() + aDamp()*deltaT*(gamma_*tau() + (1 - gamma_)*tau0())); // Correct position centreOfRotation() = centreOfRotation0() + ( tConstraints() & ( deltaT*v0() + aDamp()*sqr(deltaT)*(beta_*a() + (0.5 - beta_)*a0()) ) ); // Correct orientation vector piDeltaT = rConstraints() & ( deltaT*pi0() + aDamp()*sqr(deltaT)*(beta_*tau() + (0.5 - beta_)*tau0()) ); Tuple2<tensor, vector> Qpi = rotate(Q0(), piDeltaT, 1); Q() = Qpi.first(); }
void Foam::sixDoFRigidBodyMotion::updatePosition ( bool firstIter, scalar deltaT, scalar deltaT0 ) { if (Pstream::master()) { if (firstIter) { // First simplectic step: // Half-step for linear and angular velocities // Update position and orientation v() = tConstraints_ & (v0() + aDamp_*0.5*deltaT0*a()); pi() = rConstraints_ & (pi0() + aDamp_*0.5*deltaT0*tau()); centreOfRotation() = centreOfRotation0() + deltaT*v(); } else { // For subsequent iterations use Crank-Nicolson v() = tConstraints_ & (v0() + aDamp_*0.5*deltaT*(a() + motionState0_.a())); pi() = rConstraints_ & (pi0() + aDamp_*0.5*deltaT*(tau() + motionState0_.tau())); centreOfRotation() = centreOfRotation0() + 0.5*deltaT*(v() + motionState0_.v()); } // Correct orientation Tuple2<tensor, vector> Qpi = rotate(Q0(), pi(), deltaT); Q() = Qpi.first(); pi() = rConstraints_ & Qpi.second(); } Pstream::scatter(motionState_); }
int main(int , char**) { t_Q Q0(1,3); std::cout << "Q0: " << Q0 << "\n"; t_dVecQ vecQ0(2,Q0); std::cout << "vecQ0: " << vecQ0 << "\n"; t_Q Q1(1,2); std::cout << "Q1: " << Q1 << "\n"; t_dVecQ vecQ1(3,Q1); std::cout << "vecQ1: " << vecQ1 << "\n"; vecQ1[mtl::irange(2)] = vecQ0; std::cout << "vecQ1: " << vecQ1 << "\n"; assign_test(vecQ1); st_test test(vecQ1); // std::cout << "size(vecQ1): " << mtl::vector::size(vecQ1) << "\n"; test2(); return 0; }
void extr(jvec &ext_EP,jvec &ext_ED,jvec &ext_Q2,jvec &ext_fP,jvec &ext_fM,jvec &ext_f0,jvec &ext_fT,int il_sea,int il,int ic) { ////////////////////////////////////////// R0 ////////////////////////////////////// jvec R0_corr; jack R0(njack); //load standing jvec ll0_st=load_3pts("V0",il,il,0,RE,ODD,1); jvec lc0_st=load_3pts("V0",ic,il,0,RE,ODD,1); jvec cc0_st=load_3pts("V0",ic,ic,0,RE,ODD,1); //build R0 R0_corr=lc0_st*lc0_st.simmetric()/(cc0_st*ll0_st); //fit and plot R0=constant_fit(R0_corr,TH-tmax,tmax,combine("plots/R0_il_%d_ic_%d.xmg",il,ic).c_str()); //////////////////////////////////////////// R2 //////////////////////////////////// jvec R2_corr[nth]; jvec RT_corr[nth]; jvec R2(nth,njack); jvec RT(nth,njack); ofstream out_R2(combine("plots/R2_il_%d_ic_%d.xmg",il,ic).c_str()); ofstream out_RT(combine("plots/RT_il_%d_ic_%d.xmg",il,ic).c_str()); jvec lcK_th[nth],lc0_th[nth],lcT_th[nth]; for(int ith=0;ith<nth;ith++) { //load corrs lcK_th[ith]=load_3pts("VK",ic,il,ith,IM,EVN,-1)/(6*th_P[ith]); lc0_th[ith]=load_3pts("V0",ic,il,ith,RE,ODD,1); lcT_th[ith]=load_3pts("VTK",ic,il,ith,IM,ODD,1)/(6*th_P[ith]); //build ratios R2_corr[ith]=lcK_th[ith]/lc0_th[ith]; RT_corr[ith]=lcT_th[ith]/lcK_th[ith]; //fit R2[ith]=constant_fit(R2_corr[ith],tmin,tmax); RT[ith]=constant_fit(RT_corr[ith],tmin,tmax); //plot out_R2<<write_constant_fit_plot(R2_corr[ith],R2[ith],tmin,tmax); out_RT<<write_constant_fit_plot(RT_corr[ith],RT[ith],tmin,tmax); } ////////////////////////////////////////// R1 ////////////////////////////////////// jvec R1_corr[nth]; jvec R1(nth,njack); ofstream out_P(combine("plots/out_P_il_%d_ic_%d.xmg",il,ic).c_str()); out_P<<"@type xydy"<<endl; ofstream out_D(combine("plots/out_D_il_%d_ic_%d.xmg",il,ic).c_str()); out_D<<"@type xydy"<<endl; ofstream out_R1(combine("plots/out_R1_il_%d_ic_%d.xmg",il,ic).c_str()); out_R1<<"@type xydy"<<endl; //load Pi and D jvec P_corr[nth],D_corr[nth]; jvec ED(nth,njack),EP(nth,njack); for(int ith=0;ith<nth;ith++) { //load moving pion 
P_corr[ith]=load_2pts("2pts_P5P5.dat",il_sea,il,ith); out_P<<"@type xydy"<<endl; EP[ith]=constant_fit(effective_mass(P_corr[ith]),tmin_P,TH,combine("plots/P_eff_mass_il_%d_ic_%d_ith_%d.xmg", il,ic,ith).c_str()); out_P<<write_constant_fit_plot(effective_mass(P_corr[ith]),EP[ith],tmin_P,TH); out_P<<"&"<<endl; //recompute EP and ED from standing one if(ith) { ED[ith]=latt_en(ED[0],th_P[ith]); EP[ith]=latt_en(EP[0],th_P[ith]); } //load moving D D_corr[ith]=load_2pts("2pts_P5P5.dat",il,ic,ith); out_D<<"@type xydy"<<endl; ED[ith]=constant_fit(effective_mass(D_corr[ith]),tmin_D,TH,combine("plots/D_eff_mass_il_%d_ic_%d_ith_%d.xmg", il,ic,ith).c_str()); out_D<<write_constant_fit_plot(effective_mass(D_corr[ith]),ED[ith],tmin_D,TH); out_D<<"&"<<endl; //build the ratio R1_corr[ith]=lc0_th[ith]/lc0_th[0]; for(int t=0;t<TH;t++) { int E_fit_reco_flag=1; jack Dt(njack),Pt(njack); if(E_fit_reco_flag==0) { Dt=D_corr[0][t]/D_corr[ith][t]; Pt=P_corr[0][TH-t]/P_corr[ith][TH-t]; } else { jack ED_th=latt_en(ED[0],th_P[ith]),EP_th=latt_en(EP[0],th_P[ith]); Dt=exp(-(ED[0]-ED_th)*t)*ED_th/ED[0]; Pt=exp(-(EP[0]-EP_th)*(TH-t))*EP_th/EP[0]; } R1_corr[ith][t]*=Dt*Pt; } //fit R1[ith]=constant_fit(R1_corr[ith],tmin,tmax); //plot out_R1<<write_constant_fit_plot(R1_corr[ith],R1[ith],tmin,tmax); } //////////////////////////////////////// solve the ratios ////////////////////////////// //compute f0[q2max] jvec f0_r(nth,njack),fP_r(nth,njack),fT_r(nth,njack); f0_r[0]=sqrt(R0*4*ED[0]*EP[0])/(ED[0]+EP[0]); cout<<"f0_r[q2max]: "<<f0_r[0]<<endl; //compute QK and Q2 double mom[nth]; jvec PK(nth,njack),QK(nth,njack); jvec P0(nth,njack),Q0(nth,njack),Q2(nth,njack),P2(nth,njack); jvec P0_r(nth,njack),Q0_r(nth,njack),Q2_r(nth,njack),P2_r(nth,njack); for(int ith=0;ith<nth;ith++) { P0[ith]=ED[ith]+EP[ith]; //P=initial+final Q0[ith]=ED[ith]-EP[ith]; //Q=initial-final P0_r[ith]=latt_en(ED[0],th_P[ith])+latt_en(EP[0],th_P[ith]); Q0_r[ith]=latt_en(ED[0],th_P[ith])-latt_en(EP[0],th_P[ith]); //we are describing the 
process D->Pi mom[ith]=momentum(th_P[ith]); double P_D=-mom[ith]; double P_Pi=mom[ith]; PK[ith]=P_D+P_Pi; QK[ith]=P_D-P_Pi; P2[ith]=sqr(P0[ith])-3*sqr(PK[ith]); Q2[ith]=sqr(Q0[ith])-3*sqr(QK[ith]); //reconstruct Q2 P2_r[ith]=sqr(P0_r[ith])-3*sqr(PK[ith]); Q2_r[ith]=sqr(Q0_r[ith])-3*sqr(QK[ith]); } //checking Pion dispertion relation ofstream out_disp_P(combine("plots/Pion_disp_rel_il_%d_ic_%d.xmg",il,ic).c_str()); out_disp_P<<"@type xydy"<<endl; for(int ith=0;ith<nth;ith++) out_disp_P<<3*sqr(mom[ith])<<" "<<sqr(EP[ith])<<endl; out_disp_P<<"&"<<endl; for(int ith=0;ith<nth;ith++) out_disp_P<<3*sqr(mom[ith])<<" "<<sqr(cont_en(EP[0],th_P[ith]))<<endl; out_disp_P<<"&"<<endl; for(int ith=0;ith<nth;ith++) out_disp_P<<3*sqr(mom[ith])<<" "<<sqr(latt_en(EP[0],th_P[ith]))<<endl; out_disp_P<<"&"<<endl; //checking D dispertion relation ofstream out_disp_D(combine("plots/D_disp_rel_il_%d_ic_%d.xmg",il,ic).c_str()); out_disp_D<<"@type xydy"<<endl; for(int ith=0;ith<nth;ith++) out_disp_D<<3*sqr(mom[ith])<<" "<<sqr(ED[ith])<<endl; out_disp_D<<"&"<<endl; for(int ith=0;ith<nth;ith++) out_disp_D<<3*sqr(mom[ith])<<" "<<sqr(cont_en(ED[0],th_P[ith]))<<endl; out_disp_D<<"&"<<endl; for(int ith=0;ith<nth;ith++) out_disp_D<<3*sqr(mom[ith])<<" "<<sqr(latt_en(ED[0],th_P[ith]))<<endl; out_disp_D<<"&"<<endl; //compute xi jvec xi(nth,njack); for(int ith=1;ith<nth;ith++) { int E_fit_reco_flag=0; //it makes no diff jack P0_th=E_fit_reco_flag?P0_r[ith]:P0[ith]; jack Q0_th=E_fit_reco_flag?Q0_r[ith]:Q0[ith]; xi[ith]=R2[ith]*P0_th; xi[ith]/=QK[ith]-R2[ith]*Q0_th; } //compute fP ofstream out_fP_r(combine("plots/fP_r_il_%d_ic_%d.xmg",il,ic).c_str()); out_fP_r<<"@type xydy"<<endl; for(int ith=1;ith<nth;ith++) { int E_fit_reco_flag=1; //it makes no diff jack P0_th=E_fit_reco_flag?P0_r[ith]:P0[ith]; jack Q0_th=E_fit_reco_flag?Q0_r[ith]:Q0[ith]; jack c=P0_th/(ED[0]+EP[0])*(1+xi[ith]*Q0_th/P0_th); fP_r[ith]=R1[ith]/c*f0_r[0]; out_fP_r<<Q2[ith].med()<<" "<<fP_r[ith]<<endl; } //compute f0 and fT ofstream 
out_f0_r(combine("plots/f0_r_il_%d_ic_%d.xmg",il,ic).c_str()); ofstream out_fT_r(combine("plots/fT_r_il_%d_ic_%d.xmg",il,ic).c_str());; out_f0_r<<"@type xydy"<<endl; out_f0_r<<Q2[0].med()<<" "<<f0_r[0]<<endl; out_fT_r<<"@type xydy"<<endl; for(int ith=1;ith<nth;ith++) { //it seems better here to solve using reconstructed energies int E_fit_reco_flag=0; jack EP_th=E_fit_reco_flag?latt_en(EP[0],th_P[ith]):EP[ith]; jack ED_th=E_fit_reco_flag?latt_en(ED[0],th_P[ith]):ED[ith]; jack Q2_th=E_fit_reco_flag?Q2_r[ith]:Q2[ith]; jack fM_r=xi[ith]*fP_r[ith]; //checked f0_r[ith]=fP_r[ith]+fM_r[ith]*Q2_th/(sqr(ED_th)-sqr(EP_th)); out_f0_r<<Q2[ith].med()<<" "<<f0_r[ith]<<endl; fT_r[ith]=fM_r[ith]*RT[ith]*Zt_med[ibeta]/Zv_med[ibeta]*(EP[0]+ED[0])/(ED[ith]+EP[ith]); //ADD out_fT_r<<Q2[ith].med()<<" "<<fT_r[ith]<<endl; } //////////////////////////////////////// analytic method ///////////////////////////// jvec fP_a(nth,njack),fM_a(nth,njack),f0_a(nth,njack),fT_a(nth,njack); jvec fP_n(nth,njack),fM_n(nth,njack),f0_n(nth,njack),fT_n(nth,njack); //determine M and Z for pion and D jvec ZP(nth,njack),ZD(nth,njack); for(int ith=0;ith<nth;ith++) { jack E,Z2; two_pts_fit(E,Z2,P_corr[ith],tmin_P,TH); ZP[ith]=sqrt(Z2); two_pts_fit(E,Z2,D_corr[ith],tmin_D,TH); ZD[ith]=sqrt(Z2); } //compute V jvec VK_a(nth,njack),V0_a(nth,njack),TK_a(nth,njack); jvec VK_n(nth,njack),V0_n(nth,njack),TK_n(nth,njack); for(int ith=0;ith<nth;ith++) { ofstream out_V0(combine("plots/V0_il_%d_ic_%d_ith_%d_analytic_numeric.xmg",il,ic,ith).c_str()); out_V0<<"@type xydy"<<endl; ofstream out_VK(combine("plots/VK_il_%d_ic_%d_ith_%d_analytic_numeric.xmg",il,ic,ith).c_str()); out_VK<<"@type xydy"<<endl; ofstream out_TK(combine("plots/TK_il_%d_ic_%d_ith_%d_analytic_numeric.xmg",il,ic,ith).c_str()); out_TK<<"@type xydy"<<endl; ofstream out_dt(combine("plots/dt_il_%d_ic_%d_ith_%d.xmg",il,ic,ith).c_str()); out_dt<<"@type xydy"<<endl; //computing time dependance jvec dt_a(TH+1,njack),dt_n(TH+1,njack); { //it seems better here to 
use fitted energies int E_fit_reco_flag=1; jack EP_th=E_fit_reco_flag?latt_en(EP[0],th_P[ith]):EP[ith]; jack ED_th=E_fit_reco_flag?latt_en(ED[0],th_P[ith]):ED[ith]; for(int t=0;t<=TH;t++) { dt_a[t]=exp(-(ED_th*t+EP_th*(TH-t)))*ZP[0]*ZD[0]/(4*EP_th*ED_th); dt_n[t]=D_corr[ith][t]*P_corr[ith][TH-t]/(ZD[0]*ZP[0]); } } //remove time dependance using analytic or numeric expression jvec VK_corr_a=Zv_med[ibeta]*lcK_th[ith]/dt_a,V0_corr_a=Zv_med[ibeta]*lc0_th[ith]/dt_a; jvec VK_corr_n=Zv_med[ibeta]*lcK_th[ith]/dt_n,V0_corr_n=Zv_med[ibeta]*lc0_th[ith]/dt_n; jvec TK_corr_n=Zt_med[ibeta]*lcT_th[ith]/dt_n,TK_corr_a=Zt_med[ibeta]*lcT_th[ith]/dt_a; //fit V0 V0_a[ith]=constant_fit(V0_corr_a,tmin,tmax); V0_n[ith]=constant_fit(V0_corr_n,tmin,tmax); out_V0<<write_constant_fit_plot(V0_corr_a,V0_a[ith],tmin,tmax)<<"&"<<endl; out_V0<<write_constant_fit_plot(V0_corr_n,V0_n[ith],tmin,tmax)<<"&"<<endl; //fit VK VK_a[ith]=constant_fit(VK_corr_a,tmin,tmax); VK_n[ith]=constant_fit(VK_corr_n,tmin,tmax); out_VK<<write_constant_fit_plot(VK_corr_a,VK_a[ith],tmin,tmax)<<"&"<<endl; out_VK<<write_constant_fit_plot(VK_corr_n,VK_n[ith],tmin,tmax)<<"&"<<endl; //fit TK TK_a[ith]=constant_fit(TK_corr_a,tmin,tmax); TK_n[ith]=constant_fit(TK_corr_n,tmin,tmax); out_TK<<write_constant_fit_plot(TK_corr_a,TK_a[ith],tmin,tmax)<<"&"<<endl; out_TK<<write_constant_fit_plot(TK_corr_n,TK_n[ith],tmin,tmax)<<"&"<<endl; } //compute f0(q2max) f0_a[0]=V0_a[0]/(ED[0]+EP[0]); f0_n[0]=V0_n[0]/(ED[0]+EP[0]); cout<<"f0_a["<<Q2[0].med()<<"]: "<<f0_a[0]<<endl; cout<<"f0_n["<<Q2[0].med()<<"]: "<<f0_n[0]<<endl; //solve for fP and f0 for(int ith=1;ith<nth;ith++) { jack delta=P0[ith]*QK[ith]-Q0[ith]*PK[ith]; //solve using analytic fit jack deltaP_a=V0_a[ith]*QK[ith]-Q0[ith]*VK_a[ith]; jack deltaM_a=P0[ith]*VK_a[ith]-V0_a[ith]*PK[ith]; fP_a[ith]=deltaP_a/delta; fM_a[ith]=deltaM_a/delta; //solve using numeric fit jack deltaP_n=V0_n[ith]*QK[ith]-Q0[ith]*VK_n[ith]; jack deltaM_n=P0[ith]*VK_n[ith]-V0_n[ith]*PK[ith]; 
fP_n[ith]=deltaP_n/delta; fM_n[ith]=deltaM_n/delta; //compute f0 f0_a[ith]=fP_a[ith]+fM_a[ith]*Q2[ith]/(ED[0]*ED[0]-EP[0]*EP[0]); f0_n[ith]=fP_n[ith]+fM_n[ith]*Q2[ith]/(ED[0]*ED[0]-EP[0]*EP[0]); //solve fT fT_a[ith]=-TK_a[ith]*(EP[0]+ED[0])/(2*(ED[ith]+EP[ith]))/mom[ith]; fT_n[ith]=-TK_n[ith]*(EP[0]+ED[0])/(2*(ED[ith]+EP[ith]))/mom[ith]; } //write analytic and umeric plot of fP and f0 ofstream out_fP_a("plots/fP_a.xmg"),out_fP_n("plots/fP_n.xmg"); ofstream out_fM_a("plots/fM_a.xmg"),out_fM_n("plots/fM_n.xmg"); ofstream out_f0_a("plots/f0_a.xmg"),out_f0_n("plots/f0_n.xmg"); ofstream out_fT_a("plots/fT_a.xmg"),out_fT_n("plots/fT_n.xmg"); out_fP_a<<"@type xydy"<<endl; out_fP_n<<"@type xydy"<<endl; out_f0_a<<"@type xydy"<<endl; out_f0_n<<"@type xydy"<<endl; out_fM_a<<"@type xydy"<<endl; out_fM_n<<"@type xydy"<<endl; out_fT_a<<"@type xydy"<<endl; out_fT_n<<"@type xydy"<<endl; out_f0_a<<Q2[0].med()<<" "<<f0_a[0]<<endl; out_f0_n<<Q2[0].med()<<" "<<f0_n[0]<<endl; for(int ith=1;ith<nth;ith++) { out_fP_a<<Q2[ith].med()<<" "<<fP_a[ith]<<endl; out_fP_n<<Q2[ith].med()<<" "<<fP_n[ith]<<endl; out_fM_a<<Q2[ith].med()<<" "<<fM_a[ith]<<endl; out_fM_n<<Q2[ith].med()<<" "<<fM_n[ith]<<endl; out_f0_a<<Q2[ith].med()<<" "<<f0_a[ith]<<endl; out_f0_n<<Q2[ith].med()<<" "<<f0_n[ith]<<endl; out_fT_a<<Q2[ith].med()<<" "<<fT_a[ith]<<endl; out_fT_n<<Q2[ith].med()<<" "<<fT_n[ith]<<endl; } ext_EP=EP; ext_ED=ED; ext_Q2=Q2; ext_fP=fP_a; ext_fM=fM_a; ext_f0=f0_a; ext_fT=fT_a; }
/*
 * Compute the SHA-256 digest of a byte buffer.
 *
 *   pInput        message bytes
 *   iInputLength  number of message bytes
 *   p_hash        receives the digest: 32 bytes written to p_hash->X[0..31]
 *                 through long2char4(), four bytes per state word
 *
 * The message is expanded by prepare_input() into a padded array of words
 * (one word per 4 message bytes, a multiple of 16 words in total) which is
 * then processed 16 words at a time.
 *
 * If the temporary buffer cannot be allocated the function returns without
 * writing to p_hash.
 *
 * NOTE(review): the state is kept in unsigned long. Where unsigned long is
 * wider than 32 bits the result is only correct if the Q0/Q1/E0/E1 macros and
 * long2char4() confine the arithmetic to 32 bits - verify on such platforms.
 */
void sha256(char *pInput, unsigned int iInputLength, _hash *p_hash)
{
    /* initial hash state */
    unsigned long h1 = 0x6a09e667;
    unsigned long h2 = 0xbb67ae85;
    unsigned long h3 = 0x3c6ef372;
    unsigned long h4 = 0xa54ff53a;
    unsigned long h5 = 0x510e527f;
    unsigned long h6 = 0x9b05688c;
    unsigned long h7 = 0x1f83d9ab;
    unsigned long h8 = 0x5be0cd19;

    /* padded size in bytes: round up to the next 64-byte block, plus one more
       block when fewer than 9 bytes are left in the last block (>= 56 used) */
    unsigned int isize = (iInputLength / 64 + 1) * 64;
    if (iInputLength % 64 >= 56)
        isize += 64;

    /* The buffer is indexed as isize/4 words, so size it in elements rather
       than bytes; this matters whenever sizeof(unsigned long) is not 4. */
    unsigned int nWords = isize / 4;
    unsigned long *pPreparedInput = (unsigned long*)malloc(nWords * sizeof(unsigned long));
    if (pPreparedInput == NULL)
        return;

    prepare_input(pPreparedInput, nWords, pInput, iInputLength);

    for (unsigned int i = 0; i < nWords; i = i + 16)
    {
        /* message schedule: 16 input words extended to 64 */
        unsigned long W[64] = {0};
        for (int j = 0; j < 16; j++)
            W[j] = pPreparedInput[i+j];
        for (int j = 16; j < 64; j++)
            W[j] = Q1(W[j-2]) + W[j-7] + Q0(W[j-15]) + W[j-16];

        unsigned long a = h1;
        unsigned long b = h2;
        unsigned long c = h3;
        unsigned long d = h4;
        unsigned long e = h5;
        unsigned long f = h6;
        unsigned long g = h7;
        unsigned long h = h8;

        /* 64 compression rounds */
        for (int j = 0; j < 64; j++)
        {
            unsigned long t1 = h + E1(e) + CH(e, f, g) + K[j] + W[j];
            unsigned long t2 = E0(a) + MAJ(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* fold this block into the running state */
        h1 += a;
        h2 += b;
        h3 += c;
        h4 += d;
        h5 += e;
        h6 += f;
        h7 += g;
        h8 += h;
    }

    /* serialise the eight state words into the 32-byte digest */
    long2char4(h1, p_hash->X);
    long2char4(h2, p_hash->X+4);
    long2char4(h3, p_hash->X+8);
    long2char4(h4, p_hash->X+12);
    long2char4(h5, p_hash->X+16);
    long2char4(h6, p_hash->X+20);
    long2char4(h7, p_hash->X+24);
    long2char4(h8, p_hash->X+28);

    free(pPreparedInput);
}
// Run the GCMMA optimization loop on the problem described by _feval.
//
// Each outer iteration evaluates the objective, constraints and gradients at
// the current point, sets up the asymptotes (raasta_, asympg_), then repeats
// inner iterations (mmasug_ followed by a function-only evaluation) until
// conser_ reports a conservative approximation or _max_inner_iters is
// reached. The outer loop stops when max_iters() is hit or when the stored
// history of the last n_iters_relative_change() objective values passes the
// tolerance test at the bottom of the loop.
//
// A non-positive n_iters_relative_change() disables the history test (it
// would otherwise cause a modulo by zero and an out-of-range read).
// NOTE(review): with a history length of exactly 1 the comparison loop is
// empty, so the test passes after the first outer iteration - confirm that
// this is the intended meaning of that setting.
//
// The whole body is compiled only when MAST_ENABLE_GCMMA == 1.
void
MAST::GCMMAOptimizationInterface::optimize() {
#if MAST_ENABLE_GCMMA == 1
    
    // make sure that all processes have the same problem setup
    _feval->sanitize_parallel();
    
    int
    N                  = _feval->n_vars(),
    M                  = _feval->n_eq() + _feval->n_ineq(),
    n_rel_change_iters = _feval->n_iters_relative_change();
    
    libmesh_assert_greater(N, 0);
    
    // the relative-change history is only kept when its length is positive
    const bool check_rel_change = (n_rel_change_iters > 0);
    
    std::vector<Real>  XVAL(N, 0.), XOLD1(N, 0.), XOLD2(N, 0.),
    XMMA(N, 0.), XMIN(N, 0.), XMAX(N, 0.), XLOW(N, 0.), XUPP(N, 0.),
    ALFA(N, 0.), BETA(N, 0.), DF0DX(N, 0.),
    A(M, 0.), B(M, 0.), C(M, 0.), Y(M, 0.), RAA(M, 0.), ULAM(M, 0.),
    FVAL(M, 0.), FAPP(M, 0.), FNEW(M, 0.), FMAX(M, 0.),
    DFDX(M*N, 0.), P(M*N, 0.), Q(M*N, 0.), P0(N, 0.), Q0(N, 0.),
    UU(M, 0.), GRADF(M, 0.), DSRCH(M, 0.), HESSF(M*(M+1)/2, 0.);
    
    std::vector<Real>  f0_iters(check_rel_change ? n_rel_change_iters : 0);
    
    std::vector<int> IYFREE(M, 0);
    std::vector<bool> eval_grads(M, false);
    
    Real
    ALBEFA  = 0.1,
    GHINIT  = 0.5,
    GHDECR  = 0.7,
    GHINCR  = 1.2,
    F0VAL   = 0.,
    F0NEW   = 0.,
    F0APP   = 0.,
    RAA0    = 0.,
    Z       = 0.,
    GEPS    =_feval->tolerance();
    
    
    /*C********+*********+*********+*********+*********+*********+*********+
     C
     C  The meaning of some of the scalars and vectors in the program:
     C
     C     N  = Number of variables x_j in the problem.
     C     M  = Number of constraints in the problem (not including
     C          the simple upper and lower bounds on the variables).
     C ALBEFA = Relative spacing between asymptote and move limit. Lower value
     C          will cause the move limit (alpha,beta) to move closer to asymptote
     C          values (l, u).
     C GHINIT = Initial asymptote setting. For the first two iterations the
     C          asymptotes (l, u) are defined based on offsets from the design
     C          point as this fraction of the design variable bounds, ie.
     C              l_j   =   x_j^k  - GHINIT * (x_j^max - x_j^min)
     C              u_j   =   x_j^k  + GHINIT * (x_j^max - x_j^min)
     C GHDECR = Fraction by which the asymptote is reduced for oscillating
     C          changes in design variables based on three consecutive iterations
     C GHINCR = Fraction by which the asymptote is increased for non-oscillating
     C          changes in design variables based on three consecutive iterations
     C INNMAX = Maximal number of inner iterations within each outer iter.
     C          A reasonable choice is INNMAX=10.
     C  ITER  = Current outer iteration number ( =1 the first iteration).
     C  GEPS  = Tolerance parameter for the constraints.
     C          (Used in the termination criteria for the subproblem.)
     C
     C   XVAL(j) = Current value of the variable x_j.
     C  XOLD1(j) = Value of the variable x_j one iteration ago.
     C  XOLD2(j) = Value of the variable x_j two iterations ago.
     C   XMMA(j) = Optimal value of x_j in the MMA subproblem.
     C   XMIN(j) = Original lower bound for the variable x_j.
     C   XMAX(j) = Original upper bound for the variable x_j.
     C   XLOW(j) = Value of the lower asymptote l_j.
     C   XUPP(j) = Value of the upper asymptote u_j.
     C   ALFA(j) = Lower bound for x_j in the MMA subproblem.
     C   BETA(j) = Upper bound for x_j in the MMA subproblem.
     C    F0VAL  = Value of the objective function f_0(x)
     C   FVAL(i) = Value of the i:th constraint function f_i(x).
     C  DF0DX(j) = Derivative of f_0(x) with respect to x_j.
     C   FMAX(i) = Right hand side of the i:th constraint.
     C   DFDX(k) = Derivative of f_i(x) with respect to x_j,
     C             where k = (j-1)*M + i.
     C      P(k) = Coefficient p_ij in the MMA subproblem, where
     C             k = (j-1)*M + i.
     C      Q(k) = Coefficient q_ij in the MMA subproblem, where
     C             k = (j-1)*M + i.
     C     P0(j) = Coefficient p_0j in the MMA subproblem.
     C     Q0(j) = Coefficient q_0j in the MMA subproblem.
     C      B(i) = Right hand side b_i in the MMA subproblem.
     C    F0APP  = Value of the approximating objective function
     C             at the optimal solution of the MMA subproblem.
     C   FAPP(i) = Value of the approximating i:th constraint function
     C             at the optimal solution of the MMA subproblem.
     C    RAA0   = Parameter raa_0 in the MMA subproblem.
     C    RAA(i) = Parameter raa_i in the MMA subproblem.
     C      Y(i) = Value of the "artificial" variable y_i.
     C      Z    = Value of the "minimax" variable z.
     C      A(i) = Coefficient a_i for the variable z.
     C      C(i) = Coefficient c_i for the variable y_i.
     C   ULAM(i) = Value of the dual variable lambda_i.
     C  GRADF(i) = Gradient component of the dual objective function.
     C  DSRCH(i) = Search direction component in the dual subproblem.
     C  HESSF(k) = Hessian matrix component of the dual function.
     C IYFREE(i) = 0 for dual variables which are fixed to zero in
     C               the current subspace of the dual subproblem,
     C           = 1 for dual variables which are "free" in
     C               the current subspace of the dual subproblem.
     C
     C********+*********+*********+*********+*********+*********+*********+*/
    
    
    /*
     *  The USER should now give values to the parameters
     *  M, N, GEPS, XVAL (starting point),
     *  XMIN, XMAX, FMAX, A and C.
     */
    // _initi(M,N,GEPS,XVAL,XMIN,XMAX,FMAX,A,C);
    // Assumed:  FMAX == A
    _feval->_init_dvar_wrapper(XVAL, XMIN, XMAX);
    
    // set the value of C[i] to be very large numbers: the larger of the
    // biggest |XVAL[i]| and the configured constraint penalty
    Real max_x = 0.;
    for (int i=0; i<N; i++)
        if (max_x < fabs(XVAL[i]))
            max_x = fabs(XVAL[i]);
    std::fill(C.begin(), C.end(), std::max(1.e0*max_x, _constr_penalty));
    
    int INNMAX=_max_inner_iters, ITER=0, ITE=0, INNER=0, ICONSE=0;
    
    /*
     *  The outer iterative process starts.
     */
    bool terminate = false, inner_terminate=false;
    while (!terminate) {
        
        ITER=ITER+1;
        ITE=ITE+1;
        
        /*
         *  The USER should now calculate function values and gradients
         *  at XVAL. The result should be put in F0VAL,DF0DX,FVAL,DFDX.
         */
        std::fill(eval_grads.begin(), eval_grads.end(), true);
        _feval->_evaluate_wrapper(XVAL,
                                  F0VAL, true, DF0DX,
                                  FVAL, eval_grads, DFDX);
        if (ITER == 1)
            // output the very first iteration
            _feval->_output_wrapper(0, XVAL, F0VAL, FVAL, true);
        
        /*
         *  RAA0,RAA,XLOW,XUPP,ALFA and BETA are calculated.
         */
        raasta_(&M, &N, &RAA0, &RAA[0], &XMIN[0], &XMAX[0], &DF0DX[0], &DFDX[0]);
        asympg_(&ITER, &M, &N, &ALBEFA, &GHINIT, &GHDECR, &GHINCR,
                &XVAL[0], &XMIN[0], &XMAX[0], &XOLD1[0], &XOLD2[0],
                &XLOW[0], &XUPP[0], &ALFA[0], &BETA[0]);
        
        /*
         *  The inner iterative process starts.
         */
        
        // write the asymptote data for the inner iterations
        _output_iteration_data(ITER, XVAL, XMIN, XMAX, XLOW, XUPP, ALFA, BETA);
        
        INNER=0;
        inner_terminate = false;
        while (!inner_terminate) {
            
            /*
             *  The subproblem is generated and solved.
             */
            mmasug_(&ITER, &M, &N, &GEPS, &IYFREE[0], &XVAL[0], &XMMA[0],
                    &XMIN[0], &XMAX[0], &XLOW[0], &XUPP[0], &ALFA[0], &BETA[0],
                    &A[0], &B[0], &C[0], &Y[0], &Z, &RAA0, &RAA[0], &ULAM[0],
                    &F0VAL, &FVAL[0], &F0APP, &FAPP[0], &FMAX[0], &DF0DX[0], &DFDX[0],
                    &P[0], &Q[0], &P0[0], &Q0[0], &UU[0], &GRADF[0], &DSRCH[0], &HESSF[0]);
            
            /*
             *  The USER should now calculate function values at XMMA.
             *  The result should be put in F0NEW and FNEW.
             */
            std::fill(eval_grads.begin(), eval_grads.end(), false);
            _feval->_evaluate_wrapper(XMMA,
                                      F0NEW, false, DF0DX,
                                      FNEW, eval_grads, DFDX);
            
            if (INNER >= INNMAX) {
                libMesh::out
                << "** Max Inner Iter Reached: Terminating! Inner Iter = "
                << INNER << std::endl;
                inner_terminate = true;
            }
            else {
                /*
                 *  It is checked if the approximations were conservative.
                 */
                conser_( &M, &ICONSE, &GEPS, &F0NEW, &F0APP, &FNEW[0], &FAPP[0]);
                if (ICONSE == 1) {
                    libMesh::out
                    << "** Conservative Solution: Terminating! Inner Iter = "
                    << INNER << std::endl;
                    inner_terminate = true;
                }
                else {
                    /*
                     *  The approximations were not conservative, so RAA0 and RAA
                     *  are updated and one more inner iteration is started.
                     */
                    INNER=INNER+1;
                    raaupd_( &M, &N, &GEPS, &XMMA[0], &XVAL[0],
                            &XMIN[0], &XMAX[0], &XLOW[0], &XUPP[0],
                            &F0NEW, &FNEW[0], &F0APP, &FAPP[0], &RAA0, &RAA[0]);
                }
            }
        }
        
        /*
         *  The inner iterative process has terminated, which means
         *  that an outer iteration has been completed.
         *  The variables are updated so that XVAL stands for the new
         *  outer iteration point. The function values are also updated.
         */
        xupdat_( &N, &ITER, &XMMA[0], &XVAL[0], &XOLD1[0], &XOLD2[0]);
        fupdat_( &M, &F0NEW, &FNEW[0], &F0VAL, &FVAL[0]);
        /*
         *  The USER may now write the current solution.
         */
        _feval->_output_wrapper(ITER, XVAL, F0VAL, FVAL, true);
        
        // store the objective in a circular history of n_rel_change_iters slots
        if (check_rel_change)
            f0_iters[(ITE-1)%n_rel_change_iters] = F0VAL;
        
        /*
         *  One more outer iteration is started as long as
         *  ITE is less than MAXITE:
         */
        if (ITE == _feval->max_iters()) {
            libMesh::out
            << "GCMMA: Reached maximum iterations, terminating! "
            << std::endl;
            terminate = true;
        }
        
        // relative change in objective: every stored value is compared with
        // the value in the last slot of the history; the comparison is
        // relative when that value exceeds sqrt(GEPS), absolute otherwise
        if (check_rel_change) {
            
            bool rel_change_conv = true;
            Real f0_curr = f0_iters[n_rel_change_iters-1];
            
            for (int i=0; i<n_rel_change_iters-1; i++) {
                if (f0_curr > sqrt(GEPS))
                    rel_change_conv = (rel_change_conv &&
                                       fabs(f0_iters[i]-f0_curr)/fabs(f0_curr) < GEPS);
                else
                    rel_change_conv = (rel_change_conv &&
                                       fabs(f0_iters[i]-f0_curr) < GEPS);
            }
            if (rel_change_conv) {
                libMesh::out
                << "GCMMA: Converged relative change tolerance, terminating! "
                << std::endl;
                terminate = true;
            }
        }
    }
    
#endif //MAST_ENABLE_GCMMA == 1
}
/*
 * Distance between Q and Q0 in the x-y plane.
 *
 * The component differences are stored in the file-level variables rx and ry,
 * each is passed through wrap(), and the Euclidean norm of the pair is
 * returned. rx and ry keep their wrapped values after the call.
 */
double radius()
{
    rx = Q(x) - Q0(x);
    ry = Q(y) - Q0(y);

    wrap(&rx);
    wrap(&ry);

    return sqrt(sqr(rx) + sqr(ry));
}
// Simulation self-test template.
//
// The data string `distr` selects a distribution, its parameters are read
// with PARAMETER / PARAMETER_VECTOR, `n` values (or a fixed-size array) are
// simulated from it, and the function returns the accumulated objective of
// the simulated values under the same parameters: minus the summed
// log-density for the d* functions (called with give_log = true), and the
// value of the density:: object added as-is (presumably a negative
// log-density as well - confirm against the density namespace).
// An unknown `distr` is reported through error().
Type objective_function<Type>::operator() ()
{
  DATA_STRING(distr);
  DATA_INTEGER(n);
  Type nll = 0;

  // ---- univariate distributions: simulate n draws, subtract log-density ----
  if (distr == "norm") {
    PARAMETER(mu);
    PARAMETER(sd);
    vector<Type> draws = rnorm(n, mu, sd);
    nll -= dnorm(draws, mu, sd, true).sum();
    return nll;
  }
  if (distr == "gamma") {
    PARAMETER(shape);
    PARAMETER(scale);
    vector<Type> draws = rgamma(n, shape, scale);
    nll -= dgamma(draws, shape, scale, true).sum();
    return nll;
  }
  if (distr == "pois") {
    PARAMETER(lambda);
    vector<Type> draws = rpois(n, lambda);
    nll -= dpois(draws, lambda, true).sum();
    return nll;
  }
  if (distr == "compois") {
    PARAMETER(mode);
    PARAMETER(nu);
    vector<Type> draws = rcompois(n, mode, nu);
    nll -= dcompois(draws, mode, nu, true).sum();
    return nll;
  }
  if (distr == "compois2") {
    PARAMETER(mean);
    PARAMETER(nu);
    vector<Type> draws = rcompois2(n, mean, nu);
    nll -= dcompois2(draws, mean, nu, true).sum();
    return nll;
  }
  if (distr == "nbinom") {
    PARAMETER(size);
    PARAMETER(prob);
    vector<Type> draws = rnbinom(n, size, prob);
    nll -= dnbinom(draws, size, prob, true).sum();
    return nll;
  }
  if (distr == "nbinom2") {
    PARAMETER(mu);
    PARAMETER(var);
    vector<Type> draws = rnbinom2(n, mu, var);
    nll -= dnbinom2(draws, mu, var, true).sum();
    return nll;
  }
  if (distr == "exp") {
    PARAMETER(rate);
    vector<Type> draws = rexp(n, rate);
    nll -= dexp(draws, rate, true).sum();
    return nll;
  }
  if (distr == "beta") {
    PARAMETER(shape1);
    PARAMETER(shape2);
    vector<Type> draws = rbeta(n, shape1, shape2);
    nll -= dbeta(draws, shape1, shape2, true).sum();
    return nll;
  }
  if (distr == "f") {
    PARAMETER(df1);
    PARAMETER(df2);
    vector<Type> draws = rf(n, df1, df2);
    nll -= df(draws, df1, df2, true).sum();
    return nll;
  }
  if (distr == "logis") {
    PARAMETER(location);
    PARAMETER(scale);
    vector<Type> draws = rlogis(n, location, scale);
    nll -= dlogis(draws, location, scale, true).sum();
    return nll;
  }
  if (distr == "t") {
    PARAMETER(df);
    vector<Type> draws = rt(n, df);
    nll -= dt(draws, df, true).sum();
    return nll;
  }
  if (distr == "weibull") {
    PARAMETER(shape);
    PARAMETER(scale);
    vector<Type> draws = rweibull(n, shape, scale);
    nll -= dweibull(draws, shape, scale, true).sum();
    return nll;
  }

  // ---- density:: objects: simulate into a container, add the evaluation ----
  if (distr == "AR1") {
    PARAMETER(phi);
    vector<Type> series(n);
    density::AR1(phi).simulate(series);
    nll += density::AR1(phi)(series);
    return nll;
  }
  if (distr == "ARk") {
    PARAMETER_VECTOR(phi);
    vector<Type> series(n);
    density::ARk(phi).simulate(series);
    nll += density::ARk(phi)(series);
    return nll;
  }
  if (distr == "MVNORM") {
    PARAMETER(phi);
    // 5x5 matrix with entries exp(-phi * |row - col|)
    matrix<Type> cov(5,5);
    for(int row=0; row<cov.rows(); row++) {
      for(int col=0; col<cov.rows(); col++) {
        cov(row,col) = exp( -phi * abs(row - col) );
      }
    }
    density::MVNORM_t<Type> mvn = density::MVNORM(cov);
    for(int rep = 0; rep<n; rep++) {
      vector<Type> sample = mvn.simulate();
      nll += mvn(sample);
    }
    return nll;
  }
  if (distr == "SEPARABLE") {
    PARAMETER(phi1);
    PARAMETER_VECTOR(phi2);
    array<Type> field(100, 200);
    SEPARABLE( density::ARk(phi2), density::AR1(phi1) ).simulate(field);
    nll += SEPARABLE( density::ARk(phi2), density::AR1(phi1) )(field);
    return nll;
  }
  if (distr == "GMRF") {
    PARAMETER(delta);
    // tridiagonal 5x5 matrix; delta is added to its diagonal
    matrix<Type> dense(5, 5);
    dense <<
       1,-1, 0, 0, 0,
      -1, 2,-1, 0, 0,
       0,-1, 2,-1, 0,
       0, 0,-1, 2,-1,
       0, 0, 0,-1, 1;
    dense.diagonal().array() += delta;
    Eigen::SparseMatrix<Type> Q = asSparseMatrix(dense);
    vector<Type> sample(5);
    for(int rep = 0; rep<n; rep++) {
      density::GMRF(Q).simulate(sample);
      nll += density::GMRF(Q)(sample);
    }
    return nll;
  }
  if (distr == "SEPARABLE_NESTED") {
    PARAMETER(phi1);
    PARAMETER(phi2);
    PARAMETER(delta);
    // same tridiagonal matrix as in the GMRF case
    matrix<Type> dense(5, 5);
    dense <<
       1,-1, 0, 0, 0,
      -1, 2,-1, 0, 0,
       0,-1, 2,-1, 0,
       0, 0,-1, 2,-1,
       0, 0, 0,-1, 1;
    dense.diagonal().array() += delta;
    Eigen::SparseMatrix<Type> Q = asSparseMatrix(dense);
    array<Type> field(5, 6, 7);
    for(int rep = 0; rep<n; rep++) {
      SEPARABLE(density::AR1(phi2),
                SEPARABLE(density::AR1(phi1),
                          density::GMRF(Q) ) ).simulate(field);
      nll += SEPARABLE(density::AR1(phi2),
                       SEPARABLE(density::AR1(phi1),
                                 density::GMRF(Q) ) )(field);
    }
    return nll;
  }

  // no branch matched
  error( ("Invalid distribution '" + distr + "'").c_str() );
  return nll;
}
int Rsimp(int m, int n, double **A, double *b, double *c, double *x, int *basis, int *nonbasis, double **R, double **Q, double *t1, double *t2){ int i,j,k,l,q,qv; int max_steps=20; double r,a,at; void GQR(int,int,double**,double**); max_steps=4*n; for(k=0; k<=max_steps;k++){ /* ++ Step 0) load new basis matrix and factor it */ for(i=0;i<m;i++)for(j=0;j<m;j++)R0(i,j)=AB0(i,j); GQR(m,m,Q,R); /* ++ Step 1) solving system B'*w=c(basis) ++ a) forward solve R'*y=c(basis) */ for(i=0;i<m;i++){ Y0(i)=0.0; for(j=0;j<i;j++)Y0(i)+=R0(j,i)*Y0(j); if (R0(i,i)!=0.0) Y0(i)=(CB0(i)-Y0(i))/R0(i,i); else { printf("Warning Singular Matrix Found\n"); return LP_FAIL; } } /* ++ b) find w=Q*y ++ note: B'*w=(Q*R)'*Q*y= R'*(Q'*Q)*y=R'*y=c(basis) */ for(i=0;i<m;i++){ W0(i)=0.0; for(j=0;j<m;j++)W0(i)+=Q0(i,j)*Y0(j); } /* ++ Step 2)find entering variable, ++ (use lexicographically first variable with negative reduced cost) */ q=n; for(i=0;i<n-m;i++){ /* calculate reduced cost */ r=CN0(i); for(j=0;j<m;j++) r-=W0(j)*AN0(j,i); if (r<-zero_tol && (q==n || nonbasis0(i)<nonbasis0(q))) q=i; } /* ++ if ratios were all nonnegative current solution is optimal */ if (q==n){ if (verbose>0) printf("optimal solution found in %d iterations\n",k); return LP_OPT; } /* ++ Step 3)Calculate translation direction for q entering ++ by solving system B*d=-A(:,nonbasis(q)); ++ a) let y=-Q'*A(:,nonbasis(q)); */ for(i=0;i<m;i++){ Y0(i)=0.0; for(j=0;j<m;j++) Y0(i)-=Q0(j,i)*AN0(j,q); } /* ++ b) back solve Rd=y (d=R\y) ++ note B*d= Q*R*d=Q*y=Q*-Q'*A(:nonbasis(q))=-A(:,nonbasis(q)) */ for(i=m-1;i>=0;i--){ D0(i)=0.0; for(j=m-1;j>=i+1;j--)D0(i)+=R0(i,j)*D0(j); if (R0(i,i)!=0.0) D0(i)=(Y0(i)-D0(i))/R0(i,i); else { printf("Warning Singular Matrix Found\n"); return LP_FAIL; } } /* ++ Step 4 Choose leaving variable ++ (first variable to become negative, by moving in direction D) ++ (if none become negative, then objective function unbounded) */ a=0; l=-1; for(i=0;i<m;i++){ if (D0(i)<-zero_tol){ at=-1*XB0(i)/D0(i); if (l==-1 || 
at<a){ a=at; l=i;} } } if (l==-1){ if (verbose>0){ printf("Objective function Unbounded (%d iterations)\n",k); } return LP_UNBD; } /* ++ Step 5) Update solution and basis data */ XN0(q)=a; for(j=0;j<m;j++) XB0(j)+=a*D0(j); XB0(l)=0.0; /* enforce strict zeroness of nonbasis variables */ qv=nonbasis0(q); nonbasis0(q)=basis0(l); basis0(l)=qv; } if (verbose>=0){ printf("Simplex Algorithm did not Terminate in %d iterations\n",k); } return LP_FAIL; }
int main(int argc,char **argv){ // Print GPU properties //print_properties(); // Files to print the result after the last time step FILE *rho_file; FILE *E_file; rho_file = fopen("rho_final.txt", "w"); E_file = fopen("E_final.txt", "w"); // Construct initial condition for problem ICsinus Config(-1.0, 1.0, -1.0, 1.0); //ICsquare Config(0.5,0.5,gasGam); // Set initial values for Configuration 1 /* Config.set_rho(rhoConfig19); Config.set_pressure(pressureConfig19); Config.set_u(uConfig19); Config.set_v(vConfig19); */ // Determining global border based on left over tiles (a little hack) int globalPadding; globalPadding = (nx+2*border+16)/16; globalPadding = 16*globalPadding - (nx+2*border); //printf("Globalpad: %i\n", globalPadding); // Change border to add padding //border = border + globalPadding/2; // Initiate the matrices for the unknowns in the Euler equations cpu_ptr_2D rho(nx, ny, border,1); cpu_ptr_2D E(nx, ny, border,1); cpu_ptr_2D rho_u(nx, ny, border,1); cpu_ptr_2D rho_v(nx, ny, border,1); cpu_ptr_2D zeros(nx, ny, border,1); // Set initial condition Config.setIC(rho, rho_u, rho_v, E); double timeStart = get_wall_time(); // Test cpu_ptr_2D rho_dummy(nx, ny, border); cpu_ptr_2D E_dummy(nx, ny, border); /* rho_dummy.xmin = -1.0; rho_dummy.ymin = -1.0; E_dummy.xmin = -1.0; E_dummy.ymin = -1.0; */ // Set block and grid sizes dim3 gridBC = dim3(1, 1, 1); dim3 blockBC = dim3(BLOCKDIM_BC,1,1); dim3 gridBlockFlux; dim3 threadBlockFlux; dim3 gridBlockRK; dim3 threadBlockRK; computeGridBlock(gridBlockFlux, threadBlockFlux, nx + 2*border, ny + 2*border, INNERTILEDIM_X, INNERTILEDIM_Y, BLOCKDIM_X, BLOCKDIM_Y); computeGridBlock(gridBlockRK, threadBlockRK, nx + 2*border, ny + 2*border, BLOCKDIM_X_RK, BLOCKDIM_Y_RK, BLOCKDIM_X_RK, BLOCKDIM_Y_RK); int nElements = gridBlockFlux.x*gridBlockFlux.y; // Allocate memory for the GPU pointers gpu_ptr_1D L_device(nElements); gpu_ptr_1D dt_device(1); gpu_ptr_2D rho_device(nx, ny, border); gpu_ptr_2D E_device(nx, ny, border); 
gpu_ptr_2D rho_u_device(nx, ny, border); gpu_ptr_2D rho_v_device(nx, ny, border); gpu_ptr_2D R0(nx, ny, border); gpu_ptr_2D R1(nx, ny, border); gpu_ptr_2D R2(nx, ny, border); gpu_ptr_2D R3(nx, ny, border); gpu_ptr_2D Q0(nx, ny, border); gpu_ptr_2D Q1(nx, ny, border); gpu_ptr_2D Q2(nx, ny, border); gpu_ptr_2D Q3(nx, ny, border); // Allocate pinned memory on host init_allocate(); // Set BC arguments set_bc_args(BCArgs[0], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border); set_bc_args(BCArgs[1], Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx+2*border, ny+2*border, border); set_bc_args(BCArgs[2], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border); // Set FLUX arguments set_flux_args(fluxArgs[0], L_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y); set_flux_args(fluxArgs[1], L_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y); // Set TIME argument set_dt_args(dtArgs, L_device.getRawPtr(), dt_device.getRawPtr(), nElements, rho.get_dx(), rho.get_dy(), cfl_number); // Set Rk arguments set_rk_args(RKArgs[0], dt_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx, ny, border); set_rk_args(RKArgs[1], dt_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(), 
R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx, ny, border); L_device.set(FLT_MAX); /* R0.upload(zeros.get_ptr()); R1.upload(zeros.get_ptr()); R2.upload(zeros.get_ptr()); R3.upload(zeros.get_ptr()); Q0.upload(zeros.get_ptr()); Q1.upload(zeros.get_ptr()); Q2.upload(zeros.get_ptr()); Q3.upload(zeros.get_ptr()); */ R0.set(0,0,0,nx,ny,border); R1.set(0,0,0,nx,ny,border); R2.set(0,0,0,nx,ny,border); R3.set(0,0,0,nx,ny,border); Q0.set(0,0,0,nx,ny,border); Q1.set(0,0,0,nx,ny,border); Q2.set(0,0,0,nx,ny,border); Q3.set(0,0,0,nx,ny,border); rho_device.upload(rho.get_ptr()); rho_u_device.upload(rho_u.get_ptr()); rho_v_device.upload(rho_v.get_ptr()); E_device.upload(E.get_ptr()); // Update boudries callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[0]); //Create cuda stream cudaStream_t stream1; cudaStreamCreate(&stream1); cudaEvent_t dt_complete; cudaEventCreate(&dt_complete); while (currentTime < timeLength && step < maxStep){ //RK1 //Compute flux callFluxKernel(gridBlockFlux, threadBlockFlux, 0, fluxArgs[0]); // Compute timestep (based on CFL condition) callDtKernel(TIMETHREADS, dtArgs); cudaMemcpyAsync(dt_host, dt_device.getRawPtr(), sizeof(float), cudaMemcpyDeviceToHost, stream1); cudaEventRecord(dt_complete, stream1); // Perform RK1 step callRKKernel(gridBlockRK, threadBlockRK, 0, RKArgs[0]); //Update boudries callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[1]); //RK2 // Compute flux callFluxKernel(gridBlockFlux, threadBlockFlux, 1, fluxArgs[1]); //Perform RK2 step callRKKernel(gridBlockRK, threadBlockRK, 1, RKArgs[1]); //cudaEventRecord(srteam_sync, srteam1); callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[2]); cudaEventSynchronize(dt_complete); step++; currentTime += *dt_host; // printf("Step: %i, current time: %.6f dt:%.6f\n" , step,currentTime, dt_host[0]); } //cuProfilerStop(); //cudaProfilerStop(); printf("Elapsed time %.5f", get_wall_time() - timeStart); 
E_device.download(E.get_ptr()); rho_u_device.download(rho_u.get_ptr()); rho_v_device.download(rho_v.get_ptr()); rho_device.download(rho_dummy.get_ptr()); rho_dummy.printToFile(rho_file, true, false); Config.exactSolution(E_dummy, currentTime); E_dummy.printToFile(E_file, true, false); float LinfError = Linf(E_dummy, rho_dummy); float L1Error = L1(E_dummy, rho_dummy); float L1Error2 = L1test(E_dummy, rho_dummy); printf("nx: %i\t Linf error %.9f\t L1 error %.7f L1test erro %.7f", nx, LinfError, L1Error, L1Error2); printf("nx: %i step: %i, current time: %.6f dt:%.6f\n" , nx, step,currentTime, dt_host[0]); /* cudaMemcpy(L_host, L_device, sizeof(float)*(nElements), cudaMemcpyDeviceToHost); for (int i =0; i < nElements; i++) printf(" %.7f ", L_host[i]); */ printf("%s\n", cudaGetErrorString(cudaGetLastError())); return(0); }
/* d1: converts j to a D handle (cast through L), passes that handle to
   SQLAllocStmt together with &d as the output argument -- so d is overwritten
   with the handle SQLAllocStmt produces -- and returns d.
   NOTE(review): Z, D, J, L, Q0 and R are macros/typedefs defined outside this
   line; presumably Q0(...) bails out early when the wrapped call reports
   failure and R expands to `return` -- confirm against their definitions. */
Z D d1(J j){D d=(D)(L)j;Q0(SQLAllocStmt(d,&d))R d;}