void TestNonlinearEquationPde() { ChastePoint<1> zero1(0); ChastePoint<2> zero2(0,0); ChastePoint<3> zero3(0,0,0); double u = 2.0; NonlinearEquationPde<1> heat_equation1; NonlinearEquationPde<2> heat_equation2; NonlinearEquationPde<3> heat_equation3; TS_ASSERT_DELTA(heat_equation1.ComputeNonlinearSourceTerm(zero1,u),0.0,1e-12); TS_ASSERT_DELTA(heat_equation2.ComputeNonlinearSourceTerm(zero2,u),0.0,1e-12); TS_ASSERT_DELTA(heat_equation3.ComputeNonlinearSourceTerm(zero3,u),0.0,1e-12); // Diffusion matrices should be equal to identity * u; c_matrix<double, 1, 1> diff1 = heat_equation1.ComputeDiffusionTerm(zero1,u); c_matrix<double, 2, 2> diff2 = heat_equation2.ComputeDiffusionTerm(zero2,u); c_matrix<double, 3, 3> diff3 = heat_equation3.ComputeDiffusionTerm(zero3,u); TS_ASSERT_DELTA(diff1(0,0),u,1e-12); TS_ASSERT_DELTA(diff2(0,0),u,1e-12); TS_ASSERT_DELTA(diff2(1,1),u,1e-12); TS_ASSERT_DELTA(diff2(0,1),0,1e-12); TS_ASSERT_DELTA(diff3(0,0),u,1e-12); TS_ASSERT_DELTA(diff3(1,1),u,1e-12); TS_ASSERT_DELTA(diff3(2,2),u,1e-12); TS_ASSERT_DELTA(diff3(0,1),0,1e-12); TS_ASSERT_DELTA(diff3(0,2),0,1e-12); TS_ASSERT_DELTA(diff3(1,2),0,1e-12); }
/*
 * Assembles outA / outB from sub-blocks of A1, A2, B1 and B2.
 *
 * Sub-blocks are addressed by pointer offsets only. The offsets used here
 * are consistent with A1/A2 being 2n x 2n and outA being 3n x 3n in
 * row-major order (leading dimensions 2n and 3n, matching the last two
 * arguments handed to copy2/add2) -- TODO confirm against copy2/add2/zero2,
 * which are defined elsewhere.
 *
 * With "X[r][c]" meaning the n x n block at block-row r, block-column c
 * under that assumption, the code performs:
 *
 *   outA[0][0] = A1[1][1] + A2[0][0]     outA[0][1] = A1[1][0]     outA[0][2] = A2[0][1]
 *   outA[1][0] = A1[0][1]                outA[1][1] = A1[0][0]     outA[1][2] = 0
 *   outA[2][0] = A2[1][0]                outA[2][1] = 0            outA[2][2] = A2[1][1]
 *
 *   outB[0..n)   = B1[n..2n) + B2[0..n)
 *   outB[n..2n)  = B1[0..n)
 *   outB[2n..3n) = B2[n..2n)
 *
 * NOTE(review): the previous inline comment listed the last outA block as
 * coming from A2's upper-right block, but the code copies the lower-right
 * one; the code is reproduced unchanged here.
 */
void add(double *A1, double *A2, double *B1, double *B2, int n, double *outA, double *outB)
{
    /* element offsets of the second block-row / block-column of the inputs */
    const int in_row = 2 * n * n;
    const int in_col = n;
    /* leading dimensions handed to the helpers */
    const int out_ld = 3 * n;
    const int in_ld = 2 * n;

    /* blocks of A1 */
    double *a1_00 = A1;
    double *a1_10 = A1 + in_row;
    double *a1_01 = A1 + in_col;
    double *a1_11 = A1 + in_row + in_col;

    /* blocks of A2 */
    double *a2_00 = A2;
    double *a2_10 = A2 + in_row;
    double *a2_01 = A2 + in_col;
    double *a2_11 = A2 + in_row + in_col;

    /* halves of B1 and B2 */
    double *b1_lo = B1;
    double *b1_hi = B1 + n;
    double *b2_lo = B2;
    double *b2_hi = B2 + n;

    /* blocks of outA */
    double *out_00 = outA;
    double *out_10 = outA + 3 * n * n;
    double *out_20 = outA + 2 * 3 * n * n;
    double *out_01 = outA + n;
    double *out_11 = outA + 3 * n * n + n;
    double *out_21 = outA + 2 * 3 * n * n + n;
    double *out_02 = outA + 2 * n;
    double *out_12 = outA + 3 * n * n + 2 * n;
    double *out_22 = outA + 2 * 3 * n * n + 2 * n;

    /* thirds of outB */
    double *outb_0 = outB;
    double *outb_1 = outB + n;
    double *outb_2 = outB + 2 * n;

    int idx;

    /* first block-column of outA */
    copy2(out_00, a1_11, n, out_ld, in_ld);
    add2(out_00, a2_00, n, out_ld, in_ld);
    copy2(out_10, a1_01, n, out_ld, in_ld);
    copy2(out_20, a2_10, n, out_ld, in_ld);

    /* second block-column of outA */
    copy2(out_01, a1_10, n, out_ld, in_ld);
    copy2(out_11, a1_00, n, out_ld, in_ld);
    zero2(out_21, n, out_ld);

    /* third block-column of outA */
    copy2(out_02, a2_01, n, out_ld, in_ld);
    zero2(out_12, n, out_ld);
    copy2(out_22, a2_11, n, out_ld, in_ld);

    /* outB: first third is a sum, the other two are plain copies */
    memcpy(outb_0, b1_hi, n * sizeof(double));
    for (idx = 0; idx < n; idx++) {
        outb_0[idx] += b2_lo[idx];
    }
    memcpy(outb_1, b1_lo, n * sizeof(double));
    memcpy(outb_2, b2_hi, n * sizeof(double));
}
void TestHeatEquation() { ChastePoint<1> zero1(0); ChastePoint<2> zero2(0,0); ChastePoint<3> zero3(0,0,0); double u = 2.0; HeatEquation<1> pde1; HeatEquation<2> pde2; HeatEquation<3> pde3; TS_ASSERT_DELTA(pde1.ComputeSourceTerm(zero1,u), 0.0, 1e-12); TS_ASSERT_DELTA(pde2.ComputeSourceTerm(zero2,u), 0.0, 1e-12); TS_ASSERT_DELTA(pde3.ComputeSourceTerm(zero3,u), 0.0, 1e-12); TS_ASSERT_DELTA(pde1.ComputeDuDtCoefficientFunction(zero1), 1.0, 1e-12); TS_ASSERT_DELTA(pde2.ComputeDuDtCoefficientFunction(zero2), 1.0, 1e-12); TS_ASSERT_DELTA(pde3.ComputeDuDtCoefficientFunction(zero3), 1.0, 1e-12); // Diffusion matrices should be equal to identity c_matrix<double, 1, 1> diff1 = pde1.ComputeDiffusionTerm(zero1); c_matrix<double, 2, 2> diff2 = pde2.ComputeDiffusionTerm(zero2); c_matrix<double, 3, 3> diff3 = pde3.ComputeDiffusionTerm(zero3); TS_ASSERT_DELTA(diff1(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff2(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff2(1,1), 1, 1e-12); TS_ASSERT_DELTA(diff2(0,1), 0, 1e-12); TS_ASSERT_DELTA(diff3(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff3(1,1), 1, 1e-12); TS_ASSERT_DELTA(diff3(2,2), 1, 1e-12); TS_ASSERT_DELTA(diff3(0,1), 0, 1e-12); TS_ASSERT_DELTA(diff3(0,2), 0, 1e-12); TS_ASSERT_DELTA(diff3(1,2), 0, 1e-12); Node<1> node(0, zero1); TS_ASSERT_DELTA(pde1.ComputeSourceTermAtNode(node,u), 0.0, 1e-12); }
/**
 * Envelope null-ness and equality: a default-constructed envelope is null,
 * explicitly constructed ones are not, and equals() is checked between
 * null, degenerate (0,0,0,0) and 100x100 envelopes in both directions.
 */
void object::test<5>()
{
    geos::geom::Envelope null_env;
    geos::geom::Envelope point_env(0, 0, 0, 0);
    geos::geom::Envelope point_env_twin(0, 0, 0, 0);
    geos::geom::Envelope square_env(0, 100, 0, 100);

    // Only the default-constructed envelope reports itself as null
    ensure( null_env.isNull() );
    ensure( !point_env.isNull() );
    ensure( !point_env_twin.isNull() );
    ensure( !square_env.isNull() );

    /* See http://trac.osgeo.org/geos/ticket/703 */

    // A null envelope equals itself but not a non-null one (either way round)
    ensure( null_env.equals( &null_env ) );
    ensure( !null_env.equals( &point_env ) );
    ensure( !point_env.equals( &null_env ) );

    // Two envelopes built from the same coordinates are equal, symmetrically
    ensure( point_env.equals( &point_env_twin ) );
    ensure( point_env_twin.equals( &point_env ) );

    // The square differs from both the null and the degenerate envelope
    ensure( !square_env.equals( &null_env ) );
    ensure( !square_env.equals( &point_env ) );
}
int main(int argc, char *argv[]) { clock_t tstart, tend; double duration; int numprocs, rank; float *sendbuf, *recvbuf; MPI_Comm Comm=MPI_COMM_WORLD; bool verb, wantrecord, wantwf, onlyrecord; sf_file Ffvel, Ffden, Fbvel, Fbden; sf_file Fsrc, Frcd, Fimg1, Fimg2; sf_file FGx, FGz, Fsxx, Fsxz, Fszx, Fszz; sf_file Ftmpfwf, Ftmpbwf; sf_axis at, ax, az, atau; int shtbgn, shtinv, shtnmb, shtpad, shtnmb0; int snapturn, tmpint; float **fvel, **bvel; float ***fwf, ***record, **localrec; float ***img1, **img2, ***mig1, **mig2; float *tmpsxx, *tmpsxz, *tmpszx, *tmpszz; sf_init(argc, argv); MPI_Init(&argc, &argv); MPI_Comm_size(Comm, &numprocs); MPI_Comm_rank(Comm, &rank); tstart=clock(); if(rank==0) sf_warning("numprocs=%d", numprocs); if(!sf_getbool("verb", &verb)) verb=true; if(!sf_getbool("wantrecord", &wantrecord)) wantrecord=false; if(!sf_getbool("wantwf", &wantwf)) wantwf=false; if(!sf_getbool("onlyrecord", &onlyrecord)) onlyrecord=false; Fsrc=sf_input("-input"); Fimg1=sf_output("-output"); Fimg2=sf_output("img2"); Ffvel=sf_input("fvel"); Ffden=sf_input("fden"); Fbvel=sf_input("bvel"); Fbden=sf_input("bden"); if(wantrecord) Frcd=sf_input("record"); else Frcd=sf_output("record"); if(wantwf){ Ftmpfwf=sf_output("tmpfwf"); Ftmpbwf=sf_output("tmpbwf"); } FGx=sf_input("Gx"); FGz=sf_input("Gz"); Fsxx=sf_input("sxx"); Fsxz=sf_input("sxz"); Fszx=sf_input("szx"); Fszz=sf_input("szz"); at=sf_iaxa(Fsrc, 1); nt=sf_n(at); dt=sf_d(at); if(!sf_getbool("srcdecay", &srcdecay)) srcdecay=true; if(!sf_getint("srcrange", &srcrange)) srcrange=3; if(!sf_getfloat("srctrunc", &srctrunc)) srctrunc=0.2; if(!sf_getfloat("srcalpha", &srcalpha)) srcalpha=0.5; wavelet=sf_floatalloc(nt); sf_floatread(wavelet, nt, Fsrc); if(!sf_getint("pmlsize", &pmlsize)) pmlsize=30; if(!sf_getint("nfd", &nfd)) sf_error("Need half of the FD order!"); if(!sf_getfloat("pmld0", &pmld0)) pmld0=200; if(!sf_getint("shtnmb", &shtnmb)) sf_error("Need shot number!"); if(!sf_getint("shtinv", &shtinv)) sf_error("Need shot 
interval!"); if(!sf_getint("shtbgn", &shtbgn)) shtbgn=0; shtpad=numprocs-shtnmb%numprocs; shtnmb0=shtnmb+shtpad; az=sf_iaxa(Ffvel, 1); nzb=sf_n(az); ax=sf_iaxa(Ffvel, 2); nxb=sf_n(ax); nxzb=nxb*nzb; nz=nzb-2*nfd-2*pmlsize; nx=nxb-2*nfd-2*pmlsize; if(!sf_getint("snapturn", &snapturn)) snapturn=1; if(!sf_getint("ginv", &ginv)) ginv=1; if(!sf_getint("wfinv", &wfinv)) wfinv=1; if(!sf_getint("spz", &spz)) spz=6; if(!sf_getint("gp", &gp)) gp=0; ng=(nx-1)/ginv+1; wfnt=(nt-1)/wfinv+1; wfdt=dt*wfinv; if(!sf_getint("ntau", &ntau)) ntau=1; if(!sf_getfloat("dtau", &dtau)) dtau=wfdt; if(!sf_getfloat("tau0", &tau0)) tau0=0; atau=sf_iaxa(Fsrc, 1); sf_setn(atau, ntau); sf_setd(atau, dtau); sf_seto(atau, tau0); if(!sf_histint(FGx, "n1", &nxz)) sf_error("No n1= in FGx!"); if(nxz != nxzb) sf_error("Dimension error!"); if(!sf_histint(FGx, "n2", &lenx)) sf_error("No n2= in FGx!"); if(!sf_histint(FGz, "n2", &lenz)) sf_error("No n2= in FGz!"); Gx=sf_floatalloc3(nzb, nxb, lenx); Gz=sf_floatalloc3(nzb, nxb, lenz); sxx=sf_intalloc(lenx); sxz=sf_intalloc(lenx); szx=sf_intalloc(lenz); szz=sf_intalloc(lenz); tmpsxx=sf_floatalloc(lenx); tmpsxz=sf_floatalloc(lenx); tmpszx=sf_floatalloc(lenz); tmpszz=sf_floatalloc(lenz); sf_floatread(Gx[0][0], nxzb*lenx, FGx); sf_floatread(Gz[0][0], nxzb*lenz, FGz); sf_floatread(tmpsxx, lenx, Fsxx); sf_floatread(tmpsxz, lenx, Fsxz); sf_floatread(tmpszx, lenz, Fszx); sf_floatread(tmpszz, lenz, Fszz); for (ix=0; ix<lenx; ix++){ sxx[ix]=(int)tmpsxx[ix]; sxz[ix]=(int)tmpsxz[ix]; } for (iz=0; iz<lenz; iz++){ szx[iz]=(int)tmpszx[iz]; szz[iz]=(int)tmpszz[iz]; } fvel=sf_floatalloc2(nzb, nxb); fden=sf_floatalloc2(nzb, nxb); fc11=sf_floatalloc2(nzb, nxb); bvel=sf_floatalloc2(nzb, nxb); bden=sf_floatalloc2(nzb, nxb); bc11=sf_floatalloc2(nzb, nxb); sf_floatread(fvel[0], nxzb, Ffvel); sf_floatread(fden[0], nxzb, Ffden); sf_floatread(bvel[0], nxzb, Fbvel); sf_floatread(bden[0], nxzb, Fbden); for (ix=0; ix<nxb; ix++){ for (iz=0; iz<nzb; iz++){ 
fc11[ix][iz]=fden[ix][iz]*fvel[ix][iz]*fvel[ix][iz]; bc11[ix][iz]=bden[ix][iz]*bvel[ix][iz]*bvel[ix][iz]; } } if(wantrecord){ /* check record data */ sf_histint(Frcd, "n1", &tmpint); if(tmpint != nt) sf_error("Not matched dimensions!"); sf_histint(Frcd, "n2", &tmpint); if(tmpint != ng) sf_error("Not matched dimensions!"); sf_histint(Frcd, "n3", &tmpint); if(tmpint != shtnmb) sf_error("Not matched dimensions!"); } if(rank==0){ record=sf_floatalloc3(nt, ng, shtnmb0); if(wantrecord){ sf_floatread(record[0][0], nt*ng*shtnmb, Frcd); for(is=shtnmb; is<shtnmb0; is++) for(ix=0; ix<ng; ix++) for(it=0; it<nt; it++) record[is][ix][it]=0.0; } } img1=sf_floatalloc3(nz, nx, ntau); mig1=sf_floatalloc3(nz, nx, ntau); img2=sf_floatalloc2(nz, nx); mig2=sf_floatalloc2(nz, nx); zero3(img1, nz, nx, ntau); zero2(img2, nz, nx); sf_setn(az, nz); sf_setn(ax, ng); if(!wantrecord){ sf_oaxa(Frcd, at, 1); sf_oaxa(Frcd, ax, 2); sf_putint(Frcd, "n3", shtnmb); sf_putint(Frcd, "d3", shtinv); sf_putint(Frcd, "o3", shtbgn); } sf_setn(ax, nx); if(wantwf){ sf_setn(at, wfnt); sf_setd(at, wfdt); sf_oaxa(Ftmpfwf, az, 1); sf_oaxa(Ftmpfwf, ax, 2); sf_oaxa(Ftmpfwf, at, 3); sf_oaxa(Ftmpbwf, az, 1); sf_oaxa(Ftmpbwf, ax, 2); sf_oaxa(Ftmpbwf, at, 3); } sf_oaxa(Fimg1, az, 1); sf_oaxa(Fimg1, ax, 2); sf_oaxa(Fimg1, atau, 3); sf_oaxa(Fimg2, az, 1); sf_oaxa(Fimg2, ax, 2); fwf=sf_floatalloc3(nz, nx, wfnt); localrec=sf_floatalloc2(nt, ng); if(verb){ sf_warning("=================================="); sf_warning("nx=%d nz=%d nt=%d", nx, nz, nt); sf_warning("wfnt=%d wfdt=%f wfinv=%d dt=%f", wfnt, wfdt, wfinv, dt); sf_warning("nxb=%d nzb=%d pmlsize=%d nfd=%d", nxb, nzb, pmlsize, nfd); sf_warning("ntau=%d dtau=%f tau0=%f", ntau, dtau, tau0); sf_warning("shtnmb=%d shtbgn=%d shtinv=%d", shtnmb, shtbgn, shtinv); sf_warning("lenx=%d lenz=%d spz=%d gp=%d", lenx, lenz, spz, gp); sf_warning("=================================="); } init(); for(iturn=0; iturn*numprocs<shtnmb; iturn++){ is=iturn*numprocs+rank; if(is<shtnmb){ 
sf_warning("ishot/nshot: %d/%d", is+1, shtnmb); spx=is*shtinv+shtbgn; sglfdfor2(fwf, localrec, verb); } if(wantrecord){ recvbuf=localrec[0]; if(rank==0) sendbuf=record[iturn*numprocs][0]; else sendbuf=NULL; MPI_Scatter(sendbuf, ng*nt, MPI_FLOAT, recvbuf, ng*nt, MPI_FLOAT, 0, Comm); }else{ sendbuf=localrec[0]; if(rank==0) recvbuf=record[iturn*numprocs][0]; else recvbuf=NULL; MPI_Gather(sendbuf, ng*nt, MPI_FLOAT, recvbuf, ng*nt, MPI_FLOAT, 0, Comm); } if(wantwf && rank==0 && iturn==snapturn-1) wantwf=true; else wantwf=false; if(wantwf) sf_floatwrite(fwf[0][0], wfnt*nx*nz, Ftmpfwf); if(!onlyrecord && is<shtnmb){ sglfdback2(mig1, mig2, fwf, localrec, verb, wantwf, Ftmpbwf); for(itau=0; itau<ntau; itau++){ for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ img1[itau][ix][iz]+=mig1[itau][ix][iz]; } } } for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ img2[ix][iz]+=mig2[ix][iz]; } } } MPI_Barrier(Comm); } //end of iturn if(!onlyrecord){ if(rank==0){ sendbuf=(float *)MPI_IN_PLACE; recvbuf=img1[0][0]; }else{ sendbuf=img1[0][0]; recvbuf=NULL; } MPI_Reduce(sendbuf, recvbuf, ntau*nx*nz, MPI_FLOAT, MPI_SUM, 0, Comm); if(rank==0){ sendbuf=MPI_IN_PLACE; recvbuf=img2[0]; }else{ sendbuf=img2[0]; recvbuf=NULL; } MPI_Reduce(sendbuf, recvbuf, nx*nz, MPI_FLOAT, MPI_SUM, 0, Comm); } if(rank==0){ if(!wantrecord){ sf_floatwrite(record[0][0], shtnmb*ng*nt, Frcd); } sf_floatwrite(img1[0][0], ntau*nx*nz, Fimg1); sf_floatwrite(img2[0], nx*nz, Fimg2); } tend=clock(); duration=(double)(tend-tstart)/CLOCKS_PER_SEC; sf_warning(">>The CPU time of sfmpilfdrtm2 is: %f seconds<<", duration); MPI_Finalize(); exit(0); }
/*
 * Backward propagation and imaging for one shot.
 *
 * mig1     [ntau][nx][nz] output; zeroed here, then filled with the
 *          time-shift cross-correlation of fwf and the backward wavefield.
 * mig2     [nx][nz] output; zero-lag cross-correlation divided by the
 *          forward wavefield energy (plus SF_EPS).
 * fwf      [wfnt][nx][nz] forward wavefield snapshots (read only).
 * localrec [ng][nt] shot record injected at the receiver positions.
 * verb     print one progress message per time step.
 * wantwf   when true, the backward wavefield is written to Ftmpbwf.
 *
 * Time runs from nt-1 down to 0; a snapshot is stored every wfinv steps.
 * Grid sizes, model arrays, the PML split fields and the loop counters
 * (ix, iz, it, itau, transp) are file-scope variables.
 *
 * All work arrays allocated here are released before returning, so the
 * routine can be called once per shot without growing memory use.
 * Returns 0.
 */
int sglfdback2(float ***mig1, float **mig2, float ***fwf, float **localrec, bool verb, bool wantwf, sf_file Ftmpbwf)
{
    float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0;
    float **sill, **ccr, ***bwf;
    int wfit, htau;
    float tau;

    sill=sf_floatalloc2(nz, nx);
    ccr=sf_floatalloc2(nz, nx);
    bwf=sf_floatalloc3(nz, nx, wfnt);

    zero2(sill, nz, nx);
    zero2(ccr, nz, nx);
    zero3(mig1, nz, nx, ntau);

    txxn1=sf_floatalloc2(nzb, nxb);
    txxn0=sf_floatalloc2(nzb, nxb);
    vxn1=sf_floatalloc2(nzb, nxb);
    vxn0=sf_floatalloc2(nzb, nxb);
    vzn1=sf_floatalloc2(nzb, nxb);
    vzn0=sf_floatalloc2(nzb, nxb);

    zero2(txxn1, nzb, nxb);
    zero2(txxn0, nzb, nxb);
    zero2(vxn1, nzb, nxb);
    zero2(vxn0, nzb, nxb);
    zero2(vzn1, nzb, nxb);
    zero2(vzn0, nzb, nxb);

    /* file-scope PML split fields are reset for every shot */
    zero2(txxn1x, nzb, nxb);
    zero2(txxn1z, nzb, nxb);
    zero2(txxn0x, nzb, nxb);
    zero2(txxn0z, nzb, nxb);

    wfit=wfnt-1;
    for(it=nt-1; it>=0; it--){
        if(verb) sf_warning("Backward it=%d/%d;", it+1, nt);

        /* stress update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
        for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
            for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
                txxn0[ix][iz]=txxn1[ix][iz]+dt*bc11[ix][iz]*(ldx(vxn1, ix-1, iz)
                                                            +ldz(vzn1, ix, iz-1));
            }
        }
        pml_txxb(txxn0, vxn1, vzn1);

        /* inject the record at every receiver */
#ifdef _OPENMP
#pragma omp parallel for private(ix)
#endif
        for(ix=0; ix<ng; ix++){
            txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp]+=localrec[ix][it];
        }

        /* particle-velocity update in the interior */
#ifdef _OPENMP
#pragma omp parallel for private(ix,iz)
#endif
        for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){
            for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){
                vxn0[ix][iz]=vxn1[ix][iz]+dt/bdenx[ix][iz]*ldx(txxn0, ix, iz);
                vzn0[ix][iz]=vzn1[ix][iz]+dt/bdenz[ix][iz]*ldz(txxn0, ix, iz);
            }
        }
        pml_vxzb(vxn1, vzn1, vxn0, vzn0, txxn0);

        /* swap the two time levels */
        transp=txxn1; txxn1=txxn0; txxn0=transp;
        transp=vxn1; vxn1=vxn0; vxn0=transp;
        transp=vzn1; vzn1=vzn0; vzn0=transp;

        /* store a snapshot and accumulate the zero-lag sums */
        if(it%wfinv==0){
            for(ix=0; ix<nx; ix++)
                for(iz=0; iz<nz; iz++){
                    bwf[wfit][ix][iz]=txxn0[ix+pmlsize+nfd][iz+pmlsize+nfd];
                    ccr[ix][iz]+=fwf[wfit][ix][iz]*bwf[wfit][ix][iz];
                    sill[ix][iz]+=fwf[wfit][ix][iz]*fwf[wfit][ix][iz];
                }
            wfit--;
        }
    } /* end of it */
    if(verb) sf_warning(".");

    /* time-shift imaging condition: fwf(t+tau) * bwf(t-tau) */
    for(itau=0; itau<ntau; itau++){
        tau=itau*dtau+tau0;
        htau=tau/wfdt;

        for(it=abs(htau); it<wfnt-abs(htau); it++){
            for(ix=0; ix<nx; ix++){
                for(iz=0; iz<nz; iz++){
                    mig1[itau][ix][iz]+=fwf[it+htau][ix][iz]*bwf[it-htau][ix][iz];
                }
            }
        } /* end of it */
    } /* end of itau */

    /* source-normalized zero-lag image */
    for(ix=0; ix<nx; ix++){
        for(iz=0; iz<nz; iz++){
            mig2[ix][iz]=ccr[ix][iz]/(sill[ix][iz]+SF_EPS);
        }
    }

    if(wantwf) sf_floatwrite(bwf[0][0], wfnt*nx*nz, Ftmpbwf);

    /* Release the work arrays. sf_floatalloc2/3 return a pointer table whose
       first entry owns the contiguous data, so both levels are freed. The
       time-level pointers have been swapped, but each still refers to one
       of the original allocations. */
    free(*txxn1); free(txxn1);
    free(*txxn0); free(txxn0);
    free(*vxn1); free(vxn1);
    free(*vxn0); free(vxn0);
    free(*vzn1); free(vzn1);
    free(*vzn0); free(vzn0);
    free(*sill); free(sill);
    free(*ccr); free(ccr);
    free(**bwf); free(*bwf); free(bwf);

    return 0;
}
int sglfdfor2(float ***fwf, float **rcd, bool verb) { float **txxn1, **txxn0, **vxn1, **vxn0, **vzn1, **vzn0; int wfit; txxn1=sf_floatalloc2(nzb, nxb); txxn0=sf_floatalloc2(nzb, nxb); vxn1=sf_floatalloc2(nzb, nxb); vxn0=sf_floatalloc2(nzb, nxb); vzn1=sf_floatalloc2(nzb, nxb); vzn0=sf_floatalloc2(nzb, nxb); zero2(txxn1, nzb, nxb); zero2(txxn0, nzb, nxb); zero2(vxn1, nzb, nxb); zero2(vxn0, nzb, nxb); zero2(vzn1, nzb, nxb); zero2(vzn0, nzb, nxb); zero2(txxn1x, nzb, nxb); zero2(txxn1z, nzb, nxb); zero2(txxn0x, nzb, nxb); zero2(txxn0z, nzb, nxb); wfit=0; for(it=0; it<nt; it++){ // sf_warning("test txxn1[801][30]=%d",txxn1[801][30]) if(verb) sf_warning("Forward it=%d/%d;", it+1, nt); #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){ for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){ vxn1[ix][iz]=vxn0[ix][iz]-dt/fdenx[ix][iz]*ldx(txxn0, ix, iz); vzn1[ix][iz]=vzn0[ix][iz]-dt/fdenz[ix][iz]*ldz(txxn0, ix, iz); } } pml_vxz(vxn1, vzn1, vxn0, vzn0, txxn0); #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=nfd+pmlsize; ix<nfd+pmlsize+nx; ix++){ for(iz=nfd+pmlsize; iz<nfd+pmlsize+nz; iz++){ txxn1[ix][iz]=txxn0[ix][iz]-dt*fc11[ix][iz]*(ldx(vxn1, ix-1, iz) + ldz(vzn1, ix, iz-1)); } } pml_txx(txxn1, vxn1, vzn1); if((it*dt)<srctrunc){ explsource(txxn1); } if(it%wfinv==0){ #ifdef _OPENMP #pragma omp parallel for private(ix,iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ fwf[wfit][ix][iz]=txxn0[ix+nfd+pmlsize][iz+nfd+pmlsize]; } } wfit++; } #ifdef _OPENMP #pragma omp parallel for private(ix) #endif for(ix=0; ix<ng; ix++){ rcd[ix][it]=txxn0[ix*ginv+pmlsize+nfd][pmlsize+nfd+gp]; } transp=txxn0; txxn0=txxn1; txxn1=transp; transp=vxn0; vxn0=vxn1; vxn1=transp; transp=vzn0; vzn0=vzn1; vzn1=transp; } // end of it if(verb) sf_warning("."); return 0;; }
// Double-precision value providers. Each returns a fixed constant through a
// function call marked NOINLINE (macro defined elsewhere; presumably it keeps
// the call from being inlined so the value is not folded at compile time --
// TODO confirm against the macro definition).
double NOINLINE dval() { return 10.0; }   // positive value
double NOINLINE dval2() { return -10.0; } // negative value
double NOINLINE dzero() { return 0.0; }   // positive zero
double NOINLINE dzero2() { return -0.0; } // negative zero

// Single-precision test inputs. ret_e, ret_minuspi, val, val2, zero and zero2
// as well as FLOAT_INF / FLOAT_NAN are defined elsewhere.
const float e = ret_e();
const float negpi = ret_minuspi();
const float inf = FLOAT_INF;
const float negInf = -FLOAT_INF;
const float floatNan = FLOAT_NAN;
const float floatMax = FLT_MAX;
const float floatMin = -FLT_MAX; // most negative finite float, not the smallest positive one
const float posValue = val();
const float negValue = val2();
const float posZero = zero();
const float negZero = zero2();

// Double-precision counterparts of the values above. dret_e, dret_minuspi and
// DOUBLE_INF / DOUBLE_NAN are defined elsewhere.
const double de = dret_e();
const double dnegpi = dret_minuspi();
const double dinf = DOUBLE_INF;
const double dnegInf = -DOUBLE_INF;
const double doubleNan = DOUBLE_NAN;
const double doubleMax = DBL_MAX;
const double doubleMin = -DBL_MAX; // most negative finite double
const double dposValue = dval();
const double dnegValue = dval2();
const double dposZero = dzero();
const double dnegZero = dzero2();

int main() {
void TestHeatEquationWithElementDependentSourceTerm() { // The PDE is set to give elements with index = 0 a source of zero // and a source of 1 otherwise. std::vector<Node<1>*> one_d_nodes; one_d_nodes.push_back(new Node<1>(0, false, 2.0)); one_d_nodes.push_back(new Node<1>(1, false, 2.5)); Element<1,1> one_d_element(0u, one_d_nodes); ChastePoint<1> zero1(0); std::vector<Node<2>*> two_d_nodes; two_d_nodes.push_back(new Node<2>(0, false, 0.0, 0.0)); two_d_nodes.push_back(new Node<2>(1, false, 1.0, 0.0)); two_d_nodes.push_back(new Node<2>(2, false, 0.0, 1.0)); Element<2,2> two_d_element(0u, two_d_nodes); ChastePoint<2> zero2(0,0); std::vector<Node<3>*> three_d_nodes; three_d_nodes.push_back(new Node<3>(0, false, 0.0, 0.0, 0.0)); three_d_nodes.push_back(new Node<3>(1, false, 1.0, 0.0, 0.0)); three_d_nodes.push_back(new Node<3>(2, false, 0.0, 1.0, 0.0)); three_d_nodes.push_back(new Node<3>(3, false, 0.0, 0.0, 1.0)); Element<3,3> three_d_element(0u, three_d_nodes); ChastePoint<3> zero3(0,0,0); double u = 2.0; HeatEquationWithElementDependentSourceTerm<1> pde1; HeatEquationWithElementDependentSourceTerm<2> pde2; HeatEquationWithElementDependentSourceTerm<3> pde3; TS_ASSERT_DELTA(pde1.ComputeSourceTerm(zero1, u, &one_d_element), 0.0, 1e-12); one_d_element.ResetIndex(1u); TS_ASSERT_DELTA(pde1.ComputeSourceTerm(zero1, u, &one_d_element), 1.0, 1e-12); TS_ASSERT_DELTA(pde2.ComputeSourceTerm(zero2, u, &two_d_element), 0.0, 1e-12); two_d_element.ResetIndex(1u); TS_ASSERT_DELTA(pde2.ComputeSourceTerm(zero2, u, &two_d_element), 1.0, 1e-12); TS_ASSERT_DELTA(pde3.ComputeSourceTerm(zero3, u, &three_d_element), 0.0, 1e-12); three_d_element.ResetIndex(1u); TS_ASSERT_DELTA(pde3.ComputeSourceTerm(zero3, u, &three_d_element), 1.0, 1e-12); TS_ASSERT_DELTA(pde1.ComputeDuDtCoefficientFunction(zero1), 1.0, 1e-12); TS_ASSERT_DELTA(pde2.ComputeDuDtCoefficientFunction(zero2), 1.0, 1e-12); TS_ASSERT_DELTA(pde3.ComputeDuDtCoefficientFunction(zero3), 1.0, 1e-12); // Diffusion matrices should be 
equal to identity c_matrix<double, 1, 1> diff1 = pde1.ComputeDiffusionTerm(zero1); c_matrix<double, 2, 2> diff2 = pde2.ComputeDiffusionTerm(zero2); c_matrix<double, 3, 3> diff3 = pde3.ComputeDiffusionTerm(zero3); TS_ASSERT_DELTA(diff1(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff2(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff2(1,1), 1, 1e-12); TS_ASSERT_DELTA(diff2(0,1), 0, 1e-12); TS_ASSERT_DELTA(diff3(0,0), 1, 1e-12); TS_ASSERT_DELTA(diff3(1,1), 1, 1e-12); TS_ASSERT_DELTA(diff3(2,2), 1, 1e-12); TS_ASSERT_DELTA(diff3(0,1), 0, 1e-12); TS_ASSERT_DELTA(diff3(0,2), 0, 1e-12); TS_ASSERT_DELTA(diff3(1,2), 0, 1e-12); delete one_d_nodes[0]; delete one_d_nodes[1]; delete two_d_nodes[0]; delete two_d_nodes[1]; delete two_d_nodes[2]; delete three_d_nodes[0]; delete three_d_nodes[1]; delete three_d_nodes[2]; delete three_d_nodes[3]; }
int main(int argc, char *argv[]) { int ix, iz, it, itau; int nx, nz, ntau, nt, pad; float dt, dtau, dx, dz, dt2, idz2, idx2; float tau0, tau; float ***dd, ***mm, **vv, **v0; float **u0, **u1, **u2, **ud, **tmp; sf_axis ax, az, atau; sf_file tgather, cgather, vel; sf_init(argc, argv); tgather=sf_input("in"); cgather=sf_output("out"); vel=sf_input("velocity"); az=sf_iaxa(tgather, 1); ax=sf_iaxa(tgather, 2); atau=sf_iaxa(tgather, 3); nz=sf_n(az); dz=sf_d(az); nx=sf_n(ax); dx=sf_d(ax); ntau=sf_n(atau); dtau=sf_d(atau); tau0=sf_o(atau); if(!sf_getfloat("dt", &dt)) dt=0.001; if(!sf_getint("pad", &pad)) pad=30; padnx=nx+2*pad; padnz=nz+2*pad; idz2=1.0/(dz*dz); idx2=1.0/(dx*dx); c11=4.0*idz2/3.0; c12=-idz2/12.0; c21=4.0*idx2/3.0; c22=-idx2/12.0; c0=-2.0*(c11+c12+c21+c22); dd=sf_floatalloc3(nz, nx, ntau); mm=sf_floatalloc3(nz, nx, ntau); vv=sf_floatalloc2(nz, nx); v0=sf_floatalloc2(padnz, padnx); padvv=sf_floatalloc2(padnz, padnx); u0=sf_floatalloc2(padnz, padnx); u1=sf_floatalloc2(padnz, padnx); u2=sf_floatalloc2(padnz, padnx); ud=sf_floatalloc2(padnz, padnx); sf_floatread(dd[0][0], ntau*nx*nz, tgather); sf_floatread(vv[0], nx*nz, vel); dt2=dt*dt; for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) padvv[ix+pad][iz+pad]=vv[ix][iz]*vv[ix][iz]*dt2; for(iz=0; iz<pad; iz++) for(ix=pad; ix<nx+pad; ix++){ padvv[ix][iz]=padvv[ix][pad]; padvv[ix][pad+nz+iz]=padvv[ix][pad+nz-1]; } for(ix=0; ix<pad; ix++) for(iz=0; iz<padnz; iz++){ padvv[ix][iz]=padvv[pad][iz]; padvv[ix+pad+nx][iz]=padvv[pad+nx-1][iz]; } for(itau=0; itau<ntau; itau++){ sf_warning("itau=%d/%d", itau+1, ntau); zero2(u0, padnz, padnx); zero2(u1, padnz, padnx); zero2(u2, padnz, padnx); zero2(ud, padnz, padnx); zero2(v0, padnz, padnx); tau=tau0+itau*dtau; // tau=0 if(tau==0.){ for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) mm[itau][ix][iz]=dd[itau][ix][iz]; continue; } // calculate v0 (derivative with respect to tau) if(itau==0){ for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ 
v0[ix+pad][iz+pad]=(dd[1][ix][iz]-dd[0][ix][iz])/dtau; } } } else if (itau==ntau-1){ for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ v0[ix+pad][iz+pad]=(dd[ntau-1][ix][iz]-dd[ntau-2][ix][iz])/dtau; } } } else { #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ v0[ix+pad][iz+pad]=(dd[itau+1][ix][iz]-dd[itau-1][ix][iz])/dtau/2.0; } } } // calculate u1 #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) u1[ix+pad][iz+pad]=dd[itau][ix][iz]; // tau>0 if(tau>0.){ laplacian(u1, ud); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ u0[ix][iz]=u1[ix][iz]+ud[ix][iz]/2.0-v0[ix][iz]*dt; } } nt=tau/dt+0.5; for(it=1; it<nt; it++){ sf_warning("it=%d/%d;", it+1, nt); tmp=u2; u2=u1; u1=u0; u0=tmp; laplacian(u1, ud); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ u0[ix][iz]=2*u1[ix][iz]-u2[ix][iz]+ud[ix][iz]; } } } //end of it #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ mm[itau][ix][iz]=u0[ix+pad][iz+pad]; } } } // tau<0 if(tau<0.){ laplacian(u1, ud); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ u2[ix][iz]=u1[ix][iz]+dt*v0[ix][iz]+ud[ix][iz]/2.0; } } nt=-tau/dt+0.5; for(it=1; it<nt; it++){ sf_warning("it=%d/%d;", it+1, nt); tmp=u0; u0=u1; u1=u2; u2=tmp; laplacian(u1, ud); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<padnx; ix++){ for(iz=0; iz<padnz; iz++){ u2[ix][iz]=2*u1[ix][iz]-u0[ix][iz]+ud[ix][iz]; } } }//end of it #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ mm[itau][ix][iz]=u2[ix+pad][iz+pad]; } } } } //end of itau sf_floatwrite(mm[0][0], ntau*nx*nz, cgather); exit(0); }
/*
 * MPI driver for time-shift gather correction with a low-rank propagator.
 *
 * Inputs : "--input"  gather with axes (z, x, tau);
 *          "left"     matrix with n1 = (nz+2*nb)*(nx+2*nb), n2 = m2;
 *          "right"    matrix with n1 = m2, n2 = nk (nk from fft2_init).
 * Output : "--output" gather with the same three axes, written by rank 0.
 *
 * Parameters and defaults read below: dt=0.001, nb=60, par=0.01, cmplx=n,
 * abc=y, pad1=1, taper=y, thresh=0.92.
 *
 * Flow:
 *  - Rank 0 reads the whole gather; it is then broadcast one tau slice at a
 *    time, and the output gather mm is zeroed on every rank.
 *  - Tau slices are dealt round-robin: rank cpuid handles
 *    itau = cpuid, cpuid+numprocs, ...
 *  - The slice with index (ntau-1)/2 is treated as tau = 0 and copied
 *    unchanged.
 *  - For every other slice, v0 holds the tau derivative of the gather
 *    (one-sided at the ends of the tau axis, centered elsewhere), curr holds
 *    the slice embedded in the padded FFT grid, and prev is seeded with
 *    +/- 2*v0*dt. lrupdate(curr, prev) is called once, curr is halved, and
 *    lrupdate is then called nt-1 more times with nt = |tau|/dt + 0.5.
 *    Slices above the middle index use the + sign, those below the - sign.
 *  - Because every slice is non-zero on exactly one rank, the MPI_SUM
 *    reduction onto rank 0 (in place on the root) assembles the full result.
 *
 * NOTE(review): the sf_warning call inside the positive-tau "it" loop is
 * commented out with "//"; in this copy of the text that comment is wrapped
 * across two physical lines.
 */
int main(int argc, char *argv[]) { bool cmplx, abc, taper; int ix, iz, it, itau, n2, m2; int nx, nz, ntau, nt, pad1, nb, nx2, nz2, nzx2, fnx, fnz, fnzx, nk; float dt, dtau, par, dz, dx, thresh; float tau0, tau; float **lt, **rt; float *curr, *prev; float ***dd, ***mm, **v0; sf_axis ax, az, atau; sf_file tgather, cgather, left, right; int cpuid, numprocs, nth; float *sendbuf, *recvbuf; MPI_Comm comm=MPI_COMM_WORLD; sf_init(argc, argv); MPI_Init(&argc, &argv); MPI_Comm_rank(comm, &cpuid); MPI_Comm_size(comm, &numprocs); #ifdef _OPENMP #pragma omp parallel { nth=omp_get_num_threads(); } sf_warning(">>> Using %d threads <<<", nth); #endif tgather=sf_input("--input"); cgather=sf_output("--output"); left=sf_input("left"); right=sf_input("right"); az=sf_iaxa(tgather, 1); ax=sf_iaxa(tgather, 2); atau=sf_iaxa(tgather, 3); nz=sf_n(az); dz = sf_d(az); nx=sf_n(ax); dx = sf_d(ax); ntau=sf_n(atau); dtau=sf_d(atau); tau0=sf_o(atau); if(cpuid==0){ sf_oaxa(cgather, az, 1); sf_oaxa(cgather, ax, 2); sf_oaxa(cgather, atau, 3); } if(!sf_getfloat("dt", &dt)) dt=0.001; /* time interval */ if(!sf_getint("nb", &nb)) nb=60; /* boundary width */ if(!sf_getfloat("par", &par)) par=0.01; /* absorbing boundary coefficient */ if(!sf_getbool("cmplx", &cmplx)) cmplx=false; /* use complex FFT */ if(!sf_getbool("abc", &abc)) abc=true; /* absorbing boundary condition */ if(!sf_getint("pad1", &pad1)) pad1=1; /* padding factor on the first axis */ if(!sf_getbool("taper", &taper)) taper=true; /* tapering */ if(!sf_getfloat("thresh", &thresh)) thresh=0.92; /* thresholding */ nx2=nx+2*nb; nz2=nz+2*nb; nk=fft2_init(cmplx,pad1,nz2,nx2,&fnz,&fnx); nzx2=nz2*nx2; fnzx=fnz*fnx; if (!sf_histint(left,"n1",&n2) || n2 != nzx2) sf_error("Need n1=%d in left",nzx2); if (!sf_histint(left,"n2",&m2)) sf_error("Need n2= in left"); if (!sf_histint(right,"n1",&n2) || n2 != m2) sf_error("Need n1=%d in right",m2); if (!sf_histint(right,"n2",&n2) || n2 != nk) sf_error("Need n2=%d in right",nk); lt = sf_floatalloc2(nzx2,m2); rt 
= sf_floatalloc2(m2,nk); sf_floatread(lt[0],nzx2*m2,left); sf_floatread(rt[0],m2*nk,right); dd=sf_floatalloc3(nz, nx, ntau); mm=sf_floatalloc3(nz, nx, ntau); v0=sf_floatalloc2(nz2, nx2); curr=sf_floatalloc(fnzx); prev=sf_floatalloc(fnzx); /* broad cast input time-shift gather */ if(cpuid==0) sf_floatread(dd[0][0], ntau*nx*nz, tgather); for(itau=0; itau<ntau; itau++){ MPI_Bcast(dd[itau][0], nx*nz, MPI_FLOAT, 0, comm); } /* initialize corrected time-shift gather */ #ifdef _OPENMP #pragma omp parallel for private(itau, ix, iz) #endif for(itau=0; itau<ntau; itau++) for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) mm[itau][ix][iz]=0.; /* initialize functions */ lrinit(nx2, nz2, m2, cmplx, pad1, nb, par, abc, lt, rt, taper, thresh, dz, dx); /* tau loop */ for(itau=cpuid; itau<ntau; itau+=numprocs){ sf_warning("itau=%d/%d", itau+1, ntau); tau=tau0+itau*dtau; // tau=0 if(itau==(ntau-1)/2){ for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) mm[itau][ix][iz]=dd[itau][ix][iz]; continue; } // calculate v0 (derivative with respect to tau) zero2(v0, nz2, nx2); if(itau==0){ for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ v0[ix+nb][iz+nb]=(dd[1][ix][iz]-dd[0][ix][iz])/dtau; } } } else if (itau==ntau-1){ for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ v0[ix+nb][iz+nb]=(dd[ntau-1][ix][iz]-dd[ntau-2][ix][iz])/dtau; } } } else { #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ v0[ix+nb][iz+nb]=(dd[itau+1][ix][iz]-dd[itau-1][ix][iz])/dtau/2.0; } } } // calculate u1 zero1(curr, fnzx); zero1(prev, fnzx); #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++) for(iz=0; iz<nz; iz++) curr[(ix+nb)*fnz+iz+nb]=dd[itau][ix][iz]; // tau>0 if(itau>(ntau-1)/2){ // calculate u(t-lt) for (ix=0; ix<nx2; ix++) for (iz=0; iz<nz2; iz++) prev[ix*fnz+iz]=2.*v0[ix][iz]*dt; lrupdate(curr,prev); for (ix=0; ix<fnzx; ix++) curr[ix]=curr[ix]/2.; // it loop nt=tau/dt+0.5; for(it=1; it<nt; it++){ // sf_warning("it=%d/%d;", it+1, 
nt); lrupdate(curr,prev); } //end of it #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ mm[itau][ix][iz]=curr[(ix+nb)*fnz+iz+nb]; } } } // end of positive tau // tau<0 if(itau<(ntau-1)/2){ //calculate u(t+lt) for (ix=0; ix<nx2; ix++) for(iz=0; iz<nz2; iz++) prev[ix*fnz+iz]=-2.*v0[ix][iz]*dt; lrupdate(curr, prev); for (ix=0; ix<fnzx; ix++) curr[ix]=curr[ix]/2.; // it loop nt=-tau/dt+0.5; for(it=1; it<nt; it++){ //sf_warning("it=%d/%d;", it+1, nt); lrupdate(curr, prev); }//end of it #ifdef _OPENMP #pragma omp parallel for private(ix, iz) #endif for(ix=0; ix<nx; ix++){ for(iz=0; iz<nz; iz++){ mm[itau][ix][iz]=curr[(ix+nb)*fnz+iz+nb]; } } }// end of negative tau } //end of itau MPI_Barrier(comm); /* corrected time-shift gather reduction */ for(itau=0; itau<ntau; itau++){ if(cpuid==0){ sendbuf=MPI_IN_PLACE; recvbuf=mm[itau][0]; }else{ sendbuf=mm[itau][0]; recvbuf=NULL; } MPI_Reduce(sendbuf, recvbuf, nz*nx, MPI_FLOAT, MPI_SUM, 0, comm); } if(cpuid==0) sf_floatwrite(mm[0][0], ntau*nz*nx, cgather); lrclose(); MPI_Finalize(); }
/**
 * Fits a mixture of Gaussians to `points` with expectation-maximisation.
 *
 * @param points     samples; every Feature is read through vals / length().
 * @param K          requested number of components. It is replaced by the
 *                   number of centres KmeansClustering actually returns.
 * @param maxIter    iteration bound: the EM body runs maxIter - 1 times
 *                   (not at all when maxIter <= 1).
 * @param tolerance  forwarded to KmeansClustering only; the EM loop itself
 *                   has no convergence test.
 * @return one GaussianDistribution per component. When K is at least the
 *         number of points, one distribution per point is returned instead,
 *         centred on that point.
 */
vector<GaussianDistribution> EMGaussianClustering(vector<Feature>& points, int K, int maxIter, float tolerance)
{
	// Degenerate case: no more points than requested components
	if (K >= points.size())
	{
		vector<GaussianDistribution> perPoint(points.size());
		for (int p = 0; p < points.size(); ++p)
		{
			vector<Feature> single(1, points[p]);
			perPoint[p].mu = points[p];
			perPoint[p].sgm = Covariance::computeCovariance(single);
		}
		return perPoint;
	}

	int N = points[0].length();
	Feature zeroFeature(N);

	// Initialise the Gaussian distributions from K-means
	vector<Feature> centers = KmeansClustering(points, K, tolerance);
	K = centers.size();
	Feature zeroWeights(K);

	// Hard assignment of every point to one K-means centre
	vector<int> labels(points.size());
	for (int p = 0; p < points.size(); ++p)
	{
		labels[p] = selectKmeansCluster(points[p], centers);
	}

	// One initial Gaussian per centre, built from the points assigned to it
	vector<GaussianDistribution> dist(centers.size());
	for (int c = 0; c < centers.size(); ++c)
	{
		vector<Feature> members;
		for (int p = 0; p < labels.size(); ++p)
		{
			if (labels[p] == c)
			{
				members.push_back(points[p]);
			}
		}
		dist[c] = GaussianDistribution(members);
	}

	for (int iter = 1; iter < maxIter; ++iter)
	{
		// E-step: w[p].vals[c] is the normalised density of component c at point p
		vector<Feature> w(points.size(), zeroWeights);
		for (int p = 0; p < points.size(); ++p)
		{
			float total = 0;
			for (int c = 0; c < K; ++c)
			{
				float density = dist[c].eval(points[p]);
				w[p].vals[c] = density;
				total += density;
			}
			for (int c = 0; c < K; ++c)
			{
				w[p].vals[c] /= total;
			}
		}

		// M-step, part 1: weighted means and total weight per component
		vector<GaussianDistribution> updated(K);
		vector<float> weightSum(K);
		for (int c = 0; c < K; ++c)
		{
			Feature mean = zeroFeature;
			float total = 0;
			for (int p = 0; p < points.size(); ++p)
			{
				for (int d = 0; d < mean.length(); ++d)
				{
					mean.vals[d] += w[p].vals[c] * points[p].vals[d];
				}
				total += w[p].vals[c];
			}
			for (int d = 0; d < mean.length(); ++d)
			{
				mean.vals[d] /= total;
			}
			updated[c].mu = mean;
			weightSum[c] = total;
		}

		// M-step, part 2: weighted covariances about the new means
		for (int c = 0; c < K; ++c)
		{
			techsoft::matrix<float> cov(N, N, 0.0f);
			techsoft::matrix<float> mean = Feature::toColumnVector(updated[c].mu);
			for (int p = 0; p < points.size(); ++p)
			{
				techsoft::matrix<float> df(N, 1);
				for (int d = 0; d < N; ++d)
				{
					df(d,0) = points[p].vals[d] - mean(d, 0);
				}
				cov = cov + w[p].vals[c] * (df * (~df));
			}
			cov /= weightSum[c];

			// NOTE(review): the Covariance is constructed with K although the
			// matrix stored in it is N x N -- confirm what the argument means.
			updated[c].sgm = Covariance(K);
			updated[c].sgm.mat = cov;
			updated[c].sgm.imat = !cov;
		}

		dist = updated;
	}

	return dist;
}