int main(int argc, char **argv) { //double tend = 1E2, speed = 1.; double tend = 1E-1, speed = 1.; char *init_type = "mixed2"; double *roots, *weights, *ll, *dl, xmin, xmax, lxmin, lxmax, deltax, jac, xr, xl, cfl, dt, rtime, min_dx; int ii, jj, kk, ee, idx, eres; long nstep; double *dx, *mesh; double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, *mf, *ib, *df, *fstar; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); para_range(0, tne, nprocs, rank, &ista, &iend); ne = iend - ista; // initialize // fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne) // c 1d index structure array = [ee*np*np + jj*np + ii] roots = (double *)malloc(np * sizeof(double)); weights = (double *)malloc(np * sizeof(double)); ll = (double *)malloc(np * sizeof(double)); dl = (double *)malloc(np * sizeof(double)); dx = (double *)malloc(ne * sizeof(double)); mesh = (double *)malloc((ne + 1) * sizeof(double)); smat = (double *)malloc(np * np * sizeof(double)); // [jj np, ii np] xx = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] qq = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] qtemp = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] k1 = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] k2 = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] k3 = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] k4 = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] minv_vec = (double *)malloc(ne * np * sizeof(double)); // [ee ne, ii np] mmat = (double *)malloc(ne * np * np * sizeof(double)); // [ee ne, jj np, ii np] dv = (double *)malloc(ne * np * np * sizeof(double)); // [ee ne, jj np, ii np] mf = (double *)malloc(2 * np * sizeof(double)); // [jj 2, ii np] ib = (double *)malloc(2 * np * sizeof(double)); // [jj 2, ii np] fstar = (double *)malloc(2 * ne * sizeof(double)); // [jj 2, ii ne] df = (double *)malloc(ne * 2 * np * sizeof(double)); // [ee ne, jj 2, ii np] for (ii = 0; ii < np; ++ii) { roots[ii] = 0; weights[ii] = 0; ll[ii] = 0; dl[ii] = 0; } for (ii = 0; ii < ne; ++ii) { dx[ii] = 0; mesh[ii] = 0; } mesh[ne] = 0; for (ii = 0; ii < np * np; ++ii) { smat[ii] = 0; } for (ii = 0; ii < ne * np; ++ii) { xx[ii] = 0; qq[ii] = 0; k1[ii] = 0; k2[ii] = 0; k3[ii] = 0; k4[ii] = 0; minv_vec[ii] = 0; } for (ii = 0; ii < ne * np * np; ++ii) { mmat[ii] = 0; dv[ii] = 0; } for (ii = 0; ii < np * 2; ++ii) { mf[ii] = 0; ib[ii] = 0; } for (ii = 0; ii < ne * 2; ++ii) { fstar[ii] = 0; } for (ii = 0; ii < ne * 2 * np; ++ii) { df[ii] = 0; } // mesh setup xmin = 0.; xmax = 10.; deltax = (xmax-xmin)/(double)tne; /** * lxim, lxmax를 이용하여 각 구간의 mesh[ee]를 구한다 * ne의 크기가 tne / process의 개수이기 때문에, * 각 구간에 맞는 mesh[ee]를 구해야 한다. * 그리고 mesh[ee]를 이용하여 각 변수들을 초기화 한다. */ lxmin = xmin + (ista)*deltax; lxmax = xmin + (iend)*deltax; /** * mesh[ne]은 마지막 원소가 아니라는점에 유의한다. */ mesh[ne] = lxmax; for(ee=0;ee<ne;++ee){ mesh[ee] = lxmin+ee*deltax; } // gauss lobatto quadrature point, weight setup gausslobatto_quadrature(np, roots, weights); // coordinates and element size min_dx = xmax - xmin; // initial guess for (ee = 0; ee < ne; ee++) { xl = mesh[ee]; xr = mesh[ee + 1]; dx[ee] = xr - xl; // size of each element if (dx[ee] < min_dx) { min_dx = dx[ee]; // finding minimum dx } for (ii = 0; ii < np; ii++) { idx = ee * np + ii; xx[idx] = xl + 0.5 * (1 + roots[ii]) * dx[ee]; } } // mass matrix for (ii = 0; ii < ne * np * np; ii++) { mmat[ii] = 0; } for (ee = 0; ee < ne; ee++) { jac = fabs(dx[ee]) / 2; for (kk = 0; kk < np; kk++) { lagrange(roots[kk], ll, roots); for (jj = 0; jj < np; jj++) { for (ii = 0; ii < np; ii++) { idx = ee * np * np + jj * np + ii; // mass matrix mmat[ne][np][np] in 1d index representation mmat[idx] += jac * weights[kk] * ll[ii] * ll[jj]; } } } } // stiffness matrix for (ii = 0; ii < np * np; ii++) { smat[ii] = 0; } for (kk = 0; kk < np; kk++) { lagrange(roots[kk], ll, roots); lagrange_deriv(roots[kk], dl, roots); for (jj = 0; jj < np; jj++) { for (ii = 0; ii < np; ii++) { idx = jj * np + ii; // stiffness matrix smat[np][np] in 1d index representation smat[idx] += weights[kk] * ll[jj] * dl[ii]; } } } // face integration for (ii = 0; ii < np * 2; ii++) { mf[ii] = 0; } lagrange(-1, mf, roots); // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration lagrange(1, mf + np, roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration // boundary interpolation for (ii = 0; ii < np * 2; ii++) { ib[ii] = 0; } lagrange(-1, ib, roots); // element left edge interpolation lagrange(1, ib + np, roots); // element right edge interpolation // divergence operators for (ii = 0; ii < ne * np * np; ii++) { dv[ii] = 0; } for (ii = 0; ii < ne * np * 2; ii++) { dv[ii] = 0; } for (ee = 0; ee < ne; ee++) { for (jj = 0; jj < np; jj++) { // it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0 // the inverse of mmat is just the inverse of the diagonal components // here, we are extracting the inverse diagonal components only minv_vec[ee * np + jj] = 1. / mmat[ee * np * np + jj * np + jj]; } for (jj = 0; jj < np; jj++) { for (ii = 0; ii < np; ii++) { dv[ee * np * np + jj * np + ii] = minv_vec[ee * np + ii] * smat[jj * np + ii]; } } for (jj = 0; jj < 2; jj++) { for (ii = 0; ii < np; ii++) { df[ee * np * 2 + jj * np + ii] = minv_vec[ee * np + ii] * mf[jj * np + ii]; } } } // initialize qq field initialize(qq, xx, xmax, xmin, init_type); cfl = 1. / (np * np); dt = cfl * min_dx / fabs(speed); rtime = 0.; nstep = 0; printf("Start Time Integration\n"); // Runge-Kutta 4th order Time integration loop t_sta = clock(); while (rtime < tend) { dt = fmin(dt, tend - rtime); rhs(qq, k1, dv, df, ib, speed); for (ii = 0; ii < ne * np; ii++) qtemp[ii] = qq[ii] + 0.5 * dt * k1[ii]; rhs(qtemp, k2, dv, df, ib, speed); for (ii = 0; ii < ne * np; ii++) qtemp[ii] = qq[ii] + 0.5 * dt * k2[ii]; rhs(qtemp, k3, dv, df, ib, speed); for (ii = 0; ii < ne * np; ii++) qtemp[ii] = qq[ii] + dt * k3[ii]; rhs(qtemp, k4, dv, df, ib, speed); for (ii = 0; ii < ne * np; ii++) qq[ii] += 1. / 6. * dt * (k1[ii] + 2 * k2[ii] + 2 * k3[ii] + k4[ii]); rtime += dt; nstep += 1; if (nstep % 10000 == 0 && rank == 0) printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime / tend * 100); } // timeloop ends here; if (rank != 0) { int nne = iend - ista; MPI_Isend(&nne, 1, MPI_INT, 0, 11, MPI_COMM_WORLD, &ser1); MPI_Isend(xx, ne * np, MPI_DOUBLE, 0, 22, MPI_COMM_WORLD, &ser2); MPI_Isend(qq, ne * np, MPI_DOUBLE, 0, 33, MPI_COMM_WORLD, &ser3); MPI_Wait(&ser1, &st); MPI_Wait(&ser2, &st); MPI_Wait(&ser3, &st); } double *bufx; double *bufq; int *istart; int *idisp; if (rank == 0) { printf("Integration complete\n"); if (tne > 200) { eres = 2; } else if (tne > 60) { eres = 3; } else if (tne > 30) { eres = 6; } else { eres = 10; } // final report printf("-----------------------------------------------\n"); printf("code type : c serial\n"); printf("Final time : %13.5e\n", rtime); printf("CFL : %13.5e\n", cfl); printf("DOF : %13d\n", tne * np); printf("No. of Elem : %13d\n", tne); printf("Order : %13d\n", np); printf("eres : %13d\n", eres); printf("time steps : %13ld\n", nstep); printf("-----------------------------------------------\n"); bufx = (double *)malloc(sizeof(double) * tne * np); bufq = (double *)malloc(sizeof(double) * tne * np); for (int i = 0; i < ne * np; i++) { bufx[i] = xx[i]; bufq[i] = qq[i]; } } if (rank == 0) { int index[nprocs]; index[0] = ne * np; int idx = index[0]; for (int i = 1; i < nprocs; i++) { MPI_Irecv(index + i, 1, MPI_INT, i, 11, MPI_COMM_WORLD, &rer1); MPI_Wait(&rer1, &st); index[i] *= np; MPI_Irecv(bufx + idx, index[i], MPI_DOUBLE, i, 22, MPI_COMM_WORLD, &rer2); MPI_Irecv(bufq + idx, index[i], MPI_DOUBLE, i, 33, MPI_COMM_WORLD, &rer3); MPI_Wait(&rer2, &st); MPI_Wait(&rer3, &st); idx += index[i]; } for(int i = 0; i < tne*np; i++){ printf("%f ", bufx[i]); } printf("\n"); for(int i = 0; i < tne*np; i++){ printf("%f ", bufq[i]); } printf("\n"); save_field(bufx, bufq, tne, roots, eres); t_end = clock(); printf("Motion time = %f msec\n", (double)(t_end - t_sta) / 1000.0); } free(roots); free(weights); free(ll); free(dl); free(dx); free(mesh); free(smat); free(xx); free(qq); free(qtemp); free(k1); free(k2); free(k3); free(k4); free(minv_vec); free(mmat); free(dv); free(mf); free(ib); free(fstar); free(df); MPI_Finalize(); return 0; }
int main (int argc, char **argv) { int i,n1,n2,j,jsta,jend; int iter,niter; MPI_Status istatus; int ierr, myid,nid; int iprev, inext, ista, iend; MPI_Request isd1,isd2,irv1,irv2; int itag, iroot; double xi,xf,dx; double tmr; double *ar, *br; double ptmr, tic,toc; /* do not change ------ */ n1 = 0; n2 = 100000000; niter = 3; /* do not change ------ */ ar = (double*) malloc(n2*sizeof(double)); br = (double*) malloc(n2*sizeof(double)); xi = 0.L; xf = 1.; dx = (xf-xi)/(double)(n2-n1-1); for(i=n1;i<n2;i++){ br[i] = xi+(double)(i-n1)*dx; } MPI_Init(&argc, &argv); tic = MPI_Wtime(); MPI_Comm_size(MPI_COMM_WORLD, &nid); MPI_Comm_rank(MPI_COMM_WORLD, &myid); para_range(n1,n2,nid,myid,&ista,&iend); // para range를 통해 작업 범위 나눔. printf("rank:%10d ista=%15d iend=%15d\n", myid, ista, iend); jsta = ista; jend = iend; if(myid==0) jsta = n1+1; if(myid == nid-1) jend = n2-1; // send/recv할때 보낼 rank에 사용 inext = myid + 1; iprev = myid - 1; if(myid == nid-1) inext = MPI_PROC_NULL; if(myid == 0) iprev = MPI_PROC_NULL; for(i=ista;i<iend;i++){ br[i] = xi+ (double)(i-n1)*dx; } for(iter=0;iter<niter;iter++){ itag = 101; /** * 각 부분에서 idx-1과 idx+1부분이 필요하기 때문에 * Isend/Irecv로 비동기적으로 보낸다. * Wait를 통해 통신이 동작하는지 확인한다. */ MPI_Isend(br+iend-1, 1, MPI_DOUBLE, inext, itag, MPI_COMM_WORLD, &isd1); // inext에 b[j-1]전달 MPI_Isend(br+ista, 1, MPI_DOUBLE, iprev, itag, MPI_COMM_WORLD, &isd2); // iprev b[j+1] 전달 MPI_Irecv(br+ista-1, 1, MPI_DOUBLE, iprev, itag, MPI_COMM_WORLD, &irv1); // b[j-1] 받음 MPI_Irecv(br+iend, 1, MPI_DOUBLE, inext, itag, MPI_COMM_WORLD, &irv2); // b[j+1] 받음 MPI_Wait(&isd1,&istatus); MPI_Wait(&isd2,&istatus); MPI_Wait(&irv1,&istatus); MPI_Wait(&irv2,&istatus); for(j=jsta;j<jend;j++) { /* not change -----{ */ ar[j] = (br[j-1]+br[j+1])/4.L + br[j]/2.L + 1.L/genvv(br[j]); /* not change -----} */ } for(i=ista;i<iend;i++) { /* not change -----{ */ br[i] = ar[i]; /* not change -----} */ } } ptmr = 0.L; for(j=jsta;j<jend;j++){ ptmr += ar[j]; } iroot = 0; MPI_Reduce(&ptmr, &tmr, 1, MPI_DOUBLE, MPI_SUM, iroot, MPI_COMM_WORLD); // MPI_Reduce로 ptmr 합침 if(myid==0) printf("tmr = %16.6f\n",tmr); toc = MPI_Wtime(); if(myid==0) printf("%g sec\n",toc-tic); free(ar); free(br); MPI_Finalize(); }