Ejemplo n.º 1
0
int main(int argc, char **argv)
{

	//double tend = 1E2, speed = 1.;
	double tend = 1E-1, speed = 1.;
	char *init_type = "mixed2";
	double *roots, *weights, *ll, *dl, xmin, xmax, lxmin, lxmax,
		deltax, jac, xr, xl, cfl, dt, rtime, min_dx;
	int ii, jj, kk, ee, idx, eres;
	long nstep;
	double *dx, *mesh;
	double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv,
		*mf, *ib, *df, *fstar;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	para_range(0, tne, nprocs, rank, &ista, &iend);
	ne = iend - ista;

	// initialize
	// fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne)
	// c 1d index structure array = [ee*np*np + jj*np + ii]
	roots = (double *)malloc(np * sizeof(double));
	weights = (double *)malloc(np * sizeof(double));
	ll = (double *)malloc(np * sizeof(double));
	dl = (double *)malloc(np * sizeof(double));
	dx = (double *)malloc(ne * sizeof(double));
	mesh = (double *)malloc((ne + 1) * sizeof(double));

	smat = (double *)malloc(np * np * sizeof(double));		// [jj np, ii np]
	xx = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	qq = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	qtemp = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	k1 = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	k2 = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	k3 = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	k4 = (double *)malloc(ne * np * sizeof(double));		// [ee ne, ii np]
	minv_vec = (double *)malloc(ne * np * sizeof(double));  // [ee ne, ii np]
	mmat = (double *)malloc(ne * np * np * sizeof(double)); // [ee ne, jj np, ii np]
	dv = (double *)malloc(ne * np * np * sizeof(double));   // [ee ne, jj np, ii np]
	mf = (double *)malloc(2 * np * sizeof(double));			// [jj 2,  ii np]
	ib = (double *)malloc(2 * np * sizeof(double));			// [jj 2,  ii np]
	fstar = (double *)malloc(2 * ne * sizeof(double));		// [jj 2,  ii ne]
	df = (double *)malloc(ne * 2 * np * sizeof(double));	// [ee ne, jj 2, ii np]

	for (ii = 0; ii < np; ++ii)
	{
		roots[ii] = 0;
		weights[ii] = 0;
		ll[ii] = 0;
		dl[ii] = 0;
	}
	for (ii = 0; ii < ne; ++ii)
	{
		dx[ii] = 0;
		mesh[ii] = 0;
	}
	mesh[ne] = 0;

	for (ii = 0; ii < np * np; ++ii)
	{
		smat[ii] = 0;
	}
	for (ii = 0; ii < ne * np; ++ii)
	{
		xx[ii] = 0;
		qq[ii] = 0;
		k1[ii] = 0;
		k2[ii] = 0;
		k3[ii] = 0;
		k4[ii] = 0;
		minv_vec[ii] = 0;
	}
	for (ii = 0; ii < ne * np * np; ++ii)
	{
		mmat[ii] = 0;
		dv[ii] = 0;
	}
	for (ii = 0; ii < np * 2; ++ii)
	{
		mf[ii] = 0;
		ib[ii] = 0;
	}
	for (ii = 0; ii < ne * 2; ++ii)
	{
		fstar[ii] = 0;
	}
	for (ii = 0; ii < ne * 2 * np; ++ii)
	{
		df[ii] = 0;
	}

	// mesh setup
	xmin = 0.;
	xmax = 10.;
	deltax = (xmax-xmin)/(double)tne;
	/**
	 * lxim, lxmax를 이용하여 각 구간의 mesh[ee]를 구한다
	 * ne의 크기가 tne / process의 개수이기 때문에, 
	 * 각 구간에 맞는 mesh[ee]를 구해야 한다.
	 * 그리고 mesh[ee]를 이용하여 각 변수들을 초기화 한다.
	 */
	lxmin = xmin + (ista)*deltax;
	lxmax = xmin + (iend)*deltax;
	/**
	 * mesh[ne]은 마지막 원소가 아니라는점에 유의한다.
	 */ 
	mesh[ne] = lxmax; 
	for(ee=0;ee<ne;++ee){
		mesh[ee] = lxmin+ee*deltax;
	}

	// gauss lobatto quadrature point, weight setup
	gausslobatto_quadrature(np, roots, weights);

	// coordinates and element size
	min_dx = xmax - xmin; // initial guess
	for (ee = 0; ee < ne; ee++)
	{
		xl = mesh[ee];
		xr = mesh[ee + 1];
		dx[ee] = xr - xl; // size of each element
		if (dx[ee] < min_dx)
		{
			min_dx = dx[ee]; // finding minimum dx
		}
		for (ii = 0; ii < np; ii++)
		{
			idx = ee * np + ii;
			xx[idx] = xl + 0.5 * (1 + roots[ii]) * dx[ee];
		}
	}

	// mass matrix
	for (ii = 0; ii < ne * np * np; ii++)
	{
		mmat[ii] = 0;
	}
	for (ee = 0; ee < ne; ee++)
	{
		jac = fabs(dx[ee]) / 2;
		for (kk = 0; kk < np; kk++)
		{
			lagrange(roots[kk], ll, roots);
			for (jj = 0; jj < np; jj++)
			{
				for (ii = 0; ii < np; ii++)
				{
					idx = ee * np * np + jj * np + ii;
					// mass matrix mmat[ne][np][np] in 1d index representation
					mmat[idx] += jac * weights[kk] * ll[ii] * ll[jj];
				}
			}
		}
	}

	// stiffness matrix
	for (ii = 0; ii < np * np; ii++)
	{
		smat[ii] = 0;
	}
	for (kk = 0; kk < np; kk++)
	{
		lagrange(roots[kk], ll, roots);
		lagrange_deriv(roots[kk], dl, roots);
		for (jj = 0; jj < np; jj++)
		{
			for (ii = 0; ii < np; ii++)
			{
				idx = jj * np + ii;
				// stiffness matrix smat[np][np] in 1d index representation
				smat[idx] += weights[kk] * ll[jj] * dl[ii];
			}
		}
	}

	// face integration
	for (ii = 0; ii < np * 2; ii++)
	{
		mf[ii] = 0;
	}
	lagrange(-1, mf, roots);	 // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration
	lagrange(1, mf + np, roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration

	// boundary interpolation
	for (ii = 0; ii < np * 2; ii++)
	{
		ib[ii] = 0;
	}
	lagrange(-1, ib, roots);	 // element left edge interpolation
	lagrange(1, ib + np, roots); // element right edge interpolation

	// divergence operators
	for (ii = 0; ii < ne * np * np; ii++)
	{
		dv[ii] = 0;
	}
	for (ii = 0; ii < ne * np * 2; ii++)
	{
		dv[ii] = 0;
	}
	for (ee = 0; ee < ne; ee++)
	{
		for (jj = 0; jj < np; jj++)
		{
			// it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0
			// the inverse of mmat is just the inverse of the diagonal components
			// here, we are extracting the inverse diagonal components only
			minv_vec[ee * np + jj] = 1. / mmat[ee * np * np + jj * np + jj];
		}
		for (jj = 0; jj < np; jj++)
		{
			for (ii = 0; ii < np; ii++)
			{
				dv[ee * np * np + jj * np + ii] = minv_vec[ee * np + ii] * smat[jj * np + ii];
			}
		}
		for (jj = 0; jj < 2; jj++)
		{
			for (ii = 0; ii < np; ii++)
			{
				df[ee * np * 2 + jj * np + ii] = minv_vec[ee * np + ii] * mf[jj * np + ii];
			}
		}
	}

	// initialize qq field
	initialize(qq, xx, xmax, xmin, init_type);
	cfl = 1. / (np * np);
	dt = cfl * min_dx / fabs(speed);
	rtime = 0.;
	nstep = 0;

	printf("Start Time Integration\n");

	// Runge-Kutta 4th order Time integration loop

	t_sta = clock();

	while (rtime < tend)
	{
		dt = fmin(dt, tend - rtime);
		rhs(qq, k1, dv, df, ib, speed);
		for (ii = 0; ii < ne * np; ii++)
			qtemp[ii] = qq[ii] + 0.5 * dt * k1[ii];

		rhs(qtemp, k2, dv, df, ib, speed);
		for (ii = 0; ii < ne * np; ii++)
			qtemp[ii] = qq[ii] + 0.5 * dt * k2[ii];
		rhs(qtemp, k3, dv, df, ib, speed);


		for (ii = 0; ii < ne * np; ii++)
			qtemp[ii] = qq[ii] + dt * k3[ii];
		rhs(qtemp, k4, dv, df, ib, speed);

		for (ii = 0; ii < ne * np; ii++)
			qq[ii] += 1. / 6. * dt * (k1[ii] + 2 * k2[ii] + 2 * k3[ii] + k4[ii]);

		rtime += dt;
		nstep += 1;
		if (nstep % 10000 == 0 && rank == 0)
			printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime / tend * 100);
	}

	// timeloop ends here;

	if (rank != 0)
	{
		int nne = iend - ista;
		MPI_Isend(&nne, 1, MPI_INT, 0, 11, MPI_COMM_WORLD, &ser1);
		MPI_Isend(xx, ne * np, MPI_DOUBLE, 0, 22, MPI_COMM_WORLD, &ser2);
		MPI_Isend(qq, ne * np, MPI_DOUBLE, 0, 33, MPI_COMM_WORLD, &ser3);
		MPI_Wait(&ser1, &st);
		MPI_Wait(&ser2, &st);
		MPI_Wait(&ser3, &st);
	}

	double *bufx;
	double *bufq;
	int *istart;
	int *idisp;

	if (rank == 0)
	{

		printf("Integration complete\n");

		if (tne > 200)
		{
			eres = 2;
		}
		else if (tne > 60)
		{
			eres = 3;
		}
		else if (tne > 30)
		{
			eres = 6;
		}
		else
		{
			eres = 10;
		}

		// final report
		printf("-----------------------------------------------\n");
		printf("code type   : c serial\n");
		printf("Final time  : %13.5e\n", rtime);
		printf("CFL         : %13.5e\n", cfl);
		printf("DOF         : %13d\n", tne * np);
		printf("No. of Elem : %13d\n", tne);
		printf("Order       : %13d\n", np);
		printf("eres        : %13d\n", eres);
		printf("time steps  : %13ld\n", nstep);
		printf("-----------------------------------------------\n");

		bufx = (double *)malloc(sizeof(double) * tne * np);
		bufq = (double *)malloc(sizeof(double) * tne * np);

		for (int i = 0; i < ne * np; i++)
		{
			bufx[i] = xx[i];
			bufq[i] = qq[i];
		}
	}
	if (rank == 0)
	{
		int index[nprocs];
		index[0] = ne * np;
		int idx = index[0];
		for (int i = 1; i < nprocs; i++)
		{
			MPI_Irecv(index + i, 1, MPI_INT, i, 11, MPI_COMM_WORLD, &rer1);
			MPI_Wait(&rer1, &st);
			index[i] *= np;
			MPI_Irecv(bufx + idx, index[i], MPI_DOUBLE, i, 22, MPI_COMM_WORLD, &rer2);
			MPI_Irecv(bufq + idx, index[i], MPI_DOUBLE, i, 33, MPI_COMM_WORLD, &rer3);
			MPI_Wait(&rer2, &st);
			MPI_Wait(&rer3, &st);
			idx += index[i];
		}

		for(int i = 0; i < tne*np; i++){
			printf("%f ", bufx[i]);
		}
		printf("\n");
		for(int i = 0; i < tne*np; i++){
			printf("%f ", bufq[i]);
		}
		printf("\n");

		save_field(bufx, bufq, tne, roots, eres);
		t_end = clock();
		printf("Motion time = %f msec\n", (double)(t_end - t_sta) / 1000.0);
	}
	free(roots);
	free(weights);
	free(ll);
	free(dl);
	free(dx);
	free(mesh);
	free(smat);
	free(xx);
	free(qq);
	free(qtemp);
	free(k1);
	free(k2);
	free(k3);
	free(k4);
	free(minv_vec);
	free(mmat);
	free(dv);
	free(mf);
	free(ib);
	free(fstar);
	free(df);

	MPI_Finalize();
	return 0;
}
Ejemplo n.º 2
0
/*
 * Driver: applies `niter` sweeps of a 3-point stencil to br[n1..n2) in
 * parallel. Each rank updates its own index range (from para_range) after
 * exchanging one halo value with each neighbour; the interior sum of the
 * result is reduced onto rank 0 and printed together with the elapsed time.
 *
 * The two global end points br[n1] and br[n2-1] are boundary values: they are
 * never recomputed and must keep their initial value.
 *
 * Relies on para_range() and genvv(), which are defined elsewhere.
 * Returns 0 on completion, 1 if the work arrays cannot be allocated.
 */
int main (int argc, char **argv)
{
	int i,n1,n2,j,jsta,jend;
	int iter,niter;
	MPI_Status istatus;
	int myid,nid;
	int iprev, inext, ista, iend;
	MPI_Request isd1,isd2,irv1,irv2;
	int itag, iroot;
	double xi,xf,dx;
	double tmr;
	double *ar, *br;
	double *halo_lo;
	double ptmr, tic,toc;

	/* do not change ------ */
	n1 = 0;
	n2 = 100000000;
	niter = 3;
	/* do not change ------ */

	ar = (double*) malloc((size_t)n2*sizeof(double));
	br = (double*) malloc((size_t)n2*sizeof(double));
	if(ar == NULL || br == NULL){
		/* Each array is n2 doubles; fail with a message instead of crashing. */
		fprintf(stderr, "failed to allocate work arrays (%d doubles each)\n", n2);
		free(ar);
		free(br);
		return 1;
	}

	xi = 0.L;
	xf = 1.;
	dx = (xf-xi)/(double)(n2-n1-1);

	/* Every rank initialises the complete br array, halo cells included. */
	for(i=n1;i<n2;i++){
		br[i] = xi+(double)(i-n1)*dx;
	}

	MPI_Init(&argc, &argv);
	tic = MPI_Wtime();
	MPI_Comm_size(MPI_COMM_WORLD, &nid);
	MPI_Comm_rank(MPI_COMM_WORLD, &myid);

	para_range(n1,n2,nid,myid,&ista,&iend); // split the index range among ranks
	printf("rank:%10d ista=%15d iend=%15d\n", myid, ista, iend);

	/* [jsta, jend) is the part of [ista, iend) that is actually updated:
	 * the first and last global points are excluded. */
	jsta = ista;
	jend = iend;
	if(myid==0) jsta = n1+1;
	if(myid == nid-1) jend = n2-1;

	// neighbour ranks for the halo exchange; the ends of the chain talk to nobody
	inext = myid + 1;
	iprev = myid - 1;
	if(myid == nid-1) inext = MPI_PROC_NULL;
	if(myid == 0) iprev = MPI_PROC_NULL;

	/* Receive slot for the lower halo. With no lower neighbour nothing is
	 * received, so point at br+ista rather than forming br+ista-1, which
	 * would lie before the start of the array when ista is 0. */
	halo_lo = (iprev == MPI_PROC_NULL) ? br+ista : br+ista-1;

	for(iter=0;iter<niter;iter++){
		itag = 101;
		/*
		 * The stencil needs br[ista-1] and br[iend], which belong to the
		 * neighbouring ranks, so they are exchanged with non-blocking
		 * Isend/Irecv; the Waits ensure the transfers have completed before
		 * the values are used.
		 */
		MPI_Isend(br+iend-1, 1, MPI_DOUBLE, inext, itag, MPI_COMM_WORLD, &isd1); // our last value -> next rank
		MPI_Isend(br+ista,   1, MPI_DOUBLE, iprev, itag, MPI_COMM_WORLD, &isd2); // our first value -> previous rank
		MPI_Irecv(halo_lo,   1, MPI_DOUBLE, iprev, itag, MPI_COMM_WORLD, &irv1); // br[ista-1] <- previous rank
		MPI_Irecv(br+iend,   1, MPI_DOUBLE, inext, itag, MPI_COMM_WORLD, &irv2); // br[iend] <- next rank
		MPI_Wait(&isd1,&istatus);
		MPI_Wait(&isd2,&istatus);
		MPI_Wait(&irv1,&istatus);
		MPI_Wait(&irv2,&istatus);

		for(j=jsta;j<jend;j++) {
			/*  not change -----{ */
			ar[j] = (br[j-1]+br[j+1])/4.L + br[j]/2.L + 1.L/genvv(br[j]);
			/*  not change -----} */
		}
		/* Copy back only what was computed above. Using [ista, iend) here
		 * would overwrite the boundary values br[n1] and br[n2-1] with
		 * uninitialised ar entries. */
		for(i=jsta;i<jend;i++) {
			/*  not change -----{ */
			br[i] = ar[i];
			/*  not change -----} */
		}
	}
	ptmr = 0.L;
	for(j=jsta;j<jend;j++){
		ptmr += ar[j];
	}
	iroot = 0;
	MPI_Reduce(&ptmr, &tmr, 1, MPI_DOUBLE, MPI_SUM, iroot, MPI_COMM_WORLD); // sum the partial results onto rank 0
	if(myid==0) printf("tmr = %16.6f\n",tmr);
	toc = MPI_Wtime();
	if(myid==0) printf("%g sec\n",toc-tic);

	free(ar);
	free(br);

	MPI_Finalize();

	return 0;
}