/*--------------------------------------------------------------------------
 * hypre_SMGAxpy
 *
 * Accumulates a scaled vector into another: y += alpha * x.
 *
 * For each box i of y's grid, the box is copied into a scratch box, projected
 * with hypre_ProjectBox(box, base_index, base_stride), and the update is
 * applied with stride base_stride starting at the projected box's lower
 * corner. x and y are addressed through their own data-space boxes, both
 * taken at the same box number i.
 *
 * Parameters:
 *   alpha       - scalar multiplier applied to the entries of x
 *   x           - source vector (only read in the visible code)
 *   y           - vector updated in place; its grid supplies the boxes
 *   base_index  - index handed to hypre_ProjectBox
 *   base_stride - stride used for the projection and for the loop
 *
 * NOTE(review): this excerpt stops right after the loop body. The loop
 * terminator, the release of the scratch box obtained from hypre_BoxCreate()
 * and the return of ierr are not visible here - confirm they follow.
 *--------------------------------------------------------------------------*/
int hypre_SMGAxpy( double alpha, hypre_StructVector *x, hypre_StructVector *y, hypre_Index base_index, hypre_Index base_stride ) { int ierr = 0; hypre_Box *x_data_box; hypre_Box *y_data_box; int xi; int yi; double *xp; double *yp; hypre_BoxArray *boxes; hypre_Box *box; hypre_Index loop_size; hypre_IndexRef start; int i; int loopi, loopj, loopk; box = hypre_BoxCreate(); boxes = hypre_StructGridBoxes(hypre_StructVectorGrid(y)); hypre_ForBoxI(i, boxes) { hypre_CopyBox(hypre_BoxArrayBox(boxes, i), box); hypre_ProjectBox(box, base_index, base_stride); start = hypre_BoxIMin(box); x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i); y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i); xp = hypre_StructVectorBoxData(x, i); yp = hypre_StructVectorBoxData(y, i); hypre_BoxGetStrideSize(box, base_stride, loop_size); hypre_BoxLoop2Begin(loop_size, x_data_box, start, base_stride, xi, y_data_box, start, base_stride, yi); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,xi,yi #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, xi, yi) { yp[yi] += alpha * xp[xi]; }
/*--------------------------------------------------------------------------
 * hypre_SMGSetStructVectorConstantValues
 *
 * Writes the scalar `values` into the entries of `vector` selected by
 * `box_array` and `stride`.
 *
 * For each box i of box_array the loop starts at the box's lower corner and
 * advances by `stride`; the target storage is the vector's data-space box i
 * and box data i.
 *
 * Parameters:
 *   vector    - vector whose coefficients are overwritten
 *   values    - the constant stored in every visited entry
 *   box_array - boxes describing the index ranges to fill
 *   stride    - stride applied inside each box
 *
 * NOTE(review): box_array is indexed with the same i as the vector's data
 * space, so it presumably has one box per grid box of the vector - confirm
 * against callers.
 * NOTE(review): the excerpt ends after the box loop; the return of ierr is
 * not visible here.
 *--------------------------------------------------------------------------*/
int hypre_SMGSetStructVectorConstantValues( hypre_StructVector *vector, double values, hypre_BoxArray *box_array, hypre_Index stride ) { int ierr = 0; hypre_Box *v_data_box; int vi; double *vp; hypre_Box *box; hypre_Index loop_size; hypre_IndexRef start; int loopi, loopj, loopk; int i; /*----------------------------------------------------------------------- * Set the vector coefficients *-----------------------------------------------------------------------*/ hypre_ForBoxI(i, box_array) { box = hypre_BoxArrayBox(box_array, i); start = hypre_BoxIMin(box); v_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(vector), i); vp = hypre_StructVectorBoxData(vector, i); hypre_BoxGetStrideSize(box, stride, loop_size); hypre_BoxLoop1Begin(loop_size, v_data_box, start, stride, vi); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,vi #include "hypre_box_smp_forloop.h" hypre_BoxLoop1For(loopi, loopj, loopk, vi) { vp[vi] = values; } hypre_BoxLoop1End(vi); }
/*--------------------------------------------------------------------------
 * hypre_SemiInterp
 *
 * Interpolates the coarse-grid vector xc into the fine-grid vector e using
 * the two coefficients of P.
 *
 * Steps visible in this excerpt:
 *   1. Injection: for every coarse box ci, the matching fine box fi is found
 *      by advancing fi until fgrid_ids[fi] == cgrid_ids[ci]; then
 *      e[fine index] = xc[coarse index], the fine index being the coarse one
 *      mapped with hypre_StructMapCoarseToFine(cindex, stride).
 *   2. Fine points, in two passes (compute_i = 0, 1): pass 0 starts the
 *      exchange of e with hypre_InitializeIndtComputations and works on the
 *      package's "Indt" boxes; pass 1 calls hypre_FinalizeIndtComputations
 *      and works on the "Dept" boxes. In both passes
 *         e[ei] = Pp0[Pi] * ep0[ei] + Pp1[Pi] * ep1[ei]
 *      where ep0/ep1 are e shifted by stencil_shape[0] / stencil_shape[1].
 *
 * Coefficient addressing:
 *   - P_stored_as_transpose set: Pp0 is entry 1 and Pp1 is entry 0 moved back
 *     by the offset distance of stencil_shape[0] (the CC variant of the
 *     offset is used when constant_coefficient is nonzero).
 *   - otherwise Pp0 is entry 0 and Pp1 is entry 1.
 *   - constant_coefficient nonzero: Pi is computed once per compute box with
 *     hypre_CCBoxIndexRank and only e is looped over; otherwise P and e are
 *     looped over together.
 *
 * Parameters:
 *   interp_vdata - opaque pointer to a hypre_SemiInterpData
 *   P            - interpolation matrix; constant_coefficient must be 0 or 1
 *                  (asserted)
 *   xc           - coarse-grid vector (source)
 *   e            - fine-grid vector (destination, also read through ep0/ep1)
 *
 * NOTE(review): the `while (fgrid_ids[fi] != cgrid_ids[ci])` search has no
 * upper bound on fi, so it presumably relies on every coarse box id also
 * appearing, in order, among the fine box ids - confirm.
 * NOTE(review): the excerpt ends inside the fine-point loops; the closing
 * braces, any timing/bookkeeping calls and the return are not visible here.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_SemiInterp( void *interp_vdata, hypre_StructMatrix *P, hypre_StructVector *xc, hypre_StructVector *e ) { hypre_SemiInterpData *interp_data = interp_vdata; HYPRE_Int P_stored_as_transpose; hypre_ComputePkg *compute_pkg; hypre_IndexRef cindex; hypre_IndexRef findex; hypre_IndexRef stride; hypre_StructGrid *fgrid; HYPRE_Int *fgrid_ids; hypre_StructGrid *cgrid; hypre_BoxArray *cgrid_boxes; HYPRE_Int *cgrid_ids; hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *P_dbox; hypre_Box *xc_dbox; hypre_Box *e_dbox; HYPRE_Int Pi; HYPRE_Int xci; HYPRE_Int ei; HYPRE_Int constant_coefficient; HYPRE_Real *Pp0, *Pp1; HYPRE_Real *xcp; HYPRE_Real *ep, *ep0, *ep1; hypre_Index loop_size; hypre_Index start; hypre_Index startc; hypre_Index stridec; hypre_StructStencil *stencil; hypre_Index *stencil_shape; HYPRE_Int compute_i, fi, ci, j; /*----------------------------------------------------------------------- * Initialize some things *-----------------------------------------------------------------------*/ hypre_BeginTiming(interp_data -> time_index); P_stored_as_transpose = (interp_data -> P_stored_as_transpose); compute_pkg = (interp_data -> compute_pkg); cindex = (interp_data -> cindex); findex = (interp_data -> findex); stride = (interp_data -> stride); stencil = hypre_StructMatrixStencil(P); stencil_shape = hypre_StructStencilShape(stencil); constant_coefficient = hypre_StructMatrixConstantCoefficient(P); hypre_assert( constant_coefficient==0 || constant_coefficient==1 ); /* ... 
constant_coefficient==2 for P shouldn't happen, see hypre_PFMGCreateInterpOp in pfmg_setup_interp.c */ if (constant_coefficient) hypre_StructVectorClearBoundGhostValues(e, 0); hypre_SetIndex3(stridec, 1, 1, 1); /*----------------------------------------------------------------------- * Compute e at coarse points (injection) *-----------------------------------------------------------------------*/ fgrid = hypre_StructVectorGrid(e); fgrid_ids = hypre_StructGridIDs(fgrid); cgrid = hypre_StructVectorGrid(xc); cgrid_boxes = hypre_StructGridBoxes(cgrid); cgrid_ids = hypre_StructGridIDs(cgrid); fi = 0; hypre_ForBoxI(ci, cgrid_boxes) { while (fgrid_ids[fi] != cgrid_ids[ci]) { fi++; } compute_box = hypre_BoxArrayBox(cgrid_boxes, ci); hypre_CopyIndex(hypre_BoxIMin(compute_box), startc); hypre_StructMapCoarseToFine(startc, cindex, stride, start); e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi); xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci); ep = hypre_StructVectorBoxData(e, fi); xcp = hypre_StructVectorBoxData(xc, ci); hypre_BoxGetSize(compute_box, loop_size); hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size, e_dbox, start, stride, ei, xc_dbox, startc, stridec, xci); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei,xci) HYPRE_SMP_SCHEDULE #endif hypre_BoxLoop2For(ei, xci) { ep[ei] = xcp[xci]; } hypre_BoxLoop2End(ei, xci); } /*----------------------------------------------------------------------- * Compute e at fine points *-----------------------------------------------------------------------*/ for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { ep = hypre_StructVectorData(e); hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } hypre_ForBoxArrayI(fi, compute_box_aa) { 
compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi); P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi); e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi); if (P_stored_as_transpose) { if ( constant_coefficient ) { Pp0 = hypre_StructMatrixBoxData(P, fi, 1); Pp1 = hypre_StructMatrixBoxData(P, fi, 0) - hypre_CCBoxOffsetDistance(P_dbox, stencil_shape[0]); } else { Pp0 = hypre_StructMatrixBoxData(P, fi, 1); Pp1 = hypre_StructMatrixBoxData(P, fi, 0) - hypre_BoxOffsetDistance(P_dbox, stencil_shape[0]); } } else { Pp0 = hypre_StructMatrixBoxData(P, fi, 0); Pp1 = hypre_StructMatrixBoxData(P, fi, 1); } ep = hypre_StructVectorBoxData(e, fi); ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]); ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); hypre_CopyIndex(hypre_BoxIMin(compute_box), start); hypre_StructMapFineToCoarse(start, findex, stride, startc); hypre_BoxGetStrideSize(compute_box, stride, loop_size); if ( constant_coefficient ) { Pi = hypre_CCBoxIndexRank( P_dbox, startc ); hypre_BoxLoop1Begin(hypre_StructMatrixNDim(P), loop_size, e_dbox, start, stride, ei); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei) HYPRE_SMP_SCHEDULE #endif hypre_BoxLoop1For(ei) { ep[ei] = (Pp0[Pi] * ep0[ei] + Pp1[Pi] * ep1[ei]); } hypre_BoxLoop1End(ei); } else { hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size, P_dbox, startc, stridec, Pi, e_dbox, start, stride, ei); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(HYPRE_BOX_PRIVATE,Pi,ei) HYPRE_SMP_SCHEDULE #endif hypre_BoxLoop2For(Pi, ei) { ep[ei] = (Pp0[Pi] * ep0[ei] + Pp1[Pi] * ep1[ei]); } hypre_BoxLoop2End(Pi, ei); } }
/*--------------------------------------------------------------------------
 * hypre_SparseMSGInterp
 *
 * Interpolates the coarse-grid vector xc into the fine-grid vector e using
 * the two coefficients (entries 0 and 1) of P.
 *
 * Steps visible in this excerpt:
 *   1. Injection: for every coarse box ci the matching fine box fi is found
 *      by advancing fi until fgrid_ids[fi] == cgrid_ids[ci]; then
 *      e[fine index] = xc[coarse index].
 *   2. Fine points, in two passes (compute_i = 0, 1): pass 0 starts the
 *      exchange of e with hypre_InitializeIndtComputations and works on the
 *      package's "Indt" boxes; pass 1 calls hypre_FinalizeIndtComputations
 *      and works on the "Dept" boxes. In both passes
 *         e[ei] = Pp0[Pi] * ep0[ei] + Pp1[Pi] * ep1[ei]
 *      where ep0/ep1 are e shifted by stencil_shape[0] / stencil_shape[1].
 *
 * P is walked with its own start and stride: the fine start index is mapped
 * to the coarse index startc (findex, stride) and then mapped again with
 * hypre_StructMapCoarseToFine(startc, cindex, strideP, startP); the loop
 * advances P by strideP while e advances by stride.
 *
 * Parameters:
 *   interp_vdata - opaque pointer to a hypre_SparseMSGInterpData
 *   P            - interpolation matrix
 *   xc           - coarse-grid vector (source)
 *   e            - fine-grid vector (destination, also read through ep0/ep1)
 *
 * NOTE(review): the `while (fgrid_ids[fi] != cgrid_ids[ci])` search has no
 * upper bound on fi, so it presumably relies on every coarse box id also
 * appearing, in order, among the fine box ids - confirm.
 * NOTE(review): the excerpt ends inside the fine-point loops; the closing
 * braces, any timing/bookkeeping calls and the return of ierr are not
 * visible here.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_SparseMSGInterp( void *interp_vdata, hypre_StructMatrix *P, hypre_StructVector *xc, hypre_StructVector *e ) { HYPRE_Int ierr = 0; hypre_SparseMSGInterpData *interp_data = interp_vdata; hypre_ComputePkg *compute_pkg; hypre_IndexRef cindex; hypre_IndexRef findex; hypre_IndexRef stride; hypre_IndexRef strideP; hypre_StructGrid *fgrid; HYPRE_Int *fgrid_ids; hypre_StructGrid *cgrid; hypre_BoxArray *cgrid_boxes; HYPRE_Int *cgrid_ids; hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *P_dbox; hypre_Box *xc_dbox; hypre_Box *e_dbox; HYPRE_Int Pi; HYPRE_Int xci; HYPRE_Int ei; double *Pp0, *Pp1; double *xcp; double *ep, *ep0, *ep1; hypre_Index loop_size; hypre_Index start; hypre_Index startc; hypre_Index startP; hypre_Index stridec; hypre_StructStencil *stencil; hypre_Index *stencil_shape; HYPRE_Int compute_i, fi, ci, j; HYPRE_Int loopi, loopj, loopk; /*----------------------------------------------------------------------- * Initialize some things *-----------------------------------------------------------------------*/ hypre_BeginTiming(interp_data -> time_index); compute_pkg = (interp_data -> compute_pkg); cindex = (interp_data -> cindex); findex = (interp_data -> findex); stride = (interp_data -> stride); strideP = (interp_data -> strideP); stencil = hypre_StructMatrixStencil(P); stencil_shape = hypre_StructStencilShape(stencil); hypre_SetIndex(stridec, 1, 1, 1); /*----------------------------------------------------------------------- * Compute e at coarse points (injection) *-----------------------------------------------------------------------*/ fgrid = hypre_StructVectorGrid(e); fgrid_ids = hypre_StructGridIDs(fgrid); cgrid = hypre_StructVectorGrid(xc); cgrid_boxes = hypre_StructGridBoxes(cgrid); cgrid_ids = hypre_StructGridIDs(cgrid); fi = 0; hypre_ForBoxI(ci, cgrid_boxes) { while (fgrid_ids[fi] != cgrid_ids[ci]) { fi++; } compute_box = hypre_BoxArrayBox(cgrid_boxes, 
ci); hypre_CopyIndex(hypre_BoxIMin(compute_box), startc); hypre_StructMapCoarseToFine(startc, cindex, stride, start); e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi); xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci); ep = hypre_StructVectorBoxData(e, fi); xcp = hypre_StructVectorBoxData(xc, ci); hypre_BoxGetSize(compute_box, loop_size); hypre_BoxLoop2Begin(loop_size, e_dbox, start, stride, ei, xc_dbox, startc, stridec, xci); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,ei,xci #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, ei, xci) { ep[ei] = xcp[xci]; } hypre_BoxLoop2End(ei, xci); } /*----------------------------------------------------------------------- * Compute e at fine points *-----------------------------------------------------------------------*/ for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { ep = hypre_StructVectorData(e); hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } hypre_ForBoxArrayI(fi, compute_box_aa) { compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi); P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi); e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi); Pp0 = hypre_StructMatrixBoxData(P, fi, 0); Pp1 = hypre_StructMatrixBoxData(P, fi, 1); ep = hypre_StructVectorBoxData(e, fi); ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]); ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); hypre_CopyIndex(hypre_BoxIMin(compute_box), start); hypre_StructMapFineToCoarse(start, findex, stride, startc); hypre_StructMapCoarseToFine(startc, cindex, strideP, startP); hypre_BoxGetStrideSize(compute_box, stride, loop_size); 
hypre_BoxLoop2Begin(loop_size, P_dbox, startP, strideP, Pi, e_dbox, start, stride, ei); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Pi,ei #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, Pi, ei) { ep[ei] = (Pp0[Pi] * ep0[ei] + Pp1[Pi] * ep1[ei]); } hypre_BoxLoop2End(Pi, ei); } }
/*--------------------------------------------------------------------------
 * hypre_SMGResidual
 *
 * Computes the residual r = b - A*x on the points selected by the
 * residual data's base_points / base_stride.
 *
 * The work is done in two passes (compute_i = 0, 1):
 *   pass 0 - starts the exchange of x with hypre_InitializeIndtComputations,
 *            copies b into r over every box of base_points (stride
 *            base_stride), and selects the compute package's "Indt" boxes;
 *   pass 1 - calls hypre_FinalizeIndtComputations and selects the "Dept"
 *            boxes.
 * After the switch, each pass subtracts A*x from r over its selected boxes:
 * for every stencil entry si, r[ri] -= A_si[Ai] * x[xi], where x is shifted
 * by the offset distance of stencil_shape[si] inside x's data box.
 *
 * Parameters:
 *   residual_vdata - opaque pointer to a hypre_SMGResidualData
 *   A              - matrix whose stencil entries are applied
 *   x              - vector multiplied by A
 *   b              - right-hand side, copied into r
 *   r              - residual, overwritten
 *
 * NOTE(review): the excerpt ends inside the stencil loop; the remaining
 * closing braces, any timing/bookkeeping calls and the return of ierr are
 * not visible here.
 *--------------------------------------------------------------------------*/
int hypre_SMGResidual( void *residual_vdata, hypre_StructMatrix *A, hypre_StructVector *x, hypre_StructVector *b, hypre_StructVector *r ) { int ierr = 0; hypre_SMGResidualData *residual_data = residual_vdata; hypre_IndexRef base_stride = (residual_data -> base_stride); hypre_BoxArray *base_points = (residual_data -> base_points); hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg); hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *A_data_box; hypre_Box *x_data_box; hypre_Box *b_data_box; hypre_Box *r_data_box; int Ai; int xi; int bi; int ri; double *Ap; double *xp; double *bp; double *rp; hypre_Index loop_size; hypre_IndexRef start; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int stencil_size; int compute_i, i, j, si; int loopi, loopj, loopk; hypre_BeginTiming(residual_data -> time_index); /*----------------------------------------------------------------------- * Compute residual r = b - Ax *-----------------------------------------------------------------------*/ stencil = hypre_StructMatrixStencil(A); stencil_shape = hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { xp = hypre_StructVectorData(x); hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); /*---------------------------------------- * Copy b into r *----------------------------------------*/ compute_box_a = base_points; hypre_ForBoxI(i, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, i); start = hypre_BoxIMin(compute_box); b_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); bp = hypre_StructVectorBoxData(b, i); rp = hypre_StructVectorBoxData(r, i); hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); 
hypre_BoxLoop2Begin(loop_size, b_data_box, start, base_stride, bi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri) { rp[ri] = bp[bi]; } hypre_BoxLoop2End(bi, ri); } } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } /*-------------------------------------------------------------------- * Compute r -= A*x *--------------------------------------------------------------------*/ hypre_ForBoxArrayI(i, compute_box_aa) { compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i); A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i); x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); rp = hypre_StructVectorBoxData(r, i); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); start = hypre_BoxIMin(compute_box); for (si = 0; si < stencil_size; si++) { Ap = hypre_StructMatrixBoxData(A, i, si); xp = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]); hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); hypre_BoxLoop3Begin(loop_size, A_data_box, start, base_stride, Ai, x_data_box, start, base_stride, xi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri) { rp[ri] -= Ap[Ai] * xp[xi]; } hypre_BoxLoop3End(Ai, xi, ri); }
/*--------------------------------------------------------------------------
 * hypre_PointRelax
 *
 * Point relaxation driver. Only the set-up and the zero-initial-guess sweep
 * are visible in this excerpt.
 *
 * Visible behaviour:
 *   - The A, b and x held in relax_data are destroyed and replaced with new
 *     references to the arguments; num_iterations is reset to 0.
 *   - If max_iter == 0 the function returns ierr immediately, first setting
 *     x to 0.0 when zero_guess is set.
 *   - If zero_guess is set, the first pointset (pointset_ranks[0]) is
 *     relaxed directly as x = b / A_diag, where A_diag is the stencil entry
 *     diag_rank. Both the "Indt" and the "Dept" boxes of that pointset's
 *     compute package are visited, and no communication call appears in this
 *     sweep.
 *
 * Parameters:
 *   relax_vdata - opaque pointer to a hypre_PointRelaxData
 *   A           - system matrix
 *   b           - right-hand side
 *   x           - solution vector, updated in place
 *
 * NOTE(review): the old references are released before the new ones are
 * taken. Whether that is safe when the caller passes the very objects already
 * stored in relax_data depends on the reference counting inside
 * hypre_Struct*Destroy / hypre_Struct*Ref - confirm.
 * NOTE(review): the excerpt ends after the zero-guess sweep; the general
 * iteration loop, the uses of weight and t, and the final return are not
 * visible here.
 *--------------------------------------------------------------------------*/
int hypre_PointRelax( void *relax_vdata, hypre_StructMatrix *A, hypre_StructVector *b, hypre_StructVector *x ) { hypre_PointRelaxData *relax_data = (hypre_PointRelaxData *)relax_vdata; int max_iter = (relax_data -> max_iter); int zero_guess = (relax_data -> zero_guess); double weight = (relax_data -> weight); int num_pointsets = (relax_data -> num_pointsets); int *pointset_ranks = (relax_data -> pointset_ranks); hypre_Index *pointset_strides = (relax_data -> pointset_strides); hypre_StructVector *t = (relax_data -> t); int diag_rank = (relax_data -> diag_rank); hypre_ComputePkg **compute_pkgs = (relax_data -> compute_pkgs); hypre_ComputePkg *compute_pkg; hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *A_data_box; hypre_Box *b_data_box; hypre_Box *x_data_box; hypre_Box *t_data_box; int Ai; int bi; int xi; int ti; double *Ap; double *bp; double *xp; double *tp; hypre_IndexRef stride; hypre_IndexRef start; hypre_Index loop_size; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int stencil_size; int iter, p, compute_i, i, j, si; int loopi, loopj, loopk; int pointset; int ierr = 0; /*---------------------------------------------------------- * Initialize some things and deal with special cases *----------------------------------------------------------*/ hypre_BeginTiming(relax_data -> time_index); hypre_StructMatrixDestroy(relax_data -> A); hypre_StructVectorDestroy(relax_data -> b); hypre_StructVectorDestroy(relax_data -> x); (relax_data -> A) = hypre_StructMatrixRef(A); (relax_data -> x) = hypre_StructVectorRef(x); (relax_data -> b) = hypre_StructVectorRef(b); (relax_data -> num_iterations) = 0; /* if max_iter is zero, return */ if (max_iter == 0) { /* if using a zero initial guess, return zero */ if (zero_guess) { hypre_StructVectorSetConstantValues(x, 0.0); } hypre_EndTiming(relax_data -> time_index); return ierr; } stencil = hypre_StructMatrixStencil(A); stencil_shape = 
hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); /*---------------------------------------------------------- * Do zero_guess iteration *----------------------------------------------------------*/ p = 0; iter = 0; if (zero_guess) { pointset = pointset_ranks[p]; compute_pkg = compute_pkgs[pointset]; stride = pointset_strides[pointset]; for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); } break; case 1: { compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } hypre_ForBoxArrayI(i, compute_box_aa) { compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i); A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i); b_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i); x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i); Ap = hypre_StructMatrixBoxData(A, i, diag_rank); bp = hypre_StructVectorBoxData(b, i); xp = hypre_StructVectorBoxData(x, i); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); start = hypre_BoxIMin(compute_box); hypre_BoxGetStrideSize(compute_box, stride, loop_size); hypre_BoxLoop3Begin(loop_size, A_data_box, start, stride, Ai, b_data_box, start, stride, bi, x_data_box, start, stride, xi); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,bi,xi #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ai, bi, xi) { xp[xi] = bp[bi] / Ap[Ai]; } hypre_BoxLoop3End(Ai, bi, xi); } } }
/*--------------------------------------------------------------------------
 * hypre_SMGResidual (variant with a hand-expanded, outlined inner kernel)
 *
 * Computes the residual r = b - A*x on the points selected by the
 * residual data's base_points / base_stride.
 *
 * The work is done in two passes (compute_i = 0, 1):
 *   pass 0 - starts the exchange of x with hypre_InitializeIndtComputations,
 *            copies b into r over every box of base_points, and selects the
 *            compute package's "Indt" boxes;
 *   pass 1 - calls hypre_FinalizeIndtComputations and selects the "Dept"
 *            boxes.
 * After the switch, each pass subtracts A*x from r over its selected boxes,
 * one stencil entry at a time.
 *
 * The r -= A*x kernel appears in three forms, selected by the preprocessor:
 *   - outer `#if 0`: the macro form (hypre_BoxLoop3For ... hypre_BoxLoop3End),
 *     disabled;
 *   - outer `#else`: an explicit loop over hypre__block that computes the
 *     per-block loopi/loopj/loopk offsets and extents and the Ai/xi/ri start
 *     indices; inside it
 *       - inner `#if 0`: the explicit triple loop, disabled;
 *       - inner `#else`: a call to OUT__1__6755__ which receives the
 *         addresses of the indices, data pointers, strides and extents.
 *         When BLCR_CHECKPOINTING is nonzero, a checkpoint is requested
 *         before the first such call (guarded by g_checkpoint_flag), with
 *         cr_signal set to SIGKILL and output to "dump.yy".
 *
 * Parameters:
 *   residual_vdata - opaque pointer to a hypre_SMGResidualData
 *   A              - matrix whose stencil entries are applied
 *   x              - vector multiplied by A
 *   b              - right-hand side, copied into r
 *   r              - residual, overwritten
 *
 * NOTE(review): the hypre__* variables used by the explicit loop are not
 * declared in the visible code; presumably hypre_BoxLoop3Begin declares
 * them - confirm.
 * NOTE(review): OUT__1__6755__, g_checkpoint_flag and `cr` are defined
 * outside this excerpt.
 * NOTE(review): the excerpt ends inside the stencil loop; the remaining
 * closing braces and the return of ierr are not visible here.
 *--------------------------------------------------------------------------*/
int hypre_SMGResidual( void *residual_vdata, hypre_StructMatrix *A, hypre_StructVector *x, hypre_StructVector *b, hypre_StructVector *r ) { int ierr = 0; hypre_SMGResidualData *residual_data = (hypre_SMGResidualData *)residual_vdata; hypre_IndexRef base_stride = (residual_data -> base_stride); hypre_BoxArray *base_points = (residual_data -> base_points); hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg); hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *A_data_box; hypre_Box *x_data_box; hypre_Box *b_data_box; hypre_Box *r_data_box; int Ai; int xi; int bi; int ri; double *Ap; double *xp; double *bp; double *rp; hypre_Index loop_size; hypre_IndexRef start; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int stencil_size; int compute_i, i, j, si; int loopi, loopj, loopk; hypre_BeginTiming(residual_data -> time_index); /*----------------------------------------------------------------------- * Compute residual r = b - Ax *-----------------------------------------------------------------------*/ stencil = hypre_StructMatrixStencil(A); stencil_shape = hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { xp = hypre_StructVectorData(x); hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); /*---------------------------------------- * Copy b into r *----------------------------------------*/ compute_box_a = base_points; hypre_ForBoxI(i, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, i); start = hypre_BoxIMin(compute_box); b_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); bp = hypre_StructVectorBoxData(b, i); rp = hypre_StructVectorBoxData(r, i); hypre_BoxGetStrideSize(compute_box, base_stride, 
loop_size); hypre_BoxLoop2Begin(loop_size, b_data_box, start, base_stride, bi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri) { rp[ri] = bp[bi]; } hypre_BoxLoop2End(bi, ri); } } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } /*-------------------------------------------------------------------- * Compute r -= A*x *--------------------------------------------------------------------*/ hypre_ForBoxArrayI(i, compute_box_aa) { compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i); A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i); x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); rp = hypre_StructVectorBoxData(r, i); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); start = hypre_BoxIMin(compute_box); for (si = 0; si < stencil_size; si++) { Ap = hypre_StructMatrixBoxData(A, i, si); xp = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]); hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); hypre_BoxLoop3Begin(loop_size, A_data_box, start, base_stride, Ai, x_data_box, start, base_stride, xi, r_data_box, start, base_stride, ri); #if 0 /* The following portion is preprocessed to be handled by ROSE outliner */ #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri) { rp[ri] -= Ap[Ai] * xp[xi]; } hypre_BoxLoop3End(Ai, xi, ri); #else for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) { loopi = 0; loopj = 0; loopk = 0; hypre__nx = hypre__mx; hypre__ny = hypre__my; hypre__nz = hypre__mz; if (hypre__num_blocks > 1) { if (hypre__dir == 0) { loopi = hypre__block * hypre__div + 
(((hypre__mod) < (hypre__block)) ? (hypre__mod) : (hypre__block)); hypre__nx = hypre__div + ((hypre__mod > hypre__block) ? 1 : 0); } else if (hypre__dir == 1) { loopj = hypre__block * hypre__div + (((hypre__mod) < (hypre__block)) ? (hypre__mod) : (hypre__block)); hypre__ny = hypre__div + ((hypre__mod > hypre__block) ? 1 : 0); } else if (hypre__dir == 2) { loopk = hypre__block * hypre__div + (((hypre__mod) < (hypre__block)) ? (hypre__mod) : (hypre__block)); hypre__nz = hypre__div + ((hypre__mod > hypre__block) ? 1 : 0); } }; Ai = hypre__i1start + loopi * hypre__sx1 + loopj * hypre__sy1 + loopk * hypre__sz1; xi = hypre__i2start + loopi * hypre__sx2 + loopj * hypre__sy2 + loopk * hypre__sz2; ri = hypre__i3start + loopi * hypre__sx3 + loopj * hypre__sy3 + loopk * hypre__sz3; //begin of the loop #if 0 // for (loopk = 0; loopk < hypre__nz; loopk++) { for (loopj = 0; loopj < hypre__ny; loopj++) { for (loopi = 0; loopi < hypre__nx; loopi++) { { rp[ri] -= Ap[Ai] * xp[xi]; } Ai += hypre__sx1; xi += hypre__sx2; ri += hypre__sx3; } Ai += hypre__sy1 - hypre__nx * hypre__sx1; xi += hypre__sy2 - hypre__nx * hypre__sx2; ri += hypre__sy3 - hypre__nx * hypre__sx3; } Ai += hypre__sz1 - hypre__ny * hypre__sy1; xi += hypre__sz2 - hypre__ny * hypre__sy2; ri += hypre__sz3 - hypre__ny * hypre__sy3; } // end of the loop #else #if BLCR_CHECKPOINTING // Only checkpoint it at the first occurrence. 
if (g_checkpoint_flag == 0) { int err; cr_checkpoint_args_t cr_args; cr_checkpoint_handle_t cr_handle; cr_initialize_checkpoint_args_t(&cr_args); cr_args.cr_scope = CR_SCOPE_PROC;// a process cr_args.cr_target = 0; //self cr_args.cr_signal = SIGKILL; // kill after checkpointing cr_args.cr_fd = open("dump.yy", O_WRONLY|O_CREAT|O_LARGEFILE, 0400); if (cr_args.cr_fd < 0) { printf("Error: cannot open file for checkpoiting context\n"); abort(); } g_checkpoint_flag ++; printf("Checkpoiting: starting here ..\n"); err = cr_request_checkpoint(&cr_args, &cr_handle); if (err < 0) { printf("cannot request checkpoining! err=%d\n",err); abort(); } // block until the request is served cr_enter_cs(cr); cr_leave_cs(cr); printf("Checkpoiting: restarting here ..\n"); } #endif OUT__1__6755__(&Ai,&xi,&ri,&Ap,&xp,&rp,&loopi,&loopj,&loopk,&hypre__sx1,&hypre__sy1,&hypre__sz1,&hypre__sx2,&hypre__sy2,&hypre__sz2,&hypre__sx3,&hypre__sy3,&hypre__sz3,&hypre__nx,&hypre__ny,&hypre__nz); #endif } };
int hypre_SMGResidual(void *residual_vdata,hypre_StructMatrix *A,hypre_StructVector *x,hypre_StructVector *b,hypre_StructVector *r) { int ierr = 0; hypre_SMGResidualData *residual_data = residual_vdata; hypre_IndexRef base_stride = (residual_data -> base_stride); hypre_BoxArray *base_points = (residual_data -> base_points); hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg); hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *A_data_box; hypre_Box *x_data_box; hypre_Box *b_data_box; hypre_Box *r_data_box; int Ai; int xi; int bi; int ri; double *Ap; double *xp; double *bp; double *rp; hypre_Index loop_size; hypre_IndexRef start; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int stencil_size; int compute_i; int i; int j; int si; int loopi; int loopj; int loopk; /* New static variables, precomputed */ hypre_BeginTiming((residual_data -> time_index)); stencil = (A -> stencil); stencil_shape = (stencil -> shape); stencil_size = (stencil -> size); (stencil_size <= 15)?0 : ((__assert_fail(("stencil_size <= 15"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(203),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i){ case 0: { { xp = (x -> data); hypre_InitializeIndtComputations(compute_pkg,xp,&comm_handle); compute_box_aa = (compute_pkg -> indt_boxes); compute_box_a = base_points; for (i = 0; i < (compute_box_a -> size); i++) { compute_box = ((compute_box_a -> boxes) + i); start = (compute_box -> imin); b_data_box = ((( *(b -> data_space)).boxes) + i); r_data_box = ((( *(r -> data_space)).boxes) + i); bp = ((b -> data) + ((b -> data_indices)[i])); rp = ((r -> data) + ((r -> data_indices)[i])); 
hypre_BoxGetStrideSize(compute_box,base_stride,loop_size); { int hypre__i1start = (((start[0]) - ((b_data_box -> imin)[0])) + ((((start[1]) - ((b_data_box -> imin)[1])) + (((start[2]) - ((b_data_box -> imin)[2])) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0)))); int hypre__i2start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0)))); int hypre__sx1 = (base_stride[0]); int hypre__sy1 = ((base_stride[1]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))); int hypre__sz1 = (((base_stride[2]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0))); int hypre__sx2 = (base_stride[0]); int hypre__sy2 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))); int hypre__sz2 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0))); 
int hypre__nx = (loop_size[0]); int hypre__ny = (loop_size[1]); int hypre__nz = (loop_size[2]); int hypre__mx = hypre__nx; int hypre__my = hypre__ny; int hypre__mz = hypre__nz; int hypre__dir; int hypre__max; int hypre__div; int hypre__mod; int hypre__block; int hypre__num_blocks; hypre__dir = 0; hypre__max = hypre__nx; if (hypre__ny > hypre__max) { hypre__dir = 1; hypre__max = hypre__ny; } if (hypre__nz > hypre__max) { hypre__dir = 2; hypre__max = hypre__nz; } hypre__num_blocks = 1; if (hypre__max < hypre__num_blocks) { hypre__num_blocks = hypre__max; } if (hypre__num_blocks > 0) { hypre__div = (hypre__max / hypre__num_blocks); hypre__mod = (hypre__max % hypre__num_blocks); } /* # 236 "smg_residual.c" */ (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(357),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(358),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); if (hypre__num_blocks == 1) { int ii; int jj; int kk; const double *bp_0 = (bp + hypre__i1start); double *rp_0 = (rp + hypre__i2start); for (kk = 0; kk < hypre__mz; kk++) { for (jj = 0; jj < hypre__my; jj++) { const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1)); double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2)); for (ii = 0; ii < hypre__mx; ii++) { rpp[ii] = (bpp[ii]); } } } /* hypre__num_blocks > 1 */ } else { for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) { loopi = 0; loopj = 0; loopk = 0; hypre__nx = hypre__mx; hypre__ny = hypre__my; hypre__nz = hypre__mz; if (hypre__dir == 0) { 
loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0))); } else if (hypre__dir == 1) { loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0))); } else if (hypre__dir == 2) { loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__nz = (hypre__div + (((hypre__mod > hypre__block)?1 : 0))); } bi = (((hypre__i1start + loopi) + (loopj * hypre__sy1)) + (loopk * hypre__sz1)); ri = (((hypre__i2start + loopi) + (loopj * hypre__sy2)) + (loopk * hypre__sz2)); /* AAA */ { int ii; int jj; int kk; const double *bp_0 = (bp + bi); double *rp_0 = (rp + ri); for (kk = 0; kk < hypre__nz; kk++) { for (jj = 0; jj < hypre__ny; jj++) { const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1)); double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2)); for (ii = 0; ii < hypre__nx; ii++) { rpp[ii] = (bpp[ii]); } } } /* AAA */ } } /* hypre__num_blocks > 1 */ } } } } break; } case 1: { { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = (compute_pkg -> dept_boxes); } break; } /* switch */ } /*-------------------------------------------------------------------- * Compute r -= A*x *--------------------------------------------------------------------*/ for (i = 0; i < (compute_box_aa -> size); i++) { int dxp_s[15UL]; compute_box_a = ((compute_box_aa -> box_arrays)[i]); A_data_box = ((( *(A -> data_space)).boxes) + i); x_data_box = ((( *(x -> data_space)).boxes) + i); r_data_box = ((( *(r -> data_space)).boxes) + i); rp = ((r -> data) + ((r -> data_indices)[i])); for (si = 0; si < stencil_size; si++) { dxp_s[si] = (((stencil_shape[si])[0]) + ((((stencil_shape[si])[1]) + (((stencil_shape[si])[2]) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - 
((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0)))); } for (j = 0; j < (compute_box_a -> size); j++) {{ int hypre__i1start; int hypre__i2start; int hypre__i3start; int hypre__sx1; int hypre__sy1; int hypre__sz1; int hypre__sx2; int hypre__sy2; int hypre__sz2; int hypre__sx3; int hypre__sy3; int hypre__sz3; int hypre__nx; int hypre__ny; int hypre__nz; int hypre__mx; int hypre__my; int hypre__mz; int hypre__dir; int hypre__max; int hypre__div; int hypre__mod; int hypre__block; int hypre__num_blocks; compute_box = ((compute_box_a -> boxes) + j); start = (compute_box -> imin); hypre__i1start = (((start[0]) - ((A_data_box -> imin)[0])) + ((((start[1]) - ((A_data_box -> imin)[1])) + (((start[2]) - ((A_data_box -> imin)[2])) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0)))); hypre__i2start = (((start[0]) - ((x_data_box -> imin)[0])) + ((((start[1]) - ((x_data_box -> imin)[1])) + (((start[2]) - ((x_data_box -> imin)[2])) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0)))); hypre__i3start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) 
: 0)))); hypre_BoxGetStrideSize(compute_box,base_stride,loop_size); hypre__sx1 = (base_stride[0]); hypre__sy1 = ((base_stride[1]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))); hypre__sz1 = (((base_stride[2]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0))); hypre__sx2 = (base_stride[0]); hypre__sy2 = ((base_stride[1]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))); hypre__sz2 = (((base_stride[2]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0))); hypre__sx3 = (base_stride[0]); hypre__sy3 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))); hypre__sz3 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0))); /* Based on BG/L Milestone #46 */ (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(602),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); (hypre__sx2 == 1)?0 : 
((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(603),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(604),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); hypre__mx = (loop_size[0]); hypre__my = (loop_size[1]); hypre__mz = (loop_size[2]); hypre__dir = 0; hypre__max = hypre__mx; if (hypre__my > hypre__max) { hypre__dir = 1; hypre__max = hypre__my; } if (hypre__mz > hypre__max) { hypre__dir = 2; hypre__max = hypre__mz; } hypre__num_blocks = 1; if (hypre__max < hypre__num_blocks) { hypre__num_blocks = hypre__max; } if (hypre__num_blocks > 0) { hypre__div = (hypre__max / hypre__num_blocks); hypre__mod = (hypre__max % hypre__num_blocks); } else continue; if (hypre__num_blocks == 1) { int si; int ii; int jj; int kk; const double *Ap_0 = ((A -> data) + hypre__i1start); const double *xp_0 = (((x -> data) + hypre__i2start) + ((x -> data_indices)[i])); ri = hypre__i3start; void *__out_argv1__1527__[21]; *(__out_argv1__1527__ + 0) = ((void *)(&xp_0)); *(__out_argv1__1527__ + 1) = ((void *)(&Ap_0)); *(__out_argv1__1527__ + 2) = ((void *)(&kk)); *(__out_argv1__1527__ + 3) = ((void *)(&jj)); *(__out_argv1__1527__ + 4) = ((void *)(&ii)); *(__out_argv1__1527__ + 5) = ((void *)(&si)); *(__out_argv1__1527__ + 6) = ((void *)(&hypre__mz)); *(__out_argv1__1527__ + 7) = ((void *)(&hypre__my)); *(__out_argv1__1527__ + 8) = ((void *)(&hypre__mx)); *(__out_argv1__1527__ + 9) = ((void *)(&hypre__sz3)); *(__out_argv1__1527__ + 10) = ((void *)(&hypre__sy3)); *(__out_argv1__1527__ + 11) = ((void *)(&hypre__sz2)); *(__out_argv1__1527__ + 12) 
= ((void *)(&hypre__sy2)); *(__out_argv1__1527__ + 13) = ((void *)(&hypre__sz1)); *(__out_argv1__1527__ + 14) = ((void *)(&hypre__sy1)); *(__out_argv1__1527__ + 15) = ((void *)(&dxp_s)); *(__out_argv1__1527__ + 16) = ((void *)(&i)); *(__out_argv1__1527__ + 17) = ((void *)(&stencil_size)); *(__out_argv1__1527__ + 18) = ((void *)(&rp)); *(__out_argv1__1527__ + 19) = ((void *)(&ri)); *(__out_argv1__1527__ + 20) = ((void *)(&A)); #ifdef USE_DLOPEN if (g_execution_flag == 0){ printf("Opening the .so file ...\n"); FunctionLib = dlopen("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/OUT__1__6119__.so",RTLD_LAZY); dlError = dlerror(); if( dlError ) { printf("cannot open .so file!\n"); exit(1); } /* Find the first loaded function */ OUT__1__6119__ = dlsym( FunctionLib, "OUT__1__6119__"); dlError = dlerror(); if( dlError ) { printf("cannot find OUT__1__6755__() !\n"); exit(1); } //remove("/tmp/peri.result"); //time1=time_stamp(); } // end if (flag ==0) g_execution_flag ++; (*OUT__1__6119__)(__out_argv1__1527__); #else OUT__1__6119__(__out_argv1__1527__); #endif /* hypre__num_blocks > 1 */ } else { for (si = 0; si < stencil_size; si++) { Ap = ((A -> data) + (((A -> data_indices)[i])[si])); xp = (((x -> data) + ((x -> data_indices)[i])) + (dxp_s[si])); for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) { loopi = 0; loopj = 0; loopk = 0; hypre__nx = hypre__mx; hypre__ny = hypre__my; hypre__nz = hypre__mz; if (hypre__dir == 0) { loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0))); } else if (hypre__dir == 1) { loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0))); } else if (hypre__dir == 2) { loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block))); hypre__nz = (hypre__div + (((hypre__mod > 
hypre__block)?1 : 0))); } Ai = (((hypre__i1start + (loopi * hypre__sx1)) + (loopj * hypre__sy1)) + (loopk * hypre__sz1)); xi = (((hypre__i2start + (loopi * hypre__sx2)) + (loopj * hypre__sy2)) + (loopk * hypre__sz2)); ri = (((hypre__i3start + (loopi * hypre__sx3)) + (loopj * hypre__sy3)) + (loopk * hypre__sz3)); /* CORE LOOP BEGIN */ (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(689),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(690),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(691),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0)); { /* In essence, this loop computes: * FOR_ALL i, j, k DO rp[ri + i + j*DJ_R + k*DK_R] -= Ap[Ai + i + j*DJ_A + k*DK_A] * xp[xi + i + j*DJ_X + k*DK_X]; */ // 1. 
promoting loop invariant expressions // j loop increment for Ai, xi, and ri int DJA0 = (hypre__sy1 - (hypre__nx * hypre__sx1)); int DJX0 = (hypre__sy2 - (hypre__nx * hypre__sx2)); int DJR0 = (hypre__sy3 - (hypre__nx * hypre__sx3)); // k loop increment for Ai, xi, and ri int DKA0 = (hypre__sz1 - (hypre__ny * hypre__sy1)); int DKX0 = (hypre__sz2 - (hypre__ny * hypre__sy2)); int DKR0 = (hypre__sz3 - (hypre__ny * hypre__sy3)); // pre-compute array index offset changes for one iteration within each level of loop // one iteration of j loop on ri int DJR1 = (DJR0 + (hypre__nx * hypre__sx3)); // one iteration of k loop on ri int DKR1 = (DKR0 + (hypre__ny * DJR1)); // one iteration of j loop on Ai int DJA1 = (DJA0 + (hypre__nx * hypre__sx1)); // one iteration of k loop on Ai int DKA1 = (DKA0 + (hypre__ny * DJA1)); // one iteration of j loop on xi int DJX1 = (DJX0 + (hypre__nx * hypre__sx2)); // one iteration of k loop on xi int DKX1 = (DKX0 + (hypre__ny * DJX1)); for (loopk = 0; loopk < hypre__nz; loopk++) { for (loopj = 0; loopj < hypre__ny; loopj++) { for (loopi = 0; loopi < hypre__nx; loopi++) {{ rp[((ri + (loopi * hypre__sx1)) + (loopj * DJR1)) + (loopk * DKR1)] -= ((Ap[((Ai + (loopi * hypre__sx1)) + (loopj * DJA1)) + (loopk * DKA1)]) * (xp[((xi + (loopi * hypre__sx2)) + (loopj * DJX1)) + (loopk * DKX1)])); //rp[ri] -= Ap[Ai] * xp[xi]; } //Ai += hypre__sx1; // 2. 
merging loop index changes //xi += hypre__sx2; //ri += hypre__sx3; } //Ai += DJA0;//(hypre__sy1 - (hypre__nx * hypre__sx1)); //xi += DJX0;//(hypre__sy2 - (hypre__nx * hypre__sx2)); //ri += DJR0;//(hypre__sy3 - (hypre__nx * hypre__sx3)); } //Ai += DKA0;//(hypre__sz1 - (hypre__ny * hypre__sy1)); //xi += DKX0; //(hypre__sz2 - (hypre__ny * hypre__sy2)); //ri += DKR0;//(hypre__sz3 - (hypre__ny * hypre__sy3)); } } /* CORE LOOP END */ /* hypre__block */ } /* si */ } /* else hypre__num_blocks > 1 */ } /* j */ } } /* i */ } /* compute_i */ } hypre_IncFLOPCount((residual_data -> flops)); hypre_EndTiming((residual_data -> time_index)); return ierr; }
HYPRE_Int hypre_SMGResidual( void *residual_vdata, hypre_StructMatrix *A, hypre_StructVector *x, hypre_StructVector *b, hypre_StructVector *r ) { HYPRE_Int ierr; hypre_SMGResidualData *residual_data = residual_vdata; hypre_IndexRef base_stride = (residual_data -> base_stride); hypre_BoxArray *base_points = (residual_data -> base_points); hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg); hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *A_data_box; hypre_Box *x_data_box; hypre_Box *b_data_box; hypre_Box *r_data_box; HYPRE_Int Ai; HYPRE_Int xi; HYPRE_Int bi; HYPRE_Int ri; double *Ap0; double *xp0; double *bp; double *rp; hypre_Index loop_size; hypre_IndexRef start; hypre_StructStencil *stencil; hypre_Index *stencil_shape; HYPRE_Int stencil_size; HYPRE_Int compute_i, i, j, si; HYPRE_Int loopi, loopj, loopk; double *Ap1, *Ap2; double *Ap3, *Ap4; double *Ap5, *Ap6; double *Ap7, *Ap8, *Ap9; double *Ap10, *Ap11, *Ap12, *Ap13, *Ap14; double *Ap15, *Ap16, *Ap17, *Ap18; double *Ap19, *Ap20, *Ap21, *Ap22, *Ap23, *Ap24, *Ap25, *Ap26; double *xp1, *xp2; double *xp3, *xp4; double *xp5, *xp6; double *xp7, *xp8, *xp9; double *xp10, *xp11, *xp12, *xp13, *xp14; double *xp15, *xp16, *xp17, *xp18; double *xp19, *xp20, *xp21, *xp22, *xp23, *xp24, *xp25, *xp26; hypre_BeginTiming(residual_data -> time_index); /*----------------------------------------------------------------------- * Compute residual r = b - Ax *-----------------------------------------------------------------------*/ stencil = hypre_StructMatrixStencil(A); stencil_shape = hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { xp0 = hypre_StructVectorData(x); hypre_InitializeIndtComputations(compute_pkg, xp0, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); 
/*---------------------------------------- * Copy b into r *----------------------------------------*/ compute_box_a = base_points; hypre_ForBoxI(i, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, i); start = hypre_BoxIMin(compute_box); b_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); bp = hypre_StructVectorBoxData(b, i); rp = hypre_StructVectorBoxData(r, i); hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); hypre_BoxLoop2Begin(loop_size, b_data_box, start, base_stride, bi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri) { rp[ri] = bp[bi]; } hypre_BoxLoop2End(bi, ri); } } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } /*-------------------------------------------------------------------- * Compute r -= A*x *--------------------------------------------------------------------*/ hypre_ForBoxArrayI(i, compute_box_aa) { compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i); A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i); x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i); r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i); rp = hypre_StructVectorBoxData(r, i); /*-------------------------------------------------------------- * Switch statement to direct control (based on stencil size) to * code to get pointers and offsets fo A and x. 
*--------------------------------------------------------------*/ switch (stencil_size) { case 1: Ap0 = hypre_StructMatrixBoxData(A, i, 0); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); break; case 3: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); break; case 5: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]); break; case 7: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); Ap5 = hypre_StructMatrixBoxData(A, i, 5); Ap6 = hypre_StructMatrixBoxData(A, i, 6); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + 
hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]); xp5 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]); xp6 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]); break; case 9: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); Ap5 = hypre_StructMatrixBoxData(A, i, 5); Ap6 = hypre_StructMatrixBoxData(A, i, 6); Ap7 = hypre_StructMatrixBoxData(A, i, 7); Ap8 = hypre_StructMatrixBoxData(A, i, 8); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]); xp5 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]); xp6 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]); xp7 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]); xp8 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]); break; case 15: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); Ap5 = hypre_StructMatrixBoxData(A, i, 5); Ap6 = hypre_StructMatrixBoxData(A, i, 6); Ap7 = hypre_StructMatrixBoxData(A, i, 7); Ap8 = hypre_StructMatrixBoxData(A, i, 8); Ap9 = 
hypre_StructMatrixBoxData(A, i, 9); Ap10 = hypre_StructMatrixBoxData(A, i, 10); Ap11 = hypre_StructMatrixBoxData(A, i, 11); Ap12 = hypre_StructMatrixBoxData(A, i, 12); Ap13 = hypre_StructMatrixBoxData(A, i, 13); Ap14 = hypre_StructMatrixBoxData(A, i, 14); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]); xp5 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]); xp6 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]); xp7 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]); xp8 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]); xp9 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]); xp10 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]); xp11 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]); xp12 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]); xp13 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]); xp14 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]); break; case 19: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); Ap5 = hypre_StructMatrixBoxData(A, i, 5); Ap6 = hypre_StructMatrixBoxData(A, i, 
6); Ap7 = hypre_StructMatrixBoxData(A, i, 7); Ap8 = hypre_StructMatrixBoxData(A, i, 8); Ap9 = hypre_StructMatrixBoxData(A, i, 9); Ap10 = hypre_StructMatrixBoxData(A, i, 10); Ap11 = hypre_StructMatrixBoxData(A, i, 11); Ap12 = hypre_StructMatrixBoxData(A, i, 12); Ap13 = hypre_StructMatrixBoxData(A, i, 13); Ap14 = hypre_StructMatrixBoxData(A, i, 14); Ap15 = hypre_StructMatrixBoxData(A, i, 15); Ap16 = hypre_StructMatrixBoxData(A, i, 16); Ap17 = hypre_StructMatrixBoxData(A, i, 17); Ap18 = hypre_StructMatrixBoxData(A, i, 18); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]); xp5 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]); xp6 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]); xp7 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]); xp8 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]); xp9 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]); xp10 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]); xp11 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]); xp12 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]); xp13 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]); xp14 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]); xp15 = hypre_StructVectorBoxData(x, i) 
+ hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]); xp16 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]); xp17 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]); xp18 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]); break; case 27: Ap0 = hypre_StructMatrixBoxData(A, i, 0); Ap1 = hypre_StructMatrixBoxData(A, i, 1); Ap2 = hypre_StructMatrixBoxData(A, i, 2); Ap3 = hypre_StructMatrixBoxData(A, i, 3); Ap4 = hypre_StructMatrixBoxData(A, i, 4); Ap5 = hypre_StructMatrixBoxData(A, i, 5); Ap6 = hypre_StructMatrixBoxData(A, i, 6); Ap7 = hypre_StructMatrixBoxData(A, i, 7); Ap8 = hypre_StructMatrixBoxData(A, i, 8); Ap9 = hypre_StructMatrixBoxData(A, i, 9); Ap10 = hypre_StructMatrixBoxData(A, i, 10); Ap11 = hypre_StructMatrixBoxData(A, i, 11); Ap12 = hypre_StructMatrixBoxData(A, i, 12); Ap13 = hypre_StructMatrixBoxData(A, i, 13); Ap14 = hypre_StructMatrixBoxData(A, i, 14); Ap15 = hypre_StructMatrixBoxData(A, i, 15); Ap16 = hypre_StructMatrixBoxData(A, i, 16); Ap17 = hypre_StructMatrixBoxData(A, i, 17); Ap18 = hypre_StructMatrixBoxData(A, i, 18); Ap19 = hypre_StructMatrixBoxData(A, i, 19); Ap20 = hypre_StructMatrixBoxData(A, i, 20); Ap21 = hypre_StructMatrixBoxData(A, i, 21); Ap22 = hypre_StructMatrixBoxData(A, i, 22); Ap23 = hypre_StructMatrixBoxData(A, i, 23); Ap24 = hypre_StructMatrixBoxData(A, i, 24); Ap25 = hypre_StructMatrixBoxData(A, i, 25); Ap26 = hypre_StructMatrixBoxData(A, i, 26); xp0 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]); xp1 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]); xp2 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]); xp3 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]); xp4 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, 
stencil_shape[4]); xp5 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]); xp6 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]); xp7 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]); xp8 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]); xp9 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]); xp10 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]); xp11 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]); xp12 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]); xp13 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]); xp14 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]); xp15 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]); xp16 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]); xp17 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]); xp18 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]); xp19 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[19]); xp20 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[20]); xp21 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[21]); xp22 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[22]); xp23 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[23]); xp24 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[24]); xp25 = hypre_StructVectorBoxData(x, i) + 
hypre_BoxOffsetDistance(x_data_box, stencil_shape[25]); xp26 = hypre_StructVectorBoxData(x, i) + hypre_BoxOffsetDistance(x_data_box, stencil_shape[26]); break; default: ; } hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); start = hypre_BoxIMin(compute_box); /*------------------------------------------------------ * Switch statement to direct control to appropriate * box loop depending on stencil size *------------------------------------------------------*/ switch (stencil_size) { case 1: hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); hypre_BoxLoop3Begin(loop_size, A_data_box, start, base_stride, Ai, x_data_box, start, base_stride, xi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri) { rp[ri] = rp[ri] - Ap0[Ai] * xp0[xi]; } hypre_BoxLoop3End(Ai, xi, ri); break; case 3: hypre_BoxGetStrideSize(compute_box, base_stride, loop_size); hypre_BoxLoop3Begin(loop_size, A_data_box, start, base_stride, Ai, x_data_box, start, base_stride, xi, r_data_box, start, base_stride, ri); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri) { rp[ri] = rp[ri] - Ap0[Ai] * xp0[xi] - Ap1[Ai] * xp1[xi] - Ap2[Ai] * xp2[xi]; }
int hypre_SemiRestrict( void *restrict_vdata, hypre_StructMatrix *R, hypre_StructVector *r, hypre_StructVector *rc ) { int ierr = 0; hypre_SemiRestrictData *restrict_data = (hypre_SemiRestrictData *)restrict_vdata; int R_stored_as_transpose; hypre_ComputePkg *compute_pkg; hypre_IndexRef cindex; hypre_IndexRef stride; hypre_StructGrid *fgrid; int *fgrid_ids; hypre_StructGrid *cgrid; hypre_BoxArray *cgrid_boxes; int *cgrid_ids; hypre_CommHandle *comm_handle; hypre_BoxArrayArray *compute_box_aa; hypre_BoxArray *compute_box_a; hypre_Box *compute_box; hypre_Box *R_dbox; hypre_Box *r_dbox; hypre_Box *rc_dbox; int Ri; int ri; int rci; double *Rp0, *Rp1; double *rp, *rp0, *rp1; double *rcp; hypre_Index loop_size; hypre_IndexRef start; hypre_Index startc; hypre_Index stridec; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int compute_i, fi, ci, j; int loopi, loopj, loopk; /*----------------------------------------------------------------------- * Initialize some things. *-----------------------------------------------------------------------*/ hypre_BeginTiming(restrict_data -> time_index); R_stored_as_transpose = (restrict_data -> R_stored_as_transpose); compute_pkg = (restrict_data -> compute_pkg); cindex = (restrict_data -> cindex); stride = (restrict_data -> stride); stencil = hypre_StructMatrixStencil(R); stencil_shape = hypre_StructStencilShape(stencil); hypre_SetIndex(stridec, 1, 1, 1); /*-------------------------------------------------------------------- * Restrict the residual. 
*--------------------------------------------------------------------*/ fgrid = hypre_StructVectorGrid(r); fgrid_ids = hypre_StructGridIDs(fgrid); cgrid = hypre_StructVectorGrid(rc); cgrid_boxes = hypre_StructGridBoxes(cgrid); cgrid_ids = hypre_StructGridIDs(cgrid); for (compute_i = 0; compute_i < 2; compute_i++) { switch(compute_i) { case 0: { rp = hypre_StructVectorData(r); hypre_InitializeIndtComputations(compute_pkg, rp, &comm_handle); compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg); } break; case 1: { hypre_FinalizeIndtComputations(comm_handle); compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg); } break; } fi = 0; hypre_ForBoxArrayI(ci, cgrid_boxes) { while (fgrid_ids[fi] != cgrid_ids[ci]) { fi++; } compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi); R_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(R), fi); r_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), fi); rc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(rc), ci); if (R_stored_as_transpose) { Rp0 = hypre_StructMatrixBoxData(R, fi, 1) - hypre_BoxOffsetDistance(R_dbox, stencil_shape[1]); Rp1 = hypre_StructMatrixBoxData(R, fi, 0); } else { Rp0 = hypre_StructMatrixBoxData(R, fi, 0); Rp1 = hypre_StructMatrixBoxData(R, fi, 1); } rp = hypre_StructVectorBoxData(r, fi); rp0 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[0]); rp1 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[1]); rcp = hypre_StructVectorBoxData(rc, ci); hypre_ForBoxI(j, compute_box_a) { compute_box = hypre_BoxArrayBox(compute_box_a, j); start = hypre_BoxIMin(compute_box); hypre_StructMapFineToCoarse(start, cindex, stride, startc); hypre_BoxGetStrideSize(compute_box, stride, loop_size); hypre_BoxLoop3Begin(loop_size, R_dbox, startc, stridec, Ri, r_dbox, start, stride, ri, rc_dbox, startc, stridec, rci); #define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ri,ri,rci #include "hypre_box_smp_forloop.h" hypre_BoxLoop3For(loopi, loopj, loopk, Ri, ri, rci) { rcp[rci] = rp[ri] + (Rp0[Ri] * rp0[ri] + 
Rp1[Ri] * rp1[ri]); } hypre_BoxLoop3End(Ri, ri, rci); } }