/*----------------------------------------------------------------------------
 * Iteratively move the mesh vertices to reduce face warping.
 *
 * Each pass recomputes interior and boundary face centers and normals,
 * evaluates the face warping, scales the normals to unit length, obtains a
 * vertex displacement and the current maximum warping from _unwarping_mvt(),
 * and applies the displacement with _move_vertices().
 *
 * The loop stops when the relative decrease of the maximum warping between
 * two passes is positive and below 1.e-4, when UNWARPING_MAX_LOOPS is
 * reached, or when the maximum warping grows by more than 0.5 % (in that
 * last case the displacement of the diverging pass is not applied; if this
 * happens at pass 0 or 1, bft_error() is called instead).
 *
 * Warping histograms are printed before the first displacement and after
 * the loop. The call is rejected through bft_error() when the mesh has a
 * rotation periodicity.
 *
 * parameters:
 *   mesh         <-> pointer to the mesh structure to unwarp
 *   vtx_is_fixed <-- per-vertex array forwarded unchanged to
 *                    _move_vertices() (presumably flags vertices that
 *                    must not move -- confirm against that helper)
 *----------------------------------------------------------------------------*/

void
cs_mesh_smoother_unwarp(cs_mesh_t  *mesh,
                        const int   vtx_is_fixed[])
{
  const int     n_max_loops = UNWARPING_MAX_LOOPS;
  const double  mvt_frac = 0.1;
  const double  conv_eps = 1.e-4;

  bool       conv = false;
  int        iter;
  cs_real_t  maxwarp;
  cs_real_t  maxwarp_p = 90;  /* reference value used for the first pass */
  cs_real_t  minhist_i, maxhist_i, minhist_b, maxhist_b;

  cs_real_t  *vtx_tolerance = NULL;
  cs_real_t  *loc_vtx_mvt = NULL;
  cs_real_t  *i_face_norm = NULL;
  cs_real_t  *i_face_cog = NULL;
  cs_real_t  *b_face_norm = NULL;
  cs_real_t  *b_face_cog = NULL;
  cs_real_t  *b_face_warp = NULL;
  cs_real_t  *i_face_warp = NULL;

  if (mesh->have_rotation_perio)
    bft_error(__FILE__, __LINE__, 0,
              "Smoothing in case of periodicity of rotation not yet handled.");

  bft_printf(_("\n Start unwarping algorithm\n\n"));

  /* Work arrays kept for the whole algorithm */

  BFT_MALLOC(b_face_warp, mesh->n_b_faces, cs_real_t);
  BFT_MALLOC(i_face_warp, mesh->n_i_faces, cs_real_t);
  BFT_MALLOC(vtx_tolerance, mesh->n_vertices, cs_real_t);
  BFT_MALLOC(loc_vtx_mvt, 3*(mesh->n_vertices), cs_real_t);

  for (iter = 0; !conv; iter++) {

    int        f_id;
    cs_real_t  warp_ratio, warp_gain;

    /* Face quantities are rebuilt at each pass and released at its end */

    cs_mesh_quantities_i_faces(mesh, &i_face_cog, &i_face_norm);
    cs_mesh_quantities_b_faces(mesh, &b_face_cog, &b_face_norm);

    cs_mesh_quality_compute_warping(mesh,
                                    i_face_norm,
                                    b_face_norm,
                                    i_face_warp,
                                    b_face_warp);

    _get_tolerance(mesh, vtx_tolerance, mvt_frac);

    /* Scale interior face normals to unit length */

    for (f_id = 0; f_id < mesh->n_i_faces; f_id++) {
      cs_real_t  *nrm = i_face_norm + 3*f_id;
      const cs_real_t  len = sqrt(  nrm[0]*nrm[0]
                                  + nrm[1]*nrm[1]
                                  + nrm[2]*nrm[2]);
      nrm[0] /= len;
      nrm[1] /= len;
      nrm[2] /= len;
    }

    /* Scale boundary face normals to unit length */

    for (f_id = 0; f_id < mesh->n_b_faces; f_id++) {
      cs_real_t  *nrm = b_face_norm + 3*f_id;
      const cs_real_t  len = sqrt(  nrm[0]*nrm[0]
                                  + nrm[1]*nrm[1]
                                  + nrm[2]*nrm[2]);
      nrm[0] /= len;
      nrm[1] /= len;
      nrm[2] /= len;
    }

    maxwarp = _unwarping_mvt(mesh,
                             i_face_norm,
                             b_face_norm,
                             i_face_cog,
                             b_face_cog,
                             loc_vtx_mvt,
                             i_face_warp,
                             b_face_warp,
                             vtx_tolerance,
                             mvt_frac);

    /* Initial state: keep its bounds, reused for the final histograms */

    if (iter == 0) {

      _compute_minmax(mesh->n_i_faces, i_face_warp, &minhist_i, &maxhist_i);
      _compute_minmax(mesh->n_b_faces, b_face_warp, &minhist_b, &maxhist_b);

      bft_printf(_("\n Histogram of the boundary faces warping"
                   " before unwarping algorithm:\n\n"));
      _histogram(mesh->n_b_faces, b_face_warp,
                 minhist_b, maxhist_b, minhist_b, maxhist_b);

      bft_printf(_("\n Histogram of the interior faces warping"
                   " before unwarping algorithm:\n\n"));
      _int_face_histogram(mesh, i_face_warp,
                          minhist_i, maxhist_i, minhist_i, maxhist_i);

    }

    warp_ratio = maxwarp/maxwarp_p;
    warp_gain = 1 - warp_ratio;

    /* Divergence test */

    if (warp_ratio > 1.005) {
      if (iter > 1) {
        cs_base_warn(__FILE__, __LINE__);
        bft_printf(_("\nUnwarping algorithm stopped at iteration %d"
                     " because it starting to diverge.\n"), iter);
        /* Push the counter past n_max_loops so that neither the test
           below nor the vertex displacement is triggered for this pass */
        iter = n_max_loops + 100;
        conv = true;
      }
      else
        bft_error(__FILE__, __LINE__, 0,
                  _("\nUnwarping algorithm failed."));
    }

    /* Convergence test (also triggered when the loop limit is reached) */

    if (iter == n_max_loops || (warp_gain > 0 && warp_gain < conv_eps)) {
      conv = true;
      bft_printf(_("\nUnwarping algorithm converged at iteration %d \n"),
                 iter + 1);
    }

    maxwarp_p = maxwarp;

    if (iter <= n_max_loops)
      _move_vertices(mesh, loc_vtx_mvt, vtx_is_fixed);

    BFT_FREE(i_face_norm);
    BFT_FREE(b_face_norm);
    BFT_FREE(i_face_cog);
    BFT_FREE(b_face_cog);

  }

  /* Output quality histograms, using the warping values of the last pass */

  {
    cs_real_t  min_b, max_b, max_i, min_i;

    _compute_minmax(mesh->n_i_faces, i_face_warp, &min_i, &max_i);
    _compute_minmax(mesh->n_b_faces, b_face_warp, &min_b, &max_b);

    bft_printf(_("\n Histogram of the boundary faces warping"
                 " after unwarping algorithm:\n\n"));
    _histogram(mesh->n_b_faces, b_face_warp,
               minhist_b, maxhist_b, min_b, max_b);

    bft_printf(_("\n Histogram of the interior faces warping"
                 " after unwarping algorithm:\n\n"));
    _int_face_histogram(mesh, i_face_warp,
                        minhist_i, maxhist_i, min_i, max_i);
  }

  BFT_FREE(vtx_tolerance);
  BFT_FREE(loc_vtx_mvt);
  BFT_FREE(i_face_warp);
  BFT_FREE(b_face_warp);

  bft_printf(_("\n End unwarping algorithm\n\n"));
}
int mpsort_mpi_histogram_sort(struct crstruct d, struct crmpistruct o, struct TIMER * tmr) { char Pmax[d.rsize]; char Pmin[d.rsize]; char P[d.rsize * (o.NTask - 1)]; ptrdiff_t C[o.NTask + 1]; /* desired counts */ ptrdiff_t myCLT[o.NTask + 1]; /* counts of less than P */ ptrdiff_t CLT[o.NTask + 1]; ptrdiff_t myCLE[o.NTask + 1]; /* counts of less than or equal to P */ ptrdiff_t CLE[o.NTask + 1]; int SendCount[o.NTask]; int SendDispl[o.NTask]; int RecvCount[o.NTask]; int RecvDispl[o.NTask]; ptrdiff_t myT_CLT[o.NTask]; ptrdiff_t myT_CLE[o.NTask]; ptrdiff_t myT_C[o.NTask]; ptrdiff_t myC[o.NTask + 1]; int iter = 0; int done = 0; char * buffer; int i; (tmr->time = MPI_Wtime(), strcpy(tmr->name, "START"), tmr++); /* and sort the local array */ radix_sort(d.base, d.nmemb, d.size, d.radix, d.rsize, d.arg); MPI_Barrier(o.comm); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "FirstSort"), tmr++); _find_Pmax_Pmin_C(o.mybase, o.mynmemb, o.myoutnmemb, Pmax, Pmin, C, &d, &o); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "PmaxPmin"), tmr++); memset(P, 0, d.rsize * (o.NTask -1)); struct piter pi; piter_init(&pi, Pmin, Pmax, o.NTask - 1, &d); while(!done) { iter ++; piter_bisect(&pi, P); #if MPI_VERSION >= 3 if (1 || mpsort_mpi_has_options(MPSORT_DISABLE_IALLREDUCE) ) { _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, myCLE, &d); MPI_Allreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm); MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm); } else { /* overlap allreduce with histogramming by pipelining */ MPI_Request r[1]; _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, NULL, &d); /* reduce the bins just calculated */ MPI_Iallreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm, &r[0]); _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLE, NULL, &d); MPI_Waitall(1, r, MPI_STATUSES_IGNORE); MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm); } #else _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, 
myCLE, &d); MPI_Allreduce(myCLT, CLT, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm); MPI_Allreduce(myCLE, CLE, o.NTask + 1, MPI_TYPE_PTRDIFF, MPI_SUM, o.comm); #endif (iter>10?tmr--:0, tmr->time = MPI_Wtime(), sprintf(tmr->name, "bisect%04d", iter), tmr++); piter_accept(&pi, P, C, CLT, CLE); #if 0 { int k; for(k = 0; k < o.NTask; k ++) { MPI_Barrier(o.comm); int i; if(o.ThisTask != k) continue; printf("P (%d): PMin %d PMax %d P ", o.ThisTask, *(int*) Pmin, *(int*) Pmax ); for(i = 0; i < o.NTask - 1; i ++) { printf(" %d ", ((int*) P) [i]); } printf("\n"); printf("C (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", C[i]); } printf("\n"); printf("CLT (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", CLT[i]); } printf("\n"); printf("CLE (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", CLE[i]); } printf("\n"); } } #endif done = piter_all_done(&pi); } piter_destroy(&pi); _histogram(P, o.NTask - 1, o.mybase, o.mynmemb, myCLT, myCLE, &d); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "findP"), tmr++); /* transpose the matrix, could have been done with a new datatype */ /* MPI_Alltoall(myCLT, 1, MPI_TYPE_PTRDIFF, myT_CLT, 1, MPI_TYPE_PTRDIFF, o.comm); */ MPI_Alltoall(myCLT + 1, 1, MPI_TYPE_PTRDIFF, myT_CLT, 1, MPI_TYPE_PTRDIFF, o.comm); /*MPI_Alltoall(myCLE, 1, MPI_TYPE_PTRDIFF, myT_CLE, 1, MPI_TYPE_PTRDIFF, o.comm); */ MPI_Alltoall(myCLE + 1, 1, MPI_TYPE_PTRDIFF, myT_CLE, 1, MPI_TYPE_PTRDIFF, o.comm); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "LayDistr"), tmr++); _solve_for_layout_mpi(o.NTask, C, myT_CLT, myT_CLE, myT_C, o.comm); myC[0] = 0; MPI_Alltoall(myT_C, 1, MPI_TYPE_PTRDIFF, myC + 1, 1, MPI_TYPE_PTRDIFF, o.comm); #if 0 for(i = 0;i < o.NTask; i ++) { int j; MPI_Barrier(o.comm); if(o.ThisTask != i) continue; for(j = 0; j < o.NTask + 1; j ++) { printf("%d %d %d, ", myCLT[j], myC[j], myCLE[j]); } printf("\n"); } #endif (tmr->time = MPI_Wtime(), strcpy(tmr->name, "LaySolve"), tmr++); for(i = 0; i < 
o.NTask; i ++) { SendCount[i] = myC[i + 1] - myC[i]; } MPI_Alltoall(SendCount, 1, MPI_INT, RecvCount, 1, MPI_INT, o.comm); SendDispl[0] = 0; RecvDispl[0] = 0; size_t totrecv = RecvCount[0]; for(i = 1; i < o.NTask; i ++) { SendDispl[i] = SendDispl[i - 1] + SendCount[i - 1]; RecvDispl[i] = RecvDispl[i - 1] + RecvCount[i - 1]; if(SendDispl[i] != myC[i]) { fprintf(stderr, "SendDispl error\n"); abort(); } totrecv += RecvCount[i]; } if(totrecv != o.myoutnmemb) { fprintf(stderr, "totrecv = %td, mismatch with %td\n", totrecv, o.myoutnmemb); abort(); } #if 0 { int k; for(k = 0; k < o.NTask; k ++) { MPI_Barrier(o.comm); if(o.ThisTask != k) continue; printf("P (%d): ", o.ThisTask); for(i = 0; i < o.NTask - 1; i ++) { printf("%d ", ((int*) P) [i]); } printf("\n"); printf("C (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", C[i]); } printf("\n"); printf("CLT (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", CLT[i]); } printf("\n"); printf("CLE (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", CLE[i]); } printf("\n"); printf("MyC (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", myC[i]); } printf("\n"); printf("MyCLT (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", myCLT[i]); } printf("\n"); printf("MyCLE (%d): ", o.ThisTask); for(i = 0; i < o.NTask + 1; i ++) { printf("%d ", myCLE[i]); } printf("\n"); printf("Send Count(%d): ", o.ThisTask); for(i = 0; i < o.NTask; i ++) { printf("%d ", SendCount[i]); } printf("\n"); printf("My data(%d): ", o.ThisTask); for(i = 0; i < mynmemb; i ++) { printf("%d ", ((int*) mybase)[i]); } printf("\n"); } } #endif if(o.myoutbase == o.mybase) buffer = malloc(d.size * o.myoutnmemb); else buffer = o.myoutbase; MPI_Alltoallv_smart( o.mybase, SendCount, SendDispl, o.MPI_TYPE_DATA, buffer, RecvCount, RecvDispl, o.MPI_TYPE_DATA, o.comm); if(o.myoutbase == o.mybase) { memcpy(o.myoutbase, buffer, o.myoutnmemb * d.size); free(buffer); } 
MPI_Barrier(o.comm); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "Exchange"), tmr++); radix_sort(o.myoutbase, o.myoutnmemb, d.size, d.radix, d.rsize, d.arg); MPI_Barrier(o.comm); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "SecondSort"), tmr++); (tmr->time = MPI_Wtime(), strcpy(tmr->name, "END"), tmr++); return 0; }