void hypre_sort_and_create_inverse_map( HYPRE_Int *in, HYPRE_Int len, HYPRE_Int **out, hypre_UnorderedIntMap *inverse_map) { if (len == 0) { return; } #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime(); #endif HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len); hypre_merge_sort(in, temp, len, out); hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads()); HYPRE_Int i; #pragma omp parallel for HYPRE_SMP_SCHEDULE for (i = 0; i < len; i++) { HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i); assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY); #ifdef DBG_MERGE_SORT if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i) { fprintf(stderr, "%d %d\n", i, (*out)[i]); assert(false); } #endif } #ifdef DBG_MERGE_SORT std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len); for (HYPRE_Int i = 0; i < len; ++i) { inverse_map2[(*out)[i]] = i; if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i) { fprintf(stderr, "%d %d\n", i, (*out)[i]); assert(false); } } assert(hypre_UnorderedIntMapSize(inverse_map) == len); #endif if (*out == in) { hypre_TFree(temp); } else { hypre_TFree(in); } #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime(); #endif }
HYPRE_Real time_getWallclockSeconds(void) { #ifdef TIMER_USE_MPI return(hypre_MPI_Wtime()); #else #ifdef WIN32 clock_t cl=clock(); return(((HYPRE_Real) cl)/((HYPRE_Real) CLOCKS_PER_SEC)); #else struct tms usage; hypre_longint wallclock = times(&usage); return(((HYPRE_Real) wallclock)/((HYPRE_Real) sysconf(_SC_CLK_TCK))); #endif #endif }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { HYPRE_Int num_procs, myid; HYPRE_Int verbose = 0, build_matrix_type = 1; HYPRE_Int index, matrix_arg_index, commpkg_flag=3; HYPRE_Int i, k, ierr=0; HYPRE_Int row_start, row_end; HYPRE_Int col_start, col_end, global_num_rows; HYPRE_Int *row_part, *col_part; char *csrfilename; HYPRE_Int preload = 0, loop = 0, loop2 = LOOP2; HYPRE_Int bcast_rows[2], *info; hypre_ParCSRMatrix *parcsr_A, *small_A; HYPRE_ParCSRMatrix A_temp, A_temp_small; hypre_CSRMatrix *A_CSR; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int nx, ny, nz; HYPRE_Int P, Q, R; HYPRE_Int p, q, r; HYPRE_Real values[4]; hypre_ParVector *x_new; hypre_ParVector *y_new, *y; HYPRE_Int *row_starts; HYPRE_Real ans; HYPRE_Real start_time, end_time, total_time, *loop_times; HYPRE_Real T_avg, T_std; HYPRE_Int noparmprint = 0; #if mydebug HYPRE_Int j, tmp_int; #endif /*----------------------------------------------------------- * Initialize MPI *-----------------------------------------------------------*/ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs ); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid ); /*----------------------------------------------------------- * default - is 27pt laplace *-----------------------------------------------------------*/ build_matrix_type = 2; matrix_arg_index = argc; /*----------------------------------------------------------- * Parse command line *-----------------------------------------------------------*/ index = 1; while ( index < argc) { if ( strcmp(argv[index], "-verbose") == 0 ) { index++; verbose = 1; } else if ( strcmp(argv[index], "-fromonecsrfile") == 0 ) { index++; build_matrix_type = 1; matrix_arg_index = index; /*this tells where the name is*/ } else if ( strcmp(argv[index], "-commpkg") == 0 ) { index++; commpkg_flag = atoi(argv[index++]); } else if ( strcmp(argv[index], "-laplacian") == 0 ) { index++; build_matrix_type = 2; matrix_arg_index = index; } else if ( strcmp(argv[index], "-27pt") == 0 ) { index++; build_matrix_type = 4; matrix_arg_index = index; } /* else if ( strcmp(argv[index], "-nopreload") == 0 ) { index++; preload = 0; } */ else if ( strcmp(argv[index], "-loop") == 0 ) { index++; loop = atoi(argv[index++]); } else if ( strcmp(argv[index], "-noparmprint") == 0 ) { index++; noparmprint = 1; } else { index++; /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/ } } /*----------------------------------------------------------- * Setup the Matrix problem *-----------------------------------------------------------*/ /*----------------------------------------------------------- * Get actual partitioning- * read in an actual csr matrix. *-----------------------------------------------------------*/ if (build_matrix_type ==1) /*read in a csr matrix from one file */ { if (matrix_arg_index < argc) { csrfilename = argv[matrix_arg_index]; } else { hypre_printf("Error: No filename specified \n"); exit(1); } if (myid == 0) { /*hypre_printf(" FromFile: %s\n", csrfilename);*/ A_CSR = hypre_CSRMatrixRead(csrfilename); } row_part = NULL; col_part = NULL; parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, row_part, col_part); if (myid == 0) hypre_CSRMatrixDestroy(A_CSR); } else if (build_matrix_type ==2) { myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } else if (build_matrix_type ==4) { myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint); parcsr_A = (hypre_ParCSRMatrix *) A_temp; } /*----------------------------------------------------------- * create a small problem so that timings are more accurate - * code gets run twice (small laplace) *-----------------------------------------------------------*/ /*this is no longer being used - preload = 0 is set at the beginning */ if (preload == 1) { /*hypre_printf("preload!\n");*/ values[1] = -1; values[2] = -1; values[3] = -1; values[0] = - 6.0 ; nx = 2; ny = num_procs; nz = 2; P = 1; Q = num_procs; R = 1; p = myid % P; q = (( myid - p)/P) % Q; r = ( myid - p - P*q)/( P*Q ); A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, P, Q, R, p, q, r, values); small_A = (hypre_ParCSRMatrix *) A_temp_small; /*do comm packages*/ hypre_NewCommPkgCreate(small_A); hypre_NewCommPkgDestroy(small_A); hypre_MatvecCommPkgCreate(small_A); hypre_ParCSRMatrixDestroy(small_A); } /*----------------------------------------------------------- * Prepare for timing *-----------------------------------------------------------*/ /* instead of preloading, let's not time the first one if more than one*/ if (!loop) { loop = 1; /* and don't do any timings */ } else { loop +=1; if (loop < 2) loop = 2; } loop_times = hypre_CTAlloc(HYPRE_Real, loop); /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag == 1 || commpkg_flag ==3 ) { /*----------------------------------------------------------- * Create new comm package *-----------------------------------------------------------*/ if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(1); #endif hypre_NewCommPkgCreate(parcsr_A); #if mpip_on if (i==(loop-1)) hypre_MPI_Pcontrol(0); #endif end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); }/*end of loop2 */ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { /* calculate the avg and std. */ stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf(" NewCommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" NewCommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, &row_end); hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, row_start, row_end); hypre_printf("myid = %d, num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg) ); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * To verify correctness (if commpkg_flag = 3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*do a matvec - we are assuming a square matrix */ row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(x_new, 0); hypre_ParVectorInitialize(x_new); hypre_ParVectorSetRandomValues(x_new, 1); y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts); hypre_ParVectorSetPartitioningOwner(y_new, 0); hypre_ParVectorInitialize(y_new); hypre_ParVectorSetConstantValues(y_new, 0.0); /*y = 1.0*A*x+1.0*y */ hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new); } /*----------------------------------------------------------- * Clean up after MyComm *-----------------------------------------------------------*/ hypre_NewCommPkgDestroy(parcsr_A); } /******************************************************************************************/ /******************************************************************************************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); if (commpkg_flag > 1 ) { /*----------------------------------------------------------- * Set up standard comm package *-----------------------------------------------------------*/ bcast_rows[0] = 23; bcast_rows[1] = 1789; if (!myid) hypre_printf("********************************************************\n" ); /*do loop times*/ for (i=0; i< loop; i++) { loop_times[i] = 0.0; for (k=0; k< loop2; k++) { hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); start_time = hypre_MPI_Wtime(); #if time_gather info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); #endif hypre_MatvecCommPkgCreate(parcsr_A); end_time = hypre_MPI_Wtime(); end_time = end_time - start_time; hypre_MPI_Allreduce(&end_time, &total_time, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD); loop_times[i] += total_time; if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A)); }/* end of loop 2*/ } /*end of loop*/ /* calculate the avg and std. */ if (loop > 1) { stats_mo(loop_times, loop, &T_avg, &T_std); if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg); if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std); if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n"); if (!myid) hypre_printf("********************************************************\n" ); for (i=0; i< loop; i++) { if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]); } if (!myid) hypre_printf("********************************************************\n" ); } else { if (!myid) hypre_printf("********************************************************\n" ); if (!myid) hypre_printf(" Current CommPkgCreate:\n"); if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]); if (!myid) hypre_printf("********************************************************\n" ); } /*----------------------------------------------------------- * Verbose printing *-----------------------------------------------------------*/ /*some verification*/ if (verbose) { ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A); hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, row_start, row_end); ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A, &row_start, &row_end , &col_start, &col_end ); hypre_printf("myid = %d, std - num_recvs = %d\n", myid, hypre_ParCSRCommPkgNumRecvs(comm_pkg) ); #if mydebug for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) { hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i], hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1); } #endif hypre_printf("myid = %d, std - num_sends = %d\n", myid, hypre_ParCSRCommPkgNumSends(comm_pkg)); #if mydebug for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) { tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] - hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i]; for (j=0; j< tmp_int; j++) { hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid, hypre_ParCSRCommPkgSendProcs(comm_pkg)[i], hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); } } #endif } /*----------------------------------------------------------- * Verify correctness *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A); y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y, 0); hypre_ParVectorInitialize(y); hypre_ParVectorSetConstantValues(y, 0.0); hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y); } } /*----------------------------------------------------------- * Compare matvecs for both comm packages (3) *-----------------------------------------------------------*/ if (commpkg_flag == 3 ) { /*make sure that y and y_new are the same - now y_new should=0*/ hypre_ParVectorAxpy( -1.0, y, y_new ); hypre_ParVectorSetRandomValues(y, 1); ans = hypre_ParVectorInnerProd( y, y_new ); if (!myid) { if ( fabs(ans) > 1e-8 ) { hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", ans); } else { hypre_printf("Matvecs match ( should be zero = %6.10f )\n", ans); } } } /*----------------------------------------------------------- * Clean up *-----------------------------------------------------------*/ hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm package destroy - but we'll destroy ours separately until it is incorporated */ if (commpkg_flag == 3 ) { hypre_ParVectorDestroy(x_new); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y_new); } hypre_MPI_Finalize(); return(ierr); }
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **out) { if (0 == len) return; #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime(); #endif #ifdef DBG_MERGE_SORT HYPRE_Int *dbg_buf = new HYPRE_Int[len]; std::copy(in, in + len, dbg_buf); std::sort(dbg_buf, dbg_buf + len); #endif // HYPRE_Int thread_private_len[hypre_NumThreads()]; // HYPRE_Int out_len = 0; #ifdef HYPRE_USING_OPENMP #pragma omp parallel #endif { HYPRE_Int num_threads = hypre_NumActiveThreads(); HYPRE_Int my_thread_num = hypre_GetThreadNum(); // thread-private sort HYPRE_Int i_per_thread = (len + num_threads - 1)/num_threads; HYPRE_Int i_begin = hypre_min(i_per_thread*my_thread_num, len); HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len); hypre_qsort0(in, i_begin, i_end - 1); // merge sorted sequences HYPRE_Int in_group_size; HYPRE_Int *in_buf = in; HYPRE_Int *out_buf = temp; for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2) { #ifdef HYPRE_USING_OPENMP #pragma omp barrier #endif // merge 2 in-groups into 1 out-group HYPRE_Int out_group_size = in_group_size*2; HYPRE_Int group_leader = my_thread_num/out_group_size*out_group_size; // HYPRE_Int group_sub_leader = hypre_min(group_leader + in_group_size, num_threads - 1); HYPRE_Int id_in_group = my_thread_num%out_group_size; HYPRE_Int num_threads_in_group = hypre_min(group_leader + out_group_size, num_threads) - group_leader; HYPRE_Int in_group1_begin = hypre_min(i_per_thread*group_leader, len); HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread*in_group_size, len); HYPRE_Int in_group2_begin = hypre_min(in_group1_begin + i_per_thread*in_group_size, len); HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread*in_group_size, len); hypre_parallel_merge( in_buf + in_group1_begin, in_buf + in_group1_end, in_buf + in_group2_begin, in_buf + in_group2_end, out_buf + in_group1_begin, num_threads_in_group, id_in_group); HYPRE_Int *temp = in_buf; in_buf = out_buf; out_buf = temp; } *out = in_buf; } /* omp parallel */ #ifdef DBG_MERGE_SORT assert(std::equal(*out, *out + len, dbg_buf)); delete[] dbg_buf; #endif #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime(); #endif }
/* y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end] */ HYPRE_Int hypre_CSRMatrixMatvecOutOfPlace( HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *b, hypre_Vector *y, HYPRE_Int offset ) { #ifdef HYPRE_PROFILE HYPRE_Real time_begin = hypre_MPI_Wtime(); #endif HYPRE_Complex *A_data = hypre_CSRMatrixData(A); HYPRE_Int *A_i = hypre_CSRMatrixI(A) + offset; HYPRE_Int *A_j = hypre_CSRMatrixJ(A); HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A) - offset; HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A); /*HYPRE_Int num_nnz = hypre_CSRMatrixNumNonzeros(A);*/ HYPRE_Int *A_rownnz = hypre_CSRMatrixRownnz(A); HYPRE_Int num_rownnz = hypre_CSRMatrixNumRownnz(A); HYPRE_Complex *x_data = hypre_VectorData(x); HYPRE_Complex *b_data = hypre_VectorData(b) + offset; HYPRE_Complex *y_data = hypre_VectorData(y); HYPRE_Int x_size = hypre_VectorSize(x); HYPRE_Int b_size = hypre_VectorSize(b) - offset; HYPRE_Int y_size = hypre_VectorSize(y) - offset; HYPRE_Int num_vectors = hypre_VectorNumVectors(x); HYPRE_Int idxstride_y = hypre_VectorIndexStride(y); HYPRE_Int vecstride_y = hypre_VectorVectorStride(y); /*HYPRE_Int idxstride_b = hypre_VectorIndexStride(b); HYPRE_Int vecstride_b = hypre_VectorVectorStride(b);*/ HYPRE_Int idxstride_x = hypre_VectorIndexStride(x); HYPRE_Int vecstride_x = hypre_VectorVectorStride(x); HYPRE_Complex temp, tempx; HYPRE_Int i, j, jj; HYPRE_Int m; HYPRE_Real xpar=0.7; HYPRE_Int ierr = 0; hypre_Vector *x_tmp = NULL; /*--------------------------------------------------------------------- * Check for size compatibility. Matvec returns ierr = 1 if * length of X doesn't equal the number of columns of A, * ierr = 2 if the length of Y doesn't equal the number of rows * of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in Matvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( num_vectors == hypre_VectorNumVectors(y) ); hypre_assert( num_vectors == hypre_VectorNumVectors(b) ); if (num_cols != x_size) ierr = 1; if (num_rows != y_size || num_rows != b_size) ierr = 2; if (num_cols != x_size && (num_rows != y_size || num_rows != b_size)) ierr = 3; /*----------------------------------------------------------------------- * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS *-----------------------------------------------------------------------*/ if (alpha == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows*num_vectors; i++) y_data[i] *= beta; #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin; #endif return ierr; } if (x == y) { x_tmp = hypre_SeqVectorCloneDeep(x); x_data = hypre_VectorData(x_tmp); } /*----------------------------------------------------------------------- * y = (beta/alpha)*y *-----------------------------------------------------------------------*/ temp = beta / alpha; /* use rownnz pointer to do the A*x multiplication when num_rownnz is smaller than num_rows */ if (num_rownnz < xpar*(num_rows) || num_vectors > 1) { /*----------------------------------------------------------------------- * y = (beta/alpha)*y *-----------------------------------------------------------------------*/ if (temp != 1.0) { if (temp == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows*num_vectors; i++) y_data[i] = 0.0; } else { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows*num_vectors; i++) y_data[i] = b_data[i]*temp; } } /*----------------------------------------------------------------- * y += A*x *-----------------------------------------------------------------*/ if (num_rownnz < xpar*(num_rows)) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rownnz; i++) { m = A_rownnz[i]; /* * for (jj = A_i[m]; jj < A_i[m+1]; jj++) * { * j = A_j[jj]; * y_data[m] += A_data[jj] * x_data[j]; * } */ if ( num_vectors==1 ) { tempx = 0; for (jj = A_i[m]; jj < A_i[m+1]; jj++) tempx += A_data[jj] * x_data[A_j[jj]]; y_data[m] += tempx; } else for ( j=0; j<num_vectors; ++j ) { tempx = 0; for (jj = A_i[m]; jj < A_i[m+1]; jj++) tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ]; y_data[ j*vecstride_y + m*idxstride_y] += tempx; } } } else // num_vectors > 1 { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i,j,jj,tempx) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) { for (j = 0; j < num_vectors; ++j) { tempx = 0; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ]; } y_data[ j*vecstride_y + i*idxstride_y ] += tempx; } } } /*----------------------------------------------------------------- * y = alpha*y *-----------------------------------------------------------------*/ if (alpha != 1.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows*num_vectors; i++) y_data[i] *= alpha; } } else { // JSP: this is currently the only path optimized #ifdef HYPRE_USING_OPENMP #pragma omp parallel private(i,jj,tempx) #endif { HYPRE_Int iBegin = hypre_CSRMatrixGetLoadBalancedPartitionBegin(A); HYPRE_Int iEnd = hypre_CSRMatrixGetLoadBalancedPartitionEnd(A); hypre_assert(iBegin <= iEnd); hypre_assert(iBegin >= 0 && iBegin <= num_rows); hypre_assert(iEnd >= 0 && iEnd <= num_rows); if (0 == temp) { if (1 == alpha) // JSP: a common path { for (i = iBegin; i < iEnd; i++) { tempx = 0.0; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = A*x else if (-1 == alpha) { for (i = iBegin; i < iEnd; i++) { tempx = 0.0; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx -= A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = -A*x else { for (i = iBegin; i < iEnd; i++) { tempx = 0.0; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = alpha*tempx; } } // y = alpha*A*x } // temp == 0 else if (-1 == temp) // beta == -alpha { if (1 == alpha) // JSP: a common path { for (i = iBegin; i < iEnd; i++) { tempx = -b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = A*x - y else if (-1 == alpha) // JSP: a common path { for (i = iBegin; i < iEnd; i++) { tempx = b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx -= A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = -A*x + y else { for (i = iBegin; i < iEnd; i++) { tempx = -b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = alpha*tempx; } } // y = alpha*(A*x - y) } // temp == -1 else if (1 == temp) { if (1 == alpha) // JSP: a common path { for (i = iBegin; i < iEnd; i++) { tempx = b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = A*x + y else if (-1 == alpha) { for (i = iBegin; i < iEnd; i++) { tempx = -b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx -= A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = -A*x - y else { for (i = iBegin; i < iEnd; i++) { tempx = b_data[i]; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = alpha*tempx; } } // y = alpha*(A*x + y) } else { if (1 == alpha) // JSP: a common path { for (i = iBegin; i < iEnd; i++) { tempx = b_data[i]*temp; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = A*x + temp*y else if (-1 == alpha) { for (i = iBegin; i < iEnd; i++) { tempx = -b_data[i]*temp; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx -= A_data[jj] * x_data[A_j[jj]]; } y_data[i] = tempx; } } // y = -A*x - temp*y else { for (i = iBegin; i < iEnd; i++) { tempx = b_data[i]*temp; for (jj = A_i[i]; jj < A_i[i+1]; jj++) { tempx += A_data[jj] * x_data[A_j[jj]]; } y_data[i] = alpha*tempx; } } // y = alpha*(A*x + temp*y) } // temp != 0 && temp != -1 && temp != 1 } // omp parallel } if (x == y) hypre_SeqVectorDestroy(x_tmp); #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin; #endif return ierr; }
HYPRE_Int main(HYPRE_Int argc, char *argv[]) { HYPRE_Int mype, npes; HYPRE_Int symmetric; HYPRE_Int num_runs; Matrix *A; ParaSails *ps; FILE *file; HYPRE_Int n, beg_row, end_row; HYPRE_Real time0, time1; HYPRE_Real setup_time, solve_time; HYPRE_Real max_setup_time, max_solve_time; HYPRE_Real cost; HYPRE_Real *x, *b; HYPRE_Int i, niter; HYPRE_Real thresh; HYPRE_Real threshg; HYPRE_Int nlevels; HYPRE_Real filter; HYPRE_Real loadbal; hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mype); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &npes); /* Read number of rows in matrix */ symmetric = atoi(argv[1]); num_runs = atoi(argv[2]); file = fopen(argv[3], "r"); assert(file != NULL); #ifdef EMSOLVE hypre_fscanf(file, "%*d %d\n", &n); #else hypre_fscanf(file, "%d\n", &n); #endif fclose(file); assert(n >= npes); beg_row = (HYPRE_Int) ((HYPRE_Real)(mype*n) / npes) + 1; /* assumes 1-based */ end_row = (HYPRE_Int) ((HYPRE_Real)((mype+1)* n) / npes); if (mype == 0) assert(beg_row == 1); if (mype == npes-1) assert(end_row == n); #ifdef EMSOLVE beg_row--; end_row--; #endif x = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real)); b = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real)); A = MatrixCreate(hypre_MPI_COMM_WORLD, beg_row, end_row); MatrixRead(A, argv[3]); if (mype == 0) hypre_printf("%s\n", argv[3]); /* MatrixPrint(A, "A"); */ /* Right-hand side */ if (argc > 4) { RhsRead(b, A, argv[4]); if (mype == 0) hypre_printf("Using rhs from %s\n", argv[4]); } else { for (i=0; i<end_row-beg_row+1; i++) b[i] = (HYPRE_Real) (2*rand()) / (HYPRE_Real) RAND_MAX - 1.0; } while (num_runs && num_runs >= -1) { /* Initial guess */ for (i=0; i<end_row-beg_row+1; i++) x[i] = 0.0; if (num_runs == -1) { thresh = 0.0; nlevels = 0; filter = 0.0; loadbal = 0.0; } else { if (mype == 0) { #if PARASAILS_EXT_PATTERN hypre_printf("Enter parameters threshg, thresh, nlevels, " "filter, beta:\n"); fflush(stdout); hypre_scanf("%lf %lf %d %lf %lf", &threshg, &thresh, &nlevels, &filter, &loadbal); #else hypre_printf("Enter parameters thresh, nlevels, " "filter, beta:\n"); fflush(stdout); hypre_scanf("%lf %d %lf %lf", &thresh, &nlevels, &filter, &loadbal); #endif } hypre_MPI_Bcast(&threshg, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD); hypre_MPI_Bcast(&thresh, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD); hypre_MPI_Bcast(&nlevels, 1, HYPRE_MPI_INT, 0, hypre_MPI_COMM_WORLD); hypre_MPI_Bcast(&filter, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD); hypre_MPI_Bcast(&loadbal, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD); if (nlevels < 0) break; } /************** * Setup phase **************/ hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); time0 = hypre_MPI_Wtime(); ps = ParaSailsCreate(hypre_MPI_COMM_WORLD, beg_row, end_row, symmetric); ps->loadbal_beta = loadbal; #if PARASAILS_EXT_PATTERN ParaSailsSetupPatternExt(ps, A, threshg, thresh, nlevels); #else ParaSailsSetupPattern(ps, A, thresh, nlevels); #endif time1 = hypre_MPI_Wtime(); setup_time = time1-time0; cost = ParaSailsStatsPattern(ps, A); if (cost > 5.e11) { hypre_printf("Aborting setup and solve due to high cost.\n"); goto cleanup; } hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); time0 = hypre_MPI_Wtime(); err = ParaSailsSetupValues(ps, A, filter); if (err != 0) { hypre_printf("ParaSailsSetupValues returned error.\n"); goto cleanup; } time1 = hypre_MPI_Wtime(); setup_time += (time1-time0); ParaSailsStatsValues(ps, A); if (!strncmp(argv[3], "testpsmat", 8)) MatrixPrint(ps->M, "M"); #if 0 if (mype == 0) hypre_printf("SETTING UP VALUES AGAIN WITH FILTERED PATTERN\n"); ps->loadbal_beta = 0; ParaSailsSetupValues(ps, A, 0.0); #endif /***************** * Solution phase *****************/ niter = 3000; if (MatrixNnz(ps->M) == n) /* if diagonal preconditioner */ niter = 5000; hypre_MPI_Barrier(hypre_MPI_COMM_WORLD); time0 = hypre_MPI_Wtime(); if (symmetric == 1) PCG_ParaSails(A, ps, b, x, 1.e-8, niter); else FGMRES_ParaSails(A, ps, b, x, 50, 1.e-8, niter); time1 = hypre_MPI_Wtime(); solve_time = time1-time0; hypre_MPI_Reduce(&setup_time, &max_setup_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD); hypre_MPI_Reduce(&solve_time, &max_solve_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD); if (mype == 0) { hypre_printf("**********************************************\n"); hypre_printf("*** Setup Solve Total\n"); hypre_printf("III %8.1f %8.1f %8.1f\n", max_setup_time, max_solve_time, max_setup_time+max_solve_time); hypre_printf("**********************************************\n"); } cleanup: ParaSailsDestroy(ps); num_runs--; } free(x); free(b); MatrixDestroy(A); hypre_MPI_Finalize(); return 0; }
void Euclid_dhApply(Euclid_dh ctx, double *rhs, double *lhs) { START_FUNC_DH double *rhs_, *lhs_; double t1, t2; t1 = hypre_MPI_Wtime(); /* default settings; for everything except PILU */ ctx->from = 0; ctx->to = ctx->m; /* case 1: no preconditioning */ if (! strcmp(ctx->algo_ilu, "none") || ! strcmp(ctx->algo_par, "none")) { HYPRE_Int i, m = ctx->m; for (i=0; i<m; ++i) lhs[i] = rhs[i]; goto END_OF_FUNCTION; } /*---------------------------------------------------------------- * permute and scale rhs vector *----------------------------------------------------------------*/ /* permute rhs vector */ if (ctx->sg != NULL) { /* hypre_printf("@@@@@@@@@@@@@@@@@ permute_vec_n2o_private\n"); */ permute_vec_n2o_private(ctx, rhs, lhs); CHECK_V_ERROR; rhs_ = lhs; lhs_ = ctx->work2; } else { rhs_ = rhs; lhs_ = lhs; } /* scale rhs vector */ if (ctx->isScaled) { /* hypre_printf("@@@@@@@@@@@@@@@@@ scale_rhs_private\n"); */ scale_rhs_private(ctx, rhs_); CHECK_V_ERROR; } /* note: rhs_ is permuted, scaled; the input, "rhs" vector has not been disturbed. */ /*---------------------------------------------------------------- * big switch to choose the appropriate triangular solve *----------------------------------------------------------------*/ /* sequential and mpi block jacobi cases */ if (np_dh == 1 || ! strcmp(ctx->algo_par, "bj") ) { Factor_dhSolveSeq(rhs_, lhs_, ctx); CHECK_V_ERROR; } /* pilu case */ else { Factor_dhSolve(rhs_, lhs_, ctx); CHECK_V_ERROR; } /*---------------------------------------------------------------- * unpermute lhs vector * (note: don't need to unscale, because we were clever) *----------------------------------------------------------------*/ if (ctx->sg != NULL) { permute_vec_o2n_private(ctx, lhs_, lhs); CHECK_V_ERROR; } END_OF_FUNCTION: ; t2 = hypre_MPI_Wtime(); /* collective timing for triangular solves */ ctx->timing[TRI_SOLVE_T] += (t2 - t1); /* collective timing for setup+krylov+triSolves (intent is to time linear solve, but this is at best probelematical!) */ ctx->timing[TOTAL_SOLVE_TEMP_T] = t2 - ctx->timing[SOLVE_START_T]; /* total triangular solve count */ ctx->its += 1; ctx->itsTotal += 1; END_FUNC_DH }
static void matvec_timing(MPI_Comm comm, Matrix *mat) { HYPRE_Real time0, time1; HYPRE_Real trial1, trial2, trial3, trial4, trial5, trial6; HYPRE_Real *temp1, *temp2; HYPRE_Int i, mype; HYPRE_Int n = mat->end_row - mat->beg_row + 1; temp1 = (HYPRE_Real *) calloc(n, sizeof(HYPRE_Real)); temp2 = (HYPRE_Real *) calloc(n, sizeof(HYPRE_Real)); /* warm-up */ hypre_MPI_Barrier(comm); for (i=0; i<100; i++) MatrixMatvec(mat, temp1, temp2); hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvec(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial1 = time1-time0; hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvec(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial2 = time1-time0; hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvec(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial3 = time1-time0; hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvecSerial(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial4 = time1-time0; hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvecSerial(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial5 = time1-time0; hypre_MPI_Barrier(comm); time0 = hypre_MPI_Wtime(); for (i=0; i<100; i++) MatrixMatvecSerial(mat, temp1, temp2); hypre_MPI_Barrier(comm); time1 = hypre_MPI_Wtime(); trial6 = time1-time0; hypre_MPI_Comm_rank(comm, &mype); if (mype == 0) hypre_printf("Timings: %f %f %f Serial: %f %f %f\n", trial1, trial2, trial3, trial4, trial5, trial6); fflush(stdout); /* this is all we wanted, so don't waste any more cycles */ exit(0); }