hypre_ParCSRBooleanMatrix *
hypre_CSRBooleanMatrixToParCSRBooleanMatrix( MPI_Comm comm,
                                             hypre_CSRBooleanMatrix *A,
                                             HYPRE_Int *row_starts,
                                             HYPRE_Int *col_starts )
{
   HYPRE_Int           global_data[2];
   HYPRE_Int           global_num_rows;
   HYPRE_Int           global_num_cols;
   HYPRE_Int          *local_num_rows;
   HYPRE_Int           num_procs, my_id;
   HYPRE_Int          *local_num_nonzeros;
   HYPRE_Int           num_nonzeros;
   HYPRE_Int          *a_i;
   HYPRE_Int          *a_j;
   hypre_CSRBooleanMatrix *local_A;
   hypre_MPI_Request  *requests;
   hypre_MPI_Status   *status, status0;
   hypre_MPI_Datatype *csr_matrix_datatypes;
   hypre_ParCSRBooleanMatrix *par_matrix;
   HYPRE_Int           first_col_diag;
   HYPRE_Int           last_col_diag;
   HYPRE_Int           i, j, ind;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   /* Only the root owns the sequential matrix; broadcast its global sizes. */
   if (my_id == 0)
   {
      global_data[0] = hypre_CSRBooleanMatrix_Get_NRows(A);
      global_data[1] = hypre_CSRBooleanMatrix_Get_NCols(A);
      a_i = hypre_CSRBooleanMatrix_Get_I(A);
      a_j = hypre_CSRBooleanMatrix_Get_J(A);
   }
   hypre_MPI_Bcast(global_data, 2, HYPRE_MPI_INT, 0, comm);
   global_num_rows = global_data[0];
   global_num_cols = global_data[1];

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype, num_procs);

   par_matrix = hypre_ParCSRBooleanMatrixCreate(comm, global_num_rows,
                                                global_num_cols,
                                                row_starts, col_starts, 0, 0, 0);

   row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(par_matrix);
   col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(par_matrix);

   for (i = 0; i < num_procs; i++)
   {
      local_num_rows[i] = row_starts[i+1] - row_starts[i];
   }

   /* Root counts the nonzeros destined for each processor and scatters them. */
   if (my_id == 0)
   {
      local_num_nonzeros = hypre_CTAlloc(HYPRE_Int, num_procs);
      for (i = 0; i < num_procs-1; i++)
      {
         local_num_nonzeros[i] = a_i[row_starts[i+1]] - a_i[row_starts[i]];
      }
      local_num_nonzeros[num_procs-1] = a_i[global_num_rows]
                                        - a_i[row_starts[num_procs-1]];
   }
   hypre_MPI_Scatter(local_num_nonzeros, 1, HYPRE_MPI_INT,
                     &num_nonzeros, 1, HYPRE_MPI_INT, 0, comm);

   if (my_id == 0)
   {
      num_nonzeros = local_num_nonzeros[0];
   }

   local_A = hypre_CSRBooleanMatrixCreate(local_num_rows[my_id],
                                          global_num_cols, num_nonzeros);

   if (my_id == 0)
   {
      /* Send each remote processor its block of rows as a derived datatype. */
      requests = hypre_CTAlloc(hypre_MPI_Request, num_procs-1);
      status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1);
      j = 0;
      for (i = 1; i < num_procs; i++)
      {
         ind = a_i[row_starts[i]];
         hypre_BuildCSRBooleanMatrixMPIDataType(local_num_nonzeros[i],
                                                local_num_rows[i],
                                                &a_i[row_starts[i]],
                                                &a_j[ind],
                                                &csr_matrix_datatypes[i]);
         hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i],
                         i, 0, comm, &requests[j++]);
         hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
      }
      /* The root's local block aliases the global I/J arrays. */
      hypre_CSRBooleanMatrix_Get_I(local_A) = a_i;
      hypre_CSRBooleanMatrix_Get_J(local_A) = a_j;
      hypre_MPI_Waitall(num_procs-1, requests, status);
      hypre_TFree(requests);
      hypre_TFree(status);
      hypre_TFree(local_num_nonzeros);
   }
   else
   {
      hypre_CSRBooleanMatrixInitialize(local_A);
      hypre_BuildCSRBooleanMatrixMPIDataType(num_nonzeros,
                                             local_num_rows[my_id],
                                             hypre_CSRBooleanMatrix_Get_I(local_A),
                                             hypre_CSRBooleanMatrix_Get_J(local_A),
                                             csr_matrix_datatypes);
      hypre_MPI_Recv(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[0],
                     0, 0, comm, &status0);
      hypre_MPI_Type_free(csr_matrix_datatypes);
   }

   first_col_diag = col_starts[my_id];
   last_col_diag = col_starts[my_id+1] - 1;

   BooleanGenerateDiagAndOffd(local_A, par_matrix,
                              first_col_diag, last_col_diag);

   /* set pointers back to NULL before destroying */
   if (my_id == 0)
   {
      hypre_CSRBooleanMatrix_Get_I(local_A) = NULL;
      hypre_CSRBooleanMatrix_Get_J(local_A) = NULL;
   }
   hypre_CSRBooleanMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
   hypre_TFree(csr_matrix_datatypes);

   return par_matrix;
}
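/* Illustrative usage sketch (not part of hypre): distribute a boolean CSR
   matrix held on rank 0 across a communicator. Only rank 0 reads the
   sequential matrix inside the conversion, so non-root ranks may pass NULL
   for it. Assumption: as with hypre_ParCSRMatrixCreate, passing NULL for
   row_starts/col_starts is taken to mean "let the Create routine generate
   an even partitioning"; if that does not hold in a given hypre version,
   pass explicit partitionings instead. */
static hypre_ParCSRBooleanMatrix *
example_distribute_boolean_matrix( MPI_Comm comm, hypre_CSRBooleanMatrix *seq_A )
{
   HYPRE_Int my_id;
   hypre_MPI_Comm_rank(comm, &my_id);

   /* The conversion broadcasts the global sizes and sends each rank its rows. */
   return hypre_CSRBooleanMatrixToParCSRBooleanMatrix(
             comm, (my_id == 0) ? seq_A : NULL, NULL, NULL);
}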
hypre_ParVector *
hypre_VectorToParVector( MPI_Comm comm, hypre_Vector *v, HYPRE_Int *vec_starts )
{
   HYPRE_Int           global_size;
   HYPRE_Int           local_size;
   HYPRE_Int           num_vectors;
   HYPRE_Int           num_procs, my_id;
   HYPRE_Int           global_vecstride, vecstride, idxstride;
   hypre_ParVector    *par_vector;
   hypre_Vector       *local_vector;
   double             *v_data;
   double             *local_data;
   hypre_MPI_Request  *requests;
   hypre_MPI_Status   *status, status0;
   HYPRE_Int           i, j, k, p;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if (my_id == 0)
   {
      global_size = hypre_VectorSize(v);
      v_data = hypre_VectorData(v);
      num_vectors = hypre_VectorNumVectors(v); /* for multivectors */
      global_vecstride = hypre_VectorVectorStride(v);
   }

   hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm);
   hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);
   hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm);

   if (num_vectors == 1)
      par_vector = hypre_ParVectorCreate(comm, global_size, vec_starts);
   else
      par_vector = hypre_ParMultiVectorCreate(comm, global_size, vec_starts,
                                              num_vectors);

   vec_starts = hypre_ParVectorPartitioning(par_vector);
   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   hypre_ParVectorInitialize(par_vector);
   local_vector = hypre_ParVectorLocalVector(par_vector);
   local_data = hypre_VectorData(local_vector);
   vecstride = hypre_VectorVectorStride(local_vector);
   idxstride = hypre_VectorIndexStride(local_vector);
   /* so far the only implemented multivector StorageMethod is 0 */
   hypre_assert(idxstride == 1);

   if (my_id == 0)
   {
      /* Post one send per vector per remote processor ... */
      requests = hypre_CTAlloc(hypre_MPI_Request, num_vectors*(num_procs-1));
      status = hypre_CTAlloc(hypre_MPI_Status, num_vectors*(num_procs-1));
      k = 0;
      for (p = 1; p < num_procs; p++)
         for (j = 0; j < num_vectors; ++j)
         {
            hypre_MPI_Isend(&v_data[vec_starts[p]] + j*global_vecstride,
                            vec_starts[p+1] - vec_starts[p],
                            hypre_MPI_DOUBLE, p, 0, comm, &requests[k++]);
         }
      /* ... and copy the root's own piece directly. */
      if (num_vectors == 1)
      {
         for (i = 0; i < local_size; i++)
            local_data[i] = v_data[i];
      }
      else
      {
         for (j = 0; j < num_vectors; ++j)
            for (i = 0; i < local_size; i++)
               local_data[i + j*vecstride] = v_data[i + j*global_vecstride];
      }
      /* wait on every posted send, num_vectors per remote processor
         (waiting on only num_procs-1 requests would leak the rest) */
      hypre_MPI_Waitall(num_vectors*(num_procs-1), requests, status);
      hypre_TFree(requests);
      hypre_TFree(status);
   }
   else
   {
      for (j = 0; j < num_vectors; ++j)
         hypre_MPI_Recv(local_data + j*vecstride, local_size,
                        hypre_MPI_DOUBLE, 0, 0, comm, &status0);
   }

   return par_vector;
}
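/* Illustrative usage sketch (not part of hypre): scatter a sequential vector
   that lives on rank 0 into a hypre_ParVector. The global vector only needs
   to be valid and filled on rank 0; its size and data are read there and
   broadcast/sent inside hypre_VectorToParVector. vec_starts is assumed to be
   a partitioning built consistently on every rank, e.g. the row partitioning
   of an existing ParCSR matrix. */
static hypre_ParVector *
example_scatter_vector( MPI_Comm comm, hypre_Vector *global_v,
                        HYPRE_Int *vec_starts )
{
   HYPRE_Int my_id;
   hypre_MPI_Comm_rank(comm, &my_id);

   /* Non-root ranks receive their local pieces inside the call. */
   return hypre_VectorToParVector(comm, (my_id == 0) ? global_v : NULL,
                                  vec_starts);
}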
void
hypre_ParChordMatrix_RowStarts( hypre_ParChordMatrix *Ac, MPI_Comm comm,
                                HYPRE_Int **row_starts, HYPRE_Int *global_num_cols )
/* This function computes the ParCSRMatrix-style row_starts from a chord matrix.
   It requires that the idofs of the chord matrix be partitioned among
   processors, so their numbering is monotonic with the processor number;
   see below.

   The algorithm: each proc. p knows its min & max global row & col numbers.
   Mins are first_index_rdof[p], first_index_idof[p]
   ***IF*** these were in proper order (see below),
   first_index_rdof[p] is row_starts[p].
   Add num_rdofs-1 to get the max, i.e. add num_rdofs to get row_starts[p+1]
   (IF the processors are ordered thus).
   Compute these, then broadcast to the other processors to form row_starts.
   (We also could get global_num_rows by an AllReduce num_idofs.)
   We get global_num_cols by taking the min and max over processors of
   the min and max col no.s on each processor.

   If the chord matrix is not ordered so the above will work, then we would
   need to completely move matrices around sometimes, a very expensive
   operation. The problem is that the chord matrix format makes no assumptions
   about processor order, but the ParCSR format assumes that
      p<q => (local row numbers of p) < (local row numbers of q)
   Maybe instead I could change the global numbering scheme as part of this
   conversion.
   A closely related ordering-type problem to watch for: row_starts must be a
   partition for a ParCSRMatrix. In a ChordMatrix, the struct itself makes no
   guarantees, but Panayot said, in essence, that row_starts will be a
   partition.
   col_starts should be NULL; later we shall let the Create function compute
   one. */
{
   HYPRE_Int * fis_idof = hypre_ParChordMatrixFirstindexIdof(Ac);
   HYPRE_Int * fis_rdof = hypre_ParChordMatrixFirstindexRdof(Ac);
   HYPRE_Int my_id, num_procs;
   HYPRE_Int num_idofs = hypre_ParChordMatrixNumIdofs(Ac);
   HYPRE_Int num_rdofs = hypre_ParChordMatrixNumRdofs(Ac);
   HYPRE_Int min_rdof, max_rdof, global_min_rdof, global_max_rdof;
   HYPRE_Int p, lens[2], lastlens[2];
   hypre_MPI_Status *status;
   hypre_MPI_Request *request;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   request = hypre_CTAlloc(hypre_MPI_Request, 1);
   status = hypre_CTAlloc(hypre_MPI_Status, 1);

   min_rdof = fis_rdof[my_id];
   max_rdof = min_rdof + num_rdofs;
   lens[0] = num_idofs;
   lens[1] = num_rdofs;

   /* row_starts (except last value) */
   *row_starts = hypre_CTAlloc(HYPRE_Int, num_procs+1);
   for (p = 0; p < num_procs; ++p)
   {
      (*row_starts)[p] = fis_idof[p];
   }

   /* check that ordering and partitioning of rows is as expected
      (much is missing, and even then not perfect)... */
   if (my_id < num_procs-1)
      hypre_MPI_Isend(lens, 2, HYPRE_MPI_INT, my_id+1, 0, comm, request);
   if (my_id > 0)
      hypre_MPI_Recv(lastlens, 2, HYPRE_MPI_INT, my_id-1, 0, comm, status);
   if (my_id < num_procs-1)
      hypre_MPI_Waitall(1, request, status);
   if (my_id > 0)
      hypre_assert((*row_starts)[my_id] == (*row_starts)[my_id-1] + lastlens[0]);
   hypre_TFree(request);
   hypre_TFree(status);

   /* Get the upper bound for all the rows */
   hypre_MPI_Bcast(lens, 2, HYPRE_MPI_INT, num_procs-1, comm);
   (*row_starts)[num_procs] = (*row_starts)[num_procs-1] + lens[0];

   /* Global number of columns */
/* hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT,
                        hypre_MPI_SUM, comm ); */
   hypre_MPI_Allreduce(&min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT,
                       hypre_MPI_MIN, comm);
   hypre_MPI_Allreduce(&max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT,
                       hypre_MPI_MAX, comm);
   *global_num_cols = global_max_rdof - global_min_rdof;
}
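/* Worked example (illustrative numbers, not from hypre): on 3 processors with
   first_index_idof = {0, 10, 25} and num_idofs = {10, 15, 12}, every rank
   copies fis_idof into row_starts, giving {0, 10, 25, ?}. The broadcast of
   lens[0] = 12 from the last rank then fills in row_starts[3] = 25 + 12 = 37.
   The neighbor exchange of lens only asserts that rank p's start equals
   rank p-1's start plus rank p-1's num_idofs, i.e. that the idof numbering
   is contiguous and monotonic with processor rank, as the header comment
   above requires. */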
HYPRE_Int
main( HYPRE_Int argc, char *argv[] )
{
   HYPRE_Int   mype, npes;
   HYPRE_Int   symmetric;
   HYPRE_Int   num_runs;
   Matrix     *A;
   ParaSails  *ps;
   FILE       *file;
   HYPRE_Int   n, beg_row, end_row;
   HYPRE_Real  time0, time1;
   HYPRE_Real  setup_time, solve_time;
   HYPRE_Real  max_setup_time, max_solve_time;
   HYPRE_Real  cost;
   HYPRE_Real *x, *b;
   HYPRE_Int   i, niter;
   HYPRE_Real  thresh;
   HYPRE_Real  threshg;
   HYPRE_Int   nlevels;
   HYPRE_Real  filter;
   HYPRE_Real  loadbal;
   HYPRE_Int   err; /* return code from ParaSailsSetupValues */

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mype);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &npes);

   /* Read number of rows in matrix */
   symmetric = atoi(argv[1]);
   num_runs  = atoi(argv[2]);

   file = fopen(argv[3], "r");
   assert(file != NULL);
#ifdef EMSOLVE
   hypre_fscanf(file, "%*d %d\n", &n);
#else
   hypre_fscanf(file, "%d\n", &n);
#endif
   fclose(file);
   assert(n >= npes);

   beg_row = (HYPRE_Int) ((HYPRE_Real)(mype*n) / npes) + 1; /* assumes 1-based */
   end_row = (HYPRE_Int) ((HYPRE_Real)((mype+1)*n) / npes);

   if (mype == 0)
      assert(beg_row == 1);
   if (mype == npes-1)
      assert(end_row == n);

#ifdef EMSOLVE
   beg_row--;
   end_row--;
#endif

   x = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));
   b = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));

   A = MatrixCreate(hypre_MPI_COMM_WORLD, beg_row, end_row);

   MatrixRead(A, argv[3]);
   if (mype == 0)
      hypre_printf("%s\n", argv[3]);

   /* MatrixPrint(A, "A"); */

   /* Right-hand side */
   if (argc > 4)
   {
      RhsRead(b, A, argv[4]);
      if (mype == 0)
         hypre_printf("Using rhs from %s\n", argv[4]);
   }
   else
   {
      for (i = 0; i < end_row-beg_row+1; i++)
         b[i] = (HYPRE_Real) (2*rand()) / (HYPRE_Real) RAND_MAX - 1.0;
   }

   while (num_runs && num_runs >= -1)
   {
      /* Initial guess */
      for (i = 0; i < end_row-beg_row+1; i++)
         x[i] = 0.0;

      if (num_runs == -1)
      {
         thresh = 0.0;
         nlevels = 0;
         filter = 0.0;
         loadbal = 0.0;
      }
      else
      {
         if (mype == 0)
         {
#if PARASAILS_EXT_PATTERN
            hypre_printf("Enter parameters threshg, thresh, nlevels, "
                         "filter, beta:\n");
            fflush(stdout);
            hypre_scanf("%lf %lf %d %lf %lf", &threshg, &thresh, &nlevels,
                        &filter, &loadbal);
#else
            hypre_printf("Enter parameters thresh, nlevels, "
                         "filter, beta:\n");
            fflush(stdout);
            hypre_scanf("%lf %d %lf %lf", &thresh, &nlevels,
                        &filter, &loadbal);
#endif
         }

         hypre_MPI_Bcast(&threshg, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
         hypre_MPI_Bcast(&thresh,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
         hypre_MPI_Bcast(&nlevels, 1, HYPRE_MPI_INT,    0, hypre_MPI_COMM_WORLD);
         hypre_MPI_Bcast(&filter,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
         hypre_MPI_Bcast(&loadbal, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);

         if (nlevels < 0)
            break;
      }

      /**************
       * Setup phase
       **************/

      hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
      time0 = hypre_MPI_Wtime();

      ps = ParaSailsCreate(hypre_MPI_COMM_WORLD, beg_row, end_row, symmetric);

      ps->loadbal_beta = loadbal;

#if PARASAILS_EXT_PATTERN
      ParaSailsSetupPatternExt(ps, A, threshg, thresh, nlevels);
#else
      ParaSailsSetupPattern(ps, A, thresh, nlevels);
#endif

      time1 = hypre_MPI_Wtime();
      setup_time = time1-time0;

      cost = ParaSailsStatsPattern(ps, A);
      if (cost > 5.e11)
      {
         hypre_printf("Aborting setup and solve due to high cost.\n");
         goto cleanup;
      }

      hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
      time0 = hypre_MPI_Wtime();

      err = ParaSailsSetupValues(ps, A, filter);
      if (err != 0)
      {
         hypre_printf("ParaSailsSetupValues returned error.\n");
         goto cleanup;
      }

      time1 = hypre_MPI_Wtime();
      setup_time += (time1-time0);

      ParaSailsStatsValues(ps, A);

      if (!strncmp(argv[3], "testpsmat", 8))
         MatrixPrint(ps->M, "M");

#if 0
      if (mype == 0)
         hypre_printf("SETTING UP VALUES AGAIN WITH FILTERED PATTERN\n");
      ps->loadbal_beta = 0;
      ParaSailsSetupValues(ps, A, 0.0);
#endif

      /*****************
       * Solution phase
       *****************/

      niter = 3000;
      if (MatrixNnz(ps->M) == n) /* if diagonal preconditioner */
         niter = 5000;

      hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
      time0 = hypre_MPI_Wtime();

      if (symmetric == 1)
         PCG_ParaSails(A, ps, b, x, 1.e-8, niter);
      else
         FGMRES_ParaSails(A, ps, b, x, 50, 1.e-8, niter);

      time1 = hypre_MPI_Wtime();
      solve_time = time1-time0;

      hypre_MPI_Reduce(&setup_time, &max_setup_time, 1, hypre_MPI_DOUBLE,
                       hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD);
      hypre_MPI_Reduce(&solve_time, &max_solve_time, 1, hypre_MPI_DOUBLE,
                       hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD);

      if (mype == 0)
      {
         hypre_printf("**********************************************\n");
         hypre_printf("***    Setup    Solve    Total\n");
         hypre_printf("III %8.1f %8.1f %8.1f\n", max_setup_time, max_solve_time,
                      max_setup_time+max_solve_time);
         hypre_printf("**********************************************\n");
      }

cleanup:
      ParaSailsDestroy(ps);

      num_runs--;
   }

   free(x);
   free(b);

   MatrixDestroy(A);
   hypre_MPI_Finalize();

   return 0;
}
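/* Example invocation of the ParaSails driver above (illustrative; the
   executable name is hypothetical). The arguments are <symmetric> <num_runs>
   <matrixfile> and an optional <rhsfile>, read as argv[1]..argv[4]:

      mpirun -np 4 ./ps_driver 1 3 matrix.dat rhs.dat

   Without an rhs file a random right-hand side in [-1,1] is generated, and
   each run prompts on rank 0 for thresh, nlevels, filter and beta (or
   threshg as well when PARASAILS_EXT_PATTERN is set). */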
HYPRE_Int
HYPRE_IJMatrixCreate( MPI_Comm comm, HYPRE_Int ilower, HYPRE_Int iupper,
                      HYPRE_Int jlower, HYPRE_Int jupper,
                      HYPRE_IJMatrix *matrix )
{
   HYPRE_Int *row_partitioning;
   HYPRE_Int *col_partitioning;
   HYPRE_Int *info;
   HYPRE_Int  num_procs;
   HYPRE_Int  myid;

   hypre_IJMatrix *ijmatrix;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int  row0, col0, rowN, colN;
#else
   HYPRE_Int *recv_buf;
   HYPRE_Int  i, i4;
   HYPRE_Int  square;
#endif

   ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1);

   hypre_IJMatrixComm(ijmatrix)         = comm;
   hypre_IJMatrixObject(ijmatrix)       = NULL;
   hypre_IJMatrixTranslator(ijmatrix)   = NULL;
   hypre_IJMatrixObjectType(ijmatrix)   = HYPRE_UNITIALIZED;
   hypre_IJMatrixAssembleFlag(ijmatrix) = 0;
   hypre_IJMatrixPrintLevel(ijmatrix)   = 0;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &myid);

   /* Sanity checks on the local row and column ranges. */
   if (ilower > iupper+1 || ilower < 0)
   {
      hypre_error_in_arg(2);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (iupper < -1)
   {
      hypre_error_in_arg(3);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(4);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jupper < -1)
   {
      hypre_error_in_arg(5);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION

   info = hypre_CTAlloc(HYPRE_Int, 2);

   row_partitioning = hypre_CTAlloc(HYPRE_Int, 2);
   col_partitioning = hypre_CTAlloc(HYPRE_Int, 2);

   row_partitioning[0] = ilower;
   row_partitioning[1] = iupper+1;
   col_partitioning[0] = jlower;
   col_partitioning[1] = jupper+1;

   /* now we need the global number of rows and columns as well
      as the global first row and column index */

   /* proc 0 has the first row and col */
   if (myid == 0)
   {
      info[0] = ilower;
      info[1] = jlower;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm);
   row0 = info[0];
   col0 = info[1];

   /* proc (num_procs-1) has the last row and col */
   if (myid == (num_procs-1))
   {
      info[0] = iupper;
      info[1] = jupper;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm);
   rowN = info[0];
   colN = info[1];

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0;
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0;
   hypre_IJMatrixGlobalNumRows(ijmatrix)  = rowN - row0 + 1;
   hypre_IJMatrixGlobalNumCols(ijmatrix)  = colN - col0 + 1;

   hypre_TFree(info);

#else

   info = hypre_CTAlloc(HYPRE_Int, 4);
   recv_buf = hypre_CTAlloc(HYPRE_Int, 4*num_procs);
   row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   info[0] = ilower;
   info[1] = iupper;
   info[2] = jlower;
   info[3] = jupper;

   /* Generate the row and column partitionings through information exchange
      across all processors, check whether the matrix is square, and check
      that the partitionings match, i.e. have no overlaps or gaps. If there
      are overlaps or gaps in the row or column partitioning, a generic error
      is set and the routine returns. */

   hypre_MPI_Allgather(info, 4, HYPRE_MPI_INT, recv_buf, 4, HYPRE_MPI_INT, comm);

   row_partitioning[0] = recv_buf[0];
   square = 1;
   for (i = 0; i < num_procs-1; i++)
   {
      i4 = 4*i;
      if (recv_buf[i4+1] != (recv_buf[i4+4]-1))
      {
         hypre_error(HYPRE_ERROR_GENERIC);
         hypre_TFree(ijmatrix);
         hypre_TFree(info);
         hypre_TFree(recv_buf);
         hypre_TFree(row_partitioning);
         return hypre_error_flag;
      }
      else
         row_partitioning[i+1] = recv_buf[i4+4];

      if ((square && (recv_buf[i4] != recv_buf[i4+2])) ||
          (recv_buf[i4+1] != recv_buf[i4+3]))
      {
         square = 0;
      }
   }

   i4 = (num_procs-1)*4;
   row_partitioning[num_procs] = recv_buf[i4+1]+1;

   if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3]))
      square = 0;

   if (square)
      col_partitioning = row_partitioning;
   else
   {
      col_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);
      col_partitioning[0] = recv_buf[2];
      for (i = 0; i < num_procs-1; i++)
      {
         i4 = 4*i;
         if (recv_buf[i4+3] != recv_buf[i4+6]-1)
         {
            hypre_error(HYPRE_ERROR_GENERIC);
            hypre_TFree(ijmatrix);
            hypre_TFree(info);
            hypre_TFree(recv_buf);
            hypre_TFree(row_partitioning);
            hypre_TFree(col_partitioning);
            return hypre_error_flag;
         }
         else
            col_partitioning[i+1] = recv_buf[i4+6];
      }
      col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1;
   }

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0];
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0];
   hypre_IJMatrixGlobalNumRows(ijmatrix)  = row_partitioning[num_procs] -
                                            row_partitioning[0];
   hypre_IJMatrixGlobalNumCols(ijmatrix)  = col_partitioning[num_procs] -
                                            col_partitioning[0];

   hypre_TFree(info);
   hypre_TFree(recv_buf);

#endif

   hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning;
   hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning;

   *matrix = (HYPRE_IJMatrix) ijmatrix;

   return hypre_error_flag;
}
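/* Illustrative usage sketch of the public IJ interface that sits on top of
   HYPRE_IJMatrixCreate, using standard documented hypre calls; the local row
   range and the diagonal values are made up for the example. Builds a square
   matrix whose rows [ilower, iupper] live on this rank, with a 1 on each
   diagonal entry. */
static HYPRE_IJMatrix
example_build_identity( MPI_Comm comm, HYPRE_Int ilower, HYPRE_Int iupper )
{
   HYPRE_IJMatrix ij_A;
   HYPRE_Int      row, ncols = 1;
   HYPRE_Real     one = 1.0;

   HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A);
   HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR);
   HYPRE_IJMatrixInitialize(ij_A);

   /* one entry per row: A(row,row) = 1 */
   for (row = ilower; row <= iupper; row++)
   {
      HYPRE_IJMatrixSetValues(ij_A, 1, &ncols, &row, &row, &one);
   }

   HYPRE_IJMatrixAssemble(ij_A);
   return ij_A;
}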