HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector *y_local = hypre_ParVectorLocalVector(y);
   hypre_Vector *y_tmp;
   HYPRE_Int vecstride = hypre_VectorVectorStride( y_local );
   HYPRE_Int idxstride = hypre_VectorIndexStride( y_local );
   HYPRE_Complex *y_tmp_data, **y_buf_data;
   HYPRE_Complex *y_local_data = hypre_VectorData(y_local);

   HYPRE_Int num_rows  = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int num_cols  = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int num_vectors = hypre_VectorNumVectors(y_local);

   HYPRE_Int i, j, jv, index, start, num_sends;

   HYPRE_Int ierr = 0;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  MatvecT returns ierr = 1 if the
    * length of X doesn't equal the number of rows of A, ierr = 2 if the
    * length of Y doesn't equal the number of columns of A, and ierr = 3
    * if both are true.
    *
    * Because temporary vectors are often used in MatvecT, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   if (num_rows != x_size) ierr = 1;
   if (num_cols != y_size) ierr = 2;
   if (num_rows != x_size && num_cols != y_size) ierr = 3;

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*, num_vectors);

   if ( num_vectors==1 )
   {
      y_tmp = hypre_SeqVectorCreate(num_cols_offd);
   }
   else
   {
      y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd, num_vectors);
   }
   hypre_SeqVectorInitialize(y_tmp);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   y_tmp_data = hypre_VectorData(y_tmp);
   y_local_data = hypre_VectorData(y_local);

   hypre_assert( idxstride==1 ); /* only 'column' storage of multivectors
                                  * implemented so far */

   if (num_cols_offd)
   {
      hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp);
   }

   for ( jv=0; jv<num_vectors; ++jv )
   {
      /* this is where we assume multivectors are 'column' storage */
      comm_handle[jv] = hypre_ParCSRCommHandleCreate(2, comm_pkg,
                                                     &(y_tmp_data[jv*num_cols_offd]),
                                                     y_buf_data[jv]);
   }

   hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)]
               += y_buf_data[0][index++];
      }
   }
   else
   {
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               y_local_data[ jv*vecstride +
                             idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j) ]
                  += y_buf_data[jv][index++];
         }
      }
   }

   hypre_SeqVectorDestroy(y_tmp);
   y_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv )
      hypre_TFree(y_buf_data[jv]);
   hypre_TFree(y_buf_data);

   return ierr;
}
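/* Usage sketch (illustrative, not part of hypre): apply the transpose
 * product above and report the informational size flag.  Assumes the
 * prototypes above are in scope, that x is partitioned like the rows of
 * A, and y like its columns. */
static HYPRE_Int
example_apply_transpose( HYPRE_Complex alpha, hypre_ParCSRMatrix *A,
                         hypre_ParVector *x, HYPRE_Complex beta,
                         hypre_ParVector *y )
{
   HYPRE_Int ierr = hypre_ParCSRMatrixMatvecT(alpha, A, x, beta, y);
   if (ierr)
   {
      /* 1: |x| != global rows of A, 2: |y| != global cols of A, 3: both */
      hypre_printf("MatvecT size flag: %d\n", ierr);
   }
   return ierr;
}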
HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex       alpha,
                          hypre_ParCSRMatrix *A,
                          hypre_ParVector    *x,
                          HYPRE_Complex       beta,
                          hypre_ParVector    *y )
{
   hypre_ParCSRCommHandle **comm_handle;
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector *y_local = hypre_ParVectorLocalVector(y);
   HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector *x_tmp;
   HYPRE_Int x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int ierr = 0;
   HYPRE_Int num_sends, i, j, jv, index, start;

   HYPRE_Int vecstride = hypre_VectorVectorStride( x_local );
   HYPRE_Int idxstride = hypre_VectorIndexStride( x_local );

   HYPRE_Complex *x_tmp_data, **x_buf_data;
   HYPRE_Complex *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    * Check for size compatibility.  ParMatvec returns ierr = 11 if the
    * length of X doesn't equal the number of columns of A, ierr = 12 if
    * the length of Y doesn't equal the number of rows of A, and
    * ierr = 13 if both are true.
    *
    * Because temporary vectors are often used in ParMatvec, none of
    * these conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( idxstride>0 );

   if (num_cols != x_size) ierr = 11;
   if (num_rows != y_size) ierr = 12;
   if (num_cols != x_size && num_rows != y_size) ierr = 13;

   hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors );

   if ( num_vectors==1 )
   {
      x_tmp = hypre_SeqVectorCreate( num_cols_offd );
   }
   else
   {
      hypre_assert( num_vectors>1 );
      x_tmp = hypre_SeqMultiVectorCreate( num_cols_offd, num_vectors );
   }
   hypre_SeqVectorInitialize(x_tmp);
   x_tmp_data = hypre_VectorData(x_tmp);

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*, num_vectors);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   x_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
      x_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

   if ( num_vectors==1 )
   {
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[0][index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j)];
      }
   }
   else
   {
      for ( jv=0; jv<num_vectors; ++jv )
      {
         index = 0;
         for (i = 0; i < num_sends; i++)
         {
            start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
            for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
               x_buf_data[jv][index++]
                  = x_local_data[ jv*vecstride +
                                  idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j) ];
         }
      }
   }

   hypre_assert( idxstride==1 );
   /* The assert is because the following loop only works for 'column'
      storage of a multivector.  This needs to be fixed to work more
      generally, at least for 'row' storage.  That, in turn, means either
      changing CommPkg so that num_sends is num_zones*num_vectors (not
      num_zones), or, less dangerously, putting a stride into the logic of
      CommHandleCreate (the stride coming either from a new argument or a
      new variable inside CommPkg), or moving the num_vectors iteration
      inside CommHandleCreate (perhaps as a new multivector variant of it). */

   for ( jv=0; jv<num_vectors; ++jv )
   {
      comm_handle[jv] = hypre_ParCSRCommHandleCreate(1, comm_pkg,
                                                     x_buf_data[jv],
                                                     &(x_tmp_data[jv*num_cols_offd]));
   }

   /* local diag product proceeds while the halo exchange is in flight */
   hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local);

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   if (num_cols_offd)
   {
      hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local);
   }

   hypre_SeqVectorDestroy(x_tmp);
   x_tmp = NULL;
   for ( jv=0; jv<num_vectors; ++jv )
      hypre_TFree(x_buf_data[jv]);
   hypre_TFree(x_buf_data);

   return ierr;
}
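/* Usage sketch (illustrative, not part of hypre): with A, x, y already
 * created on conforming partitionings, the two calls below exercise both
 * beta paths; on every call the halo exchange overlaps with the local
 * diag product as described above. */
static void
example_apply( hypre_ParCSRMatrix *A, hypre_ParVector *x, hypre_ParVector *y )
{
   hypre_ParCSRMatrixMatvec( 1.0, A, x, 0.0, y);  /* y  = A*x       */
   hypre_ParCSRMatrixMatvec(-0.5, A, x, 1.0, y);  /* y -= 0.5*(A*x) */
}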
int
hypre_CSRMatrixMatvecT( double           alpha,
                        hypre_CSRMatrix *A,
                        hypre_Vector    *x,
                        double           beta,
                        hypre_Vector    *y )
{
   double *A_data   = hypre_CSRMatrixData(A);
   int    *A_i      = hypre_CSRMatrixI(A);
   int    *A_j      = hypre_CSRMatrixJ(A);
   int     num_rows = hypre_CSRMatrixNumRows(A);
   int     num_cols = hypre_CSRMatrixNumCols(A);

   double *x_data = hypre_VectorData(x);
   double *y_data = hypre_VectorData(y);
   int     x_size = hypre_VectorSize(x);
   int     y_size = hypre_VectorSize(y);
   int     num_vectors = hypre_VectorNumVectors(x);
   int     idxstride_y = hypre_VectorIndexStride(y);
   int     vecstride_y = hypre_VectorVectorStride(y);
   int     idxstride_x = hypre_VectorIndexStride(x);
   int     vecstride_x = hypre_VectorVectorStride(x);

   double  temp;
   int     i, i1, j, jv, jj, ns, ne, size, rest;
   int     num_threads;
   int     ierr = 0;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  MatvecT returns ierr = 1 if the
    * length of X doesn't equal the number of rows of A, ierr = 2 if the
    * length of Y doesn't equal the number of columns of A, and ierr = 3
    * if both are true.
    *
    * Because temporary vectors are often used in MatvecT, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_rows != x_size) ierr = 1;
   if (num_cols != y_size) ierr = 2;
   if (num_rows != x_size && num_cols != y_size) ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/
   if (alpha == 0.0)
   {
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= beta;
      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/
   temp = beta / alpha;
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {
      /* Each pass i1 owns the contiguous column block (ns, ne), both
       * bounds exclusive, so writes to y never collide across passes. */
      for (i1 = 0; i1 < num_threads; i1++)
      {
         size = num_cols/num_threads;
         rest = num_cols - size*num_threads;
         if (i1 < rest)
         {
            ns = i1*size + i1 - 1;
            ne = (i1+1)*size + i1 + 1;
         }
         else
         {
            ns = i1*size + rest - 1;
            ne = (i1+1)*size + rest;
         }
         if ( num_vectors==1 )
         {
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  if (j > ns && j < ne)
                     y_data[j] += A_data[jj] * x_data[i];
               }
            }
         }
         else
         {
            for (i = 0; i < num_rows; i++)
            {
               for ( jv=0; jv<num_vectors; ++jv )
               {
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     j = A_j[jj];
                     if (j > ns && j < ne)
                        y_data[ j*idxstride_y + jv*vecstride_y ]
                           += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
                  }
               }
            }
         }
      }
   }
   else
   {
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[j] += A_data[jj] * x_data[i];
            }
         }
         else
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ]
                     += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/
   if (alpha != 1.0)
   {
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
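/* Worked example (a sketch, not from the hypre tests; assumes hypre's
 * standard CSR/vector constructors): for
 *    A = [ 2  0 ]        x = [ 1 ]
 *        [ 1  3 ],           [ 1 ],
 * the transpose product is A^T*x = [2*1 + 1*1, 0*1 + 3*1] = [3, 3]. */
static void
example_csr_matvecT( void )
{
   hypre_CSRMatrix *A = hypre_CSRMatrixCreate(2, 2, 3);
   hypre_Vector *x = hypre_SeqVectorCreate(2);
   hypre_Vector *y = hypre_SeqVectorCreate(2);
   hypre_CSRMatrixInitialize(A);
   hypre_SeqVectorInitialize(x);
   hypre_SeqVectorInitialize(y);

   /* CSR arrays: row pointers, column indices, values */
   hypre_CSRMatrixI(A)[0] = 0;  hypre_CSRMatrixI(A)[1] = 1;  hypre_CSRMatrixI(A)[2] = 3;
   hypre_CSRMatrixJ(A)[0] = 0;  hypre_CSRMatrixData(A)[0] = 2.0;  /* A(0,0) */
   hypre_CSRMatrixJ(A)[1] = 0;  hypre_CSRMatrixData(A)[1] = 1.0;  /* A(1,0) */
   hypre_CSRMatrixJ(A)[2] = 1;  hypre_CSRMatrixData(A)[2] = 3.0;  /* A(1,1) */
   hypre_VectorData(x)[0] = 1.0;
   hypre_VectorData(x)[1] = 1.0;

   hypre_CSRMatrixMatvecT(1.0, A, x, 0.0, y);   /* y = A^T*x = [3, 3] */

   hypre_CSRMatrixDestroy(A);
   hypre_SeqVectorDestroy(x);
   hypre_SeqVectorDestroy(y);
}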
int
hypre_CSRMatrixMatvec( double           alpha,
                       hypre_CSRMatrix *A,
                       hypre_Vector    *x,
                       double           beta,
                       hypre_Vector    *y )
{
   double *A_data   = hypre_CSRMatrixData(A);
   int    *A_i      = hypre_CSRMatrixI(A);
   int    *A_j      = hypre_CSRMatrixJ(A);
   int     num_rows = hypre_CSRMatrixNumRows(A);
   int     num_cols = hypre_CSRMatrixNumCols(A);

   int    *A_rownnz   = hypre_CSRMatrixRownnz(A);
   int     num_rownnz = hypre_CSRMatrixNumRownnz(A);

   double *x_data = hypre_VectorData(x);
   double *y_data = hypre_VectorData(y);
   int     x_size = hypre_VectorSize(x);
   int     y_size = hypre_VectorSize(y);
   int     num_vectors = hypre_VectorNumVectors(x);
   int     idxstride_y = hypre_VectorIndexStride(y);
   int     vecstride_y = hypre_VectorVectorStride(y);
   int     idxstride_x = hypre_VectorIndexStride(x);
   int     vecstride_x = hypre_VectorVectorStride(x);

   double  temp, tempx;
   int     i, j, jj;
   int     m;
   double  xpar = 0.7;
   int     ierr = 0;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  Matvec returns ierr = 1 if the
    * length of X doesn't equal the number of columns of A, ierr = 2 if
    * the length of Y doesn't equal the number of rows of A, and
    * ierr = 3 if both are true.
    *
    * Because temporary vectors are often used in Matvec, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_cols != x_size) ierr = 1;
   if (num_rows != y_size) ierr = 2;
   if (num_cols != x_size && num_rows != y_size) ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/
   if (alpha == 0.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= beta;
      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/
   temp = beta / alpha;
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A*x
    *-----------------------------------------------------------------*/
   /* use the rownnz pointer to do the A*x multiplication
      when num_rownnz is smaller than num_rows */
   if (num_rownnz < xpar*(num_rows))
   {
      for (i = 0; i < num_rownnz; i++)
      {
         m = A_rownnz[i];
         /*
          * for (jj = A_i[m]; jj < A_i[m+1]; jj++)
          * {
          *    j = A_j[jj];
          *    y_data[m] += A_data[jj] * x_data[j];
          * }
          */
         if ( num_vectors==1 )
         {
            tempx = y_data[m];
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               tempx += A_data[jj] * x_data[A_j[jj]];
            y_data[m] = tempx;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               tempx = y_data[ j*vecstride_y + m*idxstride_y ];
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               y_data[ j*vecstride_y + m*idxstride_y ] = tempx;
            }
         }
      }
   }
   else
   {
      /* j must be private as well: the multivector branch uses it as a
         loop index inside the parallel region */
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jj,temp) schedule(static)
#endif
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            temp = y_data[i];
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               temp += A_data[jj] * x_data[A_j[jj]];
            y_data[i] = temp;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               temp = y_data[ j*vecstride_y + i*idxstride_y ];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] = temp;
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/
   if (alpha != 1.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
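/* Continuing the 2x2 sketch above (illustrative): the forward product
 * with the same data gives A*x = [2*1, 1*1 + 3*1] = [2, 4].  Note the
 * rownnz path is taken only when fewer than xpar = 70% of the rows are
 * nonempty; for this dense little matrix the ordinary row loop runs. */
static void
example_csr_matvec( hypre_CSRMatrix *A, hypre_Vector *x, hypre_Vector *y )
{
   hypre_CSRMatrixMatvec(1.0, A, x, 0.0, y);   /* y = A*x = [2, 4] */
}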
hypre_Vector *
hypre_ParVectorToVectorAll( hypre_ParVector *par_v )
{
   MPI_Comm      comm        = hypre_ParVectorComm(par_v);
   HYPRE_Int     global_size = hypre_ParVectorGlobalSize(par_v);
#ifndef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int    *vec_starts  = hypre_ParVectorPartitioning(par_v);
#endif
   hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_v);
   HYPRE_Int     num_procs, my_id;
   HYPRE_Int     num_vectors = hypre_ParVectorNumVectors(par_v);
   hypre_Vector *vector;
   double       *vector_data;
   double       *local_data;
   HYPRE_Int     local_size;
   hypre_MPI_Request *requests;
   hypre_MPI_Status  *status;
   HYPRE_Int     i, j;
   HYPRE_Int    *used_procs;
   HYPRE_Int     num_types, num_requests;
   HYPRE_Int     vec_len, proc_id;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int *new_vec_starts;

   HYPRE_Int num_contacts;
   HYPRE_Int contact_proc_list[1];
   HYPRE_Int contact_send_buf[1];
   HYPRE_Int contact_send_buf_starts[2];
   HYPRE_Int max_response_size;
   HYPRE_Int *response_recv_buf = NULL;
   HYPRE_Int *response_recv_buf_starts = NULL;
   hypre_DataExchangeResponse response_obj;
   hypre_ProcListElements send_proc_obj;

   HYPRE_Int *send_info = NULL;
   hypre_MPI_Status status1;
   HYPRE_Int count, tag1 = 112, tag2 = 223;
   HYPRE_Int start;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

#ifdef HYPRE_NO_GLOBAL_PARTITION

   local_size = hypre_ParVectorLastIndex(par_v) -
                hypre_ParVectorFirstIndex(par_v) + 1;

   /* Determine the procs that hold data of par_v and store their ids in
      used_procs.  This requires a data exchange: each proc that owns
      rows contacts processor 0 with the endpoint of its local range. */

   if (local_size > 0)
   {
      num_contacts = 1;
      contact_proc_list[0] = 0;
      contact_send_buf[0] = hypre_ParVectorLastIndex(par_v);
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 1;
   }
   else
   {
      num_contacts = 0;
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 0;
   }

   /* build the response object; send_proc_obj saves info from contacts */
   send_proc_obj.length = 0;
   send_proc_obj.storage_length = 10;
   send_proc_obj.id = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length);
   send_proc_obj.vec_starts =
      hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1);
   send_proc_obj.vec_starts[0] = 0;
   send_proc_obj.element_storage_length = 10;
   send_proc_obj.elements =
      hypre_CTAlloc(HYPRE_Int, send_proc_obj.element_storage_length);

   max_response_size = 0; /* each response is null */
   response_obj.fill_response = hypre_FillResponseParToVectorAll;
   response_obj.data1 = NULL;
   response_obj.data2 = &send_proc_obj; /* here we keep info from contacts */

   hypre_DataExchangeList(num_contacts,
                          contact_proc_list, contact_send_buf,
                          contact_send_buf_starts, sizeof(HYPRE_Int),
                          sizeof(HYPRE_Int), &response_obj,
                          max_response_size, 1,
                          comm, (void**) &response_recv_buf,
                          &response_recv_buf_starts);

   /* Processor 0 now has a list of ranges for the processors that own
      rows (in send_proc_obj).  It builds the new list of processors and
      an array of vec starts, and sends them to the row owners. */

   if (my_id)
   {
      if (local_size)
      {
         /* look for a message from processor 0 */
         hypre_MPI_Probe(0, tag1, comm, &status1);
         hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count);

         send_info = hypre_CTAlloc(HYPRE_Int, count);
         hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1);

         /* now unpack */
         num_types = send_info[0];
         used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
         new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);

         for (i=1; i<= num_types; i++)
         {
            used_procs[i-1] = send_info[i];
         }
         for (i=num_types+1; i< count; i++)
         {
            new_vec_starts[i-num_types-1] = send_info[i];
         }
      }
      else /* clean up and exit */
      {
         hypre_TFree(send_proc_obj.vec_starts);
         hypre_TFree(send_proc_obj.id);
         hypre_TFree(send_proc_obj.elements);
         if (response_recv_buf) hypre_TFree(response_recv_buf);
         if (response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);
         return NULL;
      }
   }
   else /* my_id == 0 */
   {
      num_types = send_proc_obj.length;
      used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
      new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);

      new_vec_starts[0] = 0;
      for (i=0; i< num_types; i++)
      {
         used_procs[i] = send_proc_obj.id[i];
         new_vec_starts[i+1] = send_proc_obj.elements[i]+1;
      }
      qsort0(used_procs, 0, num_types-1);
      qsort0(new_vec_starts, 0, num_types);

      /* now pack into an array to send */
      count = 2*num_types + 2;
      send_info = hypre_CTAlloc(HYPRE_Int, count);
      send_info[0] = num_types;
      for (i=1; i<= num_types; i++)
      {
         send_info[i] = used_procs[i-1];
      }
      for (i=num_types+1; i< count; i++)
      {
         send_info[i] = new_vec_starts[i-num_types-1];
      }

      requests = hypre_CTAlloc(hypre_MPI_Request, num_types);
      status   = hypre_CTAlloc(hypre_MPI_Status, num_types);

      /* don't send to myself - the list is sorted, so my id would be first */
      start = 0;
      if (used_procs[0] == 0)
      {
         start = 1;
      }
      for (i=start; i < num_types; i++)
      {
         hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i],
                         tag1, comm, &requests[i-start]);
      }
      hypre_MPI_Waitall(num_types-start, requests, status);

      hypre_TFree(status);
      hypre_TFree(requests);
   }

   /* clean up */
   hypre_TFree(send_proc_obj.vec_starts);
   hypre_TFree(send_proc_obj.id);
   hypre_TFree(send_proc_obj.elements);
   hypre_TFree(send_info);
   if (response_recv_buf) hypre_TFree(response_recv_buf);
   if (response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);

   /* now proc 0 can exit if it has no rows */
   if (!local_size)
   {
      hypre_TFree(used_procs);
      hypre_TFree(new_vec_starts);
      return NULL;
   }

   /* everyone left has rows and knows new_vec_starts, num_types, and
      used_procs */

   /* this vector should be rather small */
   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

   num_requests = 2*num_types;
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status   = hypre_CTAlloc(hypre_MPI_Status, num_requests);

   /* initialize data exchange among used_procs and generate vector -
      here we send to ourself also */
   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      vec_len = new_vec_starts[i+1] - new_vec_starts[i];
      hypre_MPI_Irecv(&vector_data[new_vec_starts[i]], num_vectors*vec_len,
                      hypre_MPI_DOUBLE, proc_id, tag2, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
      hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE,
                      used_procs[i], tag2, comm, &requests[j++]);
   }
   hypre_MPI_Waitall(num_requests, requests, status);

   if (num_requests)
   {
      hypre_TFree(requests);
      hypre_TFree(status);
      hypre_TFree(used_procs);
   }
   hypre_TFree(new_vec_starts);

#else

   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   /* if my_id contains no data, return NULL */
   if (!local_size)
      return NULL;

   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

   /* determine the procs that hold data of par_v and store their ids in
      used_procs (this proc excluded) */
   num_types = -1;
   for (i=0; i < num_procs; i++)
      if (vec_starts[i+1]-vec_starts[i])
         num_types++;
   num_requests = 2*num_types;

   used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
   j = 0;
   for (i=0; i < num_procs; i++)
      if (vec_starts[i+1]-vec_starts[i] && i-my_id)
         used_procs[j++] = i;

   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status   = hypre_CTAlloc(hypre_MPI_Status, num_requests);

   /* initialize data exchange among used_procs and generate vector */
   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      vec_len = vec_starts[proc_id+1] - vec_starts[proc_id];
      hypre_MPI_Irecv(&vector_data[vec_starts[proc_id]], num_vectors*vec_len,
                      hypre_MPI_DOUBLE, proc_id, 0, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
      hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE,
                      used_procs[i], 0, comm, &requests[j++]);
   }

   /* copy this proc's own piece directly */
   for (i=0; i < num_vectors*local_size; i++)
      vector_data[vec_starts[my_id]+i] = local_data[i];

   hypre_MPI_Waitall(num_requests, requests, status);

   if (num_requests)
   {
      hypre_TFree(used_procs);
      hypre_TFree(requests);
      hypre_TFree(status);
   }

#endif

   return vector;
}
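/* Usage sketch (illustrative, not part of hypre): replicate a distributed
 * vector on every rank that owns data.  Ranks without rows get NULL back
 * and must not touch the result. */
static void
example_gather_all( hypre_ParVector *par_v )
{
   hypre_Vector *all = hypre_ParVectorToVectorAll(par_v);
   if (all)  /* NULL on ranks that own no data */
   {
      double *data = hypre_VectorData(all);
      (void) data;  /* ... use the replicated copy ... */
      hypre_SeqVectorDestroy(all);
   }
}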
hypre_ParVector *
hypre_VectorToParVector( MPI_Comm      comm,
                         hypre_Vector *v,
                         HYPRE_Int    *vec_starts )
{
   HYPRE_Int  global_size;
   HYPRE_Int  local_size;
   HYPRE_Int  num_vectors;
   HYPRE_Int  num_procs, my_id;
   HYPRE_Int  global_vecstride, vecstride, idxstride;
   hypre_ParVector *par_vector;
   hypre_Vector    *local_vector;
   double    *v_data;
   double    *local_data;
   hypre_MPI_Request *requests;
   hypre_MPI_Status  *status, status0;
   HYPRE_Int  i, j, k, p;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   if (my_id == 0)
   {
      global_size = hypre_VectorSize(v);
      v_data = hypre_VectorData(v);
      num_vectors = hypre_VectorNumVectors(v); /* for multivectors */
      global_vecstride = hypre_VectorVectorStride(v);
   }

   hypre_MPI_Bcast(&global_size, 1, HYPRE_MPI_INT, 0, comm);
   hypre_MPI_Bcast(&num_vectors, 1, HYPRE_MPI_INT, 0, comm);
   hypre_MPI_Bcast(&global_vecstride, 1, HYPRE_MPI_INT, 0, comm);

   if ( num_vectors==1 )
      par_vector = hypre_ParVectorCreate(comm, global_size, vec_starts);
   else
      par_vector = hypre_ParMultiVectorCreate(comm, global_size, vec_starts,
                                              num_vectors);

   vec_starts = hypre_ParVectorPartitioning(par_vector);
   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   hypre_ParVectorInitialize(par_vector);
   local_vector = hypre_ParVectorLocalVector(par_vector);
   local_data = hypre_VectorData(local_vector);
   vecstride = hypre_VectorVectorStride(local_vector);
   idxstride = hypre_VectorIndexStride(local_vector);
   hypre_assert( idxstride==1 ); /* <<< so far the only implemented
                                    multivector StorageMethod is 0 <<< */

   if (my_id == 0)
   {
      requests = hypre_CTAlloc(hypre_MPI_Request, num_vectors*(num_procs-1));
      status   = hypre_CTAlloc(hypre_MPI_Status, num_vectors*(num_procs-1));
      k = 0;
      for ( p=1; p<num_procs; p++)
         for ( j=0; j<num_vectors; ++j )
         {
            hypre_MPI_Isend( &v_data[vec_starts[p]] + j*global_vecstride,
                             (vec_starts[p+1] - vec_starts[p]),
                             hypre_MPI_DOUBLE, p, 0, comm, &requests[k++] );
         }
      if ( num_vectors==1 )
      {
         for (i=0; i < local_size; i++)
            local_data[i] = v_data[i];
      }
      else
      {
         for ( j=0; j<num_vectors; ++j )
         {
            for (i=0; i < local_size; i++)
               local_data[i+j*vecstride] = v_data[i+j*global_vecstride];
         }
      }
      /* wait on all k posted sends (num_vectors sends per other proc) */
      hypre_MPI_Waitall(k, requests, status);
      hypre_TFree(requests);
      hypre_TFree(status);
   }
   else
   {
      for ( j=0; j<num_vectors; ++j )
         hypre_MPI_Recv( local_data+j*vecstride, local_size,
                         hypre_MPI_DOUBLE, 0, 0, comm, &status0 );
   }

   return par_vector;
}
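/* Usage sketch (illustrative, not part of hypre): scatter a rank-0
 * sequential vector.  Passing vec_starts = NULL assumes that
 * hypre_ParVectorCreate generates an even partitioning when none is
 * supplied; if that does not hold for a given hypre version, build
 * vec_starts explicitly instead. */
static hypre_ParVector *
example_scatter( MPI_Comm comm, hypre_Vector *v_on_rank0 )
{
   return hypre_VectorToParVector(comm, v_on_rank0, NULL);
}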
int
hypre_CSRMatrixMatvecT( double           alpha,
                        hypre_CSRMatrix *A,
                        hypre_Vector    *x,
                        double           beta,
                        hypre_Vector    *y )
{
   double *A_data   = hypre_CSRMatrixData(A);
   int    *A_i      = hypre_CSRMatrixI(A);
   int    *A_j      = hypre_CSRMatrixJ(A);
   int     num_rows = hypre_CSRMatrixNumRows(A);
   int     num_cols = hypre_CSRMatrixNumCols(A);

   double *x_data = hypre_VectorData(x);
   double *y_data = hypre_VectorData(y);
   int     x_size = hypre_VectorSize(x);
   int     y_size = hypre_VectorSize(y);
   int     num_vectors = hypre_VectorNumVectors(x);
   int     idxstride_y = hypre_VectorIndexStride(y);
   int     vecstride_y = hypre_VectorVectorStride(y);
   int     idxstride_x = hypre_VectorIndexStride(x);
   int     vecstride_x = hypre_VectorVectorStride(x);

   double  temp;
   double *y_data_expand = NULL;
   int     offset = 0;
#ifdef HYPRE_USING_OPENMP
   int     my_thread_num = 0;
#endif
   int     i, j, jv, jj;
   int     num_threads;
   int     ierr = 0;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  MatvecT returns ierr = 1 if the
    * length of X doesn't equal the number of rows of A, ierr = 2 if the
    * length of Y doesn't equal the number of columns of A, and ierr = 3
    * if both are true.
    *
    * Because temporary vectors are often used in MatvecT, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_rows != x_size) ierr = 1;
   if (num_cols != y_size) ierr = 2;
   if (num_rows != x_size && num_cols != y_size) ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/
   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= beta;
      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/
   temp = beta / alpha;
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {
      /* each thread accumulates into its own y-sized slice of
         y_data_expand, then the slices are summed into y */
      y_data_expand = hypre_CTAlloc(double, num_threads*y_size);

      if ( num_vectors==1 )
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,j,my_thread_num,offset)
         {
            my_thread_num = omp_get_thread_num();
            offset = y_size*my_thread_num;
#pragma omp for schedule(static)
#endif
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data_expand[offset + j] += A_data[jj] * x_data[i];
               }
            }
#ifdef HYPRE_USING_OPENMP
            /* implied barrier */
#pragma omp for schedule(static)
#endif
            for (i = 0; i < y_size; i++)
            {
               for (j = 0; j < num_threads; j++)
               {
                  y_data[i] += y_data_expand[j*y_size + i];
                  /*y_data_expand[j*y_size + i] = 0; //zero out for next time */
               }
            }
#ifdef HYPRE_USING_OPENMP
         } /* end parallel region */
#endif
      }
      else
      {
         /* MULTIPLE VECTORS NOT THREADED YET */
         for (i = 0; i < num_rows; i++)
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ]
                     += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
      /* freed exactly once, for both branches */
      hypre_TFree(y_data_expand);
   }
   else
   {
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[j] += A_data[jj] * x_data[i];
            }
         }
         else
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ]
                     += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/
   if (alpha != 1.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
HYPRE_Int
hypre_CSRMatrixMatvecT( HYPRE_Complex    alpha,
                        hypre_CSRMatrix *A,
                        hypre_Vector    *x,
                        HYPRE_Complex    beta,
                        hypre_Vector    *y )
{
   HYPRE_Complex *A_data   = hypre_CSRMatrixData(A);
   HYPRE_Int     *A_i      = hypre_CSRMatrixI(A);
   HYPRE_Int     *A_j      = hypre_CSRMatrixJ(A);
   HYPRE_Int      num_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int      num_cols = hypre_CSRMatrixNumCols(A);

   HYPRE_Complex *x_data = hypre_VectorData(x);
   HYPRE_Complex *y_data = hypre_VectorData(y);
   HYPRE_Int      x_size = hypre_VectorSize(x);
   HYPRE_Int      y_size = hypre_VectorSize(y);
   HYPRE_Int      num_vectors = hypre_VectorNumVectors(x);
   HYPRE_Int      idxstride_y = hypre_VectorIndexStride(y);
   HYPRE_Int      vecstride_y = hypre_VectorVectorStride(y);
   HYPRE_Int      idxstride_x = hypre_VectorIndexStride(x);
   HYPRE_Int      vecstride_x = hypre_VectorVectorStride(x);

   HYPRE_Complex  temp;
   HYPRE_Complex *y_data_expand;
   HYPRE_Int      my_thread_num = 0, offset = 0;

   HYPRE_Int      i, j, jv, jj;
   HYPRE_Int      num_threads;
   HYPRE_Int      ierr = 0;

   hypre_Vector  *x_tmp = NULL;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  MatvecT returns ierr = 1 if the
    * length of X doesn't equal the number of rows of A, ierr = 2 if the
    * length of Y doesn't equal the number of columns of A, and ierr = 3
    * if both are true.
    *
    * Because temporary vectors are often used in MatvecT, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_rows != x_size) ierr = 1;
   if (num_cols != y_size) ierr = 2;
   if (num_rows != x_size && num_cols != y_size) ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/
   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= beta;
      return ierr;
   }

   if (x == y)
   {
      x_tmp = hypre_SeqVectorCloneDeep(x);
      x_data = hypre_VectorData(x_tmp);
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/
   temp = beta / alpha;
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {
      y_data_expand = hypre_CTAlloc(HYPRE_Complex, num_threads*y_size);

      if ( num_vectors==1 )
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,j,my_thread_num,offset)
#endif
         {
            my_thread_num = hypre_GetThreadNum();
            offset = y_size*my_thread_num;
#ifdef HYPRE_USING_OPENMP
#pragma omp for HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data_expand[offset + j] += A_data[jj] * x_data[i];
               }
            }

            /* implied barrier (for threads) */
#ifdef HYPRE_USING_OPENMP
#pragma omp for HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < y_size; i++)
            {
               for (j = 0; j < num_threads; j++)
               {
                  y_data[i] += y_data_expand[j*y_size + i];
               }
            }
         } /* end parallel threaded region */
      }
      else
      {
         /* multiple vector case is not threaded */
         for (i = 0; i < num_rows; i++)
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ]
                     += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
      hypre_TFree(y_data_expand);
   }
   else
   {
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[j] += A_data[jj] * x_data[i];
            }
         }
         else
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ]
                     += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/
   if (alpha != 1.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= alpha;
   }

   if (x == y)
      hypre_SeqVectorDestroy(x_tmp);

   return ierr;
}
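/* Because this variant deep-copies x when x == y (the
 * hypre_SeqVectorCloneDeep call above), an aliased, in-place transpose
 * product is safe.  Sketch only; assumes A is square so the sizes remain
 * compatible. */
static void
example_inplace_transpose( hypre_CSRMatrix *A, hypre_Vector *y )
{
   hypre_CSRMatrixMatvecT(1.0, A, y, 0.0, y);   /* y <- A^T*y */
}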
/* y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end] */
HYPRE_Int
hypre_CSRMatrixMatvecOutOfPlace( HYPRE_Complex    alpha,
                                 hypre_CSRMatrix *A,
                                 hypre_Vector    *x,
                                 HYPRE_Complex    beta,
                                 hypre_Vector    *b,
                                 hypre_Vector    *y,
                                 HYPRE_Int        offset )
{
#ifdef HYPRE_PROFILE
   HYPRE_Real time_begin = hypre_MPI_Wtime();
#endif

   HYPRE_Complex *A_data   = hypre_CSRMatrixData(A);
   HYPRE_Int     *A_i      = hypre_CSRMatrixI(A) + offset;
   HYPRE_Int     *A_j      = hypre_CSRMatrixJ(A);
   HYPRE_Int      num_rows = hypre_CSRMatrixNumRows(A) - offset;
   HYPRE_Int      num_cols = hypre_CSRMatrixNumCols(A);
   /*HYPRE_Int      num_nnz  = hypre_CSRMatrixNumNonzeros(A);*/

   HYPRE_Int     *A_rownnz   = hypre_CSRMatrixRownnz(A);
   HYPRE_Int      num_rownnz = hypre_CSRMatrixNumRownnz(A);

   HYPRE_Complex *x_data = hypre_VectorData(x);
   HYPRE_Complex *b_data = hypre_VectorData(b) + offset;
   HYPRE_Complex *y_data = hypre_VectorData(y);
   HYPRE_Int      x_size = hypre_VectorSize(x);
   HYPRE_Int      b_size = hypre_VectorSize(b) - offset;
   HYPRE_Int      y_size = hypre_VectorSize(y) - offset;
   HYPRE_Int      num_vectors = hypre_VectorNumVectors(x);
   HYPRE_Int      idxstride_y = hypre_VectorIndexStride(y);
   HYPRE_Int      vecstride_y = hypre_VectorVectorStride(y);
   /*HYPRE_Int      idxstride_b = hypre_VectorIndexStride(b);
   HYPRE_Int      vecstride_b = hypre_VectorVectorStride(b);*/
   HYPRE_Int      idxstride_x = hypre_VectorIndexStride(x);
   HYPRE_Int      vecstride_x = hypre_VectorVectorStride(x);

   HYPRE_Complex  temp, tempx;
   HYPRE_Int      i, j, jj;
   HYPRE_Int      m;
   HYPRE_Real     xpar = 0.7;
   HYPRE_Int      ierr = 0;
   hypre_Vector  *x_tmp = NULL;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  Matvec returns ierr = 1 if the
    * length of X doesn't equal the number of columns of A, ierr = 2 if
    * the length of Y doesn't equal the number of rows of A, and
    * ierr = 3 if both are true.
    *
    * Because temporary vectors are often used in Matvec, none of these
    * conditions terminates processing, and the ierr flag is
    * informational only.
    *--------------------------------------------------------------------*/
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
   hypre_assert( num_vectors == hypre_VectorNumVectors(b) );

   if (num_cols != x_size) ierr = 1;
   if (num_rows != y_size || num_rows != b_size) ierr = 2;
   if (num_cols != x_size && (num_rows != y_size || num_rows != b_size)) ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/
   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= beta;

#ifdef HYPRE_PROFILE
      hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;
#endif
      return ierr;
   }

   if (x == y)
   {
      x_tmp = hypre_SeqVectorCloneDeep(x);
      x_data = hypre_VectorData(x_tmp);
   }

   temp = beta / alpha;

   /* use the rownnz pointer to do the A*x multiplication
      when num_rownnz is smaller than num_rows */
   if (num_rownnz < xpar*(num_rows) || num_vectors > 1)
   {
      /*-----------------------------------------------------------------------
       * y = (beta/alpha)*b
       *-----------------------------------------------------------------------*/
      if (temp != 1.0)
      {
         if (temp == 0.0)
         {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows*num_vectors; i++)
               y_data[i] = 0.0;
         }
         else
         {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows*num_vectors; i++)
               y_data[i] = b_data[i]*temp;
         }
      }

      /*-----------------------------------------------------------------
       * y += A*x
       *-----------------------------------------------------------------*/
      if (num_rownnz < xpar*(num_rows))
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_rownnz; i++)
         {
            m = A_rownnz[i];
            /*
             * for (jj = A_i[m]; jj < A_i[m+1]; jj++)
             * {
             *    j = A_j[jj];
             *    y_data[m] += A_data[jj] * x_data[j];
             * }
             */
            if ( num_vectors==1 )
            {
               tempx = 0;
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  tempx += A_data[jj] * x_data[A_j[jj]];
               y_data[m] += tempx;
            }
            else
            {
               for ( j=0; j<num_vectors; ++j )
               {
                  tempx = 0;
                  for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                     tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
                  y_data[ j*vecstride_y + m*idxstride_y ] += tempx;
               }
            }
         }
      }
      else // num_vectors > 1
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jj,tempx) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_rows; i++)
         {
            for (j = 0; j < num_vectors; ++j)
            {
               tempx = 0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] += tempx;
            }
         }
      }

      /*-----------------------------------------------------------------
       * y = alpha*y
       *-----------------------------------------------------------------*/
      if (alpha != 1.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] *= alpha;
      }
   }
   else
   {
      // JSP: this is currently the only path optimized
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,tempx)
#endif
      {
         HYPRE_Int iBegin = hypre_CSRMatrixGetLoadBalancedPartitionBegin(A);
         HYPRE_Int iEnd   = hypre_CSRMatrixGetLoadBalancedPartitionEnd(A);
         hypre_assert(iBegin <= iEnd);
         hypre_assert(iBegin >= 0 && iBegin <= num_rows);
         hypre_assert(iEnd >= 0 && iEnd <= num_rows);

         if (0 == temp)
         {
            if (1 == alpha) // JSP: a common path
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = 0.0;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = A*x
            else if (-1 == alpha)
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = 0.0;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx -= A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = -A*x
            else
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = 0.0;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = alpha*tempx;
               }
            } // y = alpha*A*x
         } // temp == 0
         else if (-1 == temp) // beta == -alpha
         {
            if (1 == alpha) // JSP: a common path
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = -b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = A*x - b
            else if (-1 == alpha) // JSP: a common path
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx -= A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = -A*x + b
            else
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = -b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = alpha*tempx;
               }
            } // y = alpha*(A*x - b)
         } // temp == -1
         else if (1 == temp)
         {
            if (1 == alpha) // JSP: a common path
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = A*x + b
            else if (-1 == alpha)
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = -b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx -= A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = -A*x - b
            else
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = b_data[i];
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = alpha*tempx;
               }
            } // y = alpha*(A*x + b)
         }
         else
         {
            if (1 == alpha) // JSP: a common path
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = b_data[i]*temp;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = A*x + temp*b
            else if (-1 == alpha)
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = -b_data[i]*temp;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx -= A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = tempx;
               }
            } // y = -A*x - temp*b
            else
            {
               for (i = iBegin; i < iEnd; i++)
               {
                  tempx = b_data[i]*temp;
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     tempx += A_data[jj] * x_data[A_j[jj]];
                  }
                  y_data[i] = alpha*tempx;
               }
            } // y = alpha*(A*x + temp*b)
         } // temp != 0 && temp != -1 && temp != 1
      } // omp parallel
   }

   if (x == y)
      hypre_SeqVectorDestroy(x_tmp);

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;
#endif
   return ierr;
}
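/* Usage sketch (illustrative): the out-of-place form computes a residual
 * r = b - A*x in one pass (alpha = -1, beta = 1, offset = 0), without
 * first copying b into r. */
static void
example_residual( hypre_CSRMatrix *A, hypre_Vector *x,
                  hypre_Vector *b, hypre_Vector *r )
{
   hypre_CSRMatrixMatvecOutOfPlace(-1.0, A, x, 1.0, b, r, 0);
}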