Example #1
hypre_ParCSRBooleanMatrix *
hypre_CSRBooleanMatrixToParCSRBooleanMatrix
( MPI_Comm comm, hypre_CSRBooleanMatrix *A,
  HYPRE_Int *row_starts, HYPRE_Int *col_starts )
{
   HYPRE_Int          global_data[2];
   HYPRE_Int          global_num_rows;
   HYPRE_Int          global_num_cols;
   HYPRE_Int          *local_num_rows;

   HYPRE_Int          num_procs, my_id;
   HYPRE_Int          *local_num_nonzeros;
   HYPRE_Int          num_nonzeros;
  
   HYPRE_Int          *a_i;
   HYPRE_Int          *a_j;
  
   hypre_CSRBooleanMatrix *local_A;

   hypre_MPI_Request  *requests;
   hypre_MPI_Status   *status, status0;
   hypre_MPI_Datatype *csr_matrix_datatypes;

   hypre_ParCSRBooleanMatrix *par_matrix;

   HYPRE_Int          first_col_diag;
   HYPRE_Int          last_col_diag;
 
   HYPRE_Int i, j, ind;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   if (my_id == 0) 
   {
        global_data[0] = hypre_CSRBooleanMatrix_Get_NRows(A);
        global_data[1] = hypre_CSRBooleanMatrix_Get_NCols(A);
        a_i = hypre_CSRBooleanMatrix_Get_I(A);
        a_j = hypre_CSRBooleanMatrix_Get_J(A);
   }
   hypre_MPI_Bcast(global_data,2,HYPRE_MPI_INT,0,comm);
   global_num_rows = global_data[0];
   global_num_cols = global_data[1];

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   csr_matrix_datatypes = hypre_CTAlloc(hypre_MPI_Datatype, num_procs);

   par_matrix = hypre_ParCSRBooleanMatrixCreate (comm, global_num_rows,
        global_num_cols,row_starts,col_starts,0,0,0);

   row_starts = hypre_ParCSRBooleanMatrix_Get_RowStarts(par_matrix);
   col_starts = hypre_ParCSRBooleanMatrix_Get_ColStarts(par_matrix);

   for (i=0; i < num_procs; i++)
         local_num_rows[i] = row_starts[i+1] - row_starts[i];

   if (my_id == 0)
   {
        local_num_nonzeros = hypre_CTAlloc(HYPRE_Int, num_procs);
        for (i=0; i < num_procs-1; i++)
                local_num_nonzeros[i] = a_i[row_starts[i+1]] 
                                - a_i[row_starts[i]];
        local_num_nonzeros[num_procs-1] = a_i[global_num_rows] 
                                - a_i[row_starts[num_procs-1]];
   }
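   /* Tell each rank how many nonzeros its local piece will hold;
      the send buffer is significant only on rank 0 */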
   hypre_MPI_Scatter(local_num_nonzeros,1,HYPRE_MPI_INT,&num_nonzeros,1,HYPRE_MPI_INT,0,comm);

   if (my_id == 0) num_nonzeros = local_num_nonzeros[0];

   local_A = hypre_CSRBooleanMatrixCreate(local_num_rows[my_id], global_num_cols,
                num_nonzeros);
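   /* Rank 0 packs each remote rank's slice of a_i and a_j into a derived
      MPI datatype and sends it; its own slice is aliased directly below */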
   if (my_id == 0)
   {
        requests = hypre_CTAlloc (hypre_MPI_Request, num_procs-1);
        status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1);
        j=0;
        for (i=1; i < num_procs; i++)
        {
                ind = a_i[row_starts[i]];
                hypre_BuildCSRBooleanMatrixMPIDataType(local_num_nonzeros[i], 
                        local_num_rows[i],
                        &a_i[row_starts[i]],
                        &a_j[ind],
                        &csr_matrix_datatypes[i]);
                hypre_MPI_Isend(hypre_MPI_BOTTOM, 1, csr_matrix_datatypes[i], i, 0, comm,
                        &requests[j++]);
                hypre_MPI_Type_free(&csr_matrix_datatypes[i]);
        }
        hypre_CSRBooleanMatrix_Get_I(local_A) = a_i;
        hypre_CSRBooleanMatrix_Get_J(local_A) = a_j;
        hypre_MPI_Waitall(num_procs-1,requests,status);
        hypre_TFree(requests);
        hypre_TFree(status);
        hypre_TFree(local_num_nonzeros);
    }
   else
   {
        hypre_CSRBooleanMatrixInitialize(local_A);
        hypre_BuildCSRBooleanMatrixMPIDataType(num_nonzeros, 
                        local_num_rows[my_id],
                        hypre_CSRBooleanMatrix_Get_I(local_A),
                        hypre_CSRBooleanMatrix_Get_J(local_A),
                        csr_matrix_datatypes);
        hypre_MPI_Recv(hypre_MPI_BOTTOM,1,csr_matrix_datatypes[0],0,0,comm,&status0);
        hypre_MPI_Type_free(csr_matrix_datatypes);
   }

   first_col_diag = col_starts[my_id];
   last_col_diag = col_starts[my_id+1]-1;

   BooleanGenerateDiagAndOffd(local_A, par_matrix, first_col_diag, last_col_diag);

   /* set pointers back to NULL before destroying */
   if (my_id == 0)
   {      
      hypre_CSRBooleanMatrix_Get_I(local_A) = NULL;
      hypre_CSRBooleanMatrix_Get_J(local_A) = NULL; 
   }      
   hypre_CSRBooleanMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
   hypre_TFree(csr_matrix_datatypes);

   return par_matrix;
}
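A minimal calling sketch for the conversion above, assuming an MPI communicator comm is already set up. The constructor, Initialize, the _Get_I/_Get_J accessors, and hypre_CSRBooleanMatrixDestroy all appear in the routine itself; hypre_ParCSRBooleanMatrixDestroy and the behavior of NULL starts (letting the Create call inside the conversion build an even partitioning, as with the non-Boolean hypre_CSRMatrixToParCSRMatrix) are assumptions. The serial matrix only needs to exist on rank 0.

/* Hypothetical driver: rank 0 builds an identity-pattern Boolean CSR matrix
   of size num_procs x num_procs and it is distributed one row per rank. */
hypre_CSRBooleanMatrix    *A_seq = NULL;
hypre_ParCSRBooleanMatrix *A_par;
HYPRE_Int                  i, n, my_id, num_procs;

hypre_MPI_Comm_rank(comm, &my_id);
hypre_MPI_Comm_size(comm, &num_procs);
n = num_procs;                      /* one row (and one nonzero) per rank */

if (my_id == 0)
{
   A_seq = hypre_CSRBooleanMatrixCreate(n, n, n);
   hypre_CSRBooleanMatrixInitialize(A_seq);
   for (i = 0; i <= n; i++) hypre_CSRBooleanMatrix_Get_I(A_seq)[i] = i;
   for (i = 0; i < n; i++)  hypre_CSRBooleanMatrix_Get_J(A_seq)[i] = i;
}

/* NULL starts: the partitioning is assumed to be built inside the conversion,
   and can be read back with hypre_ParCSRBooleanMatrix_Get_RowStarts */
A_par = hypre_CSRBooleanMatrixToParCSRBooleanMatrix(comm, A_seq, NULL, NULL);

if (my_id == 0) hypre_CSRBooleanMatrixDestroy(A_seq);
hypre_ParCSRBooleanMatrixDestroy(A_par);   /* assumed destructor name */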
Example #2
hypre_ParVector *
hypre_VectorToParVector (MPI_Comm comm, hypre_Vector *v, HYPRE_Int *vec_starts)
{
   HYPRE_Int 			global_size;
   HYPRE_Int 			local_size;
   HYPRE_Int                  num_vectors;
   HYPRE_Int  		num_procs, my_id;
   HYPRE_Int                  global_vecstride, vecstride, idxstride;
   hypre_ParVector  	*par_vector;
   hypre_Vector     	*local_vector;
   double          	*v_data;
   double		*local_data;
   hypre_MPI_Request		*requests;
   hypre_MPI_Status		*status, status0;
   HYPRE_Int			i, j, k, p;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (my_id == 0)
   {
        global_size = hypre_VectorSize(v);
        v_data = hypre_VectorData(v);
        num_vectors = hypre_VectorNumVectors(v); /* for multivectors */
        global_vecstride = hypre_VectorVectorStride(v);

   }

   hypre_MPI_Bcast(&global_size,1,HYPRE_MPI_INT,0,comm);
   hypre_MPI_Bcast(&num_vectors,1,HYPRE_MPI_INT,0,comm);
   hypre_MPI_Bcast(&global_vecstride,1,HYPRE_MPI_INT,0,comm);

   if ( num_vectors==1 )
      par_vector = hypre_ParVectorCreate(comm, global_size, vec_starts);
   else
      par_vector = hypre_ParMultiVectorCreate(comm, global_size, vec_starts, num_vectors);

   vec_starts = hypre_ParVectorPartitioning(par_vector);

   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   hypre_ParVectorInitialize(par_vector);
   local_vector = hypre_ParVectorLocalVector(par_vector);
   local_data = hypre_VectorData(local_vector);
   vecstride = hypre_VectorVectorStride(local_vector);
   idxstride = hypre_VectorIndexStride(local_vector);
   hypre_assert( idxstride==1 );  /* so far the only implemented multivector StorageMethod is 0 */

   if (my_id == 0)
   {
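	/* Rank 0 copies its own block in place and sends every other rank its
	   slice of v_data, one message per vector of the multivector */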
	requests = hypre_CTAlloc(hypre_MPI_Request,num_vectors*(num_procs-1));
	status = hypre_CTAlloc(hypre_MPI_Status,num_vectors*(num_procs-1));
	k = 0;
	for ( p=1; p<num_procs; p++)
           for ( j=0; j<num_vectors; ++j )
           {
		hypre_MPI_Isend( &v_data[vec_starts[p]]+j*global_vecstride,
                          (vec_starts[p+1]-vec_starts[p]),
                          hypre_MPI_DOUBLE, p, 0, comm, &requests[k++] );
           }
        if ( num_vectors==1 )
        {
           for (i=0; i < local_size; i++)
              local_data[i] = v_data[i];
        }
        else
           for ( j=0; j<num_vectors; ++j )
           {
              for (i=0; i < local_size; i++)
                 local_data[i+j*vecstride] = v_data[i+j*global_vecstride];
           }
	hypre_MPI_Waitall(num_procs-1,requests, status);
	hypre_TFree(requests);
	hypre_TFree(status);
   }
   else
   {
      for ( j=0; j<num_vectors; ++j )
	hypre_MPI_Recv( local_data+j*vecstride, local_size, hypre_MPI_DOUBLE, 0, 0, comm,&status0 );
   }

   return par_vector;
}
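A similarly hedged sketch of calling hypre_VectorToParVector: rank 0 owns a serial hypre_Vector and every rank ends up with its block of the distributed copy. hypre_SeqVectorCreate/Initialize/Destroy and hypre_ParVectorDestroy are assumed from the seq_mv and parcsr_mv interfaces, and NULL vec_starts is assumed to let hypre_ParVectorCreate build the partitioning that the routine re-reads above.

/* Hypothetical driver for hypre_VectorToParVector (single-vector case) */
hypre_Vector    *v = NULL;
hypre_ParVector *v_par;
double          *v_data;
HYPRE_Int        i, n = 100, my_id;

hypre_MPI_Comm_rank(comm, &my_id);
if (my_id == 0)
{
   v = hypre_SeqVectorCreate(n);          /* assumed seq_mv constructor */
   hypre_SeqVectorInitialize(v);
   v_data = hypre_VectorData(v);
   for (i = 0; i < n; i++) v_data[i] = (double) i;
}

v_par = hypre_VectorToParVector(comm, v, NULL);   /* NULL: build vec_starts */

if (my_id == 0) hypre_SeqVectorDestroy(v);
hypre_ParVectorDestroy(v_par);                    /* assumed destructor name */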
Example #3
void hypre_ParChordMatrix_RowStarts(
   hypre_ParChordMatrix *Ac, MPI_Comm comm,
   HYPRE_Int ** row_starts, HYPRE_Int * global_num_cols )
   /* This function computes the ParCSRMatrix-style row_starts from a chord matrix.
      It requires that the idofs of the chord matrix be partitioned among
      processors so that their numbering is monotonic with the processor number;
      see below.

      The algorithm: each proc. p knows its min & max global row & col numbers.
      Mins are first_index_rdof[p], first_index_idof[p]
      ***IF*** these were in proper order (see below),
      first_index_rdof[p] is row_starts[p].
      Add num_rdofs-1 to get the max, i.e. add num_rdofs
      to get row_starts[p+1] (IF the processors are ordered thus).
      Compute these, then broadcast to the other processors to form
      row_starts.
      (We also could get global_num_rows by an AllReduce num_idofs.)
      We get global_num_cols by taking the min and max over processors of
      the minimum and maximum column numbers on each processor.

      If the chord matrix is not ordered so that the above will work, then we
      would sometimes need to completely redistribute the matrix, a very
      expensive operation.
      The problem is that the chord matrix format makes no assumptions about
      processor order, but the ParCSR format assumes that
      p<q => (local row numbers of p) < (local row numbers of q)
      Maybe instead I could change the global numbering scheme as part of this
      conversion.
      A closely related ordering-type problem to watch for: row_starts must be
      a partition for a ParCSRMatrix.  In a ChordMatrix, the struct itself
      makes no guarantees, but Panayot said, in essence, that row_starts will
      be a partition.
      col_starts should be NULL; later we shall let the Create function compute one.
   */
{
   HYPRE_Int * fis_idof = hypre_ParChordMatrixFirstindexIdof(Ac);
   HYPRE_Int * fis_rdof = hypre_ParChordMatrixFirstindexRdof(Ac);
   HYPRE_Int my_id, num_procs;
   HYPRE_Int num_idofs = hypre_ParChordMatrixNumIdofs(Ac);
   HYPRE_Int num_rdofs = hypre_ParChordMatrixNumRdofs(Ac);
   HYPRE_Int min_rdof, max_rdof, global_min_rdof, global_max_rdof;
   HYPRE_Int p, lens[2], lastlens[2];
   hypre_MPI_Status *status;
   hypre_MPI_Request *request;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   request = hypre_CTAlloc(hypre_MPI_Request, 1 );
   status = hypre_CTAlloc(hypre_MPI_Status, 1 );

   min_rdof = fis_rdof[my_id];
   max_rdof = min_rdof + num_rdofs;
   lens[0] = num_idofs;
   lens[1] = num_rdofs;

   /* row_starts (except the last value) */
   *row_starts = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<num_procs; ++p ) {
      (*row_starts)[p] = fis_idof[p];
   }

   /* check that the ordering and partitioning of rows is as expected
      (the check is only partial); a stand-alone sketch of this neighbor
      handshake follows the function... */
   if ( my_id<num_procs-1 )
      hypre_MPI_Isend( lens, 2, HYPRE_MPI_INT, my_id+1, 0, comm, request );
   if ( my_id>0 )
      hypre_MPI_Recv( lastlens, 2, HYPRE_MPI_INT, my_id-1, 0, comm, status );
   if ( my_id<num_procs-1 )
	hypre_MPI_Waitall( 1, request, status);
   if ( my_id>0 )
      hypre_assert( (*row_starts)[my_id] == (*row_starts)[my_id-1] + lastlens[0] );
   hypre_TFree( request );
   hypre_TFree( status );
      
   /* Get the upper bound for all the rows */
   hypre_MPI_Bcast( lens, 2, HYPRE_MPI_INT, num_procs-1, comm );
   (*row_starts)[num_procs] = (*row_starts)[num_procs-1] + lens[0];

   /* Global number of columns */
/*   hypre_MPI_Allreduce( &num_rdofs, global_num_cols, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm );*/
   hypre_MPI_Allreduce( &min_rdof, &global_min_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MIN, comm );
   hypre_MPI_Allreduce( &max_rdof, &global_max_rdof, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm );
   *global_num_cols = global_max_rdof - global_min_rdof;
}
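The neighbor handshake in the middle of the routine (each rank Isends its idof/rdof counts to rank my_id+1 and checks the incoming counts from rank my_id-1 against row_starts) can be exercised in isolation. The stand-alone sketch below uses plain MPI calls, for which the hypre_MPI_* wrappers above are thin equivalents; the 10-rows-per-rank partition is made up for the example.

/* Stand-alone illustration of the ring-style partition check used above */
#include <assert.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
   int my_id, num_procs, p;
   int my_len, prev_len;
   int *row_starts;
   MPI_Request request;
   MPI_Status  status;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

   my_len = 10;                                /* rows owned by this rank */
   row_starts = malloc((num_procs + 1) * sizeof(int));
   for (p = 0; p <= num_procs; p++)
      row_starts[p] = 10 * p;                  /* the partition being checked */

   if (my_id < num_procs - 1)
      MPI_Isend(&my_len, 1, MPI_INT, my_id + 1, 0, MPI_COMM_WORLD, &request);
   if (my_id > 0)
   {
      MPI_Recv(&prev_len, 1, MPI_INT, my_id - 1, 0, MPI_COMM_WORLD, &status);
      assert(row_starts[my_id] == row_starts[my_id - 1] + prev_len);
   }
   if (my_id < num_procs - 1)
      MPI_Wait(&request, &status);

   free(row_starts);
   MPI_Finalize();
   return 0;
}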
Example #4
HYPRE_Int main(HYPRE_Int argc, char *argv[])
{
    HYPRE_Int mype, npes;
    HYPRE_Int symmetric;
    HYPRE_Int num_runs;
    Matrix *A;
    ParaSails *ps;
    FILE *file;
    HYPRE_Int n, beg_row, end_row;
    HYPRE_Real time0, time1;
    HYPRE_Real setup_time, solve_time;
    HYPRE_Real max_setup_time, max_solve_time;
    HYPRE_Real cost;

    HYPRE_Real *x, *b;
    HYPRE_Int i, niter, err;
    HYPRE_Real thresh;
    HYPRE_Real threshg;
    HYPRE_Int nlevels;
    HYPRE_Real filter;
    HYPRE_Real loadbal;

    hypre_MPI_Init(&argc, &argv);
    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mype);
    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &npes);

    /* Read command-line arguments, then the number of rows from the matrix file */
    symmetric = atoi(argv[1]);
    num_runs  = atoi(argv[2]);

    file = fopen(argv[3], "r");
    assert(file != NULL);
#ifdef EMSOLVE
    hypre_fscanf(file, "%*d %d\n", &n);
#else
    hypre_fscanf(file, "%d\n", &n);
#endif
    fclose(file);
    assert(n >= npes);

    beg_row = (HYPRE_Int) ((HYPRE_Real)(mype*n) / npes) + 1; /* assumes 1-based */
    end_row = (HYPRE_Int) ((HYPRE_Real)((mype+1)* n) / npes);

    if (mype == 0)
        assert(beg_row == 1);
    if (mype == npes-1)
        assert(end_row == n);

#ifdef EMSOLVE
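    /* EMSOLVE builds use 0-based row numbering */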
    beg_row--;
    end_row--;
#endif

    x = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));
    b = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));

    A = MatrixCreate(hypre_MPI_COMM_WORLD, beg_row, end_row);

    MatrixRead(A, argv[3]);
    if (mype == 0) 
        hypre_printf("%s\n", argv[3]);

    /* MatrixPrint(A, "A"); */

    /* Right-hand side */
    if (argc > 4)
    {
        RhsRead(b, A, argv[4]);
        if (mype == 0) 
            hypre_printf("Using rhs from %s\n", argv[4]);
    }
    else
    {
        for (i=0; i<end_row-beg_row+1; i++)
            b[i] = (HYPRE_Real) (2*rand()) / (HYPRE_Real) RAND_MAX - 1.0;
    }

    while (num_runs && num_runs >= -1)
    {
        /* Initial guess */
        for (i=0; i<end_row-beg_row+1; i++)
            x[i] = 0.0;

	if (num_runs == -1)
	{
            thresh = 0.0;
	    nlevels = 0;
	    filter = 0.0;
            loadbal = 0.0;
	}
	else
	{
            if (mype == 0)
            {
#if PARASAILS_EXT_PATTERN
                hypre_printf("Enter parameters threshg, thresh, nlevels, "
	            "filter, beta:\n");
	        fflush(stdout);
                hypre_scanf("%lf %lf %d %lf %lf", &threshg, &thresh, &nlevels, 
		    &filter, &loadbal);
#else
                hypre_printf("Enter parameters thresh, nlevels, "
	            "filter, beta:\n");
	        fflush(stdout);
                hypre_scanf("%lf %d %lf %lf", &thresh, &nlevels, 
		    &filter, &loadbal);
#endif
	    }

	    hypre_MPI_Bcast(&threshg, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&thresh,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&nlevels, 1, HYPRE_MPI_INT,    0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&filter,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&loadbal, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);

            if (nlevels < 0)
                break;
	}

        /**************
	 * Setup phase   
	 **************/

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        ps = ParaSailsCreate(hypre_MPI_COMM_WORLD, beg_row, end_row, symmetric);

        ps->loadbal_beta = loadbal;

#if PARASAILS_EXT_PATTERN
        ParaSailsSetupPatternExt(ps, A, threshg, thresh, nlevels);
#else
        ParaSailsSetupPattern(ps, A, thresh, nlevels);
#endif

        time1 = hypre_MPI_Wtime();
	setup_time = time1-time0;

        cost = ParaSailsStatsPattern(ps, A);
	if (cost > 5.e11)
	{
            hypre_printf("Aborting setup and solve due to high cost.\n");
	    goto cleanup;
	}

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        err = ParaSailsSetupValues(ps, A, filter);
        if (err != 0)
	{
            hypre_printf("ParaSailsSetupValues returned error.\n");
	    goto cleanup;
	}

        time1 = hypre_MPI_Wtime();
	setup_time += (time1-time0);

        ParaSailsStatsValues(ps, A);

	if (!strncmp(argv[3], "testpsmat", 8))
            MatrixPrint(ps->M, "M");

#if 0
        if (mype == 0) 
            hypre_printf("SETTING UP VALUES AGAIN WITH FILTERED PATTERN\n");
        ps->loadbal_beta = 0;
        ParaSailsSetupValues(ps, A, 0.0);
#endif

        /*****************
	 * Solution phase
	 *****************/

	niter = 3000;
        if (MatrixNnz(ps->M) == n) /* if diagonal preconditioner */
	    niter = 5000;

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        if (symmetric == 1)
            PCG_ParaSails(A, ps, b, x, 1.e-8, niter);
	else
            FGMRES_ParaSails(A, ps, b, x, 50, 1.e-8, niter);

        time1 = hypre_MPI_Wtime();
	solve_time = time1-time0;

        hypre_MPI_Reduce(&setup_time, &max_setup_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, 
	    hypre_MPI_COMM_WORLD);
        hypre_MPI_Reduce(&solve_time, &max_solve_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, 
	    hypre_MPI_COMM_WORLD);

	if (mype == 0)
	{
            hypre_printf("**********************************************\n");
            hypre_printf("***    Setup    Solve    Total\n");
            hypre_printf("III %8.1f %8.1f %8.1f\n", max_setup_time, max_solve_time, 
		max_setup_time+max_solve_time);
            hypre_printf("**********************************************\n");
	}

cleanup:
        ParaSailsDestroy(ps);

        num_runs--;
    }

    free(x);
    free(b);

    MatrixDestroy(A);
    hypre_MPI_Finalize();

    return 0;
}
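The 1-based block row distribution used near the top of the driver (beg_row/end_row) can be sanity-checked on its own; the sketch below evaluates the same formula for every rank and asserts that the ranges tile 1..n with no gaps or overlaps. The sizes n and npes are made up for the example.

/* Stand-alone check of the row distribution used in the driver above:
   rank p owns rows floor(p*n/npes)+1 .. floor((p+1)*n/npes) (1-based). */
#include <assert.h>
#include <stdio.h>

int main(void)
{
   int n = 1000, npes = 7;             /* made-up sizes for the check */
   int p, beg_row, end_row, prev_end = 0;

   for (p = 0; p < npes; p++)
   {
      beg_row = (int) ((double)(p * n) / npes) + 1;
      end_row = (int) ((double)((p + 1) * n) / npes);

      assert(beg_row == prev_end + 1); /* ranges tile 1..n with no gaps */
      prev_end = end_row;
      printf("rank %d: rows %d..%d\n", p, beg_row, end_row);
   }
   assert(prev_end == n);
   return 0;
}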
Example #5
HYPRE_Int
HYPRE_IJMatrixCreate( MPI_Comm        comm,
                      HYPRE_Int       ilower,
                      HYPRE_Int       iupper,
                      HYPRE_Int       jlower,
                      HYPRE_Int       jupper,
                      HYPRE_IJMatrix *matrix )
{
   HYPRE_Int *row_partitioning;
   HYPRE_Int *col_partitioning;
   HYPRE_Int *info;
   HYPRE_Int num_procs;
   HYPRE_Int myid;

   hypre_IJMatrix *ijmatrix;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int  row0, col0, rowN, colN;
#else
   HYPRE_Int *recv_buf;
   HYPRE_Int i, i4;
   HYPRE_Int square;
#endif

   ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1);

   hypre_IJMatrixComm(ijmatrix)         = comm;
   hypre_IJMatrixObject(ijmatrix)       = NULL;
   hypre_IJMatrixTranslator(ijmatrix)   = NULL;
   hypre_IJMatrixObjectType(ijmatrix)   = HYPRE_UNITIALIZED;
   hypre_IJMatrixAssembleFlag(ijmatrix) = 0;
   hypre_IJMatrixPrintLevel(ijmatrix) = 0;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm, &myid);
   

   if (ilower > iupper+1 || ilower < 0)
   {
      hypre_error_in_arg(2);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (iupper < -1)
   {
      hypre_error_in_arg(3);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(4);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jupper < -1)
   {
      hypre_error_in_arg(5);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION

   info = hypre_CTAlloc(HYPRE_Int,2);

   row_partitioning = hypre_CTAlloc(HYPRE_Int, 2);
   col_partitioning = hypre_CTAlloc(HYPRE_Int, 2);

   row_partitioning[0] = ilower;
   row_partitioning[1] = iupper+1;
   col_partitioning[0] = jlower;
   col_partitioning[1] = jupper+1;

   /* now we need the global number of rows and columns as well
      as the global first row and column index */

   /* proc 0 has the first row and col */
   if (myid==0) 
   {
      info[0] = ilower;
      info[1] = jlower;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm);
   row0 = info[0];
   col0 = info[1];
   
   /* proc (num_procs-1) has the last row and col */   
   if (myid == (num_procs-1))
   {
      info[0] = iupper;
      info[1] = jupper;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm);

   rowN = info[0];
   colN = info[1];

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0;
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0;
   hypre_IJMatrixGlobalNumRows(ijmatrix) = rowN - row0 + 1;
   hypre_IJMatrixGlobalNumCols(ijmatrix) = colN - col0 + 1;
   
   hypre_TFree(info);
   

#else

   info = hypre_CTAlloc(HYPRE_Int,4);
   recv_buf = hypre_CTAlloc(HYPRE_Int,4*num_procs);
   row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   info[0] = ilower;
   info[1] = iupper;
   info[2] = jlower;
   info[3] = jupper;
  
   /* Generate the row and column partitionings through an information
      exchange across all processors, check whether the matrix is square,
      and verify that the partitionings match, i.e. contain no overlaps or
      gaps; if they do, an error is recorded via hypre_error and the
      routine returns */

   hypre_MPI_Allgather(info,4,HYPRE_MPI_INT,recv_buf,4,HYPRE_MPI_INT,comm);

   row_partitioning[0] = recv_buf[0];
   square = 1;
   for (i=0; i < num_procs-1; i++)
   {
      i4 = 4*i;
      if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) )
      {
         hypre_error(HYPRE_ERROR_GENERIC);
         hypre_TFree(ijmatrix);
         hypre_TFree(info);
         hypre_TFree(recv_buf);
         hypre_TFree(row_partitioning);
   	 return hypre_error_flag;
      }
      else
	 row_partitioning[i+1] = recv_buf[i4+4];
	 
      if ((square && (recv_buf[i4]   != recv_buf[i4+2])) ||
                    (recv_buf[i4+1] != recv_buf[i4+3])  )
      {
         square = 0;
      }
   }	
   i4 = (num_procs-1)*4;
   row_partitioning[num_procs] = recv_buf[i4+1]+1;

   if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) 
      square = 0;

   if (square)
      col_partitioning = row_partitioning;
   else
   {   
      col_partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      col_partitioning[0] = recv_buf[2];
      for (i=0; i < num_procs-1; i++)
      {
         i4 = 4*i;
         if (recv_buf[i4+3] != recv_buf[i4+6]-1)
         {
           hypre_error(HYPRE_ERROR_GENERIC);
           hypre_TFree(ijmatrix);
           hypre_TFree(info);
           hypre_TFree(recv_buf);
           hypre_TFree(row_partitioning);
           hypre_TFree(col_partitioning);
   	   return hypre_error_flag;
         }
         else
   	   col_partitioning[i+1] = recv_buf[i4+6];
      }
      col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1;
   }

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0];
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0];
   hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - 
      row_partitioning[0];
   hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - 
      col_partitioning[0];
   
   hypre_TFree(info);
   hypre_TFree(recv_buf);
   
#endif

   hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning;
   hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning;

   *matrix = (HYPRE_IJMatrix) ijmatrix;
  
   return hypre_error_flag;
}
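A hedged sketch of the public IJ interface built on top of this routine: each rank creates its one-row slice of a diagonal matrix, fills it, and assembles. The header names and the value type accepted by HYPRE_IJMatrixSetValues can differ between hypre versions, so treat this as an outline rather than a drop-in program.

/* Hypothetical use of HYPRE_IJMatrixCreate through the public IJ interface:
   each rank owns one row of an npes x npes diagonal matrix. */
#include "HYPRE.h"
#include "HYPRE_IJ_mv.h"
#include <mpi.h>

int main(int argc, char *argv[])
{
   HYPRE_IJMatrix  A;
   HYPRE_Int       myid, npes, row, col, ncols = 1;
   HYPRE_Real      value = 1.0;

   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
   MPI_Comm_size(MPI_COMM_WORLD, &npes);

   row = myid;  col = myid;        /* this rank's single row and diagonal entry */

   /* ilower == iupper == row: each rank owns exactly one row (and one column) */
   HYPRE_IJMatrixCreate(MPI_COMM_WORLD, row, row, row, row, &A);
   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);
   HYPRE_IJMatrixInitialize(A);
   HYPRE_IJMatrixSetValues(A, 1, &ncols, &row, &col, &value);
   HYPRE_IJMatrixAssemble(A);

   HYPRE_IJMatrixDestroy(A);
   MPI_Finalize();
   return 0;
}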