Beispiel #1
0
HYPRE_ParCSRMatrix
GenerateRotate7pt( MPI_Comm comm,
                   HYPRE_Int      nx,
                   HYPRE_Int      ny,
                   HYPRE_Int      P,
                   HYPRE_Int      Q,
                   HYPRE_Int      p,
                   HYPRE_Int      q,
                   HYPRE_Real   alpha,
                   HYPRE_Real   eps )
{
    hypre_ParCSRMatrix *A;
    hypre_CSRMatrix *diag;
    hypre_CSRMatrix *offd;

    HYPRE_Int    *diag_i;
    HYPRE_Int    *diag_j;
    HYPRE_Real *diag_data;

    HYPRE_Int    *offd_i;
    HYPRE_Int    *offd_j;
    HYPRE_Real *offd_data;

    HYPRE_Real *value;
    HYPRE_Real ac, bc, cc, s, c, pi, x;
    HYPRE_Int *global_part;
    HYPRE_Int ix, iy;
    HYPRE_Int cnt, o_cnt;
    HYPRE_Int local_num_rows;
    HYPRE_Int *col_map_offd;
    HYPRE_Int *work;
    HYPRE_Int row_index;
    HYPRE_Int i,j;

    HYPRE_Int nx_local, ny_local;
    HYPRE_Int nx_size, ny_size;
    HYPRE_Int num_cols_offd;
    HYPRE_Int grid_size;

    HYPRE_Int *nx_part;
    HYPRE_Int *ny_part;

    HYPRE_Int num_procs, my_id;
    HYPRE_Int P_busy, Q_busy;

    hypre_MPI_Comm_size(comm,&num_procs);
    hypre_MPI_Comm_rank(comm,&my_id);

    grid_size = nx*ny;

    value = hypre_CTAlloc(HYPRE_Real,4);
    pi = 4.0*atan(1.0);
    x = pi*alpha/180.0;
    s = sin(x);
    c = cos(x);
    ac = -(c*c + eps*s*s);
    bc = 2.0*(1.0 - eps)*s*c;
    cc = -(s*s + eps*c*c);
    value[0] = -2*(2*ac+bc+2*cc);
    value[1] = 2*ac+bc;
    value[2] = bc+2*cc;
    value[3] = -bc;

    hypre_GeneratePartitioning(nx,P,&nx_part);
    hypre_GeneratePartitioning(ny,Q,&ny_part);

    global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

    global_part[0] = 0;
    cnt = 1;
    for (iy = 0; iy < Q; iy++)
    {
        ny_size = ny_part[iy+1]-ny_part[iy];
        for (ix = 0; ix < P; ix++)
        {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size;
        }
    }

    nx_local = nx_part[p+1] - nx_part[p];
    ny_local = ny_part[q+1] - ny_part[q];

    my_id = q*P + p;
    num_procs = P*Q;

    local_num_rows = nx_local*ny_local;
    diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
    offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

    P_busy = hypre_min(nx,P);
    Q_busy = hypre_min(ny,Q);

    num_cols_offd = 0;
    if (p) num_cols_offd += ny_local;
    if (p < P_busy-1) num_cols_offd += ny_local;
    if (q) num_cols_offd += nx_local;
    if (q < Q_busy-1) num_cols_offd += nx_local;
    if (p && q) num_cols_offd++;
    if (p && q < Q_busy-1 ) num_cols_offd++;
    if (p < P_busy-1 && q ) num_cols_offd++;
    if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

    if (!local_num_rows) num_cols_offd = 0;

    col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

    cnt = 0;
    o_cnt = 0;
    diag_i[0] = 0;
    offd_i[0] = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            cnt++;
            o_cnt++;
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iy > ny_part[q])
            {
                diag_i[cnt]++;
                if (ix > nx_part[p])
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy)
                {
                    offd_i[o_cnt]++;
                    if (ix > nx_part[p])
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
            if (ix > nx_part[p])
                diag_i[cnt]++;
            else
            {
                if (ix)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (ix+1 < nx_part[p+1])
                diag_i[cnt]++;
            else
            {
                if (ix+1 < nx)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_i[cnt]++;
                if (ix < nx_part[p+1]-1)
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix+1 < nx)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_i[o_cnt]++;
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix < nx-1)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
        }
    }

    diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
    diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

    if (num_procs > 1)
    {
        offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
        offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
    }

    row_index = 0;
    cnt = 0;
    o_cnt = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iy > ny_part[q])
            {
                if (ix > nx_part[p])
                {
                    diag_j[cnt] = row_index-nx_local-1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
                diag_j[cnt] = row_index-nx_local;
                diag_data[cnt++] = value[2];
            }
            else
            {
                if (iy)
                {
                    if (ix > nx_part[p])
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                }
            }
            if (ix > nx_part[p])
            {
                diag_j[cnt] = row_index-1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix)
                {
                    offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (ix+1 < nx_part[p+1])
            {
                diag_j[cnt] = row_index+1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix+1 < nx)
                {
                    offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_j[cnt] = row_index+nx_local;
                diag_data[cnt++] = value[2];
                if (ix < nx_part[p+1]-1)
                {
                    diag_j[cnt] = row_index+nx_local+1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix+1 < nx)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix < nx-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            row_index++;
        }
    }

    if (num_procs > 1)
    {
        work = hypre_CTAlloc(HYPRE_Int,o_cnt);

        for (i=0; i < o_cnt; i++)
            work[i] = offd_j[i];

        qsort0(work, 0, o_cnt-1);

        col_map_offd[0] = work[0];
        cnt = 0;
        for (i=0; i < o_cnt; i++)
        {
            if (work[i] > col_map_offd[cnt])
            {
                cnt++;
                col_map_offd[cnt] = work[i];
            }
        }

        for (i=0; i < o_cnt; i++)
        {
            for (j=0; j < num_cols_offd; j++)
            {
                if (offd_j[i] == col_map_offd[j])
                {
                    offd_j[i] = j;
                    break;
                }
            }
        }

        hypre_TFree(work);
    }

    A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                 global_part, global_part, num_cols_offd,
                                 diag_i[local_num_rows],
                                 offd_i[local_num_rows]);

    hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

    diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrixI(diag) = diag_i;
    hypre_CSRMatrixJ(diag) = diag_j;
    hypre_CSRMatrixData(diag) = diag_data;

    offd = hypre_ParCSRMatrixOffd(A);
    hypre_CSRMatrixI(offd) = offd_i;
    if (num_cols_offd)
    {
        hypre_CSRMatrixJ(offd) = offd_j;
        hypre_CSRMatrixData(offd) = offd_data;
    }

    hypre_TFree(nx_part);
    hypre_TFree(ny_part);
    hypre_TFree(value);

    return (HYPRE_ParCSRMatrix) A;
}
Beispiel #2
0
int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat,
           int total_recv_leng, int *recv_lengths, int *ext_ja, 
           double *ext_aa, int *map, int *map2, int Noffset)
{
   int               i, j, k, nnz, *mat_ia, *mat_ja;
   int               **bmat_ia, **bmat_ja;
   int               mypid, *blk_size, index, **blk_indices, **aux_bmat_ia;
   int               ncnt, rownum, offset, Nrows, extNrows, **aux_bmat_ja;
   int               *tmp_blk_leng, *cols, rowleng;
   int               nblocks, col_ind, init_size, aux_nnz, max_blk_size;
   int               *tmp_indices, cur_off_row, length;
   double            *mat_aa, *vals, **aux_bmat_aa, **bmat_aa;

   /* --------------------------------------------------------- */
   /* fetch Schwarz parameters                                  */
   /* --------------------------------------------------------- */

   MPI_Comm_rank(sch_ptr->comm, &mypid);
   Nrows          = sch_ptr->Nrows;
   extNrows       = Nrows + total_recv_leng;
   sch_ptr->Nrows = Nrows;
   sch_ptr->extNrows = extNrows;

   /* --------------------------------------------------------- */
   /* adjust the off-processor row data                         */
   /* --------------------------------------------------------- */

   offset = 0;
   for ( i = 0; i < total_recv_leng; i++ )
   {
      for ( j = offset; j < offset+recv_lengths[i]; j++ )
      {
         index = ext_ja[j];
         if ( index >= Noffset && index < Noffset+Nrows )
            ext_ja[j] = index - Noffset;
         else
         {
            col_ind = HYPRE_LSI_Search(map, index, extNrows-Nrows);
            if ( col_ind >= 0 ) ext_ja[j] = map2[col_ind] + Nrows;
            else                ext_ja[j] = -1;
         }
      }
      offset += recv_lengths[i];
   }

   /* --------------------------------------------------------- */
   /* compose the initial blk_size information                  */
   /* and extend the each block for the overlap                 */
   /* (at the end blk_indices and bli_size contains the info)   */
   /* --------------------------------------------------------- */

   if ( sch_ptr->nblocks == 1 )
   {
      nblocks = 1;
      max_blk_size = extNrows;
      sch_ptr->blk_sizes   = (int *)  malloc(nblocks * sizeof(int));
      blk_size = sch_ptr->blk_sizes;
      blk_size[0] = extNrows;
   }
   else
   {
      if ( sch_ptr->nblocks != 0 )
      {
         nblocks  = sch_ptr->nblocks;
         sch_ptr->block_size = (Nrows + nblocks / 2) / nblocks;
      }
      else
      {
         nblocks  = (Nrows - sch_ptr->block_size / 2) / sch_ptr->block_size + 1;
         sch_ptr->nblocks = nblocks;
      }
      sch_ptr->blk_indices = (int **) malloc(nblocks * sizeof(int*));
      sch_ptr->blk_sizes   = (int *)  malloc(nblocks * sizeof(int));
      blk_indices  = sch_ptr->blk_indices;
      blk_size     = sch_ptr->blk_sizes;
      tmp_blk_leng = (int *) malloc(nblocks * sizeof(int) );
      for ( i = 0; i < nblocks-1; i++ ) blk_size[i] = sch_ptr->block_size;
      blk_size[nblocks-1] = Nrows - sch_ptr->block_size * (nblocks - 1 );
      for ( i = 0; i < nblocks; i++ )
      {
         tmp_blk_leng[i] = 5 * blk_size[i] + 5;
         blk_indices[i] = (int *) malloc(tmp_blk_leng[i] * sizeof(int));
         for (j = 0; j < blk_size[i]; j++) 
            blk_indices[i][j] = sch_ptr->block_size * i + j; 
      }
      max_blk_size = 0;
      for ( i = 0; i < nblocks; i++ )
      {
         init_size = blk_size[i];
         for ( j = 0; j < init_size; j++ )
         {
            rownum = blk_indices[i][j];
            cols = &(Amat->colnum[Amat->rowptr[rownum]]);
            vals = &(Amat->values[Amat->rowptr[rownum]]);
            rowleng = Amat->rowptr[rownum+1] - Amat->rowptr[rownum];
            if ( blk_size[i] + rowleng > tmp_blk_leng[i] )
            {
               tmp_indices = blk_indices[i];
               tmp_blk_leng[i] = 2 * ( blk_size[i] + rowleng ) + 2;
               blk_indices[i] = (int *) malloc(tmp_blk_leng[i] * sizeof(int));
               for (k = 0; k < blk_size[i]; k++) 
                  blk_indices[i][k] = tmp_indices[k];
               free( tmp_indices );
            }
            for ( k = 0; k < rowleng; k++ )
            {
               col_ind = cols[k];
               blk_indices[i][blk_size[i]++] = col_ind;
            }
         }
         qsort0(blk_indices[i], 0, blk_size[i]-1);
         ncnt = 0;
         for ( j = 1; j < blk_size[i]; j++ )
            if ( blk_indices[i][j] != blk_indices[i][ncnt] )
              blk_indices[i][++ncnt] = blk_indices[i][j];
         blk_size[i] = ncnt + 1;
         if ( blk_size[i] > max_blk_size ) max_blk_size = blk_size[i];
      }
      free(tmp_blk_leng);
   } 

   /* --------------------------------------------------------- */
   /* compute the memory requirements for each block            */
   /* --------------------------------------------------------- */

   sch_ptr->bmat_ia = (int **)    malloc(nblocks * sizeof(int*) );
   sch_ptr->bmat_ja = (int **)    malloc(nblocks * sizeof(int*) );
   sch_ptr->bmat_aa = (double **) malloc(nblocks * sizeof(double*) );
   bmat_ia = sch_ptr->bmat_ia;
   bmat_ja = sch_ptr->bmat_ja;
   bmat_aa = sch_ptr->bmat_aa;
   if ( nblocks != 1 )
   {
      sch_ptr->aux_bmat_ia = (int **)    malloc(nblocks * sizeof(int*) );
      sch_ptr->aux_bmat_ja = (int **)    malloc(nblocks * sizeof(int*) );
      sch_ptr->aux_bmat_aa = (double **) malloc(nblocks * sizeof(double*) );
      aux_bmat_ia = sch_ptr->aux_bmat_ia;
      aux_bmat_ja = sch_ptr->aux_bmat_ja;
      aux_bmat_aa = sch_ptr->aux_bmat_aa;
   }
   else
   {
      aux_bmat_ia = NULL;
      aux_bmat_ja = NULL;
      aux_bmat_aa = NULL;
   }

   /* --------------------------------------------------------- */
   /* compose each block into sch_ptr                           */
   /* --------------------------------------------------------- */

   cols = (int *)    malloc( max_blk_size * sizeof(int) );
   vals = (double *) malloc( max_blk_size * sizeof(double) );

   for ( i = 0; i < nblocks; i++ )
   {
      nnz = aux_nnz = offset = cur_off_row = 0;
      if ( nblocks > 1 ) length = blk_size[i];
      else               length = extNrows;
      for ( j = 0; j < length; j++ )
      {
         if ( nblocks > 1 ) rownum = blk_indices[i][j];
         else               rownum = j;
         if ( rownum < Nrows )
         {
            rowleng = 0;
            for ( k = Amat->rowptr[rownum]; k < Amat->rowptr[rownum+1]; k++ )
               cols[rowleng++] = Amat->colnum[k];
         }
         else
         {
            for ( k = cur_off_row; k < rownum-Nrows; k++ )
               offset += recv_lengths[k];
            cur_off_row = rownum - Nrows;
            rowleng = 0;
            for ( k = offset; k < offset+recv_lengths[cur_off_row]; k++ )
               if ( ext_ja[k] != -1 ) cols[rowleng++] = ext_ja[k];
         }
         for ( k = 0; k < rowleng; k++ )
         {
            if ( nblocks > 1 ) 
               index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]);
            else
               index = cols[k];
            if ( index >= 0 ) nnz++;
            else              aux_nnz++;
         }
      }
      bmat_ia[i] = (int *)    malloc( (length + 1) * sizeof(int));
      bmat_ja[i] = (int *)    malloc( nnz * sizeof(int));
      bmat_aa[i] = (double *) malloc( nnz * sizeof(double));
      mat_ia = bmat_ia[i];
      mat_ja = bmat_ja[i];
      mat_aa = bmat_aa[i];
      if ( nblocks > 1 ) 
      {
         aux_bmat_ia[i] = (int *)    malloc( (blk_size[i] + 1) * sizeof(int));
         aux_bmat_ja[i] = (int *)    malloc( aux_nnz * sizeof(int));
         aux_bmat_aa[i] = (double *) malloc( aux_nnz * sizeof(double));
      }

      /* ------------------------------------------------------ */
      /* load the submatrices                                   */
      /* ------------------------------------------------------ */

      nnz = aux_nnz = offset = cur_off_row = 0;
      mat_ia[0] = 0;
      if ( nblocks > 1 ) aux_bmat_ia[i][0] = 0;

      for ( j = 0; j < blk_size[i]; j++ )
      {
         if ( nblocks > 1 ) rownum = blk_indices[i][j];
         else               rownum = j;
         if ( rownum < Nrows )
         {
            rowleng = 0;
            for ( k = Amat->rowptr[rownum]; k < Amat->rowptr[rownum+1]; k++ )
            {
               vals[rowleng]   = Amat->values[k];
               cols[rowleng++] = Amat->colnum[k];
            }
         }
         else
         {
            for ( k = cur_off_row; k < rownum-Nrows; k++ )
            {
               offset += recv_lengths[k];
            }
            cur_off_row = rownum - Nrows;
            rowleng = 0;
            for ( k = offset; k < offset+recv_lengths[cur_off_row]; k++ )
            {
               if ( ext_ja[k] != -1 )
               {
                  cols[rowleng] = ext_ja[k];
                  vals[rowleng++] = ext_aa[k];
               }
            }
         }
         for ( k = 0; k < rowleng; k++ )
         {
            if ( nblocks > 1 ) 
               index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]);
            else index = cols[k];
            if ( index >= 0 )
            {
               mat_ja[nnz] = index;
               mat_aa[nnz++] = vals[k];
            }
            else
            {
               aux_bmat_ja[i][aux_nnz] = cols[k];
               aux_bmat_aa[i][aux_nnz++] = vals[k];
            }
         }
         mat_ia[j+1] = nnz;
         if ( nblocks > 1 ) aux_bmat_ia[i][j+1] = aux_nnz;
      }
      for ( j = 0; j < mat_ia[blk_size[i]]; j++ )
         if ( mat_ja[j] < 0 || mat_ja[j] >= length )
            printf("block %d has index %d\n", i, mat_ja[j]);
   }
   free( cols );
   free( vals );

   /* --------------------------------------------------------- */
   /* decompose each block                                      */
   /* --------------------------------------------------------- */

   HYPRE_LSI_ILUTDecompose( sch_ptr );

   return 0;
}
Beispiel #3
0
hypre_ParCSRMatrix * hypre_ParMatmul_FC(
   hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker,
   HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd )
/* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the
   matrix product A*P.  A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC
   where nC is the number of coarse rows/columns, nF the number of fine
   rows/columns.  The size of C=A*P is (nC+nF)*nC, even though not all rows
   of C are actually computed.  If we were to construct a matrix consisting
   only of the computed rows of C, its size would be nF*nC.
   "Fine" is defined solely by the marker array, and for example could be
   a proper subset of the fine points of a multigrid hierarchy.
*/
{
   /* To compute a submatrix of C containing only the computed data, i.e.
      only "Fine" rows, we would have to do a lot of computational work,
      with a lot of communication.  The communication is because such a
      matrix would need global information that depends on which rows are
      "Fine".
   */

   MPI_Comm 	   comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int             *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int             *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int             *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int             *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int	num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int	num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int	num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);
   
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int		   *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int	first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int	last_col_diag_P;
   HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int		      *col_map_offd_C;
   HYPRE_Int		      *map_P_to_C;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int             *C_diag_i;
   HYPRE_Int             *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data=NULL;
   HYPRE_Int             *C_offd_i=NULL;
   HYPRE_Int             *C_offd_j=NULL;

   HYPRE_Int              C_diag_size;
   HYPRE_Int              C_offd_size;
   HYPRE_Int		    num_cols_offd_C = 0;
   
   hypre_CSRMatrix *Ps_ext;
   
   double          *Ps_ext_data;
   HYPRE_Int             *Ps_ext_i;
   HYPRE_Int             *Ps_ext_j;

   double          *P_ext_diag_data;
   HYPRE_Int             *P_ext_diag_i;
   HYPRE_Int             *P_ext_diag_j;
   HYPRE_Int              P_ext_diag_size;

   double          *P_ext_offd_data;
   HYPRE_Int             *P_ext_offd_i;
   HYPRE_Int             *P_ext_offd_j;
   HYPRE_Int              P_ext_offd_size;

   HYPRE_Int		   *P_marker;
   HYPRE_Int		   *temp;

   HYPRE_Int              i, j;
   HYPRE_Int              i1, i2, i3;
   HYPRE_Int              jj2, jj3;
   
   HYPRE_Int              jj_count_diag, jj_count_offd;
   HYPRE_Int              jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int              start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int		    n_rows_A_global, n_cols_A_global;
   HYPRE_Int		    n_rows_P_global, n_cols_P_global;
   HYPRE_Int              allsquare = 0;
   HYPRE_Int              cnt, cnt_offd, cnt_diag;
   HYPRE_Int 		    num_procs;
   HYPRE_Int 		    value;

   double           a_entry;
   double           a_b_product;
   
   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
	hypre_printf(" Error! Incompatible matrix dimensions!\n");
	return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product 
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
       /*---------------------------------------------------------------------
    	* If there exists no CommPkg for A, a CommPkg is generated using
    	* equally load balanced partitionings within 
	* hypre_ParCSRMatrixExtractBExt
    	*--------------------------------------------------------------------*/
   	Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
   	Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
   	Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
   	Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P -1;

   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i=0; i < num_cols_offd_A; i++)
   {
      for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i=0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i=0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i=1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
        col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i=0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   for (i=0 ; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i=0; i < num_cols_offd_C; i++)
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
   }

   /*-----------------------------------------------------------------------
   *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }


/* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/
 
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   { 
   	C_offd_data = hypre_CTAlloc(double, C_offd_size);
   	C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   } 


   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {      
      P_marker[i1] = -1;
   }
   
   /*-----------------------------------------------------------------------
    *  Loop over interior c-points.
    *-----------------------------------------------------------------------*/
    
   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {

      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first pass
            and near the end where
            hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */
      {

         /*--------------------------------------------------------------------
          *  Create diagonal entry, C_{i1,i1} 
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/
         
	 if (num_cols_offd_A)
	 {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];
            
                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                        C_offd_data[P_marker[i3]] += a_b_product;
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                        C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }

            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];
            
               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];
                  
                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
	       {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];
                  
                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
	 if (num_cols_offd_P)
	 {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd];
               C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data; 
   hypre_CSRMatrixI(C_diag) = C_diag_i; 
   hypre_CSRMatrixJ(C_diag) = C_diag_j; 

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i; 
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data; 
      hypre_CSRMatrixJ(C_offd) = C_offd_j; 
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;

   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);   
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
   
}
/*
  Assume that we are given a fine and coarse topology and the
  coarse degrees of freedom (DOFs) have been chosen. Assume also,
  that the global interpolation matrix dof_DOF has a prescribed
  nonzero pattern. Then, the fine degrees of freedom can be split
  into 4 groups (here "i" stands for "interior"):

  NODEidof - dofs which are interpolated only from the DOF
             in one coarse vertex
  EDGEidof - dofs which are interpolated only from the DOFs
             in one coarse edge
  FACEidof - dofs which are interpolated only from the DOFs
             in one coarse face
  ELEMidof - dofs which are interpolated only from the DOFs
             in one coarse element

  The interpolation operator dof_DOF can be build in 4 steps, by
  consequently filling-in the rows corresponding to the above groups.
  The code below uses harmonic extension to extend the interpolation
  from one group to the next.
*/
HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix       * Aee,
                                hypre_ParCSRMatrix       * ELEM_idof,
                                hypre_ParCSRMatrix       * FACE_idof,
                                hypre_ParCSRMatrix       * EDGE_idof,
                                hypre_ParCSRMatrix       * ELEM_FACE,
                                hypre_ParCSRMatrix       * ELEM_EDGE,
                                HYPRE_Int                  num_OffProcRows,
                                hypre_MaxwellOffProcRow ** OffProcRows,
                                hypre_IJMatrix           * IJ_dof_DOF)
{
   HYPRE_Int ierr = 0;

   HYPRE_Int  i, j, k;
   HYPRE_Int *offproc_rnums, *swap;

   hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF);
   hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE;
   hypre_ParCSRMatrix * ELEM_FACEidof;
   hypre_ParCSRMatrix * ELEM_EDGEidof;
   hypre_CSRMatrix *A, *P;
   HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE));

   HYPRE_Int getrow_ierr;
   HYPRE_Int three_dimensional_problem;

   MPI_Comm comm= hypre_ParCSRMatrixComm(Aee);
   HYPRE_Int      myproc;

   hypre_MPI_Comm_rank(comm, &myproc);

#if 0
   hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1);
   /* Convert dof_DOF to IJ matrix, so we can use AddToValues */
   hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF);
   hypre_IJMatrixRowPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixRowStarts(dof_DOF);
   hypre_IJMatrixColPartitioning(ij_dof_DOF) =
      hypre_ParCSRMatrixColStarts(dof_DOF);
   hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF;
   hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1;
#endif

  /* sort the offproc rows to get quicker comparison for later */
   if (num_OffProcRows)
   {
      offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      swap         = hypre_TAlloc(HYPRE_Int, num_OffProcRows);
      for (i= 0; i< num_OffProcRows; i++)
      {
         offproc_rnums[i]=(OffProcRows[i] -> row);
         swap[i]         = i;
      }
   }

   if (num_OffProcRows > 1)
   {
      hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1);
   }

   if (FACE_idof == EDGE_idof)
      three_dimensional_problem = 0;
   else
      three_dimensional_problem = 1;

   /* ELEM_FACEidof = ELEM_FACE x FACE_idof */
   if (three_dimensional_problem)
      ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof);

   /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */
   ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof);

   /* Loop over local coarse elements */
   k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE);
   for (i = 0; i < numELEM; i++, k++)
   {
      HYPRE_Int size1, size2;
      HYPRE_Int *col_ind0, *col_ind1, *col_ind2;

      HYPRE_Int num_DOF, *DOF0, *DOF;
      HYPRE_Int num_idof, *idof0, *idof;
      HYPRE_Int num_bdof, *bdof;

      double *boolean_data;

      /* Determine the coarse DOFs */
      hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);
      DOF= hypre_TAlloc(HYPRE_Int, num_DOF);
      for (j= 0; j< num_DOF; j++)
      {
         DOF[j]= DOF0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data);

      qsort0(DOF,0,num_DOF-1);

      /* Find the fine dofs interior for the current coarse element */
      hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);
      idof= hypre_TAlloc(HYPRE_Int, num_idof);
      for (j= 0; j< num_idof; j++)
      {
         idof[j]= idof0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data);

      /* Sort the interior dofs according to their global number */
      qsort0(idof,0,num_idof-1);

      /* Find the fine dofs on the boundary of the current coarse element */
      if (three_dimensional_problem)
      {
         hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
         col_ind1= hypre_TAlloc(HYPRE_Int, size1);
         for (j= 0; j< size1; j++)
         {
            col_ind1[j]= col_ind0[j];
         }
         hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data);
      }
      else
         size1 = 0;

      hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);
      col_ind2= hypre_TAlloc(HYPRE_Int, size2);
      for (j= 0; j< size2; j++)
      {
         col_ind2[j]= col_ind0[j];
      }
      hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data);

      /* Merge and sort the boundary dofs according to their global number */
      num_bdof = size1 + size2;
      bdof = hypre_CTAlloc(HYPRE_Int, num_bdof);
      if (three_dimensional_problem)
         memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int));
      memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int));

      qsort0(bdof,0,num_bdof-1);

      /* A = extract_rows(Aee, idof) */
      A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof,
                                 num_idof * (num_idof + num_bdof));
      hypre_CSRMatrixInitialize(A);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(A);
         HYPRE_Int *J = hypre_CSRMatrixJ(A);
         double *data = hypre_CSRMatrixData(A);

         HYPRE_Int *tmp_J;
         double *tmp_data;

         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr <0)
               hypre_printf("getrow Aee off proc[%d] = \n",myproc);
            memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
            memcpy(data, tmp_data, I[j+1]*sizeof(double));
            J+= I[j+1];
            data+= I[j+1];
            hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data);
            I[j+1] += I[j];
         }
      }

      /* P = extract_rows(dof_DOF, idof+bdof) */
      P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF,
                                 (num_idof + num_bdof) * num_DOF);
      hypre_CSRMatrixInitialize(P);
      {
         HYPRE_Int *I = hypre_CSRMatrixI(P);
         HYPRE_Int *J = hypre_CSRMatrixJ(P);
         double *data = hypre_CSRMatrixData(P);
         HYPRE_Int     m;

         HYPRE_Int *tmp_J;
         double *tmp_data;
     
         I[0] = 0;
         for (j = 0; j < num_idof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == idof[j])
                  { 
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }

         }
         for ( ; j < num_idof + num_bdof; j++)
         {
            getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
            if (getrow_ierr >= 0)
            {
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
               I[j+1] += I[j];
            }
            else    /* row offproc */
            {
               hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data);
              /* search for OffProcRows */
               m= 0;
               while (m < num_OffProcRows)
               {
                  if (offproc_rnums[m] == bdof[j-num_idof])
                  {
                     break;
                  }
                  else
                  {
                     m++;
                  }
               }
               if (m>= num_OffProcRows)hypre_printf("here the mistake\n");
               I[j+1]= (OffProcRows[swap[m]] -> ncols);
               tmp_J = (OffProcRows[swap[m]] -> cols);
               tmp_data= (OffProcRows[swap[m]] -> data);
               memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int));
               memcpy(data, tmp_data, I[j+1]*sizeof(double));
               J+= I[j+1];
               data+= I[j+1];
               I[j+1] += I[j];
            }
         }
      }

      /* Pi = Aii^{-1} Aib Pb */
      hypre_HarmonicExtension (A, P, num_DOF, DOF,
                               num_idof, idof, num_bdof, bdof);

      /* Insert Pi in dof_DOF */
      {
         HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof);

         for (j = 0; j < num_idof; j++)
            ncols[j] = num_DOF;

         hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF,
                                          num_idof, ncols, idof,
                                          hypre_CSRMatrixJ(P),
                                          hypre_CSRMatrixData(P));

         hypre_TFree(ncols);
      }

      hypre_TFree(DOF);
      hypre_TFree(idof);
      if (three_dimensional_problem)
      {
         hypre_TFree(col_ind1);
      }
      hypre_TFree(col_ind2);
      hypre_TFree(bdof);

      hypre_CSRMatrixDestroy(A);
      hypre_CSRMatrixDestroy(P);
   }

#if 0
   hypre_TFree(ij_dof_DOF);
#endif

   if (three_dimensional_problem)
      hypre_ParCSRMatrixDestroy(ELEM_FACEidof);
   hypre_ParCSRMatrixDestroy(ELEM_EDGEidof);

   if (num_OffProcRows)
   {
      hypre_TFree(offproc_rnums);
      hypre_TFree(swap);
   }

   return ierr;
}
Beispiel #5
0
hypre_Vector *
hypre_ParVectorToVectorAll (hypre_ParVector *par_v)
{
   MPI_Comm		comm = hypre_ParVectorComm(par_v);
   HYPRE_Int 			global_size = hypre_ParVectorGlobalSize(par_v);
#ifndef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int 			*vec_starts = hypre_ParVectorPartitioning(par_v);
#endif
   hypre_Vector     	*local_vector = hypre_ParVectorLocalVector(par_v);
   HYPRE_Int  		num_procs, my_id;
   HYPRE_Int                  num_vectors = hypre_ParVectorNumVectors(par_v);
   hypre_Vector  	*vector;
   double		*vector_data;
   double		*local_data;
   HYPRE_Int 			local_size;
   hypre_MPI_Request		*requests;
   hypre_MPI_Status		*status;
   HYPRE_Int			i, j;
   HYPRE_Int			*used_procs;
   HYPRE_Int			num_types, num_requests;
   HYPRE_Int			vec_len, proc_id;

#ifdef HYPRE_NO_GLOBAL_PARTITION

   HYPRE_Int *new_vec_starts;
   
   HYPRE_Int num_contacts;
   HYPRE_Int contact_proc_list[1];
   HYPRE_Int contact_send_buf[1];
   HYPRE_Int contact_send_buf_starts[2];
   HYPRE_Int max_response_size;
   HYPRE_Int *response_recv_buf=NULL;
   HYPRE_Int *response_recv_buf_starts = NULL;
   hypre_DataExchangeResponse response_obj;
   hypre_ProcListElements send_proc_obj;
   
   HYPRE_Int *send_info = NULL;
   hypre_MPI_Status  status1;
   HYPRE_Int count, tag1 = 112, tag2 = 223;
   HYPRE_Int start;
   
#endif


   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

#ifdef HYPRE_NO_GLOBAL_PARTITION

  local_size = hypre_ParVectorLastIndex(par_v) - 
     hypre_ParVectorFirstIndex(par_v) + 1;

 

/* determine procs which hold data of par_v and store ids in used_procs */
/* we need to do an exchange data for this.  If I own row then I will contact
   processor 0 with the endpoint of my local range */


   if (local_size > 0)
   {
      num_contacts = 1;
      contact_proc_list[0] = 0;
      contact_send_buf[0] =  hypre_ParVectorLastIndex(par_v);
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 1;
   }
   else
   {
      num_contacts = 0;
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 0;
   }

   /*build the response object*/
   /*send_proc_obj will  be for saving info from contacts */
   send_proc_obj.length = 0;
   send_proc_obj.storage_length = 10;
   send_proc_obj.id = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length);
   send_proc_obj.vec_starts = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1); 
   send_proc_obj.vec_starts[0] = 0;
   send_proc_obj.element_storage_length = 10;
   send_proc_obj.elements = hypre_CTAlloc(HYPRE_Int, send_proc_obj.element_storage_length);

   max_response_size = 0; /* each response is null */
   response_obj.fill_response = hypre_FillResponseParToVectorAll;
   response_obj.data1 = NULL;
   response_obj.data2 = &send_proc_obj; /*this is where we keep info from contacts*/
  
   
   hypre_DataExchangeList(num_contacts, 
                          contact_proc_list, contact_send_buf, 
                          contact_send_buf_starts, sizeof(HYPRE_Int), 
                          sizeof(HYPRE_Int), &response_obj, 
                          max_response_size, 1,
                          comm, (void**) &response_recv_buf,	   
                          &response_recv_buf_starts);

 /* now processor 0 should have a list of ranges for processors that have rows -
      these are in send_proc_obj - it needs to create the new list of processors
      and also an array of vec starts - and send to those who own row*/
   if (my_id)
   {
      if (local_size)      
      {
         /* look for a message from processor 0 */         
         hypre_MPI_Probe(0, tag1, comm, &status1);
         hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count);
         
         send_info = hypre_CTAlloc(HYPRE_Int, count);
         hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1);

         /* now unpack */  
         num_types = send_info[0];
         used_procs =  hypre_CTAlloc(HYPRE_Int, num_types);  
         new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);

         for (i=1; i<= num_types; i++)
         {
            used_procs[i-1] = send_info[i];
         }
         for (i=num_types+1; i< count; i++)
         {
            new_vec_starts[i-num_types-1] = send_info[i] ;
         }
      }
      else /* clean up and exit */
      {
         hypre_TFree(send_proc_obj.vec_starts);
         hypre_TFree(send_proc_obj.id);
         hypre_TFree(send_proc_obj.elements);
         if(response_recv_buf)        hypre_TFree(response_recv_buf);
         if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);
         return NULL;
      }
   }
   else /* my_id ==0 */
   {
      num_types = send_proc_obj.length;
      used_procs =  hypre_CTAlloc(HYPRE_Int, num_types);  
      new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);
      
      new_vec_starts[0] = 0;
      for (i=0; i< num_types; i++)
      {
         used_procs[i] = send_proc_obj.id[i];
         new_vec_starts[i+1] = send_proc_obj.elements[i]+1;
      }
      qsort0(used_procs, 0, num_types-1);
      qsort0(new_vec_starts, 0, num_types);
      /*now we need to put into an array to send */
      count =  2*num_types+2;
      send_info = hypre_CTAlloc(HYPRE_Int, count);
      send_info[0] = num_types;
      for (i=1; i<= num_types; i++)
      {
         send_info[i] = used_procs[i-1];
      }
      for (i=num_types+1; i< count; i++)
      {
         send_info[i] = new_vec_starts[i-num_types-1];
      }
      requests = hypre_CTAlloc(hypre_MPI_Request, num_types);
      status =  hypre_CTAlloc(hypre_MPI_Status, num_types);

      /* don't send to myself  - these are sorted so my id would be first*/
      start = 0;
      if (used_procs[0] == 0)
      {
         start = 1;
      }
   
      
      for (i=start; i < num_types; i++)
      {
         hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1, comm, &requests[i-start]);
      }
      hypre_MPI_Waitall(num_types-start, requests, status);

      hypre_TFree(status);
      hypre_TFree(requests);
   }

   /* clean up */
   hypre_TFree(send_proc_obj.vec_starts);
   hypre_TFree(send_proc_obj.id);
   hypre_TFree(send_proc_obj.elements);
   hypre_TFree(send_info);
   if(response_recv_buf)        hypre_TFree(response_recv_buf);
   if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);

   /* now proc 0 can exit if it has no rows */
   if (!local_size) {
      hypre_TFree(used_procs);
      hypre_TFree(new_vec_starts);
      return NULL;
   }
   
   /* everyone left has rows and knows: new_vec_starts, num_types, and used_procs */

  /* this vector should be rather small */

   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

   num_requests = 2*num_types;

   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

/* initialize data exchange among used_procs and generate vector  - here we 
   send to ourself also*/
 
   j = 0;
   for (i = 0; i < num_types; i++)
   {
        proc_id = used_procs[i];
        vec_len = new_vec_starts[i+1] - new_vec_starts[i];
        hypre_MPI_Irecv(&vector_data[new_vec_starts[i]], num_vectors*vec_len, hypre_MPI_DOUBLE,
                                proc_id, tag2, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
        hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE, used_procs[i],
                          tag2, comm, &requests[j++]);
   }
 
   hypre_MPI_Waitall(num_requests, requests, status);


   if (num_requests)
   {
   	hypre_TFree(requests);
   	hypre_TFree(status); 
        hypre_TFree(used_procs);
   }

   hypre_TFree(new_vec_starts);
   


#else
   local_size = vec_starts[my_id+1] - vec_starts[my_id];

/* if my_id contains no data, return NULL  */

   if (!local_size)
	return NULL;
 
   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

/* determine procs which hold data of par_v and store ids in used_procs */

   num_types = -1;
   for (i=0; i < num_procs; i++)
        if (vec_starts[i+1]-vec_starts[i])
                num_types++;
   num_requests = 2*num_types;
 
   used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
   j = 0;
   for (i=0; i < num_procs; i++)
        if (vec_starts[i+1]-vec_starts[i] && i-my_id)
                used_procs[j++] = i;
 
   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

/* initialize data exchange among used_procs and generate vector */
 
   j = 0;
   for (i = 0; i < num_types; i++)
   {
        proc_id = used_procs[i];
        vec_len = vec_starts[proc_id+1] - vec_starts[proc_id];
        hypre_MPI_Irecv(&vector_data[vec_starts[proc_id]], num_vectors*vec_len, hypre_MPI_DOUBLE,
                                proc_id, 0, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
        hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE, used_procs[i],
                          0, comm, &requests[j++]);
   }
 
   for (i=0; i < num_vectors*local_size; i++)
        vector_data[vec_starts[my_id]+i] = local_data[i];
 
   hypre_MPI_Waitall(num_requests, requests, status);

   if (num_requests)
   {
   	hypre_TFree(used_procs);
   	hypre_TFree(requests);
   	hypre_TFree(status); 
   }


#endif

   return vector;
}
Beispiel #6
0
HYPRE_Int HYPRE_ParCSRMLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat,
                             MPI_Comm comm, HYPRE_Int *partition,MH_Context *obj) 
{
    HYPRE_Int         i, j, index, my_id, nprocs, msgid, *tempCnt;
    HYPRE_Int         sendProcCnt, *sendLeng, *sendProc, **sendList;
    HYPRE_Int         recvProcCnt, *recvLeng, *recvProc;
    HYPRE_Int         rowLeng, *colInd, startRow, endRow, localEqns;
    HYPRE_Int         *diagSize, *offdiagSize, externLeng, *externList, ncnt, nnz;
    HYPRE_Int         *rowptr, *columns, num_bdry;
    double      *colVal, *values;
    hypre_MPI_Request *Request;
    hypre_MPI_Status  status;

    /* -------------------------------------------------------- */
    /* get machine information and local matrix information     */
    /* -------------------------------------------------------- */
    
    hypre_MPI_Comm_rank(comm, &my_id);
    hypre_MPI_Comm_size(comm, &nprocs);

    startRow  = partition[my_id];
    endRow    = partition[my_id+1] - 1;
    localEqns = endRow - startRow + 1;

    /* -------------------------------------------------------- */
    /* probe A to find out about diagonal and off-diagonal      */
    /* block information                                        */
    /* -------------------------------------------------------- */

    diagSize    = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * localEqns );
    offdiagSize = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * localEqns );
    num_bdry = 0;
    for ( i = startRow; i <= endRow; i++ )
    {
       diagSize[i-startRow] = offdiagSize[i-startRow] = 0;
       HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal);
       for (j = 0; j < rowLeng; j++)
          if ( colInd[j] < startRow || colInd[j] > endRow )
          {
             //if ( colVal[j] != 0.0 ) offdiagSize[i-startRow]++;
             offdiagSize[i-startRow]++;
          }
          else
          {
             //if ( colVal[j] != 0.0 ) diagSize[i-startRow]++;
             diagSize[i-startRow]++;
          }
       HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal);
       if ( diagSize[i-startRow] + offdiagSize[i-startRow] == 1 ) num_bdry++;
    }

    /* -------------------------------------------------------- */
    /* construct external node list in global eqn numbers       */
    /* -------------------------------------------------------- */

    externLeng = 0;
    for ( i = 0; i < localEqns; i++ ) externLeng += offdiagSize[i];
    if ( externLeng > 0 )
         externList = (HYPRE_Int *) malloc( sizeof(HYPRE_Int) * externLeng);
    else externList = NULL;
    externLeng = 0;
    for ( i = startRow; i <= endRow; i++ )
    {
       HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal);
       for (j = 0; j < rowLeng; j++)
       {
          if ( colInd[j] < startRow || colInd[j] > endRow )
             //if ( colVal[j] != 0.0 ) externList[externLeng++] = colInd[j];
             externList[externLeng++] = colInd[j];
       }
       HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal);
    }
    qsort0( externList, 0, externLeng-1 );
    ncnt = 0;
    for ( i = 1; i < externLeng; i++ )
    {
       if ( externList[i] != externList[ncnt] ) 
          externList[++ncnt] = externList[i];
    }
    externLeng = ncnt + 1;

    /* -------------------------------------------------------- */
    /* allocate the CSR matrix                                  */
    /* -------------------------------------------------------- */ 

    nnz = 0; 
    for ( i = 0; i < localEqns; i++ ) nnz += diagSize[i] + offdiagSize[i]; 
    rowptr  = (HYPRE_Int *)    malloc( (localEqns + 1) * sizeof(HYPRE_Int) ); 
    columns = (HYPRE_Int *)    malloc( nnz * sizeof(HYPRE_Int) ); 
    values  = (double *) malloc( nnz * sizeof(double) ); 
    rowptr[0] = 0; 
    for ( i = 1; i <= localEqns; i++ ) 
       rowptr[i] = rowptr[i-1] + diagSize[i-1] + offdiagSize[i-1];
    free( diagSize );
    free( offdiagSize );

    /* -------------------------------------------------------- */ 
    /* put the matrix data in the CSR matrix                    */
    /* -------------------------------------------------------- */ 

    rowptr[0] = 0; 
    ncnt      = 0;
    for ( i = startRow; i <= endRow; i++ )
    {
       HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal);
       for (j = 0; j < rowLeng; j++)
       {
          index = colInd[j];
          //if ( colVal[j] != 0.0 ) 
          {
             if ( index < startRow || index > endRow )
             {
                columns[ncnt] = hypre_BinarySearch(externList,index,
                                                   externLeng );
                columns[ncnt] += localEqns;
                values [ncnt++] = colVal[j];
             }
             else
             {
                columns[ncnt] = index - startRow;
                values[ncnt++] = colVal[j];
             }
          }
       }
       rowptr[i-startRow+1] = ncnt;
       HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal);
    }
    assert( ncnt == nnz );
   
    /* -------------------------------------------------------- */ 
    /* initialize the MH_Matrix data structure                  */
    /* -------------------------------------------------------- */ 

    mh_mat->Nrows       = localEqns;
    mh_mat->rowptr      = rowptr;
    mh_mat->colnum      = columns;
    mh_mat->values      = values;
    mh_mat->sendProcCnt = 0;
    mh_mat->recvProcCnt = 0;
    mh_mat->sendLeng    = NULL;
    mh_mat->recvLeng    = NULL;
    mh_mat->sendProc    = NULL;
    mh_mat->recvProc    = NULL;
    mh_mat->sendList    = NULL;
    mh_mat->map         = externList;
 
    /* -------------------------------------------------------- */ 
    /* form the remote portion of the matrix                    */
    /* -------------------------------------------------------- */ 

    if ( nprocs > 1 ) 
    {
       /* ----------------------------------------------------- */ 
       /* count number of elements to be received from each     */
       /* remote processor (assume sequential mapping)          */
       /* ----------------------------------------------------- */ 

       tempCnt = (HYPRE_Int *) malloc( sizeof(HYPRE_Int) * nprocs );
       for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0;
       for ( i = 0; i < externLeng; i++ )
       {
          for ( j = 0; j < nprocs; j++ )
          {
             if ( externList[i] >= partition[j] && 
                  externList[i] < partition[j+1] )
             {
                tempCnt[j]++;
                break;
             }
          }
       }

       /* ----------------------------------------------------- */ 
       /* compile a list processors data is to be received from */
       /* ----------------------------------------------------- */ 

       recvProcCnt = 0;
       for ( i = 0; i < nprocs; i++ )
          if ( tempCnt[i] > 0 ) recvProcCnt++;
       recvLeng = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * recvProcCnt );
       recvProc = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * recvProcCnt );
       recvProcCnt = 0;
       for ( i = 0; i < nprocs; i++ )
       {
          if ( tempCnt[i] > 0 ) 
          {
             recvProc[recvProcCnt]   = i;
             recvLeng[recvProcCnt++] = tempCnt[i];
          }
       }

       /* ----------------------------------------------------- */ 
       /* each processor has to find out how many processors it */
       /* has to send data to                                   */
       /* ----------------------------------------------------- */ 

       sendLeng = (HYPRE_Int *) malloc( nprocs * sizeof(HYPRE_Int) );
       for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0;
       for ( i = 0; i < recvProcCnt; i++ ) tempCnt[recvProc[i]] = 1;
       hypre_MPI_Allreduce(tempCnt, sendLeng, nprocs, HYPRE_MPI_INT, hypre_MPI_SUM, comm );
       sendProcCnt = sendLeng[my_id];
       free( sendLeng );
       if ( sendProcCnt > 0 )
       {
          sendLeng = (HYPRE_Int *)  malloc( sendProcCnt * sizeof(HYPRE_Int) );
          sendProc = (HYPRE_Int *)  malloc( sendProcCnt * sizeof(HYPRE_Int) );
          sendList = (HYPRE_Int **) malloc( sendProcCnt * sizeof(HYPRE_Int*) );
       }
       else 
       {
          sendLeng = sendProc = NULL;
          sendList = NULL;
       }

       /* ----------------------------------------------------- */ 
       /* each processor sends to all processors it expects to  */
       /* receive data about the lengths of data expected       */
       /* ----------------------------------------------------- */ 

       msgid = 539;
       for ( i = 0; i < recvProcCnt; i++ ) 
       {
          hypre_MPI_Send((void*) &recvLeng[i],1,HYPRE_MPI_INT,recvProc[i],msgid,comm);
       }
       for ( i = 0; i < sendProcCnt; i++ ) 
       {
          hypre_MPI_Recv((void*) &sendLeng[i],1,HYPRE_MPI_INT,hypre_MPI_ANY_SOURCE,msgid,
                   comm,&status);
          sendProc[i] = status.hypre_MPI_SOURCE;
          sendList[i] = (HYPRE_Int *) malloc( sendLeng[i] * sizeof(HYPRE_Int) );
          if ( sendList[i] == NULL ) 
             hypre_printf("allocate problem %d \n", sendLeng[i]);
       }

       /* ----------------------------------------------------- */ 
       /* each processor sends to all processors it expects to  */
       /* receive data about the equation numbers               */
       /* ----------------------------------------------------- */ 

       for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; 
       ncnt = 1;
       for ( i = 0; i < externLeng; i++ ) 
       {
          if ( externList[i] >= partition[ncnt] )
          {
             tempCnt[ncnt-1] = i;
             i--;
             ncnt++;
          }
       }    
       for ( i = ncnt-1; i < nprocs; i++ ) tempCnt[i] = externLeng; 

       /* ----------------------------------------------------- */ 
       /* send the global equation numbers                      */
       /* ----------------------------------------------------- */ 

       msgid = 540;
       for ( i = 0; i < recvProcCnt; i++ ) 
       {
          if ( recvProc[i] == 0 ) j = 0;
          else                    j = tempCnt[recvProc[i]-1];
          rowLeng = recvLeng[i];
          hypre_MPI_Send((void*) &externList[j],rowLeng,HYPRE_MPI_INT,recvProc[i],
                    msgid,comm);
       }
       for ( i = 0; i < sendProcCnt; i++ ) 
       {
          rowLeng = sendLeng[i];
          hypre_MPI_Recv((void*)sendList[i],rowLeng,HYPRE_MPI_INT,sendProc[i],
                   msgid,comm,&status);
       }

       /* ----------------------------------------------------- */ 
       /* convert the send list from global to local numbers    */
       /* ----------------------------------------------------- */ 

       for ( i = 0; i < sendProcCnt; i++ )
       { 
          for ( j = 0; j < sendLeng[i]; j++ )
          {
             index = sendList[i][j] - startRow;
             if ( index < 0 || index >= localEqns )
             {
                hypre_printf("%d : Construct MH matrix Error - index out ");
                hypre_printf("of range%d\n", my_id, index);
             }
             sendList[i][j] = index;
          }
       }

       /* ----------------------------------------------------- */ 
       /* convert the send list from global to local numbers    */
       /* ----------------------------------------------------- */ 

       mh_mat->sendProcCnt = sendProcCnt;
       mh_mat->recvProcCnt = recvProcCnt;
       mh_mat->sendLeng    = sendLeng;
       mh_mat->recvLeng    = recvLeng;
       mh_mat->sendProc    = sendProc;
       mh_mat->recvProc    = recvProc;
       mh_mat->sendList    = sendList;

       /* ----------------------------------------------------- */ 
       /* clean up                                              */
       /* ----------------------------------------------------- */ 

       free( tempCnt );
    }

    return 0;
}
Beispiel #7
0
HYPRE_Int hypre_CreateLambda(void *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   MPI_Comm comm;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRMatrix *A_tmp;
   hypre_ParCSRMatrix *Lambda;
   hypre_CSRMatrix *L_diag;
   hypre_CSRMatrix *L_offd;
   hypre_CSRMatrix *A_tmp_diag;
   hypre_CSRMatrix *A_tmp_offd;
   hypre_ParVector *Xtilde;
   hypre_ParVector *Rtilde;
   hypre_Vector *Xtilde_local;
   hypre_Vector *Rtilde_local;
   hypre_ParCSRCommPkg *comm_pkg;
   hypre_ParCSRCommPkg *L_comm_pkg = NULL;
   hypre_ParCSRCommHandle *comm_handle;
   HYPRE_Real    *L_diag_data;
   HYPRE_Real    *L_offd_data;
   HYPRE_Real    *buf_data = NULL;
   HYPRE_Real    *tmp_data;
   HYPRE_Real    *x_data;
   HYPRE_Real    *r_data;
   HYPRE_Real    *l1_norms;
   HYPRE_Real    *A_tmp_diag_data;
   HYPRE_Real    *A_tmp_offd_data;
   HYPRE_Real    *D_data = NULL;
   HYPRE_Real    *D_data_offd = NULL;
   HYPRE_Int *L_diag_i;
   HYPRE_Int *L_diag_j;
   HYPRE_Int *L_offd_i;
   HYPRE_Int *L_offd_j;
   HYPRE_Int *A_tmp_diag_i;
   HYPRE_Int *A_tmp_offd_i;
   HYPRE_Int *A_tmp_diag_j;
   HYPRE_Int *A_tmp_offd_j;
   HYPRE_Int *L_recv_ptr = NULL;
   HYPRE_Int *L_send_ptr = NULL;
   HYPRE_Int *L_recv_procs = NULL;
   HYPRE_Int *L_send_procs = NULL;
   HYPRE_Int *L_send_map_elmts = NULL;
   HYPRE_Int *recv_procs;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *recv_vec_starts;
   HYPRE_Int *all_send_procs = NULL;
   HYPRE_Int *all_recv_procs = NULL;
   HYPRE_Int *remap = NULL;
   HYPRE_Int *level_start;

   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       num_levels;
   HYPRE_Int       num_add_lvls;
   HYPRE_Int       num_procs;
   HYPRE_Int       num_sends, num_recvs;
   HYPRE_Int       num_sends_L = 0;
   HYPRE_Int       num_recvs_L = 0;
   HYPRE_Int       send_data_L = 0;
   HYPRE_Int       num_rows_L = 0;
   HYPRE_Int       num_rows_tmp = 0;
   HYPRE_Int       num_cols_offd_L = 0;
   HYPRE_Int       num_cols_offd = 0;
   HYPRE_Int       level, i, j, k;
   HYPRE_Int       this_proc, cnt, cnt_diag, cnt_offd;
   HYPRE_Int       cnt_recv, cnt_send, cnt_row, row_start;
   HYPRE_Int       start_diag, start_offd, indx, cnt_map;
   HYPRE_Int       start, j_indx, index, cnt_level;
   HYPRE_Int       max_sends, max_recvs;

 /* Local variables  */ 
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       num_threads;
   HYPRE_Int       num_nonzeros_diag;
   HYPRE_Int       num_nonzeros_offd;

   HYPRE_Real  **l1_norms_ptr = NULL;
   HYPRE_Real   *relax_weight = NULL;
   HYPRE_Real    relax_type;

   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_type        = hypre_ParAMGDataGridRelaxType(amg_data)[1];
   comm              = hypre_ParCSRMatrixComm(A_array[0]);

   hypre_MPI_Comm_size(comm,&num_procs);

   l1_norms_ptr      = hypre_ParAMGDataL1Norms(amg_data); 

   addlvl = hypre_max(additive, mult_additive);
   num_add_lvls = num_levels+1-addlvl;

   level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1);
   send_data_L = 0;
   num_rows_L  = 0;
   num_cols_offd_L = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;
   level_start[0] = 0; 
   cnt = 1;
   max_sends = 0;
   max_recvs = 0;
   for (i=addlvl; i < num_levels; i++)
   {
      A_tmp = A_array[i];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd);
      num_rows_L += num_rows_tmp;
      level_start[cnt] = level_start[cnt-1] + num_rows_tmp;
      cnt++;
      num_cols_offd_L += num_cols_offd;
      num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp];
      num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp];
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         max_sends += num_sends;
         if (num_sends) 
	    send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
         max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      }
   }
   if (max_sends >= num_procs ||max_recvs >= num_procs)
   {
         max_sends = num_procs;
         max_recvs = num_procs;
   }
   if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends);
   if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs);

   cnt_send = 0;
   cnt_recv = 0;
   if (max_sends || max_recvs)
   {
      if (max_sends < num_procs && max_recvs < num_procs)
      {
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               for (j = 0; j < num_sends; j++)
	          all_send_procs[cnt_send++] = send_procs[j];
               for (j = 0; j < num_recvs; j++)
	          all_recv_procs[cnt_recv++] = recv_procs[j];
            }
         }
         if (max_sends)
         {
            qsort0(all_send_procs, 0, max_sends-1);
            num_sends_L = 1;
            this_proc = all_send_procs[0];
            for (i=1; i < max_sends; i++)
            {
               if (all_send_procs[i] > this_proc)
               {
                  this_proc = all_send_procs[i];
                  all_send_procs[num_sends_L++] = this_proc;
               }
            }
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            for (j=0; j < num_sends_L; j++)
	       L_send_procs[j] = all_send_procs[j];
	    hypre_TFree(all_send_procs);
         }
         if (max_recvs)
         {
            qsort0(all_recv_procs, 0, max_recvs-1);
            num_recvs_L = 1;
            this_proc = all_recv_procs[0];
            for (i=1; i < max_recvs; i++)
            {
               if (all_recv_procs[i] > this_proc)
               {
                  this_proc = all_recv_procs[i];
                  all_recv_procs[num_recvs_L++] = this_proc;
               }
            }
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            for (j=0; j < num_recvs_L; j++)
	       L_recv_procs[j] = all_recv_procs[j];
	    hypre_TFree(all_recv_procs);
         }

         L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
         L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);

         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
            }
            else
            {
               num_sends = 0;
               num_recvs = 0;
            }
            for (k = 0; k < num_sends; k++)
            {
               this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L);
               L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k];
            }
            for (k = 0; k < num_recvs; k++)
            {
               this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L);
               L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k];
            }
         }

         L_recv_ptr[0] = 0;
         for (i=1; i < num_recvs_L; i++)
            L_recv_ptr[i+1] += L_recv_ptr[i];

         L_send_ptr[0] = 0;
         for (i=1; i < num_sends_L; i++)
            L_send_ptr[i+1] += L_send_ptr[i];
      }
      else
      {
         num_recvs_L = 0;
         num_sends_L = 0;
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
               for (j = 0; j < num_sends; j++)
               {
                  this_proc = send_procs[j];
	          if (all_send_procs[this_proc] == 0)
		      num_sends_L++;
                  all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j];
               }
               for (j = 0; j < num_recvs; j++)
               {
                  this_proc = recv_procs[j];
	          if (all_recv_procs[this_proc] == 0)
		      num_recvs_L++;
                  all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j];
               }
            }
         }
         if (max_sends)
         {
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);
            num_sends_L = 0;
            for (j=0; j < num_procs; j++)
            {
	       this_proc = all_send_procs[j];
	       if (this_proc)
	       {
	           L_send_procs[num_sends_L++] = j;
	           L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1];
	       }
            }
         }
         if (max_recvs)
         {
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
            num_recvs_L = 0;
            for (j=0; j < num_procs; j++)
            {
	       this_proc = all_recv_procs[j];
	       if (this_proc)
	       {
	           L_recv_procs[num_recvs_L++] = j;
	           L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1];
	       }
            }
         }
      } 
   }
   if (max_sends) hypre_TFree(all_send_procs);
   if (max_recvs) hypre_TFree(all_recv_procs);

   L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag);
   L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd);
   hypre_CSRMatrixInitialize(L_diag);
   hypre_CSRMatrixInitialize(L_offd);
   if (num_nonzeros_diag)
   {
      L_diag_data = hypre_CSRMatrixData(L_diag);
      L_diag_j = hypre_CSRMatrixJ(L_diag);
   }
   L_diag_i = hypre_CSRMatrixI(L_diag);
   if (num_nonzeros_offd)
   {
      L_offd_data = hypre_CSRMatrixData(L_offd);
      L_offd_j = hypre_CSRMatrixJ(L_offd);
   }
   L_offd_i = hypre_CSRMatrixI(L_offd);

   if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L);
   if (send_data_L)
   {
      L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L);
      buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L);
   }
   if (num_cols_offd_L)
   {
      D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L);
      /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/
      remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);
   }

   Rtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Rtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Rtilde_local);
   hypre_ParVectorLocalVector(Rtilde) = Rtilde_local;   
   hypre_ParVectorOwnsData(Rtilde) = 1;

   Xtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Xtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Xtilde_local);
   hypre_ParVectorLocalVector(Xtilde) = Xtilde_local;   
   hypre_ParVectorOwnsData(Xtilde) = 1;
      
   x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
   r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));

   cnt = 0;
   cnt_level = 0;
   cnt_diag = 0; 
   cnt_offd = 0; 
   cnt_row = 1; 
   L_diag_i[0] = 0;
   L_offd_i[0] = 0;
   for (level=addlvl; level < num_levels; level++)
   {
      row_start = level_start[cnt_level];
      if (level != 0)
      {
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0;
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0;
      }
      cnt_level++;

      start_diag = L_diag_i[cnt_row-1];
      start_offd = L_offd_i[cnt_row-1];
      A_tmp = A_array[level];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag);
      A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd);
      A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag);
      A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
         send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
         recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
         send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
         send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
         recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      }
      else
      {
         num_sends = 0;
         num_recvs = 0;
      }
   
      /* Compute new combined communication package */
      for (i=0; i < num_sends; i++)
      {
         this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L);
         indx = L_send_ptr[this_proc];
         for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++)
         {
	    L_send_map_elmts[indx++] = row_start + send_map_elmts[j];
         }
         L_send_ptr[this_proc] = indx;
      }
            
      cnt_map = 0;
      for (i = 0; i < num_recvs; i++)
      {
         this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L);
         indx = L_recv_ptr[this_proc];
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
	    remap[cnt_map++] = indx++;
         }
         L_recv_ptr[this_proc] = indx;
      }
   
      /* Compute Lambda */ 
      if (relax_type == 0)
      {
        HYPRE_Real rlx_wt = relax_weight[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
        {
           D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]];
           L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
           L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
        }
      }
      else
      {
        l1_norms = l1_norms_ptr[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
        for (i=0; i < num_rows_tmp; i++)
        {
           D_data[i] = 1.0/l1_norms[i];
           L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
           L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
        }
      }
 
      if (num_procs > 1)
      {
         index = 0;
         for (i=0; i < num_sends; i++)
         {
            start = send_map_starts[i];
            for (j=start; j < send_map_starts[i+1]; j++)
              buf_data[index++] = D_data[send_map_elmts[j]];
         }

         comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg,
                        buf_data, D_data_offd);
         hypre_ParCSRCommHandleDestroy(comm_handle);
      }

      for (i = 0; i < num_rows_tmp; i++)
      {
         j_indx = A_tmp_diag_i[i];
         L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i];
         L_diag_j[cnt_diag++] = i+row_start;
         for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++)
         {
             j_indx = A_tmp_diag_j[j];
             L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i];
             L_diag_j[cnt_diag++] = j_indx+row_start;
         }
         for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++)
         {
             j_indx = A_tmp_offd_j[j];
             L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i];
             L_offd_j[cnt_offd++] = remap[j_indx];
         }
      }
      cnt_row += num_rows_tmp;
   }

   if (L_send_ptr)
   {
      for (i=num_sends_L-1; i > 0; i--)
         L_send_ptr[i] = L_send_ptr[i-1];
      L_send_ptr[0] = 0;
   }
   else
      L_send_ptr = hypre_CTAlloc(HYPRE_Int,1);

   if (L_recv_ptr)
   {
      for (i=num_recvs_L-1; i > 0; i--)
         L_recv_ptr[i] = L_recv_ptr[i-1];
      L_recv_ptr[0] = 0;
   }
   else
      L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1);

   L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L;
   hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L;
   hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs;
   hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs;
   hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr;
   hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr;
   hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts;
   hypre_ParCSRCommPkgComm(L_comm_pkg) = comm;


   Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1);
   hypre_ParCSRMatrixDiag(Lambda) = L_diag;
   hypre_ParCSRMatrixOffd(Lambda) = L_offd;
   hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg;
   hypre_ParCSRMatrixComm(Lambda) = comm;
   hypre_ParCSRMatrixOwnsData(Lambda) = 1;

   hypre_ParAMGDataLambda(amg_data) = Lambda;
   hypre_ParAMGDataRtilde(amg_data) = Rtilde;
   hypre_ParAMGDataXtilde(amg_data) = Xtilde;

   hypre_TFree(D_data_offd);
   hypre_TFree(D_data);
   if (num_procs > 1) hypre_TFree(buf_data);
   hypre_TFree(remap);
   hypre_TFree(buf_data);
   hypre_TFree(level_start);

   return Solve_err_flag;
}
Beispiel #8
0
HYPRE_ParCSRMatrix 
GenerateDifConv( MPI_Comm comm,
                 HYPRE_Int      nx,
                 HYPRE_Int      ny,
                 HYPRE_Int      nz, 
                 HYPRE_Int      P,
                 HYPRE_Int      Q,
                 HYPRE_Int      R,
                 HYPRE_Int      p,
                 HYPRE_Int      q,
                 HYPRE_Int      r,
                 HYPRE_Real  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   HYPRE_Real *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   HYPRE_Real *offd_data;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy, iz;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local, nz_local;
   HYPRE_Int nx_size, ny_size, nz_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;
   HYPRE_Int *nz_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy, R_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny*nz;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);
   hypre_GeneratePartitioning(nz,R,&nz_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1);

   global_part[0] = 0;
   cnt = 1;
   for (iz = 0; iz < R; iz++)
   {
      nz_size = nz_part[iz+1]-nz_part[iz];
      for (iy = 0; iy < Q; iy++)
      {
         ny_size = ny_part[iy+1]-ny_part[iy];
         for (ix = 0; ix < P; ix++)
         {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size*nz_size;
         }
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];
   nz_local = nz_part[r+1] - nz_part[r];

   my_id = r*(P*Q) + q*P + p;
   num_procs = P*Q*R;

   local_num_rows = nx_local*ny_local*nz_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);
   R_busy = hypre_min(nz,R);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local*nz_local;
   if (p < P_busy-1) num_cols_offd += ny_local*nz_local;
   if (q) num_cols_offd += nx_local*nz_local;
   if (q < Q_busy-1) num_cols_offd += nx_local*nz_local;
   if (r) num_cols_offd += nx_local*ny_local;
   if (r < R_busy-1) num_cols_offd += nx_local*ny_local;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 1;
   o_cnt = 1;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iz > nz_part[r]) 
               diag_i[cnt]++;
            else
            {
               if (iz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iy > ny_part[q]) 
               diag_i[cnt]++;
            else
            {
               if (iy) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (ix > nx_part[p]) 
               diag_i[cnt]++;
            else
            {
               if (ix) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (ix+1 < nx_part[p+1]) 
               diag_i[cnt]++;
            else
            {
               if (ix+1 < nx) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (iy+1 < ny_part[q+1]) 
               diag_i[cnt]++;
            else
            {
               if (iy+1 < ny) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iz+1 < nz_part[r+1]) 
               diag_i[cnt]++;
            else
            {
               if (iz+1 < nz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            cnt++;
            o_cnt++;
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
   }

   row_index = 0;
   cnt = 0;
   o_cnt = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iz > nz_part[r]) 
            {
               diag_j[cnt] = row_index-nx_local*ny_local;
               diag_data[cnt++] = value[3];
            }
            else
            {
               if (iz) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[3];
               }
            }
            if (iy > ny_part[q]) 
            {
               diag_j[cnt] = row_index-nx_local;
               diag_data[cnt++] = value[2];
            }
            else
            {
               if (iy) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[2];
               }
            }
            if (ix > nx_part[p]) 
            {
               diag_j[cnt] = row_index-1;
               diag_data[cnt++] = value[1];
            }
            else
            {
               if (ix) 
               {
                  offd_j[o_cnt] = hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[1];
               }
            }
            if (ix+1 < nx_part[p+1]) 
            {
               diag_j[cnt] = row_index+1;
               diag_data[cnt++] = value[4];
            }
            else
            {
               if (ix+1 < nx) 
               {
                  offd_j[o_cnt] = hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[4];
               }
            }
            if (iy+1 < ny_part[q+1]) 
            {
               diag_j[cnt] = row_index+nx_local;
               diag_data[cnt++] = value[5];
            }
            else
            {
               if (iy+1 < ny) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[5];
               }
            }
            if (iz+1 < nz_part[r+1]) 
            {
               diag_j[cnt] = row_index+nx_local*ny_local;
               diag_data[cnt++] = value[6];
            }
            else
            {
               if (iz+1 < nz) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[6];
               }
            }
            row_index++;
         }
      }
   }

   if (num_procs > 1)
   {
      for (i=0; i < num_cols_offd; i++)
         col_map_offd[i] = offd_j[i];
   	
      qsort0(col_map_offd, 0, num_cols_offd-1);

      for (i=0; i < num_cols_offd; i++)
         for (j=0; j < num_cols_offd; j++)
            if (offd_j[i] == col_map_offd[j])
            {
               offd_j[i] = j;
               break;
            }
   }

   A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                global_part, global_part, num_cols_offd,
                                diag_i[local_num_rows],
                                offd_i[local_num_rows]);

   hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

   diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrixI(diag) = diag_i;
   hypre_CSRMatrixJ(diag) = diag_j;
   hypre_CSRMatrixData(diag) = diag_data;

   offd = hypre_ParCSRMatrixOffd(A);
   hypre_CSRMatrixI(offd) = offd_i;
   if (num_cols_offd)
   {
      hypre_CSRMatrixJ(offd) = offd_j;
      hypre_CSRMatrixData(offd) = offd_data;
   }

   hypre_TFree(nx_part);
   hypre_TFree(ny_part);
   hypre_TFree(nz_part);

   return (HYPRE_ParCSRMatrix) A;
}