Example #1
/* Based on hypre_CSRMatrixMatvec in hypre's csr_matvec.c */
/*
 * Boolean sparse matrix-vector product:
 *
 *    y = (alpha && (A*x)) || (beta && y)
 *
 * where "A*x" for row i is the logical OR of x[col] over the stored column
 * indices of row i. Only the sparsity pattern of A is consulted; the stored
 * numerical values are ignored.
 *
 * A      CSR matrix providing row pointers and column indices.
 * alpha  when false, A*x is not applied and y becomes (y && beta).
 * x      input array, indexed by the column indices of A.
 * beta   when false, the previous contents of y are discarded.
 * y      input/output array; entries 0..num_rows-1 are read and written.
 */
void hypre_CSRMatrixBooleanMatvec(hypre_CSRMatrix *A,
                                  HYPRE_Bool alpha,
                                  HYPRE_Bool *x,
                                  HYPRE_Bool beta,
                                  HYPRE_Bool *y)
{
    HYPRE_Int        *A_i      = hypre_CSRMatrixI(A);
    HYPRE_Int        *A_j      = hypre_CSRMatrixJ(A);
    HYPRE_Int         num_rows = hypre_CSRMatrixNumRows(A);

    HYPRE_Int        *A_rownnz = hypre_CSRMatrixRownnz(A);
    HYPRE_Int         num_rownnz = hypre_CSRMatrixNumRownnz(A);

    HYPRE_Bool       *x_data = x;
    HYPRE_Bool       *y_data = y;

    HYPRE_Bool        temp, tempx;

    HYPRE_Int         i, jj;

    HYPRE_Int         m;

    /* The rownnz list is used only when fewer than this fraction of the
       rows contain stored entries. */
    HYPRE_Real        xpar=0.7;

    /*-----------------------------------------------------------------------
     * alpha is false: y = beta && y, and A*x must not be applied.
     *-----------------------------------------------------------------------*/

    if (alpha == 0)
    {
        #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
        for (i = 0; i < num_rows; i++)
        {
            y_data[i] = y_data[i] && beta;
        }

        /* Without this early exit the code below would still OR A*x into y. */
        return;
    }

    /*-----------------------------------------------------------------------
     * y = beta && y
     *-----------------------------------------------------------------------*/

    if (beta == 0)
    {
        #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
        for (i = 0; i < num_rows; i++)
        {
            y_data[i] = 0;
        }
    }
    /* else: beta is true -> no change to y_data */

    /*-----------------------------------------------------------------------
     * y = y || A*x
     *-----------------------------------------------------------------------*/

    if (num_rownnz < xpar*(num_rows))
    {
        /* Few rows have entries: visit only the rows listed in A_rownnz. */
        #pragma omp parallel for private(i,jj,m,tempx) HYPRE_SMP_SCHEDULE
        for (i = 0; i < num_rownnz; i++)
        {
            m = A_rownnz[i];

            tempx = 0;
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
            {
                tempx = tempx || x_data[A_j[jj]];
            }
            y_data[m] = y_data[m] || tempx;
        }
    }
    else
    {
        /* Visit every row. */
        #pragma omp parallel for private(i,jj,temp) HYPRE_SMP_SCHEDULE
        for (i = 0; i < num_rows; i++)
        {
            temp = 0;
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
                temp = temp || x_data[A_j[jj]];
            }
            y_data[i] = y_data[i] || temp;
        }
    }

    /* alpha is true at this point, so no final "y = alpha && y" is needed. */
}
Example #2
/*
 * Sparse matrix-vector product, in place: y = alpha*A*x + beta*y.
 *
 * Returns an informational size-compatibility code (processing is never
 * aborted because of it):
 *    0  sizes agree,
 *    1  length of x differs from the number of columns of A,
 *    2  length of y differs from the number of rows of A,
 *    3  both of the above.
 *
 * x and y may hold several vectors (num_vectors > 1); they are then addressed
 * through their index/vector strides in the A*x step, while the pure scaling
 * steps walk y_data as one array of num_rows*num_vectors entries.
 */
int
hypre_CSRMatrixMatvec( double           alpha,
              hypre_CSRMatrix *A,
              hypre_Vector    *x,
              double           beta,
              hypre_Vector    *y     )
{
   double     *A_data   = hypre_CSRMatrixData(A);
   int        *A_i      = hypre_CSRMatrixI(A);
   int        *A_j      = hypre_CSRMatrixJ(A);
   int         num_rows = hypre_CSRMatrixNumRows(A);
   int         num_cols = hypre_CSRMatrixNumCols(A);

   int        *A_rownnz = hypre_CSRMatrixRownnz(A);
   int         num_rownnz = hypre_CSRMatrixNumRownnz(A);

   double     *x_data = hypre_VectorData(x);
   double     *y_data = hypre_VectorData(y);
   int         x_size = hypre_VectorSize(x);
   int         y_size = hypre_VectorSize(y);
   int         num_vectors = hypre_VectorNumVectors(x);
   int         idxstride_y = hypre_VectorIndexStride(y);
   int         vecstride_y = hypre_VectorVectorStride(y);
   int         idxstride_x = hypre_VectorIndexStride(x);
   int         vecstride_x = hypre_VectorVectorStride(x);

   double      temp, tempx;

   int         i, j, jj;

   int         m;

   /* The rownnz list is used only when fewer than this fraction of the
      rows contain stored entries. */
   double     xpar=0.7;

   int         ierr = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Matvec returns ierr = 1 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 2 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in Matvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_cols != x_size)
   {
      ierr = 1;
   }

   if (num_rows != y_size)
   {
      ierr = 2;
   }

   if (num_cols != x_size && num_rows != y_size)
   {
      ierr = 3;
   }

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
      {
         y_data[i] *= beta;
      }

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *
    * Scaling by beta/alpha first lets the final "y = alpha*y" step apply
    * alpha to both the old y and the accumulated A*x at once.
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         /* Assign instead of multiplying so stale NaN/Inf in y are dropped. */
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] = 0.0;
         }
      }
      else
      {
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] *= temp;
         }
      }
   }

   /*-----------------------------------------------------------------
    * y += A*x
    *-----------------------------------------------------------------*/

   /* use rownnz pointer to do the A*x multiplication when num_rownnz is
      smaller than num_rows */

   if (num_rownnz < xpar*(num_rows))
   {
      for (i = 0; i < num_rownnz; i++)
      {
         m = A_rownnz[i];

         if ( num_vectors==1 )
         {
            tempx = y_data[m];
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
            {
               tempx +=  A_data[jj] * x_data[A_j[jj]];
            }
            y_data[m] = tempx;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               tempx = y_data[ j*vecstride_y + m*idxstride_y ];
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               {
                  tempx +=  A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + m*idxstride_y] = tempx;
            }
         }
      }
   }
   else
   {
      /* j must be private as well: it is the loop index of the multi-vector
         branch below and would otherwise be shared between threads. */
#pragma omp parallel for private(i,j,jj,temp) schedule(static)
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            temp = y_data[i];
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               temp += A_data[jj] * x_data[A_j[jj]];
            }
            y_data[i] = temp;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               temp = y_data[ j*vecstride_y + i*idxstride_y ];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] = temp;
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
      {
         y_data[i] *= alpha;
      }
   }

   return ierr;
}
Example #3
/*
 * Out-of-place sparse matrix-vector product on the trailing rows of A:
 *
 *    y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end]
 *
 * b and y may be the same vector, which gives the in-place update
 * y = alpha*A*x + beta*y. If x and y are the same vector, x is deep-copied
 * first so that writes to y do not disturb the input.
 *
 * Returns an informational size-compatibility code (processing is never
 * aborted because of it):
 *    0  sizes agree,
 *    1  length of x differs from the number of columns of A,
 *    2  length of y or b (after offset) differs from the number of rows,
 *    3  both of the above.
 *
 * Time spent here is accumulated into hypre_profile_times[HYPRE_TIMER_ID_MATVEC].
 */
HYPRE_Int
hypre_CSRMatrixMatvecOutOfPlace( HYPRE_Complex    alpha,
                                 hypre_CSRMatrix *A,
                                 hypre_Vector    *x,
                                 HYPRE_Complex    beta,
                                 hypre_Vector    *b,
                                 hypre_Vector    *y,
                                 HYPRE_Int        offset     )
{
   HYPRE_Real time_begin = hypre_MPI_Wtime();

   HYPRE_Complex    *A_data   = hypre_CSRMatrixData(A);
   HYPRE_Int        *A_i      = hypre_CSRMatrixI(A) + offset;
   HYPRE_Int        *A_j      = hypre_CSRMatrixJ(A);
   HYPRE_Int         num_rows = hypre_CSRMatrixNumRows(A) - offset;
   HYPRE_Int         num_cols = hypre_CSRMatrixNumCols(A);

   /* NOTE(review): the rownnz list and count are taken from A unshifted,
      while A_i and num_rows are shifted by offset; confirm the rownnz path
      below is only reached with offset == 0. */
   HYPRE_Int        *A_rownnz = hypre_CSRMatrixRownnz(A);
   HYPRE_Int         num_rownnz = hypre_CSRMatrixNumRownnz(A);

   HYPRE_Complex    *x_data = hypre_VectorData(x);
   HYPRE_Complex    *b_data = hypre_VectorData(b) + offset;
   /* y is shifted like A_i, b_data, num_rows and y_size, so that local row i
      always refers to the same global row in A, b and y. */
   HYPRE_Complex    *y_data = hypre_VectorData(y) + offset;
   HYPRE_Int         x_size = hypre_VectorSize(x);
   HYPRE_Int         b_size = hypre_VectorSize(b) - offset;
   HYPRE_Int         y_size = hypre_VectorSize(y) - offset;
   HYPRE_Int         num_vectors = hypre_VectorNumVectors(x);
   HYPRE_Int         idxstride_y = hypre_VectorIndexStride(y);
   HYPRE_Int         vecstride_y = hypre_VectorVectorStride(y);
   HYPRE_Int         idxstride_x = hypre_VectorIndexStride(x);
   HYPRE_Int         vecstride_x = hypre_VectorVectorStride(x);

   HYPRE_Complex     temp, tempx;

   HYPRE_Int         i, j, jj;

   HYPRE_Int         m;

   /* The rownnz list is used only when fewer than this fraction of the
      rows contain stored entries. */
   HYPRE_Real        xpar=0.7;

   HYPRE_Int         ierr = 0;
   hypre_Vector     *x_tmp = NULL;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Matvec returns ierr = 1 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 2 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in Matvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
   hypre_assert( num_vectors == hypre_VectorNumVectors(b) );

   if (num_cols != x_size)
   {
      ierr = 1;
   }

   if (num_rows != y_size || num_rows != b_size)
   {
      ierr = 2;
   }

   if (num_cols != x_size && (num_rows != y_size || num_rows != b_size))
   {
      ierr = 3;
   }

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *
    * The result is beta*b. Reading from b (not y) keeps the out-of-place
    * contract; when b and y are the same vector this equals y *= beta.
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
      for (i = 0; i < num_rows*num_vectors; i++)
      {
         y_data[i] = b_data[i] * beta;
      }

      hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;

      return ierr;
   }

   /* y is overwritten while x is still being read, so work on a copy of x
      when both are the same vector. */
   if (x == y)
   {
      x_tmp = hypre_SeqVectorCloneDeep(x);
      x_data = hypre_VectorData(x_tmp);
   }

   /*-----------------------------------------------------------------------
    * temp = beta/alpha, so that y = alpha*(A*x + temp*b).
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   /* use rownnz pointer to do the A*x multiplication when num_rownnz is
      smaller than num_rows */

   if (num_rownnz < xpar*(num_rows) || num_vectors > 1)
   {
      /*-----------------------------------------------------------------------
       * y = (beta/alpha)*b
       *-----------------------------------------------------------------------*/

      if (temp == 0.0)
      {
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] = 0.0;
         }
      }
      else if (temp != 1.0)
      {
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] = b_data[i]*temp;
         }
      }
      else if (y_data != b_data)
      {
         /* temp == 1: y must still start from b when b is a different
            array; nothing to do for the in-place case. */
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] = b_data[i];
         }
      }

      /*-----------------------------------------------------------------
       * y += A*x
       *-----------------------------------------------------------------*/

      if (num_rownnz < xpar*(num_rows))
      {
#pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rownnz; i++)
         {
            m = A_rownnz[i];

            if ( num_vectors==1 )
            {
               tempx = 0;
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               {
                  tempx +=  A_data[jj] * x_data[A_j[jj]];
               }
               y_data[m] += tempx;
            }
            else
            {
               for ( j=0; j<num_vectors; ++j )
               {
                  tempx = 0;
                  for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  {
                     tempx +=  A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
                  }
                  y_data[ j*vecstride_y + m*idxstride_y] += tempx;
               }
            }
         }
      }
      else // num_vectors > 1
      {
#pragma omp parallel for private(i,j,jj,tempx) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rows; i++)
         {
            for (j = 0; j < num_vectors; ++j)
            {
               tempx = 0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] += tempx;
            }
         }
      }

      /*-----------------------------------------------------------------
       * y = alpha*y
       *-----------------------------------------------------------------*/

      if (alpha != 1.0)
      {
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] *= alpha;
         }
      }
   }
   else
   { // JSP: this is currently the only path optimized
      /* Single vector, most rows populated. Each branch below is a
         specialization of y[i] = alpha*(A[i,:]*x + temp*b[i]) for common
         values of temp and alpha, fused into a single pass over the rows. */
#pragma omp parallel private(i,jj,tempx)
      {
      /* Row range for this thread, as reported by the partition helpers.
         NOTE(review): presumably a per-thread load-balanced range; confirm
         against the helpers' definition. */
      HYPRE_Int iBegin = hypre_CSRMatrixGetLoadBalancedPartitionBegin(A);
      HYPRE_Int iEnd = hypre_CSRMatrixGetLoadBalancedPartitionEnd(A);
      hypre_assert(iBegin <= iEnd);
      hypre_assert(iBegin >= 0 && iBegin <= num_rows);
      hypre_assert(iEnd >= 0 && iEnd <= num_rows);

      if (0 == temp)
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*A*x
      } // temp == 0
      else if (-1 == temp) // beta == -alpha
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x - b
         else if (-1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x + b
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x - b)
      } // temp == -1
      else if (1 == temp)
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x + b
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x - b
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x + b)
      } // temp == 1
      else
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x + temp*b
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x - temp*b
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x + temp*b)
      } // temp != 0 && temp != -1 && temp != 1
      } // omp parallel
   }

   /* x_tmp was only allocated for the aliased case. */
   if (x == y) hypre_SeqVectorDestroy(x_tmp);

   hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;
   return ierr;
}