Beispiel #1
0
void hypre_sort_and_create_inverse_map(
  HYPRE_Int *in, HYPRE_Int len, HYPRE_Int **out, hypre_UnorderedIntMap *inverse_map)
{
   if (len == 0)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

   HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len);
   hypre_merge_sort(in, temp, len, out);
   hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
   HYPRE_Int i;
#pragma omp parallel for HYPRE_SMP_SCHEDULE
   for (i = 0; i < len; i++)
   {
      HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i);
      assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
      if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         assert(false);
      }
#endif
   }

#ifdef DBG_MERGE_SORT
  std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len);
  for (HYPRE_Int i = 0; i < len; ++i) {
    inverse_map2[(*out)[i]] = i;
    if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
    {
      fprintf(stderr, "%d %d\n", i, (*out)[i]);
      assert(false);
    }
  }
  assert(hypre_UnorderedIntMapSize(inverse_map) == len);
#endif

   if (*out == in)
   {
      hypre_TFree(temp);
   }
   else
   {
      hypre_TFree(in);
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
Beispiel #2
0
HYPRE_Real time_getWallclockSeconds(void)
{
#ifdef TIMER_USE_MPI
   return(hypre_MPI_Wtime());
#else
#ifdef WIN32
   clock_t cl=clock();
   return(((HYPRE_Real) cl)/((HYPRE_Real) CLOCKS_PER_SEC));
#else
   struct tms usage;
   hypre_longint wallclock = times(&usage);
   return(((HYPRE_Real) wallclock)/((HYPRE_Real) sysconf(_SC_CLK_TCK)));
#endif
#endif
}
Beispiel #3
0
HYPRE_Int
main( HYPRE_Int   argc,
      char *argv[] )
{


   HYPRE_Int        num_procs, myid;
   HYPRE_Int        verbose = 0, build_matrix_type = 1;
   HYPRE_Int        index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int        i, k, ierr=0;
   HYPRE_Int        row_start, row_end; 
   HYPRE_Int        col_start, col_end, global_num_rows;
   HYPRE_Int       *row_part, *col_part; 
   char      *csrfilename;
   HYPRE_Int        preload = 0, loop = 0, loop2 = LOOP2;   
   HYPRE_Int        bcast_rows[2], *info;
   


   hypre_ParCSRMatrix    *parcsr_A, *small_A;
   HYPRE_ParCSRMatrix    A_temp, A_temp_small; 
   hypre_CSRMatrix       *A_CSR;
   hypre_ParCSRCommPkg	 *comm_pkg;   

  
   HYPRE_Int                 nx, ny, nz;
   HYPRE_Int                 P, Q, R;
   HYPRE_Int                 p, q, r;
   HYPRE_Real          values[4];

   hypre_ParVector     *x_new;
   hypre_ParVector     *y_new, *y;
   HYPRE_Int                 *row_starts;
   HYPRE_Real          ans;
   HYPRE_Real          start_time, end_time, total_time, *loop_times;
   HYPRE_Real          T_avg, T_std;
   
   HYPRE_Int                   noparmprint = 0;
 
#if mydebug   
   HYPRE_Int  j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/


   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );



   /*-----------------------------------------------------------
    * default - is 27pt laplace
    *-----------------------------------------------------------*/

    
   build_matrix_type = 2;
   matrix_arg_index = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   index = 1;
   while ( index < argc) 
   {
      if  ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;  
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type      = 1;      
         matrix_arg_index = index; /*this tells where the name is*/
      }
      else if  ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;  
         commpkg_flag = atoi(argv[index++]);
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type      = 2;
         matrix_arg_index = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type      = 4;
         matrix_arg_index = index;
      }
/*
      else if  ( strcmp(argv[index], "-nopreload") == 0 )
      {
         index++;  
         preload = 0;
      }
*/
      else if  ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;  
         loop = atoi(argv[index++]);
      }
      else if  ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;  
         noparmprint = 1;
         
      }
      else  
      {
	 index++;
         /*hypre_printf("Warning: Unrecogized option '%s'\n",argv[index++] );*/
      }
   }
   
   
  
   /*-----------------------------------------------------------
    * Setup the Matrix problem   
    *-----------------------------------------------------------*/

  /*-----------------------------------------------------------
    *  Get actual partitioning- 
    *  read in an actual csr matrix.
    *-----------------------------------------------------------*/


   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
	 csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
	/*hypre_printf("  FromFile: %s\n", csrfilename);*/
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, 
					       row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      
      myBuildParLaplacian(argc, argv, matrix_arg_index,  &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;      
 
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
     parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

 
  /*-----------------------------------------------------------
   * create a small problem so that timings are more accurate - 
   * code gets run twice (small laplace)
   *-----------------------------------------------------------*/

   /*this is no longer being used - preload = 0 is set at the beginning */

   if (preload == 1) 
   {
 
      /*hypre_printf("preload!\n");*/
      
        
       values[1] = -1;
       values[2] = -1;
       values[3] = -1;
       values[0] = - 6.0    ;

       nx = 2;
       ny = num_procs;
       nz = 2;

       P  = 1;
       Q  = num_procs;
       R  = 1;

       p = myid % P;
       q = (( myid - p)/P) % Q;
       r = ( myid - p - P*q)/( P*Q );
       
      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD, nx, ny, nz, 
				      P, Q, R, p, q, r, values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;     

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A); 

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A); 
  
   }





   /*-----------------------------------------------------------
    *  Prepare for timing
    *-----------------------------------------------------------*/

   /* instead of preloading, let's not time the first one if more than one*/

    
   if (!loop)
   {
      loop = 1;
      /* and don't do any timings */
      
   }
   else
   {
      
      loop +=1;
      if (loop < 2) loop = 2;
   }
      
   
   loop_times = hypre_CTAlloc(HYPRE_Real, loop);
   


/******************************************************************************************/   

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
  
      /*-----------------------------------------------------------
       *  Create new comm package
       *-----------------------------------------------------------*/


    
      if (!myid) hypre_printf("********************************************************\n" );  
 
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
         
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
            
            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1); 
#endif
     
            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0); 
#endif  
  
            end_time = hypre_MPI_Wtime();
            
            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                       HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);
         
            loop_times[i] += total_time;

            if (  !((i+1)== loop  &&  (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A); 
            
         }/*end of loop2 */
      
        
      } /*end of loop*/
      


      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         /* calculate the avg and std. */
         stats_mo(loop_times, loop, &T_avg, &T_std);
      
         if (!myid) hypre_printf(" NewCommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                    STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                    (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
   
       }
       else 
       {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
       }


     /*-----------------------------------------------------------
       *  Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

       global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 

       if (verbose) 
       {

	  ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                      &row_start, &row_end ,
                                       &col_start, &col_end );


	  comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid, 
		 row_start, row_end);
	  
	
	  ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows, &row_start, 
					      &row_end);


	  hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          hypre_printf("myid = %d, num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
	  {
              hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	   }
#endif 
	  hypre_printf("myid = %d, num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg)  );  

#if mydebug
	  for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	    tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
                     hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	    for (j=0; j< tmp_int; j++) 
	    {
	       hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,  
		      hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		      hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  }
#endif
       }
       /*-----------------------------------------------------------
        *  To verify correctness (if commpkg_flag = 3)
        *-----------------------------------------------------------*/

       if (commpkg_flag == 3 ) 
       {
          /*do a matvec - we are assuming a square matrix */
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
   
          x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(x_new, 0);
          hypre_ParVectorInitialize(x_new);
          hypre_ParVectorSetRandomValues(x_new, 1);    
          
          y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
          hypre_ParVectorSetPartitioningOwner(y_new, 0);
          hypre_ParVectorInitialize(y_new);
          hypre_ParVectorSetConstantValues(y_new, 0.0);
          
          /*y = 1.0*A*x+1.0*y */
          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
       }
   
   /*-----------------------------------------------------------
    *  Clean up after MyComm
    *-----------------------------------------------------------*/


       hypre_NewCommPkgDestroy(parcsr_A); 

   }

  




/******************************************************************************************/
/******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);


   if (commpkg_flag > 1 )
   {

      /*-----------------------------------------------------------
       *  Set up standard comm package
       *-----------------------------------------------------------*/

      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;
      
      if (!myid) hypre_printf("********************************************************\n" );  
      /*do loop times*/
      for (i=0; i< loop; i++) 
      {

         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++) 
         {
            

            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

         
            start_time = hypre_MPI_Wtime();

#if time_gather
                  
            info = hypre_CTAlloc(HYPRE_Int, num_procs);
            
            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD); 

#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();


            end_time = end_time - start_time;
        
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                          HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;
         
       
         if (  !((i+1)== loop  &&  (k+1) == loop2))   hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));
               
         }/* end of loop 2*/
         
        
      } /*end of loop*/
      
      /* calculate the avg and std. */
      if (loop > 1)
      {
         
         stats_mo(loop_times, loop, &T_avg, &T_std);      
         if (!myid) hypre_printf("Current CommPkgCreate:  AVG. wall clock time =  %f seconds\n", T_avg);  
         if (!myid) hypre_printf("                        STD. for %d  runs     =  %f\n", loop-1, T_std);  
         if (!myid) hypre_printf("                        (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );  
         for (i=0; i< loop; i++) 
         {
            if (!myid) hypre_printf("      run %d  =  %f sec.\n", i, loop_times[i]);  
         }
         if (!myid) hypre_printf("********************************************************\n" );  
         
      }
      else 
      {
         if (!myid) hypre_printf("********************************************************\n" );  
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");  
         if (!myid) hypre_printf("      run time =  %f sec.\n", loop_times[0]);  
         if (!myid) hypre_printf("********************************************************\n" );  
      }





      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/

      /*some verification*/

    
       if (verbose) 
       {

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );


          comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);
     
          hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid, 
		 row_start, row_end);

          ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
						  &row_start, &row_end ,
						  &col_start, &col_end );

          hypre_printf("myid = %d, std - num_recvs = %d\n", myid, 
		 hypre_ParCSRCommPkgNumRecvs(comm_pkg)  );  

#if mydebug   
	  for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++) 
          {
              hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n", 
		     myid,  hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i], 
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
		     hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
	  }
#endif
          hypre_printf("myid = %d, std - num_sends = %d\n", myid, 
		 hypre_ParCSRCommPkgNumSends(comm_pkg));  


#if mydebug
          for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++) 
          {
	     tmp_int =  hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -  
	                hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
	     for (j=0; j< tmp_int; j++) 
	     {
	        hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,  
		       hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
		       hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]); 
	     }   
	  } 
#endif
       }

       /*-----------------------------------------------------------
        * Verify correctness
        *-----------------------------------------------------------*/

 

       if (commpkg_flag == 3 ) 
       { 
          global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A); 
          row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);
 
       
          y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
          hypre_ParVectorSetPartitioningOwner(y, 0);
          hypre_ParVectorInitialize(y);
          hypre_ParVectorSetConstantValues(y, 0.0);

          hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      
       }

   }






   /*-----------------------------------------------------------
    *  Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/

   if (commpkg_flag == 3 ) 
   { 
     /*make sure that y and y_new are the same  - now y_new should=0*/   
     hypre_ParVectorAxpy( -1.0, y, y_new );


     hypre_ParVectorSetRandomValues(y, 1);

     ans = hypre_ParVectorInnerProd( y, y_new );
     if (!myid)
     {
        
        if ( fabs(ans) > 1e-8 ) 
        {  
           hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n", 
                  ans); 
        } 
        else
        {
           hypre_printf("Matvecs match ( should be zero = %6.10f )\n", 
                  ans); 
        }
     }
     

   }
 

   /*-----------------------------------------------------------
    *  Clean up
    *-----------------------------------------------------------*/

    
   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm 
                                          package destroy - but we'll destroy 
                                          ours separately until it is
                                          incorporated */

  if (commpkg_flag == 3 ) 
  { 

      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
  }




   hypre_MPI_Finalize();

   return(ierr);


}
Beispiel #4
0
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **out)
{
   if (0 == len) return;

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   HYPRE_Int *dbg_buf = new HYPRE_Int[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

   // HYPRE_Int thread_private_len[hypre_NumThreads()];
   // HYPRE_Int out_len = 0;

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort
      HYPRE_Int i_per_thread = (len + num_threads - 1)/num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread*my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_qsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_Int *in_buf = in;
      HYPRE_Int *out_buf = temp;
      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size*2;
         HYPRE_Int group_leader = my_thread_num/out_group_size*out_group_size;
         // HYPRE_Int group_sub_leader = hypre_min(group_leader + in_group_size, num_threads - 1);
         HYPRE_Int id_in_group = my_thread_num%out_group_size;
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         HYPRE_Int in_group1_begin = hypre_min(i_per_thread*group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread*in_group_size, len);

         HYPRE_Int in_group2_begin = hypre_min(in_group1_begin + i_per_thread*in_group_size, len);
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread*in_group_size, len);

         hypre_parallel_merge(
            in_buf + in_group1_begin, in_buf + in_group1_end,
            in_buf + in_group2_begin, in_buf + in_group2_end,
            out_buf + in_group1_begin,
            num_threads_in_group,
            id_in_group);

         HYPRE_Int *temp = in_buf;
         in_buf = out_buf;
         out_buf = temp;
      }

      *out = in_buf;
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   assert(std::equal(*out, *out + len, dbg_buf));

   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
Beispiel #5
0
/* y[offset:end] = alpha*A[offset:end,:]*x + beta*b[offset:end] */
HYPRE_Int
hypre_CSRMatrixMatvecOutOfPlace( HYPRE_Complex    alpha,
                                 hypre_CSRMatrix *A,
                                 hypre_Vector    *x,
                                 HYPRE_Complex    beta,
                                 hypre_Vector    *b,
                                 hypre_Vector    *y,
                                 HYPRE_Int        offset     )
{
#ifdef HYPRE_PROFILE
   HYPRE_Real time_begin = hypre_MPI_Wtime();
#endif

   HYPRE_Complex    *A_data   = hypre_CSRMatrixData(A);
   HYPRE_Int        *A_i      = hypre_CSRMatrixI(A) + offset;
   HYPRE_Int        *A_j      = hypre_CSRMatrixJ(A);
   HYPRE_Int         num_rows = hypre_CSRMatrixNumRows(A) - offset;
   HYPRE_Int         num_cols = hypre_CSRMatrixNumCols(A);
   /*HYPRE_Int         num_nnz  = hypre_CSRMatrixNumNonzeros(A);*/

   HYPRE_Int        *A_rownnz = hypre_CSRMatrixRownnz(A);
   HYPRE_Int         num_rownnz = hypre_CSRMatrixNumRownnz(A);

   HYPRE_Complex    *x_data = hypre_VectorData(x);
   HYPRE_Complex    *b_data = hypre_VectorData(b) + offset;
   HYPRE_Complex    *y_data = hypre_VectorData(y);
   HYPRE_Int         x_size = hypre_VectorSize(x);
   HYPRE_Int         b_size = hypre_VectorSize(b) - offset;
   HYPRE_Int         y_size = hypre_VectorSize(y) - offset;
   HYPRE_Int         num_vectors = hypre_VectorNumVectors(x);
   HYPRE_Int         idxstride_y = hypre_VectorIndexStride(y);
   HYPRE_Int         vecstride_y = hypre_VectorVectorStride(y);
   /*HYPRE_Int         idxstride_b = hypre_VectorIndexStride(b);
   HYPRE_Int         vecstride_b = hypre_VectorVectorStride(b);*/
   HYPRE_Int         idxstride_x = hypre_VectorIndexStride(x);
   HYPRE_Int         vecstride_x = hypre_VectorVectorStride(x);

   HYPRE_Complex     temp, tempx;

   HYPRE_Int         i, j, jj;

   HYPRE_Int         m;

   HYPRE_Real        xpar=0.7;

   HYPRE_Int         ierr = 0;
   hypre_Vector	    *x_tmp = NULL;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Matvec returns ierr = 1 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 2 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in Matvec, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/
 
   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
   hypre_assert( num_vectors == hypre_VectorNumVectors(b) );

   if (num_cols != x_size)
      ierr = 1;

   if (num_rows != y_size || num_rows != b_size)
      ierr = 2;

   if (num_cols != x_size && (num_rows != y_size || num_rows != b_size))
      ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= beta;

#ifdef HYPRE_PROFILE
      hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;
#endif

      return ierr;
   }

   if (x == y)
   {
      x_tmp = hypre_SeqVectorCloneDeep(x);
      x_data = hypre_VectorData(x_tmp);
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/
   
   temp = beta / alpha;
   
/* use rownnz pointer to do the A*x multiplication  when num_rownnz is smaller than num_rows */

   if (num_rownnz < xpar*(num_rows) || num_vectors > 1)
   {
      /*-----------------------------------------------------------------------
       * y = (beta/alpha)*y
       *-----------------------------------------------------------------------*/
     
      if (temp != 1.0)
      {
         if (temp == 0.0)
         {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows*num_vectors; i++)
               y_data[i] = 0.0;
         }
         else
         {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows*num_vectors; i++)
               y_data[i] = b_data[i]*temp;
         }
      }


      /*-----------------------------------------------------------------
       * y += A*x
       *-----------------------------------------------------------------*/

      if (num_rownnz < xpar*(num_rows))
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE
#endif

         for (i = 0; i < num_rownnz; i++)
         {
            m = A_rownnz[i];

            /*
             * for (jj = A_i[m]; jj < A_i[m+1]; jj++)
             * {
             *         j = A_j[jj];
             *  y_data[m] += A_data[jj] * x_data[j];
             * } */
            if ( num_vectors==1 )
            {
               tempx = 0;
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  tempx +=  A_data[jj] * x_data[A_j[jj]];
               y_data[m] += tempx;
            }
            else
               for ( j=0; j<num_vectors; ++j )
               {
                  tempx = 0;
                  for (jj = A_i[m]; jj < A_i[m+1]; jj++) 
                     tempx +=  A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
                  y_data[ j*vecstride_y + m*idxstride_y] += tempx;
               }
         }
      }
      else // num_vectors > 1
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i,j,jj,tempx) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_rows; i++)
         {
            for (j = 0; j < num_vectors; ++j)
            {
               tempx = 0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] += tempx;
            }
         }
      }

      /*-----------------------------------------------------------------
       * y = alpha*y
       *-----------------------------------------------------------------*/

      if (alpha != 1.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] *= alpha;
      }
   }
   else
   { // JSP: this is currently the only path optimized
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,tempx)
#endif
      {
      HYPRE_Int iBegin = hypre_CSRMatrixGetLoadBalancedPartitionBegin(A);
      HYPRE_Int iEnd = hypre_CSRMatrixGetLoadBalancedPartitionEnd(A);
      hypre_assert(iBegin <= iEnd);
      hypre_assert(iBegin >= 0 && iBegin <= num_rows);
      hypre_assert(iEnd >= 0 && iEnd <= num_rows);

      if (0 == temp)
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = 0.0;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*A*x
      } // temp == 0
      else if (-1 == temp) // beta == -alpha
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x - y
         else if (-1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x + y
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x - y)
      } // temp == -1
      else if (1 == temp)
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x + y
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x - y
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x + y)
      }
      else
      {
         if (1 == alpha) // JSP: a common path
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = A*x + temp*y
         else if (-1 == alpha)
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = -b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx -= A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = tempx;
            }
         } // y = -A*x - temp*y
         else
         {
            for (i = iBegin; i < iEnd; i++)
            {
               tempx = b_data[i]*temp;
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[A_j[jj]];
               }
               y_data[i] = alpha*tempx;
            }
         } // y = alpha*(A*x + temp*y)
      } // temp != 0 && temp != -1 && temp != 1
      } // omp parallel
   }

   if (x == y) hypre_SeqVectorDestroy(x_tmp);

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MATVEC] += hypre_MPI_Wtime() - time_begin;
#endif
   return ierr;
}
Beispiel #6
0
HYPRE_Int main(HYPRE_Int argc, char *argv[])
{
    HYPRE_Int mype, npes;
    HYPRE_Int symmetric;
    HYPRE_Int num_runs;
    Matrix *A;
    ParaSails *ps;
    FILE *file;
    HYPRE_Int n, beg_row, end_row;
    HYPRE_Real time0, time1;
    HYPRE_Real setup_time, solve_time;
    HYPRE_Real max_setup_time, max_solve_time;
    HYPRE_Real cost;

    HYPRE_Real *x, *b;
    HYPRE_Int i, niter;
    HYPRE_Real thresh;
    HYPRE_Real threshg;
    HYPRE_Int nlevels;
    HYPRE_Real filter;
    HYPRE_Real loadbal;

    hypre_MPI_Init(&argc, &argv);
    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mype);
    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &npes);

    /* Read number of rows in matrix */
    symmetric = atoi(argv[1]);
    num_runs  = atoi(argv[2]);

    file = fopen(argv[3], "r");
    assert(file != NULL);
#ifdef EMSOLVE
    hypre_fscanf(file, "%*d %d\n", &n);
#else
    hypre_fscanf(file, "%d\n", &n);
#endif
    fclose(file);
    assert(n >= npes);

    beg_row = (HYPRE_Int) ((HYPRE_Real)(mype*n) / npes) + 1; /* assumes 1-based */
    end_row = (HYPRE_Int) ((HYPRE_Real)((mype+1)* n) / npes);

    if (mype == 0)
        assert(beg_row == 1);
    if (mype == npes-1)
        assert(end_row == n);

#ifdef EMSOLVE
    beg_row--;
    end_row--;
#endif

    x = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));
    b = (HYPRE_Real *) malloc((end_row-beg_row+1) * sizeof(HYPRE_Real));

    A = MatrixCreate(hypre_MPI_COMM_WORLD, beg_row, end_row);

    MatrixRead(A, argv[3]);
    if (mype == 0) 
        hypre_printf("%s\n", argv[3]);

    /* MatrixPrint(A, "A"); */

    /* Right-hand side */
    if (argc > 4)
    {
        RhsRead(b, A, argv[4]);
        if (mype == 0) 
            hypre_printf("Using rhs from %s\n", argv[4]);
    }
    else
    {
        for (i=0; i<end_row-beg_row+1; i++)
            b[i] = (HYPRE_Real) (2*rand()) / (HYPRE_Real) RAND_MAX - 1.0;
    }

    while (num_runs && num_runs >= -1)
    {
        /* Initial guess */
        for (i=0; i<end_row-beg_row+1; i++)
            x[i] = 0.0;

	if (num_runs == -1)
	{
            thresh = 0.0;
	    nlevels = 0;
	    filter = 0.0;
            loadbal = 0.0;
	}
	else
	{
            if (mype == 0)
            {
#if PARASAILS_EXT_PATTERN
                hypre_printf("Enter parameters threshg, thresh, nlevels, "
	            "filter, beta:\n");
	        fflush(stdout);
                hypre_scanf("%lf %lf %d %lf %lf", &threshg, &thresh, &nlevels, 
		    &filter, &loadbal);
#else
                hypre_printf("Enter parameters thresh, nlevels, "
	            "filter, beta:\n");
	        fflush(stdout);
                hypre_scanf("%lf %d %lf %lf", &thresh, &nlevels, 
		    &filter, &loadbal);
#endif
	    }

	    hypre_MPI_Bcast(&threshg, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&thresh,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&nlevels, 1, HYPRE_MPI_INT,    0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&filter,  1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);
	    hypre_MPI_Bcast(&loadbal, 1, hypre_MPI_DOUBLE, 0, hypre_MPI_COMM_WORLD);

            if (nlevels < 0)
                break;
	}

        /**************
	 * Setup phase   
	 **************/

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        ps = ParaSailsCreate(hypre_MPI_COMM_WORLD, beg_row, end_row, symmetric);

        ps->loadbal_beta = loadbal;

#if PARASAILS_EXT_PATTERN
        ParaSailsSetupPatternExt(ps, A, threshg, thresh, nlevels);
#else
        ParaSailsSetupPattern(ps, A, thresh, nlevels);
#endif

        time1 = hypre_MPI_Wtime();
	setup_time = time1-time0;

        cost = ParaSailsStatsPattern(ps, A);
	if (cost > 5.e11)
	{
            hypre_printf("Aborting setup and solve due to high cost.\n");
	    goto cleanup;
	}

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        err = ParaSailsSetupValues(ps, A, filter);
        if (err != 0)
	{
            hypre_printf("ParaSailsSetupValues returned error.\n");
	    goto cleanup;
	}

        time1 = hypre_MPI_Wtime();
	setup_time += (time1-time0);

        ParaSailsStatsValues(ps, A);

	if (!strncmp(argv[3], "testpsmat", 8))
            MatrixPrint(ps->M, "M");

#if 0
        if (mype == 0) 
            hypre_printf("SETTING UP VALUES AGAIN WITH FILTERED PATTERN\n");
        ps->loadbal_beta = 0;
        ParaSailsSetupValues(ps, A, 0.0);
#endif

        /*****************
	 * Solution phase
	 *****************/

	niter = 3000;
        if (MatrixNnz(ps->M) == n) /* if diagonal preconditioner */
	    niter = 5000;

        hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);
        time0 = hypre_MPI_Wtime();

        if (symmetric == 1)
            PCG_ParaSails(A, ps, b, x, 1.e-8, niter);
	else
            FGMRES_ParaSails(A, ps, b, x, 50, 1.e-8, niter);

        time1 = hypre_MPI_Wtime();
	solve_time = time1-time0;

        hypre_MPI_Reduce(&setup_time, &max_setup_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, 
	    hypre_MPI_COMM_WORLD);
        hypre_MPI_Reduce(&solve_time, &max_solve_time, 1, hypre_MPI_DOUBLE, hypre_MPI_MAX, 0, 
	    hypre_MPI_COMM_WORLD);

	if (mype == 0)
	{
            hypre_printf("**********************************************\n");
            hypre_printf("***    Setup    Solve    Total\n");
            hypre_printf("III %8.1f %8.1f %8.1f\n", max_setup_time, max_solve_time, 
		max_setup_time+max_solve_time);
            hypre_printf("**********************************************\n");
	}

cleanup:
        ParaSailsDestroy(ps);

        num_runs--;
    }

    free(x);
    free(b);

    MatrixDestroy(A);
    hypre_MPI_Finalize();

    return 0;
}
Beispiel #7
0
void Euclid_dhApply(Euclid_dh ctx, double *rhs, double *lhs)
{
    START_FUNC_DH
    double *rhs_, *lhs_;
    double t1, t2;

    t1 = hypre_MPI_Wtime();

    /* default settings; for everything except PILU */
    ctx->from = 0;
    ctx->to = ctx->m;

    /* case 1: no preconditioning */
    if (! strcmp(ctx->algo_ilu, "none") || ! strcmp(ctx->algo_par, "none")) {
        HYPRE_Int i, m = ctx->m;
        for (i=0; i<m; ++i) lhs[i] = rhs[i];
        goto END_OF_FUNCTION;
    }

    /*----------------------------------------------------------------
     * permute and scale rhs vector
     *----------------------------------------------------------------*/
    /* permute rhs vector */
    if (ctx->sg != NULL) {

        /* hypre_printf("@@@@@@@@@@@@@@@@@ permute_vec_n2o_private\n"); */

        permute_vec_n2o_private(ctx, rhs, lhs);
        CHECK_V_ERROR;
        rhs_ = lhs;
        lhs_ = ctx->work2;
    } else {
        rhs_ = rhs;
        lhs_ = lhs;
    }

    /* scale rhs vector */
    if (ctx->isScaled) {

        /* hypre_printf("@@@@@@@@@@@@@@@@@ scale_rhs_private\n"); */

        scale_rhs_private(ctx, rhs_);
        CHECK_V_ERROR;
    }

    /* note: rhs_ is permuted, scaled; the input, "rhs" vector has
             not been disturbed.
     */

    /*----------------------------------------------------------------
     * big switch to choose the appropriate triangular solve
     *----------------------------------------------------------------*/

    /* sequential and mpi block jacobi cases */
    if (np_dh == 1 ||
            ! strcmp(ctx->algo_par, "bj") ) {
        Factor_dhSolveSeq(rhs_, lhs_, ctx);
        CHECK_V_ERROR;
    }


    /* pilu case */
    else {
        Factor_dhSolve(rhs_, lhs_, ctx);
        CHECK_V_ERROR;
    }

    /*----------------------------------------------------------------
     * unpermute lhs vector
     * (note: don't need to unscale, because we were clever)
     *----------------------------------------------------------------*/
    if (ctx->sg != NULL) {
        permute_vec_o2n_private(ctx, lhs_, lhs);
        CHECK_V_ERROR;
    }

END_OF_FUNCTION:
    ;

    t2 = hypre_MPI_Wtime();
    /* collective timing for triangular solves */
    ctx->timing[TRI_SOLVE_T] += (t2 - t1);

    /* collective timing for setup+krylov+triSolves
       (intent is to time linear solve, but this is
       at best probelematical!)
     */
    ctx->timing[TOTAL_SOLVE_TEMP_T] = t2 - ctx->timing[SOLVE_START_T];

    /* total triangular solve count */
    ctx->its += 1;
    ctx->itsTotal += 1;

    END_FUNC_DH
}
Beispiel #8
0
static void matvec_timing(MPI_Comm comm, Matrix *mat)
{
   HYPRE_Real time0, time1;
   HYPRE_Real trial1, trial2, trial3, trial4, trial5, trial6;
   HYPRE_Real *temp1, *temp2;
   HYPRE_Int i, mype;
   HYPRE_Int n = mat->end_row - mat->beg_row + 1;

   temp1 = (HYPRE_Real *) calloc(n, sizeof(HYPRE_Real));
   temp2 = (HYPRE_Real *) calloc(n, sizeof(HYPRE_Real));

   /* warm-up */
   hypre_MPI_Barrier(comm);
   for (i=0; i<100; i++)
      MatrixMatvec(mat, temp1, temp2);

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvec(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial1 = time1-time0;

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvec(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial2 = time1-time0;

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvec(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial3 = time1-time0;

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvecSerial(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial4 = time1-time0;

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvecSerial(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial5 = time1-time0;

   hypre_MPI_Barrier(comm);
   time0 = hypre_MPI_Wtime();
   for (i=0; i<100; i++)
      MatrixMatvecSerial(mat, temp1, temp2);
   hypre_MPI_Barrier(comm);
   time1 = hypre_MPI_Wtime();
   trial6 = time1-time0;

   hypre_MPI_Comm_rank(comm, &mype);
   if (mype == 0)
      hypre_printf("Timings: %f %f %f Serial: %f %f %f\n", 
                   trial1, trial2, trial3, trial4, trial5, trial6);

   fflush(stdout);

   /* this is all we wanted, so don't waste any more cycles */
   exit(0);
}