示例#1
0
int main () {
  Kokkos::initialize ();

  srand (61391); // Set the random seed

  int nnumbers = 100000;
  view_type data ("RND", nnumbers);
  view_type result ("Prime", nnumbers);
  count_type count ("Count");

  host_view_type h_data = Kokkos::create_mirror_view (data);
  host_view_type h_result = Kokkos::create_mirror_view (result);
  host_count_type h_count = Kokkos::create_mirror_view (count);

  typedef view_type::size_type size_type;
  // Fill the 'data' array on the host with random numbers.  We assume
  // that they come from some process which is only implemented on the
  // host, via some library.  (That's true in this case.)
  for (size_type i = 0; i < data.dimension_0 (); ++i) {
    h_data(i) = rand () % nnumbers;
  }
  Kokkos::deep_copy (data, h_data); // copy from host to device

  Kokkos::parallel_for (data.dimension_0 (), findprimes (data, result, count));
  Kokkos::deep_copy (h_count, count); // copy from device to host

  printf ("Found %i prime numbers in %i random numbers\n", h_count(), nnumbers);
  Kokkos::finalize ();
}
示例#2
0
int main() {
  Kokkos::initialize();

  srand(61391);

  int nnumbers = 100000;
  view_type data("RND",nnumbers);
  view_type result("Prime",nnumbers);
  count_type count("Count");

  host_view_type h_data = Kokkos::create_mirror_view(data);
  host_view_type h_result = Kokkos::create_mirror_view(result);
  host_count_type h_count = Kokkos::create_mirror_view(count);

  typedef view_type::size_type size_type;
  for (size_type i = 0; i < data.dimension_0(); ++i) {
    h_data(i) = rand () % nnumbers;
  }

  Kokkos::deep_copy(data,h_data);

  Kokkos::parallel_for(data.dimension_0(),findprimes(data,result,count));
  Kokkos::deep_copy(h_count,count);

  printf("Found %i prime numbers in %i random numbers\n",h_count(),nnumbers);
  Kokkos::finalize();
}
示例#3
0
int
mpiPi_mergeResults ()
{
  int ac;
  callsite_stats_t **av;
  int totalCount = 0;
  int maxCount = 0;
  int retval = 1, sendval;

  /* gather local task data */
  h_gather_data (mpiPi.task_callsite_stats, &ac, (void ***) &av);

  /* determine size of space necessary on collector */
  PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm);
  PMPI_Reduce (&ac, &maxCount, 1, MPI_INT, MPI_MAX, mpiPi.collectorRank,
	       mpiPi.comm);

  if (totalCount < 1)
    {
      mpiPi_msg_warn
	("Collector found no records to merge. Omitting report.\n");
      return 0;
    }

  /* gather global data at collector */
  if (mpiPi.rank == mpiPi.collectorRank)
    {
      int i;
      int ndx = 0;

#ifdef ENABLE_BFD
      if (mpiPi.appFullName != NULL)
	{
	  if (open_bfd_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#elif defined(USE_LIBDWARF)
      if (mpiPi.appFullName != NULL)
	{
	  if (open_dwarf_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#endif
#if defined(ENABLE_BFD) || defined(USE_LIBDWARF)
      else
	{
	  mpiPi_msg_warn ("Failed to open executable\n");
	  mpiPi.do_lookup = 0;
	}
#endif
      /* convert data to src line; merge, if nec */
      mpiPi.global_callsite_stats = h_open (mpiPi.tableSize,
					    mpiPi_callsite_stats_src_hashkey,
					    mpiPi_callsite_stats_src_comparator);
      mpiPi.global_callsite_stats_agg = h_open (mpiPi.tableSize,
						mpiPi_callsite_stats_src_id_hashkey,
						mpiPi_callsite_stats_src_id_comparator);
      if (callsite_pc_cache == NULL)
	{
	  callsite_pc_cache = h_open (mpiPi.tableSize,
				      callsite_pc_cache_hashkey,
				      callsite_pc_cache_comparator);
	}
      if (callsite_src_id_cache == NULL)
	{
	  callsite_src_id_cache = h_open (mpiPi.tableSize,
					  callsite_src_id_cache_hashkey,
					  callsite_src_id_cache_comparator);
	}
      /* Try to allocate space for max count of callsite info from all tasks  */
      mpiPi.rawCallsiteData =
	(callsite_stats_t *) calloc (maxCount, sizeof (callsite_stats_t));
      if (mpiPi.rawCallsiteData == NULL)
	{
	  mpiPi_msg_warn
	    ("Failed to allocate memory to collect callsite info");
	  retval = 0;
	}

      /* Clear global_mpi_time and global_mpi_size before accumulation in mpiPi_insert_callsite_records */
      mpiPi.global_mpi_time = 0.0;
      mpiPi.global_mpi_size = 0.0;

      if (retval == 1)
	{
	  /* Insert collector callsite data into global and task-specific hash tables */
	  for (ndx = 0; ndx < ac; ndx++)
	    {
	      mpiPi_insert_callsite_records (av[ndx]);
	    }
	  ndx = 0;
	  for (i = 1; i < mpiPi.size; i++)	/* n-1 */
	    {
	      MPI_Status status;
	      int count;
	      int j;

	      /* okay in any order */
	      PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      PMPI_Get_count (&status, MPI_CHAR, &count);
	      PMPI_Recv (&(mpiPi.rawCallsiteData[ndx]), count, MPI_CHAR,
			 status.MPI_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      count /= sizeof (callsite_stats_t);


	      for (j = 0; j < count; j++)
		{
		  mpiPi_insert_callsite_records (&(mpiPi.rawCallsiteData[j]));
		}
	    }
	  free (mpiPi.rawCallsiteData);
	}
    }
  else
    {
      int ndx;
      char *sbuf = (char *) malloc (ac * sizeof (callsite_stats_t));
      for (ndx = 0; ndx < ac; ndx++)
	{
	  bcopy (av[ndx],
		 &(sbuf[ndx * sizeof (callsite_stats_t)]),
		 sizeof (callsite_stats_t));
	}
      PMPI_Send (sbuf, ac * sizeof (callsite_stats_t),
		 MPI_CHAR, mpiPi.collectorRank, mpiPi.tag, mpiPi.comm);
      free (sbuf);
    }
  if (mpiPi.rank == mpiPi.collectorRank && retval == 1)
    {
      if (mpiPi.collective_report == 0)
	mpiPi_msg_debug
	  ("MEMORY : Allocated for global_callsite_stats     : %13ld\n",
	   h_count (mpiPi.global_callsite_stats) * sizeof (callsite_stats_t));
      mpiPi_msg_debug
	("MEMORY : Allocated for global_callsite_stats_agg : %13ld\n",
	 h_count (mpiPi.global_callsite_stats_agg) *
	 sizeof (callsite_stats_t));
    }

  /* TODO: need to free all these pointers as well. */
  free (av);

  if (mpiPi.rank == mpiPi.collectorRank)
    {
      if (mpiPi.do_lookup == 1)
	{
#ifdef ENABLE_BFD
	  /* clean up */
	  close_bfd_executable ();
#elif defined(USE_LIBDWARF)
	  close_dwarf_executable ();
#endif
	}
    }

  /*  Quadrics MPI does not appear to support MPI_IN_PLACE   */
  sendval = retval;
  PMPI_Allreduce (&sendval, &retval, 1, MPI_INT, MPI_MIN, mpiPi.comm);
  return retval;
}