int main () { Kokkos::initialize (); srand (61391); // Set the random seed int nnumbers = 100000; view_type data ("RND", nnumbers); view_type result ("Prime", nnumbers); count_type count ("Count"); host_view_type h_data = Kokkos::create_mirror_view (data); host_view_type h_result = Kokkos::create_mirror_view (result); host_count_type h_count = Kokkos::create_mirror_view (count); typedef view_type::size_type size_type; // Fill the 'data' array on the host with random numbers. We assume // that they come from some process which is only implemented on the // host, via some library. (That's true in this case.) for (size_type i = 0; i < data.dimension_0 (); ++i) { h_data(i) = rand () % nnumbers; } Kokkos::deep_copy (data, h_data); // copy from host to device Kokkos::parallel_for (data.dimension_0 (), findprimes (data, result, count)); Kokkos::deep_copy (h_count, count); // copy from device to host printf ("Found %i prime numbers in %i random numbers\n", h_count(), nnumbers); Kokkos::finalize (); }
int main() { Kokkos::initialize(); srand(61391); int nnumbers = 100000; view_type data("RND",nnumbers); view_type result("Prime",nnumbers); count_type count("Count"); host_view_type h_data = Kokkos::create_mirror_view(data); host_view_type h_result = Kokkos::create_mirror_view(result); host_count_type h_count = Kokkos::create_mirror_view(count); typedef view_type::size_type size_type; for (size_type i = 0; i < data.dimension_0(); ++i) { h_data(i) = rand () % nnumbers; } Kokkos::deep_copy(data,h_data); Kokkos::parallel_for(data.dimension_0(),findprimes(data,result,count)); Kokkos::deep_copy(h_count,count); printf("Found %i prime numbers in %i random numbers\n",h_count(),nnumbers); Kokkos::finalize(); }
int mpiPi_mergeResults () { int ac; callsite_stats_t **av; int totalCount = 0; int maxCount = 0; int retval = 1, sendval; /* gather local task data */ h_gather_data (mpiPi.task_callsite_stats, &ac, (void ***) &av); /* determine size of space necessary on collector */ PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm); PMPI_Reduce (&ac, &maxCount, 1, MPI_INT, MPI_MAX, mpiPi.collectorRank, mpiPi.comm); if (totalCount < 1) { mpiPi_msg_warn ("Collector found no records to merge. Omitting report.\n"); return 0; } /* gather global data at collector */ if (mpiPi.rank == mpiPi.collectorRank) { int i; int ndx = 0; #ifdef ENABLE_BFD if (mpiPi.appFullName != NULL) { if (open_bfd_executable (mpiPi.appFullName) == 0) mpiPi.do_lookup = 0; } #elif defined(USE_LIBDWARF) if (mpiPi.appFullName != NULL) { if (open_dwarf_executable (mpiPi.appFullName) == 0) mpiPi.do_lookup = 0; } #endif #if defined(ENABLE_BFD) || defined(USE_LIBDWARF) else { mpiPi_msg_warn ("Failed to open executable\n"); mpiPi.do_lookup = 0; } #endif /* convert data to src line; merge, if nec */ mpiPi.global_callsite_stats = h_open (mpiPi.tableSize, mpiPi_callsite_stats_src_hashkey, mpiPi_callsite_stats_src_comparator); mpiPi.global_callsite_stats_agg = h_open (mpiPi.tableSize, mpiPi_callsite_stats_src_id_hashkey, mpiPi_callsite_stats_src_id_comparator); if (callsite_pc_cache == NULL) { callsite_pc_cache = h_open (mpiPi.tableSize, callsite_pc_cache_hashkey, callsite_pc_cache_comparator); } if (callsite_src_id_cache == NULL) { callsite_src_id_cache = h_open (mpiPi.tableSize, callsite_src_id_cache_hashkey, callsite_src_id_cache_comparator); } /* Try to allocate space for max count of callsite info from all tasks */ mpiPi.rawCallsiteData = (callsite_stats_t *) calloc (maxCount, sizeof (callsite_stats_t)); if (mpiPi.rawCallsiteData == NULL) { mpiPi_msg_warn ("Failed to allocate memory to collect callsite info"); retval = 0; } /* Clear global_mpi_time and global_mpi_size before accumulation in mpiPi_insert_callsite_records */ mpiPi.global_mpi_time = 0.0; mpiPi.global_mpi_size = 0.0; if (retval == 1) { /* Insert collector callsite data into global and task-specific hash tables */ for (ndx = 0; ndx < ac; ndx++) { mpiPi_insert_callsite_records (av[ndx]); } ndx = 0; for (i = 1; i < mpiPi.size; i++) /* n-1 */ { MPI_Status status; int count; int j; /* okay in any order */ PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status); PMPI_Get_count (&status, MPI_CHAR, &count); PMPI_Recv (&(mpiPi.rawCallsiteData[ndx]), count, MPI_CHAR, status.MPI_SOURCE, mpiPi.tag, mpiPi.comm, &status); count /= sizeof (callsite_stats_t); for (j = 0; j < count; j++) { mpiPi_insert_callsite_records (&(mpiPi.rawCallsiteData[j])); } } free (mpiPi.rawCallsiteData); } } else { int ndx; char *sbuf = (char *) malloc (ac * sizeof (callsite_stats_t)); for (ndx = 0; ndx < ac; ndx++) { bcopy (av[ndx], &(sbuf[ndx * sizeof (callsite_stats_t)]), sizeof (callsite_stats_t)); } PMPI_Send (sbuf, ac * sizeof (callsite_stats_t), MPI_CHAR, mpiPi.collectorRank, mpiPi.tag, mpiPi.comm); free (sbuf); } if (mpiPi.rank == mpiPi.collectorRank && retval == 1) { if (mpiPi.collective_report == 0) mpiPi_msg_debug ("MEMORY : Allocated for global_callsite_stats : %13ld\n", h_count (mpiPi.global_callsite_stats) * sizeof (callsite_stats_t)); mpiPi_msg_debug ("MEMORY : Allocated for global_callsite_stats_agg : %13ld\n", h_count (mpiPi.global_callsite_stats_agg) * sizeof (callsite_stats_t)); } /* TODO: need to free all these pointers as well. */ free (av); if (mpiPi.rank == mpiPi.collectorRank) { if (mpiPi.do_lookup == 1) { #ifdef ENABLE_BFD /* clean up */ close_bfd_executable (); #elif defined(USE_LIBDWARF) close_dwarf_executable (); #endif } } /* Quadrics MPI does not appear to support MPI_IN_PLACE */ sendval = retval; PMPI_Allreduce (&sendval, &retval, 1, MPI_INT, MPI_MIN, mpiPi.comm); return retval; }