/*
 * Create a memkind through ops->memkind_operations and wrap it in a newly
 * allocated struct distmem that is handed to ops->dist_create() and then
 * registered with put_kind_entry_into_kind_state().
 *
 * ops   operation table; ops->memkind_operations is passed to
 *       memkind_create() and ops->dist_create() is invoked on the wrapper.
 * name  NUL-terminated name; a private copy is stored in the wrapper.
 * kind  out-parameter filled in by memkind_create().
 *
 * Returns 0 on success. On failure returns the non-zero code reported by
 * memkind_create() or ops->dist_create(), or -1 when an allocation fails.
 * Nothing is registered when a failure is reported.
 *
 * NOTE(review): when a step after memkind_create() fails, the kind already
 * stored in *kind is left in place; confirm whether callers are expected to
 * destroy it.
 */
int distmem_create(struct distmem_ops *ops, const char *name, memkind_t *kind)
{
    int err = memkind_create(ops->memkind_operations, name, kind);
    if (err) {
        char err_msg[ERROR_MESSAGE_SIZE];
        memkind_error_message(err, err_msg, ERROR_MESSAGE_SIZE);
        fprintf(stderr, "%s", err_msg);
        /* *kind cannot be trusted after a failed create, so stop here. */
        return err;
    }

    struct distmem *dist_kind =
        (struct distmem *)memkind_malloc(MEMKIND_DEFAULT, sizeof(struct distmem));
    if (dist_kind == NULL) {
        fprintf(stderr, "Error in dist memory creation: out of memory\n");
        return -1;
    }

    /* Length includes the terminating NUL so one memcpy copies the string. */
    size_t name_size = strlen(name) + 1;
    dist_kind->name = (char *)memkind_malloc(MEMKIND_DEFAULT, name_size);
    if (dist_kind->name == NULL) {
        fprintf(stderr, "Error in dist memory creation: out of memory\n");
        memkind_free(MEMKIND_DEFAULT, dist_kind);
        return -1;
    }
    memcpy(dist_kind->name, name, name_size);

    dist_kind->memkind = *kind;
    dist_kind->operations = ops;

    err = ops->dist_create(dist_kind, ops, name);
    if (err) {
        fprintf(stderr, "Error in dist memory creation\n");
        /*
         * Do not register a half-initialised entry.
         * NOTE(review): assumes dist_create() keeps no reference to
         * dist_kind when it fails -- confirm against the implementations.
         */
        memkind_free(MEMKIND_DEFAULT, dist_kind->name);
        memkind_free(MEMKIND_DEFAULT, dist_kind);
        return err;
    }

    put_kind_entry_into_kind_state(dist_kind);
    return 0;
}
/*
 * Report a memkind error code on stderr.
 *
 * err  code handed to memkind_error_message(), which fills a local buffer of
 *      MEMKIND_ERROR_MESSAGE_SIZE bytes; the buffer is then written to stderr
 *      followed by a newline.
 */
static void print_err_message(int err)
{
    char text[MEMKIND_ERROR_MESSAGE_SIZE];

    memkind_error_message(err, text, MEMKIND_ERROR_MESSAGE_SIZE);
    fprintf(stderr, "%s\n", text);
}
int main(int argc, char **argv) { int quantum, checktick(); int BytesPerWord; int k; ssize_t j; STREAM_TYPE scalar; double t, times[4][NTIMES]; #ifdef ENABLE_DYNAMIC_ALLOC int err = 0; memkind_t kind; char err_msg[ERR_MSG_SIZE]; if (argc > 1 && (strncmp("--help", argv[1], strlen("--help")) == 0 || strncmp("-h", argv[1], strlen("-h")) == 0)) { printf("Usage: %s [memkind_default | memkind_hbw | memkind_hbw_hugetlb | \n" " memkind_hbw_preferred | memkind_hbw_preferred_hugetlb | \n" " memkind_hbw_gbtlb | memkind_hbw_preferred_gbtlb | memkind_gbtlb | \n" " memkind_hbw_interleave | memkind_interleave]\n", argv[0]); return 0; } #endif /* --- SETUP --- determine precision and check timing --- */ printf(HLINE); printf("STREAM version $Revision: 5.10 $\n"); #ifdef ENABLE_DYNAMIC_ALLOC printf("Variant that uses the memkind library for dynamic memory allocation.\n"); #endif printf(HLINE); BytesPerWord = sizeof(STREAM_TYPE); printf("This system uses %d bytes per array element.\n", BytesPerWord); printf(HLINE); #ifdef N printf("***** WARNING: ******\n"); printf(" It appears that you set the preprocessor variable N when compiling this code.\n"); printf(" This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n"); printf(" Reverting to default value of STREAM_ARRAY_SIZE=%llu\n",(unsigned long long) STREAM_ARRAY_SIZE); printf("***** WARNING: ******\n"); #endif printf("Array size = %llu (elements), Offset = %d (elements)\n" , (unsigned long long) STREAM_ARRAY_SIZE, OFFSET); printf("Memory per array = %.1f MiB (= %.1f GiB).\n", BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0), BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0/1024.0)); printf("Total memory required = %.1f MiB (= %.1f GiB).\n", (3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.), (3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024./1024.)); printf("Each kernel will be executed %d times.\n", NTIMES); printf(" The *best* time 
for each kernel (excluding the first iteration)\n"); printf(" will be used to compute the reported bandwidth.\n"); #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { k = omp_get_num_threads(); printf ("Number of Threads requested = %i\n",k); } } #endif #ifdef _OPENMP k = 0; #pragma omp parallel #pragma omp atomic k++; printf ("Number of Threads counted = %i\n",k); #endif #ifdef ENABLE_DYNAMIC_ALLOC if (argc > 1) { err = memkind_get_kind_by_name(argv[1], &kind); } else { err = memkind_get_kind_by_name("memkind_default", &kind); } if (err) { memkind_error_message(err, err_msg, ERR_MSG_SIZE); fprintf(stderr, "ERROR: %s\n", err_msg); return -1; } err = memkind_posix_memalign(kind, (void **)&a, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array a\n"); return -err; } err = memkind_posix_memalign(kind, (void **)&b, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array b\n"); return -err; } err = memkind_posix_memalign(kind, (void **)&c, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array c\n"); return -err; } #endif /* Get initial value for system clock. 
*/ #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; } printf(HLINE); if ( (quantum = checktick()) >= 1) printf("Your clock granularity/precision appears to be " "%d microseconds.\n", quantum); else { printf("Your clock granularity appears to be " "less than one microsecond.\n"); quantum = 1; } t = mysecond(); #pragma omp parallel for for (j = 0; j < STREAM_ARRAY_SIZE; j++) a[j] = 2.0E0 * a[j]; t = 1.0E6 * (mysecond() - t); printf("Each test below will take on the order" " of %d microseconds.\n", (int) t ); printf(" (= %d clock ticks)\n", (int) (t/quantum) ); printf("Increase the size of the arrays if this shows that\n"); printf("you are not getting at least 20 clock ticks per test.\n"); printf(HLINE); printf("WARNING -- The above is only a rough guideline.\n"); printf("For best results, please be sure you know the\n"); printf("precision of your system timer.\n"); printf(HLINE); /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ scalar = 3.0; for (k=0; k<NTIMES; k++) { times[0][k] = mysecond(); #ifdef TUNED tuned_STREAM_Copy(); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) c[j] = a[j]; #endif times[0][k] = mysecond() - times[0][k]; times[1][k] = mysecond(); #ifdef TUNED tuned_STREAM_Scale(scalar); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) b[j] = scalar*c[j]; #endif times[1][k] = mysecond() - times[1][k]; times[2][k] = mysecond(); #ifdef TUNED tuned_STREAM_Add(); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) c[j] = a[j]+b[j]; #endif times[2][k] = mysecond() - times[2][k]; times[3][k] = mysecond(); #ifdef TUNED tuned_STREAM_Triad(scalar); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) a[j] = b[j]+scalar*c[j]; #endif times[3][k] = mysecond() - times[3][k]; } /* --- SUMMARY --- */ for (k=1; k<NTIMES; k++) /* note -- skip first iteration */ { for (j=0; j<4; j++) { avgtime[j] = avgtime[j] + times[j][k]; mintime[j] = 
MIN(mintime[j], times[j][k]); maxtime[j] = MAX(maxtime[j], times[j][k]); } } printf("Function Best Rate MB/s Avg time Min time Max time\n"); for (j=0; j<4; j++) { avgtime[j] = avgtime[j]/(double)(NTIMES-1); printf("%s%12.1f %11.6f %11.6f %11.6f\n", label[j], 1.0E-06 * bytes[j]/mintime[j], avgtime[j], mintime[j], maxtime[j]); } printf(HLINE); /* --- Check Results --- */ checkSTREAMresults(); printf(HLINE); #ifdef ENABLE_DYNAMIC_ALLOC memkind_free(kind, c); memkind_free(kind, b); memkind_free(kind, a); #endif return 0; }