/*
 * Aligned allocation through memkind, using the kind that hbw_get_kind()
 * returns for HBW_PAGESIZE_4KB.
 *
 * memptr    - receives the allocated pointer (passed through unchanged)
 * alignment - requested alignment (passed through unchanged)
 * size      - requested size in bytes (passed through unchanged)
 *
 * Returns whatever memkind_posix_memalign() returns.
 */
int hbw_posix_memalign(void **memptr, size_t alignment, size_t size)
{
    return memkind_posix_memalign(hbw_get_kind(HBW_PAGESIZE_4KB),
                                  memptr, alignment, size);
}
//////////////////////////////////////////////////////////////////////////// // Posix alignment //////////////////////////////////////////////////////////////////////////// int myMemkindAlign(void **memptr, size_t alignment, size_t size) { DBG(2) printf("In my memkind align sz:%ld .. ", size); int ret; // if we have not initialized memkind HBW arena yet, call default kind // Similarly, if the hueristic decides not to alloc in HBW, use default // if (!MemkindInitDone || !isAllocInHBW(size)) ret = memkind_posix_memalign(MEMKIND_DEFAULT, memptr, alignment, size); else { DBG(2) printf("\tHBW"); ret = memkind_posix_memalign(HBW_Type, memptr, alignment, size); logHBW(*memptr, size); } DBG(2) printf("\tptr:%p\n", *memptr); return ret; }
/*
 * Aligned allocation from the kind that hbw_get_kind() returns for the
 * requested page size.
 *
 * memptr    - receives the allocated pointer (forwarded unchanged)
 * alignment - requested alignment (forwarded unchanged)
 * size      - requested size in bytes
 * pagesize  - page-size selector handed to hbw_get_kind()
 *
 * Returns EINVAL when pagesize is HBW_PAGESIZE_1GB_STRICT and size is not a
 * multiple of 1073741824 bytes; otherwise the memkind_posix_memalign() result.
 */
int hbw_posix_memalign_psize(void **memptr, size_t alignment, size_t size, int pagesize)
{
    /* The kind is looked up before validation, so the call happens even
     * when the request is subsequently rejected. */
    memkind_t selected_kind = hbw_get_kind(pagesize);

    if (pagesize == HBW_PAGESIZE_1GB_STRICT && (size % 1073741824) != 0) {
        return EINVAL;
    }

    return memkind_posix_memalign(selected_kind, memptr, alignment, size);
}
/*
 * Aligned allocation from the kind that hbw_get_kind() returns for the
 * requested page size.
 *
 * Returns EINVAL when:
 *   - pagesize is HBW_PAGESIZE_1GB_STRICT and size is not a multiple of
 *     (1 << 30) bytes, or
 *   - pagesize is HBW_PAGESIZE_2MB, HBW_PAGESIZE_1GB_STRICT or
 *     HBW_PAGESIZE_1GB while hbw_get_policy() reports HBW_POLICY_INTERLEAVE
 *     (an error is logged in this case).
 * Otherwise returns the memkind_posix_memalign() result.
 */
MEMKIND_EXPORT int hbw_posix_memalign_psize(void **memptr, size_t alignment,
                                            size_t size, hbw_pagesize_t pagesize)
{
    int non_default_pages;

    if (pagesize == HBW_PAGESIZE_1GB_STRICT && (size % (1 << 30)) != 0) {
        return EINVAL;
    }

    non_default_pages = (pagesize == HBW_PAGESIZE_2MB) ||
                        (pagesize == HBW_PAGESIZE_1GB_STRICT) ||
                        (pagesize == HBW_PAGESIZE_1GB);

    /* The policy is only queried for the page sizes listed above. */
    if (non_default_pages && hbw_get_policy() == HBW_POLICY_INTERLEAVE) {
        log_err("HBW_POLICY_INTERLEAVE is unsupported with used page size!");
        return EINVAL;
    }

    return memkind_posix_memalign(hbw_get_kind(pagesize), memptr, alignment,
                                  size);
}
/*
 * Allocate `size` bytes from the memkind kind stored in the given mpool
 * module.
 *
 * mpool - module pointer; treated as a mca_mpool_memkind_module_t
 * size  - number of bytes requested
 * align - requested alignment; 0 selects the module's page_size
 * flags - not used by this function
 *
 * Returns the allocated address, or NULL on failure. errno is always set to
 * the memkind_posix_memalign() return code (so it is 0 after a success).
 */
void* mca_mpool_memkind_alloc(
    mca_mpool_base_module_t* mpool,
    size_t size,
    size_t align,
    uint32_t flags)
{
    mca_mpool_memkind_module_t *module = (mca_mpool_memkind_module_t *) mpool;
    void *addr;
    int rc;

    if (0 == align) {
        align = module->page_size;
    }

    rc = memkind_posix_memalign(module->kind, &addr, align, size);
    errno = rc;

    return (0 != rc) ? NULL : addr;
}
/*
 * Aligned allocation through memkind, using the kind that hbw_get_kind()
 * returns for HBW_PAGESIZE_4KB. All three arguments are forwarded unchanged.
 *
 * Returns the memkind_posix_memalign() result.
 */
MEMKIND_EXPORT int hbw_posix_memalign(void **memptr, size_t alignment,
                                      size_t size)
{
    int status;

    status = memkind_posix_memalign(hbw_get_kind(HBW_PAGESIZE_4KB),
                                    memptr, alignment, size);
    return status;
}
int main(int argc, char **argv) { int quantum, checktick(); int BytesPerWord; int k; ssize_t j; STREAM_TYPE scalar; double t, times[4][NTIMES]; #ifdef ENABLE_DYNAMIC_ALLOC int err = 0; memkind_t kind; char err_msg[ERR_MSG_SIZE]; if (argc > 1 && (strncmp("--help", argv[1], strlen("--help")) == 0 || strncmp("-h", argv[1], strlen("-h")) == 0)) { printf("Usage: %s [memkind_default | memkind_hbw | memkind_hbw_hugetlb | \n" " memkind_hbw_preferred | memkind_hbw_preferred_hugetlb | \n" " memkind_hbw_gbtlb | memkind_hbw_preferred_gbtlb | memkind_gbtlb | \n" " memkind_hbw_interleave | memkind_interleave]\n", argv[0]); return 0; } #endif /* --- SETUP --- determine precision and check timing --- */ printf(HLINE); printf("STREAM version $Revision: 5.10 $\n"); #ifdef ENABLE_DYNAMIC_ALLOC printf("Variant that uses the memkind library for dynamic memory allocation.\n"); #endif printf(HLINE); BytesPerWord = sizeof(STREAM_TYPE); printf("This system uses %d bytes per array element.\n", BytesPerWord); printf(HLINE); #ifdef N printf("***** WARNING: ******\n"); printf(" It appears that you set the preprocessor variable N when compiling this code.\n"); printf(" This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n"); printf(" Reverting to default value of STREAM_ARRAY_SIZE=%llu\n",(unsigned long long) STREAM_ARRAY_SIZE); printf("***** WARNING: ******\n"); #endif printf("Array size = %llu (elements), Offset = %d (elements)\n" , (unsigned long long) STREAM_ARRAY_SIZE, OFFSET); printf("Memory per array = %.1f MiB (= %.1f GiB).\n", BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0), BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0/1024.0)); printf("Total memory required = %.1f MiB (= %.1f GiB).\n", (3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.), (3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024./1024.)); printf("Each kernel will be executed %d times.\n", NTIMES); printf(" The *best* time 
for each kernel (excluding the first iteration)\n"); printf(" will be used to compute the reported bandwidth.\n"); #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { k = omp_get_num_threads(); printf ("Number of Threads requested = %i\n",k); } } #endif #ifdef _OPENMP k = 0; #pragma omp parallel #pragma omp atomic k++; printf ("Number of Threads counted = %i\n",k); #endif #ifdef ENABLE_DYNAMIC_ALLOC if (argc > 1) { err = memkind_get_kind_by_name(argv[1], &kind); } else { err = memkind_get_kind_by_name("memkind_default", &kind); } if (err) { memkind_error_message(err, err_msg, ERR_MSG_SIZE); fprintf(stderr, "ERROR: %s\n", err_msg); return -1; } err = memkind_posix_memalign(kind, (void **)&a, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array a\n"); return -err; } err = memkind_posix_memalign(kind, (void **)&b, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array b\n"); return -err; } err = memkind_posix_memalign(kind, (void **)&c, 2097152, BytesPerWord * (STREAM_ARRAY_SIZE + OFFSET)); if (err) { fprintf(stderr, "ERROR: Unable to allocate stream array c\n"); return -err; } #endif /* Get initial value for system clock. 
*/ #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; } printf(HLINE); if ( (quantum = checktick()) >= 1) printf("Your clock granularity/precision appears to be " "%d microseconds.\n", quantum); else { printf("Your clock granularity appears to be " "less than one microsecond.\n"); quantum = 1; } t = mysecond(); #pragma omp parallel for for (j = 0; j < STREAM_ARRAY_SIZE; j++) a[j] = 2.0E0 * a[j]; t = 1.0E6 * (mysecond() - t); printf("Each test below will take on the order" " of %d microseconds.\n", (int) t ); printf(" (= %d clock ticks)\n", (int) (t/quantum) ); printf("Increase the size of the arrays if this shows that\n"); printf("you are not getting at least 20 clock ticks per test.\n"); printf(HLINE); printf("WARNING -- The above is only a rough guideline.\n"); printf("For best results, please be sure you know the\n"); printf("precision of your system timer.\n"); printf(HLINE); /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ scalar = 3.0; for (k=0; k<NTIMES; k++) { times[0][k] = mysecond(); #ifdef TUNED tuned_STREAM_Copy(); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) c[j] = a[j]; #endif times[0][k] = mysecond() - times[0][k]; times[1][k] = mysecond(); #ifdef TUNED tuned_STREAM_Scale(scalar); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) b[j] = scalar*c[j]; #endif times[1][k] = mysecond() - times[1][k]; times[2][k] = mysecond(); #ifdef TUNED tuned_STREAM_Add(); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) c[j] = a[j]+b[j]; #endif times[2][k] = mysecond() - times[2][k]; times[3][k] = mysecond(); #ifdef TUNED tuned_STREAM_Triad(scalar); #else #pragma omp parallel for for (j=0; j<STREAM_ARRAY_SIZE; j++) a[j] = b[j]+scalar*c[j]; #endif times[3][k] = mysecond() - times[3][k]; } /* --- SUMMARY --- */ for (k=1; k<NTIMES; k++) /* note -- skip first iteration */ { for (j=0; j<4; j++) { avgtime[j] = avgtime[j] + times[j][k]; mintime[j] = 
MIN(mintime[j], times[j][k]); maxtime[j] = MAX(maxtime[j], times[j][k]); } } printf("Function Best Rate MB/s Avg time Min time Max time\n"); for (j=0; j<4; j++) { avgtime[j] = avgtime[j]/(double)(NTIMES-1); printf("%s%12.1f %11.6f %11.6f %11.6f\n", label[j], 1.0E-06 * bytes[j]/mintime[j], avgtime[j], mintime[j], maxtime[j]); } printf(HLINE); /* --- Check Results --- */ checkSTREAMresults(); printf(HLINE); #ifdef ENABLE_DYNAMIC_ALLOC memkind_free(kind, c); memkind_free(kind, b); memkind_free(kind, a); #endif return 0; }