testcase( const char *name, std::initializer_list<uint32_t> list1, std::initializer_list<uint32_t> list2, std::initializer_list<uint32_t> res_intersection, std::initializer_list<uint32_t> res_union, std::initializer_list<uint32_t> res_difference ) : name(name) { size1 = list1.size(); this->list1 = (uint32_t*)aligned_alloc(64, size1*sizeof(uint32_t)); std::copy(list1.begin(), list1.end(), this->list1); size2 = list2.size(); this->list2 = (uint32_t*)aligned_alloc(64, size2*sizeof(uint32_t)); std::copy(list2.begin(), list2.end(), this->list2); size_intersection = res_intersection.size(); this->res_intersection = (uint32_t*)aligned_alloc(64, size_intersection*sizeof(uint32_t)); std::copy(res_intersection.begin(), res_intersection.end(), this->res_intersection); size_union = res_union.size(); this->res_union = (uint32_t*)aligned_alloc(64, size_union*sizeof(uint32_t)); std::copy(res_union.begin(), res_union.end(), this->res_union); size_difference = res_difference.size(); this->res_difference = (uint32_t*)aligned_alloc(64, size_difference*sizeof(uint32_t)); std::copy(res_difference.begin(), res_difference.end(), this->res_difference); }
/**
 * Decompresses the embedded gr2 sample, dumps both the decompressed and the
 * reference data to disk, and compares them.
 *
 * @return 0 when the decompressed bytes equal the reference bytes, 1 when
 *         they differ or a buffer could not be allocated.
 */
int main(int argc, char const** argv)
{
    (void) argc;
    (void) argv;

    auto cbuf = (uint8_t*) aligned_alloc(4, sizeof(gr2::cdata) + 4);
    auto dbuf = (uint8_t*) aligned_alloc(4, sizeof(gr2::ddata));
    auto ebuf = (uint8_t*) aligned_alloc(4, sizeof(gr2::ddata));
    if (!cbuf || !dbuf || !ebuf) {
        std::free(cbuf);
        std::free(dbuf);
        std::free(ebuf);
        return 1;
    }

    std::memcpy(cbuf, gr2::cdata, sizeof(gr2::cdata));
    std::memset(dbuf, 0, sizeof(gr2::ddata));
    std::memcpy(ebuf, gr2::ddata, sizeof(gr2::ddata));

    gr2::decompress(sizeof(gr2::cdata), cbuf, gr2::pdata[0], gr2::pdata[1],
                    sizeof(gr2::ddata), dbuf);

    // The dumps are raw bytes, so the files are opened in binary mode; text
    // mode would translate line endings on some platforms. A dump that
    // cannot be opened is reported and skipped rather than crashing fwrite.
    const auto dump = [](const char* path, const void* data,
                         std::size_t elem_size, std::size_t count) {
        auto file = std::fopen(path, "wb");
        if (!file) {
            std::perror(path);
            return;
        }
        std::fwrite(data, elem_size, count, file);
        std::fclose(file);
    };
    dump("ddata-02.bin", dbuf, sizeof(*dbuf), sizeof(gr2::ddata));
    dump("edata-02.bin", gr2::ddata, sizeof(*gr2::ddata), sizeof(gr2::ddata));

    // The result is also reflected in the exit status so that a build with
    // NDEBUG (where assert is a no-op) still reports a mismatch.
    const bool identical = std::memcmp(dbuf, ebuf, sizeof(gr2::ddata)) == 0;
    assert(identical);

    std::free(cbuf);
    std::free(dbuf);
    std::free(ebuf);
    return identical ? 0 : 1;
}
/*
 * Sequential 1-D stencil run.
 *
 * Copies `jbi_in` into a working buffer, applies JBI1D_STENCIL to the
 * interior points `args.iters` times (the two boundary points are carried
 * over unchanged), then writes the final state to `jbi_out`.
 *
 * Returns ELAPSED_TIME_S(tend, tbegin) for the iteration loop only; the
 * allocation, the initial copy and the final copy are not timed. `tbegin`
 * and `tend` are declared outside this function.
 */
double ljbi1d_sequential(struct args_dimt args, long *jbi_in, long *jbi_out)
{
    const int num_stencil_iters = args.iters;
    const int n = args.width;
    int t, i;

    long *cur = aligned_alloc(CACHE_LINE_SIZE, sizeof(*cur) * n);
    long *next = aligned_alloc(CACHE_LINE_SIZE, sizeof(*next) * n);

    /* Initial condition. */
    memcpy(cur, jbi_in, n * sizeof(*jbi_in));

    clock_gettime(CLOCK_MONOTONIC, &tbegin);
    t = 0;
    while (t < num_stencil_iters) {
        /* Interior points. */
        for (i = 1; i < n - 1; i++) {
            JBI1D_STENCIL(next, cur);
        }
        /* Boundaries are carried over as they are. */
        next[0] = cur[0];
        next[n - 1] = cur[n - 1];

        /* The new state becomes the input of the next iteration. */
        memcpy(cur, next, n * sizeof(*next));
        t++;
    }
    clock_gettime(CLOCK_MONOTONIC, &tend);

    for (i = 0; i < n; i++) {
        jbi_out[i] = cur[i];
    }

    free(cur);
    free(next);

    return ELAPSED_TIME_S(tend, tbegin);
}
float avx_dot_product(std::vector<float> &av, std::vector<float> &bv) { /* Get SIMD-vector pointers to the start of each vector */ unsigned int niters = av.size() / 8; float *a = (float *) aligned_alloc(32, av.size()*sizeof(float)); float *b = (float *) aligned_alloc(32, av.size()*sizeof(float)); memcpy(a,&av[0],av.size()*sizeof(float)); memcpy(b,&bv[0],bv.size()*sizeof(float)); __m256 *ptrA = (__m256*) &a[0], *ptrB = (__m256*) &b[0]; __m256 res = _mm256_set1_ps(0.0); for (unsigned int i = 0; i < niters; i++, ptrA++,ptrB++) res = _mm256_add_ps(_mm256_dp_ps(*ptrA, *ptrB, 255), res); /* Get result back from the SIMD vector */ float fres[8]; _mm256_storeu_ps (fres, res); int q = 8 * niters; for (unsigned int i = 0; i < av.size() % 8; i++) fres[0] += (a[i+q]*b[i+q]); free(a); free(b); return fres[0] + fres[4]; }
float sse_dot_product(std::vector<float> &av, std::vector<float> &bv) { /* Get SIMD-vector pointers to the start of each vector */ unsigned int niters = av.size() / 4; float zeros[] = {0.0, 0.0, 0.0, 0.0}; float *a = (float *) aligned_alloc(16, av.size()*sizeof(float)); float *b = (float *) aligned_alloc(16, av.size()*sizeof(float)); memcpy(a,&av[0],av.size()*sizeof(float)); memcpy(b,&bv[0],bv.size()*sizeof(float)); __m128 *ptrA = (__m128*) &a[0], *ptrB = (__m128*) &b[0]; __m128 res = _mm_load_ps(zeros); /* Do SIMD dot product */ for (unsigned int i = 0; i < niters; i++, ptrA++,ptrB++) res = _mm_add_ps(_mm_dp_ps(*ptrA, *ptrB, 255), res); /* Get result back from the SIMD vector */ float fres[4]; _mm_store_ps (fres, res); int q = 4 * niters; for (unsigned int i = 0; i < av.size() % 4; i++) fres[0] += (a[i+q]*b[i+q]); free(a); free(b); return fres[0]; }
int lower_middle_init(struct prime_ctx *pctx, uint32_t start_prime, uint32_t end_prime, void **ctx) { struct lower_middle_ctx *sctx = malloc(sizeof (struct lower_middle_ctx)); *ctx = sctx; sctx->start_prime = start_prime; sctx->end_prime = MIN(pctx->run_info.max_sieve_prime, end_prime); assert(sctx->start_prime >= 64); assert(sctx->end_prime <= 128); sctx->block_size = pctx->current_block.block_size; sctx->primebuf[0] = aligned_alloc(32, 67+32); sctx->primebuf[1] = aligned_alloc(32, 67+32); int k = 0; init_offsets(0, 67, sctx->offsets_v3); #define LM_INIT_X(PRIME) \ set_starting_v2(0, PRIME, &sctx->offsets_v2[k++*32]); DO_FOR(LM_INIT_X, USED_PRIMES) set_starting_byte(0, 67, &sctx->offsets[k++*64]); set_starting_byte(0, 71, &sctx->offsets[k++*64]); set_starting_byte(0, 73, &sctx->offsets[k++*64]); set_starting_byte(0, 79, &sctx->offsets[k++*64]); set_starting_byte(0, 83, &sctx->offsets[k++*64]); set_starting_byte(0, 89, &sctx->offsets[k++*64]); return 0; }
/*
 * Creates a software-rendering context.
 *
 * The requested width and height are reduced to the nearest lower multiple
 * of 16. Two 16-bit map surfaces, a palette context, a maxsrc, point data,
 * a beat detector and three float audio buffers are allocated; the surfaces
 * and audio buffers are zero-filled.
 *
 * Returns the new context, or NULL when any allocation fails. On failure
 * whatever was already created is handed to simple_soft_destroy().
 */
struct simple_soft_ctx *simple_soft_init(int w, int h, simple_soft_map_func map_func, int audio_rate, int audio_channels, julia_vis_pixel_format format)
{
	struct simple_soft_ctx *ctx = malloc(sizeof(*ctx));
	if(ctx == NULL)
		return NULL;

	/* Clear every owned pointer first so the failure path can pass a
	   partially built context to simple_soft_destroy(). */
	ctx->map_surf[1] = NULL;
	ctx->map_surf[0] = NULL;
	ctx->maxsrc = NULL;
	ctx->pal_ctx = NULL;
	ctx->pd = NULL;
	ctx->beat = NULL;
	ctx->fft_tmp = NULL;
	ctx->prev_buf = NULL;
	ctx->cur_buf = NULL;

	// force divisible by 16
	ctx->im_w = w - w%16;
	ctx->im_h = h - h%16;

	simple_soft_change_map_func(ctx, map_func);

	ctx->m = 0;

	const size_t surf_bytes = ctx->im_w * ctx->im_h * sizeof(uint16_t);
	ctx->map_surf[0] = aligned_alloc(64, surf_bytes);
	ctx->map_surf[1] = aligned_alloc(64, surf_bytes);
	if(ctx->map_surf[0] == NULL || ctx->map_surf[1] == NULL)
		goto fail;
	memset(ctx->map_surf[0], 0, surf_bytes);
	memset(ctx->map_surf[1], 0, surf_bytes);

	ctx->pal_ctx = pal_ctx_pix_format_new(format);
	ctx->maxsrc_rate = 24; //TODO: add a property that can change this
	ctx->maxsrc = maxsrc_new(ctx->im_w, ctx->im_h);
	/* NOTE(review): rational_julia is not assigned in this function;
	   presumably simple_soft_change_map_func() sets it - confirm. */
	ctx->pd = new_point_data(ctx->rational_julia?4:2);
	if(ctx->maxsrc == NULL || ctx->pal_ctx == NULL || ctx->pd == NULL)
		goto fail;

	ctx->last_beat_time = 0;
	ctx->lastpalstep = 0;
	ctx->maxfrms = 1;

	ctx->beats = 0;
	ctx->beat = beat_new();
	if(ctx->beat == NULL)
		goto fail;

	ctx->audio_bufp = 0;
	ctx->sample_count = 0;
	ctx->sample_rate = audio_rate;
	ctx->channels = audio_channels;

	/* 1024 samples below 50 kHz, 2048 otherwise. */
	if(audio_rate < 50000)
		ctx->nr_samp = 1024;
	else
		ctx->nr_samp = 2048;
	const int nr_samp = ctx->nr_samp;

	const size_t fft_bytes = nr_samp*sizeof(float);
	const size_t half_bytes = sizeof(float)*nr_samp/2;
	ctx->fft_tmp = aligned_alloc(64, fft_bytes);
	ctx->cur_buf = aligned_alloc(64, half_bytes);
	ctx->prev_buf = aligned_alloc(64, half_bytes);
	if(ctx->fft_tmp == NULL || ctx->cur_buf == NULL || ctx->prev_buf == NULL)
		goto fail;
	memset(ctx->fft_tmp, 0, fft_bytes);
	memset(ctx->cur_buf, 0, half_bytes);
	memset(ctx->prev_buf, 0, half_bytes);

	return ctx;

fail:
	simple_soft_destroy(ctx);
	return NULL;
}
// clear_color is RGBA8888 void r3d_init(uint32_t* fb) { FB = fb; // RENDER CONTROL LIST ------------------------------------------------- r3d_bin_address = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE); r3d_bin_base = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE); //r3d_render_ctl_list = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * 10); // FIXME: enough? //uint8_t* cl = r3d_render_ctl_list; printf("-- bin address at %p\r\n",r3d_bin_address); printf("-- bin base at %p\r\n",r3d_bin_base); //printf("-- render control list at %p\r\n",r3d_render_ctl_list); r3d_overspill_mem = aligned_alloc(R3D_ALIGN,R3D_OVERSPILL_SIZE); r3d_bin_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS); r3d_render_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS); r3d_vertex_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS); r3d_shader_states = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * NUM_CTL_LISTS); r3d_gouraud_shader = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * 1); vc4_gouraud_shader(r3d_gouraud_shader); r3d_write_render_list(r3d_render_ctl_lists); cl_idx = -1; }
/*
 * Fills a temporary buffer with sqrt(x)/x, four floats at a time using SSE,
 * and repeats the pass 100000 times. x starts at {1, 2, 3, 4} and advances
 * by 4 per vector; it is not reset between passes.
 *
 * The buffer holds (end - start + 1) floats; only whole groups of four are
 * written. Nothing is returned. The buffer is released before returning,
 * and a non-positive element count or a failed allocation makes the
 * function return without doing any work.
 */
void calculateSSE(int start, int end)
{
	const int size = end - start + 1;
	if (size <= 0)
		return;

	// we use aligned memory, because SSE instructions are really slow
	// working on unaligned memory. aligned_alloc() is only required to accept
	// sizes that are a multiple of the alignment, so round up to 16 bytes.
	const size_t bytes = ((size_t)size * sizeof(float) + 15) / 16 * 16;
	float* result = (float*)aligned_alloc(16, bytes);
	if (result == NULL)
		return;

	__m128 x = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
	const __m128 delta_x = _mm_set_ps1(4.0f);
	__m128* sse_result = (__m128*)result;

	const int sse_length = size / 4;

	for (int loop = 0; loop < 100000; ++loop)
	{
		for (int i = 0; i < sse_length; ++i)
		{
			const __m128 sqrt_result = _mm_sqrt_ps(x);
			sse_result[i] = _mm_div_ps(sqrt_result, x);

			// move x value to next 4 numbers
			x = _mm_add_ps(x, delta_x);
		}
	}

	free(result);
}
/*
 * Allocates `size` bytes aligned to ALLOCATION_ALIGNMENT and fills them
 * with output from random(), one `long long` word at a time.
 *
 * Each word is built from two random() calls: one shifted into the upper
 * 32 bits, one added into the lower bits. Only whole words are written;
 * when `size` is not a multiple of the word size a warning is printed and
 * the trailing bytes are left as allocated.
 *
 * Returns the buffer (to be released with free()), or NULL after printing
 * an error when the allocation fails.
 */
void* random_data(__u64 size)
{
	__u64 bytes_remaining = size;
	long long int *offset;
	void *data = aligned_alloc(ALLOCATION_ALIGNMENT, size);

	if(size % sizeof(long long int)) {
		fprintf(stderr, "Warning: randomized data size is not multiple of %zu bytes, "
			"last %llu byte(s) will be left unrandomized\n",
			sizeof(long long int),
			(unsigned long long)(size % sizeof(long long int)));
	}

	if(!data) {
		fprintf(stderr, "Error: Failed to allocate space for random data\n");
		return NULL;
	}

	offset = data;

	// Generate random data. ">=" so that the last complete word is filled
	// too; only a partial trailing word is skipped, as the warning says.
	while(bytes_remaining >= sizeof(long long int)) {
		// Shift to write full 64 bits. The cast is to long long so the shift
		// is well defined even where long is only 32 bits wide.
		*offset = (long long int)random() << 32;
		*offset += (long long int)random();
		offset++;
		bytes_remaining -= sizeof(long long int);
	}

	return data;
}
/*
 * Creates a cache-line-aligned OOLock wrapper around a new TATAS lock and
 * attaches the TATAS method table.
 *
 * Returns the wrapper, or NULL if the wrapper cannot be allocated. The
 * wrapper is allocated before the inner lock is created so that a failed
 * wrapper allocation leaves nothing behind.
 */
OOLock * oo_tatas_create(){
    OOLock * ool = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));
    if(ool == NULL){
        return NULL;
    }
    ool->lock = plain_tatas_create();
    ool->m = &TATAS_LOCK_METHOD_TABLE;
    return ool;
}
/*
 * Thin wrapper around aligned_alloc() that reports a failed allocation via
 * dbg_printf(). The result of aligned_alloc() is returned unchanged, so the
 * caller still receives NULL on failure.
 */
void *aligned_alloc_and_null_check(size_t alignemnt, size_t size)
{
    void *block = aligned_alloc(alignemnt, size);
    if (block != NULL) {
        return block;
    }
    dbg_printf("aligned_alloc failed\n");
    return NULL;
}
/*
 * Ensures ccsynchNextLocalNode is set: when it is still NULL, a
 * cache-line-aligned CCSynchLockNode is allocated and initialised with
 * ccsynchlock_initNode(). Does nothing when a node is already present.
 */
static inline void ccsynchlock_initLocalIfNeeded(){
    if(ccsynchNextLocalNode != NULL){
        return;
    }
    ccsynchNextLocalNode = aligned_alloc(CACHE_LINE_SIZE, sizeof(CCSynchLockNode));
    ccsynchlock_initNode(ccsynchNextLocalNode);
}
static void* _memory_reallocate_malloc( void* p, uint64_t size, unsigned int align ) { align = _memory_get_align( align ); #if FOUNDATION_PLATFORM_WINDOWS return _aligned_realloc( p, (size_t)size, align ); #else if( align ) { //No realloc aligned available void* memory = aligned_alloc( align, (size_t)size ); if( !memory ) { log_panicf( ERROR_OUT_OF_MEMORY, "Unable to reallocate memory: %s", system_error_message( 0 ) ); return 0; } if( p ) { size_t prev_size = malloc_usable_size( p ); memcpy( memory, p, ( size < prev_size ) ? size : prev_size ); } return memory; } return realloc( p, (size_t)size ); #endif }
/*
 * Writes every entry of files[] using two scratch buffers of `blocksize`
 * bytes: a zero-filled one and one aligned to `ull_bytes` whose contents
 * are left as allocated.
 *
 * Stops at the first write_file() failure. Returns 0 on success, ENOMEM if
 * a buffer cannot be allocated, otherwise the error from write_file().
 */
static int create_files(void)
{
	char *zeros;
	char *garbage;
	int err = 0;
	int i;

	zeros = calloc(1, blocksize);
	if (zeros == NULL)
		return ENOMEM;

	garbage = aligned_alloc(ull_bytes, blocksize);
	if (garbage == NULL) {
		free(zeros);
		return ENOMEM;
	}

	/* Stop as soon as one file fails. */
	for (i = 0; i < numfiles && err == 0; i++)
		err = write_file(files[i], zeros, garbage);

	free(zeros);
	free(garbage);
	return err;
}
/*
 * Creates a cache-line-aligned OOLock wrapper around a new CCSynch lock and
 * attaches the CCSynch method table.
 *
 * Returns the wrapper, or NULL if the wrapper cannot be allocated. The
 * wrapper is allocated before the inner lock is created so that a failed
 * wrapper allocation leaves nothing behind.
 */
OOLock * oo_ccsynch_create(){
    OOLock * ool = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));
    if(ool == NULL){
        return NULL;
    }
    ool->lock = plain_ccsynch_create();
    ool->m = &CCSYNCH_LOCK_METHOD_TABLE;
    return ool;
}
bool HapMap::loadHapBinary(const char* filename) { aligned_free(m_data); std::ifstream f(filename, std::ios::in | std::ios::binary); if (!f.good()) { std::cerr << "ERROR: Cannot open file or file not found: " << filename << std::endl; return false; } uint64_t check; f.read((char*) &check, sizeof(uint64_t)); if (check != magicNumber) { std::cerr << "ERROR: Wrong file type: " << filename << ". Expected binary format." << std::endl; return false; } f.read((char*) &m_numSnps, sizeof(uint64_t)); f.read((char*) &m_snpLength, sizeof(uint64_t)); m_snpDataSize = ::bitsetSize<PrimitiveType>(m_snpLength); m_snpDataSize64 = ::bitsetSize<uint64_t>(m_snpLength); m_snpDataSizeULL = ::bitsetSize<unsigned long long>(m_snpLength); m_data = (PrimitiveType*) aligned_alloc(128, m_snpDataSize*m_numSnps*sizeof(PrimitiveType)); for(uint64_t i = 0; i < m_numSnps; ++i) f.read((char*) &m_data[i*this->m_snpDataSize], sizeof(uint64_t)*m_snpDataSize64); return true; }
/*
 * Creates a cache-line-aligned OOLock wrapper around a new MRQD lock and
 * attaches the MRQD method table.
 *
 * Returns the wrapper, or NULL if the wrapper cannot be allocated. The
 * wrapper is allocated before the inner lock is created so that a failed
 * wrapper allocation leaves nothing behind.
 */
OOLock * oo_mrqd_create(){
    OOLock * ool = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));
    if(ool == NULL){
        return NULL;
    }
    ool->lock = plain_mrqd_create();
    ool->m = &MRQD_LOCK_METHOD_TABLE;
    return ool;
}
/**
 * Allocate a new picture in the heap.
 *
 * This function allocates a fake direct buffer in memory, which can be
 * used exactly like a video buffer. The video output thread then manages
 * how it gets displayed.
 *
 * All planes share one 16-byte-aligned allocation; each plane's p_pixels
 * points directly behind the previous plane.
 *
 * \return VLC_SUCCESS on success, VLC_ENOMEM when a plane has invalid
 *         dimensions or the total size would overflow, VLC_EGENERIC when
 *         the allocation fails. On error i_planes is reset to 0.
 */
static int AllocatePicture( picture_t *p_pic )
{
    /* Calculate how big the new image should be */
    size_t i_bytes = 0;

    for( int i = 0; i < p_pic->i_planes; i++ )
    {
        const plane_t *p = &p_pic->p[i];

        if( p->i_pitch < 0 || p->i_lines <= 0
         || (size_t)p->i_pitch > (SIZE_MAX - i_bytes)/p->i_lines )
        {
            p_pic->i_planes = 0;
            return VLC_ENOMEM;
        }
        /* Multiply in size_t: the guard above is done in size_t, but an int
         * product could still overflow before being widened. */
        i_bytes += (size_t)p->i_pitch * (size_t)p->i_lines;
    }

    uint8_t *p_data = aligned_alloc( 16, i_bytes );
    if( i_bytes > 0 && p_data == NULL )
    {
        p_pic->i_planes = 0;
        return VLC_EGENERIC;
    }

    /* Fill the p_pixels field for each plane */
    p_pic->p[0].p_pixels = p_data;
    for( int i = 1; i < p_pic->i_planes; i++ )
    {
        const plane_t *p_prev = &p_pic->p[i-1];

        p_pic->p[i].p_pixels =
            &p_prev->p_pixels[ (size_t)p_prev->i_lines * (size_t)p_prev->i_pitch ];
    }

    return VLC_SUCCESS;
}
/**
 * Aligned array allocation hook.
 *
 * When `alignment` is non-zero and `alignmentOffset` is a multiple of it,
 * the block comes from aligned_alloc(); the size is rounded up to a
 * multiple of the alignment because aligned_alloc() is only required to
 * accept such sizes. In every other case (zero alignment, or an offset that
 * is not a multiple of the alignment) the block comes from plain malloc().
 *
 * pName, flags, debugFlags, file and line are accepted for interface
 * compatibility and are not used.
 *
 * @return the allocated block, or a null pointer when the allocation fails
 *         or the padded size would overflow.
 */
void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line)
{
	(void)pName;
	(void)flags;
	(void)debugFlags;
	(void)file;
	(void)line;

	// alignment == 0 must be rejected before it is used as a divisor.
	if (alignment != 0 && (alignmentOffset % alignment) == 0)
	{
		const size_t remainder = size % alignment;
		size_t padded = size;
		if (remainder != 0)
		{
			const size_t pad = alignment - remainder;
			if (size > static_cast<size_t>(-1) - pad)
				return nullptr; // padded size does not fit in size_t
			padded = size + pad;
		}
		return aligned_alloc(alignment, padded);
	}

	return malloc(size);
}
void* mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) { // invalid or misaligned length if (length == 0 || (length & 4095) != 0) { errno = EINVAL; return MAP_FAILED; } // associate some VA space with open file @fd // for now just allocate page-aligned on heap mmap_entry_t entry; entry.addr = aligned_alloc(4096, length); entry.length = length; entry.prot = prot; entry.flags = flags; entry.fd = fd; entry.offset = offset; /// Note: we may have to read a file here to properly create the /// in-memory mapping. Unfortunately, this would mean both mean /// mmap must be asynch, and also mean we have to implement Poshitx. // create the mapping _mmap_entries[addr] = entry; return entry.addr; }
object_t *_qcgc_allocate_large(size_t size) { #if CHECKED assert(size >= 1<<QCGC_LARGE_ALLOC_THRESHOLD_EXP); #endif if (UNLIKELY(qcgc_state.cells_since_incmark > qcgc_state.incmark_threshold)) { if (qcgc_state.incmark_since_sweep == qcgc_state.incmark_to_sweep) { qcgc_collect(); } else { qcgc_incmark(); qcgc_state.incmark_since_sweep++; } } size_t rounded_size = (size + QCGC_ARENA_SIZE - 1) & ~(QCGC_ARENA_SIZE - 1); object_t *result = aligned_alloc(QCGC_ARENA_SIZE, rounded_size); #if QCGC_INIT_ZERO memset(result, 0, size); #endif qcgc_hbtable_insert(result); result->flags = QCGC_GRAY_FLAG; qcgc_state.cells_since_incmark += bytes_to_cells(size); return result; }
/*
 * Allocates `size` bytes aligned to `align`, using whichever aligned
 * allocator the build configuration provides.
 *
 * With MEMDBG_ON the call is forwarded to MEMDBG_alloc_align() together
 * with the caller's file and line. In the aligned_alloc() branches the size
 * is first rounded up to a multiple of `align`.
 *
 * Failure handling differs per branch as written: the HAVE_POSIX_MEMALIGN
 * and HAVE_ALIGNED_ALLOC branches call pexit(), the remaining branches only
 * call perror() and return the (NULL) pointer.
 */
void * mem_alloc_align_func(size_t size, size_t align
#if defined(MEMDBG_ON)
	, char * file, int line
#endif
	)
{
	void * ptr = NULL;
#if defined(MEMDBG_ON)
	ptr = (char *) MEMDBG_alloc_align(size, align, file, line);
#elif HAVE_POSIX_MEMALIGN
	if (posix_memalign(&ptr, align, size))
		pexit("posix_memalign");
#elif HAVE_ALIGNED_ALLOC
	/* According to the Linux man page, "size should be a multiple of
	   alignment", whatever they mean with "should"... This does not
	   make any sense whatsoever but we round it up to comply. */
	size = ((size + (align - 1)) / align) * align;
	if (!(ptr = aligned_alloc(align, size)))
		pexit("aligned_alloc");
#elif HAVE_MEMALIGN
	/* Let's just pray this implementation can actually free it */
	/* memalign() takes (alignment, size) and returns the block; the former
	   three-argument call mixed it up with posix_memalign(). */
	if (!(ptr = memalign(align, size)))
		perror("memalign");
#elif HAVE___MINGW_ALIGNED_MALLOC
	if (!(ptr = __mingw_aligned_malloc(size, align)))
		perror("__mingw_aligned_malloc");
#elif HAVE__ALIGNED_MALLOC
	if (!(ptr = _aligned_malloc(size, align)))
		perror("aligned_malloc");
#elif AC_BUILT
#error No suitable aligned alloc found, please report to john-dev mailing list (state your OS details).

	/* we need an aligned alloc function for legacy builds */
#elif _ISOC11_SOURCE
	size = ((size + (align - 1)) / align) * align;
	if (!(ptr = aligned_alloc(align, size)))
		perror("aligned_alloc");
#else
	if (posix_memalign(&ptr, align, size))
		perror("posix_memalign");
#endif
	return ptr;
}
int main(void) { pstatus_t status; void *buf; /* * Alignment must be sizeof (void *) (word) aligned. */ VERIFY3P(aligned_alloc(sizeof (void *) - 1, 16), ==, NULL); VERIFY3S(errno, ==, EINVAL); VERIFY3P(aligned_alloc(sizeof (void *) + 1, 16), ==, NULL); VERIFY3S(errno, ==, EINVAL); VERIFY3P(aligned_alloc(23, 16), ==, NULL); VERIFY3S(errno, ==, EINVAL); buf = aligned_alloc(sizeof (void *), 16); VERIFY3P(buf, !=, NULL); free(buf); /* * Cause ENOMEM */ VERIFY0(proc_get_status(getpid(), &status)); VERIFY3P(mmap((caddr_t)P2ROUNDUP(status.pr_brkbase + status.pr_brksize, 0x1000), 0x1000, PROT_READ, MAP_ANON | MAP_FIXED | MAP_PRIVATE, -1, 0), !=, (void *)-1); for (;;) { if (malloc(16) == NULL) break; } for (;;) { if (aligned_alloc(sizeof (void *), 16) == NULL) break; } VERIFY3P(aligned_alloc(sizeof (void *), 16), ==, NULL); VERIFY3S(errno, ==, ENOMEM); return (0); }
void* allocate(size_t size, const char*, const char*, int) override { void* ret = aligned_alloc(16, size); // g_log_info.log("Physics") << "Allocated " << size << " bytes for " << typeName << " // from " << filename << "(" << line << ")"; ASSERT(ret); return ret; }
/**
 * Allocates zero-initialised memory aligned to 64B,
 * which is one cache line on intel processors.
 *
 * The request is rounded up to a multiple of 64 bytes because
 * aligned_alloc() is only required to accept sizes that are a multiple of
 * the alignment; the whole block is zeroed.
 *
 * @param size number of bytes requested
 * @return pointer to the block (release with free()), or nullptr after an
 *         error message on stdout when the allocation fails.
 */
double *calloc64ByteAligned(size_t size) {
	const size_t alignment = 64;

	const size_t remainder = size % alignment;
	size_t padded = size;
	// Pad only when the padded size still fits in size_t; otherwise pass the
	// request through unchanged and let the allocator reject it.
	if (remainder != 0 && size <= static_cast<size_t>(-1) - (alignment - remainder))
		padded = size + (alignment - remainder);

	void* ptr = aligned_alloc(alignment, padded);
	if(ptr == nullptr) {
		std::cout << "ERROR: Memory allocation failed (" << size << "B)" << std::endl;
		// Never hand a null pointer to memset.
		return nullptr;
	}
	memset(ptr, 0, padded);
	return (double*) ptr;
}
/** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer.
 *
 * With AVX/AVX2 enabled the buffer is aligned to 32 bytes (posix_memalign() if VIENNACL_WITH_POSIX_MEMALIGN is defined, aligned_alloc() otherwise); without AVX it comes from new char[].
 * Both aligned paths throw std::bad_alloc if the allocation fails.
 *
 * @param size_in_bytes   Number of bytes to allocate
 * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
 *
 */
inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
{
#if defined(VIENNACL_WITH_AVX2) || defined(VIENNACL_WITH_AVX)
  void *mem_ptr = NULL;
#  ifdef VIENNACL_WITH_POSIX_MEMALIGN
  if (posix_memalign(&mem_ptr, 32, size_in_bytes))
    throw std::bad_alloc();
#  else
  // "Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()" => added above
  // aligned_alloc() is only required to accept sizes that are a multiple of the alignment, so round up to 32 bytes.
  vcl_size_t padded_size = size_in_bytes;
  if (size_in_bytes % 32 != 0 && size_in_bytes <= static_cast<vcl_size_t>(-1) - 32)
    padded_size = size_in_bytes + (32 - size_in_bytes % 32);
  mem_ptr = aligned_alloc(32, padded_size);
  // A zero-byte request may legitimately yield a null pointer.
  if (!mem_ptr && size_in_bytes > 0)
    throw std::bad_alloc();
#  endif
  // NOTE(review): the block comes from a malloc-family allocator; confirm that detail::array_deleter<char> releases it with a matching deallocation.
  handle_type new_handle(reinterpret_cast<char*>(mem_ptr), detail::array_deleter<char>());
#else
  handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
#endif

  // Nothing to copy: hand back the uninitialized buffer.
  if (!host_ptr)
    return new_handle;

  // copy data:
  char * raw_ptr = new_handle.get();
  const char * data_ptr = static_cast<const char *>(host_ptr);
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
  for (long i=0; i<long(size_in_bytes); ++i)
    raw_ptr[i] = data_ptr[i];

  return new_handle;
}
/**
 * Allocate an element from the requested pool.
 * This function uses the feature that pointers to void and char are
 * interchangeable.
 *
 * 0. With POOL_FREE defined and a non-empty free list, the head of the free
 *    list is unlinked and returned (zeroed first if the pool asks for it).
 * 1. If there is no current block or the current block is exhausted, move
 *    on to the next block of the chain, allocating and chaining a new
 *    block when there is none.
 * 2. Zero the block if required;
 * 3. Return the next element of the current block and advance the
 *    allocation pointer by one element.
 *
 * @param mp The pool to allocate from.
 * @return Pointer to the element.
 */
void *pool_alloc(Pool_desc *mp)
{
#ifdef POOL_FREE
	if (NULL != mp->free_list)
	{
		void *alloc_next = mp->free_list;
		/* The first bytes of a freed element hold the link to the next one. */
		mp->free_list = *(char **)mp->free_list;
		if (mp->zero_out) memset(alloc_next, 0, mp->element_size);
		return alloc_next;
	}
#endif // POOL_FREE

	mp->curr_elements++; /* For stats. */

	/* Either nothing has been allocated yet, or the allocation pointer has
	 * reached the end of the data area of the current block. */
	if ((NULL == mp->alloc_next) || (mp->alloc_next == mp->ring + mp->data_size))
	{
		/* A pool marked "exact" may only get here for its first block. */
		assert(!mp->exact || (NULL == mp->alloc_next),
				 "Too many elements %zu>%zu (pool '%s' created in %s())",
				 mp->curr_elements, mp->num_elements, mp->name, mp->func);

		/* No current block or current block exhausted - obtain another one. */
		char *prev = mp->ring; /* Remember current block for possible chaining. */
		if (NULL != mp->ring)
		{
			/* Next block already exists. */
			/* NOTE(review): POOL_NEXT_BLOCK presumably reads the link stored
			 * at offset data_size of the block - confirm with the macro. */
			mp->ring = POOL_NEXT_BLOCK(mp->ring, mp->data_size);
		}

		if (NULL == mp->ring)
		{
			/* Allocate a new block and chain it. */
			mp->ring = aligned_alloc(mp->alignment, mp->block_size);
			if (NULL == mp->ring)
			{
				/* aligned_alloc() has strict requirements. */
				char errbuf[64];
				/* NOTE(review): the GNU variant of strerror_r() returns the
				 * message instead of always filling the buffer - confirm
				 * which variant this build gets. */
				strerror_r(errno, errbuf, sizeof(errbuf));
				assert(NULL != mp->ring, "Block/element sizes %zu/%zu: %s",
				       mp->block_size, mp->element_size, errbuf);
			}
			if (NULL == mp->alloc_next)
				mp->chain = mp->ring; /* This is the start of the chain. */
			else
				POOL_NEXT_BLOCK(prev, mp->data_size) = mp->ring;
			/* The new block is the last one of the chain. */
			POOL_NEXT_BLOCK(mp->ring, mp->data_size) = NULL;
			//printf("New ring %p next %p\n", mp->ring,
			//POOL_NEXT_BLOCK(mp->ring, mp->data_size));
		} /* Else reuse existing block. */

		if (mp->zero_out) memset(mp->ring, 0, mp->data_size);
		mp->alloc_next = mp->ring;
	}

	/* Grab a new element. */
	void *alloc_next = mp->alloc_next;
	mp->alloc_next += mp->element_size;
	return alloc_next;
}
void run( const testcase *tests, size_t tests_size, std::vector<std::pair<const char*,func_t>> f_intersection, std::vector<std::pair<const char*,func_t>> f_union, std::vector<std::pair<const char*,func_t>> f_difference ){ //for(const auto &t : tests){ for(size_t i=0; i<tests_size; ++i){ const auto &t = tests[i]; uint32_t *res = (uint32_t*)aligned_alloc(64, (t.size1+t.size2)*sizeof(uint32_t)); for(const auto &f : f_intersection){ size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res); if(!equivalent(res, size_res, t.res_intersection, t.size_intersection)){ //TODO printf("test \"%s\", intersection \"%s\" wrong\nlist1 : ", t.name, f.first); for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]); printf("\nlist2 : "); for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]); printf("\nresult: "); for(size_t i=0; i<size_res; ++i) printf("%i, ", res[i]); printf("\nexpect: "); for(size_t i=0; i<t.size_intersection; ++i) printf("%i, ", t.res_intersection[i]); printf("\n"); } } for(const auto &f : f_union){ size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res); if(!equivalent(res, size_res, t.res_union, t.size_union)){ //TODO printf("test \"%s\", union \"%s\" wrong\nlist1 : ", t.name, f.first); for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]); printf("\nlist2 : "); for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]); printf("\nresult: "); for(size_t i=0; i<size_res; ++i) printf("%i, ", res[i]); printf("\nexpect: "); for(size_t i=0; i<t.size_union; ++i) printf("%i, ", t.res_union[i]); printf("\n"); } } for(const auto &f : f_difference){ size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res); if(!equivalent(res, size_res, t.res_difference, t.size_difference)){ //TODO printf("test \"%s\", difference \"%s\" wrong\nlist1 : ", t.name, f.first); for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]); printf("\nlist2 : "); for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]); printf("\nresult: "); for(size_t 
i=0; i<size_res; ++i) printf("%i, ", res[i]); printf("\nexpect: "); for(size_t i=0; i<t.size_difference; ++i) printf("%i, ", t.res_difference[i]); printf("\n"); } } free(res); } }
/*
 * Floating-point workload: fills a 16-byte-aligned array of 2^30 floats
 * with (i*i)/sqrt(i), releases it and prints a line break.
 *
 * The product is formed in double so it no longer wraps around in 32 bits
 * for i above 65535. Element 0 is 0/0, i.e. NaN, as before. If the buffer
 * (4 GiB) cannot be allocated, or its byte size does not fit in size_t, an
 * error is printed to stderr and the function returns without computing.
 */
void bigfl_calc(void){
	const size_t count = (size_t)1024*1024*1024;
	float *val;

	/* On a 32-bit size_t the byte count would overflow to 0. */
	if(count > SIZE_MAX / sizeof(float)){
		fprintf(stderr, "bigfl_calc: buffer size exceeds the address space\r\n");
		return;
	}

	val = (float*)aligned_alloc(16, count*sizeof(float));
	if(val == NULL){
		fprintf(stderr, "bigfl_calc: cannot allocate buffer\r\n");
		return;
	}

	for(uint32_t i=0; i < count; i++){
		const double x = (double)i;
		val[i] = (float)((x*x)/sqrt(x));
	}

	free(val);
	printf("\r\n");
}