Exemplo n.º 1
0
	/**
	 * Builds a test case by deep-copying the two input lists and the three
	 * expected results into 64-byte aligned heap buffers.
	 *
	 * @param name              label of the test; the pointer is stored, not copied
	 * @param list1             first input list
	 * @param list2             second input list
	 * @param res_intersection  expected intersection of list1 and list2
	 * @param res_union         expected union of list1 and list2
	 * @param res_difference    expected difference of list1 and list2
	 *
	 * A buffer pointer is left null if its allocation fails.
	 */
	testcase(
		const char *name,
		std::initializer_list<uint32_t> list1,
		std::initializer_list<uint32_t> list2,
		std::initializer_list<uint32_t> res_intersection,
		std::initializer_list<uint32_t> res_union,
		std::initializer_list<uint32_t> res_difference
	) : name(name)
	{
		// aligned_alloc() requires the size to be a multiple of the alignment,
		// and a zero-byte request has implementation-defined results, so every
		// request is padded up to a whole (non-zero) number of 64-byte lines.
		auto clone = [](std::initializer_list<uint32_t> src) -> uint32_t* {
			const size_t bytes = src.size() * sizeof(uint32_t);
			const size_t padded = (bytes == 0) ? 64 : (bytes + 63) / 64 * 64;
			uint32_t *dst = (uint32_t*)aligned_alloc(64, padded);
			if(dst != nullptr)
				std::copy(src.begin(), src.end(), dst);
			return dst;
		};

		size1 = list1.size();
		this->list1 = clone(list1);
		size2 = list2.size();
		this->list2 = clone(list2);
		size_intersection = res_intersection.size();
		this->res_intersection = clone(res_intersection);
		size_union = res_union.size();
		this->res_union = clone(res_union);
		size_difference = res_difference.size();
		this->res_difference = clone(res_difference);
	}
Exemplo n.º 2
0
/**
 * Decompresses gr2::cdata into a scratch buffer, dumps both the produced and
 * the expected bytes to "ddata-02.bin" / "edata-02.bin", and asserts that the
 * decompressed output equals gr2::ddata.
 *
 * @return 0 on success, 1 if a working buffer could not be allocated.
 */
int main(int argc, char const** argv) {
  (void) argc;
  (void) argv;

  // aligned_alloc() requires a size that is a multiple of the alignment.
  auto alloc4 = [](std::size_t bytes) {
    return (uint8_t*) aligned_alloc(4, (bytes + 3) & ~std::size_t{3});
  };

  // The compressed buffer keeps the original 4 bytes of slack after the data.
  auto cbuf = alloc4(sizeof(gr2::cdata) + 4);
  auto dbuf = alloc4(sizeof(gr2::ddata));
  auto ebuf = alloc4(sizeof(gr2::ddata));
  if (!cbuf || !dbuf || !ebuf) {
    std::fputs("error: out of memory\n", stderr);
    std::free(cbuf);
    std::free(dbuf);
    std::free(ebuf);
    return 1;
  }

  std::memcpy(cbuf, gr2::cdata, sizeof(gr2::cdata));
  std::memset(dbuf, 0, sizeof(gr2::ddata));
  std::memcpy(ebuf, gr2::ddata, sizeof(gr2::ddata));

  gr2::decompress(sizeof(gr2::cdata), cbuf, gr2::pdata[0], gr2::pdata[1], sizeof(gr2::ddata), dbuf);

  // Best-effort dump of raw bytes. "wb" keeps the data intact on platforms
  // that translate line endings in text mode; a file that cannot be opened is
  // reported and skipped instead of passing a null FILE* to fwrite().
  auto dump = [](const char* path, const void* data, std::size_t bytes) {
    auto file = std::fopen(path, "wb");
    if (!file) {
      std::perror(path);
      return;
    }
    std::fwrite(data, 1, bytes, file);
    std::fclose(file);
  };

  dump("ddata-02.bin", dbuf, sizeof(gr2::ddata));
  dump("edata-02.bin", gr2::ddata, sizeof(gr2::ddata));

  assert(std::memcmp(dbuf, ebuf, sizeof(gr2::ddata)) == 0);

  std::free(cbuf);
  std::free(dbuf);
  std::free(ebuf);
  return 0;
}
Exemplo n.º 3
0
/*
 * Sequential 1D stencil run: applies JBI1D_STENCIL to the interior points for
 * args.iters iterations over an array of args.width longs, copying the two
 * boundary cells unchanged, and writes the final state to jbi_out.
 *
 * Returns the elapsed time of the iteration loop in seconds, or -1.0 if the
 * working buffers could not be allocated (jbi_out is left untouched then).
 */
double ljbi1d_sequential(struct args_dimt args, long *jbi_in, long *jbi_out) {
  int num_stencil_iters = args.iters, n = args.width;
  int t, i;
  long *l1, *l2;

  /* aligned_alloc() requires the size to be a multiple of the alignment. */
  size_t bytes = sizeof(*l1) * n;
  size_t padded = (bytes + (CACHE_LINE_SIZE) - 1) / (CACHE_LINE_SIZE) * (CACHE_LINE_SIZE);

  l1 = aligned_alloc(CACHE_LINE_SIZE, padded);
  l2 = aligned_alloc(CACHE_LINE_SIZE, padded);
  if (l1 == NULL || l2 == NULL) {
    free(l1);
    free(l2);
    return -1.0;
  }
  memcpy(l1, jbi_in, bytes);

  clock_gettime(CLOCK_MONOTONIC, &tbegin);

  for (t = 0; t < num_stencil_iters; t++) {
    /* The stencil macro is expected to use the loop variable `i`. */
    for (i = 1; i < n - 1; i++) {
      JBI1D_STENCIL(l2, l1);
    }
    /* Boundary cells are carried over unchanged. */
    l2[0] = l1[0];
    l2[n - 1] = l1[n - 1];
    memcpy(l1, l2, n * sizeof(*l2));
  }

  clock_gettime(CLOCK_MONOTONIC, &tend);

  memcpy(jbi_out, l1, bytes);

  free(l1);
  free(l2);

  return ELAPSED_TIME_S(tend, tbegin);
}
Exemplo n.º 4
0
    /**
     * Dot product of two float vectors using 256-bit AVX intrinsics.
     *
     * The vectors are read in place with unaligned loads, so no temporary
     * buffers are allocated. If the vectors differ in length, only the common
     * prefix (the shorter length) is used.
     *
     * @param av first operand
     * @param bv second operand
     * @return sum of av[i] * bv[i] over the common prefix
     */
    float avx_dot_product(std::vector<float> &av, std::vector<float> &bv)
    {
      const std::size_t n = av.size() < bv.size() ? av.size() : bv.size();
      const std::size_t niters = n / 8;
      const float *a = av.data();
      const float *b = bv.data();

      __m256 res = _mm256_set1_ps(0.0);

      /* _mm256_dp_ps works per 128-bit lane: with mask 255 each lane holds
         the dot product of its own four elements in every slot. */
      for (std::size_t i = 0; i < niters; i++)
        res = _mm256_add_ps(
            _mm256_dp_ps(_mm256_loadu_ps(a + 8 * i), _mm256_loadu_ps(b + 8 * i), 255),
            res);

      /* Get result back from the SIMD vector */
      float fres[8];
      _mm256_storeu_ps (fres, res);

      /* Scalar tail for the elements that do not fill a whole SIMD vector */
      for (std::size_t i = 8 * niters; i < n; i++)
        fres[0] += (a[i]*b[i]);

      /* Slot 0 carries the low-lane sum, slot 4 the high-lane sum */
      return fres[0] + fres[4];
    }
Exemplo n.º 5
0
    /**
     * Dot product of two float vectors using 128-bit SSE4.1 intrinsics.
     *
     * The vectors are read in place with unaligned loads, so no temporary
     * buffers are allocated and no stack array has to be 16-byte aligned.
     * If the vectors differ in length, only the common prefix is used.
     *
     * @param av first operand
     * @param bv second operand
     * @return sum of av[i] * bv[i] over the common prefix
     */
    float sse_dot_product(std::vector<float> &av, std::vector<float> &bv)
    {
      const std::size_t n = av.size() < bv.size() ? av.size() : bv.size();
      const std::size_t niters = n / 4;
      const float *a = av.data();
      const float *b = bv.data();

      __m128 res = _mm_setzero_ps();

      /* Do SIMD dot product: with mask 255 every slot of the _mm_dp_ps
         result holds the dot product of the four element pairs. */
      for (std::size_t i = 0; i < niters; i++)
        res = _mm_add_ps(
            _mm_dp_ps(_mm_loadu_ps(a + 4 * i), _mm_loadu_ps(b + 4 * i), 255),
            res);

      /* Get result back from the SIMD vector (unaligned store: fres is a
         plain stack array) */
      float fres[4];
      _mm_storeu_ps (fres, res);

      /* Scalar tail for the elements that do not fill a whole SIMD vector */
      for (std::size_t i = 4 * niters; i < n; i++)
        fres[0] += (a[i]*b[i]);

      return fres[0];
    }
Exemplo n.º 6
0
/*
 * Allocates and initialises a lower_middle_ctx and stores it in *ctx.
 *
 * The prime range is [start_prime, min(pctx->run_info.max_sieve_prime,
 * end_prime)] and is asserted to lie within 64..128.
 *
 * Returns 0 on success. On allocation failure nothing is leaked, *ctx is set
 * to NULL and -1 is returned.
 */
int
lower_middle_init(struct prime_ctx *pctx, uint32_t start_prime, uint32_t end_prime, void **ctx)
{
   struct lower_middle_ctx *sctx = malloc(sizeof (struct lower_middle_ctx));
   if (sctx == NULL) {
      *ctx = NULL;
      return -1;
   }
   *ctx = sctx;

   sctx->start_prime = start_prime;
   sctx->end_prime = MIN(pctx->run_info.max_sieve_prime, end_prime);

   assert(sctx->start_prime >= 64);
   assert(sctx->end_prime <= 128);

   sctx->block_size = pctx->current_block.block_size;

   /* 67+32 = 99 bytes are needed; aligned_alloc() requires the size to be a
      multiple of the alignment, so the request is rounded up to 128. */
   sctx->primebuf[0] = aligned_alloc(32, 128);
   sctx->primebuf[1] = aligned_alloc(32, 128);
   if (sctx->primebuf[0] == NULL || sctx->primebuf[1] == NULL) {
      free(sctx->primebuf[0]);
      free(sctx->primebuf[1]);
      free(sctx);
      *ctx = NULL;
      return -1;
   }

   /* k indexes the next free slot; it keeps counting across the macro
      expansion below and the set_starting_byte() calls that follow. */
   int k = 0;

   init_offsets(0, 67, sctx->offsets_v3);

#define LM_INIT_X(PRIME) \
   set_starting_v2(0, PRIME, &sctx->offsets_v2[k++*32]);

   DO_FOR(LM_INIT_X, USED_PRIMES)

   set_starting_byte(0, 67, &sctx->offsets[k++*64]);
   set_starting_byte(0, 71, &sctx->offsets[k++*64]);
   set_starting_byte(0, 73, &sctx->offsets[k++*64]);
   set_starting_byte(0, 79, &sctx->offsets[k++*64]);
   set_starting_byte(0, 83, &sctx->offsets[k++*64]);
   set_starting_byte(0, 89, &sctx->offsets[k++*64]);

   return 0;
}
Exemplo n.º 7
0
/*
 * Creates a simple_soft context for a w x h image (both dimensions are
 * rounded down to a multiple of 16) and the given audio format.
 *
 * Returns the new context, or NULL if any allocation or sub-object creation
 * fails; in that case the partially built context is passed to
 * simple_soft_destroy().
 */
struct simple_soft_ctx *simple_soft_init(int w, int h, simple_soft_map_func map_func, int audio_rate, int audio_channels, julia_vis_pixel_format format)
{
	struct simple_soft_ctx *ctx = malloc(sizeof(*ctx));
	if(ctx == NULL) return NULL;

	/* Start with every owned pointer cleared; the fail path hands the
	   context to simple_soft_destroy() whatever stage was reached. */
	ctx->map_surf[0] = NULL;
	ctx->map_surf[1] = NULL;
	ctx->maxsrc = NULL;
	ctx->pal_ctx = NULL;
	ctx->pd = NULL;
	ctx->beat = NULL;
	ctx->fft_tmp = NULL;
	ctx->prev_buf = NULL;
	ctx->cur_buf = NULL;

	// force divisible by 16
	ctx->im_w = w - (w % 16);
	ctx->im_h = h - (h % 16);

	simple_soft_change_map_func(ctx, map_func);

	/* Two zero-filled 16-bit map surfaces. */
	ctx->m = 0;
	const size_t surf_bytes = ctx->im_w * ctx->im_h * sizeof(uint16_t);
	for(int s = 0; s < 2; s++)
		ctx->map_surf[s] = aligned_alloc(64, surf_bytes);
	if(ctx->map_surf[0] == NULL || ctx->map_surf[1] == NULL) goto fail;
	for(int s = 0; s < 2; s++)
		memset(ctx->map_surf[s], 0, surf_bytes);

	ctx->pal_ctx = pal_ctx_pix_format_new(format);
	ctx->maxsrc_rate = 24; //TODO: add a property that can change this
	ctx->maxsrc = maxsrc_new(ctx->im_w, ctx->im_h);
	ctx->pd = new_point_data(ctx->rational_julia?4:2);
	if(ctx->maxsrc == NULL || ctx->pal_ctx == NULL || ctx->pd == NULL) goto fail;

	ctx->last_beat_time = 0;
	ctx->lastpalstep = 0;
	ctx->maxfrms = 1;
	ctx->beats = 0;

	ctx->beat = beat_new();
	if(ctx->beat == NULL) goto fail;

	/* Audio state: 1024 samples below 50 kHz, 2048 otherwise. */
	ctx->audio_bufp = 0;
	ctx->sample_count = 0;
	ctx->sample_rate = audio_rate;
	ctx->channels = audio_channels;
	int nr_samp = (audio_rate < 50000) ? 1024 : 2048;
	ctx->nr_samp = nr_samp;

	const size_t fft_bytes = nr_samp*sizeof(float);
	const size_t half_bytes = sizeof(float)*nr_samp/2;
	ctx->fft_tmp = aligned_alloc(64, fft_bytes);
	ctx->cur_buf = aligned_alloc(64, half_bytes);
	ctx->prev_buf = aligned_alloc(64, half_bytes);
	if(ctx->fft_tmp == NULL || ctx->cur_buf == NULL || ctx->prev_buf == NULL) goto fail;
	memset(ctx->fft_tmp, 0, fft_bytes);
	memset(ctx->cur_buf, 0, half_bytes);
	memset(ctx->prev_buf, 0, half_bytes);

	return ctx;

fail:
	simple_soft_destroy(ctx);
	return NULL;
}
Exemplo n.º 8
0
// Records the framebuffer pointer, allocates every R3D_ALIGN-aligned working
// buffer of the renderer, builds the gouraud shader and the render control
// list, and resets the control-list index.
void r3d_init(uint32_t* fb) {
  const size_t ctl_lists_bytes = CTL_BLOCK_SIZE * NUM_CTL_LISTS;

  FB = fb;

  // Two buffers of R3D_BIN_SIZE bytes each.
  r3d_bin_address = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE);
  r3d_bin_base = (uint8_t*)aligned_alloc(R3D_ALIGN, R3D_BIN_SIZE);

  printf("-- bin address at %p\r\n",r3d_bin_address);
  printf("-- bin base at %p\r\n",r3d_bin_base);

  r3d_overspill_mem = aligned_alloc(R3D_ALIGN,R3D_OVERSPILL_SIZE);

  // One CTL_BLOCK_SIZE block per control list for each of these arrays.
  r3d_bin_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, ctl_lists_bytes);
  r3d_render_ctl_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, ctl_lists_bytes);
  r3d_vertex_lists = (uint8_t*)aligned_alloc(R3D_ALIGN, ctl_lists_bytes);
  r3d_shader_states = (uint8_t*)aligned_alloc(R3D_ALIGN, ctl_lists_bytes);

  // A single block holds the gouraud shader.
  r3d_gouraud_shader = (uint8_t*)aligned_alloc(R3D_ALIGN, CTL_BLOCK_SIZE * 1);
  vc4_gouraud_shader(r3d_gouraud_shader);

  r3d_write_render_list(r3d_render_ctl_lists);

  cl_idx = -1;
}
Exemplo n.º 9
0
/**
 * SSE micro-benchmark: repeatedly computes sqrt(x) / x four floats at a time
 * into a scratch buffer of (end - start + 1) floats.
 *
 * The results are discarded; the scratch buffer is released before returning.
 * Nothing is done when the range is empty or the buffer cannot be allocated.
 *
 * @param start first index of the range (only the range length is used)
 * @param end   last index of the range, inclusive
 */
void calculateSSE(int start, int end)
{
    const int size = end - start + 1;
    if (size <= 0)
        return;

    // we use aligned memory, because SSE instructions are really slow
    // working on unaligned memory. aligned_alloc() requires the size to be a
    // multiple of the alignment, hence the rounding up to 16 bytes.
    const size_t bytes = (size * sizeof(float) + 15) / 16 * 16;
    float* result = (float*)aligned_alloc(16, bytes);
    if (!result)
        return;

    __m128 delta_x = _mm_set_ps1(4.0f);
    __m128* sse_result = (__m128*)result;

    const int sse_length = size / 4;
    // x is not reset between passes; it keeps advancing across all loops.
    __m128 x = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
    for (int loop = 0; loop < 100000; ++loop)
    {
        for (int i = 0; i < sse_length; ++i)
        {
            __m128 sqrt_result = _mm_sqrt_ps(x);
            sse_result[i] = _mm_div_ps(sqrt_result, x);

            // move x value to next 4 numbers
            x = _mm_add_ps(x, delta_x);
        }
    }

    free(result);
}
Exemplo n.º 10
0
/*
 * Allocates `size` bytes aligned to ALLOCATION_ALIGNMENT and fills them with
 * data from random(), one long long at a time.
 *
 * If `size` is not a multiple of sizeof(long long), a warning is printed and
 * the trailing remainder bytes are left unrandomized.
 *
 * Returns the buffer (caller frees it), or NULL if the allocation failed.
 */
void* random_data(__u64 size)
{
	__u64 bytes_remaining = size;
	long long int *offset;
	void *buf = aligned_alloc(ALLOCATION_ALIGNMENT, size);

	/* The fill loop below works in long long units, so that is the
	   granularity the warning has to report. */
	if(size % sizeof(long long int))
	{
		fprintf(stderr,
		        "Warning: randomized data size is not multiple of %zu bytes, "
		        "last %llu byte(s) will be left unrandomized\n",
		        sizeof(long long int),
		        (unsigned long long)(size % sizeof(long long int)));
	}

	if(!buf)
	{
		fprintf(stderr, "Error: Failed to allocate space for random data\n");
		return NULL;
	}

	offset = buf;
	// Generate random data; ">=" so the final full word is filled as well
	while(bytes_remaining >= sizeof(long long int))
	{
		// Widen before shifting so the high half is formed in 64 bits even
		// where long is only 32 bits wide
		*offset = (long long int)random() << 32;
		*offset += (long long int)random();
		offset++;
		bytes_remaining -= sizeof(long long int);
	}

	return buf;
}
Exemplo n.º 11
0
/* Creates a TATAS lock and wraps it in a cache-line-aligned OOLock whose
   method table is TATAS_LOCK_METHOD_TABLE. */
OOLock * oo_tatas_create(){
    TATASLock * inner = plain_tatas_create();
    OOLock * wrapper = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));

    wrapper->m = &TATAS_LOCK_METHOD_TABLE;
    wrapper->lock = inner;

    return wrapper;
}
Exemplo n.º 12
0
/* Thin wrapper around aligned_alloc() that logs a debug message when the
   allocation fails. The (possibly NULL) result is returned unchanged. */
void *aligned_alloc_and_null_check(size_t alignemnt, size_t size) {
	void *block = aligned_alloc(alignemnt, size);
	if (block != NULL) {
		return block;
	}
	dbg_printf("aligned_alloc failed\n");
	return block;
}
Exemplo n.º 13
0
/* Lazily creates ccsynchNextLocalNode: on the first call it allocates a
   cache-line-aligned node and initialises it; later calls do nothing. */
static inline
void ccsynchlock_initLocalIfNeeded(){
    if(ccsynchNextLocalNode != NULL){
        return;
    }
    ccsynchNextLocalNode = aligned_alloc(CACHE_LINE_SIZE, sizeof(CCSynchLockNode));
    ccsynchlock_initNode(ccsynchNextLocalNode);
}
Exemplo n.º 14
0
/*
 * Resizes block `p` to `size` bytes with the alignment returned by
 * _memory_get_align( align ).
 *
 * On Windows this maps to _aligned_realloc. Elsewhere, an aligned request is
 * served by allocating a new block, copying min(old usable size, size) bytes
 * and releasing the old block; an unaligned request goes to realloc.
 *
 * Returns the new block, or 0 after reporting a panic if the aligned
 * allocation fails (the old block is left untouched in that case).
 */
static void* _memory_reallocate_malloc( void* p, uint64_t size, unsigned int align )
{
	align = _memory_get_align( align );
#if FOUNDATION_PLATFORM_WINDOWS
	return _aligned_realloc( p, (size_t)size, align );
#else
	if( align )
	{
		//No realloc aligned available
		void* memory = aligned_alloc( align, (size_t)size );
		if( !memory )
		{
			log_panicf( ERROR_OUT_OF_MEMORY, "Unable to reallocate memory: %s", system_error_message( 0 ) );
			return 0;
		}
		if( p )
		{
			size_t prev_size = malloc_usable_size( p );
			memcpy( memory, p, (size_t)( ( size < prev_size ) ? size : prev_size ) );
			//The contents now live in the new block; release the old one as
			//realloc would, otherwise every aligned reallocation leaks it
			free( p );
		}
		return memory;
	}
	return realloc( p, (size_t)size );
#endif
}
Exemplo n.º 15
0
/*
 * Writes every entry of files[] via write_file(), handing it one zero-filled
 * block and one ull_bytes-aligned scratch block, each blocksize bytes long.
 *
 * Returns 0 on success, ENOMEM if a block cannot be allocated, or the first
 * non-zero value returned by write_file().
 */
static int create_files(void)
{
	int err = 0;
	int idx;
	char *zero_block;
	char *scratch_block;

	zero_block = calloc(1, blocksize);
	if (zero_block == NULL)
		return ENOMEM;

	scratch_block = aligned_alloc(ull_bytes, blocksize);
	if (scratch_block == NULL) {
		free(zero_block);
		return ENOMEM;
	}

	/* Stop at the first file that fails and report its error. */
	for (idx = 0; idx < numfiles; idx++) {
		err = write_file(files[idx], zero_block, scratch_block);
		if (err != 0)
			break;
	}

	free(zero_block);
	free(scratch_block);
	return err;
}
Exemplo n.º 16
0
/* Creates a CCSynch lock and wraps it in a cache-line-aligned OOLock whose
   method table is CCSYNCH_LOCK_METHOD_TABLE. */
OOLock * oo_ccsynch_create(){
    CCSynchLock * inner = plain_ccsynch_create();
    OOLock * wrapper = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));

    wrapper->m = &CCSYNCH_LOCK_METHOD_TABLE;
    wrapper->lock = inner;

    return wrapper;
}
Exemplo n.º 17
0
/**
 * Loads SNP data from a binary hap file, replacing any previously loaded data.
 *
 * File layout as read here: a 64-bit magic number, the 64-bit SNP count, the
 * 64-bit SNP length, then for each SNP m_snpDataSize64 64-bit words.
 *
 * @param filename path of the binary file
 * @return true on success; false (with a message on std::cerr) if the file
 *         cannot be opened, has the wrong magic number, is truncated, or the
 *         data buffer cannot be allocated. On failure m_data is null.
 */
bool HapMap::loadHapBinary(const char* filename)
{
    // Drop the old data and clear the pointer straight away so that an early
    // return cannot leave m_data pointing at freed memory.
    aligned_free(m_data);
    m_data = nullptr;
    
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.good())
    {
        std::cerr << "ERROR: Cannot open file or file not found: " << filename << std::endl;
        return false;
    }
    
    uint64_t check = 0;
    f.read((char*) &check, sizeof(uint64_t));
    if (!f || check != magicNumber)
    {
        std::cerr << "ERROR: Wrong file type: " << filename << ". Expected binary format." << std::endl;
        return false;
    }
    
    f.read((char*) &m_numSnps, sizeof(uint64_t));
    f.read((char*) &m_snpLength, sizeof(uint64_t));
    if (!f)
    {
        std::cerr << "ERROR: Truncated header in file: " << filename << std::endl;
        return false;
    }
    
    m_snpDataSize = ::bitsetSize<PrimitiveType>(m_snpLength);
    m_snpDataSize64 = ::bitsetSize<uint64_t>(m_snpLength);
    m_snpDataSizeULL = ::bitsetSize<unsigned long long>(m_snpLength);

    // aligned_alloc() requires the size to be a non-zero multiple of the
    // alignment, so the request is padded up to whole 128-byte units.
    const size_t bytes = m_snpDataSize*m_numSnps*sizeof(PrimitiveType);
    const size_t padded = (bytes == 0) ? 128 : (bytes + 127) / 128 * 128;
    m_data = (PrimitiveType*) aligned_alloc(128, padded);
    if (m_data == nullptr)
    {
        std::cerr << "ERROR: Cannot allocate memory for file: " << filename << std::endl;
        return false;
    }
    
    for(uint64_t i = 0; i < m_numSnps; ++i)
        f.read((char*) &m_data[i*this->m_snpDataSize], sizeof(uint64_t)*m_snpDataSize64);

    // A short read leaves the stream in a failed state; do not report success
    // with partially filled data.
    if (!f)
    {
        std::cerr << "ERROR: Unexpected end of file: " << filename << std::endl;
        aligned_free(m_data);
        m_data = nullptr;
        return false;
    }
    
    return true;
}
Exemplo n.º 18
0
/* Creates an MRQD lock and wraps it in a cache-line-aligned OOLock whose
   method table is MRQD_LOCK_METHOD_TABLE. */
OOLock * oo_mrqd_create(){
    MRQDLock * inner = plain_mrqd_create();
    OOLock * wrapper = aligned_alloc(CACHE_LINE_SIZE, sizeof(OOLock));

    wrapper->m = &MRQD_LOCK_METHOD_TABLE;
    wrapper->lock = inner;

    return wrapper;
}
Exemplo n.º 19
0
/**
 * Allocate a new picture in the heap.
 *
 * This function allocates a fake direct buffer in memory, which can be
 * used exactly like a video buffer. The video output thread then manages
 * how it gets displayed.
 *
 * All planes share one 16-byte aligned allocation; each plane's p_pixels
 * points just past the previous plane's i_pitch * i_lines bytes.
 *
 * \param p_pic picture whose planes already have i_pitch and i_lines set
 * \return VLC_SUCCESS, VLC_ENOMEM if a plane has invalid dimensions or the
 *         total size would overflow, or VLC_EGENERIC if the allocation fails
 *         (i_planes is reset to 0 in both error cases)
 */
static int AllocatePicture( picture_t *p_pic )
{
    /* Calculate how big the new image should be */
    size_t i_bytes = 0;
    for( int i = 0; i < p_pic->i_planes; i++ )
    {
        const plane_t *p = &p_pic->p[i];

        if( p->i_pitch < 0 || p->i_lines <= 0 ||
            (size_t)p->i_pitch > (SIZE_MAX - i_bytes)/p->i_lines )
        {
            p_pic->i_planes = 0;
            return VLC_ENOMEM;
        }
        /* Multiply in size_t: the guard above bounds the size_t product,
         * but an int multiplication could still overflow. */
        i_bytes += (size_t)p->i_pitch * (size_t)p->i_lines;
    }

    uint8_t *p_data = aligned_alloc( 16, i_bytes );
    if( i_bytes > 0 && p_data == NULL )
    {
        p_pic->i_planes = 0;
        return VLC_EGENERIC;
    }

    /* Fill the p_pixels field for each plane */
    p_pic->p[0].p_pixels = p_data;
    for( int i = 1; i < p_pic->i_planes; i++ )
    {
        const plane_t *p_prev = &p_pic->p[i-1];

        p_pic->p[i].p_pixels =
            &p_prev->p_pixels[ (size_t)p_prev->i_lines * (size_t)p_prev->i_pitch ];
    }

    return VLC_SUCCESS;
}
Exemplo n.º 20
0
/**
 * Aligned array allocation hook with debug metadata.
 *
 * When `alignment` is non-zero and `alignmentOffset` is a multiple of it, the
 * block comes from aligned_alloc() with the size padded up to a multiple of
 * the alignment (a requirement of aligned_alloc). Otherwise the request falls
 * back to plain malloc(size).
 *
 * pName, flags, debugFlags, file and line are accepted for interface
 * compatibility and are not used.
 *
 * @return the allocated block, or a null pointer if the allocation fails or
 *         the padded size would overflow
 */
void*
operator new[](size_t size,
               size_t alignment,
               size_t alignmentOffset,
               const char* pName,
               int flags,
               unsigned debugFlags,
               const char* file,
               int line)
{
    (void)pName;
    (void)flags;
    (void)debugFlags;
    (void)file;
    (void)line;

    // #ifndef NDEBUG
    //     fprintf(stderr,
    //             "%zu (alignment: %zu offset: %zu) in %s (flags: %d debug
    //             flags: "
    //             "%u) from file %s:%d\n",
    //             size,
    //             alignment,
    //             alignmentOffset,
    //             pName,
    //             flags,
    //             debugFlags,
    //             file,
    //             line);
    // #endif

    // A zero alignment would make the modulo below divide by zero.
    if (alignment == 0 || (alignmentOffset % alignment) != 0)
        return malloc(size);

    const size_t padded = (size + alignment - 1) / alignment * alignment;
    if (padded < size)
        return nullptr; // rounding up wrapped around

    return aligned_alloc(alignment, padded != 0 ? padded : alignment);
}
Exemplo n.º 21
0
/**
 * Minimal mmap() stand-in: hands out page-aligned heap memory and records
 * the request in _mmap_entries.
 *
 * @param addr    placement hint; not used
 * @param length  size of the mapping; must be a non-zero multiple of 4096
 * @param prot    recorded in the entry
 * @param flags   recorded in the entry
 * @param fd      recorded in the entry; the file is not read
 * @param offset  recorded in the entry
 * @return the new block, or MAP_FAILED with errno set to EINVAL (bad length)
 *         or ENOMEM (allocation failure)
 */
void* mmap(void* addr, size_t length, 
            int prot,  int flags,
            int fd,    off_t offset)
{
  (void) addr;

  // invalid or misaligned length
  if (length == 0 || (length & 4095) != 0)
  {
    errno = EINVAL;
    return MAP_FAILED;
  }
  
  // associate some VA space with open file @fd
  // for now just allocate page-aligned on heap
  void* block = aligned_alloc(4096, length);
  if (block == nullptr)
  {
    errno = ENOMEM;
    return MAP_FAILED;
  }

  mmap_entry_t entry;
  entry.addr   = block;
  entry.length = length;
  entry.prot   = prot;
  entry.flags  = flags;
  entry.fd     = fd;
  entry.offset = offset;
  
  /// Note: we may have to read a file here to properly create the
  /// in-memory mapping. That would require mmap to be asynchronous.
  // create the mapping, keyed by the address the caller receives: the hint
  // is typically the same (null) for every call, so keying by it would make
  // entries overwrite each other and impossible to find from the returned
  // pointer
  _mmap_entries[entry.addr] = entry;
  
  return entry.addr;
}
Exemplo n.º 22
0
/*
 * Allocates a large object of `size` bytes in its own arena-aligned block.
 *
 * Before allocating, once cells_since_incmark exceeds incmark_threshold,
 * either a full collection or one incremental mark step is run. The new
 * object is registered in the hbtable and flagged gray.
 */
object_t *_qcgc_allocate_large(size_t size) {
#if CHECKED
	assert(size >= 1<<QCGC_LARGE_ALLOC_THRESHOLD_EXP);
#endif
	if (UNLIKELY(qcgc_state.cells_since_incmark >
				qcgc_state.incmark_threshold)) {
		/* Incremental mark steps until the configured count is reached,
		 * then a full collection. */
		if (qcgc_state.incmark_since_sweep != qcgc_state.incmark_to_sweep) {
			qcgc_incmark();
			qcgc_state.incmark_since_sweep++;
		} else {
			qcgc_collect();
		}
	}

	/* Round the request up to a whole number of arenas. */
	size_t rounded_size = (size + QCGC_ARENA_SIZE - 1) & ~(QCGC_ARENA_SIZE - 1);
	object_t *obj = aligned_alloc(QCGC_ARENA_SIZE, rounded_size);
#if QCGC_INIT_ZERO
	memset(obj, 0, size);
#endif
	qcgc_hbtable_insert(obj);
	obj->flags = QCGC_GRAY_FLAG;

	qcgc_state.cells_since_incmark += bytes_to_cells(size);

	return obj;
}
Exemplo n.º 23
0
/*
 * Allocates `size` bytes aligned to `align`, using whichever aligned
 * allocator the build configuration provides (memory-debug allocator,
 * posix_memalign, aligned_alloc, memalign or the Windows/MinGW variants).
 *
 * With MEMDBG_ON the caller's file and line are passed to the debug
 * allocator. Depending on the selected branch a failure either terminates
 * via pexit() or is reported with perror(), in which case NULL is returned.
 */
void * mem_alloc_align_func(size_t size,
							size_t align
#if defined(MEMDBG_ON)
							,
							char * file,
							int line
#endif
							)
{
	void * ptr = NULL;
#if defined(MEMDBG_ON)
	ptr = (char *) MEMDBG_alloc_align(size, align, file, line);
#elif HAVE_POSIX_MEMALIGN
	if (posix_memalign(&ptr, align, size)) pexit("posix_memalign");
#elif HAVE_ALIGNED_ALLOC
	/* According to the Linux man page, "size should be a multiple of
	   alignment", whatever they mean with "should"... This does not
	   make any sense whatsoever but we round it up to comply. */
	size = ((size + (align - 1)) / align) * align;
	if (!(ptr = aligned_alloc(align, size))) pexit("aligned_alloc");
#elif HAVE_MEMALIGN
	/* Let's just pray this implementation can actually free it.
	   memalign() takes (alignment, size) on every platform; the former
	   three-argument non-SPARC call did not match that prototype. */
	if (!(ptr = memalign(align, size)))
		perror("memalign");
#elif HAVE___MINGW_ALIGNED_MALLOC
	if (!(ptr = __mingw_aligned_malloc(size, align)))
		perror("__mingw_aligned_malloc");
#elif HAVE__ALIGNED_MALLOC
	if (!(ptr = _aligned_malloc(size, align))) perror("aligned_malloc");

#elif AC_BUILT
#error No suitable aligned alloc found, please report to john-dev mailing list (state your OS details).

/* we need an aligned alloc function for legacy builds */
#elif _ISOC11_SOURCE
	size = ((size + (align - 1)) / align) * align;
	if (!(ptr = aligned_alloc(align, size))) perror("aligned_alloc");
#else
	if (posix_memalign(&ptr, align, size)) perror("posix_memalign");
#endif
	return ptr;
}
Exemplo n.º 24
0
int
main(void)
{
    pstatus_t status;
    void *buf;

    /*
     * Alignment must be sizeof (void *) (word) aligned.
     */
    VERIFY3P(aligned_alloc(sizeof (void *) - 1, 16), ==, NULL);
    VERIFY3S(errno, ==, EINVAL);

    VERIFY3P(aligned_alloc(sizeof (void *) + 1, 16), ==, NULL);
    VERIFY3S(errno, ==, EINVAL);


    VERIFY3P(aligned_alloc(23, 16), ==, NULL);
    VERIFY3S(errno, ==, EINVAL);

    buf = aligned_alloc(sizeof (void *), 16);
    VERIFY3P(buf, !=, NULL);
    free(buf);

    /*
     * Cause ENOMEM
     */
    VERIFY0(proc_get_status(getpid(), &status));
    VERIFY3P(mmap((caddr_t)P2ROUNDUP(status.pr_brkbase +
                                     status.pr_brksize, 0x1000), 0x1000,
                  PROT_READ, MAP_ANON | MAP_FIXED | MAP_PRIVATE, -1, 0),
             !=, (void *)-1);

    for (;;) {
        if (malloc(16) == NULL)
            break;
    }

    for (;;) {
        if (aligned_alloc(sizeof (void *), 16) == NULL)
            break;
    }

    VERIFY3P(aligned_alloc(sizeof (void *), 16), ==, NULL);
    VERIFY3S(errno, ==, ENOMEM);

    return (0);
}
Exemplo n.º 25
0
			// Returns `size` bytes aligned to 16 bytes; the three unnamed
			// arguments are ignored. Asserts that the allocation succeeded.
			void* allocate(size_t size, const char*, const char*, int) override
			{
				void* const block = aligned_alloc(16, size);
				ASSERT(block);
				return block;
			}
Exemplo n.º 26
0
/**
 * Allocates `size` bytes of zero-initialised memory aligned to at least 64B,
 * which is one cache line on intel processors.
 *
 * @param size number of bytes the caller needs
 * @return the zeroed block (release it with free()), or nullptr after
 *         printing an error message if the allocation fails
 */
double *calloc64ByteAligned(size_t size) {
    // NOTE(review): the requested alignment is 64 * sizeof(void*), i.e. more
    // than the 64 bytes the name promises. It still satisfies the contract
    // and is kept as is; confirm whether plain 64 was intended.
    const size_t alignment = 64 * sizeof(void*);

    // aligned_alloc() requires a size that is a multiple of the alignment.
    const size_t padded = (size + alignment - 1) / alignment * alignment;

    void* ptr = nullptr;
    if (padded >= size) { // false only if the rounding wrapped around
        ptr = aligned_alloc(alignment, padded != 0 ? padded : alignment);
    }
    if(ptr == nullptr) {
        std::cout << "ERROR: Memory allocation failed (" << size << "B)" << std::endl;
        return nullptr; // never hand a null pointer to memset
    }
    memset(ptr, 0, size);
    return (double*) ptr;
}
Exemplo n.º 27
0
/** @brief Creates a byte array of the given size in main RAM and optionally fills it from a host pointer.
 *
 * With AVX/AVX2 enabled the storage is 32-byte aligned (posix_memalign if VIENNACL_WITH_POSIX_MEMALIGN is defined, aligned_alloc otherwise); without AVX it comes from new char[].
 *
 * NOTE(review): every branch attaches detail::array_deleter<char>, including the ones that do not allocate with new[] - confirm the deleter matches those allocators.
 *
 * @param size_in_bytes   Number of bytes to allocate
 * @param host_ptr        Optional source data copied into the new array. Must point to at least 'size_in_bytes' bytes of data.
 *
 */
inline handle_type  memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
{
  // Step 1: obtain the storage and wrap it in a handle.
#if  defined(VIENNACL_WITH_AVX2) || defined(VIENNACL_WITH_AVX)
#  ifdef VIENNACL_WITH_POSIX_MEMALIGN
  void *aligned_mem;
  if (posix_memalign(&aligned_mem, 32, size_in_bytes) != 0)
    throw std::bad_alloc();
  handle_type new_handle(reinterpret_cast<char*>(aligned_mem), detail::array_deleter<char>());
#  else
  // "Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()" => added above
  handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
#  endif
#else
  handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
#endif

  // Step 2: without source data the uninitialized buffer is returned as is.
  if (!host_ptr)
    return new_handle;

  // Step 3: byte-wise copy of the host data.
  char * dst = new_handle.get();
  const char * src = static_cast<const char *>(host_ptr);
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
  for (long idx = 0; idx < long(size_in_bytes); ++idx)
    dst[idx] = src[idx];

  return new_handle;
}
Exemplo n.º 28
0
/**
 * Allocate an element from the requested pool.
 * This function uses the feature that pointers to void and char are
 * interchangeable.
 * 1. If no current block or current block exhausted - obtain another one
 *    and chain it the block chain. Else reuse an LRU unused block;
 *    The element pointer is aligned to the required alignment.
 * 2. Zero the block if required;
 * 3. Return element pointer.
 *
 * @param mp The pool descriptor to allocate from.
 * @return Pointer to an element of mp->element_size bytes. When mp->zero_out
 *         is set, the memory handed out has been zeroed (per element on the
 *         free-list path, per block otherwise).
 */
void *pool_alloc(Pool_desc *mp)
{
#ifdef POOL_FREE
	/* Fast path: pop the head of the free list. The first bytes of a free
	 * element hold the pointer to the next free element. */
	if (NULL != mp->free_list)
	{
		void *alloc_next = mp->free_list;
		mp->free_list = *(char **)mp->free_list;
		if (mp->zero_out) memset(alloc_next, 0, mp->element_size);
		return alloc_next;
	}
#endif // POOL_FREE

	mp->curr_elements++; /* For stats. */

	/* A block is needed when nothing has been allocated yet, or when the
	 * allocation pointer has reached the end of the current block's data. */
	if ((NULL == mp->alloc_next) || (mp->alloc_next == mp->ring + mp->data_size))
	{
		/* An "exact" pool may only ever use its first block. */
		assert(!mp->exact || (NULL == mp->alloc_next),
				 "Too many elements %zu>%zu (pool '%s' created in %s())",
				 mp->curr_elements, mp->num_elements, mp->name, mp->func);

		/* No current block or current block exhausted - obtain another one. */
		char *prev = mp->ring; /* Remember current block for possible chaining. */
		if (NULL != mp->ring)
		{
			/* Next block already exists. */
			/* NOTE(review): POOL_NEXT_BLOCK is defined elsewhere; it is used
			 * here as the link to the following block and may yield NULL. */
			mp->ring = POOL_NEXT_BLOCK(mp->ring, mp->data_size);
		}

		if (NULL == mp->ring)
		{
			/* Allocate a new block and chain it. */
			mp->ring = aligned_alloc(mp->alignment, mp->block_size);
			if (NULL == mp->ring)
			{
				/* aligned_alloc() has strict requirements. */
				char errbuf[64];
				strerror_r(errno, errbuf, sizeof(errbuf));
				assert(NULL != mp->ring, "Block/element sizes %zu/%zu: %s",
				       mp->block_size, mp->element_size, errbuf);
			}
			if (NULL == mp->alloc_next)
				mp->chain = mp->ring; /* This is the start of the chain. */
			else
				POOL_NEXT_BLOCK(prev, mp->data_size) = mp->ring;
			/* The new block is the last one in the chain. */
			POOL_NEXT_BLOCK(mp->ring, mp->data_size) = NULL;
			//printf("New ring %p next %p\n", mp->ring,
			       //POOL_NEXT_BLOCK(mp->ring, mp->data_size));
		} /* Else reuse existing block. */

		/* Only the data area (data_size bytes) is cleared. */
		if (mp->zero_out) memset(mp->ring, 0, mp->data_size);
		mp->alloc_next = mp->ring;
	}

	/* Grab a new element. */
	void *alloc_next = mp->alloc_next;
	mp->alloc_next += mp->element_size;
	return alloc_next;
}
Exemplo n.º 29
0
void run(
	const testcase *tests, size_t tests_size,
	std::vector<std::pair<const char*,func_t>> f_intersection,
	std::vector<std::pair<const char*,func_t>> f_union,
	std::vector<std::pair<const char*,func_t>> f_difference
){
	//for(const auto &t : tests){
	for(size_t i=0; i<tests_size; ++i){
		const auto &t = tests[i];
		uint32_t *res = (uint32_t*)aligned_alloc(64, (t.size1+t.size2)*sizeof(uint32_t));
		for(const auto &f : f_intersection){
			size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res);
			if(!equivalent(res, size_res, t.res_intersection, t.size_intersection)){
				//TODO
				printf("test \"%s\", intersection \"%s\" wrong\nlist1 : ", t.name, f.first);
				for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]);
				printf("\nlist2 : ");
				for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]);
				printf("\nresult: ");
				for(size_t i=0; i<size_res; ++i) printf("%i, ", res[i]);
				printf("\nexpect: ");
				for(size_t i=0; i<t.size_intersection; ++i) printf("%i, ", t.res_intersection[i]);
				printf("\n");
			}
		}
		for(const auto &f : f_union){
			size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res);
			if(!equivalent(res, size_res, t.res_union, t.size_union)){
				//TODO
				printf("test \"%s\", union \"%s\" wrong\nlist1 : ", t.name, f.first);
				for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]);
				printf("\nlist2 : ");
				for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]);
				printf("\nresult: ");
				for(size_t i=0; i<size_res; ++i) printf("%i, ", res[i]);
				printf("\nexpect: ");
				for(size_t i=0; i<t.size_union; ++i) printf("%i, ", t.res_union[i]);
				printf("\n");
			}
		}
		for(const auto &f : f_difference){
			size_t size_res = f.second(t.list1, t.size1, t.list2, t.size2, res);
			if(!equivalent(res, size_res, t.res_difference, t.size_difference)){
				//TODO
				printf("test \"%s\", difference \"%s\" wrong\nlist1 : ", t.name, f.first);
				for(size_t i=0; i<t.size1; ++i) printf("%i, ", t.list1[i]);
				printf("\nlist2 : ");
				for(size_t i=0; i<t.size2; ++i) printf("%i, ", t.list2[i]);
				printf("\nresult: ");
				for(size_t i=0; i<size_res; ++i) printf("%i, ", res[i]);
				printf("\nexpect: ");
				for(size_t i=0; i<t.size_difference; ++i) printf("%i, ", t.res_difference[i]);
				printf("\n");
			}
		}
		free(res);
	}
}
Exemplo n.º 30
0
/**
 * Stress calculation: fills a buffer of 2^30 floats (4 GiB) with
 * (i*i)/sqrt(i), then frees it and prints a line break.
 *
 * The product i*i is computed in 32-bit unsigned arithmetic and wraps for
 * large i; element 0 is 0/0. If the buffer cannot be allocated, an error is
 * printed to stderr and nothing else is done.
 */
void bigfl_calc(void){
    const size_t count = (size_t)1024*1024*1024;
    float *val = (float*)aligned_alloc(16, count*sizeof(float));

    // A 4 GiB request can easily fail; do not write through a null pointer.
    if(val == NULL){
        fprintf(stderr, "bigfl_calc: unable to allocate %zu bytes\r\n", count*sizeof(float));
        return;
    }

    for(uint32_t i=0; i < count; i++){
        val[i] = (i*i)/sqrt(i);
    }
    free(val);
    printf("\r\n"); 
}