aligned_array(aligned_array const & rhs)
 {
     // Copy construction happens in three steps: fetch raw storage for N
     // elements from malloc_aligned, default-construct a T in every slot
     // with placement new, then let the assignment operator copy rhs.
     elems = (T*)malloc_aligned(N * sizeof(T));

     int i = 0;
     while (i != N)
     {
         new(elems + i) T();
         ++i;
     }

     *this = rhs;
 }
예제 #2
0
void CAudBuffer::Alloc(const size_t size)
{
    // DeAlloc() always runs first (presumably releasing any buffer that is
    // still held - confirm against its definition).
    DeAlloc();

    // New storage is requested from malloc_aligned with 16 as the second
    // argument; the position counter restarts at zero.
    uint8_t* const fresh = (uint8_t*)malloc_aligned(size, 16);

    m_buffer_pos  = 0;
    m_buffer_size = size;
    m_buffer      = fresh;
}
예제 #3
0
파일: mda.cpp 프로젝트: magland/mountainlab
void* allocate(bigint nbytes)
{
    // Builds with USE_SSE2 defined go through malloc_aligned (called with 16
    // and the byte count); every other build uses plain malloc.
    void* block;
#ifdef USE_SSE2
    block = malloc_aligned(16, nbytes);
#else
    block = malloc(nbytes);
#endif
    return block;
}
/*
 * Computes one weighted partial log-likelihood term for alignment site i.
 *
 * A scratch vector of 4 * mxtips doubles is allocated, filled by calling
 * computeVectorGTRCAT() for the traversal entries ti[1] .. ti[counter - 1],
 * and released again before returning.  The 4-entry tip vector selected by
 * trav->pNumber (x1) and the scratch-vector entry selected by trav->qNumber
 * (x2) are then combined using exp(EIGN[1..3] * lz), with
 * lz = ki * log(max(qz, zmin)).
 *
 * Returns w * (log(|term|) + scale * log(minlikelihood)).
 */
static double evaluatePartialGTRCAT(int i, double ki, int counter,  traversalInfo *ti, double qz,
                                    int w, double *EIGN, double *EI, double *EV,
                                    double *tipVector, unsigned  char **yVector,
                                    int branchReference, int mxtips)
{
    double lz, term;
    double  d[3];
    double   *x1, *x2;
    int scale = 0, k;
    double *lVector = (double *)malloc_aligned(sizeof(double) * 4 * mxtips);

    traversalInfo *trav = &ti[0];

    assert(isTip(trav->pNumber, mxtips));

    x1 = &(tipVector[4 *  yVector[trav->pNumber][i]]);

    for(k = 1; k < counter; k++)
    {
        /* Per-node branch values, named distinctly so they no longer shadow
           the qz parameter of this function. */
        double
        qzNode = ti[k].qz[branchReference],
        rzNode = ti[k].rz[branchReference];

        qzNode = (qzNode > zmin) ? log(qzNode) : log(zmin);
        rzNode = (rzNode > zmin) ? log(rzNode) : log(zmin);

        computeVectorGTRCAT(lVector, &scale, ki, i, qzNode, rzNode, &ti[k],
                            EIGN, EI, EV,
                            tipVector, yVector, mxtips);
    }

    x2 = &lVector[4 * (trav->qNumber - mxtips)];

    assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

    /* Clamp the branch value at zmin before taking the logarithm, exactly as
       the loop above does for the per-node values.  The former code stored
       zmin in lz and then immediately overwrote it with log(qz), so the
       guard never had any effect. */
    lz  = (qz > zmin) ? log(qz) : log(zmin);
    lz *= ki;

    d[0] = EXP (EIGN[1] * lz);
    d[1] = EXP (EIGN[2] * lz);
    d[2] = EXP (EIGN[3] * lz);

    term =  x1[0] * x2[0];
    term += x1[1] * x2[1] * d[0];
    term += x1[2] * x2[2] * d[1];
    term += x1[3] * x2[3] * d[2];

    term = LOG(FABS(term)) + (scale * LOG(minlikelihood));

    term = term * w;

    free(lVector);

    return  term;
}
void *alloc_aligned_buffer(uint32_t size) {
    /* A zero-byte request never reaches the allocator and yields NULL. */
    if (size == 0) {
        return NULL;
    }

    /* Everything else is served by malloc_aligned with MALLOC_ALIGN. */
    return malloc_aligned(size, MALLOC_ALIGN);
}
예제 #6
0
/*
 * Runs the enhancement pipeline in place on one frame.
 *
 * Handle - value that is cast back to the ZXImgEEFilter pointer.
 * pData  - frame buffer; the result is written back into it.
 * nImgW  - frame width, must be a multiple of 16.
 * nImgH  - frame height, must be a multiple of 8.
 *
 * When the filter's format is not IMGEE_FORMAT_YUVNV21 the frame is first
 * converted into the scratch buffer pFilter->p_src, filtered there and
 * converted back into pData afterwards.
 *
 * Returns ZXIMGCORE_OK on success, ZXIMGCORE_PARA_ERR for a null handle or
 * bad dimensions, and ZXIMGCORE_FAILED when the scratch buffer cannot be
 * allocated or a filter stage reports an error.
 */
int32_t	ZXImgEnhanceProcess(int32_t Handle,uint8_t* pData,int32_t nImgW,int32_t nImgH)
{
	ZXImgEEFilter* pFilter = (ZXImgEEFilter*)Handle;
	int32_t ret;
	uint8_t	*p_data;
	//#0 check the data safety
	if(!pFilter)
		return ZXIMGCORE_PARA_ERR;
	if((nImgW&0xF)||(nImgH&7))
		return ZXIMGCORE_PARA_ERR;
	//#1 color convert
	if(pFilter->format!=IMGEE_FORMAT_YUVNV21)
	{
		// The same conversion routine is applied to every non-NV21 format,
		// so the scratch size is computed unconditionally. It used to be
		// assigned only for IMGEE_FORMAT_S1 and was read uninitialized for
		// any other format.
		ret = nImgW * nImgH * 3 / 2;
		if(pFilter->size != ret || !pFilter->p_src)
		{
			if(pFilter->p_src)
			{
				free_aligned(pFilter->p_src);
				pFilter->p_src = NULL;
			}
			// Keep size and buffer consistent: zero while nothing is held.
			pFilter->size = 0;
			pFilter->p_src = (uint8_t*)malloc_aligned(ret,32);
			if(!pFilter->p_src)
				goto _error;
			// Remember the size so that later frames of the same geometry
			// reuse the buffer instead of reallocating on every call.
			pFilter->size = ret;
		}
		s1format_2_yuvnv12(pData,pFilter->p_src,nImgW,nImgH);
		p_data = pFilter->p_src;
	}
	else
		p_data = pData;

	//#2 Filter
	if(pFilter->pGFltHandle)
	{
		ret = sndaGuidedFilterProcess(pFilter->pGFltHandle,p_data,p_data,nImgW,nImgH);
		if(ret != GUIDED_FILTER_OK)
			return ZXIMGCORE_FAILED;
	}
	
	if(pFilter->pCFltHandle)
	{
		ret = sndaClaheFilterProcess(pFilter->pCFltHandle,p_data,p_data,nImgW,nImgH);
		if(ret != CLAHE_FILTER_OK)
			return ZXIMGCORE_FAILED;
	}
	//#3 convert the filtered scratch frame back into the caller's buffer
	if(pFilter->format!=IMGEE_FORMAT_YUVNV21)
	{
		yuvnv12_2_s1format(p_data,pData,nImgW,nImgH);
	}

    return ZXIMGCORE_OK;
_error:
	return ZXIMGCORE_FAILED;
}
예제 #7
0
void CAudBuffer::ReAlloc(const size_t size)
{
    uint8_t* buffer = (uint8_t*)malloc_aligned(size, 16);
    if (m_buffer) {
        size_t copy = std::min(size, m_buffer_size);
        memcpy(buffer, m_buffer, copy);
        free_aligned(m_buffer);
    }
    m_buffer = buffer;

    m_buffer_size = size;
    m_buffer_pos = std::min(m_buffer_pos, m_buffer_size);
}
예제 #8
0
/*
Checks if the device is a photo frame by reading the first 512 bytes and
comparing against the known string that's there.

f is an open file descriptor for the device.  Returns 1 when the identifier
"SITRONIX CORP." is found at the very start of the device and 0 otherwise,
including when the read buffer cannot be allocated or the read does not
deliver the bytes that are inspected.
*/
int is_photoframe(int f) {
    int y,res;
    char id[]="SITRONIX CORP.";
    char *buff;
    buff=malloc_aligned(0x200);
    if (!buff) return 0;
    res=0;
    lseek(f,0x0,SEEK_SET);
    y=read(f,buff,0x200);
    /* strcmp() looks at sizeof(id) bytes at most (the text plus its
       terminator).  After a failed or short read those bytes would be
       indeterminate, so the comparison is only done when they arrived. */
    if (y>=(int)sizeof(id)) {
        buff[15]=0;
//    fprintf(stderr,"ID=%s\n",buff);
        res=strcmp(buff,id)==0?1:0;
    }
    free_aligned(buff,0x200);
    return res;
}
/*
 * Computes one weighted partial log-likelihood term for alignment site i
 * using 20-entry state vectors.
 *
 * A scratch vector of 20 * mxtips doubles is allocated, filled by calling
 * computeVectorGTRCATPROT() for the traversal entries ti[1] .. ti[counter - 1],
 * and released again before returning.  The tip vector selected by
 * trav->pNumber (x1) and the scratch-vector entry selected by trav->qNumber
 * (x2) are combined with the factors d[0] = 1 and
 * d[l] = exp(EIGN[l - 1] * lz), where lz = ki * log(max(qz, zmin)).
 *
 * Returns w * (log(|term|) + scale * log(minlikelihood)).
 */
static double evaluatePartialGTRCATPROT(int i, double ki, int counter,  traversalInfo *ti, double qz,
					int w, double *EIGN, double *EI, double *EV,
					double *tipVector, unsigned char **yVector, 
					int branchReference, int mxtips)
{
  double lz, term;       
  double  d[20];
  double   *x1, *x2; 
  int scale = 0, k, l;
  double *lVector = (double *)malloc_aligned(sizeof(double) * 20 * mxtips);

  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));
     
  x1 = &(tipVector[20 *  yVector[trav->pNumber][i]]);   

  for(k = 1; k < counter; k++)                
    computeVectorGTRCATPROT(lVector, &scale, ki, i, ti[k].qz[branchReference], ti[k].rz[branchReference], 
			    &ti[k], EIGN, EI, EV, 
			    tipVector, yVector, mxtips);       
   
  x2 = &lVector[20 * (trav->qNumber - mxtips)];

  assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);  
  
  /* Clamp the branch value at zmin before taking the logarithm.  The former
     code stored zmin in lz and then immediately overwrote it with log(qz),
     so the guard never had any effect. */
  lz  = (qz > zmin) ? log(qz) : log(zmin);
  lz *= ki;
  
  d[0] = 1.0;
  for(l = 1; l < 20; l++)
    d[l] = EXP (EIGN[l-1] * lz);

  term = 0.0;
  
  for(l = 0; l < 20; l++)
    term += x1[l] * x2[l] * d[l];   

  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));   

  term = term * w;

  free(lVector);
  

  return  term;
}
예제 #10
0
/*
 * Copies the image data of a DDS file into a newly allocated buffer.
 *
 * The number of bytes comes from get_dds_size() and the start position
 * from get_dds_offset(); the source is the in-memory file contents behind
 * el_get_pointer(), skipping sizeof(DdsHeader) + 4 bytes (presumably the
 * header plus a 4-byte file magic - confirm).
 *
 * Returns the buffer obtained from malloc_aligned(), or 0 when the
 * allocation fails.
 */
static void* read_dds(el_file_ptr file, DdsHeader *header,
	const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint8* dst;
	Uint32 size, offset;

	size = get_dds_size(header, 0, strip_mipmaps, base_level);
	offset = get_dds_offset(header, base_level);

	dst = malloc_aligned(size, 16);

	/* Report an allocation failure to the caller instead of copying into
	   a null pointer. */
	if (dst == 0)
	{
		return 0;
	}

	memcpy(dst, el_get_pointer(file) + sizeof(DdsHeader) + offset + 4, size);

	return dst;
}
예제 #11
0
/*
Builds a 512-byte command block and writes it to the device at POS_CMD.

Byte 0 carries cmd, bytes 1-4 carry arg1 and bytes 5-8 carry arg2 (both most
significant byte first), byte 9 carries arg3; all remaining bytes are zero.
Returns the result of write(), or -1 when the block cannot be allocated.
*/
int sendcmd(int f,int cmd, unsigned int arg1, unsigned int arg2, unsigned char arg3) {
    unsigned char *buff;
    int i,ret;
    buff=malloc_aligned(0x200);
    if (!buff) return -1;
    /* Clear the block so no indeterminate heap contents are sent out. */
    for (i=0; i<0x200; i++) buff[i]=0;
    buff[0]=cmd;
    buff[1]=(arg1>>24)&0xff;
    buff[2]=(arg1>>16)&0xff;
    buff[3]=(arg1>>8)&0xff;
    buff[4]=(arg1>>0)&0xff;
    buff[5]=(arg2>>24)&0xff;
    buff[6]=(arg2>>16)&0xff;
    buff[7]=(arg2>>8)&0xff;
    buff[8]=(arg2>>0)&0xff;
    buff[9]=(arg3);
    lseek(f,POS_CMD,SEEK_SET);
    ret=write(f,buff,0x200);
    /* The block used to be leaked on every call; release it the same way
       is_photoframe() releases its buffer. */
    free_aligned(buff,0x200);
    return ret;
}
예제 #12
0
/*
 * Unpacks the pixel data of a DDS file into a newly allocated buffer by
 * handing the source data, the pixel count (size / bytes per pixel) and the
 * header's four channel masks to fast_unpack().
 *
 * Returns the buffer obtained from malloc_aligned(), or 0 when the header
 * declares fewer than 8 bits per pixel or the allocation fails.
 */
static void* unpack_dds(el_file_ptr file, DdsHeader *header,
	const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint8* dest;
	Uint32 size, offset, bpp;

	size = get_dds_size(header, 0, strip_mipmaps, base_level);
	offset = get_dds_offset(header, base_level);
	bpp = header->m_pixel_format.m_bit_count / 8;

	/* A bit count below 8 makes bpp zero, which would turn the pixel
	   count computation below into a division by zero. */
	if (bpp == 0)
	{
		return 0;
	}

	dest = malloc_aligned(size, 16);

	/* Do not let fast_unpack() write through a null pointer. */
	if (dest == 0)
	{
		return 0;
	}

	fast_unpack(el_get_pointer(file) + sizeof(DdsHeader) + offset + 4, size / bpp,
		header->m_pixel_format.m_red_mask,
		header->m_pixel_format.m_green_mask,
		header->m_pixel_format.m_blue_mask,
		header->m_pixel_format.m_alpha_mask, dest);

	return dest;
}
예제 #13
0
struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision)
{
  /* Only precisions inside [TW_HLL_MIN_PRECISION, TW_HLL_MAX_PRECISION]
     are accepted. */
  const int precision_ok =
      precision >= TW_HLL_MIN_PRECISION && precision <= TW_HLL_MAX_PRECISION;
  if (!precision_ok) {
    return NULL;
  }

  struct tw_hyperloglog *sketch = calloc(1, sizeof(*sketch));
  if (sketch == NULL) {
    return NULL;
  }

  /* One uint8_t register for each of the 2^precision slots, with the count
     passed through TW_ALLOC_TO_CACHELINE. */
  const size_t register_bytes =
      TW_ALLOC_TO_CACHELINE(1 << precision) * sizeof(uint8_t);

  sketch->registers = malloc_aligned(TW_CACHELINE, register_bytes);
  if (sketch->registers == NULL) {
    /* Without registers the half-built object is useless; discard it. */
    free(sketch);
    return NULL;
  }

  memset(sketch->registers, 0, register_bytes);
  sketch->precision = precision;

  return sketch;
}
예제 #14
0
static inline QUEUE* svc_queue_create(unsigned int block_size, unsigned int blocks_count, unsigned int align)
{
	/* Every block is extended by align_offset bytes: sizeof(DLIST), or the
	   requested alignment when that is larger. */
	const unsigned int list_node_size = sizeof(DLIST);
	const unsigned int align_offset = (align > list_node_size) ? align : list_node_size;
	const unsigned int stride = block_size + align_offset;
	unsigned int idx;

	/* The block storage is requested first ... */
	void* mem_block = malloc_aligned(blocks_count * stride, align_offset);
	if (!mem_block)
	{
		error(ERROR_MEM_OUT_OF_HEAP, svc_thread_name(svc_thread_get_current()));
		return NULL;
	}

	/* ... and the queue descriptor second; if the latter fails the block
	   storage is given back before the fatal error is raised. */
	QUEUE* queue = sys_alloc(sizeof(QUEUE));
	if (queue == NULL)
	{
		free(mem_block);
		fatal_error(ERROR_MEM_OUT_OF_SYSTEM_MEMORY, QUEUE_NAME);
		return NULL;
	}

	queue->align_offset = align_offset;
	queue->mem_block = mem_block;
	queue->pull_waiters = NULL;
	queue->push_waiters = NULL;
	DO_MAGIC(queue, MAGIC_QUEUE);

	/* Start with both lists empty, then append every block to the free list. */
	queue->free_blocks = NULL;
	queue->filled_blocks = NULL;
	for (idx = 0; idx < blocks_count; ++idx)
		dlist_add_tail(&queue->free_blocks, (DLIST*)((unsigned int)mem_block + idx * stride));

	return queue;
}
예제 #15
0
파일: common.cpp 프로젝트: krysanto/desmume
void* malloc_aligned16(size_t length)
{
	// Convenience wrapper: forwards to malloc_aligned with 16 as the
	// second argument.
	void* const block = malloc_aligned(length, 16);
	return block;
}
예제 #16
0
static void insertHashRF(unsigned int *bitVector, hashtable *h, unsigned int vectorLength, int treeNumber, int treeVectorLength, hashNumberType position, int support, 
			 boolean computeWRF)
{     
  if(h->table[position] != NULL)
    {
      entry *e = h->table[position];     

      do
	{	 
	  unsigned int i;
	  
	  for(i = 0; i < vectorLength; i++)
	    if(bitVector[i] != e->bitVector[i])
	      break;
	  
	  if(i == vectorLength)
	    {
	      e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH];
	      if(computeWRF)
		{
		  e->supportVector[treeNumber] = support;
		 
		  assert(0 <= treeNumber && treeNumber < treeVectorLength * MASK_LENGTH);
		}
	      return;
	    }
	  
	  e = e->next;
	}
      while(e != (entry*)NULL); 

      e = initEntry(); 
       
      /*e->bitVector  = (unsigned int*)calloc(vectorLength, sizeof(unsigned int));*/
      e->bitVector = (unsigned int*)malloc_aligned(vectorLength * sizeof(unsigned int));
      memset(e->bitVector, 0, vectorLength * sizeof(unsigned int));


      e->treeVector = (unsigned int*)calloc(treeVectorLength, sizeof(unsigned int));
      if(computeWRF)
	e->supportVector = (int*)calloc(treeVectorLength * MASK_LENGTH, sizeof(int));

      e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH];
      if(computeWRF)
	{
	  e->supportVector[treeNumber] = support;
	 
	  assert(0 <= treeNumber && treeNumber < treeVectorLength * MASK_LENGTH);
	}

      memcpy(e->bitVector, bitVector, sizeof(unsigned int) * vectorLength);
     
      e->next = h->table[position];
      h->table[position] = e;          
    }
  else
    {
      entry *e = initEntry(); 
       
      /*e->bitVector  = (unsigned int*)calloc(vectorLength, sizeof(unsigned int)); */

      e->bitVector = (unsigned int*)malloc_aligned(vectorLength * sizeof(unsigned int));
      memset(e->bitVector, 0, vectorLength * sizeof(unsigned int));

      e->treeVector = (unsigned int*)calloc(treeVectorLength, sizeof(unsigned int));
      if(computeWRF)	
	e->supportVector = (int*)calloc(treeVectorLength * MASK_LENGTH, sizeof(int));


      e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH];
      if(computeWRF)
	{
	  e->supportVector[treeNumber] = support;
	 
	  assert(0 <= treeNumber && treeNumber < treeVectorLength * MASK_LENGTH);
	}

      memcpy(e->bitVector, bitVector, sizeof(unsigned int) * vectorLength);     

      h->table[position] = e;
    }

  h->entryCount =  h->entryCount + 1;
}
예제 #17
0
파일: memory.c 프로젝트: ajkxyz/veles.simd
float *mallocf(size_t length) {
  /* Room for `length` floats, requested from malloc_aligned. */
  const size_t bytes = length * sizeof(float);
  return malloc_aligned(bytes);
}
/*
 * Computes one weighted partial log-likelihood term for alignment site i
 * with 20 states and 4 rate categories.
 *
 * The 400 entries of EI are first copied into an aligned local array.  A
 * scratch vector of 80 * mxtips doubles is allocated, filled by calling
 * computeVectorGTRGAMMAPROT() for the traversal entries
 * ti[1] .. ti[counter - 1], and released again before returning.  The tip
 * vector selected by trav->pNumber (x1) and the scratch-vector entry
 * selected by trav->qNumber (x2) are combined over all 4 categories with
 * the factors d[20 * j] = 1 and
 * d[20 * j + l] = exp(EIGN[l] * lz * gammaRates[j]),
 * where lz = log(max(qz, zmin)).
 *
 * Returns w * (log(0.25 * |term|) + scale * log(minlikelihood)).
 */
static double evaluatePartialGTRGAMMAPROT(int i, int counter,  traversalInfo *ti, double qz,
        int w, double *EIGN, double *EI, double *EV,
        double *tipVector, unsigned char **yVector,
        double *gammaRates,
        int branchReference, int mxtips)
{
    double lz, term;
    double  d[80];
    double   *x1, *x2;
    int scale = 0, k, l, j;
    double
    *lVector = (double *)malloc_aligned(sizeof(double) * 80 * mxtips),
     myEI[400]  __attribute__ ((aligned (BYTE_ALIGNMENT)));

    traversalInfo
    *trav = &ti[0];

    for(k = 0; k < 20; k++)
    {
        for(l = 0; l < 20; l++)
            myEI[k * 20 + l] = EI[k * 20 + l];
    }

    assert(isTip(trav->pNumber, mxtips));

    x1 = &(tipVector[20 *  yVector[trav->pNumber][i]]);

    for(k = 1; k < counter; k++)
    {
        /* Per-node branch values, named distinctly so they no longer shadow
           the qz parameter of this function. */
        double
        qzNode = ti[k].qz[branchReference],
        rzNode = ti[k].rz[branchReference];

        qzNode = (qzNode > zmin) ? log(qzNode) : log(zmin);
        rzNode = (rzNode > zmin) ? log(rzNode) : log(zmin);

        computeVectorGTRGAMMAPROT(lVector, &scale, gammaRates, i, qzNode, rzNode,
                                  &ti[k], EIGN, myEI, EV,
                                  tipVector, yVector, mxtips);
    }

    x2 = &lVector[80 * (trav->qNumber - mxtips)];

    assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

    /* Clamp the branch value at zmin before taking the logarithm, exactly as
       the loop above does for the per-node values.  The former code stored
       zmin in lz and then immediately overwrote it with log(qz), so the
       guard never had any effect. */
    lz  = (qz > zmin) ? log(qz) : log(zmin);

    for(j = 0; j < 4; j++)
    {
        d[20 * j] = 1.0;
        for(l = 1; l < 20; l++)
            d[20 * j + l] = EXP(EIGN[l] * lz * gammaRates[j]);
    }


    for(j = 0, term = 0.0; j < 4; j++)
    {
        for(l = 0; l < 20; l++)
            term += x1[l] * x2[20 * j + l] * d[j * 20 + l];
    }

    term = LOG(0.25 * FABS(term)) + (scale * LOG(minlikelihood));

    term = term * w;

    free(lVector);


    return  term;
}
예제 #19
0
/*
 * Initializes the VPU driver on first use and hands the driver object back
 * through ppDriver.
 *
 * The whole routine runs with initMutex held.  When nCodecInstances is
 * already positive the hardware setup is skipped and the existing pDriver
 * is handed out.  Otherwise the driver structure and its work area are
 * allocated, mciph_vpu5_init() is invoked and an interrupt handler is
 * registered.  On both successful paths *ppDriver is set and
 * nCodecInstances is incremented.
 *
 * Returns 0 when the driver was already initialized, the value returned by
 * uio_create_int_handle() after a fresh successful init, -1 when an
 * allocation fails or the work area is not 4-byte aligned, and otherwise
 * the result of the call that failed.
 *
 * NOTE(review): pDriver, nCodecInstances and initMutex are not declared in
 * this function; presumably they are file-scope objects - confirm.
 * NOTE(review): the init_failed path neither releases pDriver nor the work
 * area allocated here, and it leaves *ppDriver untouched - confirm whether
 * cleanup is expected to happen elsewhere.
 */
long
shvpu_driver_init(shvpu_driver_t **ppDriver)
{
	long ret = 0;
	unsigned long reg_base;
	int zero = 0;

	pthread_mutex_lock(&initMutex);

	/* pass the pointer if the driver was already initialized */
	if (nCodecInstances > 0)
		goto init_already;

	/*** workaround clear VP5_IRQ_ENB and VPU5_IRQ_STA ***/
	reg_base = uio_register_base();
	vpu5_mmio_write(reg_base + VP5_IRQ_ENB, (unsigned long) &zero, 1);
	vpu5_mmio_write(reg_base + VP5_IRQ_STA, (unsigned long) &zero, 1);

	pDriver = (shvpu_driver_t *)calloc(1, sizeof(shvpu_driver_t));
	if (pDriver == NULL) {
		ret = -1;
		goto init_failed;
	}
	/* calloc() has already zeroed the structure; this memset repeats it */
	memset((void *)pDriver, 0, sizeof(shvpu_driver_t));

	/*** initialize vpu ***/
	/* work_size stays at its zeroed value when neither macro is defined */
#if defined(VPU5HA_SERIES)
	pDriver->wbufVpu5.work_size = MCIPH_IP0_WORKAREA_SIZE;
#elif defined(VPU_VERSION_5)
	pDriver->wbufVpu5.work_size = MCIPH_HG_WORKAREA_SIZE;
#endif
	pDriver->wbufVpu5.work_area_addr =
		malloc_aligned(pDriver->wbufVpu5.work_size, 4);
	logd("work_area_addr = %p\n", pDriver->wbufVpu5.work_area_addr);
	/* reject a missing work area or one whose two low address bits are set */
	if ((pDriver->wbufVpu5.work_area_addr == NULL) ||
	    ((unsigned int)pDriver->wbufVpu5.work_area_addr & 0x03U)) {
		ret = -1;
		goto init_failed;
	}

	/* fill in the initialization parameters passed to mciph_vpu5_init() */
	pDriver->vpu5Init.vpu_base_address		= uio_register_base();
	pDriver->vpu5Init.vpu_image_endian		= MCIPH_LIT;
	pDriver->vpu5Init.vpu_stream_endian		= MCIPH_LIT;
	pDriver->vpu5Init.vpu_firmware_endian		= MCIPH_LIT;
	pDriver->vpu5Init.vpu_interrupt_enable		= MCIPH_ON;
	pDriver->vpu5Init.vpu_clock_supply_control	= MCIPH_CLK_CTRL;
#ifdef VPU_INTERNAL_TL
	pDriver->vpu5Init.vpu_constrained_mode		= MCIPH_VPU_TL;
#else
	pDriver->vpu5Init.vpu_constrained_mode		= MCIPH_OFF;
#endif
	pDriver->vpu5Init.vpu_address_mode		= MCIPH_ADDR_32BIT;
	pDriver->vpu5Init.vpu_reset_mode		= MCIPH_RESET_SOFT;
	/* the API tables are wired up per hardware series / component build */
#if defined(VPU5HA_SERIES)
	pDriver->vpu5Init.vpu_version			= MCIPH_NA;
	pDriver->vpu5Init.vpu_ext_init			= &(pDriver->ip0Init);

#ifdef DECODER_COMPONENT
#ifdef MPEG4_DECODER
	pDriver->ip0Init.dec_tbl[0] = &mciph_ip0_m4vdec_api_tbl;
	pDriver->ip0Init.dec_tbl[1] = &mciph_ip0_m4vdec_api_tbl;
#endif
	pDriver->ip0Init.dec_tbl[2] = &mciph_ip0_avcdec_api_tbl;
	pDriver->apiTbl.dec_api_tbl 	= &mciph_ip0_dec_api_tbl;
#endif
#ifdef ENCODER_COMPONENT
	pDriver->ip0Init.enc_tbl[2] = &mciph_ip0_avcenc_api_tbl;
	pDriver->apiTbl.enc_api_tbl 	= &mciph_ip0_enc_api_tbl;
#endif

	pDriver->apiTbl.cmn_api_tbl 	= &mciph_ip0_cmn_api_tbl;
#if defined(VPU_VERSION_5HD)
	pDriver->ip0Init.drv_extensions = 0x3;
#endif
#elif defined(VPU_VERSION_5)
	memcpy(&(pDriver->apiTbl), &mciph_hg_api_tbl, sizeof(mciph_hg_api_tbl));
#endif
	logd("----- invoke mciph_vpu5Init() -----\n");
	ret = mciph_vpu5_init(&(pDriver->wbufVpu5),
			      &(pDriver->apiTbl),
			      &(pDriver->vpu5Init),
			      &(pDriver->pDrvInfo));
	logd("----- resume from mciph_vpu5_init() -----\n");

	if (ret != MCIPH_NML_END)
		goto init_failed;

	/* register an interrupt handler */
	tsem_init(&pDriver->uioSem, 0);
	ret = uio_create_int_handle(&pDriver->intrHandler,
				    handle_shvpu5_interrupt,
				    pDriver->pDrvInfo,
				    &pDriver->uioSem, &pDriver->isExit);
	if (ret < 0)
		goto init_failed;

	/* success (fresh or repeated): publish the driver and count the user;
	   execution then falls through into the shared unlock below */
init_already:
	*ppDriver = pDriver;
	nCodecInstances++;
init_failed:
	pthread_mutex_unlock(&initMutex);
	return ret;
}
예제 #20
0
파일: veth.c 프로젝트: AjayMashi/x-tier
/*
 * Brings the interface up: allocates the buffer list, the receive queue and
 * the receive buffers, registers them through h_register_logical_lan() and
 * hands RX_QUEUE_SIZE receive buffers to h_add_logical_lan_buffer().
 *
 * Does nothing and returns 0 when snk_module_interface->running is already
 * non-zero.  Returns 0 on success (running is then set to 1) and -1 when an
 * allocation or the registration fails; in that case everything allocated
 * here is released and the corresponding pointers are reset to NULL.
 */
static int veth_init(void)
{
	char *mac_addr = snk_module_interface->mac_addr;
	union ibmveth_buf_desc rxq_desc;
	unsigned long rx_queue_len = sizeof(struct ibmveth_rx_q_entry) *
		RX_QUEUE_SIZE;
	unsigned int i;
	long rc;

	dprintk("veth_init(%02x:%02x:%02x:%02x:%02x:%02x)\n",
		mac_addr[0], mac_addr[1], mac_addr[2],
		mac_addr[3], mac_addr[4], mac_addr[5]);

	if (snk_module_interface->running != 0)
		return 0;

	cur_rx_toggle = IBMVETH_RXQ_TOGGLE;
	cur_rx_index = 0;
	buffer_list = malloc_aligned(8192, 4096);
	/* filter_list is not an allocation of its own: it points 4096 into the
	 * buffer_list block.  Derive it only from a valid base so that it
	 * cannot become a bogus non-NULL pointer. */
	filter_list = buffer_list ? buffer_list + 4096 : NULL;
	rx_queue = malloc_aligned(rx_queue_len, 16);
	rx_bufs = malloc(2048 * RX_QUEUE_SIZE + 4);
	if (!buffer_list || !rx_queue || !rx_bufs) {
		printk("veth: Failed to allocate memory !\n");
		goto fail;
	}
	/* round the receive buffer area up to the next multiple of 4 */
	rx_bufs_aligned = (uint64_t *)(((uint64_t)rx_bufs | 3) + 1);
	rxq_desc.fields.address = vaddr_to_dma(rx_queue);
	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | rx_queue_len;

	rc = h_register_logical_lan(g_reg,
				    vaddr_to_dma(buffer_list),
				    rxq_desc.desc,
				    vaddr_to_dma(filter_list),
				    (*(uint64_t *)mac_addr) >> 16);
	if (rc != H_SUCCESS) {
		printk("veth: Error %ld registering interface !\n", rc);
		goto fail;
	}
	for (i = 0; i < RX_QUEUE_SIZE; i++) {
		uint64_t *buf = veth_get_rx_buf(i);
		union ibmveth_buf_desc desc;
		*buf = (uint64_t)buf;
		desc.fields.address = vaddr_to_dma(buf);
		desc.fields.flags_len = IBMVETH_BUF_VALID | RX_BUF_SIZE;
		h_add_logical_lan_buffer(g_reg, desc.desc);
	}

	snk_module_interface->running = 1;

	return 0;
 fail:
	/* filter_list lives inside the buffer_list block and must never be
	 * passed to free() itself; it simply becomes invalid together with
	 * buffer_list. */
	filter_list = NULL;
	if (buffer_list)
		free(buffer_list);
	buffer_list = NULL;
	if (rx_queue)
		free(rx_queue);
	rx_queue = NULL;
	if (rx_bufs)
		free(rx_bufs);
	rx_bufs = NULL;
	rx_bufs_aligned = NULL;
	return -1;
}
예제 #21
0
/*
 * Creates an enhancement filter object and returns it through p_handle.
 *
 * p_handle - receives the filter pointer cast to int32_t; it is set to 0
 *            first and only overwritten on success.
 * format   - stored unchanged in the filter's format field.
 * nMode    - mode bits; must pass IMGEE_IS_VALID() and contain at least one
 *            of the AUTO, HDR, BEAUTY, SHARPEN, DENOISE or SHARPEN_EX bits.
 *
 * A CLAHE filter is created when one of the AUTO/HDR/BEAUTY bits is set and
 * a guided filter is created unless IMGEE_IS_FAST(nMode) holds.
 *
 * Returns ZXIMGCORE_OK on success, ZXIMGCORE_PARA_ERR for a rejected nMode,
 * ZXIMGCOER_MEM_ERR when the filter object cannot be allocated, and
 * ZXIMGCORE_INITCLAHE_ERR / ZXIMGCORE_INITGUIDED_ERR when the respective
 * sub-filter fails to initialize.  On every failure after the allocation
 * the sub-filters created so far and the filter object are released.
 *
 * NOTE(review): the filter pointer is narrowed to int32_t for the handle -
 * confirm this is only built for targets with 32-bit pointers.
 */
int ZXImgEnhanceInit(int* p_handle,int format,int nMode)
{
    ZXImgEEFilter* pFilter = NULL;
	int32_t	ret = ZXIMGCORE_OK, eps0_y, eps1_y, eps0_uv, eps1_uv;
    float_t         fSaturation, fContrast;
    double_t        lumda0_y, lumda1_y, lumda0_uv, lumda1_uv;

	//#0 check the nMode parameter
	*p_handle = 0;
	if(!IMGEE_IS_VALID(nMode))
		return	ZXIMGCORE_PARA_ERR;
	// at least one of the known mode bits has to be present
	if(((nMode&IMGEE_MODE_AUTO)==0)&&
		((nMode&IMGEE_MODE_HDR)==0)&&
		((nMode&IMGEE_MODE_BEAUTY)==0)&&
		((nMode&IMGEE_MODE_SHARPEN)==0)&&
		((nMode&IMGEE_MODE_DENOISE)==0)&&
		((nMode&IMGEE_MODE_SHARPEN_EX)==0))
		return ZXIMGCORE_PARA_ERR;
	//#1 allocate the filter object
	pFilter = (ZXImgEEFilter*)malloc_aligned(sizeof(ZXImgEEFilter),4);
	if(pFilter==NULL)
	{
		ret = ZXIMGCOER_MEM_ERR;
		goto _error;
	}
	// zeroing makes the handle checks in the _error path safe
	memset(pFilter,0,sizeof(ZXImgEEFilter));
	//#1.1 create the CLAHE filter with per-mode saturation / contrast
	if((nMode&IMGEE_MODE_AUTO)||(nMode&IMGEE_MODE_HDR)||(nMode&IMGEE_MODE_BEAUTY))
	{
        switch(IMGEE_GET_MODE(nMode))
        {
        case IMGEE_MODE_AUTO:
            fSaturation = 1.6f;
            fContrast   = 1.7f;
            break;
        case IMGEE_MODE_HDR:
            fSaturation = 1.6f;
            fContrast   = 2.5f;
            break;
        case IMGEE_MODE_BEAUTY:
            fSaturation = 1.5f;
            fContrast   = 1.7f;
            break;
        default: // same values as the AUTO case
            fSaturation = 1.6f;
            fContrast   = 1.7f;
            break;
        };

        ret = sndaClaheFilterInit(&pFilter->pCFltHandle,4,4,256,
                                fSaturation, fContrast,1);
		if(ret!=CLAHE_FILTER_OK)
		{
			ret = ZXIMGCORE_INITCLAHE_ERR;
			goto _error;
		}
	}
    //#1.2 create the guided filter with per-mode eps / lumda values
	if(!(IMGEE_IS_FAST(nMode)))
	{
		switch(IMGEE_GET_MODE(nMode))
		{
			case IMGEE_MODE_AUTO:
            
					eps0_y  = 8;    lumda0_y  = 1.00;
					eps1_y  = 8192; lumda1_y  = 1.65;
					eps0_uv = 64;   lumda0_uv = 0.25;
					eps1_uv = 8192; lumda1_uv = 1.00;
					break;
			case IMGEE_MODE_HDR:
					eps0_y  = 32;   lumda0_y  = 0.50;
					eps1_y  = 8192; lumda1_y  = 1.50;
					eps0_uv = 64;   lumda0_uv = 0.25;
					eps1_uv = 8192; lumda1_uv = 1.00;
					break;
			case IMGEE_MODE_BEAUTY:
					eps0_y  = 128;  lumda0_y  = 0.50;
					eps1_y  = 8192; lumda1_y  = 1.75;  //1.75
					eps0_uv = 128;  lumda0_uv = 0.00;
					eps1_uv = 8192; lumda1_uv = 1.50;  //1.5
					break;
			default: // labelled "same as auto", but lumda1_y is 1.50 here and 1.65 in the AUTO case - NOTE(review): confirm which value is intended
					eps0_y  = 8;    lumda0_y  = 1.00;
					eps1_y  = 8192; lumda1_y  = 1.50;
					eps0_uv = 64;   lumda0_uv = 0.25;
					eps1_uv = 8192; lumda1_uv = 1.00;
					break;
		}
		ret = sndaGuidedFilterInit(&pFilter->pGFltHandle,eps0_y,eps1_y,eps0_uv, eps1_uv,
									(float_t)lumda0_y,(float_t)lumda1_y,(float_t)lumda0_uv,(float_t)lumda1_uv,2);
		if(ret != GUIDED_FILTER_OK)
		{
			ret = ZXIMGCORE_INITGUIDED_ERR;
			goto _error;
		}
	}
	//#2 publish the finished filter through the handle
	pFilter->format = format;
	*p_handle = (int32_t)pFilter;
    return ZXIMGCORE_OK;

_error:
	// release whatever was created before the failure; ret holds the error code
	if(pFilter)
	{
		if(pFilter->pGFltHandle)
		{
			sndaGuidedFilterRelease(pFilter->pGFltHandle);
			pFilter->pGFltHandle = 0;
		}
		if(pFilter->pCFltHandle)
		{
			sndaClaheFilterRelease(pFilter->pCFltHandle);
			pFilter->pCFltHandle = 0;
		}
		free_aligned(pFilter);
		pFilter = NULL;
	}
	return ret;
}
예제 #22
0
// Allocates the buffers of the component's output port.
//
// use_buffers: when true each buffer's memory comes from malloc_aligned()
//              and is registered with OMX_UseBuffer(); when false the
//              buffers are obtained through OMX_AllocateBuffer().
//
// The port definition is queried first; the component is moved to
// OMX_StateIdle if it is not already there, the output port is enabled and
// nBufferCountActual buffers are created.  Every buffer header is stored in
// m_omx_output_buffers and pushed onto m_omx_output_available.
//
// Returns OMX_ErrorUndefined without a handle, the first failing OMX error
// otherwise, or the result of waiting for the port-enable command.
//
// NOTE(review): when a buffer fails part-way through the loop the function
// returns immediately; the buffers registered in earlier iterations are not
// released here - confirm that a caller cleans them up.
// NOTE(review): the result of malloc_aligned() is passed to OMX_UseBuffer()
// without a null check.
OMX_ERRORTYPE COMXCoreComponent::AllocOutputBuffers(bool use_buffers /* = false */)
{
    OMX_ERRORTYPE omx_err = OMX_ErrorNone;

    if(!m_handle)
        return OMX_ErrorUndefined;

    m_omx_output_use_buffers = use_buffers;

    // Query the current definition of the output port.
    OMX_PARAM_PORTDEFINITIONTYPE portFormat;
    OMX_INIT_STRUCTURE(portFormat);
    portFormat.nPortIndex = m_output_port;

    omx_err = OMX_GetParameter(m_handle, OMX_IndexParamPortDefinition, &portFormat);
    if(omx_err != OMX_ErrorNone)
        return omx_err;

    // Reach the idle state, going through the loaded state unless the
    // component is already loaded.
    if(GetState() != OMX_StateIdle) {
        if(GetState() != OMX_StateLoaded)
            SetStateForComponent(OMX_StateLoaded);

        SetStateForComponent(OMX_StateIdle);
    }

    // Request the port enable without waiting; the wait happens after the
    // buffers have been supplied (see WaitForCommand below).
    omx_err = EnablePort(m_output_port, false);
    if(omx_err != OMX_ErrorNone)
        return omx_err;

    m_output_alignment = portFormat.nBufferAlignment;
    m_output_buffer_count = portFormat.nBufferCountActual;
    m_output_buffer_size = portFormat.nBufferSize;

    Logger::LogOut(LOG_LEVEL_DEBUG, "COMXCoreComponent::AllocOutputBuffers component(%s) - port(%d), nBufferCountMin(%u), nBufferCountActual(%u), nBufferSize(%u) nBufferAlignmen(%u)",
            m_componentName.c_str(), m_output_port, portFormat.nBufferCountMin,
            portFormat.nBufferCountActual, portFormat.nBufferSize, portFormat.nBufferAlignment);

    for (size_t i = 0; i < portFormat.nBufferCountActual; i++) {
        OMX_BUFFERHEADERTYPE *buffer = NULL;
        OMX_U8* data = NULL;

        if(m_omx_output_use_buffers) {
            // Client-owned memory, sized and aligned as the port reported.
            data = (OMX_U8*)malloc_aligned(portFormat.nBufferSize, m_output_alignment);
            omx_err = OMX_UseBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize, data);
        }
        else {
            omx_err = OMX_AllocateBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize);
        }

        if(omx_err != OMX_ErrorNone) {
            // The message names OMX_UseBuffer for both allocation paths.
            Logger::LogOut(LOG_LEVEL_ERROR, "COMXCoreComponent::AllocOutputBuffers component(%s) - OMX_UseBuffer failed with omx_err(0x%x)",
                    m_componentName.c_str(), omx_err);

            // Only the memory of the buffer that just failed is released.
            if(m_omx_output_use_buffers && data)
                free_aligned(data);

            return omx_err;
        }
        buffer->nOutputPortIndex = m_output_port;
        buffer->nFilledLen = 0;
        buffer->nOffset = 0;
        // The loop index is stored in the header's private field.
        buffer->pAppPrivate = (void*)i;
        m_omx_output_buffers.push_back(buffer);
        m_omx_output_available.push(buffer);
    }
    // Now wait for the port-enable command issued above.
    omx_err = WaitForCommand(OMX_CommandPortEnable, m_output_port);

    m_flush_output = false;

    return omx_err;
}
예제 #23
0
/*
 * Decompresses the block-compressed image data of a DDS file into a newly
 * allocated buffer.
 *
 * Only headers whose width and height are multiples of four and whose
 * fourcc is one of DXT1-DXT5, ATI1 or ATI2 are accepted.  Starting at
 * base_level, every mipmap level (just the base level when strip_mipmaps is
 * non-zero) is decoded in 4x4 blocks by decompress_block(); width and
 * height are halved after each level, but never below 1.
 *
 * Returns the buffer obtained from malloc_aligned(), or 0 when the header
 * is rejected or the allocation fails.
 */
static void* decompress_dds(el_file_ptr file, DdsHeader *header,
	const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint32 width, height, size, format, mipmap_count;
	Uint32 x, y, i, w, h;
	Uint32 index;
	Uint8 *dest;

	if ((header->m_height % 4) != 0)
	{
		LOG_ERROR_OLD("Can`t decompressed DDS file %s because height is"
			" %d and not a multiple of four.", el_file_name(file),
			header->m_height);
		return 0;
	}

	if ((header->m_width % 4) != 0)
	{
		LOG_ERROR_OLD("Can`t decompressed DDS file %s because width is"
			" %d and not a multiple of four.", el_file_name(file),
			header->m_width);
		return 0;
	}

	format = header->m_pixel_format.m_fourcc;

	if ((format != DDSFMT_DXT1) && (format != DDSFMT_DXT2) &&
		(format != DDSFMT_DXT3) && (format != DDSFMT_DXT4) &&
		(format != DDSFMT_DXT5) && (format != DDSFMT_ATI1) &&
		(format != DDSFMT_ATI2))
	{
		return 0;
	}

	index = 0;

	size = get_dds_size(header, 1, strip_mipmaps, base_level);
	width = max2u(header->m_width >> base_level, 1);
	height = max2u(header->m_height >> base_level, 1);
	mipmap_count = header->m_mipmap_count;

	if (strip_mipmaps != 0)
	{
		if (mipmap_count > (base_level + 1))
		{
			mipmap_count = base_level + 1;
		}
	}

	dest = malloc_aligned(size, 16);

	/* Fail like the other rejected cases above instead of handing a null
	   destination to decompress_block(). */
	if (dest == 0)
	{
		return 0;
	}

	el_seek(file, get_dds_offset(header, base_level), SEEK_CUR);

	for (i = base_level; i < mipmap_count; i++)
	{
		w = (width + 3) / 4;
		h = (height + 3) / 4;

		assert(index * 4 <= size);

		// 4x4 blocks in x/y
		for (y = 0; y < h; y++)
		{
			for (x = 0; x < w; x++)
			{
				decompress_block(file, format, x * 4, y * 4,
					width, height, index, dest);
			}
		}

		/* index counts pixels written so far; it is passed on to
		   decompress_block() for the next level */
		index += width * height;

		if (width > 1)
		{
			width /= 2;
		}

		if (height > 1)
		{
			height /= 2;
		}
	}

	assert(index * 4 == size);

	return dest;
}
예제 #24
0
/*
 * Walks the traversal descriptor tr->td[0] from entry 1 to the end and, for
 * every partition, calls the ancestral "newview" routine that matches the
 * tree's rate heterogeneity model (CAT, or GAMMA / GAMMA_I).
 *
 * For each entry the two child inputs are selected according to tipCase
 * (tip sequences from yVector, inner vectors from xVector), the branch
 * values are taken per partition when tr->multiBranch is set and from
 * slot 0 otherwise, and a temporary diagptable is allocated, filled by
 * makeP_Flex_Ancestral() and freed again around the newview call.
 */
void newviewIterativeAncestral(tree *tr)
{
  traversalInfo *traversal = tr->td[0].ti;
  int step;
  int part;

  /* These implementation variants are excluded by the assertions below. */
  assert(!tr->useGappedImplementation);
  assert(!tr->saveMemory);
  assert(!tr->estimatePerSiteAA);

  for(step = 1; step < tr->td[0].count; step++)
    {
      traversalInfo *node = &traversal[step];

      for(part = 0; part < tr->NumberOfModels; part++)
	{
	  double *innerQ = (double*)NULL;
	  double *innerR = (double*)NULL;
	  double *left   = (double*)NULL;
	  double *right  = (double*)NULL;
	  double qz;
	  double rz;

	  unsigned char *tipQ = (unsigned char *)NULL;
	  unsigned char *tipR = (unsigned char *)NULL;

	  size_t states = (size_t)tr->partitionData[part].states;
	  size_t width  = tr->partitionData[part].width;

	  /* Pick the child inputs for this node. */
	  switch(node->tipCase)
	    {
	    case TIP_TIP:
	      tipQ = tr->partitionData[part].yVector[node->qNumber];
	      tipR = tr->partitionData[part].yVector[node->rNumber];
	      break;
	    case TIP_INNER:
	      tipQ   = tr->partitionData[part].yVector[node->qNumber];
	      innerR = tr->partitionData[part].xVector[node->rNumber - tr->mxtips - 1];
	      break;
	    case INNER_INNER:
	      innerQ = tr->partitionData[part].xVector[node->qNumber - tr->mxtips - 1];
	      innerR = tr->partitionData[part].xVector[node->rNumber - tr->mxtips - 1];
	      break;
	    default:
	      assert(0);
	    }

	  left  = tr->partitionData[part].left;
	  right = tr->partitionData[part].right;

	  /* Branch values: one slot per partition, or the shared slot 0. */
	  if(tr->multiBranch)
	    {
	      qz = node->qz[part];
	      rz = node->rz[part];
	    }
	  else
	    {
	      qz = node->qz[0];
	      rz = node->rz[0];
	    }

	  switch(tr->rateHetModel)
	    {
	    case CAT:
	      {
		double *diagptable =
		  (double*)malloc_aligned(tr->partitionData[part].numberOfCategories * states * states * sizeof(double));

		makeP_Flex(qz, rz, tr->partitionData[part].perSiteRates,
			   tr->partitionData[part].EI,
			   tr->partitionData[part].EIGN,
			   tr->partitionData[part].numberOfCategories, left, right, states);

		makeP_Flex_Ancestral(tr->partitionData[part].perSiteRates,
				     tr->partitionData[part].EI,
				     tr->partitionData[part].EIGN,
				     tr->partitionData[part].numberOfCategories, diagptable, states);

		newviewFlexCat_Ancestral(node->tipCase,  tr->partitionData[part].EV, tr->partitionData[part].rateCategory,
					 innerQ, innerR, tr->partitionData[part].tipVector,
					 tipQ, tipR, width, left, right, states, diagptable,
					 tr->partitionData[part].sumBuffer);

		free(diagptable);
	      }
	      break;
	    case GAMMA:
	    case GAMMA_I:
	      {
		double *diagptable =
		  (double*)malloc_aligned(4 * states * states * sizeof(double));

		makeP_Flex(qz, rz, tr->partitionData[part].gammaRates,
			   tr->partitionData[part].EI,
			   tr->partitionData[part].EIGN,
			   4, left, right, states);

		makeP_Flex_Ancestral(tr->partitionData[part].gammaRates,
				     tr->partitionData[part].EI,
				     tr->partitionData[part].EIGN,
				     4, diagptable, states);

		newviewFlexGamma_Ancestral(node->tipCase,
					   innerQ, innerR,
					   tr->partitionData[part].EV,
					   tr->partitionData[part].tipVector,
					   tipQ, tipR,
					   width, left, right, states, diagptable, tr->partitionData[part].sumBuffer);

		free(diagptable);
	      }
	      break;
	    default:
	      assert(0);
	    }
	}
    }
}
예제 #25
0
파일: common.cpp 프로젝트: krysanto/desmume
void* malloc_aligned32(size_t length)
{
	// Convenience wrapper: forwards to malloc_aligned with 32 as the
	// second argument.
	void* const block = malloc_aligned(length, 32);
	return block;
}
예제 #26
0
/*
 * Builds the bit-packed parsimony vectors for every partition of the tree.
 *
 * For each partition the weighted informative sites in [lower, upper) are
 * counted, packed PCF sites per parsimonyNumber word, and written into a newly
 * allocated, zero-initialised tr->partitionData[model].parsVect.  Only the
 * slots of node indices 1..mxtips are filled here.  The per-partition word
 * count is stored in parsimonyLength.  Finally tr->parsimonyScore is
 * allocated with one zeroed entry per node slot.
 *
 * tr           tree whose partitionData, yVector, aliaswgt and mxtips are read
 *              and whose parsVect, parsimonyLength and parsimonyScore are set
 * informative  per-site flags; sites whose flag is zero are skipped
 */
static void compressDNA(tree *tr, int *informative)
{
  size_t
    totalNodes,
    i,
    model;
   
  /* parsVect and parsimonyScore are dimensioned for 2 * mxtips node slots */
  totalNodes = 2 * (size_t)tr->mxtips;

 

  for(model = 0; model < (size_t) tr->NumberOfModels; model++)
    {
      size_t
	k,
	states = (size_t)tr->partitionData[model].states,       
	compressedEntries,
	compressedEntriesPadded,
	entries = 0, 
	lower = tr->partitionData[model].lower,
	upper = tr->partitionData[model].upper;

      /* Per-state scratch: compressedTips[k] is the destination row of state k
         for the tip being processed, compressedValues[k] is the word currently
         being assembled for that state.
         NOTE(review): both are obtained with malloc() but released with
         rax_free() below - confirm the two are compatible. */
      parsimonyNumber 
	**compressedTips = (parsimonyNumber **)malloc(states * sizeof(parsimonyNumber*)),
	*compressedValues = (parsimonyNumber *)malloc(states * sizeof(parsimonyNumber));
      
      /* every informative site contributes aliaswgt[i] entries */
      for(i = lower; i < upper; i++)    
	if(informative[i])
	  entries += (size_t)tr->aliaswgt[i];     
  
      /* number of words needed at PCF entries per word, rounded up */
      compressedEntries = entries / PCF;

      if(entries % PCF != 0)
	compressedEntries++;

#if (defined(__SIM_SSE3) || defined(__AVX))
      /* vectorised builds: round the word count up to a multiple of INTS_PER_VECTOR */
      if(compressedEntries % INTS_PER_VECTOR != 0)
	compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
      else
	compressedEntriesPadded = compressedEntries;
#else
      compressedEntriesPadded = compressedEntries;
#endif     

      
      /* one block of compressedEntriesPadded * states words per node slot, cleared to 0 */
      tr->partitionData[model].parsVect = (parsimonyNumber *)malloc_aligned((size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
     
      for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)      
	tr->partitionData[model].parsVect[i] = 0;          

      /* fill the vectors of the tips; tip i is stored under node index i + 1 */
      for(i = 0; i < (size_t)tr->mxtips; i++)
	{
	  size_t
	    w = 0,
	    compressedIndex = 0,
	    compressedCounter = 0,
	    index = 0;

	  /* Layout inside parsVect: node index (i + 1) starts at
	     compressedEntriesPadded * states * (i + 1); within that block the
	     row of state k starts at compressedEntriesPadded * k. */
	  for(k = 0; k < states; k++)
	    {
	      compressedTips[k] = &(tr->partitionData[model].parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
	      compressedValues[k] = 0;
	    }                
	      
	  for(index = lower; index < (size_t)upper; index++)
	    {
	      if(informative[index])
		{
		  const unsigned int 
		    *bitValue = getBitVector(tr->partitionData[model].dataType);

		  /* bit pattern looked up for the character of tip i + 1 at this site */
		  parsimonyNumber 
		    value = bitValue[tr->yVector[i + 1][index]];	  
	      
		  /* the site is emitted aliaswgt[index] times */
		  for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
		    {	   
		      /* state k records this entry (position compressedCounter of the
		         current word) when value & mask32[k] is non-zero */
		      for(k = 0; k < states; k++)
			{
			  if(value & mask32[k])
			    compressedValues[k] |= mask32[compressedCounter];
			}
		     
		      compressedCounter++;
		  
		      /* word complete: flush it to every state row and start a new one */
		      if(compressedCounter == PCF)
			{
			  for(k = 0; k < states; k++)
			    {
			      compressedTips[k][compressedIndex] = compressedValues[k];
			      compressedValues[k] = 0;
			    }			 
			  
			  compressedCounter = 0;
			  compressedIndex++;
			}
		    }
		}
	    }
                           
	  /* Padding: the unused positions of a partially filled word, and every
	     position of the remaining words up to compressedEntriesPadded, are
	     set for all states before the word is written out. */
	  for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
	    {	
	      for(;compressedCounter < PCF; compressedCounter++)	      
		for(k = 0; k < states; k++)
		  compressedValues[k] |= mask32[compressedCounter];		  
	  
	      for(k = 0; k < states; k++)
		{
		  compressedTips[k][compressedIndex] = compressedValues[k];
		  compressedValues[k] = 0;
		}	      	      
	      
	      compressedCounter = 0;
	    }	 	
	}               
  
      /* remember the (padded) number of words per state row for this partition */
      tr->partitionData[model].parsimonyLength = compressedEntriesPadded;   

      rax_free(compressedTips);
      rax_free(compressedValues);
    }
  
  /* one score per node slot, initialised to 0 */
  tr->parsimonyScore = (unsigned int*)malloc_aligned(sizeof(unsigned int) * totalNodes);  
          
  for(i = 0; i < totalNodes; i++) 
    tr->parsimonyScore[i] = 0;
}
예제 #27
0
파일: common.cpp 프로젝트: krysanto/desmume
/* Convenience wrapper: forwards `length` to malloc_aligned() together with
   the constant 64 as its second argument and hands back whatever that call
   returns. */
void* malloc_aligned64(size_t length)
{
	enum { SECOND_ARGUMENT = 64 };

	return malloc_aligned(length, SECOND_ARGUMENT);
}
/*
 * Computes the weighted log-likelihood term of alignment site i for the
 * traversal described by ti[0..counter-1], for an arbitrary number of states.
 *
 * ti[0].pNumber must be a tip (asserted) and ti[0].qNumber must refer to an
 * inner node, i.e. 0 <= qNumber - mxtips < mxtips (asserted).  The entries
 * ti[1..counter-1] are handed to computeVectorCAT_FLEX() one by one to fill
 * the scratch vector, after which the tip vector of p and the computed vector
 * of q are combined across the branch.
 *
 *   i                site index (column of yVector that is read)
 *   ki               factor the logarithm of the branch value is multiplied by
 *   counter          number of entries in ti
 *   ti               traversal descriptor array
 *   qz               branch value of the evaluated branch; values that are not
 *                    greater than zmin are treated as zmin
 *   w                weight the resulting term is multiplied by
 *   EIGN             array read as EIGN[1..states-1] for the exponentials
 *   EI, EV           only forwarded to computeVectorCAT_FLEX()
 *   tipVector        tip vectors, `states` doubles per character code
 *   yVector          per-node character arrays, indexed [node][site]
 *   branchReference  index into ti[k].qz / ti[k].rz
 *   mxtips           number of tips
 *   states           number of states per vector entry
 *
 * Returns w * (LOG(FABS(sum_k x1[k] * x2[k] * d[k])) + scale * LOG(minlikelihood)).
 */
static double evaluatePartialCAT_FLEX(int i, double ki, int counter,  traversalInfo *ti, double qz,
                                      int w, double *EIGN, double *EI, double *EV,
                                      double *tipVector, unsigned  char **yVector,
                                      int branchReference, int mxtips, const int states)
{
    int
    scale = 0,
    k;

    double
    *lVector = (double *)malloc_aligned(sizeof(double) * states * mxtips),
     *d = (double *)malloc_aligned(sizeof(double) * states),
      lz,
      term,
      *x1,
      *x2;

    traversalInfo
    *trav = &ti[0];

    assert(isTip(trav->pNumber, mxtips));

    x1 = &(tipVector[states *  yVector[trav->pNumber][i]]);

    for(k = 1; k < counter; k++)
    {
        /* Named differently from the qz parameter so that the branch value of
           the evaluated branch is not shadowed inside the loop. */
        double
        stepQz = ti[k].qz[branchReference],
        stepRz = ti[k].rz[branchReference];

        stepQz = (stepQz > zmin) ? log(stepQz) : log(zmin);
        stepRz = (stepRz > zmin) ? log(stepRz) : log(zmin);

        computeVectorCAT_FLEX(lVector, &scale, ki, i, stepQz, stepRz, &ti[k],
                              EIGN, EI, EV,
                              tipVector, yVector, mxtips, states);
    }

    x2 = &lVector[states * (trav->qNumber - mxtips)];

    assert(0 <=  (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

    /* Clamp the branch value to zmin before taking the logarithm, the same way
       the loop above does.  An unclamped log(qz) would let values below zmin
       (including zero or negative ones) reach log(). */
    lz  = (qz > zmin) ? log(qz) : log(zmin);
    lz *= ki;

    /* d[0] is fixed to 1; the remaining factors use EIGN[1..states-1] */
    d[0] = 1.0;

    for(k = 1; k < states; k++)
        d[k] = EXP (EIGN[k] * lz);

    term = 0.0;

    for(k = 0; k < states; k++)
        term += x1[k] * x2[k] * d[k];

    /* add back the scaling accumulated by computeVectorCAT_FLEX() */
    term = LOG(FABS(term)) + (scale * LOG(minlikelihood));

    term = term * w;

    rax_free(lVector);
    rax_free(d);

    return  term;
}
예제 #29
0
/** 
    Reads from the byte file only the data assigned to process procId:
    the alignment characters and the site weights of every Assignment in
    pa->assignPerProc[procId].

    For each assignment the partition's width and offset are set, the
    per-taxon storage is allocated (zero-filled) and filled from the
    ALN_ALIGNMENT section; afterwards partition->wgt is allocated and
    filled from the ALN_WEIGHTS section. On return both sections are
    marked in bf->hasRead.

    bf      byte file handle; bf->partitions[...] are updated in place
    pa      assignment of partition slices to processes
    procId  index of the process whose assignments are read
 */ 
void readMyData(ByteFile *bf, PartitionAssignment *pa, int procId)
{
  seekPos(bf, ALN_ALIGNMENT); 

  /* file position at which the alignment section starts */
  exa_off_t
    alnPos = exa_ftell(bf->fh); 

  size_t 
    len; 

  int numAssign = pa->numAssignPerProc[procId];
  Assignment *myAssigns = pa->assignPerProc[procId];

  /* first read aln characters   */
  int i,j ; 
  for(i = 0; i < numAssign; ++i )
    {
      Assignment a = myAssigns[i]; 
      /* printf("reading for: ") ;  */
      /* printAssignment(a, procId);  */

      pInfo 
	*partition = bf->partitions[a.partId];     

      partition->width = a.width; 
      partition->offset = a.offset; 
      /* number of (taxon, site) cells held by this process for the partition */
      len = (size_t)bf->numTax * a.width; 

      if(isPomo(partition->dataType))
	{	  
	  /* Pomo partitions store `states` doubles per cell in two buffers.
	     xTip is afterwards only reachable through xTipVector[1..numTax]. */
	  double 
	    *xTip =  (double *)malloc_aligned(len * (size_t)partition->states * sizeof(double));
	  
	  partition->xResource = (double *)malloc_aligned(len * (size_t)partition->states * sizeof(double)); 
	  
	  memset(partition->xResource, 0, len * (size_t)partition->states * sizeof(double));  
	  memset(xTip,                 0, len * (size_t)partition->states * sizeof(double)); 

	  /* pointer tables are indexed 1..numTax; entry 0 stays NULL from calloc */
	  partition->xTipCLV    = (double **)calloc((size_t)bf->numTax + 1 , sizeof(double *)); 
	  partition->xTipVector = (double **)calloc((size_t)bf->numTax + 1 , sizeof(double *)); 

	  for(j = 1; j <= bf->numTax; ++j)
	    {
	      partition->xTipCLV[j]    = partition->xResource + (size_t)(j-1) * a.width * (size_t)partition->states; 
	      partition->xTipVector[j] = xTip                 + (size_t)(j-1) * a.width * (size_t)partition->states;	      
	    }
	}
      else
	{
	  /* all other data types: one byte per cell, row j starts at (j-1) * a.width */
	  partition->yResource = (unsigned char*)malloc_aligned( len * sizeof(unsigned char)); 
	  memset(partition->yResource,0,(size_t)len * sizeof(unsigned char)); 
	  partition->yVector = (unsigned char**) calloc((size_t)bf->numTax + 1 , sizeof(unsigned char*)); 
	  for(j = 1; j <= bf->numTax; ++j)
	    partition->yVector[j] = partition->yResource + (size_t)(j-1) * a.width; 
	}

#ifdef OLD_LAYOUT
      /* Legacy layout: one seek and read per taxon, with a row stride of
         bf->numPattern characters.
         NOTE(review): this path always reads into yVector[j], which is only
         set up above for non-Pomo partitions - confirm OLD_LAYOUT is never
         combined with Pomo data. */
      for(j = 1; j <= bf->numTax; ++j )
	{
	  exa_off_t pos = alnPos + (  bf->numPattern * (j-1)    +  partition->lower + a.offset ) * sizeof(unsigned char); 
	  assert(alnPos <= pos); 
	  exa_fseek(bf->fh, pos, SEEK_SET); 
	  READ_ARRAY(bf->fh, partition->yVector[j], a.width, sizeof(unsigned char));
	}
#else 
      /*  if the entire partition is assigned to this process, read it
          in one go. Otherwise, several seeks are necessary.  */
      if( a.width == (partition->upper - partition->lower ) )
        { 
	  if(isPomo(partition->dataType))
	    {
	      /* partition starts lower * numTax cells into the section, `states` doubles per cell */
	      exa_off_t
		pos = alnPos +  (exa_off_t)partition->lower * (exa_off_t)bf->numTax * (exa_off_t)partition->states * (exa_off_t)sizeof(double); 
	      
	      assert(alnPos <= pos); 
	      exa_fseek(bf->fh, pos, SEEK_SET); 
	      READ_ARRAY(bf->fh, partition->xResource, a.width * (size_t)bf->numTax * (size_t)partition->states, sizeof(double));
	    }
	  else
	    {
	      /* partition starts lower * numTax bytes into the section */
	      exa_off_t
		pos = alnPos + ((exa_off_t)partition->lower * (exa_off_t)bf->numTax) * (exa_off_t)sizeof(unsigned char); 
	      
	      assert(alnPos <= pos); 
	      exa_fseek(bf->fh, pos, SEEK_SET); 
	      READ_ARRAY(bf->fh, partition->yResource, a.width * (size_t)bf->numTax, sizeof(unsigned char));
	    }
        }
      else 
        {
          /* only a slice of the partition belongs to this process: seek to the
             slice of every taxon separately and read a.width cells */
          for(j = 1; j <= bf->numTax; ++j )
            {
	      if(isPomo(partition->dataType))
		{
		  exa_off_t 
		    pos = alnPos + (exa_off_t)sizeof(double) * (exa_off_t)partition->states 
		    * ( 
		       ((exa_off_t)partition->lower * (exa_off_t)bf->numTax ) /* until start of partition  */
		       + ((exa_off_t)(j-1) * ((exa_off_t)partition->upper - (exa_off_t)partition->lower) ) /* until start of sequence of taxon within partition */
		       + (exa_off_t)a.offset )  ; 
		  
		  assert(alnPos <= pos); 
		  exa_fseek(bf->fh, pos, SEEK_SET); 
		  READ_ARRAY(bf->fh, partition->xTipCLV[j], a.width * (size_t)partition->states, sizeof(double));
		}
	      else
		{
		  exa_off_t 
		    pos = alnPos + (exa_off_t)sizeof(unsigned char) 
		    * ( 
		       ((exa_off_t)partition->lower * (exa_off_t)bf->numTax ) /* until start of partition  */
		       + ((exa_off_t)(j-1) * ((exa_off_t)partition->upper - (exa_off_t)partition->lower) ) /* until start of sequence of taxon within partition */
		       + (exa_off_t)a.offset )  ; 
		  
		  assert(alnPos <= pos); 
		  exa_fseek(bf->fh, pos, SEEK_SET); 
		  READ_ARRAY(bf->fh, partition->yVector[j], a.width, sizeof(unsigned char));
		}
            }
        }
#endif
    }

  
  /* now read weights  */
  seekPos(bf, ALN_WEIGHTS); 

  /* file position at which the weight section starts */
  exa_off_t
    wgtPos = exa_ftell(bf->fh); 
  assert( ! (wgtPos <  0) );

  for(i = 0; i < numAssign; ++i)
    {
      Assignment a = myAssigns[i]; 
      pInfo *partition = bf->partitions[a.partId];

#ifdef __MIC_NATIVE
     /* for Xeon Phi, wgt must be padded to the multiple of 8 (because of site blocking in kernels) */
     const int padded_width = GET_PADDED_WIDTH(a.width);
     len = padded_width * sizeof(int);
#else
     len = a.width * sizeof(int);
#endif

      /* the whole buffer (including any padding) is zeroed; only a.width
         weights are read from the file below */
      partition->wgt = (int*)malloc_aligned( len); 
      memset(partition->wgt, 0, len); 

      /* weights of this slice start (lower + offset) ints into the section */
      exa_off_t pos = wgtPos +  ((exa_off_t)partition->lower  + (exa_off_t)a.offset) * (exa_off_t)sizeof(int); 
      assert(wgtPos <= pos );
      
      exa_fseek(bf->fh, pos, SEEK_SET); 
      READ_ARRAY(bf->fh, partition->wgt, a.width, sizeof(int)); 

    }

  /* record that both sections have been consumed */
  bf->hasRead |= ALN_ALIGNMENT; 
  bf->hasRead |= ALN_WEIGHTS; 
} 
 // Default constructor: obtains storage for N objects of type T from
 // malloc_aligned() and default-constructs every slot in place, in
 // ascending index order.
 aligned_array(void)
 {
     elems = (T*)malloc_aligned(N * sizeof(T));

     int slot = 0;
     while (slot != N)
     {
         new(elems + slot) T();
         ++slot;
     }
 }