// Copy constructor: acquire an aligned backing store, default-construct
// every slot in place, then delegate the element copy to operator=.
aligned_array(aligned_array const & rhs)
{
    elems = (T*)malloc_aligned(N * sizeof(T));
    for (int idx = 0; idx < N; ++idx)
        new (elems + idx) T();
    operator=(rhs);
}
// Discard any existing buffer, then allocate a fresh 16-byte aligned
// buffer of `size` bytes and reset the position to the start.
void CAudBuffer::Alloc(const size_t size)
{
    DeAlloc();  // release the previous allocation first

    m_buffer      = (uint8_t*)malloc_aligned(size, 16);
    m_buffer_size = size;
    m_buffer_pos  = 0;
}
/* Allocate nbytes of heap memory; 16-byte aligned when SSE2 is enabled,
 * plain malloc otherwise. */
void* allocate(bigint nbytes)
{
#ifdef USE_SSE2
    return malloc_aligned(16, nbytes);
#else
    return malloc(nbytes);
#endif
}
/* Evaluate the weighted per-site log likelihood for alignment site i under
 * GTR+CAT (4 DNA states): replays the partial traversal in ti[1..counter-1]
 * into a scratch ancestral vector, then combines the two vectors across the
 * branch qz (rate ki, site weight w).  Returns term = w * log-likelihood. */
static double evaluatePartialGTRCAT(int i, double ki, int counter, traversalInfo *ti, double qz, int w,
				    double *EIGN, double *EI, double *EV,
				    double *tipVector, unsigned char **yVector,
				    int branchReference, int mxtips)
{
  double lz, term;
  double d[3];
  double *x1, *x2;
  int scale = 0, k;
  /* scratch conditional likelihoods: 4 doubles per inner node */
  double *lVector = (double *)malloc_aligned(sizeof(double) * 4 * mxtips);
  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));

  /* tip likelihood vector of the first traversal node */
  x1 = &(tipVector[4 * yVector[trav->pNumber][i]]);

  for(k = 1; k < counter; k++)
    {
      /* clamp each branch length to zmin before taking logs */
      double qz = ti[k].qz[branchReference],
	     rz = ti[k].rz[branchReference];

      qz = (qz > zmin) ? log(qz) : log(zmin);
      rz = (rz > zmin) ? log(rz) : log(zmin);

      computeVectorGTRCAT(lVector, &scale, ki, i, qz, rz, &ti[k],
			  EIGN, EI, EV, tipVector, yVector, mxtips);
    }

  x2 = &lVector[4 * (trav->qNumber - mxtips)];
  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* BUG FIX: the original wrote `if(qz < zmin) lz = zmin;` and then
     unconditionally overwrote lz with log(qz) — a dead store, so a branch
     length below zmin was never clamped and log() could yield -inf/NaN.
     Clamp qz exactly as the loop above does. */
  lz = (qz > zmin) ? log(qz) : log(zmin);
  lz *= ki;

  d[0] = EXP (EIGN[1] * lz);
  d[1] = EXP (EIGN[2] * lz);
  d[2] = EXP (EIGN[3] * lz);

  term  = x1[0] * x2[0];
  term += x1[1] * x2[1] * d[0];
  term += x1[2] * x2[2] * d[1];
  term += x1[3] * x2[3] * d[2];

  /* fold accumulated scaling multipliers back into the log likelihood */
  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));

  term = term * w;

  free(lVector);

  return term;
}
void *alloc_aligned_buffer(uint32_t size) { void *ptr = NULL; if (size) { // Alloc the buffer using the aligned version. ptr = malloc_aligned(size, MALLOC_ALIGN); } return ptr; }
int32_t ZXImgEnhanceProcess(int32_t Handle,uint8_t* pData,int32_t nImgW,int32_t nImgH) { ZXImgEEFilter* pFilter = (ZXImgEEFilter*)Handle; int32_t ret; uint8_t *p_data; //#0 check the data safty if((nImgW&0xF)||(nImgH&7)) return ZXIMGCORE_PARA_ERR; //#1 color convert if(pFilter->format!=IMGEE_FORMAT_YUVNV21) { if(pFilter->format==IMGEE_FORMAT_S1) ret = nImgW * nImgH * 3 / 2; if(pFilter->size != ret) { if(pFilter->p_src) { free_aligned(pFilter->p_src); pFilter->p_src = NULL; } pFilter->p_src = (uint8_t*)malloc_aligned(ret,32); if(!pFilter->p_src) goto _error; } s1format_2_yuvnv12(pData,pFilter->p_src,nImgW,nImgH); p_data = pFilter->p_src; } else p_data = pData; //#2 Filter if(pFilter->pGFltHandle) { ret = sndaGuidedFilterProcess(pFilter->pGFltHandle,p_data,p_data,nImgW,nImgH); if(ret != GUIDED_FILTER_OK) return ZXIMGCORE_FAILED; } if(pFilter->pCFltHandle) { ret = sndaClaheFilterProcess(pFilter->pCFltHandle,p_data,p_data,nImgW,nImgH); if(ret != CLAHE_FILTER_OK) return ZXIMGCORE_FAILED; } //#1 color convert if(pFilter->format!=IMGEE_FORMAT_YUVNV21) { yuvnv12_2_s1format(p_data,pData,nImgW,nImgH); } return ZXIMGCORE_OK; _error: return ZXIMGCORE_FAILED; }
void CAudBuffer::ReAlloc(const size_t size) { uint8_t* buffer = (uint8_t*)malloc_aligned(size, 16); if (m_buffer) { size_t copy = std::min(size, m_buffer_size); memcpy(buffer, m_buffer, copy); free_aligned(m_buffer); } m_buffer = buffer; m_buffer_size = size; m_buffer_pos = std::min(m_buffer_pos, m_buffer_size); }
/* Checks if the device is a photo frame by reading the first 512 bytes
   and comparing against the known string that's there.
   Returns 1 when the vendor string matches, 0 otherwise (including on
   allocation or read failure). */
int is_photoframe(int f)
{
	int res;
	ssize_t got;
	char id[]="SITRONIX CORP.";
	char *buff;

	buff=malloc_aligned(0x200);
	if(!buff)
		return 0;  /* BUG FIX: allocation failure previously dereferenced NULL */

	lseek(f,0x0,SEEK_SET);
	got=read(f,buff,0x200);
	if(got < 16)
	{
		/* BUG FIX: a short/failed read previously left buff
		   uninitialized and compared garbage against the id */
		free_aligned(buff,0x200);
		return 0;
	}

	buff[15]=0;  /* terminate so strcmp inspects at most the first 15 bytes */
	/* fprintf(stderr,"ID=%s\n",buff); */
	res=strcmp(buff,id)==0?1:0;
	free_aligned(buff,0x200);
	return res;
}
/* Protein (20-state) counterpart of evaluatePartialGTRCAT: evaluates the
 * weighted per-site log likelihood for site i under GTR+CAT by replaying
 * the partial traversal into a scratch vector, then combining across the
 * branch qz (rate ki, site weight w). */
static double evaluatePartialGTRCATPROT(int i, double ki, int counter, traversalInfo *ti, double qz, int w,
					double *EIGN, double *EI, double *EV,
					double *tipVector, unsigned char **yVector,
					int branchReference, int mxtips)
{
  double lz, term;
  double d[20];
  double *x1, *x2;
  int scale = 0, k, l;
  /* scratch conditional likelihoods: 20 doubles per inner node */
  double *lVector = (double *)malloc_aligned(sizeof(double) * 20 * mxtips);
  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));

  x1 = &(tipVector[20 * yVector[trav->pNumber][i]]);

  for(k = 1; k < counter; k++)
    computeVectorGTRCATPROT(lVector, &scale, ki, i,
			    ti[k].qz[branchReference], ti[k].rz[branchReference],
			    &ti[k], EIGN, EI, EV, tipVector, yVector, mxtips);

  x2 = &lVector[20 * (trav->qNumber - mxtips)];
  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* BUG FIX: the original's `if(qz < zmin) lz = zmin;` was immediately
     overwritten by `lz = log(qz);` — a dead store that left a sub-zmin
     branch length unclamped, so log() could yield -inf/NaN.  Clamp qz
     before the log, matching the convention used in this file. */
  lz = (qz > zmin) ? log(qz) : log(zmin);
  lz *= ki;

  d[0] = 1.0;
  for(l = 1; l < 20; l++)
    d[l] = EXP (EIGN[l-1] * lz);

  term = 0.0;
  for(l = 0; l < 20; l++)
    term += x1[l] * x2[l] * d[l];

  /* fold accumulated scaling multipliers back into the log likelihood */
  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));

  term = term * w;

  free(lVector);

  return term;
}
/* Copy the raw (uncompressed) pixel data for mipmap level `base_level`
 * out of an in-memory DDS file into a fresh 16-byte aligned buffer. */
static void* read_dds(el_file_ptr file, DdsHeader *header, const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint8* dst;
	Uint32 size, offset;

	size = get_dds_size(header, 0, strip_mipmaps, base_level);
	offset = get_dds_offset(header, base_level);

	dst = malloc_aligned(size, 16);

	/* +4 skips the 4 bytes preceding the header data in the file —
	 * presumably the 'DDS ' magic tag; confirm against the loader */
	memcpy(dst, el_get_pointer(file) + sizeof(DdsHeader) + offset + 4,
		size);

	return dst;
}
/* Send one 512-byte command block to the device at POS_CMD:
 * byte 0 = opcode, bytes 1-4 = arg1 (big-endian), bytes 5-8 = arg2
 * (big-endian), byte 9 = arg3, remainder zero-filled.
 * Returns the write() result, or -1 on allocation failure. */
int sendcmd(int f,int cmd, unsigned int arg1, unsigned int arg2, unsigned char arg3)
{
	unsigned char *buff;
	int res;

	buff=malloc_aligned(0x200);
	if(!buff)
		return -1;  /* BUG FIX: NULL was dereferenced on allocation failure */
	/* BUG FIX: bytes 10..511 were previously sent uninitialized */
	memset(buff,0,0x200);

	buff[0]=cmd;
	buff[1]=(arg1>>24)&0xff;
	buff[2]=(arg1>>16)&0xff;
	buff[3]=(arg1>>8)&0xff;
	buff[4]=(arg1>>0)&0xff;
	buff[5]=(arg2>>24)&0xff;
	buff[6]=(arg2>>16)&0xff;
	buff[7]=(arg2>>8)&0xff;
	buff[8]=(arg2>>0)&0xff;
	buff[9]=(arg3);

	lseek(f,POS_CMD,SEEK_SET);
	res=write(f,buff,0x200);
	free_aligned(buff,0x200);  /* BUG FIX: the buffer leaked on every call */
	return res;
}
/* Unpack the masked pixel data of mipmap level `base_level` from an
 * in-memory DDS file into a fresh 16-byte aligned buffer, expanding each
 * pixel via the per-channel bit masks from the pixel format. */
static void* unpack_dds(el_file_ptr file, DdsHeader *header, const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint8* buffer;
	Uint32 size, offset, bpp;

	size = get_dds_size(header, 0, strip_mipmaps, base_level);
	offset = get_dds_offset(header, base_level);
	/* bytes per pixel in the packed source data */
	bpp = header->m_pixel_format.m_bit_count / 8;

	buffer = malloc_aligned(size, 16);

	/* +4 skips the 4 bytes preceding the header data — presumably the
	 * 'DDS ' magic tag; confirm against the loader */
	fast_unpack(el_get_pointer(file) + sizeof(DdsHeader) + offset + 4,
		size / bpp,
		header->m_pixel_format.m_red_mask,
		header->m_pixel_format.m_green_mask,
		header->m_pixel_format.m_blue_mask,
		header->m_pixel_format.m_alpha_mask,
		buffer);

	return buffer;
}
/* Allocate a HyperLogLog estimator with 2^precision zeroed registers
 * (rounded up to a cache-line multiple, cache-line aligned).
 * Returns NULL when precision is out of range or allocation fails. */
struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision)
{
  if (precision < TW_HLL_MIN_PRECISION || precision > TW_HLL_MAX_PRECISION) {
    return NULL;
  }

  struct tw_hyperloglog *hll = calloc(1, sizeof(struct tw_hyperloglog));
  if (!hll) {
    return NULL;
  }

  const size_t alloc_size =
      TW_ALLOC_TO_CACHELINE(1 << precision) * sizeof(uint8_t);

  hll->registers = malloc_aligned(TW_CACHELINE, alloc_size);
  if (hll->registers == NULL) {
    free(hll);
    return NULL;
  }
  memset(hll->registers, 0, alloc_size);

  hll->precision = precision;

  return hll;
}
static inline QUEUE* svc_queue_create(unsigned int block_size, unsigned int blocks_count, unsigned int align) { QUEUE* queue = NULL; int i; //first, try to allocate space for queue data. In thread's current mempool unsigned int align_offset = sizeof(DLIST); if (align > align_offset) align_offset = align; void* mem_block = malloc_aligned(blocks_count * (block_size + align_offset), align_offset); if (mem_block) { queue = sys_alloc(sizeof(QUEUE)); if (queue != NULL) { queue->align_offset = align_offset; queue->mem_block = mem_block; queue->pull_waiters = NULL; queue->push_waiters = NULL; DO_MAGIC(queue, MAGIC_QUEUE); //set all as free queue->free_blocks = NULL; queue->filled_blocks = NULL; for (i = 0; i < blocks_count; ++i) dlist_add_tail(&queue->free_blocks, (DLIST*)((unsigned int)mem_block + i * (block_size + align_offset))); } else { free(mem_block); fatal_error(ERROR_MEM_OUT_OF_SYSTEM_MEMORY, QUEUE_NAME); } } else error(ERROR_MEM_OUT_OF_HEAP, svc_thread_name(svc_thread_get_current())); return queue; }
/* Convenience wrapper: allocate `length` bytes on a 16-byte boundary. */
void* malloc_aligned16(size_t length)
{
	return malloc_aligned(length, 16);
}
static void insertHashRF(unsigned int *bitVector, hashtable *h, unsigned int vectorLength, int treeNumber, int treeVectorLength, hashNumberType position, int support, boolean computeWRF) { if(h->table[position] != NULL) { entry *e = h->table[position]; do { unsigned int i; for(i = 0; i < vectorLength; i++) if(bitVector[i] != e->bitVector[i]) break; if(i == vectorLength) { e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH]; if(computeWRF) { e->supportVector[treeNumber] = support; assert(0 <= treeNumber && treeNumber < treeVectorLength * MASK_LENGTH); } return; } e = e->next; } while(e != (entry*)NULL); e = initEntry(); /*e->bitVector = (unsigned int*)calloc(vectorLength, sizeof(unsigned int));*/ e->bitVector = (unsigned int*)malloc_aligned(vectorLength * sizeof(unsigned int)); memset(e->bitVector, 0, vectorLength * sizeof(unsigned int)); e->treeVector = (unsigned int*)calloc(treeVectorLength, sizeof(unsigned int)); if(computeWRF) e->supportVector = (int*)calloc(treeVectorLength * MASK_LENGTH, sizeof(int)); e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH]; if(computeWRF) { e->supportVector[treeNumber] = support; assert(0 <= treeNumber && treeNumber < treeVectorLength * MASK_LENGTH); } memcpy(e->bitVector, bitVector, sizeof(unsigned int) * vectorLength); e->next = h->table[position]; h->table[position] = e; } else { entry *e = initEntry(); /*e->bitVector = (unsigned int*)calloc(vectorLength, sizeof(unsigned int)); */ e->bitVector = (unsigned int*)malloc_aligned(vectorLength * sizeof(unsigned int)); memset(e->bitVector, 0, vectorLength * sizeof(unsigned int)); e->treeVector = (unsigned int*)calloc(treeVectorLength, sizeof(unsigned int)); if(computeWRF) e->supportVector = (int*)calloc(treeVectorLength * MASK_LENGTH, sizeof(int)); e->treeVector[treeNumber / MASK_LENGTH] |= mask32[treeNumber % MASK_LENGTH]; if(computeWRF) { e->supportVector[treeNumber] = support; assert(0 <= treeNumber && treeNumber < treeVectorLength * 
MASK_LENGTH); } memcpy(e->bitVector, bitVector, sizeof(unsigned int) * vectorLength); h->table[position] = e; } h->entryCount = h->entryCount + 1; }
/* Allocate an aligned array of `length` floats. */
float *mallocf(size_t length)
{
	return malloc_aligned(length * sizeof(float));
}
/* Evaluate the weighted per-site log likelihood for site i under
 * GTR+GAMMA for proteins (20 states, 4 discrete gamma rates): replays the
 * partial traversal into a scratch vector, then combines the two vectors
 * across branch qz, averaging the 4 rate categories (factor 0.25). */
static double evaluatePartialGTRGAMMAPROT(int i, int counter, traversalInfo *ti, double qz, int w,
					  double *EIGN, double *EI, double *EV,
					  double *tipVector, unsigned char **yVector,
					  double *gammaRates, int branchReference, int mxtips)
{
  double lz, term;
  double d[80];
  double *x1, *x2;
  int scale = 0, k, l, j;
  double
    /* scratch conditional likelihoods: 20 states x 4 rates per inner node */
    *lVector = (double *)malloc_aligned(sizeof(double) * 80 * mxtips),
    myEI[400] __attribute__ ((aligned (BYTE_ALIGNMENT)));
  traversalInfo *trav = &ti[0];

  /* aligned local copy of the 20x20 EI matrix for the traversal kernel */
  for(k = 0; k < 20; k++)
    for(l = 0; l < 20; l++)
      myEI[k * 20 + l] = EI[k * 20 + l];

  assert(isTip(trav->pNumber, mxtips));

  x1 = &(tipVector[20 * yVector[trav->pNumber][i]]);

  for(k = 1; k < counter; k++)
    {
      /* clamp each branch length to zmin before taking logs */
      double qz = ti[k].qz[branchReference],
	     rz = ti[k].rz[branchReference];

      qz = (qz > zmin) ? log(qz) : log(zmin);
      rz = (rz > zmin) ? log(rz) : log(zmin);

      computeVectorGTRGAMMAPROT(lVector, &scale, gammaRates, i, qz, rz, &ti[k],
				EIGN, myEI, EV, tipVector, yVector, mxtips);
    }

  x2 = &lVector[80 * (trav->qNumber - mxtips)];
  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* BUG FIX: the original's `if(qz < zmin) lz = zmin;` was a dead store —
     lz was unconditionally overwritten by log(qz), so an unclamped
     sub-zmin branch length could produce -inf/NaN.  Clamp qz instead,
     matching the loop above. */
  lz = (qz > zmin) ? log(qz) : log(zmin);

  /* per-rate eigenvalue terms */
  for(j = 0; j < 4; j++)
    {
      d[20 * j] = 1.0;
      for(l = 1; l < 20; l++)
	d[20 * j + l] = EXP(EIGN[l] * lz * gammaRates[j]);
    }

  for(j = 0, term = 0.0; j < 4; j++)
    for(l = 0; l < 20; l++)
      term += x1[l] * x2[20 * j + l] * d[j * 20 + l];

  /* 0.25 averages the 4 gamma categories; fold scaling multipliers back in */
  term = LOG(0.25 * FABS(term)) + (scale * LOG(minlikelihood));

  term = term * w;

  free(lVector);

  return term;
}
/* Initialise the shared VPU5 driver state (reference-counted through the
 * file-scope nCodecInstances / pDriver globals) and hand the instance back
 * through *ppDriver.  Returns 0 (or MCIPH_NML_END) on success, negative on
 * failure.  Serialised by initMutex. */
long shvpu_driver_init(shvpu_driver_t **ppDriver)
{
	long ret = 0;
	unsigned long reg_base;
	int zero = 0;

	pthread_mutex_lock(&initMutex);

	/* pass the pointer if the driver was already initialized */
	if (nCodecInstances > 0)
		goto init_already;

	/*** workaround clear VP5_IRQ_ENB and VPU5_IRQ_STA ***/
	reg_base = uio_register_base();
	vpu5_mmio_write(reg_base + VP5_IRQ_ENB, (unsigned long) &zero, 1);
	vpu5_mmio_write(reg_base + VP5_IRQ_STA, (unsigned long) &zero, 1);

	/* pDriver is the file-scope singleton shared by all codec instances */
	pDriver = (shvpu_driver_t *)calloc(1, sizeof(shvpu_driver_t));
	if (pDriver == NULL) {
		ret = -1;
		goto init_failed;
	}
	/* NOTE(review): redundant after calloc; kept as-is */
	memset((void *)pDriver, 0, sizeof(shvpu_driver_t));

	/*** initialize vpu ***/
#if defined(VPU5HA_SERIES)
	pDriver->wbufVpu5.work_size = MCIPH_IP0_WORKAREA_SIZE;
#elif defined(VPU_VERSION_5)
	pDriver->wbufVpu5.work_size = MCIPH_HG_WORKAREA_SIZE;
#endif
	/* the middleware requires a 4-byte aligned work area; the check
	   below re-verifies the low address bits */
	pDriver->wbufVpu5.work_area_addr =
		malloc_aligned(pDriver->wbufVpu5.work_size, 4);
	logd("work_area_addr = %p\n", pDriver->wbufVpu5.work_area_addr);
	if ((pDriver->wbufVpu5.work_area_addr == NULL) ||
	    ((unsigned int)pDriver->wbufVpu5.work_area_addr & 0x03U)) {
		ret = -1;
		goto init_failed;
	}
	pDriver->vpu5Init.vpu_base_address = uio_register_base();
	pDriver->vpu5Init.vpu_image_endian = MCIPH_LIT;
	pDriver->vpu5Init.vpu_stream_endian = MCIPH_LIT;
	pDriver->vpu5Init.vpu_firmware_endian = MCIPH_LIT;
	pDriver->vpu5Init.vpu_interrupt_enable = MCIPH_ON;
	pDriver->vpu5Init.vpu_clock_supply_control = MCIPH_CLK_CTRL;
#ifdef VPU_INTERNAL_TL
	pDriver->vpu5Init.vpu_constrained_mode = MCIPH_VPU_TL;
#else
	pDriver->vpu5Init.vpu_constrained_mode = MCIPH_OFF;
#endif
	pDriver->vpu5Init.vpu_address_mode = MCIPH_ADDR_32BIT;
	pDriver->vpu5Init.vpu_reset_mode = MCIPH_RESET_SOFT;
	/* per-variant codec API tables */
#if defined(VPU5HA_SERIES)
	pDriver->vpu5Init.vpu_version = MCIPH_NA;
	pDriver->vpu5Init.vpu_ext_init = &(pDriver->ip0Init);
#ifdef DECODER_COMPONENT
#ifdef MPEG4_DECODER
	pDriver->ip0Init.dec_tbl[0] = &mciph_ip0_m4vdec_api_tbl;
	pDriver->ip0Init.dec_tbl[1] = &mciph_ip0_m4vdec_api_tbl;
#endif
	pDriver->ip0Init.dec_tbl[2] = &mciph_ip0_avcdec_api_tbl;
	pDriver->apiTbl.dec_api_tbl = &mciph_ip0_dec_api_tbl;
#endif
#ifdef ENCODER_COMPONENT
	pDriver->ip0Init.enc_tbl[2] = &mciph_ip0_avcenc_api_tbl;
	pDriver->apiTbl.enc_api_tbl = &mciph_ip0_enc_api_tbl;
#endif
	pDriver->apiTbl.cmn_api_tbl = &mciph_ip0_cmn_api_tbl;
#if defined(VPU_VERSION_5HD)
	pDriver->ip0Init.drv_extensions = 0x3;
#endif
#elif defined(VPU_VERSION_5)
	memcpy(&(pDriver->apiTbl), &mciph_hg_api_tbl, sizeof(mciph_hg_api_tbl));
#endif
	logd("----- invoke mciph_vpu5Init() -----\n");
	ret = mciph_vpu5_init(&(pDriver->wbufVpu5), &(pDriver->apiTbl),
			      &(pDriver->vpu5Init), &(pDriver->pDrvInfo));
	logd("----- resume from mciph_vpu5_init() -----\n");
	if (ret != MCIPH_NML_END)
		goto init_failed;

	/* register an interrupt handler */
	tsem_init(&pDriver->uioSem, 0);
	ret = uio_create_int_handle(&pDriver->intrHandler,
				    handle_shvpu5_interrupt,
				    pDriver->pDrvInfo,
				    &pDriver->uioSem,
				    &pDriver->isExit);
	if (ret < 0)
		goto init_failed;

init_already:
	*ppDriver = pDriver;
	nCodecInstances++;
	/* NOTE: the success path intentionally falls through to the unlock
	   below; init_failed is the common exit, not only the error exit.
	   NOTE(review): pDriver / work_area_addr are not freed on the error
	   paths — confirm whether the leak is accepted here. */
init_failed:
	pthread_mutex_unlock(&initMutex);
	return ret;
}
static int veth_init(void) { char *mac_addr = snk_module_interface->mac_addr; union ibmveth_buf_desc rxq_desc; unsigned long rx_queue_len = sizeof(struct ibmveth_rx_q_entry) * RX_QUEUE_SIZE; unsigned int i; long rc; dprintk("veth_init(%02x:%02x:%02x:%02x:%02x:%02x)\n", mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); if (snk_module_interface->running != 0) return 0; cur_rx_toggle = IBMVETH_RXQ_TOGGLE; cur_rx_index = 0; buffer_list = malloc_aligned(8192, 4096); filter_list = buffer_list + 4096; rx_queue = malloc_aligned(rx_queue_len, 16); rx_bufs = malloc(2048 * RX_QUEUE_SIZE + 4); if (!buffer_list || !filter_list || !rx_queue || !rx_bufs) { printk("veth: Failed to allocate memory !\n"); goto fail; } rx_bufs_aligned = (uint64_t *)(((uint64_t)rx_bufs | 3) + 1); rxq_desc.fields.address = vaddr_to_dma(rx_queue); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | rx_queue_len; rc = h_register_logical_lan(g_reg, vaddr_to_dma(buffer_list), rxq_desc.desc, vaddr_to_dma(filter_list), (*(uint64_t *)mac_addr) >> 16); if (rc != H_SUCCESS) { printk("veth: Error %ld registering interface !\n", rc); goto fail; } for (i = 0; i < RX_QUEUE_SIZE; i++) { uint64_t *buf = veth_get_rx_buf(i); union ibmveth_buf_desc desc; *buf = (uint64_t)buf; desc.fields.address = vaddr_to_dma(buf); desc.fields.flags_len = IBMVETH_BUF_VALID | RX_BUF_SIZE; h_add_logical_lan_buffer(g_reg, desc.desc); } snk_module_interface->running = 1; return 0; fail: if (filter_list) free(filter_list); if (buffer_list) free(buffer_list); if (rx_queue) free(rx_queue); if (rx_bufs) free(rx_bufs); return -1; }
/* Create a ZXImgEEFilter for the requested mode mask and return its handle
 * through *p_handle (the filter pointer cast to int32_t).
 * format - input pixel format (IMGEE_FORMAT_*)
 * nMode  - bitmask of IMGEE_MODE_* flags; at least one mode bit required
 * Returns ZXIMGCORE_OK or a ZXIMGCORE_* error; on failure any partially
 * initialised sub-filters are released and *p_handle stays 0. */
int ZXImgEnhanceInit(int* p_handle,int format,int nMode)
{
    ZXImgEEFilter* pFilter = NULL;
    int32_t ret = ZXIMGCORE_OK, eps0_y, eps1_y, eps0_uv, eps1_uv;
    float_t fSaturation, fContrast;
    double_t lumda0_y, lumda1_y, lumda0_uv, lumda1_uv;

    //#0 check the nMode parameter
    *p_handle = 0;
    if(!IMGEE_IS_VALID(nMode))
        return ZXIMGCORE_PARA_ERR;
    // at least one processing-mode bit must be set
    if(((nMode&IMGEE_MODE_AUTO)==0)&&
       ((nMode&IMGEE_MODE_HDR)==0)&&
       ((nMode&IMGEE_MODE_BEAUTY)==0)&&
       ((nMode&IMGEE_MODE_SHARPEN)==0)&&
       ((nMode&IMGEE_MODE_DENOISE)==0)&&
       ((nMode&IMGEE_MODE_SHARPEN_EX)==0))
        return ZXIMGCORE_PARA_ERR;

    //#1 malloc memory space
    pFilter = (ZXImgEEFilter*)malloc_aligned(sizeof(ZXImgEEFilter),4);
    if(pFilter==NULL)
    {
        ret = ZXIMGCOER_MEM_ERR;
        goto _error;
    }
    memset(pFilter,0,sizeof(ZXImgEEFilter));

    //#1.1 malloc memory space for CLAHE (contrast/saturation presets per mode)
    if((nMode&IMGEE_MODE_AUTO)||(nMode&IMGEE_MODE_HDR)||(nMode&IMGEE_MODE_BEAUTY))
    {
        switch(IMGEE_GET_MODE(nMode))
        {
        case IMGEE_MODE_AUTO:
            fSaturation = 1.6f;
            fContrast = 1.7f;
            break;
        case IMGEE_MODE_HDR:
            fSaturation = 1.6f;
            fContrast = 2.5f;
            break;
        case IMGEE_MODE_BEAUTY:
            fSaturation = 1.5f;
            fContrast = 1.7f;
            break;
        default:
            fSaturation = 1.6f;
            fContrast = 1.7f;
            break;
        };
        ret = sndaClaheFilterInit(&pFilter->pCFltHandle,4,4,256, fSaturation, fContrast,1);
        if(ret!=CLAHE_FILTER_OK)
        {
            ret = ZXIMGCORE_INITCLAHE_ERR;
            goto _error;
        }
    }

    //1.2 malloc memory for Guided Filter (skipped entirely in "fast" mode)
    if(!(IMGEE_IS_FAST(nMode)))
    {
        // per-mode epsilon / lambda presets for the Y and UV planes
        switch(IMGEE_GET_MODE(nMode))
        {
        case IMGEE_MODE_AUTO:
            eps0_y = 8; lumda0_y = 1.00;
            eps1_y = 8192; lumda1_y = 1.65;
            eps0_uv = 64; lumda0_uv = 0.25;
            eps1_uv = 8192; lumda1_uv = 1.00;
            break;
        case IMGEE_MODE_HDR:
            eps0_y = 32; lumda0_y = 0.50;
            eps1_y = 8192; lumda1_y = 1.50;
            eps0_uv = 64; lumda0_uv = 0.25;
            eps1_uv = 8192; lumda1_uv = 1.00;
            break;
        case IMGEE_MODE_BEAUTY:
            eps0_y = 128; lumda0_y = 0.50;
            eps1_y = 8192; lumda1_y = 1.75; //1.75
            eps0_uv = 128; lumda0_uv = 0.00;
            eps1_uv = 8192; lumda1_uv = 1.50; //1.5
            break;
        default:
            // nearly the same as auto — NOTE(review): lumda1_y is 1.50 here
            // but 1.65 in the AUTO case above; confirm which is intended
            eps0_y = 8; lumda0_y = 1.00;
            eps1_y = 8192; lumda1_y = 1.50;
            eps0_uv = 64; lumda0_uv = 0.25;
            eps1_uv = 8192; lumda1_uv = 1.00;
            break;
        }
        ret = sndaGuidedFilterInit(&pFilter->pGFltHandle,eps0_y,eps1_y,eps0_uv, eps1_uv,
            (float_t)lumda0_y,(float_t)lumda1_y,(float_t)lumda0_uv,(float_t)lumda1_uv,2);
        if(ret != GUIDED_FILTER_OK)
        {
            ret = ZXIMGCORE_INITGUIDED_ERR;
            goto _error;
        }
    }

    pFilter->format = format;
    // NOTE(review): handle is the pointer cast to int32_t — this truncates
    // on 64-bit targets; confirm the library only targets 32-bit builds
    *p_handle = (int32_t)pFilter;
    return ZXIMGCORE_OK;

_error:
    // release whatever was initialised before the failure
    if(pFilter)
    {
        if(pFilter->pGFltHandle)
        {
            sndaGuidedFilterRelease(pFilter->pGFltHandle);
            pFilter->pGFltHandle = 0;
        }
        if(pFilter->pCFltHandle)
        {
            sndaClaheFilterRelease(pFilter->pCFltHandle);
            pFilter->pCFltHandle = 0;
        }
        free_aligned(pFilter);
        pFilter = NULL;
    }
    return ret;
}
// Allocate (or supply, when use_buffers is true) the output-port buffers for
// this OMX component: queries the port definition, drives the component to
// Idle, enables the port, then creates nBufferCountActual buffers and queues
// them as available.  Returns the first OMX error encountered.
OMX_ERRORTYPE COMXCoreComponent::AllocOutputBuffers(bool use_buffers /* = false */)
{
  OMX_ERRORTYPE omx_err = OMX_ErrorNone;

  if(!m_handle)
    return OMX_ErrorUndefined;

  m_omx_output_use_buffers = use_buffers;

  // fetch the port's buffer requirements (count, size, alignment)
  OMX_PARAM_PORTDEFINITIONTYPE portFormat;
  OMX_INIT_STRUCTURE(portFormat);
  portFormat.nPortIndex = m_output_port;

  omx_err = OMX_GetParameter(m_handle, OMX_IndexParamPortDefinition, &portFormat);
  if(omx_err != OMX_ErrorNone)
    return omx_err;

  // buffers may only be allocated in Idle; go via Loaded if necessary
  if(GetState() != OMX_StateIdle)
  {
    if(GetState() != OMX_StateLoaded)
      SetStateForComponent(OMX_StateLoaded);
    SetStateForComponent(OMX_StateIdle);
  }

  omx_err = EnablePort(m_output_port, false);
  if(omx_err != OMX_ErrorNone)
    return omx_err;

  m_output_alignment = portFormat.nBufferAlignment;
  m_output_buffer_count = portFormat.nBufferCountActual;
  m_output_buffer_size = portFormat.nBufferSize;

  Logger::LogOut(LOG_LEVEL_DEBUG, "COMXCoreComponent::AllocOutputBuffers component(%s) - port(%d), nBufferCountMin(%u), nBufferCountActual(%u), nBufferSize(%u) nBufferAlignmen(%u)", m_componentName.c_str(), m_output_port, portFormat.nBufferCountMin, portFormat.nBufferCountActual, portFormat.nBufferSize, portFormat.nBufferAlignment);

  for (size_t i = 0; i < portFormat.nBufferCountActual; i++)
  {
    OMX_BUFFERHEADERTYPE *buffer = NULL;
    OMX_U8* data = NULL;

    if(m_omx_output_use_buffers)
    {
      // we own the memory: hand an aligned buffer to the component
      data = (OMX_U8*)malloc_aligned(portFormat.nBufferSize, m_output_alignment);
      omx_err = OMX_UseBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize, data);
    }
    else
    {
      // the component allocates its own buffer memory
      omx_err = OMX_AllocateBuffer(m_handle, &buffer, m_output_port, NULL, portFormat.nBufferSize);
    }
    if(omx_err != OMX_ErrorNone)
    {
      Logger::LogOut(LOG_LEVEL_ERROR, "COMXCoreComponent::AllocOutputBuffers component(%s) - OMX_UseBuffer failed with omx_err(0x%x)", m_componentName.c_str(), omx_err);
      // free only the buffer from this failed iteration
      // NOTE(review): buffers created in earlier iterations are not torn
      // down here — confirm the caller handles cleanup on failure
      if(m_omx_output_use_buffers && data)
        free_aligned(data);
      return omx_err;
    }
    buffer->nOutputPortIndex = m_output_port;
    buffer->nFilledLen = 0;
    buffer->nOffset = 0;
    buffer->pAppPrivate = (void*)i;  // stash the buffer index for later lookup
    m_omx_output_buffers.push_back(buffer);
    m_omx_output_available.push(buffer);
  }

  // wait for the port-enable command issued above to complete
  omx_err = WaitForCommand(OMX_CommandPortEnable, m_output_port);

  m_flush_output = false;

  return omx_err;
}
/* Decompress a block-compressed (DXT1-5 / ATI1 / ATI2) DDS file into a
 * fresh 16-byte aligned RGBA buffer, starting at mipmap `base_level` and —
 * unless strip_mipmaps is set — including the remaining mipmap chain.
 * Returns 0 when the dimensions are not multiples of four or the fourcc is
 * not a supported compressed format. */
static void* decompress_dds(el_file_ptr file, DdsHeader *header, const Uint32 strip_mipmaps, const Uint32 base_level)
{
	Uint32 width, height, size, format, mipmap_count;
	Uint32 x, y, i, w, h;
	Uint32 index;
	Uint8 *dest;

	/* block compression works on 4x4 tiles, so both dimensions must be
	   multiples of four */
	if ((header->m_height % 4) != 0)
	{
		LOG_ERROR_OLD("Can`t decompressed DDS file %s because height is"
			" %d and not a multiple of four.", el_file_name(file),
			header->m_height);

		return 0;
	}

	if ((header->m_width % 4) != 0)
	{
		LOG_ERROR_OLD("Can`t decompressed DDS file %s because width is"
			" %d and not a multiple of four.", el_file_name(file),
			header->m_width);

		return 0;
	}

	format = header->m_pixel_format.m_fourcc;

	if ((format != DDSFMT_DXT1) && (format != DDSFMT_DXT2) &&
		(format != DDSFMT_DXT3) && (format != DDSFMT_DXT4) &&
		(format != DDSFMT_DXT5) && (format != DDSFMT_ATI1) &&
		(format != DDSFMT_ATI2))
	{
		return 0;
	}

	index = 0;

	/* total decompressed size (in pixels*4 bytes) of all levels we keep */
	size = get_dds_size(header, 1, strip_mipmaps, base_level);

	width = max2u(header->m_width >> base_level, 1);
	height = max2u(header->m_height >> base_level, 1);

	mipmap_count = header->m_mipmap_count;

	if (strip_mipmaps != 0)
	{
		if (mipmap_count > (base_level + 1))
		{
			mipmap_count = base_level + 1;
		}
	}

	dest = malloc_aligned(size, 16);

	/* skip the compressed data of the levels below base_level */
	el_seek(file, get_dds_offset(header, base_level), SEEK_CUR);

	for (i = base_level; i < mipmap_count; i++)
	{
		w = (width + 3) / 4;
		h = (height + 3) / 4;

		/* index counts pixels; each pixel is 4 output bytes */
		assert(index * 4 <= size);

		// 4x4 blocks in x/y
		for (y = 0; y < h; y++)
		{
			for (x = 0; x < w; x++)
			{
				decompress_block(file, format, x * 4, y * 4,
					width, height, index, dest);
			}
		}

		index += width * height;

		/* next mipmap level is half the size in each dimension */
		if (width > 1)
		{
			width /= 2;
		}

		if (height > 1)
		{
			height /= 2;
		}
	}

	assert(index * 4 == size);

	return dest;
}
/* Execute the precomputed traversal descriptor tr->td[0] to update ancestral
 * state vectors: for every traversal step and every partition, builds the
 * transition matrices for the two child branches and calls the CAT or GAMMA
 * ancestral kernel.  Requires the non-gapped, non-memory-saving code path. */
void newviewIterativeAncestral(tree *tr)
{
  traversalInfo *ti = tr->td[0].ti;
  int i, model;

  /* this routine only supports the plain vector layout */
  assert(!tr->useGappedImplementation);
  assert(!tr->saveMemory);
  assert(!tr->estimatePerSiteAA);

  for(i = 1; i < tr->td[0].count; i++)
    {
      traversalInfo *tInfo = &ti[i];

      for(model = 0; model < tr->NumberOfModels; model++)
	{
	  double
	    *x1_start = (double*)NULL,
	    *x2_start = (double*)NULL,
	    *left = (double*)NULL,
	    *right = (double*)NULL,
	    qz, rz;
	  unsigned char
	    *tipX1 = (unsigned char *)NULL,
	    *tipX2 = (unsigned char *)NULL;
	  size_t
	    rateHet,
	    states = (size_t)tr->partitionData[model].states,
	    width = tr->partitionData[model].width;

	  /* number of rate categories: 1 for CAT, 4 for GAMMA
	     NOTE(review): rateHet is computed but not referenced below */
	  if(tr->rateHetModel == CAT)
	    rateHet = 1;
	  else
	    rateHet = 4;

	  /* pick tip sequences and/or inner conditional vectors depending
	     on which children are tips */
	  switch(tInfo->tipCase)
	    {
	    case TIP_TIP:
	      tipX1 = tr->partitionData[model].yVector[tInfo->qNumber];
	      tipX2 = tr->partitionData[model].yVector[tInfo->rNumber];
	      break;
	    case TIP_INNER:
	      tipX1 = tr->partitionData[model].yVector[tInfo->qNumber];
	      x2_start = tr->partitionData[model].xVector[tInfo->rNumber - tr->mxtips - 1];
	      break;
	    case INNER_INNER:
	      x1_start = tr->partitionData[model].xVector[tInfo->qNumber - tr->mxtips - 1];
	      x2_start = tr->partitionData[model].xVector[tInfo->rNumber - tr->mxtips - 1];
	      break;
	    default:
	      assert(0);
	    }

	  left = tr->partitionData[model].left;
	  right = tr->partitionData[model].right;

	  /* per-partition branch lengths only when multiBranch is set */
	  if(tr->multiBranch)
	    {
	      qz = tInfo->qz[model];
	      rz = tInfo->rz[model];
	    }
	  else
	    {
	      qz = tInfo->qz[0];
	      rz = tInfo->rz[0];
	    }

	  switch(tr->rateHetModel)
	    {
	    case CAT:
	      {
		/* one states x states matrix per rate category */
		double *diagptable = (double*)malloc_aligned(tr->partitionData[model].numberOfCategories * states * states * sizeof(double));

		makeP_Flex(qz, rz, tr->partitionData[model].perSiteRates,
			   tr->partitionData[model].EI,
			   tr->partitionData[model].EIGN,
			   tr->partitionData[model].numberOfCategories, left, right, states);

		makeP_Flex_Ancestral(tr->partitionData[model].perSiteRates,
				     tr->partitionData[model].EI,
				     tr->partitionData[model].EIGN,
				     tr->partitionData[model].numberOfCategories,
				     diagptable,
				     states);

		newviewFlexCat_Ancestral(tInfo->tipCase,
					 tr->partitionData[model].EV,
					 tr->partitionData[model].rateCategory,
					 x1_start, x2_start,
					 tr->partitionData[model].tipVector,
					 tipX1, tipX2,
					 width, left, right,
					 states,
					 diagptable,
					 tr->partitionData[model].sumBuffer);

		free(diagptable);
	      }
	      break;
	    case GAMMA:
	    case GAMMA_I:
	      {
		/* 4 gamma categories: 4 states x states matrices */
		double *diagptable = (double*)malloc_aligned(4 * states * states * sizeof(double));

		makeP_Flex(qz, rz, tr->partitionData[model].gammaRates,
			   tr->partitionData[model].EI,
			   tr->partitionData[model].EIGN,
			   4, left, right, states);

		makeP_Flex_Ancestral(tr->partitionData[model].gammaRates,
				     tr->partitionData[model].EI,
				     tr->partitionData[model].EIGN,
				     4,
				     diagptable,
				     states);

		newviewFlexGamma_Ancestral(tInfo->tipCase,
					   x1_start, x2_start,
					   tr->partitionData[model].EV,
					   tr->partitionData[model].tipVector,
					   tipX1, tipX2,
					   width, left, right,
					   states,
					   diagptable,
					   tr->partitionData[model].sumBuffer);

		free(diagptable);
	      }
	      break;
	    default:
	      assert(0);
	    }
	}
    }
}
/* Convenience wrapper: allocate `length` bytes on a 32-byte boundary. */
void* malloc_aligned32(size_t length)
{
	return malloc_aligned(length, 32);
}
/* Build the bit-packed parsimony state vectors for all partitions: for every
 * tip, each parsimony-informative site (repeated aliaswgt times) contributes
 * one bit per state, packed PCF sites per parsimonyNumber word.  Entries are
 * padded (for SSE3/AVX, to a multiple of INTS_PER_VECTOR) and pad positions
 * are filled with all-ones so they never affect the parsimony score.  Also
 * allocates and zeroes tr->parsimonyScore (one counter per node). */
static void compressDNA(tree *tr, int *informative)
{
  size_t
    totalNodes,
    i,
    model;

  /* parsVect holds vectors for tips and inner nodes: 2 * mxtips slots */
  totalNodes = 2 * (size_t)tr->mxtips;

  for(model = 0; model < (size_t) tr->NumberOfModels; model++)
    {
      size_t
	k,
	states = (size_t)tr->partitionData[model].states,
	compressedEntries,
	compressedEntriesPadded,
	entries = 0,
	lower = tr->partitionData[model].lower,
	upper = tr->partitionData[model].upper;

      parsimonyNumber
	**compressedTips = (parsimonyNumber **)malloc(states * sizeof(parsimonyNumber*)),
	*compressedValues = (parsimonyNumber *)malloc(states * sizeof(parsimonyNumber));

      /* count the total site occurrences (weights) of informative sites */
      for(i = lower; i < upper; i++)
	if(informative[i])
	  entries += (size_t)tr->aliaswgt[i];

      /* PCF sites are packed into one parsimonyNumber word */
      compressedEntries = entries / PCF;

      if(entries % PCF != 0)
	compressedEntries++;

#if (defined(__SIM_SSE3) || defined(__AVX))
      /* pad to a full SIMD vector of ints */
      if(compressedEntries % INTS_PER_VECTOR != 0)
	compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
      else
	compressedEntriesPadded = compressedEntries;
#else
      compressedEntriesPadded = compressedEntries;
#endif

      tr->partitionData[model].parsVect = (parsimonyNumber *)malloc_aligned((size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));

      for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)
	tr->partitionData[model].parsVect[i] = 0;

      for(i = 0; i < (size_t)tr->mxtips; i++)
	{
	  size_t
	    w = 0,
	    compressedIndex = 0,
	    compressedCounter = 0,  /* bit position within the current word */
	    index = 0;

	  /* per-state destination pointers for tip i (node id i + 1) */
	  for(k = 0; k < states; k++)
	    {
	      compressedTips[k] = &(tr->partitionData[model].parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
	      compressedValues[k] = 0;
	    }

	  for(index = lower; index < (size_t)upper; index++)
	    {
	      if(informative[index])
		{
		  const unsigned int *bitValue = getBitVector(tr->partitionData[model].dataType);
		  /* state-presence bit mask for this tip's character */
		  parsimonyNumber value = bitValue[tr->yVector[i + 1][index]];

		  /* repeat the site aliaswgt times (site weight) */
		  for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
		    {
		      for(k = 0; k < states; k++)
			{
			  if(value & mask32[k])
			    compressedValues[k] |= mask32[compressedCounter];
			}
		      compressedCounter++;

		      /* word full: flush all per-state words and restart */
		      if(compressedCounter == PCF)
			{
			  for(k = 0; k < states; k++)
			    {
			      compressedTips[k][compressedIndex] = compressedValues[k];
			      compressedValues[k] = 0;
			    }
			  compressedCounter = 0;
			  compressedIndex++;
			}
		    }
		}
	    }

	  /* fill the padding with all-ones (every state present) so pad
	     columns never add parsimony changes */
	  for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
	    {
	      for(;compressedCounter < PCF; compressedCounter++)
		for(k = 0; k < states; k++)
		  compressedValues[k] |= mask32[compressedCounter];

	      for(k = 0; k < states; k++)
		{
		  compressedTips[k][compressedIndex] = compressedValues[k];
		  compressedValues[k] = 0;
		}

	      compressedCounter = 0;
	    }
	}

      tr->partitionData[model].parsimonyLength = compressedEntriesPadded;

      rax_free(compressedTips);
      rax_free(compressedValues);
    }

  tr->parsimonyScore = (unsigned int*)malloc_aligned(sizeof(unsigned int) * totalNodes);

  for(i = 0; i < totalNodes; i++)
    tr->parsimonyScore[i] = 0;
}
/* Convenience wrapper: allocate `length` bytes on a 64-byte boundary. */
void* malloc_aligned64(size_t length)
{
	return malloc_aligned(length, 64);
}
/* Generic-state-count (states-flexible) version of the per-site CAT partial
 * likelihood evaluation: replays the partial traversal in ti[1..counter-1]
 * into a scratch vector, then combines the two conditional vectors across
 * the branch qz (rate ki, site weight w). */
static double evaluatePartialCAT_FLEX(int i, double ki, int counter, traversalInfo *ti, double qz, int w,
				      double *EIGN, double *EI, double *EV,
				      double *tipVector, unsigned char **yVector,
				      int branchReference, int mxtips, const int states)
{
  int scale = 0, k;
  double
    /* scratch conditional likelihoods: `states` doubles per inner node */
    *lVector = (double *)malloc_aligned(sizeof(double) * states * mxtips),
    *d = (double *)malloc_aligned(sizeof(double) * states),
    lz, term, *x1, *x2;
  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));

  x1 = &(tipVector[states * yVector[trav->pNumber][i]]);

  for(k = 1; k < counter; k++)
    {
      /* clamp each branch length to zmin before taking logs */
      double qz = ti[k].qz[branchReference],
	     rz = ti[k].rz[branchReference];

      qz = (qz > zmin) ? log(qz) : log(zmin);
      rz = (rz > zmin) ? log(rz) : log(zmin);

      computeVectorCAT_FLEX(lVector, &scale, ki, i, qz, rz, &ti[k],
			    EIGN, EI, EV, tipVector, yVector, mxtips, states);
    }

  x2 = &lVector[states * (trav->qNumber - mxtips)];
  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* BUG FIX: the original's `if(qz < zmin) lz = zmin;` was a dead store —
     immediately overwritten by `lz = log(qz);` — so a sub-zmin branch
     length was never clamped and log() could yield -inf/NaN.  Clamp qz
     the same way the loop above does. */
  lz = (qz > zmin) ? log(qz) : log(zmin);
  lz *= ki;

  d[0] = 1.0;
  for(k = 1; k < states; k++)
    d[k] = EXP (EIGN[k] * lz);

  term = 0.0;
  for(k = 0; k < states; k++)
    term += x1[k] * x2[k] * d[k];

  /* fold accumulated scaling multipliers back into the log likelihood */
  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));

  term = term * w;

  rax_free(lVector);
  rax_free(d);

  return term;
}
/** uses the information in the PartitionAssignment to only extract data
    relevant to this process (weights and alignment characters).

    For every assignment of this process: allocates and wires the per-taxon
    tip vectors (double-valued CLVs for POMO partitions, byte sequences
    otherwise), then reads the assigned alignment slice from the byte file —
    in one contiguous read when the whole partition belongs to this process,
    otherwise with one seek+read per taxon.  Finally reads the per-site
    weights.  Sets the ALN_ALIGNMENT / ALN_WEIGHTS bits in bf->hasRead. */
void readMyData(ByteFile *bf, PartitionAssignment *pa, int procId)
{
  seekPos(bf, ALN_ALIGNMENT);
  exa_off_t alnPos = exa_ftell(bf->fh);

  size_t len;
  int numAssign = pa->numAssignPerProc[procId];
  Assignment *myAssigns = pa->assignPerProc[procId];

  /* first read aln characters */
  int i,j ;
  for(i = 0; i < numAssign; ++i )
    {
      Assignment a = myAssigns[i];
      /* printf("reading for: ") ; */
      /* printAssignment(a, procId); */
      pInfo *partition = bf->partitions[a.partId];
      partition->width = a.width;
      partition->offset = a.offset;
      /* total cells for this slice: all taxa times assigned width */
      len = (size_t)bf->numTax * a.width;

      if(isPomo(partition->dataType))
	{
	  /* POMO: one double per state per site; xResource backs the
	     per-taxon CLV rows, xTip backs the per-taxon tip rows */
	  double *xTip = (double *)malloc_aligned(len * (size_t)partition->states * sizeof(double));
	  partition->xResource = (double *)malloc_aligned(len * (size_t)partition->states * sizeof(double));
	  memset(partition->xResource, 0, len * (size_t)partition->states * sizeof(double));
	  memset(xTip, 0, len * (size_t)partition->states * sizeof(double));
	  /* index 0 unused: taxa are numbered 1..numTax */
	  partition->xTipCLV = (double **)calloc((size_t)bf->numTax + 1 , sizeof(double *));
	  partition->xTipVector = (double **)calloc((size_t)bf->numTax + 1 , sizeof(double *));
	  for(j = 1; j <= bf->numTax; ++j)
	    {
	      partition->xTipCLV[j] = partition->xResource + (size_t)(j-1) * a.width * (size_t)partition->states;
	      partition->xTipVector[j] = xTip + (size_t)(j-1) * a.width * (size_t)partition->states;
	    }
	}
      else
	{
	  /* discrete characters: one byte per site per taxon */
	  partition->yResource = (unsigned char*)malloc_aligned( len * sizeof(unsigned char));
	  memset(partition->yResource,0,(size_t)len * sizeof(unsigned char));
	  partition->yVector = (unsigned char**) calloc((size_t)bf->numTax + 1 , sizeof(unsigned char*));
	  for(j = 1; j <= bf->numTax; ++j)
	    partition->yVector[j] = partition->yResource + (size_t)(j-1) * a.width;
	}

#ifdef OLD_LAYOUT
      /* old file layout: sites of one taxon are contiguous across all
	 partitions, so every taxon needs its own seek */
      for(j = 1; j <= bf->numTax; ++j )
	{
	  exa_off_t pos = alnPos + ( bf->numPattern * (j-1) + partition->lower + a.offset ) * sizeof(unsigned char);
	  assert(alnPos <= pos);
	  exa_fseek(bf->fh, pos, SEEK_SET);
	  READ_ARRAY(bf->fh, partition->yVector[j], a.width, sizeof(unsigned char));
	}
#else
      /* if the entire partition is assigned to this process, read it in
	 one go. Otherwise, several seeks are necessary.  */
      if( a.width == (partition->upper - partition->lower ) )
	{
	  if(isPomo(partition->dataType))
	    {
	      exa_off_t pos = alnPos + (exa_off_t)partition->lower * (exa_off_t)bf->numTax * (exa_off_t)partition->states * (exa_off_t)sizeof(double);
	      assert(alnPos <= pos);
	      exa_fseek(bf->fh, pos, SEEK_SET);
	      READ_ARRAY(bf->fh, partition->xResource, a.width * (size_t)bf->numTax * (size_t)partition->states, sizeof(double));
	    }
	  else
	    {
	      exa_off_t pos = alnPos + ((exa_off_t)partition->lower * (exa_off_t)bf->numTax) * (exa_off_t)sizeof(unsigned char);
	      assert(alnPos <= pos);
	      exa_fseek(bf->fh, pos, SEEK_SET);
	      READ_ARRAY(bf->fh, partition->yResource, a.width * (size_t)bf->numTax, sizeof(unsigned char));
	    }
	}
      else
	{
	  /* partial partition: read this process's window of each taxon's
	     row individually */
	  for(j = 1; j <= bf->numTax; ++j )
	    {
	      if(isPomo(partition->dataType))
		{
		  exa_off_t pos = alnPos + (exa_off_t)sizeof(double) * (exa_off_t)partition->states *
		    ( ((exa_off_t)partition->lower * (exa_off_t)bf->numTax ) /* until start of partition */
		      + ((exa_off_t)(j-1) * ((exa_off_t)partition->upper - (exa_off_t)partition->lower) ) /* until start of sequence of taxon within partition */
		      + (exa_off_t)a.offset ) ;
		  assert(alnPos <= pos);
		  exa_fseek(bf->fh, pos, SEEK_SET);
		  READ_ARRAY(bf->fh, partition->xTipCLV[j], a.width * (size_t)partition->states, sizeof(double));
		}
	      else
		{
		  exa_off_t pos = alnPos + (exa_off_t)sizeof(unsigned char) *
		    ( ((exa_off_t)partition->lower * (exa_off_t)bf->numTax ) /* until start of partition */
		      + ((exa_off_t)(j-1) * ((exa_off_t)partition->upper - (exa_off_t)partition->lower) ) /* until start of sequence of taxon within partition */
		      + (exa_off_t)a.offset ) ;
		  assert(alnPos <= pos);
		  exa_fseek(bf->fh, pos, SEEK_SET);
		  READ_ARRAY(bf->fh, partition->yVector[j], a.width, sizeof(unsigned char));
		}
	    }
	}
#endif
    }

  /* now read weights */
  seekPos(bf, ALN_WEIGHTS);
  exa_off_t wgtPos = exa_ftell(bf->fh);
  assert( ! (wgtPos < 0) );
  for(i = 0; i < numAssign; ++i)
    {
      Assignment a = myAssigns[i];
      pInfo *partition = bf->partitions[a.partId];

#ifdef __MIC_NATIVE
      /* for Xeon Phi, wgt must be padded to the multiple of 8 (because of site blocking in kernels) */
      const int padded_width = GET_PADDED_WIDTH(a.width);
      len = padded_width * sizeof(int);
#else
      len = a.width * sizeof(int);
#endif

      partition->wgt = (int*)malloc_aligned( len);
      memset(partition->wgt, 0, len);

      exa_off_t pos = wgtPos + ((exa_off_t)partition->lower + (exa_off_t)a.offset) * (exa_off_t)sizeof(int);
      assert(wgtPos <= pos );
      exa_fseek(bf->fh, pos, SEEK_SET);
      READ_ARRAY(bf->fh, partition->wgt, a.width, sizeof(int));
    }

  bf->hasRead |= ALN_ALIGNMENT;
  bf->hasRead |= ALN_WEIGHTS;
}
// Default constructor: acquire an aligned backing store and
// default-construct every element in place.
aligned_array(void)
{
    elems = (T*)malloc_aligned(N * sizeof(T));
    for (int idx = 0; idx < N; ++idx)
        new (elems + idx) T();
}