/**
 * Build one QuantisationMatrix per quantiser index in [0, qindex_max].
 *
 * For every index q, every level l in [0, depth] and every one of the four
 * slots s, the adjusted index is q minus the entry of
 * DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][l][s],
 * floored at zero. The factor slot receives quant_factor() of that index and
 * the offset slot receives quant_offset() of that index plus 2, each
 * broadcast to all four 32-bit lanes.
 *
 * Storage layout: all matrices share two pointer tables (allocated with
 * new[]) and two 16-byte aligned value pools (allocated with ALIGNED_ALLOC);
 * each matrix only points into them.
 *
 * @param wavelet_index  first index into the default adjustment table
 * @param depth          transform depth; values above 4 are rejected
 * @param qindex_max     largest quantiser index to tabulate
 * @return array of qindex_max + 1 matrices allocated with new[]
 * @throws VC2DECODER_NOTIMPLEMENTED when depth > 4
 *
 * NOTE(review): the ALIGNED_ALLOC results are used without a null check, and
 * nothing here shows how the returned storage is released - confirm with the
 * caller.
 */
VC2HQDECODE_API QuantisationMatrix *quantisation_matrices(uint32_t wavelet_index, int depth, int qindex_max) {
  if (depth > 4) {
    writelog(LOG_ERROR, "%s:%d: Could not form quantisation matrices, depth greater than 4 not supported\n", __FILE__, __LINE__);
    throw VC2DECODER_NOTIMPLEMENTED;
  }

  const int levels  = depth + 1;
  const int indices = qindex_max + 1;

  QuantisationMatrix *table = new QuantisationMatrix[indices];

  /* Per-(index, level) pointer tables shared by every matrix. */
  __m128i **factor_rows = new __m128i*[levels*indices];
  __m128i **offset_rows = new __m128i*[levels*indices];

  /* Four vectors per (index, level) pair, kept 16-byte aligned. */
  __m128i *factor_pool = (__m128i *)ALIGNED_ALLOC(16, 4 * levels*indices*sizeof(__m128i));
  __m128i *offset_pool = (__m128i *)ALIGNED_ALLOC(16, 4 * levels*indices*sizeof(__m128i));

  for (int q = 0; q < indices; ++q) {
    QuantisationMatrix &entry = table[q];
    entry.qfactor = factor_rows + levels*q;
    entry.qoffset = offset_rows + levels*q;

    for (int level = 0; level < levels; ++level) {
      const int first_slot = 4 * (levels*q + level);
      entry.qfactor[level] = factor_pool + first_slot;
      entry.qoffset[level] = offset_pool + first_slot;

      for (int slot = 0; slot < 4; ++slot) {
        /* Adjusted quantiser index, never below zero. */
        int qi = 0;
        if (q > DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][level][slot]) {
          qi = q - DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][level][slot];
        }

        entry.qfactor[level][slot] = _mm_set1_epi32(quant_factor(qi));
        entry.qoffset[level][slot] = _mm_set1_epi32(quant_offset(qi) + 2);
      }
    }
  }

  return table;
}
int perform_invhtransformtest(invhtransformtest_data &data, void *idata_pre, const int width, const int height, const int stride, bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) { int r = 0; (void)HAS_AVX;(void)HAS_AVX2; printf("%-20s: H %d/%d ", VC2DecoderWaveletFilterTypeString[data.wavelet], data.level, data.depth); if (data.sample_size == 2) printf("16-bit "); else printf("32-bit "); /* Use C version to generate comparison value */ printf("C [ "); void *cdata = ALIGNED_ALLOC(32, height*stride*data.sample_size); memcpy(cdata, idata_pre, height*stride*data.sample_size); InplaceTransform ctrans = NULL; try { ctrans = get_invhtransform_c(data.wavelet, data.level, data.depth, data.sample_size); } catch(...) { printf(" NONE ]"); r = 1; goto out; } printf("EXISTS ] "); ctrans(cdata, stride, width, height); /* Test SSE4_2 version */ if (HAS_SSE4_2 && data.SSE4_2) { printf("SSE4.2 ["); InplaceTransform trans = get_invhtransform_sse4_2(data.wavelet, data.level, data.depth, data.sample_size); if (trans == ctrans) { printf("NONE]"); } else { void *tdata = ALIGNED_ALLOC(32, height*stride*data.sample_size); memcpy(tdata, idata_pre, height*stride*data.sample_size); trans(tdata, stride, width, height); if (memcmp(cdata, tdata, height*stride*data.sample_size)) { printf("FAIL]\n"); r = 1; } else { printf(" OK ] "); } ALIGNED_FREE(tdata); } } out: printf("\n"); ALIGNED_FREE(cdata); return r; }
/**
 * Open an Ogg Vorbis resource and prepare the decoder for playback.
 *
 * Opens the file at the resource's path, allocates the decode buffer sized
 * for VPBUFFER_FRAMES frames of the stream's channel count, publishes the
 * stream's sample rate and channel count to the owner through
 * setOutBuffers(), and finally zero-fills both output buffers.
 *
 * @param resource  resource whose path names the file to open
 * @return 0 on success, -1 if the file could not be opened
 */
int OGGDecoder::open(VPResource resource)
{
    if (ov_fopen(resource.getPath().c_str(), &vf) < 0) {
        DBG("open fail");
        return -1;
    }

    // Stream description of the first logical bitstream.
    vorbis_info *info = ov_info(&vf, 0);

    buffer = (float *) ALIGNED_ALLOC(sizeof(float)*VPBUFFER_FRAMES*info->channels);
    half_buffer_size = VPBUFFER_FRAMES*info->channels*sizeof(float);

    // Describe the stream format; the buffers themselves come from the owner.
    VPBuffer bin;
    bin.srate = info->rate;
    bin.chans = info->channels;
    bin.buffer[0] = NULL;
    bin.buffer[1] = NULL;

    owner->setOutBuffers(&bin, &bout);

    // Start from silence in both halves of the output buffer.
    for (unsigned sample = 0; sample < VPBUFFER_FRAMES*bout->chans; ++sample) {
        bout->buffer[0][sample] = 0.0f;
        bout->buffer[1][sample] = 0.0f;
    }

    return 0;
}
/*
 * Enter a critical region on behalf of the calling thread and return its
 * per-thread state.
 *
 * The state is looked up in thread-specific storage first. If the thread has
 * none yet, the global list is scanned for an entry whose count is zero and
 * an attempt is made to claim it by swapping the count from 0 to 1
 * (presumably CASIO is a compare-and-swap that returns the previous value -
 * confirm against its definition). If no entry can be claimed, a fresh one is
 * allocated, initialised and pushed onto the head of the global list with a
 * retry loop. The chosen entry is then stored in thread-specific storage.
 *
 * The process exits with status 1 if the allocation fails.
 */
ptst_t *critical_enter(gc_global_t *gc_global)
{
    ptst_t *ptst, *expected_head, *observed_head;
#ifdef NEED_ID
    unsigned int id, oid;
#endif

    ptst = (ptst_t *)pthread_getspecific(gc_global->ptst_key);

    /* Fast path: this thread already owns a state block. */
    if ( ptst != NULL )
    {
        gc_enter(ptst);
        return(ptst);
    }

    /* Try to adopt an unused entry from the global list. */
    ptst = _ptst_first(gc_global);
    while ( ptst != NULL )
    {
        if ( (ptst->count == 0) && (CASIO(&ptst->count, 0, 1) == 0) )
        {
            break;
        }
        ptst = ptst_next(ptst);
    }

    /* Nothing to adopt: create a new entry and publish it. */
    if ( ptst == NULL )
    {
        ptst = ALIGNED_ALLOC(sizeof(*ptst));
        if ( ptst == NULL ) exit(1);
        memset(ptst, 0, sizeof(*ptst));
        ptst->gc = gc_init(gc_global);
        rand_init(ptst);
        ptst->count = 1;

#ifdef NEED_ID
        /* Reserve the next id, retrying until the update sticks. */
        id = gc_global->next_id;
        while ( (oid = CASIO(&gc_global->next_id, id, id+1)) != id ) id = oid;
        ptst->id = id;
#endif

        /*
         * Push onto the list head. The link must be written (and the barrier
         * issued) before each attempt to swing the head pointer.
         */
        observed_head = gc_global->ptst_list;
        do {
            ptst->next = expected_head = observed_head;
            WMB_NEAR_CAS();
        }
        while ( (observed_head = CASPO(&gc_global->ptst_list, expected_head, ptst)) != expected_head );
    }

    pthread_setspecific(gc_global->ptst_key, ptst);

    gc_enter(ptst);
    return(ptst);
}
void TranspositionTable::resize(size_t mbSize) { size_t newClusterCount = size_t(1) << msb((mbSize * 1024 * 1024) / sizeof(Cluster)); if (newClusterCount == clusterCount) return; clusterCount = newClusterCount; if (table) { ALIGNED_FREE(table); table = nullptr; } table = (Cluster*)ALIGNED_ALLOC( sizeof(Cluster), clusterCount * sizeof(Cluster)); if (!table) { SYNCCOUT << "info string Failed to allocate " << mbSize << "MB for transposition table." << SYNCENDL; exit(EXIT_FAILURE); } }
/**
 * Initialise the csm render-path: allocates the global g_csm state and
 * creates, in order, the shadow map render targets, (in developer mode) the
 * preview shaders, preview render targets and the "gfx_debugcsm" console
 * command, the shaders, the constant blocks and the render states.
 *
 * @param width   not used by this function
 * @param height  not used by this function
 * @return RET_OK on success, RET_OUTOFMEMORY if the state block cannot be
 *         allocated, RET_FAIL if any later step fails
 *
 * NOTE(review): on a failure return nothing created so far is released here;
 * presumably the caller runs the matching release routine - confirm.
 */
result_t gfx_csm_init(uint width, uint height)
{
    result_t r;

    log_printf(LOG_INFO, "init csm render-path ...");

    struct allocator* lsr_alloc = eng_get_lsralloc();
    struct allocator* tmp_alloc = tsk_get_tmpalloc(0);

    g_csm = (struct gfx_csm*)ALIGNED_ALLOC(sizeof(struct gfx_csm), MID_GFX);
    if (g_csm == NULL)
        return RET_OUTOFMEMORY;
    memset(g_csm, 0x00, sizeof(struct gfx_csm));

    /* render targets and buffers */
    r = csm_create_shadowrt(CSM_SHADOW_SIZE, CSM_SHADOW_SIZE);
    if (IS_FAIL(r)) {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create shadow map buffers");
        return RET_FAIL;
    }

    /* preview resources are only needed in developer mode */
    if (BIT_CHECK(eng_get_params()->flags, ENG_FLAG_DEV)) {
        if (!csm_load_prev_shaders(lsr_alloc)) {
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not load preview shaders");
            return RET_FAIL;
        }

        r = csm_create_prevrt(CSM_PREV_SIZE, CSM_PREV_SIZE);
        if (IS_FAIL(r)) {
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create prev buffers");
            return RET_FAIL;
        }

        /* console commands */
        con_register_cmd("gfx_debugcsm", csm_console_debugcsm, NULL, "gfx_debugcsm [1*/0]");
    }

    /* shaders */
    if (!csm_load_shaders(lsr_alloc)) {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not load shaders");
        return RET_FAIL;
    }

    /* cblocks */
    if (gfx_check_feature(GFX_FEATURE_RANGED_CBUFFERS)) {
        g_csm->sharedbuff = gfx_sharedbuffer_create(GFX_DEFAULT_RENDER_OBJ_CNT*GFX_INSTANCES_MAX*48);
        if (g_csm->sharedbuff == NULL) {
            /* message previously named the deferred path by mistake */
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create uniform buffer");
            return RET_FAIL;
        }
    }

    /* sharedbuff stays NULL (from the memset) when ranged cbuffers are unavailable */
    g_csm->cb_frame = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame", NULL);
    g_csm->cb_xforms = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_xforms", g_csm->sharedbuff);
    g_csm->cb_frame_gs = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame_gs", NULL);
    g_csm->tb_skins = gfx_shader_create_cblock_tbuffer(mem_heap(),
        gfx_shader_get(gfx_csm_getshader(CMP_OBJTYPE_MODEL, GFX_RPATH_SKINNED | GFX_RPATH_CSMSHADOW)),
        "tb_skins", sizeof(struct vec4f)*3*GFX_INSTANCES_MAX*GFX_SKIN_BONES_MAX);
    if (g_csm->cb_frame == NULL || g_csm->cb_xforms == NULL || g_csm->cb_frame_gs == NULL ||
        g_csm->tb_skins == NULL)
    {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create cblocks");
        return RET_FAIL;
    }

    /* states */
    r = csm_create_states();
    if (IS_FAIL(r)) {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create states");
        return RET_FAIL;
    }

    g_csm->shadowmap_size = (float)CSM_SHADOW_SIZE;

    return RET_OK;
}
int VPOutPluginAlsa::init(VPlayer *v, VPBuffer *in) { DBG("Alsa:init"); owner = v; bin = in; if (snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, SND_PCM_NO_AUTO_RESAMPLE) < 0){ DBG("Alsa:init: failed to open pcm"); exit(0); return -1; } snd_pcm_sw_params_t *swparams; snd_pcm_sw_params_malloc(&swparams); snd_pcm_sw_params_current (handle, swparams); snd_pcm_sw_params_set_start_threshold (handle, swparams, VPBUFFER_FRAMES - PERIOD_SIZE); snd_pcm_sw_params (handle, swparams); snd_pcm_sw_params_free(swparams); snd_pcm_hw_params_alloca(¶ms); snd_pcm_hw_params_any(handle, params); snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); snd_pcm_format_mask_t *mask; snd_pcm_format_mask_alloca(&mask); snd_pcm_hw_params_get_format_mask(params, mask); if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S32)) { DBG("bit depth is 32"); snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S32); multiplier = (1<<31) -1 ; DBG(multiplier); } else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S24)) { DBG("bit depth is 24"); snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S24); multiplier = (1<<23) -1; } else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S16)) { DBG("bit depth is 16"); snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16); multiplier = (1<<15) -1; } else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S8)) { DBG("bit depth is 8"); snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S8); multiplier = (1<<7) -1;; } snd_pcm_hw_params_set_channels(handle, params, bin->chans); snd_pcm_hw_params_set_period_size(handle, params, PERIOD_SIZE, 0); if (snd_pcm_hw_params(handle, params) < 0) { DBG("Alsa:init: failed to set pcm params"); return -1; } snd_pcm_hw_params_current(handle, params); int dir; snd_pcm_hw_params_get_rate(params, &out_srate, &dir); in_srate = bin->srate; int rerr; rs = src_new(SRC_SINC_FASTEST, bin->chans, &rerr); if (!rs){ DBG("SRC error"<<rerr); return -1; } rd.src_ratio = 
(out_srate*1.0)/(in_srate*1.0); out_frames = (VPBUFFER_FRAMES*rd.src_ratio)*2; out_buf = (float *)ALIGNED_ALLOC(sizeof(float)*out_frames*bin->chans); out_buf_i = (int *)ALIGNED_ALLOC(sizeof(int)*out_frames*bin->chans); DBG("target rate "<<out_srate); work = true; paused = false; pause_check = false; FULL_MEMORY_BARRIER; in_fd = inotify_init(); if ( in_fd < 0 ) { DBG("error initializing inotify, auto pause won't work"); } else { in_wd[0]=inotify_add_watch( in_fd, "/dev/snd/pcmC0D0p", IN_OPEN | IN_CLOSE ); } fcntl(in_fd, F_SETFL, O_NONBLOCK); worker = new std::thread((void(*)(void*))worker_run, this); worker->high_priority(); DBG("alsa thread made"); DBG((void *)VPOutPluginAlsa::check_contention); VPEvents::getSingleton()->schedulerAddJob((VPEvents::VPJob) VPOutPluginAlsa::check_contention, this,0); return 0; }
/**
 * Hand the player's output buffer to a decoder and, when the stream format
 * changed, rebuild the whole output chain for it.
 *
 * *out always receives the address of the player's buffer and the cursor and
 * per-buffer sample counts are reset. If the sample rate and channel count in
 * outprop match the current ones, nothing else happens ("gapless").
 * Otherwise the current output plugin and buffers are destroyed, a new output
 * plugin is created, two buffers of VPBUFFER_FRAMES frames are allocated and
 * recorded in both outprop and the player's buffer, every effect is
 * initialised in sequence (each effect's output feeding the next; inactive
 * effects are finalised again straight away), and the output plugin is
 * initialised on a copy of the last buffer in that chain.
 *
 * @param outprop  requested format; its buffer and cursor fields are filled in
 *                 when the chain is rebuilt
 * @param out      receives a pointer to the player's output buffer
 */
void VPlayer::setOutBuffers(VPBuffer *outprop, VPBuffer **out)
{
    *out = &bout;

    bufferCursor = 0;
    bufferSamples[0] = 0;
    bufferSamples[1] = 0;

    const bool format_changed =
        bout.srate != outprop->srate || bout.chans != outprop->chans;

    if (!format_changed) {
        DBG("gapless");
        return;
    }

    // Tear down whatever was configured for the previous format.
    if (vpout) {
        delete vpout;
        vpout = NULL;
    }

    if (bout.buffer[0]) {
        ALIGNED_FREE(bout.buffer[0]);
        bout.buffer[0] = NULL;
        ALIGNED_FREE(bout.buffer[1]);
        bout.buffer[1] = NULL;
    }

    vpout = VPOutFactory::getSingleton()->create();
    assert(vpout);

    // Fresh double buffer, shared between outprop and bout.
    outprop->buffer[0] = (float*)ALIGNED_ALLOC(sizeof(float)*VPBUFFER_FRAMES*outprop->chans);
    outprop->buffer[1] = (float*)ALIGNED_ALLOC(sizeof(float)*VPBUFFER_FRAMES*outprop->chans);
    outprop->cursor = &bufferCursor;
    assert(outprop->buffer[0] && outprop->buffer[1]);

    bout.chans = outprop->chans;
    bout.srate = outprop->srate;
    bout.buffer[0] = outprop->buffer[0];
    bout.buffer[1] = outprop->buffer[1];
    bout.cursor = &bufferCursor;
    bufferCursor = 0;

    DBG("track chs: "<<bout.chans);
    DBG("track rate: "<<bout.srate);

    // bout - source to first dsp, output of input plugin
    // bin - source to vpout
    VPBuffer *stage = &bout;
    DBG(eff_count);
    for (int idx = 0; idx < eff_count; ++idx) {
        effects[idx].in = stage;
        // init() may redirect `stage` to the buffer this effect writes to.
        effects[idx].eff->init(this, stage, &stage);
        effects[idx].out = stage;

        if (!effects[idx].active) {
            effects[idx].eff->finit();
        }
    }

    memcpy(&bin, stage, sizeof(VPBuffer));
    vpout->init(this, &bin);
}
int test_invtransform(bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) { printf("--------------------------------------------------------------------------------\n"); printf(" Inverse Transform Tests\n"); printf("\n"); /* Load some input data for the tests */ const int height = 1080; const int width = 1920; const int stride = ((1920 + 1023)/1024)*1024; void *idata16 = ALIGNED_ALLOC(32, height*stride*sizeof(int16_t)); void *idata32 = ALIGNED_ALLOC(32, height*stride*sizeof(int32_t)); if (!randomiser((char *)idata16, height*stride*sizeof(int16_t))) { printf("Error Getting Random Data\n"); return 1; } /* Need to make sure the samples aren't so large they overflow during the calculations */ for (int y = 0; y < height; y++) { for (int x = 0; x < stride; x++) { ((int16_t *)idata16)[y*stride + x] >>= 2; ((int32_t *)idata32)[y*stride + x] = ((int16_t *)idata16)[y*stride + x]; } } /*for (int y = 0; y < height; y++) { for (int x = 0; x < stride; x++) { ((int32_t *)idata)[y*stride + x] = 1; } }*/ int r = 0; for (int i = 0; !r && i < INVHTRANSFORMTEST_DATA_NUM; i++) { void * idata = (INVHTRANSFORMTEST_DATA[i].sample_size == 2)?idata16:idata32; r = perform_invhtransformtest(INVHTRANSFORMTEST_DATA[i], idata, width, height, stride, HAS_SSE4_2, HAS_AVX, HAS_AVX2); } for (int i = 0; !r && i < INVHTRANSFORMFINALTEST_DATA_NUM; i++) { void * idata = (INVHTRANSFORMFINALTEST_DATA[i].sample_size == 2)?idata16:idata32; r = perform_invhtransformfinaltest(INVHTRANSFORMFINALTEST_DATA[i], idata, width, height, stride, HAS_SSE4_2, HAS_AVX, HAS_AVX2); } for (int i = 0; !r && i < INVVTRANSFORMTEST_DATA_NUM; i++) { void * idata = (INVVTRANSFORMTEST_DATA[i].sample_size == 2)?idata16:idata32; r = perform_invvtransformtest(INVVTRANSFORMTEST_DATA[i], idata, width, height, stride, HAS_SSE4_2, HAS_AVX, HAS_AVX2); } ALIGNED_FREE(idata16); ALIGNED_FREE(idata32); printf("--------------------------------------------------------------------------------\n"); return r; }
int perform_invhtransformfinaltest(invhtransformfinaltest_data &data, void *idata_pre, const int width, const int height, const int stride, bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) { int r = 0; (void)HAS_AVX;(void)HAS_AVX2; void *idata = ALIGNED_ALLOC(32, height*stride*data.sample_size); memcpy(idata, idata_pre, height*stride*data.sample_size); struct offsets_t offsets[] = { { 0, 0, 0, 0 }, { 32, 0, 0, 0 }, { 0, 32, 0, 0 }, { 32, 32, 0, 0 }, { 0, 0, 32, 0 }, { 0, 0, 0, 32 }, { 0, 0, 32, 32 }, { 32, 0, 32, 0 }, { 32, 0, 0, 32 }, { 32, 0, 32, 32 }, { 0, 32, 32, 0 }, { 0, 32, 0, 32 }, { 0, 0, 32, 32 }, { 32, 32, 32, 0 }, { 32, 32, 0, 32 }, { 32, 32, 32, 32 }, }; for (int i = 0; r==0 && i < (int)(sizeof(offsets)/sizeof(struct offsets_t)); i++) { char *cdata = (char *)malloc(height*stride*sizeof(uint16_t)); memset(cdata, 0, height*stride*sizeof(uint16_t)); printf("%-20s: H 0/* (%2d,%2d,%2d,%2d) (active %d-bit) ", VC2DecoderWaveletFilterTypeString[data.wavelet], offsets[i].left, offsets[i].right, offsets[i].top, offsets[i].bottom, data.active_bits); if (data.sample_size == 2) printf("16-bit "); else printf("32-bit "); /* Use C version to generate comparison value */ printf("C [ "); InplaceTransformFinal ctrans = NULL; try { ctrans = get_invhtransformfinal_c(data.wavelet, data.active_bits, data.sample_size); } catch(...) 
{ printf(" NONE ]"); r = 1; free(cdata); break; } ctrans(idata, stride, cdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom); if (memcmp(idata, idata_pre, height*stride*data.sample_size) != 0) { printf(" BAD ]\n"); printf(" c function overwrites input data!\n"); r = 1; free(cdata); printf("\n"); break; } for (int y = 0; y < offsets[i].top; y++) { for (int x = 0; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } for (int y = offsets[i].top; y < height - offsets[i].bottom; y++) { for (int x = 0; x < offsets[i].left; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } for (int x = offsets[i].left; x < width - offsets[i].right; x++) { if (((uint16_t *)cdata)[y*stride + x] >= (1 << data.active_bits)) { printf(" BAD ]\n"); printf(" c function does not clip values to correct number of bits!\n"); r = 1; free(cdata); printf("\n"); break; } } for (int x = width - offsets[i].right; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } for (int y = height - offsets[i].bottom; y < height; y++) { for (int x = 0; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } printf(" GOOD ] "); /* Test SSE4_2 version */ if (HAS_SSE4_2 && data.SSE4_2) { printf("SSE4.2 ["); InplaceTransformFinal trans = get_invhtransformfinal_sse4_2(data.wavelet, data.active_bits, data.sample_size); if 
(trans == ctrans) { printf("NONE]"); } else { char *tdata = (char *)malloc(height*stride*sizeof(uint16_t)); memset(tdata, 0, height*stride*sizeof(uint16_t)); trans(idata, stride, tdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom); if (memcmp(cdata, tdata, height*stride*sizeof(uint16_t))) { printf("FAIL]\n"); for (int i = 0; i < (int)(height*stride*sizeof(uint16_t)); i++) { if (cdata[i] != tdata[i]) { printf("\nFirst difference at byte %d, 0x%02x =/= 0x%02x\n\n", i, ((uint8_t *)cdata)[i], ((uint8_t *)tdata)[i]); break; } } r = 1; } else { printf(" OK ] "); } free(tdata); } } free(cdata); printf("\n"); } ALIGNED_FREE(idata); return r; }