/**
 * Read the quantised data of the current frame from tctx->bits and write the
 * decoded spectrum of every channel into out.
 *
 * Channel c occupies out[c * mtab->size .. (c + 1) * mtab->size - 1]; each
 * channel is processed as `sub` consecutive blocks of `block_size` samples.
 *
 * @param tctx  decoder context (mode table, bitstream fields, DSP helpers)
 * @param out   destination buffer for the spectrum of all channels
 * @param ftype frame type, selects the per-frame-type mode parameters
 */
static void read_and_decode_spectrum(TwinVQContext *tctx, float *out,
                                     enum TwinVQFrameType ftype)
{
    const TwinVQModeTab *mtab  = tctx->mtab;
    TwinVQFrameData *bits      = &tctx->bits;
    const int channels         = tctx->avctx->channels;
    const int sub              = mtab->fmode[ftype].sub;
    const int block_size       = mtab->size / sub;
    float gain[TWINVQ_CHANNELS_MAX * TWINVQ_SUBBLOCKS_MAX];
    float ppc_shape[TWINVQ_PPC_SHAPE_LEN_MAX * TWINVQ_CHANNELS_MAX * 4];
    int ch, blk;

    /* Main coefficients go straight into the output buffer. */
    dequant(tctx, bits->main_coeffs, out, ftype,
            mtab->fmode[ftype].cb0, mtab->fmode[ftype].cb1,
            mtab->fmode[ftype].cb_len_read);

    dec_gain(tctx, ftype, gain);

    /* Only long frames carry a PPC shape. */
    if (ftype == TWINVQ_FT_LONG) {
        /* ceil(ppc_shape_len * channels / n_div[3]) */
        const int ndiv     = tctx->n_div[3];
        const int cb_len_p = (ndiv + mtab->ppc_shape_len * channels - 1) / ndiv;

        dequant(tctx, bits->ppc_coeffs, ppc_shape, TWINVQ_FT_PPC,
                mtab->ppc_shape_cb,
                mtab->ppc_shape_cb + cb_len_p * TWINVQ_PPC_SHAPE_CB_SIZE,
                cb_len_p);
    }

    for (ch = 0; ch < channels; ch++) {
        float *chunk = out + mtab->size * ch;
        float lsp[TWINVQ_LSP_COEFS_MAX];

        /* Scale every sub-block by its envelope, produced in tmp_buf. */
        for (blk = 0; blk < sub; blk++) {
            float *dst = chunk + block_size * blk;

            tctx->dec_bark_env(tctx, bits->bark1[ch][blk],
                               bits->bark_use_hist[ch][blk], ch,
                               tctx->tmp_buf, gain[sub * ch + blk], ftype);

            tctx->fdsp.vector_fmul(dst, dst, tctx->tmp_buf, block_size);
        }

        if (ftype == TWINVQ_FT_LONG)
            tctx->decode_ppc(tctx, bits->p_coef[ch], bits->g_coef[ch],
                             ppc_shape + ch * mtab->ppc_shape_len, chunk);

        decode_lsp(tctx, bits->lpc_idx1[ch], bits->lpc_idx2[ch],
                   bits->lpc_hist_idx[ch], lsp, tctx->lsp_hist[ch]);

        /* tmp_buf now receives the LPC-derived curve; it is applied to
         * each sub-block of the channel in turn. */
        dec_lpc_spectrum_inv(tctx, lsp, ftype, tctx->tmp_buf);

        for (blk = 0; blk < sub; blk++) {
            float *dst = chunk + block_size * blk;

            tctx->fdsp.vector_fmul(dst, dst, tctx->tmp_buf, block_size);
        }
    }
}
/**
 * \brief RDO function to calculate cost for intra
 * \param encoder_state  encoder state; supplies the encoder control and lambda
 * \param pred           predicted block, width*width samples
 * \param orig_block     original block, width*width samples
 * \param width          block width (the block is square)
 * \param mode           intra prediction mode, used to pick the scan order
 * \param tr_depth       transform depth, forwarded to rdoq()
 * \returns cost to code pred block
 *
 * The cost is the SSD between the original block and the clipped
 * reconstruction plus the estimated coefficient bits weighted by
 * cur_lambda_cost.
 *
 ** Only for luma
 */
uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, int8_t mode, int tr_depth)
{
  const encoder_control * const encoder = encoder_state->encoder_control;
  const int num_samples = width * width;

  coefficient transformed[LCU_WIDTH*LCU_WIDTH>>2];
  int16_t residual[LCU_WIDTH*LCU_WIDTH>>2];
  int16_t recon_residual[LCU_WIDTH*LCU_WIDTH>>2];
  coefficient quantized[LCU_WIDTH*LCU_WIDTH>>2];

  int8_t luma_scan_mode = SCAN_DIAG;
  int idx, col, row;

  // Scan mode is diagonal, except for 4x4 and 8x8, where:
  // - angular 6-14 = vertical
  // - angular 22-30 = horizontal
  if (width <= 8) {
    if (mode >= 6 && mode <= 14) {
      luma_scan_mode = SCAN_VER;
    } else if (mode >= 22 && mode <= 30) {
      luma_scan_mode = SCAN_HOR;
    }
  }

  // Residual = original - prediction, stored row by row.
  for (row = 0; row < width; row++) {
    const int row_start = row * width;
    for (col = 0; col < width; col++) {
      const int pos = row_start + col;
      residual[pos] = orig_block[pos] - pred[pos];
    }
  }

  // Forward transform and quantization (RDOQ when enabled).
  transform2d(encoder, residual, transformed, width, 0);
  if (encoder->rdoq_enable) {
    rdoq(encoder_state, transformed, quantized, width, width, 0, luma_scan_mode, CU_INTRA, tr_depth);
  } else {
    quant(encoder_state, transformed, quantized, width, width, 0, luma_scan_mode, CU_INTRA);
  }

  // Reconstruct the residual from the quantized coefficients.
  dequant(encoder_state, quantized, transformed, width, width, 0, CU_INTRA);
  itransform2d(encoder, recon_residual, transformed, width, 0);

  // SSD between original and reconstructed (prediction + residual, clipped)
  unsigned ssd = 0;
  for (idx = 0; idx < num_samples; idx++) {
    int diff = orig_block[idx] - CLIP(0, 255, pred[idx] + recon_residual[idx]);
    ssd += diff*diff;
  }

  double coeff_bits = 0;
  if (encoder->rdo >= 2) {
    // Full RDO
    coeff_bits = get_coeff_cost(encoder_state, quantized, width, 0, luma_scan_mode);
  } else if (encoder->rdo == 1) {
    // Simple RDO: estimate the bits from the sum of absolute coefficients
    int coeff_abs = 0;
    for (idx = 0; idx < num_samples; idx++) {
      coeff_abs += abs((int)quantized[idx]);
    }
    coeff_bits = 1 + 1.5 * coeff_abs;
  }

  // Adding 0.5 before the truncating cast rounds to nearest.
  return (uint32_t)(0.5 + ssd + coeff_bits * encoder_state->global->cur_lambda_cost);
}
/** * \brief RDO function to calculate cost for intra * \returns cost to code pred block ** Only for luma */ uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, int8_t mode) { const encoder_control * const encoder = encoder_state->encoder_control; coefficient pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; int16_t temp_block[LCU_WIDTH*LCU_WIDTH>>2]; coefficient temp_coeff[LCU_WIDTH*LCU_WIDTH>>2]; uint32_t ac_sum; uint32_t cost = 0; uint32_t coeffcost = 0; int8_t luma_scan_mode = SCAN_DIAG; int i = 0,x,y; for (y = 0; y < width; y++) { for (x = 0; x < width; x++) { block[i++] = orig_block[x + y*width]- pred[x + y*width]; } } // Scan mode is diagonal, except for 4x4 and 8x8, where: // - angular 6-14 = vertical // - angular 22-30 = horizontal if (width <= 8) { if (mode >= 6 && mode <= 14) { luma_scan_mode = SCAN_VER; } else if (mode >= 22 && mode <= 30) { luma_scan_mode = SCAN_HOR; } } transform2d(encoder, block,pre_quant_coeff,width,0); if(encoder->rdoq_enable) { rdoq(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0); } else { quant(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA); } dequant(encoder_state, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA); itransform2d(encoder, temp_block,pre_quant_coeff,width,0); // SSD between original and reconstructed for (i = 0; i < width*width; i++) { int diff = temp_block[i]-block[i]; cost += diff*diff; } // Simple RDO if(encoder->rdo == 1) { // SSD between reconstruction and original + sum of coeffs for (i = 0; i < width*width; i++) { coeffcost += abs((int)temp_coeff[i]); } cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); // Full RDO } else if(encoder->rdo == 2) {
/**
 * Dequantise the parameters in parm and combine them with the previous
 * frame's dequantised vector to produce isp_new.
 *
 * @param lsf_history in: dequantised vector of the previous call;
 *                    out: dequantised vector of this call
 * @param isp_new     output, LP_FILTER_ORDER_16k values
 * @param parm        quantisation indices passed to dequant()
 * @param ma_pred     index into qu[] selecting the weight of the history
 */
static void lsf_decode_fp_16k(float* lsf_history, float* isp_new,
                              const int* parm, int ma_pred)
{
    float isp_q[LP_FILTER_ORDER_16k];
    int k;

    dequant(isp_q, parm, lsf_codebooks_16k);

    /* Weighted mix of the new vector and the history, plus the mean. */
    for (k = 0; k != LP_FILTER_ORDER_16k; ++k) {
        isp_new[k] = (1 - qu[ma_pred]) * isp_q[k]
                   +      qu[ma_pred]  * lsf_history[k]
                   + mean_lsf_16k[k];
    }

    /* The history keeps the dequantised vector, not the mixed result. */
    memcpy(lsf_history, isp_q, sizeof(isp_q));
}
static void lsf_decode_fp(float *lsfnew, float *lsf_history, const SiprParameters *parm) { int i; float lsf_tmp[LP_FILTER_ORDER]; dequant(lsf_tmp, parm->vq_indexes, lsf_codebooks); for (i = 0; i < LP_FILTER_ORDER; i++) lsfnew[i] = lsf_history[i] * 0.33 + lsf_tmp[i] + mean_lsf[i]; ff_sort_nearly_sorted_floats(lsfnew, LP_FILTER_ORDER - 1); /* Note that a minimum distance is not enforced between the last value and the previous one, contrary to what is done in ff_acelp_reorder_lsf() */ ff_set_min_dist_lsf(lsfnew, LSFQ_DIFF_MIN, LP_FILTER_ORDER - 1); lsfnew[9] = FFMIN(lsfnew[LP_FILTER_ORDER - 1], 1.3 * M_PI); memcpy(lsf_history, lsf_tmp, LP_FILTER_ORDER * sizeof(*lsf_history)); for (i = 0; i < LP_FILTER_ORDER - 1; i++) lsfnew[i] = cos(lsfnew[i]); lsfnew[LP_FILTER_ORDER - 1] *= 6.153848 / M_PI; }
/**
 * Encode one tile and push the result onto resultQueue.
 *
 * Depth tiles are either quantized (rfbTileDepthQuantize) or copied row by
 * row; RGBA tiles are JPEG-compressed when rfbTileJpeg is requested and
 * TurboJPEG is available, and copied row by row otherwise. When Snappy is
 * available and requested, every non-JPEG payload is additionally
 * Snappy-compressed. Compression flags that could not be honoured are
 * cleared from the message so that they describe the actual payload.
 *
 * The payload buffer is allocated with new[] and handed over through
 * result.payload.
 *
 * @return always nullptr (no follow-up task)
 */
tbb::task* execute() {

    auto &msg = *message;
    VncServer::EncodeResult result(message);
    if (depth) {
        const char *zbuf = reinterpret_cast<const char *>(depth);
        if (msg.compression & rfbTileDepthQuantize) {
            const int ds = msg.format == rfbDepth16Bit ? 2 : 3;
            msg.size = depthquant_size(DepthFloat, ds, w, h);
            char *qbuf = new char[msg.size];
            depthquant(qbuf, zbuf, DepthFloat, ds, x, y, w, h, stride);
#ifdef QUANT_ERROR
            // round-trip the quantized tile and compare it with the input
            std::vector<char> dequant(sizeof(float)*w*h);
            depthdequant(dequant.data(), qbuf, DepthFloat, ds, 0, 0, w, h);
            //depthquant(qbuf, dequant.data(), DepthFloat, ds, x, y, w, h, stride); // test depthcompare
            depthcompare(zbuf, dequant.data(), DepthFloat, ds, w, h);
#endif
            result.payload = qbuf;
        } else {
            // uncompressed depth: copy the tile's rows out of the full image
            char *tilebuf = new char[msg.size];
            for (int yy=0; yy<h; ++yy) {
                memcpy(tilebuf+yy*bpp*w, zbuf+((yy+y)*stride+x)*bpp, w*bpp);
            }
            result.payload = tilebuf;
        }
    } else if (rgba) {
        if (msg.compression & rfbTileJpeg) {
            int ret = -1;
#ifdef HAVE_TURBOJPEG
            TJSAMP subsamp = TJSAMP_420;
            TjContext::reference tj = tjContexts.local();
            size_t maxsize = tjBufSize(msg.width, msg.height, subsamp);
            char *jpegbuf = new char[maxsize];
            unsigned long sz = 0;
            // Address the tile through a local pointer: advancing the member
            // `rgba` itself would make the raw-copy fallback below apply the
            // tile offset a second time after a failed compression.
            auto *src = rgba + (msg.totalwidth*msg.y+msg.x)*bpp;
            {
#ifdef TIMING
                double start = vistle::Clock::time();
#endif
                ret = tjCompress(tj.handle, src, msg.width, msg.totalwidth*bpp, msg.height, bpp,
                        reinterpret_cast<unsigned char *>(jpegbuf), &sz, subsamp, 90, TJ_BGR);
#ifdef TIMING
                double dur = vistle::Clock::time() - start;
                std::cerr << "JPEG compression: " << dur << "s, " << msg.width*(msg.height/dur)/1e6 << " MPix/s" << std::endl;
#endif
            }
            if (ret >= 0) {
                msg.size = sz;
                result.payload = jpegbuf;
            } else {
                // compression failed: release the buffer instead of leaking it
                delete[] jpegbuf;
            }
#endif
            // no JPEG payload was produced: fall back to the raw copy below
            if (ret < 0)
                msg.compression &= ~rfbTileJpeg;
        }
        if (!(msg.compression & rfbTileJpeg)) {
            char *tilebuf = new char[msg.size];
            for (int yy=0; yy<h; ++yy) {
                memcpy(tilebuf+yy*bpp*w, rgba+((yy+y)*stride+x)*bpp, w*bpp);
            }
            result.payload = tilebuf;
        }
    }

#ifdef HAVE_SNAPPY
    if((msg.compression & rfbTileSnappy) && !(msg.compression & rfbTileJpeg)) {
        size_t maxsize = snappy::MaxCompressedLength(msg.size);
        char *sbuf = new char[maxsize];
        size_t compressed = 0;
        {
#ifdef TIMING
            double start = vistle::Clock::time();
#endif
            snappy::RawCompress(result.payload, msg.size, sbuf, &compressed);
#ifdef TIMING
            vistle::StopWatch timer(rgba ? "snappy RGBA" : "snappy depth");
            double dur = vistle::Clock::time() - start;
            std::cerr << "SNAPPY " << (rgba ? "RGB" : "depth") << ": " << dur << "s, " << msg.width*(msg.height/dur)/1e6 << " MPix/s" << std::endl;
#endif
        }
        msg.size = compressed;

        //std::cerr << "compressed " << msg.size << " to " << compressed << " (buf: " << cd->buf.size() << ")" << std::endl;
        // the compressed buffer replaces the intermediate one
        delete[] result.payload;
        result.payload = sbuf;
    } else {
        msg.compression &= ~rfbTileSnappy;
    }
#endif
    resultQueue.push(result);
    return nullptr; // or a pointer to a new task to be executed immediately
}