bool SamplerJitCache::Jit_Decode5551() {
	MOV(32, R(tempReg2), R(resultReg));
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));
	AND(32, R(tempReg1), Imm32(0x000003E0));
	SHL(32, R(tempReg1), Imm8(3));
	OR(32, R(tempReg2), R(tempReg1));

	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x00007C00));
	SHL(32, R(tempReg1), Imm8(6));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  After this is just A.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	// Chop off the bits that were shifted out.
	AND(32, R(tempReg1), Imm32(0x00070707));
	OR(32, R(tempReg2), R(tempReg1));

	// For A, we shift it to a single bit, and then subtract and XOR.
	// That's probably the simplest way to expand it...
	SHR(32, R(resultReg), Imm8(15));
	// If it was 0, it's now -1, otherwise it's 0.  Easy.
	SUB(32, R(resultReg), Imm8(1));
	XOR(32, R(resultReg), Imm32(0xFF000000));
	AND(32, R(resultReg), Imm32(0xFF000000));
	OR(32, R(resultReg), R(tempReg2));
	return true;
}
bool SamplerJitCache::Jit_Decode5650() {
	MOV(32, R(tempReg2), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));

	// B (we do R and B at the same time, they're both 5.)
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x0000F800));
	SHL(32, R(tempReg1), Imm8(5));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  At this point we have 00BB00RR.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	OR(32, R(tempReg2), R(tempReg1));
	AND(32, R(tempReg2), Imm32(0x00FF00FF));

	// Now's as good a time to put in A as any.
	OR(32, R(tempReg2), Imm32(0xFF000000));

	// Last, we need to align, extract, and expand G.
	// 3 to align to G, and then 2 to expand to 8.
	SHL(32, R(resultReg), Imm8(3 + 2));
	AND(32, R(resultReg), Imm32(0x0000FC00));
	MOV(32, R(tempReg1), R(resultReg));
	// 2 to account for resultReg being preshifted, 4 for expansion.
	SHR(32, R(tempReg1), Imm8(2 + 4));
	OR(32, R(resultReg), R(tempReg1));
	AND(32, R(resultReg), Imm32(0x0000FF00));
	OR(32, R(resultReg), R(tempReg2));
	return true;
}
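// For reference, a plain-C sketch of what the two JIT routines above compute.
// The function names and the use of <stdint.h> types (rather than the
// engine's own u16/u32) are illustrative, not part of the sampler JIT.
static inline uint32_t DecodeRGBA5551Ref(uint16_t p) {
	// Spread the three 5-bit channels to bit offsets 0, 8, and 16.
	uint32_t c = (p & 0x001Fu) | ((uint32_t)(p & 0x03E0u) << 3) | ((uint32_t)(p & 0x7C00u) << 6);
	// Expand 5 -> 8 bits: the top 3 bits of each channel replicate into the bottom.
	c = (c << 3) | ((c >> 2) & 0x00070707u);
	// The 1-bit alpha becomes 0x00 or 0xFF.
	uint32_t a = (p & 0x8000u) ? 0xFF000000u : 0u;
	return a | c;
}

static inline uint32_t DecodeRGB565Ref(uint16_t p) {
	// R and B handled together, as in the JIT: 00BB00RR.
	uint32_t rb = (p & 0x001Fu) | ((uint32_t)(p & 0xF800u) << 5);
	rb = ((rb << 3) | (rb >> 2)) & 0x00FF00FFu;
	// G is 6 bits; expand 6 -> 8 and place it in byte 1.
	uint32_t g = ((uint32_t)p >> 5) & 0x3Fu;
	g = ((g << 2) | (g >> 4)) << 8;
	return 0xFF000000u | g | rb;
}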
void arch_store16(cpu_t *cpu, Value *val, Value *addr, BasicBlock *bb) { Value *shift = arch_get_shift16(cpu, addr, bb); addr = AND(addr, CONST(~3ULL)); Value *mask = XOR(SHL(CONST(65535), shift),CONST(-1ULL)); Value *old = AND(arch_load32_aligned(cpu, addr, bb), mask); val = OR(old, SHL(AND(val, CONST(65535)), shift)); arch_store32_aligned(cpu, val, addr, bb); }
void split_cb_shape_sign_unquant(
spx_sig_t *exc,
const void *par,                      /* non-overlapping codebook */
int   nsf,                            /* number of samples in subframe */
SpeexBits *bits,
char *stack
)
{
   int i,j;
   int *ind, *signs;
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   split_cb_params *params;
   int have_sign;

   params = (split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;

   ind = PUSH(stack, nb_subvect, int);
   signs = PUSH(stack, nb_subvect, int);

   /* Decode codewords and gains */
   for (i=0;i<nb_subvect;i++)
   {
      if (have_sign)
         signs[i] = speex_bits_unpack_unsigned(bits, 1);
      else
         signs[i] = 0;
      ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
   }

   /* Compute decoded excitation */
   for (i=0;i<nb_subvect;i++)
   {
      spx_word16_t s=1;
      if (signs[i])
         s=-1;
#ifdef FIXED_POINT
      if (s==1)
      {
         for (j=0;j<subvect_size;j++)
            exc[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            exc[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
      }
#else
      for (j=0;j<subvect_size;j++)
         exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
#endif
   }
}
word32_t ChebyshevPolynomial(word16_t x, word32_t f[])
{
	/* bk in Q15 */
	word32_t bk;
	word32_t bk1 = ADD32(SHL(x,1), f[1]); /* init: b4 = 2x + f1 */
	word32_t bk2 = ONE_IN_Q15; /* init: b5 = 1 */
	uint8_t k;
	for (k=3; k>0; k--) { /* at the end of loop execution we have b1 in bk1 and b2 in bk2 */
		bk = SUB32(ADD32(SHL(MULT16_32_Q15(x,bk1), 1), f[5-k]), bk2); /* bk = 2*x*bk1 - bk2 + f(5-k), all in Q15 */
		bk2 = bk1;
		bk1 = bk;
	}
	return SUB32(ADD32(MULT16_32_Q15(x,bk1), SHR(f[5],1)), bk2); /* C(x) = x*b1 - b2 + f(5)/2 */
}
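/* A floating-point restatement of the same Clenshaw recurrence, shown only as
 * a readability aid (the real routine works in Q15/Q31 fixed point; this
 * reference function is not part of the codec). */
static float ChebyshevPolynomialRef(float x, const float f[6])
{
	float bk2 = 1.0f;            /* b5 */
	float bk1 = 2.0f * x + f[1]; /* b4 */
	int k;
	for (k = 3; k > 0; k--) {
		float bk = 2.0f * x * bk1 - bk2 + f[5 - k]; /* computes b3, b2, b1 */
		bk2 = bk1;
		bk1 = bk;
	}
	return x * bk1 - bk2 + 0.5f * f[5]; /* C(x) = x*b1 - b2 + f5/2 */
}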
static inline Value * arch_6502_pull16(cpu_t *cpu, BasicBlock *bb) { Value *lo = PULL; Value *hi = PULL; return (OR(ZEXT16(lo), SHL(ZEXT16(hi), CONST16(8)))); }
/*** shlGetAttrType - get the type (DATA_T_xxx) of an attribute by name. ***/
int shlGetAttrType(void* inf_v, char* attrname, pObjTrxTree* oxt)
    {
    pShlData inf = SHL(inf_v);
    pEnvVar pEV;

	/** If name, it's a string **/
	if (!strcmp(attrname,"name")) return DATA_T_STRING;

	/** If 'content_type', it's also a string. **/
	if (!strcmp(attrname,"content_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"inner_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"outer_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"annotation")) return DATA_T_STRING;
	if (!strcmp(attrname,"status")) return DATA_T_STRING;
	if (!strncmp(attrname,"arg",3)) return DATA_T_STRING;

	pEV = (pEnvVar)xhLookup(&inf->envHash,attrname);
	if(pEV)
	    return DATA_T_STRING;

	mssError(1,"SHL","Could not locate requested attribute: %s",attrname);
	return -1;
    }
/*** shlGetNextAttr - get the next attribute name for this object. ***/
char* shlGetNextAttr(void* inf_v, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);
    int i;

	if(inf->CurAttr >= xaCount(&inf->argArray) + xaCount(&inf->envList) + 1)
	    return NULL;
	if(inf->CurAttr == xaCount(&inf->argArray) + xaCount(&inf->envList))
	    {
	    inf->CurAttr++;
	    return "status";
	    }
	/** guard: beyond this point an "argNNNNNN" name would overrun the buffer **/
	if(inf->CurAttr > 999999 && xaCount(&inf->argArray) >= 999999)
	    return NULL;
	if(inf->CurAttr < xaCount(&inf->argArray))
	    {
	    i = snprintf(inf->sCurAttr, 10, "arg%02i", inf->CurAttr++);
	    inf->sCurAttr[9] = '\0'; /* snprintf already terminates; stay inside the 10-byte buffer */
	    return inf->sCurAttr;
	    }
	else
	    {
	    return (char*)xaGetItem(&inf->envList, (inf->CurAttr++) - xaCount(&inf->argArray));
	    }
    }
/* FIXME: These functions are ugly and probably introduce too much error */ void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) { int i; for (i=0;i<len;i++) { y[i] = SHL(MULT16_32_Q14(SHR(x[i],7),scale),7); } }
static Value * arch_get_shift16(cpu_t *cpu, Value *addr, BasicBlock *bb) { Value *shift = AND(LSHR(addr,CONST(1)),CONST(1)); if (!IS_LITTLE_ENDIAN(cpu)) shift = XOR(shift, CONST(1)); return SHL(shift, CONST(4)); }
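/* Taken together, arch_get_shift16() and arch_store16() emit IR equivalent to
 * the following plain C (a sketch only; `little_endian` stands in for
 * IS_LITTLE_ENDIAN(cpu), and guest memory is modeled as an array of 32-bit
 * words). */
static void store16_via_aligned32(uint32_t *mem, uint32_t addr, uint16_t val, int little_endian)
{
	unsigned shift = (addr >> 1) & 1;   /* which halfword of the aligned word */
	if (!little_endian)
		shift ^= 1;
	shift <<= 4;                        /* 0 or 16 bits */

	uint32_t *word = &mem[(addr & ~3u) >> 2];
	uint32_t mask = ~(0xFFFFu << shift);
	*word = (*word & mask) | ((uint32_t)(val & 0xFFFFu) << shift);
}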
/*** shlGetFirstAttr - get the first attribute name for this object. ***/ char* shlGetFirstAttr(void* inf_v, pObjTrxTree oxt) { pShlData inf = SHL(inf_v); inf->CurAttr=0; return shlGetNextAttr(inf_v,oxt); }
/*** shlAddAttr - add an attribute to an object. This doesn't always work *** for all object types, and certainly makes no sense for some (like unix *** files). ***/ int shlAddAttr(void* inf_v, char* attrname, int type, void* val, pObjTrxTree oxt) { pShlData inf = SHL(inf_v); pStructInf new_inf; char* ptr; return -1; }
void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes) { int i; spx_word16_t tmp = DIV32_16(SHL(1 + subframe,14),nb_subframes); spx_word16_t tmp2 = 16384-tmp; for (i=0;i<len;i++) { interp_lsp[i] = MULT16_16_P14(tmp2,old_lsp[i]) + MULT16_16_P14(tmp,new_lsp[i]); } }
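/* Float equivalent of the Q14 arithmetic above, as a sketch: the subframe
 * index selects a linear blend between the previous and current LSP sets. */
static void lsp_interpolate_ref(const float *old_lsp, const float *new_lsp,
                                float *interp_lsp, int len, int subframe, int nb_subframes)
{
   float t = (float)(1 + subframe) / (float)nb_subframes; /* `tmp` above, as a fraction */
   int i;
   for (i = 0; i < len; i++)
      interp_lsp[i] = (1.0f - t) * old_lsp[i] + t * new_lsp[i];
}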
void vec4_gs_visitor::gs_end_primitive()
{
   /* We can only do EndPrimitive() functionality when the control data
    * consists of cut bits.  Fortunately, the only time it isn't is when the
    * output type is points, in which case EndPrimitive() is a no-op.
    */
   if (gs_prog_data->control_data_format != GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
      return;
   }

   if (c->control_data_header_size_bits == 0)
      return;

   /* Cut bits use one bit per vertex. */
   assert(c->control_data_bits_per_vertex == 1);

   /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
    * vertex n, 0 otherwise.  So all we need to do here is mark bit
    * (vertex_count - 1) % 32 in the cut_bits register to indicate that
    * EndPrimitive() was called after emitting vertex (vertex_count - 1);
    * vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
    *
    * Note that if EndPrimitive() is called before emitting any vertices, this
    * will cause us to set bit 31 of the control_data_bits register to 1.
    * That's fine because:
    *
    * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
    *   output, so the hardware will ignore cut bit 31.
    *
    * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
    *   last vertex, so setting cut bit 31 has no effect (since the primitive
    *   is automatically ended when the GS terminates).
    *
    * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
    *   control_data_bits register to 0 when the first vertex is emitted.
    */

   /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
   src_reg one(this, glsl_type::uint_type);
   emit(MOV(dst_reg(one), brw_imm_ud(1u)));
   src_reg prev_count(this, glsl_type::uint_type);
   emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
   src_reg mask(this, glsl_type::uint_type);
   /* Note: we're relying on the fact that the GEN SHL instruction only pays
    * attention to the lower 5 bits of its second source argument, so on this
    * architecture, 1 << (vertex_count - 1) is equivalent to
    * 1 << ((vertex_count - 1) % 32).
    */
   emit(SHL(dst_reg(mask), one, prev_count));
   emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
// shifts or rotates an lvalue left or right, regardless of width Value * arch_shiftrotate(cpu_t *cpu, Value *dst, Value *src, bool left, bool rotate, BasicBlock *bb) { Value *c; Value *v = LOAD(src); if (left) { c = ICMP_SLT(v, CONSTs(SIZE(v), 0)); /* old MSB to carry */ v = SHL(v, CONSTs(SIZE(v), 1)); if (rotate) v = OR(v,ZEXT(SIZE(v), LOAD(cpu->ptr_C))); } else { c = TRUNC1(v); /* old LSB to carry */ v = LSHR(v, CONSTs(SIZE(v), 1)); if (rotate) v = OR(v,SHL(ZEXT(SIZE(v), LOAD(cpu->ptr_C)), CONSTs(SIZE(v), SIZE(v)-1))); } LET1(cpu->ptr_C, c); return STORE(v, dst); }
void vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) { /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */ /* Note: we are calling this *before* increasing vertex_count, so * this->vertex_count == vertex_count - 1 in the formula above. */ /* Stream mode uses 2 bits per vertex */ assert(c->control_data_bits_per_vertex == 2); /* Must be a valid stream */ assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS); /* Control data bits are initialized to 0 so we don't have to set any * bits when sending vertices to stream 0. */ if (stream_id == 0) return; /* reg::sid = stream_id */ src_reg sid(this, glsl_type::uint_type); emit(MOV(dst_reg(sid), stream_id)); /* reg:shift_count = 2 * (vertex_count - 1) */ src_reg shift_count(this, glsl_type::uint_type); emit(SHL(dst_reg(shift_count), this->vertex_count, 1u)); /* Note: we're relying on the fact that the GEN SHL instruction only pays * attention to the lower 5 bits of its second source argument, so on this * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to * stream_id << ((2 * (vertex_count - 1)) % 32). */ src_reg mask(this, glsl_type::uint_type); emit(SHL(dst_reg(mask), sid, shift_count)); emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); }
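/* A scalar model of the two control-data updates above, following the
 * formulas in their comments (a sketch, not Mesa code): both rely on the GEN
 * SHL reducing its shift count mod 32, which the C `%` makes explicit here. */
static uint32_t cut_bit_mask(uint32_t vertex_count)
{
   return 1u << ((vertex_count - 1u) % 32u);               /* gs_end_primitive() */
}

static uint32_t stream_id_mask(uint32_t stream_id, uint32_t vertex_count)
{
   return stream_id << ((2u * (vertex_count - 1u)) % 32u); /* 2 bits per vertex */
}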
void LPSynthesisFilter (word16_t *excitationVector, word16_t *LPCoefficients, word16_t *reconstructedSpeech) { int i; /* compute excitationVector[i] - Sum0-9(LPCoefficients[j]*reconstructedSpeech[i-j]) */ for (i=0; i<L_SUBFRAME; i++) { word32_t acc = SHL(excitationVector[i],12); /* acc get the first term of the sum, in Q12 (excitationVector is in Q0)*/ int j; for (j=0; j<NB_LSP_COEFF; j++) { acc = MSU16_16(acc, LPCoefficients[j], reconstructedSpeech[i-j-1]); } reconstructedSpeech[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* shift right acc to get it back in Q0 and check overflow on 16 bits */ } return; }
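/* Float restatement of the synthesis filter above (a sketch; the real code
 * keeps LPCoefficients in Q12 and saturates the result to 16 bits).  Note
 * that, like the original, it reads `order` samples of history before
 * out[0], so out[-order..-1] must hold past output. */
static void lp_synthesis_ref(const float *exc, const float *a, float *out, int n, int order)
{
	int i, j;
	for (i = 0; i < n; i++) {
		float acc = exc[i];
		for (j = 0; j < order; j++)
			acc -= a[j] * out[i - j - 1];
		out[i] = acc;
	}
}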
/*** shlWrite - Write data to the shell's stdin ***/
int shlWrite(void* inf_v, char* buffer, int cnt, int offset, int flags, pObjTrxTree* oxt)
    {
    pShlData inf = SHL(inf_v);
    int i = -1;

	if(SHELL_DEBUG & SHELL_DEBUG_IO)
	    printf("%s -- %p, %p, %i, %i, %i, %p\n",__FUNCTION__,inf_v,buffer,cnt,offset,flags,oxt);

	/** launch the program if it's not running already **/
	if(inf->shell_pid == -1)
	    if(shl_internal_Launch(inf) < 0)
		return -1;

	/** seek is _not_ allowed (obviously) **/
	if(flags & FD_U_SEEK && offset != inf->curWrite)
	    return -1;

	/** can't write to a dead child :) **/
	if(!inf->shell_pid)
	    return -1;

	while(i < 0)
	    {
	    i = fdWrite(inf->shell_fd, buffer, cnt, 0, flags & ~FD_U_SEEK);
	    if(i < 0)
		{
		/** user doesn't want us to block **/
		if(flags & FD_U_NOBLOCK)
		    return -1;

		/** check and make sure the child process is still alive... **/
		shl_internal_UpdateStatus(inf);
		if(inf->shell_pid > 0)
		    {
		    /** child is alive, it just isn't ready for data yet -- wait a bit **/
		    thSleep(200);
		    }
		else if(inf->shell_pid == 0)
		    {
		    /** child died :( **/
		    return -1;
		    }
		}
	    }
	inf->curWrite += i;
	return i;
    }
void gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) { /* We want to left shift just DWORD 4 (the x component belonging to the * second geometry shader invocation) by 4 bits. So generate the * instruction: * * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } */ dst = suboffset(vec1(dst), 4); default_state.access_mode = BRW_ALIGN_1; gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4)); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; }
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) {
	GEPaletteFormat fmt = (GEPaletteFormat)id.clutfmt;
	if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) {
		// This is simple - just mask if necessary.
		if (bitsPerIndex > 8) {
			AND(32, R(resultReg), Imm32(0x000000FF));
		}
		return true;
	}

	MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate.clutformat));
	MOV(32, R(tempReg1), MatR(tempReg1));

	// Shift = (clutformat >> 2) & 0x1F
	if (id.hasClutShift) {
		MOV(32, R(RCX), R(tempReg1));
		SHR(32, R(RCX), Imm8(2));
		AND(32, R(RCX), Imm8(0x1F));
		SHR(32, R(resultReg), R(RCX));
	}

	// Mask = (clutformat >> 8) & 0xFF
	if (id.hasClutMask) {
		MOV(32, R(tempReg2), R(tempReg1));
		SHR(32, R(tempReg2), Imm8(8));
		AND(32, R(resultReg), R(tempReg2));
	}

	// We need to wrap any entries beyond the first 1024 bytes.
	u32 offsetMask = fmt == GE_CMODE_32BIT_ABGR8888 ? 0x00FF : 0x01FF;

	// We must mask to 0xFF before ORing 0x100 in 16 bit CMODEs.
	// But skip if we'll mask 0xFF after offset anyway.
	if (bitsPerIndex > 8 && (!id.hasClutOffset || offsetMask != 0x00FF)) {
		AND(32, R(resultReg), Imm32(0x000000FF));
	}

	// Offset = (clutformat >> 12) & 0x01F0
	if (id.hasClutOffset) {
		SHR(32, R(tempReg1), Imm8(16));
		SHL(32, R(tempReg1), Imm8(4));
		OR(32, R(resultReg), R(tempReg1));
		AND(32, R(resultReg), Imm32(offsetMask));
	}
	return true;
}
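// A scalar sketch of the transform the JIT emits above, with all three
// features (shift, mask, offset) enabled.  The name is illustrative;
// `clutformat` is the raw gstate.clutformat word.
static uint32_t TransformClutIndexRef(uint32_t index, uint32_t clutformat, bool clut32) {
	uint32_t shift = (clutformat >> 2) & 0x1F;
	uint32_t mask = (clutformat >> 8) & 0xFF;
	uint32_t offset = (clutformat >> 16) << 4;       // == ((clutformat >> 12) & 0x01F0) once offsetMask is applied
	uint32_t offsetMask = clut32 ? 0x00FF : 0x01FF;  // wrap past the first 1024 bytes
	return (((index >> shift) & mask) | offset) & offsetMask;
}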
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_sig_t xi,yi,nyi;
   for (i=0;i<N;i++)
   {
      xi=SATURATE(x[i],805306368);
      yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
      nyi = -yi;
      for (j=0;j<ord-1;j++)
      {
         mem[j] = MAC16_32_Q15(MAC16_32_Q15(mem[j+1], num[j+1],xi), den[j+1],nyi);
      }
      mem[ord-1] = SUB32(MULT16_32_Q15(num[ord],xi), MULT16_32_Q15(den[ord],yi));
      y[i] = yi;
   }
}
/*** shlSetAttrValue - sets the value of an attribute.  'val' must
 *** point to an appropriate data type.
 ***/
int shlSetAttrValue(void* inf_v, char* attrname, int datatype, pObjData val, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);

	/** Choose the attr name **/
	/** Changing name of node object? **/
	if (!strcmp(attrname,"name"))
	    {
	    if (inf->Obj->Pathname->nElements == inf->Obj->SubPtr)
		{
		if (!strcmp(inf->Obj->Pathname->Pathbuf,".")) return -1;
		if (strlen(inf->Obj->Pathname->Pathbuf) - strlen(strrchr(inf->Obj->Pathname->Pathbuf,'/')) + strlen(val->String) + 1 > 255)
		    {
		    mssError(1,"SHL","SetAttr 'name': name too large for internal representation");
		    return -1;
		    }
		strcpy(inf->Pathname, inf->Obj->Pathname->Pathbuf);
		strcpy(strrchr(inf->Pathname,'/')+1,val->String);
		if (rename(inf->Obj->Pathname->Pathbuf, inf->Pathname) < 0)
		    {
		    mssError(1,"SHL","SetAttr 'name': could not rename structure file node object");
		    return -1;
		    }
		strcpy(inf->Obj->Pathname->Pathbuf, inf->Pathname);
		}
	    /** Set dirty flag **/
	    inf->Node->Status = SN_NS_DIRTY;
	    return 0;
	    }

	/** Try to set a command parameter (env variable) **/
	if (shl_internal_SetParam(inf, attrname, datatype, val) < 0)
	    return -1;

	return 0;
    }
spx_word16_t compute_rms(const spx_sig_t *x, int len)
{
   int i;
   spx_word32_t sum=0;
   spx_sig_t max_val=1;
   int sig_shift;

   for (i=0;i<len;i++)
   {
      spx_sig_t tmp = x[i];
      if (tmp<0)
         tmp = -tmp;
      if (tmp > max_val)
         max_val = tmp;
   }

   sig_shift=0;
   while (max_val>16383)
   {
      sig_shift++;
      max_val >>= 1;
   }

   /* Note: the unrolled loop assumes len is a multiple of 4. */
   for (i=0;i<len;i+=4)
   {
      spx_word32_t sum2=0;
      spx_word16_t tmp;
      tmp = SHR(x[i],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+1],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+2],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+3],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      sum += SHR(sum2,6);
   }

   return SHR(SHL((spx_word32_t)spx_sqrt(1+DIV32(sum,len)),(sig_shift+3)),SIG_SHIFT);
}
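/* What compute_rms() approximates, in floating point (a sketch only; the
 * fixed-point version block-scales by sig_shift and by 1/64 per 4-sample
 * group to keep the accumulator in range, then undoes the scaling). */
static float compute_rms_ref(const float *x, int len)
{
   float sum = 0.0f;
   int i;
   for (i = 0; i < len; i++)
      sum += x[i] * x[i];
   return (float)sqrt(1.0 + sum / len); /* needs <math.h> */
}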
INT16 q_gain_pitch (       /* Return index of quantization */
    INT16 *gain            /* (i) : Pitch gain to quantize */
)
{
    INT16 i, index, gain_q14, err, err_min;

    VPP_EFR_PROFILE_FUNCTION_ENTER(q_gain_pitch);

    //gain_q14 = shl (*gain, 2);
    gain_q14 = SHL(*gain, 2);

    //err_min = abs_s (sub (gain_q14, qua_gain_pitch[0]));
    err_min = ABS_S(SUB (gain_q14, qua_gain_pitch[0]));

    index = 0;

    for (i = 1; i < NB_QUA_PITCH; i++)
    {
        //err = abs_s (sub (gain_q14, qua_gain_pitch[i]));
        err = ABS_S(SUB (gain_q14, qua_gain_pitch[i]));

        //if (sub (err, err_min) < 0)
        if (SUB (err, err_min) < 0)
        {
            err_min = err;
            index = i;
        }
    }

    //*gain = shr (qua_gain_pitch[index], 2);
    *gain = SHR_D(qua_gain_pitch[index], 2);

    VPP_EFR_PROFILE_FUNCTION_EXIT(q_gain_pitch);
    return index;
}
void vec4_vs_visitor::emit_prolog() { dst_reg sign_recovery_shift; dst_reg normalize_factor; dst_reg es3_normalize_factor; for (int i = 0; i < VERT_ATTRIB_MAX; i++) { if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) { uint8_t wa_flags = key->gl_attrib_wa_flags[i]; dst_reg reg(ATTR, i); dst_reg reg_d = reg; reg_d.type = BRW_REGISTER_TYPE_D; dst_reg reg_ud = reg; reg_ud.type = BRW_REGISTER_TYPE_UD; /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes * come in as floating point conversions of the integer values. */ if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); } /* Do sign recovery for 2101010 formats if required. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift))); } /* Apply BGRA swizzle if required. */ if (wa_flags & BRW_ATTRIB_WA_BGRA) { src_reg temp = src_reg(reg); temp.swizzle = BRW_SWIZZLE4(2,1,0,3); emit(MOV(reg, temp)); } if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) { /* ES 3.0 has different rules for converting signed normalized * fixed-point numbers than desktop GL. */ if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) { /* According to equation 2.2 of the ES 3.0 specification, * signed normalization conversion is done by: * * f = c / (2^(b-1)-1) */ if (es3_normalize_factor.file == BAD_FILE) { /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<9) - 1)))); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<1) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg(reg_d))); emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * * 2.1 unsigned normalization * f = c/(2^n-1) * * 2.2 signed normalization * f = (2c+1)/(2^n-1) * * Both of these share a common divisor, which is represented by * "normalize_factor" in the code below. */ if (normalize_factor.file == BAD_FILE) { /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<10) - 1)))); emit(MOV(writemask(normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<2) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); /* For signed normalization, we want the numerator to be 2c+1. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { emit(MUL(dst, src_reg(dst), src_reg(2.0f))); emit(ADD(dst, src_reg(dst), src_reg(1.0f))); } emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); } } if (wa_flags & BRW_ATTRIB_WA_SCALE) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); } } } }
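/* Scalar model of the 2101010 sign-recovery trick above (a sketch, not Mesa
 * code): shifting a packed field up so its sign bit lands in bit 31, then
 * arithmetic-shifting back, sign-extends it.  The shift is 22 for the 10-bit
 * XYZ fields and 30 for the 2-bit W field, matching sign_recovery_shift.
 * (Assumes the usual arithmetic >> on signed ints.) */
static int32_t sign_extend_field(uint32_t v, unsigned width)
{
   unsigned shift = 32 - width;              /* 22 or 30 */
   return ((int32_t)(v << shift)) >> shift;
}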
void split_cb_search_shape_sign(
spx_sig_t target[],             /* target vector */
spx_coef_t ak[],                /* LPCs for this subframe */
spx_coef_t awk1[],              /* Weighted LPCs for this subframe */
spx_coef_t awk2[],              /* Weighted LPCs for this subframe */
const void *par,                /* Codebook/search parameters */
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,
spx_sig_t *r,
SpeexBits *bits,
char *stack,
int   complexity
)
{
   int i,j,k,m,n,q;
   spx_word16_t *resp;
#ifdef _USE_SSE
   __m128 *resp2;
   __m128 *E;
#else
   spx_word16_t *resp2;
   spx_word32_t *E;
#endif
   spx_word16_t *t;
   spx_sig_t *e, *r2;
   spx_word16_t *tmp;
   spx_word32_t *ndist, *odist;
   int *itmp;
   spx_word16_t **ot, **nt;
   int **nind, **oind;
   int *ind;
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   split_cb_params *params;
   int N=2;
   int *best_index;
   spx_word32_t *best_dist;
   int have_sign;

   N=complexity;
   if (N>10)
      N=10;

   ot=PUSH(stack, N, spx_word16_t*);
   nt=PUSH(stack, N, spx_word16_t*);
   oind=PUSH(stack, N, int*);
   nind=PUSH(stack, N, int*);

   params = (split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;

   resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
   E = PUSH(stack, shape_cb_size>>2, __m128);
#else
   resp2 = resp;
   E = PUSH(stack, shape_cb_size, spx_word32_t);
#endif
   t = PUSH(stack, nsf, spx_word16_t);
   e = PUSH(stack, nsf, spx_sig_t);
   r2 = PUSH(stack, nsf, spx_sig_t);
   ind = PUSH(stack, nb_subvect, int);

   tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot[i]=tmp;
      tmp += nsf;
      nt[i]=tmp;
      tmp += nsf;
   }

   best_index = PUSH(stack, N, int);
   best_dist = PUSH(stack, N, spx_word32_t);
   ndist = PUSH(stack, N, spx_word32_t);
   odist = PUSH(stack, N, spx_word32_t);

   itmp = PUSH(stack, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp;
      itmp+=nb_subvect;
      oind[i]=itmp;
      itmp+=nb_subvect;
      for (j=0;j<nb_subvect;j++)
         nind[i][j]=oind[i][j]=-1;
   }

   /* FIXME: make that adaptive? */
   for (i=0;i<nsf;i++)
      t[i]=SHR(target[i],6);

   for (j=0;j<N;j++)
      for (i=0;i<nsf;i++)
         ot[j][i]=t[i];

   /*for (i=0;i<nsf;i++) printf ("%d\n", (int)t[i]);*/

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j=0;j<N;j++)
      odist[j]=0;

   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      for (j=0;j<N;j++)
         ndist[j]=-2;

      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /*Find new n-best based on previous n-best j*/
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            spx_word16_t *ct;
            spx_word32_t err=0;
            ct = ot[j];
            /*update target*/

            /*previous target*/
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               t[m]=ct[m];

            /* New code: update only enough of the target to calculate error*/
            {
               int rind;
               spx_word16_t *res;
               spx_word16_t sign=1;
               rind = best_index[k];
               if (rind>=shape_cb_size)
               {
                  sign=-1;
                  rind-=shape_cb_size;
               }
               res = resp+rind*subvect_size;
               if (sign>0)
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] -= res[m];
               else
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] += res[m];
            }

            /*compute error (distance)*/
            err=odist[j];
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               err += t[m]*t[m];

            /*update n-best list*/
            if (err<ndist[N-1] || ndist[N-1]<-1)
            {
               /*previous target (we don't care what happened before)*/
               for (m=(i+1)*subvect_size;m<nsf;m++)
                  t[m]=ct[m];

               /* New code: update the rest of the target only if it's worth it */
               for (m=0;m<subvect_size;m++)
               {
                  spx_word16_t g;
                  int rind;
                  spx_word16_t sign=1;
                  rind = best_index[k];
                  if (rind>=shape_cb_size)
                  {
                     sign=-1;
                     rind-=shape_cb_size;
                  }

                  q=subvect_size-m;
#ifdef FIXED_POINT
                  g=sign*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q]));
#else
                  g=sign*0.03125*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],g*r[q]);
#endif
               }

               for (m=0;m<N;m++)
               {
                  if (err < ndist[m] || ndist[m]<-1)
                  {
                     for (n=N-1;n>m;n--)
                     {
                        for (q=(i+1)*subvect_size;q<nsf;q++)
                           nt[n][q]=nt[n-1][q];
                        for (q=0;q<nb_subvect;q++)
                           nind[n][q]=nind[n-1][q];
                        ndist[n]=ndist[n-1];
                     }
                     for (q=(i+1)*subvect_size;q<nsf;q++)
                        nt[m][q]=t[q];
                     for (q=0;q<nb_subvect;q++)
                        nind[m][q]=oind[j][q];
                     nind[m][i]=best_index[k];
                     ndist[m]=err;
                     break;
                  }
               }
            }
         }
         if (i==0)
            break;
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }

   /* Put everything back together */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }

   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]+=e[j];

   /* Update target */
   syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
   for (j=0;j<nsf;j++)
      target[j]-=r2[j];
}
static word_type ROTL(word_type x, unsigned n)
{
	/* Guard n == 0: shifting right by word_bits would be undefined behavior. */
	return n ? (SHL(x, n) | SHR(x, word_bits - n)) : x;
}
void Az_lsp (
    INT16 a[],          /* (i) : predictor coefficients                 */
    INT16 lsp[],        /* (o) : line spectral pairs                    */
    INT16 old_lsp[]     /* (i) : old lsp[] (in case not found 10 roots) */
)
{
    INT16 i, j, nf, ip;
    INT16 xlow, ylow, xhigh, yhigh, xmid, ymid, xint;
    INT16 x, y, sign, exp;
    INT16 *coef;
    INT16 f1[M / 2 + 1], f2[M / 2 + 1];
    INT32 t0 = 0;

    VPP_EFR_PROFILE_FUNCTION_ENTER(Az_lsp);

    /*-------------------------------------------------------------*
     * find the sum and diff. pol. F1(z) and F2(z)                  *
     * F1(z) <--- F1(z)/(1+z**-1) & F2(z) <--- F2(z)/(1-z**-1)      *
     *                                                              *
     * f1[0] = 1.0;                                                 *
     * f2[0] = 1.0;                                                 *
     *                                                              *
     * for (i = 0; i< NC; i++)                                      *
     * {                                                            *
     *     f1[i+1] = a[i+1] + a[M-i] - f1[i] ;                      *
     *     f2[i+1] = a[i+1] - a[M-i] + f2[i] ;                      *
     * }                                                            *
     *-------------------------------------------------------------*/

    f1[0] = 1024;   /* f1[0] = 1.0 */
    f2[0] = 1024;   /* f2[0] = 1.0 */

    for (i = 0; i < NC; i++)
    {
        //VPP_MLX16 (t0_hi,t0_lo,a[i + 1], 8192);
        //VPP_MLA16 ( t0_hi, t0_lo, a[M - i], 8192);
        //t0 = VPP_SCALE64_TO_16( t0_hi, t0_lo);
        //x = EXTRACT_H(t0);
        t0 = (INT32) a[i + 1] + (INT32)a[M - i];
        x = (INT16)(L_SHR_D(t0,2));

        /* f1[i+1] = a[i+1] + a[M-i] - f1[i] */
        f1[i + 1] = SUB (x, f1[i]);

        //VPP_MLX16(t0_hi, t0_lo, a[i + 1], 8192);
        //VPP_MLA16(t0_hi, t0_lo, a[M - i], -8192);
        //x = EXTRACT_H(VPP_SCALE64_TO_16(t0_hi, t0_lo));
        t0 = (INT32) a[i + 1] - (INT32)a[M - i];
        x = (INT16)(L_SHR_D(t0,2));

        //f2[i + 1] = add (x, f2[i]);
        f2[i + 1] = ADD(x, f2[i]);
    }

    /*-------------------------------------------------------------*
     * find the LSPs using the Chebychev pol. evaluation            *
     *-------------------------------------------------------------*/

    nf = 0;  /* number of found frequencies */
    ip = 0;  /* indicator for f1 or f2      */

    coef = f1;

    xlow = grid[0];
    ylow = Chebps (xlow, coef, NC);

    j = 0;
    /* while ( (nf < M) && (j < grid_points) ) */
    //while ((sub (nf, M) < 0) && (sub (j, grid_points) < 0))
    while ((SUB (nf, M) < 0) && (SUB (j, grid_points) < 0))
    {
        j++;
        xhigh = xlow;
        yhigh = ylow;
        xlow = grid[j];
        ylow = Chebps (xlow, coef, NC);

        //if (L_mult (ylow, yhigh) <= (INT32) 0L)
        if (L_MULT(ylow, yhigh) <= (INT32) 0L)
        {
            /* divide 4 times the interval */
            for (i = 0; i < 4; i++)
            {
                /* xmid = (xlow + xhigh)/2 */
                //xmid = add (shr (xlow, 1), shr (xhigh, 1));
                xmid = ADD ((SHR_D(xlow, 1)),(SHR_D(xhigh, 1)));
                ymid = Chebps (xmid, coef, NC);

                //if (L_mult (ylow, ymid) <= (INT32) 0L)
                if (L_MULT(ylow, ymid) <= (INT32) 0L)
                {
                    yhigh = ymid;
                    xhigh = xmid;
                }
                else
                {
                    ylow = ymid;
                    xlow = xmid;
                }
            }

            /*-------------------------------------------------------------*
             * Linear interpolation                                         *
             * xint = xlow - ylow*(xhigh-xlow)/(yhigh-ylow);                *
             *-------------------------------------------------------------*/

            //x = sub (xhigh, xlow);
            x = SUB (xhigh, xlow);
            //y = sub (yhigh, ylow);
            y = SUB (yhigh, ylow);

            if (y == 0)
            {
                xint = xlow;
            }
            else
            {
                sign = y;
                //y = abs_s (y);
                y = ABS_S(y);
                exp = norm_s (y);
                //y = shl (y, exp);
                y = SHL(y, exp);
                y = div_s ((INT16) 16383, y);
                //t0 = L_mult (x, y);
                t0 = L_MULT(x, y);
                //t0 = L_shr (t0, sub (20, exp));
                t0 = L_SHR_V(t0, SUB (20, exp));
                //y = extract_l (t0);   /* y = (xhigh-xlow)/(yhigh-ylow) */
                y = EXTRACT_L(t0);

                if (sign < 0)
                {
                    //y = negate (y);
                    y = NEGATE(y);
                }

                //t0 = L_mult (ylow, y);
                t0 = L_MULT(ylow, y);
                //t0 = L_shr (t0, 11);
                t0 = L_SHR_D(t0, 11);
                //xint = sub (xlow, extract_l (t0));   /* xint = xlow - ylow*y */
                xint = SUB (xlow, EXTRACT_L(t0));
            }

            lsp[nf] = xint;
            xlow = xint;
            nf++;

            if (ip == 0)
            {
                ip = 1;
                coef = f2;
            }
            else
            {
                ip = 0;
                coef = f1;
            }
            ylow = Chebps (xlow, coef, NC);
        }
    }

    /* Check if M roots found */
    //if (sub (nf, M) < 0)
    if (SUB (nf, M) < 0)
    {
        for (i = 0; i < M; i++)
        {
            lsp[i] = old_lsp[i];
        }
    }

    VPP_EFR_PROFILE_FUNCTION_EXIT(Az_lsp);
    return;
}
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,         // message
    unsigned char *nsec,                                // not relevant to CLOC or SILC
    const unsigned char *c, unsigned long long clen,    // ciphertext
    const unsigned char *ad, unsigned long long adlen,  // associated data
    const unsigned char *npub,                          // nonce
    const unsigned char *k                              // the master key
)
{
    block estate, tstate, tmp; // encryption state, tag state, and temporary state
    estate = SETZERO();
    unsigned char ltag[16];    // local copy of the temporary tag value
    unsigned long long i, lastblocklen, j;

    /* set message (plaintext) length */
    *mlen = clen - CRYPTO_ABYTES;

    /* generate round keys from master key */
    AES128_KeyExpansion(k);

    /* process the first (partial) block of ad */
    load_partial_block(&estate, ad, (adlen>STATE_LEN)?STATE_LEN:adlen, ONE_ZERO_PADDING);
    fix0(estate);
    AES128_encrypt(estate, estate);
    if((ad[0] & 0x80) || (adlen == 0)){
        // apply h
        h(estate);
    }
    else{
        // do nothing
    }

    if(adlen > STATE_LEN){
        // ad is more than one block
        i = STATE_LEN;
        /* process the middle ad blocks, excluding the first and last (partial) block */
        while((i+STATE_LEN) < adlen)
        {
            tmp = LOAD(ad+i);
            estate = XOR(estate, tmp);
            AES128_encrypt(estate, estate);
            i += STATE_LEN;
        }
        /* process the last (partial) ad block */
        load_partial_block(&tmp, ad+i, adlen - i, ONE_ZERO_PADDING);
        estate = XOR(estate, tmp);
        AES128_encrypt(estate, estate);
    }

    /* process the nonce */
    load_partial_block(&tmp, npub, CRYPTO_NPUBBYTES, PARAM_OZP);
    estate = XOR(estate, tmp);
    if((adlen % STATE_LEN) || (adlen == 0)){
        /* apply f2 */
        f2(estate);
    }
    else{
        /* apply f1 */
        f1(estate);
    }

    /* process ciphertext */
    tstate = estate;
    AES128_encrypt(estate, estate);

    if(*mlen){
        /* apply g2 to tag state */
        g2(tstate);
    }
    else{
        /* apply g1 to tag state */
        g1(tstate);
    }
    AES128_encrypt(tstate, tstate);

    i = 0;
    /* process all of the message except for the last message/ciphertext block */
    while((i + STATE_LEN) < (*mlen)){
        tmp = LOAD(c+i);
        estate = XOR(estate, tmp);
        STORE(m+i, estate);
        tstate = XOR(tmp, tstate);
        AES128_encrypt(tstate, tstate);
        fix1(tmp);
        print_state("after applying fix1\n", estate);
        AES128_encrypt(tmp, estate);
        i += STATE_LEN;
    }

    /* process the last block of the message/ciphertext */
    lastblocklen = (*mlen) - i;
    if(lastblocklen > 0){
        load_partial_block(&tmp, c+i, lastblocklen, ZERO_APPEND);
        estate = XOR(estate, tmp);
        print_state("after xoring last partial message block\n", estate);
        store_partial_block(m+i, estate, lastblocklen);
        unsigned char shift_bytes = (STATE_LEN - (unsigned char)lastblocklen);
        tmp = AND(SHR(_mm_set1_epi8(0xff), shift_bytes), tmp);
        tstate = XOR(tstate, tmp);
        /* add the one-zero padding */
        tstate = XOR(tstate, SHL(_mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80), lastblocklen));
        if((*mlen) % STATE_LEN){
            /* apply f2 */
            f2(tstate);
        }
        else{
            /* apply f1 */
            f1(tstate);
        }
        AES128_encrypt(tstate, tstate);
    }

    /* compare tag and output message */
    STORE(ltag, tstate);
    for(j = 0; j < CRYPTO_ABYTES; j++){
        if(ltag[j] != c[clen - CRYPTO_ABYTES + j])
            return RETURN_TAG_NO_MATCH;
    }
    return RETURN_SUCCESS;
}
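/* The tag check above bails out on the first mismatching byte, which leaks
 * timing information.  A constant-time comparison is a common hardening;
 * the sketch below is illustrative and not part of the reference code. */
static int tag_matches(const unsigned char *a, const unsigned char *b,
                       unsigned long long n)
{
    unsigned char diff = 0;
    unsigned long long j;
    for (j = 0; j < n; j++)
        diff |= (unsigned char)(a[j] ^ b[j]); /* stays 0 only if every byte matches */
    return diff == 0;
}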
void gainQuantization(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t targetSignal[], word16_t filteredAdaptativeCodebookVector[], word16_t convolvedFixedCodebookVector[], word16_t fixedCodebookVector[], word64_t xy64, word64_t yy64,
                      word16_t *quantizedAdaptativeCodebookGain, word16_t *quantizedFixedCodebookGain, uint16_t *gainCodebookStage1, uint16_t *gainCodebookStage2)
{
	int i,j;
	word64_t xz64=0, yz64=0, zz64=0;
	word32_t xy;
	word32_t yy;
	word32_t xz;
	word32_t yz;
	word32_t zz;
	uint16_t minNormalization = 31;
	uint16_t currentNormalization;
	word32_t bestAdaptativeCodebookGain, bestFixedCodebookGain;
	word64_t denominator;
	word16_t predictedFixedCodebookGain;
	uint16_t indexBaseGa=0;
	uint16_t indexBaseGb=0;
	uint16_t indexGa=0, indexGb=0;
	word64_t distanceMin = MAXINT64;

	/*** compute spec 3.9 eq63 terms first on 64 bits and then scale them if needed to fit on 32 ***/
	/* xy64 and yy64 already computed during adaptativeCodebookGain computation */
	for (i=0; i<L_SUBFRAME; i++) {
		xz64 = MAC64(xz64, targetSignal[i], convolvedFixedCodebookVector[i]); /* in Q12 */
		yz64 = MAC64(yz64, filteredAdaptativeCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q12 */
		zz64 = MAC64(zz64, convolvedFixedCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q24 */
	}

	/* now scale these terms so they fit on 32 bits - terms xy, xz and yz shall fit on 31 bits because they are used in eq63 with a factor 2 */
	xy = SHR64(((xy64<0)?-xy64:xy64),30);
	yy = SHR64(yy64,31);
	xz = SHR64(((xz64<0)?-xz64:xz64),30);
	yz = SHR64(((yz64<0)?-yz64:yz64),30);
	zz = SHR64(zz64,31);

	currentNormalization = countLeadingZeros(xy);
	if (currentNormalization<minNormalization) {
		minNormalization = currentNormalization;
	}
	currentNormalization = countLeadingZeros(xz);
	if (currentNormalization<minNormalization) {
		minNormalization = currentNormalization;
	}
	currentNormalization = countLeadingZeros(yz);
	if (currentNormalization<minNormalization) {
		minNormalization = currentNormalization;
	}
	currentNormalization = countLeadingZeros(yy);
	if (currentNormalization<minNormalization) {
		minNormalization = currentNormalization;
	}
	currentNormalization = countLeadingZeros(zz);
	if (currentNormalization<minNormalization) {
		minNormalization = currentNormalization;
	}

	if (minNormalization<31) { /* we shall normalise, values are over 32 bits */
		minNormalization = 31 - minNormalization;
		xy = (word32_t)SHR64(xy64, minNormalization);
		yy = (word32_t)SHR64(yy64, minNormalization);
		xz = (word32_t)SHR64(xz64, minNormalization);
		yz = (word32_t)SHR64(yz64, minNormalization);
		zz = (word32_t)SHR64(zz64, minNormalization);
	} else { /* no need to normalise, values already fit on 32 bits, just cast them */
		xy = (word32_t)xy64; /* in Q0 */
		yy = (word32_t)yy64; /* in Q0 */
		xz = (word32_t)xz64; /* in Q12 */
		yz = (word32_t)yz64; /* in Q12 */
		zz = (word32_t)zz64; /* in Q24 */
	}

	/*** compute the best gains minimizing eq63 ***/
	/* Note: this best-gain computation is not at all described in the spec, got it from the ITU code */
	/* bestAdaptativeCodebookGain = (zz*xy - xz*yz) / (yy*zz - yz^2) */
	/* bestFixedCodebookGain = (yy*xz - xy*yz) / (yy*zz - yz^2) */
	/* best gains are computed in Q9 and Q2 and fit on 16 bits */
	denominator = MAC64(MULT32_32(yy, zz), -yz, yz); /* yy*zz - yz^2 in Q24 (always >= 0) */
	/* avoid division by zero */
	if (denominator==0) { /* consider it to be one */
		bestAdaptativeCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(zz, xy), -xz, yz), 15)); /* MAC in Q24 -> Q9 */
		bestFixedCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(yy, xz), -xy, yz), 10)); /* MAC in Q12 -> Q2 */
	} else {
		/* bestAdaptativeCodebookGain in Q9 */
		uint16_t numeratorNorm;
		word64_t numerator = MAC64(MULT32_32(zz, xy), -xz, yz); /* in Q24 */
		/* check if we can shift it left by 9 without overflow, as bestAdaptativeCodebookGain is computed in Q9 */
		word32_t numeratorH = (word32_t)(SHR64(numerator,32));
		numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
		numeratorNorm = countLeadingZeros(numeratorH);
		if (numeratorNorm >= 9) {
			bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator,9), denominator)); /* bestAdaptativeCodebookGain in Q9 */
		} else {
			word64_t shiftedDenominator = SHR64(denominator, 9-numeratorNorm);
			if (shiftedDenominator>0) { /* can't shift the numerator left by 9; can we shift the denominator right by 9-numeratorNorm without hitting 0? */
				bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestAdaptativeCodebookGain in Q9 */
			} else {
				bestAdaptativeCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 9-numeratorNorm); /* shift left the division result to reach Q9 */
			}
		}

		numerator = MAC64(MULT32_32(yy, xz), -xy, yz); /* in Q12 */
		/* check if we can shift it left by 14 (it's in Q12 and the denominator in Q24) without overflow, as bestFixedCodebookGain is computed in Q2 */
		numeratorH = (word32_t)(SHR64(numerator,32));
		numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
		numeratorNorm = countLeadingZeros(numeratorH);
		if (numeratorNorm >= 14) {
			bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator,14), denominator));
		} else {
			word64_t shiftedDenominator = SHR64(denominator, 14-numeratorNorm); /* bestFixedCodebookGain in Q2 */
			if (shiftedDenominator>0) { /* can't shift the numerator left by 14; can we shift the denominator right by 14-numeratorNorm without hitting 0? */
				bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestFixedCodebookGain in Q2 */
			} else {
				bestFixedCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 14-numeratorNorm); /* shift left the division result to reach Q2 */
			}
		}
	}

	/*** Compute the predicted gain as in spec 3.9.1 eq71 in Q6 ***/
	predictedFixedCodebookGain = (word16_t)(SHR32(MACodeGainPrediction(encoderChannelContext->previousGainPredictionError, fixedCodebookVector), 12)); /* in Q16 -> Q4, range [3,1830] */

	/*** preselection spec 3.9.2 ***/
	/* Note: the spec just says to select the best 50% of each vector; the ITU code goes through magical-constant computation to select the beginning of a continuous range */
	/* much simpler here: vectors are ordered in growing order, so just select 2 (4 for Gb) indexes before the first value superior to the best gain previously computed */
	while (indexBaseGa<6 && bestFixedCodebookGain>(MULT16_16_Q14(GACodebook[indexBaseGa][1],predictedFixedCodebookGain))) { /* bestFixedCodebookGain in Q2; GACodebook in Q12 * predictedFixedCodebookGain in Q4 -> Q16-14 */
		indexBaseGa++;
	}
	if (indexBaseGa>0) indexBaseGa--;
	if (indexBaseGa>0) indexBaseGa--;
	while (indexBaseGb<12 && bestAdaptativeCodebookGain>(SHR(GBCodebook[indexBaseGb][0],5))) {
		indexBaseGb++;
	}
	if (indexBaseGb>0) indexBaseGb--;
	if (indexBaseGb>0) indexBaseGb--;
	if (indexBaseGb>0) indexBaseGb--;
	if (indexBaseGb>0) indexBaseGb--;

	/*** test all possibilities of Ga and Gb indexes and select the best one ***/
	xy = -SHL(xy,1); /* xy term is always used with a -2 factor */
	xz = -SHL(xz,1); /* xz term is always used with a -2 factor */
	yz = SHL(yz,1); /* yz term is always used with a 2 factor */

	for (i=0; i<4; i++) {
		for (j=0; j<8; j++) {
			/* compute gamma->gc and gp */
			word16_t gp = ADD16(GACodebook[i+indexBaseGa][0], GBCodebook[j+indexBaseGb][0]); /* result in Q14 */
			word16_t gamma = ADD16(GACodebook[i+indexBaseGa][1], GBCodebook[j+indexBaseGb][1]); /* result in Q3.12 (range [0.185, 5.05]) */
			word32_t gc = MULT16_16_Q14(gamma, predictedFixedCodebookGain); /* gamma in Q12, predictedFixedCodebookGain in Q4 -> Q16-14 -> Q2 */

			/* compute E as in eq63 (first term excluded) */
			word64_t acc = MULT32_32(MULT16_16(gp, gp), yy); /* acc = gp^2*yy, gp in Q14, yy in Q0 -> acc in Q28 */
			acc = MAC64(acc, MULT16_16(gc, gc), zz); /* gc in Q2, zz in Q24 -> acc in Q28; note gc is on 32 bits but in a range making gc^2 fit on 32 bits */
			acc = MAC64(acc, SHL32((word32_t)gp, 14), xy); /* gp in Q14 shifted to Q28, xy in Q0 -> acc in Q28 */
			acc = MAC64(acc, SHL32(gc, 14), xz); /* gc in Q2 shifted to Q16, xz in Q12 -> acc in Q28 */
			acc = MAC64(acc, MULT16_16(gp,gc), yz); /* gp in Q14, gc in Q2, yz in Q12 -> acc in Q28 */

			if (acc<distanceMin) {
				distanceMin = acc;
				indexGa = i+indexBaseGa;
				indexGb = j+indexBaseGb;
				*quantizedAdaptativeCodebookGain = gp;
				*quantizedFixedCodebookGain = (word16_t)SHR(gc, 1);
			}
		}
	}

	/* update the previous gain prediction error */
	computeGainPredictionError(ADD16(GACodebook[indexGa][1], GBCodebook[indexGb][1]), encoderChannelContext->previousGainPredictionError);

	/* mapping of indexes */
	*gainCodebookStage1 = indexMappingGA[indexGa];
	*gainCodebookStage2 = indexMappingGB[indexGb];

	return;
}