예제 #1
1
bool SamplerJitCache::Jit_Decode5551() {
	MOV(32, R(tempReg2), R(resultReg));
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));
	AND(32, R(tempReg1), Imm32(0x000003E0));
	SHL(32, R(tempReg1), Imm8(3));
	OR(32, R(tempReg2), R(tempReg1));

	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x00007C00));
	SHL(32, R(tempReg1), Imm8(6));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  After this is just A.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	// Chop off the bits that were shifted out.
	AND(32, R(tempReg1), Imm32(0x00070707));
	OR(32, R(tempReg2), R(tempReg1));

	// For A, we shift it to a single bit, and then subtract and XOR.
	// That's probably the simplest way to expand it...
	SHR(32, R(resultReg), Imm8(15));
	// If it was 0, it's now -1, otherwise it's 0.  Easy.
	SUB(32, R(resultReg), Imm8(1));
	XOR(32, R(resultReg), Imm32(0xFF000000));
	AND(32, R(resultReg), Imm32(0xFF000000));
	OR(32, R(resultReg), R(tempReg2));

	return true;
}
예제 #2
1
bool SamplerJitCache::Jit_Decode5650() {
	MOV(32, R(tempReg2), R(resultReg));
	AND(32, R(tempReg2), Imm32(0x0000001F));

	// B (we do R and B at the same time, they're both 5.)
	MOV(32, R(tempReg1), R(resultReg));
	AND(32, R(tempReg1), Imm32(0x0000F800));
	SHL(32, R(tempReg1), Imm8(5));
	OR(32, R(tempReg2), R(tempReg1));

	// Expand 5 -> 8.  At this point we have 00BB00RR.
	MOV(32, R(tempReg1), R(tempReg2));
	SHL(32, R(tempReg2), Imm8(3));
	SHR(32, R(tempReg1), Imm8(2));
	OR(32, R(tempReg2), R(tempReg1));
	AND(32, R(tempReg2), Imm32(0x00FF00FF));

	// Now's as good a time to put in A as any.
	OR(32, R(tempReg2), Imm32(0xFF000000));

	// Last, we need to align, extract, and expand G.
	// 3 to align to G, and then 2 to expand to 8.
	SHL(32, R(resultReg), Imm8(3 + 2));
	AND(32, R(resultReg), Imm32(0x0000FC00));
	MOV(32, R(tempReg1), R(resultReg));
	// 2 to account for resultReg being preshifted, 4 for expansion.
	SHR(32, R(tempReg1), Imm8(2 + 4));
	OR(32, R(resultReg), R(tempReg1));
	AND(32, R(resultReg), Imm32(0x0000FF00));
	OR(32, R(resultReg), R(tempReg2));;

	return true;
}
예제 #3
1
파일: frontend.cpp 프로젝트: Ballaw/libcpu
void
arch_store16(cpu_t *cpu, Value *val, Value *addr, BasicBlock *bb) {
	Value *shift = arch_get_shift16(cpu, addr, bb);
	addr = AND(addr, CONST(~3ULL));
	Value *mask = XOR(SHL(CONST(65535), shift),CONST(-1ULL));
	Value *old = AND(arch_load32_aligned(cpu, addr, bb), mask);
	val = OR(old, SHL(AND(val, CONST(65535)), shift));
	arch_store32_aligned(cpu, val, addr, bb);
}
예제 #4
1
void split_cb_shape_sign_unquant(
spx_sig_t *exc,
const void *par,                      /* non-overlapping codebook */
int   nsf,                      /* number of samples in subframe */
SpeexBits *bits,
char *stack
)
{
   int i,j;
   int *ind, *signs;
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   split_cb_params *params;
   int have_sign;

   params = (split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;

   ind = PUSH(stack, nb_subvect, int);
   signs = PUSH(stack, nb_subvect, int);

   /* Decode codewords and gains */
   for (i=0;i<nb_subvect;i++)
   {
      if (have_sign)
         signs[i] = speex_bits_unpack_unsigned(bits, 1);
      else
         signs[i] = 0;
      ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
   }
   /* Compute decoded excitation */
   for (i=0;i<nb_subvect;i++)
   {
      spx_word16_t s=1;
      if (signs[i])
         s=-1;
#ifdef FIXED_POINT
      if (s==1)
      {
         for (j=0;j<subvect_size;j++)
            exc[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            exc[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
      }
#else
      for (j=0;j<subvect_size;j++)
         exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];      
#endif
   }
}
예제 #5
1
word32_t ChebyshevPolynomial(word16_t x, word32_t f[])
{
    /* bk in Q15*/
    word32_t bk;
    word32_t bk1 = ADD32(SHL(x,1), f[1]); /* init: b4=2x+f1 */
    word32_t bk2 = ONE_IN_Q15; /* init: b5=1 */
    uint8_t k;
    for (k=3; k>0; k--) { /* at the end of loop execution we have b1 in bk1 and b2 in bk2 */
        bk = SUB32(ADD32(SHL(MULT16_32_Q15(x,bk1), 1), f[5-k]), bk2); /* bk = 2*x*bk1 − bk2 + f(5-k) all in Q15*/
        bk2 = bk1;
        bk1 = bk;
    }

    return SUB32(ADD32(MULT16_32_Q15(x,bk1), SHR(f[5],1)), bk2); /* C(x) = x*b1 - b2 + f(5)/2 */
}
예제 #6
1
static inline Value *
arch_6502_pull16(cpu_t *cpu, BasicBlock *bb)
{
	Value *lo = PULL;
	Value *hi = PULL;
	return (OR(ZEXT16(lo), SHL(ZEXT16(hi), CONST16(8))));
}
예제 #7
0
/*** shlGetAttrType - get the type (DATA_T_xxx) of an attribute by name.
 ***/
int
shlGetAttrType(void* inf_v, char* attrname, pObjTrxTree* oxt)
    {
    pShlData inf = SHL(inf_v);
    int i;
    pStructInf find_inf;
    pEnvVar pEV;

    	/** If name, it's a string **/
	if (!strcmp(attrname,"name")) return DATA_T_STRING;

	/** If 'content-type', it's also a string. **/
	if (!strcmp(attrname,"content_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"inner_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"outer_type")) return DATA_T_STRING;
	if (!strcmp(attrname,"annotation")) return DATA_T_STRING;

	if (!strcmp(attrname,"status")) return DATA_T_STRING;

	if (!strncmp(attrname,"arg",3)) return DATA_T_STRING;

	pEV = (pEnvVar)xhLookup(&inf->envHash,attrname);
	if(pEV)
	    {
	    return DATA_T_STRING;
	    }

	mssError(1,"SHL","Could not locate requested attribute: %s",attrname);

    return -1;
    }
예제 #8
0
/*** shlGetNextAttr - get the next attribute name for this object.
 ***/
char*
shlGetNextAttr(void* inf_v, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);
    int i;

    if(inf->CurAttr>=xaCount(&inf->argArray)+xaCount(&inf->envList)+1)
	return NULL;

    if(inf->CurAttr==xaCount(&inf->argArray)+xaCount(&inf->envList))
	{
	inf->CurAttr++;
	return "status";
	}

    if(inf->CurAttr>999999 && xaCount(&inf->argArray)>=999999)
	return NULL;

    if(inf->CurAttr<xaCount(&inf->argArray))
	{
	i=snprintf(inf->sCurAttr,10,"arg%02i",inf->CurAttr++);
	inf->sCurAttr[10]='\0';
	return inf->sCurAttr;
	}
    else
	{
	return (char*)xaGetItem(&inf->envList,(inf->CurAttr++)-xaCount(&inf->argArray));
	}
    }
예제 #9
0
/* FIXME: These functions are ugly and probably introduce too much error */
void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
   int i;
   for (i=0;i<len;i++)
   {
      y[i] = SHL(MULT16_32_Q14(SHR(x[i],7),scale),7);
   }
}
예제 #10
0
파일: frontend.cpp 프로젝트: Ballaw/libcpu
static Value *
arch_get_shift16(cpu_t *cpu, Value *addr, BasicBlock *bb)
{
	Value *shift = AND(LSHR(addr,CONST(1)),CONST(1));
	if (!IS_LITTLE_ENDIAN(cpu))
		shift = XOR(shift, CONST(1));
	return SHL(shift, CONST(4));
}
예제 #11
0
/*** shlGetFirstAttr - get the first attribute name for this object.
 ***/
char*
shlGetFirstAttr(void* inf_v, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);

    inf->CurAttr=0;
    return shlGetNextAttr(inf_v,oxt);
    }
예제 #12
0
/*** shlAddAttr - add an attribute to an object.  This doesn't always work
 *** for all object types, and certainly makes no sense for some (like unix
 *** files).
 ***/
int
shlAddAttr(void* inf_v, char* attrname, int type, void* val, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);
    pStructInf new_inf;
    char* ptr;

    return -1;
    }
예제 #13
0
void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes)
{
   int i;
   spx_word16_t tmp = DIV32_16(SHL(1 + subframe,14),nb_subframes);
   spx_word16_t tmp2 = 16384-tmp;
   for (i=0;i<len;i++)
   {
      interp_lsp[i] = MULT16_16_P14(tmp2,old_lsp[i]) + MULT16_16_P14(tmp,new_lsp[i]);
   }
}
예제 #14
0
void
vec4_gs_visitor::gs_end_primitive()
{
    /* We can only do EndPrimitive() functionality when the control data
     * consists of cut bits.  Fortunately, the only time it isn't is when the
     * output type is points, in which case EndPrimitive() is a no-op.
     */
    if (gs_prog_data->control_data_format !=
            GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
        return;
    }

    if (c->control_data_header_size_bits == 0)
        return;

    /* Cut bits use one bit per vertex. */
    assert(c->control_data_bits_per_vertex == 1);

    /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
     * vertex n, 0 otherwise.  So all we need to do here is mark bit
     * (vertex_count - 1) % 32 in the cut_bits register to indicate that
     * EndPrimitive() was called after emitting vertex (vertex_count - 1);
     * vec4_gs_visitor::emit_control_data_bits() will take care of the rest.
     *
     * Note that if EndPrimitve() is called before emitting any vertices, this
     * will cause us to set bit 31 of the control_data_bits register to 1.
     * That's fine because:
     *
     * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
     *   output, so the hardware will ignore cut bit 31.
     *
     * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
     *   last vertex, so setting cut bit 31 has no effect (since the primitive
     *   is automatically ended when the GS terminates).
     *
     * - If max_vertices > 32, then the ir_emit_vertex visitor will reset the
     *   control_data_bits register to 0 when the first vertex is emitted.
     */

    /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
    src_reg one(this, glsl_type::uint_type);
    emit(MOV(dst_reg(one), brw_imm_ud(1u)));
    src_reg prev_count(this, glsl_type::uint_type);
    emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
    src_reg mask(this, glsl_type::uint_type);
    /* Note: we're relying on the fact that the GEN SHL instruction only pays
     * attention to the lower 5 bits of its second source argument, so on this
     * architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
     * ((vertex_count - 1) % 32).
     */
    emit(SHL(dst_reg(mask), one, prev_count));
    emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
예제 #15
0
파일: frontend.cpp 프로젝트: Ballaw/libcpu
// shifts or rotates an lvalue left or right, regardless of width
Value *
arch_shiftrotate(cpu_t *cpu, Value *dst, Value *src, bool left, bool rotate, BasicBlock *bb)
{
	Value *c;
	Value *v = LOAD(src);

	if (left) {
		c = ICMP_SLT(v, CONSTs(SIZE(v), 0));	/* old MSB to carry */
		v = SHL(v, CONSTs(SIZE(v), 1));
		if (rotate)
			v = OR(v,ZEXT(SIZE(v), LOAD(cpu->ptr_C)));
	} else {
		c = TRUNC1(v);		/* old LSB to carry */
		v = LSHR(v, CONSTs(SIZE(v), 1));
		if (rotate)
			v = OR(v,SHL(ZEXT(SIZE(v), LOAD(cpu->ptr_C)), CONSTs(SIZE(v), SIZE(v)-1)));
	}
	
	LET1(cpu->ptr_C, c);
	return STORE(v, dst);
}
예제 #16
0
void
vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
{
   /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */

   /* Note: we are calling this *before* increasing vertex_count, so
    * this->vertex_count == vertex_count - 1 in the formula above.
    */

   /* Stream mode uses 2 bits per vertex */
   assert(c->control_data_bits_per_vertex == 2);

   /* Must be a valid stream */
   assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);

   /* Control data bits are initialized to 0 so we don't have to set any
    * bits when sending vertices to stream 0.
    */
   if (stream_id == 0)
      return;

   /* reg::sid = stream_id */
   src_reg sid(this, glsl_type::uint_type);
   emit(MOV(dst_reg(sid), stream_id));

   /* reg:shift_count = 2 * (vertex_count - 1) */
   src_reg shift_count(this, glsl_type::uint_type);
   emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));

   /* Note: we're relying on the fact that the GEN SHL instruction only pays
    * attention to the lower 5 bits of its second source argument, so on this
    * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
    * stream_id << ((2 * (vertex_count - 1)) % 32).
    */
   src_reg mask(this, glsl_type::uint_type);
   emit(SHL(dst_reg(mask), sid, shift_count));
   emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
예제 #17
0
void LPSynthesisFilter (word16_t *excitationVector, word16_t *LPCoefficients, word16_t *reconstructedSpeech)
{
	int i;
	/* compute excitationVector[i] - Sum0-9(LPCoefficients[j]*reconstructedSpeech[i-j]) */
	for (i=0; i<L_SUBFRAME; i++) {
		word32_t acc = SHL(excitationVector[i],12); /* acc get the first term of the sum, in Q12 (excitationVector is in Q0)*/
		int j;
		for (j=0; j<NB_LSP_COEFF; j++) {
			acc = MSU16_16(acc, LPCoefficients[j], reconstructedSpeech[i-j-1]);
		}
		reconstructedSpeech[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* shift right acc to get it back in Q0 and check overflow on 16 bits */
	}
	return;
}
예제 #18
0
/*** shlWrite - Write data to the shell's stdin
 ***/
int
shlWrite(void* inf_v, char* buffer, int cnt, int offset, int flags, pObjTrxTree* oxt)
    {
    pShlData inf = SHL(inf_v);
    int i=-1;
    int waitret;
    int retval;

    if(SHELL_DEBUG & SHELL_DEBUG_IO)
	printf("%s -- %p, %p, %i, %i, %i, %p\n",__FUNCTION__,inf_v,buffer,cnt,offset,flags,oxt);

    /** launch the program if it's not running already **/
    if(inf->shell_pid == -1)
	if(shl_internal_Launch(inf) < 0)
	    return -1;

    /** seek is _not_ allowed (obviously) **/
    if(flags & FD_U_SEEK && offset!=inf->curWrite)
	return -1;

    /** can't write to a dead child :) **/
    if(!inf->shell_pid)
	return -1;

    while(i < 0)
	{
	i=fdWrite(inf->shell_fd,buffer,cnt,0,flags & ~FD_U_SEEK);
	if(i < 0)
	    {
	    /** user doesn;t want us to block **/
	    if(flags & FD_U_NOBLOCK)
		return -1;

	    /** check and make sure the child process is still alive... **/
	    shl_internal_UpdateStatus(inf);
	    if(inf->shell_pid>0)
		{
		/** child is alive, it just isn't ready for data yet -- wait a bit **/
		thSleep(200);
		}
	    else if(inf->shell_pid==0)
		{
		/** child died :( **/
		return -1;
		}
	    }
	}
    inf->curWrite+=i;
    return i;
    }
예제 #19
0
void
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
   /* We want to left shift just DWORD 4 (the x component belonging to the
    * second geometry shader invocation) by 4 bits.  So generate the
    * instruction:
    *
    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
    */
   dst = suboffset(vec1(dst), 4);
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}
예제 #20
0
bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerIndex) {
	GEPaletteFormat fmt = (GEPaletteFormat)id.clutfmt;
	if (!id.hasClutShift && !id.hasClutMask && !id.hasClutOffset) {
		// This is simple - just mask if necessary.
		if (bitsPerIndex > 8) {
			AND(32, R(resultReg), Imm32(0x000000FF));
		}
		return true;
	}

	MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate.clutformat));
	MOV(32, R(tempReg1), MatR(tempReg1));

	// Shift = (clutformat >> 2) & 0x1F
	if (id.hasClutShift) {
		MOV(32, R(RCX), R(tempReg1));
		SHR(32, R(RCX), Imm8(2));
		AND(32, R(RCX), Imm8(0x1F));
		SHR(32, R(resultReg), R(RCX));
	}

	// Mask = (clutformat >> 8) & 0xFF
	if (id.hasClutMask) {
		MOV(32, R(tempReg2), R(tempReg1));
		SHR(32, R(tempReg2), Imm8(8));
		AND(32, R(resultReg), R(tempReg2));
	}

	// We need to wrap any entries beyond the first 1024 bytes.
	u32 offsetMask = fmt == GE_CMODE_32BIT_ABGR8888 ? 0x00FF : 0x01FF;

	// We must mask to 0xFF before ORing 0x100 in 16 bit CMODEs.
	// But skip if we'll mask 0xFF after offset anyway.
	if (bitsPerIndex > 8 && (!id.hasClutOffset || offsetMask != 0x00FF)) {
		AND(32, R(resultReg), Imm32(0x000000FF));
	}

	// Offset = (clutformat >> 12) & 0x01F0
	if (id.hasClutOffset) {
		SHR(32, R(tempReg1), Imm8(16));
		SHL(32, R(tempReg1), Imm8(4));
		OR(32, R(resultReg), R(tempReg1));
		AND(32, R(resultReg), Imm32(offsetMask));
	}
	return true;
}
예제 #21
0
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_sig_t xi,yi,nyi;

   for (i=0;i<N;i++)
   {
      int xh,xl,yh,yl;
      xi=SATURATE(x[i],805306368);
      yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
      nyi = -yi;
      xh = xi>>15; xl=xi&0x00007fff; yh = yi>>15; yl=yi&0x00007fff; 
      for (j=0;j<ord-1;j++)
      {
         mem[j] = MAC16_32_Q15(MAC16_32_Q15(mem[j+1], num[j+1],xi), den[j+1],nyi);
      }
      mem[ord-1] = SUB32(MULT16_32_Q15(num[ord],xi), MULT16_32_Q15(den[ord],yi));
      y[i] = yi;
   }
}
예제 #22
0
/*** shlSetAttrValue - sets the value of an attribute.  'val' must
 *** point to an appropriate data type.
 ***/
int
shlSetAttrValue(void* inf_v, char* attrname, int datatype, pObjData val, pObjTrxTree oxt)
    {
    pShlData inf = SHL(inf_v);

	/** Choose the attr name **/
	/** Changing name of node object? **/
	if (!strcmp(attrname,"name"))
	    {
	    if (inf->Obj->Pathname->nElements == inf->Obj->SubPtr)
		{
		if (!strcmp(inf->Obj->Pathname->Pathbuf,".")) return -1;
		if (strlen(inf->Obj->Pathname->Pathbuf) - 
		    strlen(strrchr(inf->Obj->Pathname->Pathbuf,'/')) + 
		    strlen(val->String) + 1 > 255)
		    {
		    mssError(1,"SHL","SetAttr 'name': name too large for internal representation");
		    return -1;
		    }
		strcpy(inf->Pathname, inf->Obj->Pathname->Pathbuf);
		strcpy(strrchr(inf->Pathname,'/')+1,val->String);
		if (rename(inf->Obj->Pathname->Pathbuf, inf->Pathname) < 0) 
		    {
		    mssError(1,"SHL","SetAttr 'name': could not rename structure file node object");
		    return -1;
		    }
		strcpy(inf->Obj->Pathname->Pathbuf, inf->Pathname);
		}

	    /** Set dirty flag **/
	    inf->Node->Status = SN_NS_DIRTY;

	    return 0;
	    }

	/** Try to set a command parameter (env variable) **/
	if (shl_internal_SetParam(inf, attrname, datatype, val) < 0)
	    return -1;

    return -1;
    }
예제 #23
0
spx_word16_t compute_rms(const spx_sig_t *x, int len)
{
   int i;
   spx_word32_t sum=0;
   spx_sig_t max_val=1;
   int sig_shift;

   for (i=0;i<len;i++)
   {
      spx_sig_t tmp = x[i];
      if (tmp<0)
         tmp = -tmp;
      if (tmp > max_val)
         max_val = tmp;
   }

   sig_shift=0;
   while (max_val>16383)
   {
      sig_shift++;
      max_val >>= 1;
   }

   for (i=0;i<len;i+=4)
   {
      spx_word32_t sum2=0;
      spx_word16_t tmp;
      tmp = SHR(x[i],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+1],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+2],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      tmp = SHR(x[i+3],sig_shift);
      sum2 += MULT16_16(tmp,tmp);
      sum += SHR(sum2,6);
   }
   
   return SHR(SHL((spx_word32_t)spx_sqrt(1+DIV32(sum,len)),(sig_shift+3)),SIG_SHIFT);
}
예제 #24
0
INT16 q_gain_pitch (   /* Return index of quantization */
    INT16 *gain        /* (i)  :  Pitch gain to quantize  */
)
{
    INT16 i, index, gain_q14, err, err_min;

    VPP_EFR_PROFILE_FUNCTION_ENTER(q_gain_pitch);

    //gain_q14 = shl (*gain, 2);
	gain_q14 = SHL(*gain, 2);

    //err_min = abs_s (sub (gain_q14, qua_gain_pitch[0]));
    err_min = ABS_S(SUB (gain_q14, qua_gain_pitch[0]));
    index = 0;

    for (i = 1; i < NB_QUA_PITCH; i++)
    {
        //err = abs_s (sub (gain_q14, qua_gain_pitch[i]));
		err = ABS_S(SUB (gain_q14, qua_gain_pitch[i]));


        //if (sub (err, err_min) < 0)
		if (SUB (err, err_min) < 0)
        {
            err_min = err;
            index = i;
        }
    }

    //*gain = shr (qua_gain_pitch[index], 2);
	*gain = SHR_D(qua_gain_pitch[index], 2);

    VPP_EFR_PROFILE_FUNCTION_EXIT(q_gain_pitch);

    return index;
}
예제 #25
0
void
vec4_vs_visitor::emit_prolog()
{
   dst_reg sign_recovery_shift;
   dst_reg normalize_factor;
   dst_reg es3_normalize_factor;

   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
      if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
         uint8_t wa_flags = key->gl_attrib_wa_flags[i];
         dst_reg reg(ATTR, i);
         dst_reg reg_d = reg;
         reg_d.type = BRW_REGISTER_TYPE_D;
         dst_reg reg_ud = reg;
         reg_ud.type = BRW_REGISTER_TYPE_UD;

         /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
          * come in as floating point conversions of the integer values.
          */
         if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
            dst_reg dst = reg;
            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
            dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
            emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
         }

         /* Do sign recovery for 2101010 formats if required. */
         if (wa_flags & BRW_ATTRIB_WA_SIGN) {
            if (sign_recovery_shift.file == BAD_FILE) {
               /* shift constant: <22,22,22,30> */
               sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
               emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
            }

            emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
            emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
         }

         /* Apply BGRA swizzle if required. */
         if (wa_flags & BRW_ATTRIB_WA_BGRA) {
            src_reg temp = src_reg(reg);
            temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
            emit(MOV(reg, temp));
         }

         if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
            /* ES 3.0 has different rules for converting signed normalized
             * fixed-point numbers than desktop GL.
             */
            if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
               /* According to equation 2.2 of the ES 3.0 specification,
                * signed normalization conversion is done by:
                *
                * f = c / (2^(b-1)-1)
                */
               if (es3_normalize_factor.file == BAD_FILE) {
                  /* mul constant: 1 / (2^(b-1) - 1) */
                  es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
                           src_reg(1.0f / ((1<<9) - 1))));
                  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
                           src_reg(1.0f / ((1<<1) - 1))));
               }

               dst_reg dst = reg;
               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
               emit(MOV(dst, src_reg(reg_d)));
               emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
               emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f));
            } else {
               /* The following equations are from the OpenGL 3.2 specification:
                *
                * 2.1 unsigned normalization
                * f = c/(2^n-1)
                *
                * 2.2 signed normalization
                * f = (2c+1)/(2^n-1)
                *
                * Both of these share a common divisor, which is represented by
                * "normalize_factor" in the code below.
                */
               if (normalize_factor.file == BAD_FILE) {
                  /* 1 / (2^b - 1) for b=<10,10,10,2> */
                  normalize_factor = dst_reg(this, glsl_type::vec4_type);
                  emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
                           src_reg(1.0f / ((1<<10) - 1))));
                  emit(MOV(writemask(normalize_factor, WRITEMASK_W),
                           src_reg(1.0f / ((1<<2) - 1))));
               }

               dst_reg dst = reg;
               dst.type = brw_type_for_base_type(glsl_type::vec4_type);
               emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));

               /* For signed normalization, we want the numerator to be 2c+1. */
               if (wa_flags & BRW_ATTRIB_WA_SIGN) {
                  emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
                  emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
               }

               emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
            }
         }

         if (wa_flags & BRW_ATTRIB_WA_SCALE) {
            dst_reg dst = reg;
            dst.type = brw_type_for_base_type(glsl_type::vec4_type);
            emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
         }
      }
   }
}
예제 #26
0
void split_cb_search_shape_sign(
spx_sig_t target[],			/* target vector */
spx_coef_t ak[],			/* LPCs for this subframe */
spx_coef_t awk1[],			/* Weighted LPCs for this subframe */
spx_coef_t awk2[],			/* Weighted LPCs for this subframe */
const void *par,                      /* Codebook/search parameters*/
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,
spx_sig_t *r,
SpeexBits *bits,
char *stack,
int   complexity
)
{
   int i,j,k,m,n,q;
   spx_word16_t *resp;
#ifdef _USE_SSE
   __m128 *resp2;
   __m128 *E;
#else
   spx_word16_t *resp2;
   spx_word32_t *E;
#endif
   spx_word16_t *t;
   spx_sig_t *e, *r2;
   spx_word16_t *tmp;
   spx_word32_t *ndist, *odist;
   int *itmp;
   spx_word16_t **ot, **nt;
   int **nind, **oind;
   int *ind;
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   split_cb_params *params;
   int N=2;
   int *best_index;
   spx_word32_t *best_dist;
   int have_sign;
   N=complexity;
   if (N>10)
      N=10;

   ot=PUSH(stack, N, spx_word16_t*);
   nt=PUSH(stack, N, spx_word16_t*);
   oind=PUSH(stack, N, int*);
   nind=PUSH(stack, N, int*);

   params = (split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
   E = PUSH(stack, shape_cb_size>>2, __m128);
#else
   resp2 = resp;
   E = PUSH(stack, shape_cb_size, spx_word32_t);
#endif
   t = PUSH(stack, nsf, spx_word16_t);
   e = PUSH(stack, nsf, spx_sig_t);
   r2 = PUSH(stack, nsf, spx_sig_t);
   ind = PUSH(stack, nb_subvect, int);

   tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot[i]=tmp;
      tmp += nsf;
      nt[i]=tmp;
      tmp += nsf;
   }
   best_index = PUSH(stack, N, int);
   best_dist = PUSH(stack, N, spx_word32_t);
   ndist = PUSH(stack, N, spx_word32_t);
   odist = PUSH(stack, N, spx_word32_t);
   
   itmp = PUSH(stack, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp;
      itmp+=nb_subvect;
      oind[i]=itmp;
      itmp+=nb_subvect;
      for (j=0;j<nb_subvect;j++)
         nind[i][j]=oind[i][j]=-1;
   }
   
   /* FIXME: make that adaptive? */
   for (i=0;i<nsf;i++)
      t[i]=SHR(target[i],6);

   for (j=0;j<N;j++)
      for (i=0;i<nsf;i++)
         ot[j][i]=t[i];

   /*for (i=0;i<nsf;i++)
     printf ("%d\n", (int)t[i]);*/

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j=0;j<N;j++)
      odist[j]=0;
   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      for (j=0;j<N;j++)
         ndist[j]=-2;

      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /*Find new n-best based on previous n-best j*/
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            spx_word16_t *ct;
            spx_word32_t err=0;
            ct = ot[j];
            /*update target*/

            /*previous target*/
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               t[m]=ct[m];

            /* New code: update only enough of the target to calculate error*/
            {
               int rind;
               spx_word16_t *res;
               spx_word16_t sign=1;
               rind = best_index[k];
               if (rind>=shape_cb_size)
               {
                  sign=-1;
                  rind-=shape_cb_size;
               }
               res = resp+rind*subvect_size;
               if (sign>0)
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] -= res[m];
               else
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] += res[m];
            }
            
            /*compute error (distance)*/
            err=odist[j];
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               err += t[m]*t[m];
            /*update n-best list*/
            if (err<ndist[N-1] || ndist[N-1]<-1)
            {

               /*previous target (we don't care what happened before*/
               for (m=(i+1)*subvect_size;m<nsf;m++)
                  t[m]=ct[m];
               /* New code: update the rest of the target only if it's worth it */
               for (m=0;m<subvect_size;m++)
               {
                  spx_word16_t g;
                  int rind;
                  spx_word16_t sign=1;
                  rind = best_index[k];
                  if (rind>=shape_cb_size)
                  {
                     sign=-1;
                     rind-=shape_cb_size;
                  }

                  q=subvect_size-m;
#ifdef FIXED_POINT
                  g=sign*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q]));
#else
                  g=sign*0.03125*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],g*r[q]);
#endif
               }


               for (m=0;m<N;m++)
               {
                  if (err < ndist[m] || ndist[m]<-1)
                  {
                     for (n=N-1;n>m;n--)
                     {
                        for (q=(i+1)*subvect_size;q<nsf;q++)
                           nt[n][q]=nt[n-1][q];
                        for (q=0;q<nb_subvect;q++)
                           nind[n][q]=nind[n-1][q];
                        ndist[n]=ndist[n-1];
                     }
                     for (q=(i+1)*subvect_size;q<nsf;q++)
                        nt[m][q]=t[q];
                     for (q=0;q<nb_subvect;q++)
                        nind[m][q]=oind[j][q];
                     nind[m][i]=best_index[k];
                     ndist[m]=err;
                     break;
                  }
               }
            }
         }
         if (i==0)
           break;
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }
   
   /* Put everything back together */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }   
   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]+=e[j];
   
   /* Update target */
   syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
   for (j=0;j<nsf;j++)
      target[j]-=r2[j];

}
예제 #27
0
 static word_type ROTL(word_type x, unsigned n) {
     return SHL(x, n) | SHR(x, word_bits-n);
 }
예제 #28
0
void Az_lsp (
    INT16 a[],         /* (i)     : predictor coefficients                 */
    INT16 lsp[],       /* (o)     : line spectral pairs                    */
    INT16 old_lsp[]    /* (i)     : old lsp[] (in case not found 10 roots) */
)
{
    INT16 i, j, nf, ip;
    INT16 xlow, ylow, xhigh, yhigh, xmid, ymid, xint;
    INT16 x, y, sign, exp;
    INT16 *coef;
    INT16 f1[M / 2 + 1], f2[M / 2 + 1];
    INT32 t0=0;


    VPP_EFR_PROFILE_FUNCTION_ENTER(Az_lsp);

    /*-------------------------------------------------------------*
     *  find the sum and diff. pol. F1(z) and F2(z)                *
     *    F1(z) <--- F1(z)/(1+z**-1) & F2(z) <--- F2(z)/(1-z**-1)  *
     *                                                             *
     * f1[0] = 1.0;                                                *
     * f2[0] = 1.0;                                                *
     *                                                             *
     * for (i = 0; i< NC; i++)                                     *
     * {                                                           *
     *   f1[i+1] = a[i+1] + a[M-i] - f1[i] ;                       *
     *   f2[i+1] = a[i+1] - a[M-i] + f2[i] ;                       *
     * }                                                           *
     *-------------------------------------------------------------*/

    f1[0] = 1024;                  /* f1[0] = 1.0 */
    f2[0] = 1024;                  /* f2[0] = 1.0 */

    for (i = 0; i < NC; i++)
    {

		//VPP_MLX16 (t0_hi,t0_lo,a[i + 1], 8192);
		//VPP_MLA16 ( t0_hi, t0_lo,   a[M - i],  8192);
		//t0 = VPP_SCALE64_TO_16( t0_hi, t0_lo);
		//x = EXTRACT_H(t0);

        t0 = (INT32) a[i + 1] + (INT32)a[M - i];
        x = (INT16)(L_SHR_D(t0,2));


        /* f1[i+1] = a[i+1] + a[M-i] - f1[i] */
		f1[i + 1] = SUB (x, f1[i]);

		//VPP_MLX16(t0_hi, t0_lo, a[i + 1], 8192);
		//VPP_MLA16(t0_hi, t0_lo, a[M - i], -8192);
		//x = EXTRACT_H(VPP_SCALE64_TO_16(t0_hi, t0_lo));

        t0 = (INT32) a[i + 1] - (INT32)a[M - i];
        x = (INT16)(L_SHR_D(t0,2));

		//f2[i + 1] = add (x, f2[i]);
        f2[i + 1] = ADD(x, f2[i]);

    }

    /*-------------------------------------------------------------*
     * find the LSPs using the Chebychev pol. evaluation           *
     *-------------------------------------------------------------*/

    nf = 0;                         /* number of found frequencies */
    ip = 0;                         /* indicator for f1 or f2      */

    coef = f1;

    xlow = grid[0];
    ylow = Chebps (xlow, coef, NC);

    j = 0;

    /* while ( (nf < M) && (j < grid_points) ) */
    //while ((sub (nf, M) < 0) && (sub (j, grid_points) < 0))
    while ((SUB (nf, M) < 0) && (SUB (j, grid_points) < 0))
    {
        j++;
        xhigh = xlow;
        yhigh = ylow;
        xlow = grid[j];
        ylow = Chebps (xlow, coef, NC);

        //if (L_mult (ylow, yhigh) <= (INT32) 0L)
		if (L_MULT(ylow, yhigh) <= (INT32) 0L)
        {
            /* divide 4 times the interval */

            for (i = 0; i < 4; i++)
            {
                /* xmid = (xlow + xhigh)/2 */

               // xmid = add (shr (xlow, 1), shr (xhigh, 1));
                xmid = ADD ((SHR_D(xlow, 1)),(SHR_D(xhigh, 1)));

                ymid = Chebps (xmid, coef, NC);

                //if (L_mult (ylow, ymid) <= (INT32) 0L)
				if (L_MULT(ylow, ymid) <= (INT32) 0L)
                {
                    yhigh = ymid;
                    xhigh = xmid;
                }
                else
                {
                    ylow = ymid;
                    xlow = xmid;
                }
            }

            /*-------------------------------------------------------------*
             * Linear interpolation                                        *
             *    xint = xlow - ylow*(xhigh-xlow)/(yhigh-ylow);            *
             *-------------------------------------------------------------*/

            //x = sub (xhigh, xlow);
              x = SUB (xhigh, xlow);
			//y = sub (yhigh, ylow);
              y = SUB (yhigh, ylow);


            if (y == 0)
            {
                xint = xlow;
            }
            else
            {
                sign = y;

				//y = abs_s (y);
				y = ABS_S(y);

				exp = norm_s (y);

                //y = shl (y, exp);
				y = SHL(y, exp);

                y = div_s ((INT16) 16383, y);
                //t0 = L_mult (x, y);
				t0 = L_MULT(x, y);
                //t0 = L_shr (t0, sub (20, exp));
                t0 = L_SHR_V(t0, SUB (20, exp));

				//y = extract_l (t0);     /* y= (xhigh-xlow)/(yhigh-ylow) */
                y = EXTRACT_L(t0);

                if (sign < 0)
				{
					//y = negate (y);
					y = NEGATE(y);
				}


                //t0 = L_mult (ylow, y);
				t0 = L_MULT(ylow, y);
                //t0 = L_shr (t0, 11);
                t0 = L_SHR_D(t0, 11);
                //xint = sub (xlow, extract_l (t0)); /* xint = xlow - ylow*y */
                xint = SUB (xlow, EXTRACT_L(t0));
			}

            lsp[nf] = xint;
            xlow = xint;
            nf++;

            if (ip == 0)
            {
                ip = 1;
                coef = f2;
            }
            else
            {
                ip = 0;
                coef = f1;
            }
            ylow = Chebps (xlow, coef, NC);

        }
    }

    /* Check if M roots found */


    //if (sub (nf, M) < 0)
    if (SUB (nf, M) < 0)
    {
        for (i = 0; i < M; i++)
        {
            lsp[i] = old_lsp[i];
        }

    }

    VPP_EFR_PROFILE_FUNCTION_EXIT(Az_lsp);
    return;
}
예제 #29
0
int crypto_aead_decrypt(
                        unsigned char *m,unsigned long long *mlen,          // message
                        unsigned char *nsec,                                // not relavent to CLOC or SLIC
                        const unsigned char *c,unsigned long long clen,     // ciphertext
                        const unsigned char *ad,unsigned long long adlen,   // associated data
                        const unsigned char *npub,                          // nonce
                        const unsigned char *k                              // the master key
                        )
{
    block estate, tstate, tmp;  // encryption state, tag state, and temporary state
    estate = SETZERO();
    unsigned char ltag[16];     // local copy of temporary tag value
    unsigned long long i, lastblocklen,j;
    
    /* set ciphertext length */
    *mlen = clen - CRYPTO_ABYTES;
    
    /* generate round keys from master key */
    AES128_KeyExpansion(k);
    
    
    /* process the first (partial) block of ad */
    load_partial_block(&estate, ad, (adlen>STATE_LEN)?STATE_LEN:adlen, ONE_ZERO_PADDING);
    fix0(estate);
    AES128_encrypt(estate, estate);
    if((ad[0] & 0x80) || (adlen == 0)){
        // appy h
        h(estate);
    }
    else{
        // do nothing
    }
    
    if(adlen > STATE_LEN){ // ad is of moer than one block
        i = STATE_LEN;
        
        /* process the middle ad blocks, excluding the first and last (partial) block */
        while((i+STATE_LEN) < adlen)
        {
            tmp = LOAD(ad+i);
            estate = XOR(estate, tmp);
            AES128_encrypt(estate, estate);
            i += STATE_LEN;
        }
        
        /* process the last (partial) ad block */
        load_partial_block(&tmp, ad+i, adlen - i, ONE_ZERO_PADDING);
        estate = XOR(estate, tmp);
        AES128_encrypt(estate, estate);
    }
    
    
    /* process the nonce */
    load_partial_block(&tmp, npub, CRYPTO_NPUBBYTES, PARAM_OZP);
    estate = XOR(estate, tmp);
    
    if((adlen % STATE_LEN) || (adlen == 0)){
        /* apply f2 */
        f2(estate);
    }
    else{
        /* apply f1 */
        f1(estate);
    }
    
    /* process ciphertext */
    
    tstate = estate;
    AES128_encrypt(estate, estate);
    
    if(*mlen){
        /* apply g2 to tag state */
        g2(tstate);
    }
    else{
        /* apply g1 to tag state */
        g1(tstate);
    }
    AES128_encrypt(tstate, tstate);
    
    
    
    i = 0;
    /* process all the message except for the last message/ciphertext block */
    while((i + STATE_LEN) < (*mlen)){
        tmp = LOAD(c+i);
        estate = XOR(estate, tmp);
        STORE(m+i, estate);
        tstate = XOR(tmp, tstate);
        AES128_encrypt(tstate, tstate);
        fix1(tmp);
        print_state("after applying fix1\n", estate);
        AES128_encrypt(tmp, estate);
        i += STATE_LEN;
    }
    
    /* process the last block of the message/ciphetext */
    lastblocklen = (*mlen) - i;
    
    if(lastblocklen > 0){
        load_partial_block(&tmp, c+i, lastblocklen, ZERO_APPEND);
        estate = XOR(estate, tmp);
        print_state("after xoring last partial message block\n", estate);
        store_partial_block(m+i, estate, lastblocklen);
        unsigned char shift_bytes = (STATE_LEN - (unsigned char)lastblocklen);
        tmp = AND(SHR(_mm_set1_epi8(0xff), shift_bytes), tmp);
        tstate = XOR(tstate, tmp);
        /* add the one zero padding */
        tstate = XOR(tstate, SHL(_mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x80), lastblocklen));
        
        if((*mlen) % STATE_LEN){
            /* apply f2 */
            f2(tstate);
        }
        else{
            /* apply f1 */
            f1(tstate);
        }
        AES128_encrypt(tstate, tstate);
    }
    
    /* compare tag and output message */
    STORE(ltag, tstate);
    for(j = 0; j < CRYPTO_ABYTES; j++){
        if(ltag[j] != c[clen - CRYPTO_ABYTES + j])
            return RETURN_TAG_NO_MATCH;
    }
    return RETURN_SUCCESS;

}
예제 #30
0
void gainQuantization(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t targetSignal[], word16_t filteredAdaptativeCodebookVector[], word16_t convolvedFixedCodebookVector[], word16_t fixedCodebookVector[], word64_t xy64, word64_t yy64,
                      word16_t *quantizedAdaptativeCodebookGain, word16_t *quantizedFixedCodebookGain, uint16_t *gainCodebookStage1, uint16_t *gainCodebookStage2)
{
    int i,j;
    word64_t xz64=0, yz64=0, zz64=0;
    word32_t xy;
    word32_t yy;
    word32_t xz;
    word32_t yz;
    word32_t zz;
    uint16_t minNormalization = 31;
    uint16_t currentNormalization;
    word32_t bestAdaptativeCodebookGain, bestFixedCodebookGain;
    word64_t denominator;
    word16_t predictedFixedCodebookGain;
    uint16_t indexBaseGa=0;
    uint16_t indexBaseGb=0;
    uint16_t indexGa=0, indexGb=0;
    word64_t distanceMin = MAXINT64;

    /*** compute spec 3.9 eq63 terms first on 64 bits and then scale them if needed to fit on 32 ***/
    /* Xy64 and Yy64 already computed during adaptativeCodebookGain computation */
    for (i=0; i<L_SUBFRAME; i++) {
        xz64 = MAC64(xz64, targetSignal[i], convolvedFixedCodebookVector[i]); /* in Q12 */
        yz64 = MAC64(yz64, filteredAdaptativeCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q12 */
        zz64 = MAC64(zz64, convolvedFixedCodebookVector[i], convolvedFixedCodebookVector[i]); /* in Q24 */
    }

    /* now scale this terms to have them fit on 32 bits - terms Xy, Xz and Yz shall fit on 31 bits because used in eq63 with a factor 2 */
    xy = SHR64(((xy64<0)?-xy64:xy64),30);
    yy = SHR64(yy64,31);
    xz = SHR64(((xz64<0)?-xz64:xz64),30);
    yz = SHR64(((yz64<0)?-yz64:yz64),30);
    zz = SHR64(zz64,31);

    currentNormalization = countLeadingZeros(xy);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(xz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(yz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(yy);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }
    currentNormalization = countLeadingZeros(zz);
    if (currentNormalization<minNormalization) {
        minNormalization = currentNormalization;
    }

    if (minNormalization<31) { /* we shall normalise, values are over 32 bits */
        minNormalization = 31 - minNormalization;
        xy = (word32_t)SHR64(xy64, minNormalization);
        yy = (word32_t)SHR64(yy64, minNormalization);
        xz = (word32_t)SHR64(xz64, minNormalization);
        yz = (word32_t)SHR64(yz64, minNormalization);
        zz = (word32_t)SHR64(zz64, minNormalization);

    } else { /* no need to normalise, values already fit on 32 bits, just cast them */
        xy = (word32_t)xy64; /* in Q0 */
        yy = (word32_t)yy64; /* in Q0 */
        xz = (word32_t)xz64; /* in Q12 */
        yz = (word32_t)yz64; /* in Q12 */
        zz = (word32_t)zz64; /* in Q24 */
    }

    /*** compute the best gains minimizinq eq63 ***/
    /* Note this bestgain computation is not at all described in the spec, got it from ITU code */
    /* bestAdaptativeCodebookGain = (zz.xy - xz.yz) / (yy*zz) - yz^2) */
    /* bestfixedCodebookGain = (yy*xz - xy*yz) / (yy*zz) - yz^2) */
    /* best gain are computed in Q9 and Q2 and fits on 16 bits */
    denominator = MAC64(MULT32_32(yy, zz), -yz, yz); /* (yy*zz) - yz^2) in Q24 (always >= 0)*/
    /* avoid division by zero */
    if (denominator==0) { /* consider it to be one */
        bestAdaptativeCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(zz, xy), -xz, yz), 15)); /* MAC in Q24 -> Q9 */
        bestFixedCodebookGain = (word32_t)(SHR64(MAC64(MULT32_32(yy, xz), -xy, yz), 10)); /* MAC in Q12 -> Q2 */
    } else {
        /* bestAdaptativeCodebookGain in Q9 */
        uint16_t numeratorNorm;
        word64_t numerator = MAC64(MULT32_32(zz, xy), -xz, yz); /* in Q24 */
        /* check if we can shift it by 9 without overflow as the bestAdaptativeCodebookGain in computed in Q9 */
        word32_t numeratorH = (word32_t)(SHR64(numerator,32));
        numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
        numeratorNorm = countLeadingZeros(numeratorH);
        if (numeratorNorm >= 9) {
            bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator,9), denominator)); /* bestAdaptativeCodebookGain in Q9 */
        } else {
            word64_t shiftedDenominator = SHR64(denominator, 9-numeratorNorm);
            if (shiftedDenominator>0) { /* can't shift left by 9 the numerator, can we shift right by 9-numeratorNorm the denominator without hiting 0 */
                bestAdaptativeCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestAdaptativeCodebookGain in Q9 */
            } else {
                bestAdaptativeCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 9-numeratorNorm); /* shift left the division result to reach Q9 */
            }
        }

        numerator = MAC64(MULT32_32(yy, xz), -xy, yz); /* in Q12 */
        /* check if we can shift it by 14(it's in Q12 and denominator in Q24) without overflow as the bestFixedCodebookGain in computed in Q2 */
        numeratorH = (word32_t)(SHR64(numerator,32));
        numeratorH = (numeratorH>0)?numeratorH:-numeratorH;
        numeratorNorm = countLeadingZeros(numeratorH);

        if (numeratorNorm >= 14) {
            bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator,14), denominator));
        } else {
            word64_t shiftedDenominator = SHR64(denominator, 14-numeratorNorm); /* bestFixedCodebookGain in Q14 */
            if (shiftedDenominator>0) { /* can't shift left by 9 the numerator, can we shift right by 9-numeratorNorm the denominator without hiting 0 */
                bestFixedCodebookGain = (word32_t)(DIV64(SHL64(numerator, numeratorNorm),shiftedDenominator)); /* bestFixedCodebookGain in Q14 */
            } else {
                bestFixedCodebookGain = SHL((word32_t)(DIV64(SHL64(numerator, numeratorNorm), denominator)), 14-numeratorNorm); /* shift left the division result to reach Q14 */
            }
        }
    }

    /*** Compute the predicted gain as in spec 3.9.1 eq71 in Q6 ***/
    predictedFixedCodebookGain = (word16_t)(SHR32(MACodeGainPrediction(encoderChannelContext->previousGainPredictionError, fixedCodebookVector), 12)); /* in Q16 -> Q4 range [3,1830] */

    /***  preselection spec 3.9.2 ***/
    /* Note: spec just says to select the best 50% of each vector, ITU code go through magical constant computation to select the begining of a continuous range */
    /* much more simple here : vector are ordened in growing order so just select 2 (4 for Gb) indexes before the first value to be superior to the best gain previously computed */
    while (indexBaseGa<6 && bestFixedCodebookGain>(MULT16_16_Q14(GACodebook[indexBaseGa][1],predictedFixedCodebookGain))) { /* bestFixedCodebookGain> in Q2, GACodebook in Q12 *predictedFixedCodebookGain in Q4 -> Q16-14 */
        indexBaseGa++;
    }
    if (indexBaseGa>0) indexBaseGa--;
    if (indexBaseGa>0) indexBaseGa--;
    while (indexBaseGb<12 && bestAdaptativeCodebookGain>(SHR(GBCodebook[indexBaseGb][0],5))) {
        indexBaseGb++;
    }
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;
    if (indexBaseGb>0) indexBaseGb--;

    /*** test all possibilities of Ga and Gb indexes and select the best one ***/
    xy = -SHL(xy,1); /* xy term is always used with a -2 factor */
    xz = -SHL(xz,1); /* xz term is always used with a -2 factor */
    yz = SHL(yz,1); /* yz term is always used with a 2 factor */

    for (i=0; i<4; i++) {
        for (j=0; j<8; j++) {
            /* compute gamma->gc and gp */
            word16_t gp =  ADD16(GACodebook[i+indexBaseGa][0], GBCodebook[j+indexBaseGb][0]); /* result in Q14 */
            word16_t gamma =  ADD16(GACodebook[i+indexBaseGa][1], GBCodebook[j+indexBaseGb][1]); /* result in Q3.12 (range [0.185, 5.05])*/
            word32_t gc = MULT16_16_Q14(gamma, predictedFixedCodebookGain); /* gamma in Q12, predictedFixedCodebookGain in Q4 -> Q16 -14 -> Q2 */

            /* compute E as in eq63 (first term excluded) */
            word64_t acc = MULT32_32(MULT16_16(gp, gp), yy); /* acc = gp^2*yy  gp in Q14, yy in Q0 -> acc in Q28 */
            acc = MAC64(acc, MULT16_16(gc, gc), zz); /* gc in Q2, zz in Q24 -> acc in Q28, note gc is on 32 bits but in a range making gc^2 fitting on 32 bits */
            acc = MAC64(acc, SHL32((word32_t)gp, 14), xy); /* gp in Q14 shifted to Q28, xy in Q0 -> acc in Q28 */
            acc = MAC64(acc, SHL32(gc, 14), xz); /* gc in Q2 shifted to Q16, xz in Q12 -> acc in Q28 */
            acc = MAC64(acc, MULT16_16(gp,gc), yz); /* gp in Q14, gc in Q2 yz in Q12 -> acc in Q28 */

            if (acc<distanceMin) {
                distanceMin = acc;
                indexGa = i+indexBaseGa;
                indexGb = j+indexBaseGb;
                *quantizedAdaptativeCodebookGain = gp;
                *quantizedFixedCodebookGain = (word16_t)SHR(gc, 1);
            }
        }
    }

    /* update the previous gain prediction error */
    computeGainPredictionError(ADD16(GACodebook[indexGa][1], GBCodebook[indexGb][1]), encoderChannelContext->previousGainPredictionError);

    /* mapping of indexes */
    *gainCodebookStage1 = indexMappingGA[indexGa];
    *gainCodebookStage2 = indexMappingGB[indexGb];

    return;
}