/*
 * opINCW - add 1 to a 32-bit operand, in place.
 *
 * The operand is decoded by ReadAMAddress() starting at PC + 1 with
 * moddim set to 2.  After decoding, amflag selects where the operand
 * lives: non-zero means amout indexes cpustate->reg[], zero means amout
 * is an address accessed through cpustate->program.
 *
 * The addition is done by the ADDL macro, which is defined outside this
 * function (presumably it also updates the condition flags - confirm
 * against its definition).
 *
 * Returns amlength1 + 1 (presumably the instruction length: the decoded
 * operand bytes plus the opcode byte).
 */
static UINT32 opINCW(v60_state *cpustate) /* TRUSTED */
{
	UINT32 appw;

	/* Set up and run the addressing-mode decoder for the operand. */
	cpustate->modadd = cpustate->PC + 1;
	cpustate->moddim = 2;
	cpustate->amlength1 = ReadAMAddress(cpustate);

	/* Fetch the current value from a register or from memory. */
	appw = cpustate->amflag
		? cpustate->reg[cpustate->amout]
		: cpustate->program->read_dword_unaligned(cpustate->amout);

	ADDL(appw, 1);

	/* Store the result back to wherever the operand came from. */
	if (cpustate->amflag)
	{
		cpustate->reg[cpustate->amout] = appw;
	}
	else
	{
		cpustate->program->write_dword_unaligned(cpustate->amout, appw);
	}

	return cpustate->amlength1 + 1;
}
/*
 * opINCW - add 1 to a 32-bit operand, in place.
 *
 * The operand is decoded by ReadAMAddress() starting at PC + 1 with
 * modDim set to 2.  After decoding, amFlag selects where the operand
 * lives: non-zero means amOut indexes v60.reg[], zero means amOut is an
 * address accessed with MemRead32() / MemWrite32().
 *
 * The addition is done by the ADDL macro, which is defined outside this
 * function (presumably it also updates the condition flags - confirm
 * against its definition).
 *
 * Returns amLength1 + 1 (presumably the instruction length: the decoded
 * operand bytes plus the opcode byte).
 */
UINT32 opINCW(void) /* TRUSTED */
{
	UINT32 appw;

	/* Set up and run the addressing-mode decoder for the operand. */
	modAdd = PC + 1;
	modDim = 2;
	amLength1 = ReadAMAddress();

	/* Fetch the current value from a register or from memory. */
	appw = amFlag ? v60.reg[amOut] : MemRead32(amOut);

	ADDL(appw, 1);

	/* Store the result back to wherever the operand came from. */
	if (amFlag)
	{
		v60.reg[amOut] = appw;
	}
	else
	{
		MemWrite32(amOut, appw);
	}

	return amLength1 + 1;
}
/*
 * __mlib_VideoP64Loop_S16_U8 - filter an 8-row reference block into
 * mc_block.
 *
 *   mc_block   destination; sd advances by 8 elements per output row
 *   ref_block  source rows, read through sl
 *   stride     amount added to sl between consecutive source rows
 *
 * Always returns MLIB_SUCCESS.
 *
 * LOADL, FILTERX, ADDL, ADDLRND, STORB and STORSUM are macros defined
 * outside this function.  They appear to build variable names from their
 * arguments (a -> a0..a3, etc.) and to use the in0..in7 and d0/d1
 * temporaries, so the local names below must not be changed.  The
 * per-row notes describe the data flow as implied by the macro names
 * (load a row, filter it horizontally, add adjacent rows with or without
 * rounding, store a border row or a combined row) - confirm against the
 * macro definitions.
 *
 * Three row buffers (a, b, c) are reused in rotation; the statement
 * order is significant.
 */
mlib_status
__mlib_VideoP64Loop_S16_U8(
	mlib_s16 mc_block[64],
	const mlib_u8 *ref_block,
	mlib_s32 stride)
{
	const mlib_u8 *sl = ref_block;
	mlib_s16 *sd = mc_block;
	mlib_u32 in0, in1, in2, in3, in4, in5, in6, in7;
	mlib_u32 a0, a1, a2, a3;
	mlib_u32 b0, b1, b2, b3;
	mlib_u32 c0, c1, c2, c3;
	mlib_u32 d0, d1;

	/* source row 0 -> a */
	LOADL(a);
	sl += stride;
	FILTERX(a);

	/* source row 1 -> b; first output row is written from a alone */
	LOADL(b);
	sl += stride;
	FILTERX(b);
	STORB(a);
	sd += 8;
	ADDL(a, b);

	/* source row 2 -> c; output row 1 */
	LOADL(c);
	sl += stride;
	FILTERX(c);
	ADDLRND(b, c);
	STORSUM(a, b);
	sd += 8;

	/* source row 3 -> a; output row 2 */
	LOADL(a);
	sl += stride;
	FILTERX(a);
	ADDL(c, a);
	STORSUM(b, c);
	sd += 8;

	/* source row 4 -> b; output row 3 */
	LOADL(b);
	sl += stride;
	FILTERX(b);
	ADDLRND(a, b);
	STORSUM(c, a);
	sd += 8;

	/* source row 5 -> c; output row 4 */
	LOADL(c);
	sl += stride;
	FILTERX(c);
	ADDL(b, c);
	STORSUM(a, b);
	sd += 8;

	/* source row 6 -> a; output row 5 */
	LOADL(a);
	sl += stride;
	FILTERX(a);
	ADDLRND(c, a);
	STORSUM(b, c);
	sd += 8;

	/* source row 7 -> b (sl is not advanced again); output row 6 */
	LOADL(b);
	FILTERX(b);
	ADDL(a, b);
	STORSUM(c, a);
	sd += 8;

	/* last output row is written from b alone */
	STORB(b);

	return MLIB_SUCCESS;
}
/*
 * __mlib_VideoP64Loop_S16_U8 - filter an 8-row reference block into
 * mc_block, using 128-bit SSE2 integer vectors.
 *
 *   mc_block   destination; sd advances by 8 elements per output row
 *   ref_block  source rows, read through sl
 *   stride     amount added to sl between consecutive source rows
 *
 * Always returns MLIB_SUCCESS.
 *
 * LOADL, FILTERX, ADDL, ADDLRND, STORB and STORSUM are macros defined
 * outside this function.  They take row indices 0..7 and appear to build
 * variable names from them (txmmN / tN) and to use the Czero, CF, C2, C4
 * and C8 constants, so the local names below must not be changed.  The
 * per-row notes describe the data flow as implied by the macro names
 * (load a row, filter it horizontally, add adjacent rows with or without
 * rounding, store a border row or a combined row) - confirm against the
 * macro definitions.
 *
 * The statement order is significant.
 */
mlib_status
__mlib_VideoP64Loop_S16_U8(
	mlib_s16 mc_block[64],
	const mlib_u8 *ref_block,
	mlib_s32 stride)
{
	const mlib_u8 *sl = ref_block;
	mlib_s16 *sd = mc_block;
	__m128i txmm0, txmm1, txmm2, txmm3, txmm4, txmm5, txmm6, txmm7;
	__m128i t0, t1, t2, t3, t4, t5, t6, t7;

	/* Vector constants available to the macros. */
	__m128i Czero = _mm_setzero_si128();
	__m128i C2 = _mm_set1_epi16(2);
	__m128i C4 = _mm_set1_epi16(4);
	__m128i C8 = _mm_set1_epi16(8);
	__m128i CF = _mm_set_epi32(0xff0000, 0, 0, 0xff);

	/* source row 0 */
	LOADL(0);
	sl += stride;
	FILTERX(0);

	/* source row 1; first output row is written from row 0 alone */
	LOADL(1);
	sl += stride;
	FILTERX(1);
	STORB(0);
	sd += 8;
	ADDL(0, 1);

	/* source row 2; output row 1 */
	LOADL(2);
	sl += stride;
	FILTERX(2);
	ADDLRND(1, 2);
	STORSUM(0, 1);
	sd += 8;

	/* source row 3; output row 2 */
	LOADL(3);
	sl += stride;
	FILTERX(3);
	ADDL(2, 3);
	STORSUM(1, 2);
	sd += 8;

	/* source row 4; output row 3 */
	LOADL(4);
	sl += stride;
	FILTERX(4);
	ADDLRND(3, 4);
	STORSUM(2, 3);
	sd += 8;

	/* source row 5; output row 4 */
	LOADL(5);
	sl += stride;
	FILTERX(5);
	ADDL(4, 5);
	STORSUM(3, 4);
	sd += 8;

	/* source row 6; output row 5 */
	LOADL(6);
	sl += stride;
	FILTERX(6);
	ADDLRND(5, 6);
	STORSUM(4, 5);
	sd += 8;

	/* source row 7 (sl is not advanced again); output row 6 */
	LOADL(7);
	FILTERX(7);
	ADDL(6, 7);
	STORSUM(5, 6);
	sd += 8;

	/* last output row is written from row 7 alone */
	STORB(7);

	return MLIB_SUCCESS;
}