void fDCT8x8_32f(const float* s, float* d, float* temp) { //for (int j = 0; j < 8; j ++) //{ // for (int i = 0; i < 8; i ++) // { // temp[8*i+j] =s[8*j+i]; // } //} transpose8x8(s, temp); fDCT2D8x4_32f(temp, d); fDCT2D8x4_32f(temp + 4, d + 4); //for (int j = 0; j < 8; j ++) //{ // for (int i = 0; i < 8; i ++) // { // temp[8*i+j] =d[8*j+i]; // } //} transpose8x8(d, temp); fDCT2D8x4_32f(temp, d); fDCT2D8x4_32f(temp + 4, d + 4); }
// Same two-pass structure as a plain forward transform, except that the
// second pass uses fDCT2D8x4_and_threshold_32f, which also receives
// `threshold`.
//
//   s         : input block, read only.
//   d         : output block; also holds the intermediate result.
//   threshold : forwarded unchanged to fDCT2D8x4_and_threshold_32f.
//   temp      : scratch buffer that receives both transposes.
//
// NOTE(review): the exact thresholding rule lives in the helper and is not
// visible here.
void fDCT8x8_32f_and_threshold(const float* s, float* d, float threshold, float* temp)
{
    // Start of the second group of four floats in each row of the buffers.
    float* const tempUpper = temp + 4;
    float* const dUpper = d + 4;

    // First pass: transpose (scalar equivalent: temp[8*i+j] = s[8*j+i]),
    // then transform both halves without thresholding.
    transpose8x8(s, temp);
    fDCT2D8x4_32f(temp, d);
    fDCT2D8x4_32f(tempUpper, dUpper);

    // Second pass: transpose the intermediate result, then transform and
    // threshold both halves.
    transpose8x8(d, temp);
    fDCT2D8x4_and_threshold_32f(temp, d, threshold);
    fDCT2D8x4_and_threshold_32f(tempUpper, dUpper, threshold);
}
// Loads eight rows of eight int16 values from src_diff (consecutive rows are
// src_stride elements apart), runs hadamard8x8_one_pass, transposes the
// rows, runs hadamard8x8_one_pass again, and stores the eight resulting
// vectors contiguously into coeff (64 values in total).
void vpx_hadamard_8x8_neon(const int16_t *src_diff, int src_stride,
                           int16_t *coeff) {
  int16x8_t row[8];
  int r;

  for (r = 0; r < 8; ++r) {
    row[r] = vld1q_s16(src_diff + r * src_stride);
  }

  hadamard8x8_one_pass(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5],
                       &row[6], &row[7]);

  transpose8x8(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5], &row[6],
               &row[7]);

  hadamard8x8_one_pass(&row[0], &row[1], &row[2], &row[3], &row[4], &row[5],
                       &row[6], &row[7]);

  // Skip the second transpose because it is not required.

  for (r = 0; r < 8; ++r) {
    vst1q_s16(coeff + 8 * r, row[r]);
  }
}
void fDCT8x8_32f_and_threshold_and_iDCT8x8_32f(float* s, float threshold) { fDCT2D8x4_32f(s, s); fDCT2D8x4_32f(s + 4, s + 4); transpose8x8(s); #ifdef _KEEP_00_COEF_ fDCT2D8x4_and_threshold_keep00_32f(s, s, threshold); #else fDCT2D8x4_and_threshold_32f(s, s,threshold); #endif fDCT2D8x4_and_threshold_32f(s + 4, s + 4, threshold); //ommiting transform //transpose8x8(s); //transpose8x8(s); iDCT2D8x4_32f(s, s); iDCT2D8x4_32f(s + 4, s + 4); transpose8x8(s); iDCT2D8x4_32f(s, s); iDCT2D8x4_32f(s + 4, s + 4); return; }
void iDCT8x8_32f(const float* s, float* d, float* temp) { transpose8x8((float*)s, temp); //for (int j = 0; j < 8; j ++) //{ // for (int i = 0; i < 8; i ++) // { // temp[8*i+j] =s[8*j+i]; // } //} iDCT2D8x4_32f(temp, d); iDCT2D8x4_32f(temp + 4, d + 4); transpose8x8(d, temp); /*for (int j = 0; j < 8; j ++) { for (int i = 0; i < 8; i ++) { temp[8*i+j] =d[8*j+i]; } }*/ iDCT2D8x4_32f(temp, d); iDCT2D8x4_32f(temp + 4, d + 4); }