void get_inter_prediction_luma(uint8_t *pblock, uint8_t *ref, int width, int height, int stride, int pstride, mv_t *mv, int sign, int bipred) { int i,j; int m,i_off,j_off; mv_t mvtemp; mvtemp.x = sign ? -mv->x : mv->x; mvtemp.y = sign ? -mv->y : mv->y; int ver_frac = (mvtemp.y)&3; int hor_frac = (mvtemp.x)&3; int ver_int = (mvtemp.y)>>2; int hor_int = (mvtemp.x)>>2; int32_t tmp[MAX_BLOCK_SIZE+16][MAX_BLOCK_SIZE + 16]; //7-bit filter exceeds 16 bit temporary storage /* Integer position */ if (ver_frac==0 && hor_frac==0){ j_off = 0 + hor_int; for(i=0;i<height;i++){ i_off = i + ver_int; memcpy(pblock + i*pstride,ref + i_off*stride+j_off, width*sizeof(uint8_t)); } return; } if (use_simd) get_inter_prediction_luma_simd(width, height, hor_frac, ver_frac, pblock, pstride, ref + ver_int*stride + hor_int, stride, bipred); /* Special lowpass filter at center position */ else if (ver_frac == 2 && hor_frac == 2) { for(i=0;i<height;i++){ for (j=0;j<width;j++){ int sum = 0; i_off = i + ver_int; j_off = j + hor_int; sum += 0*ref[(i_off-1)*stride+j_off-1]+1*ref[(i_off-1)*stride+j_off+0]+1*ref[(i_off-1)*stride+j_off+1]+0*ref[(i_off-1)*stride+j_off+2]; sum += 1*ref[(i_off+0)*stride+j_off-1]+2*ref[(i_off+0)*stride+j_off+0]+2*ref[(i_off+0)*stride+j_off+1]+1*ref[(i_off+0)*stride+j_off+2]; sum += 1*ref[(i_off+1)*stride+j_off-1]+2*ref[(i_off+1)*stride+j_off+0]+2*ref[(i_off+1)*stride+j_off+1]+1*ref[(i_off+1)*stride+j_off+2]; sum += 0*ref[(i_off+2)*stride+j_off-1]+1*ref[(i_off+2)*stride+j_off+0]+1*ref[(i_off+2)*stride+j_off+1]+0*ref[(i_off+2)*stride+j_off+2]; pblock[i*pstride+j] = clip255((sum + 8)>>4); } } } else {
void get_inter_prediction_luma(uint8_t *pblock, uint8_t *ref, int width, int height, int stride, int pstride, mv_t *mv, int sign) { int i,j; int m,i_off,j_off; mv_t mvtemp; mvtemp.x = sign ? -mv->x : mv->x; mvtemp.y = sign ? -mv->y : mv->y; int ver_frac = (mvtemp.y)&3; int hor_frac = (mvtemp.x)&3; int ver_int = (mvtemp.y)>>2; int hor_int = (mvtemp.x)>>2; int32_t tmp[80][80]; //7-bit filter exceeds 16 bit temporary storage /* Integer position */ if (ver_frac==0 && hor_frac==0){ j_off = 0 + hor_int; for(i=0;i<height;i++){ i_off = i + ver_int; memcpy(pblock + i*pstride,ref + i_off*stride+j_off, width*sizeof(uint8_t)); } return; } #if HEVC_INTERPOLATION /* Vertical filtering */ for(i=-OFFYM1;i<width+OFFY;i++){ for (j=0;j<height;j++){ int sum = 0; i_off = i + hor_int; j_off = j + ver_int; for (m=0;m<NTAPY;m++) sum += filter_coeffsY[ver_frac][m] * ref[(j_off + m - OFFYM1) * stride + i_off]; tmp[j][i+OFFYM1] = sum; } } /* Horizontal filtering */ for(i=0;i<width;i++){ for (j=0;j<height;j++){ int sum = 0; for (m=0;m<NTAPY;m++) sum += filter_coeffsY[hor_frac][m] * tmp[j][i+m]; pblock[j*pstride+i] = clip255((sum + 2048)>>12); } } return; #endif if (use_simd) { get_inter_prediction_luma_simd(width, height, hor_frac, ver_frac, pblock, pstride, ref + ver_int*stride + hor_int, stride); } else { /* Special lowpass filter at center position */ if (ver_frac == 2 && hor_frac == 2) { for(i=0;i<height;i++){ for (j=0;j<width;j++){ int sum = 0; i_off = i + ver_int; j_off = j + hor_int; sum += 0*ref[(i_off-1)*stride+j_off-1]+1*ref[(i_off-1)*stride+j_off+0]+1*ref[(i_off-1)*stride+j_off+1]+0*ref[(i_off-1)*stride+j_off+2]; sum += 1*ref[(i_off+0)*stride+j_off-1]+2*ref[(i_off+0)*stride+j_off+0]+2*ref[(i_off+0)*stride+j_off+1]+1*ref[(i_off+0)*stride+j_off+2]; sum += 1*ref[(i_off+1)*stride+j_off-1]+2*ref[(i_off+1)*stride+j_off+0]+2*ref[(i_off+1)*stride+j_off+1]+1*ref[(i_off+1)*stride+j_off+2]; sum += 0*ref[(i_off+2)*stride+j_off-1]+1*ref[(i_off+2)*stride+j_off+0]+1*ref[(i_off+2)*stride+j_off+1]+0*ref[(i_off+2)*stride+j_off+2]; pblock[i*pstride+j] = clip255((sum + 8)>>4); } } } else { /* Vertical filtering */ for(i=-OFFYM1;i<width+OFFY;i++){ for (j=0;j<height;j++){ int sum = 0; i_off = i + hor_int; j_off = j + ver_int; for (m=0;m<NTAPY;m++) sum += filter_coeffsY[ver_frac][m] * ref[(j_off + m - OFFYM1) * stride + i_off]; //7-bit version tmp[j][i+OFFYM1] = sum; } } /* Horizontal filtering */ for(i=0;i<width;i++){ for (j=0;j<height;j++){ int sum = 0; for (m=0;m<NTAPY;m++) sum += filter_coeffsY[hor_frac][m] * tmp[j][i+m]; //7-bit version pblock[j*pstride+i] = clip255((sum + 8192)>>14); //7-bit version } } } }