static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *const x_filters, int x0_q4, int x_step_q4, const InterpKernel *const y_filters, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. uint8_t temp[135 * 64]; int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, x_filters, x0_q4, x_step_q4, w, intermediate_height); convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); }
/*
 * Compute a local smoothness weight from the luminance gradient of `im`:
 *
 *     weight = 0.5 * exp(-coef * sqrt(lum_x^2 + lum_y^2))
 *
 * where lum is the luminance (0.299*c1 + 0.587*c2 + 0.114*c3) / 255 and
 * lum_x / lum_y are obtained by filtering lum horizontally / vertically
 * with `deriv`.
 *
 * Pixels are processed four floats at a time through v4sf, over the whole
 * height * stride buffer.
 *
 * Returns a newly created image holding the weights; the two temporary
 * gradient images are deleted before returning.
 */
image_t* compute_dpsis_weight(color_image_t *im, float coef, const convolution_t *deriv) {
    image_t *lum    = image_new(im->width, im->height);
    image_t *grad_x = image_new(im->width, im->height);
    image_t *grad_y = image_new(im->width, im->height);
    int k, lane;

    // luminance of every pixel, scaled by 1/255
    const v4sf *c1 = (v4sf*) im->c1;
    const v4sf *c2 = (v4sf*) im->c2;
    const v4sf *c3 = (v4sf*) im->c3;
    v4sf *lum_v = (v4sf*) lum->data;
    const int n_im_vec = im->height*im->stride/4;
    for (k = 0; k < n_im_vec; k++) {
        lum_v[k] = (0.299f*c1[k] + 0.587f*c2[k] + 0.114f*c3[k])/255.0f;
    }

    // horizontal and vertical derivatives of the luminance
    convolve_horiz(grad_x, lum, deriv);
    convolve_vert(grad_y, lum, deriv);

    // overwrite the luminance with 0.5*exp(-coef*|gradient|)
    const v4sf *gx = (v4sf*) grad_x->data;
    const v4sf *gy = (v4sf*) grad_y->data;
    v4sf *weight = (v4sf*) lum->data;
    const int n_lum_vec = lum->height*lum->stride/4;
    for (k = 0; k < n_lum_vec; k++) {
        weight[k] = -coef*__builtin_ia32_sqrtps( gx[k]*gx[k] + gy[k]*gy[k] );
        // expf has no vector form here: apply it lane by lane
        for (lane = 0; lane < 4; lane++) {
            weight[k][lane] = 0.5f*expf(weight[k][lane]);
        }
    }

    image_delete(grad_x);
    image_delete(grad_y);
    return lum;
}
/*
 * Vertical-only sub-pixel convolution of a w x h block (C implementation).
 *
 * The kernel table and the initial sub-pel offset are both derived from
 * `filter_y` through get_filter_base() / get_filter_offset(); the actual
 * filtering is delegated to convolve_vert().
 *
 * filter_x and x_step_q4 are part of the common convolve signature but are
 * not used by this vertical-only variant.
 */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  const subpel_kernel *const kernel_table = get_filter_base(filter_y);
  const int start_q4 = get_filter_offset(filter_y, kernel_table);

  (void)filter_x;
  (void)x_step_q4;

  convolve_vert(src, src_stride, dst, dst_stride,
                kernel_table, start_q4, y_step_q4, w, h);
}
/*
 * Apply a horizontal and/or a vertical convolution to each of the three
 * channels of a color image.
 *
 *   - both filters given : horizontal pass into a temporary buffer, then
 *                          vertical pass into dst;
 *   - only horiz_conv    : horizontal pass straight into dst;
 *   - only vert_conv     : vertical pass straight into dst;
 *   - neither            : nothing is done.
 *
 * The width, height and stride of `src` are used for both images.  If the
 * temporary buffer cannot be allocated, an error is printed to stderr and
 * the program exits with status 1.
 */
void color_image_convolve_hv(color_image_t *dst, const color_image_t *src, const convolution_t *horiz_conv, const convolution_t *vert_conv){
    const int width = src->width, height = src->height, stride = src->stride;
    int c;

    // single-channel views over the color planes, in c1, c2, c3 order
    image_t src_chan[3] = {
        {width,height,stride,src->c1},
        {width,height,stride,src->c2},
        {width,height,stride,src->c3}
    };
    image_t dst_chan[3] = {
        {width,height,stride,dst->c1},
        {width,height,stride,dst->c2},
        {width,height,stride,dst->c3}
    };

    if(horiz_conv == NULL && vert_conv == NULL){
        return;
    }

    if(vert_conv == NULL){
        // only horizontal
        for(c = 0; c < 3; c++){
            convolve_horiz(&dst_chan[c],&src_chan[c],horiz_conv);
        }
        return;
    }

    if(horiz_conv == NULL){
        // only vertical
        for(c = 0; c < 3; c++){
            convolve_vert(&dst_chan[c],&src_chan[c],vert_conv);
        }
        return;
    }

    // horizontal then vertical, through one scratch plane reused per channel
    float *tmp_data = malloc(sizeof(float)*stride*height);
    if(tmp_data == NULL){
        fprintf(stderr,"error color_image_convolve_hv(): not enough memory\n");
        exit(1);
    }
    image_t tmp = {width,height,stride,tmp_data};
    for(c = 0; c < 3; c++){
        convolve_horiz(&tmp,&src_chan[c],horiz_conv);
        convolve_vert(&dst_chan[c],&tmp,vert_conv);
    }
    free(tmp_data);
}
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const subpel_kernel *const x_filters, int x0_q4, int x_step_q4, const subpel_kernel *const y_filters, int y0_q4, int y_step_q4, int w, int h) { // Fixed size intermediate buffer places limits on parameters. // Maximum intermediate_height is 324, for y_step_q4 == 80, // h == 64, taps == 8. // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc uint8_t temp[64 * 324]; int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; if (intermediate_height < h) intermediate_height = h; convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, x_filters, x0_q4, x_step_q4, w, intermediate_height); convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h); }
/* compute the saliency of a given image */ image_t* saliency(const color_image_t *im, float sigma_image, float sigma_matrix ){ int width = im->width, height = im->height, filter_size; // smooth image color_image_t *sim = color_image_new(width, height); float *presmooth_filter = gaussian_filter(sigma_image, &filter_size); convolution_t *presmoothing = convolution_new(filter_size, presmooth_filter, 1); color_image_convolve_hv(sim, im, presmoothing, presmoothing); convolution_delete(presmoothing); free(presmooth_filter); // compute derivatives float deriv_filter[2] = {0.0f, -0.5f}; convolution_t *deriv = convolution_new(1, deriv_filter, 0); color_image_t *imx = color_image_new(width, height), *imy = color_image_new(width, height); color_image_convolve_hv(imx, sim, deriv, NULL); color_image_convolve_hv(imy, sim, NULL, deriv); convolution_delete(deriv); // compute autocorrelation matrix image_t *imxx = image_new(width, height), *imxy = image_new(width, height), *imyy = image_new(width, height); v4sf *imx1p = (v4sf*) imx->c1, *imx2p = (v4sf*) imx->c2, *imx3p = (v4sf*) imx->c3, *imy1p = (v4sf*) imy->c1, *imy2p = (v4sf*) imy->c2, *imy3p = (v4sf*) imy->c3, *imxxp = (v4sf*) imxx->data, *imxyp = (v4sf*) imxy->data, *imyyp = (v4sf*) imyy->data; int i; for(i = 0 ; i<height*im->stride/4 ; i++){ *imxxp = (*imx1p)*(*imx1p) + (*imx2p)*(*imx2p) + (*imx3p)*(*imx3p); *imxyp = (*imx1p)*(*imy1p) + (*imx2p)*(*imy2p) + (*imx3p)*(*imy3p); *imyyp = (*imy1p)*(*imy1p) + (*imy2p)*(*imy2p) + (*imy3p)*(*imy3p); imxxp+=1; imxyp+=1; imyyp+=1; imx1p+=1; imx2p+=1; imx3p+=1; imy1p+=1; imy2p+=1; imy3p+=1; } // integrate autocorrelation matrix float *smooth_filter = gaussian_filter(sigma_matrix, &filter_size); convolution_t *smoothing = convolution_new(filter_size, smooth_filter, 1); image_t *tmp = image_new(width, height); convolve_horiz(tmp, imxx, smoothing); convolve_vert(imxx, tmp, smoothing); convolve_horiz(tmp, imxy, smoothing); convolve_vert(imxy, tmp, smoothing); convolve_horiz(tmp, imyy, 
smoothing); convolve_vert(imyy, tmp, smoothing); convolution_delete(smoothing); free(smooth_filter); // compute smallest eigenvalue v4sf vzeros = {0.0f,0.0f,0.0f,0.0f}; v4sf vhalf = {0.5f,0.5f,0.5f,0.5f}; v4sf *tmpp = (v4sf*) tmp->data; imxxp = (v4sf*) imxx->data; imxyp = (v4sf*) imxy->data; imyyp = (v4sf*) imyy->data; for(i = 0 ; i<height*im->stride/4 ; i++){ (*tmpp) = vhalf*( (*imxxp)+(*imyyp) ) ; (*tmpp) = __builtin_ia32_sqrtps(__builtin_ia32_maxps(vzeros, (*tmpp) - __builtin_ia32_sqrtps(__builtin_ia32_maxps(vzeros, (*tmpp)*(*tmpp) + (*imxyp)*(*imxyp) - (*imxxp)*(*imyyp) ) ))); tmpp+=1; imxyp+=1; imxxp+=1; imyyp+=1; } image_delete(imxx); image_delete(imxy); image_delete(imyy); color_image_delete(imx); color_image_delete(imy); color_image_delete(sim); return tmp; }
/* It is represented as two images, the first one for horizontal smoothness, the second for vertical in dst_horiz, the pixel i,j represents the smoothness weight between pixel i,j and i,j+1 in dst_vert, the pixel i,j represents the smoothness weight between pixel i,j and i+1,j */ void compute_smoothness(image_t *dst_horiz, image_t *dst_vert, const image_t *uu, const image_t *vv, const image_t *dpsis_weight, const convolution_t *deriv_flow, const float half_alpha) { int w = uu->width, h = uu->height, s = uu->stride, i, j, offset; image_t *ux1 = image_new(w,h), *uy1 = image_new(w,h), *vx1 = image_new(w,h), *vy1 = image_new(w,h), *ux2 = image_new(w,h), *uy2 = image_new(w,h), *vx2 = image_new(w,h), *vy2 = image_new(w,h); // compute ux1, vx1, filter [-1 1] for( j=0 ; j<h ; j++) { offset = j*s; for( i=0 ; i<w-1 ; i++, offset++) { ux1->data[offset] = uu->data[offset+1] - uu->data[offset]; vx1->data[offset] = vv->data[offset+1] - vv->data[offset]; } } // compute uy1, vy1, filter [-1;1] for( j=0 ; j<h-1 ; j++) { offset = j*s; for( i=0 ; i<w ; i++, offset++) { uy1->data[offset] = uu->data[offset+s] - uu->data[offset]; vy1->data[offset] = vv->data[offset+s] - vv->data[offset]; } } // compute ux2, uy2, vx2, vy2, filter [-0.5 0 0.5] convolve_horiz(ux2,uu,deriv_flow); convolve_horiz(vx2,vv,deriv_flow); convolve_vert(uy2,uu,deriv_flow); convolve_vert(vy2,vv,deriv_flow); // compute final value, horiz for( j=0 ; j<h ; j++) { offset = j*s; for( i=0 ; i<w-1 ; i++, offset++) { float tmp = 0.5f*(uy2->data[offset]+uy2->data[offset+1]); float uxsq = ux1->data[offset]*ux1->data[offset] + tmp*tmp; tmp = 0.5f*(vy2->data[offset]+vy2->data[offset+1]); float vxsq = vx1->data[offset]*vx1->data[offset] + tmp*tmp; tmp = uxsq + vxsq; dst_horiz->data[offset] = (dpsis_weight->data[offset]+dpsis_weight->data[offset+1])*half_alpha / sqrt( tmp + epsilon_smooth ) ; } memset( &dst_horiz->data[j*s+w-1], 0, sizeof(float)*(s-w+1)); } // compute final value, vert for( j=0 ; j<h-1 ; j++) { offset = j*s; for( 
i=0 ; i<w ; i++, offset++) { float tmp = 0.5f*(ux2->data[offset]+ux2->data[offset+s]); float uysq = uy1->data[offset]*uy1->data[offset] + tmp*tmp; tmp = 0.5f*(vx2->data[offset]+vx2->data[offset+s]); float vysq = vy1->data[offset]*vy1->data[offset] + tmp*tmp; tmp = uysq + vysq; dst_vert->data[offset] = (dpsis_weight->data[offset]+dpsis_weight->data[offset+s])*half_alpha / sqrt( tmp + epsilon_smooth ) ; /*if( dpsis_weight->data[offset]<dpsis_weight->data[offset+s]) dst_vert->data[offset] = dpsis_weight->data[offset]*half_alpha / sqrt( tmp + epsilon_smooth ) ; else dst_vert->data[offset] = dpsis_weight->data[offset+s]*half_alpha / sqrt( tmp + epsilon_smooth ) ;*/ } } memset( &dst_vert->data[(h-1)*s], 0, sizeof(float)*s); image_delete(ux1); image_delete(uy1); image_delete(vx1); image_delete(vy1); image_delete(ux2); image_delete(uy2); image_delete(vx2); image_delete(vy2); }