/**
 * VBX motion estimation on byte input, implemented with the vbx_3D operations.
 *
 * vbw_mtx_motest_3D_byte_setup should be run prior to running this function.
 * block_height must be an even number: the per-row SADs are reduced by adding
 * the first block_height/2 entries to the last block_height/2 entries.
 *
 * @param[out] result  host buffer; search_width results are written per search row
 * @param[in]  x       reference block, rows image_width elements apart
 * @param[in]  y       search image, rows image_width elements apart
 * @param[in]  m       motion-estimation state (dimensions and vector buffers)
 * @returns VBW_SUCCESS; negative values denote an error condition,
 *          see vbw_exit_codes.h
 */
int vbw_mtx_motest_3d_byte(output_type *result, input_type* x, input_type *y, vbw_motest_t *m)
{
	const int padded_width = m->block_width + m->search_width;
	const int image_rows   = m->block_height + m->search_height;
	const int half_rows    = m->block_height / 2;
	int row;
	int shift;

	/* Bring in the reference block, packed block_width elements per row. */
	for( row = 0; row < m->block_height; ++row ) {
		vbx_dma_to_vector( m->v_block + row*m->block_width,
		                   x + row*m->image_width,
		                   m->block_width*sizeof(input_type) );
	}

	/* Bring in the search window, padded_width elements per row. */
	for( row = 0; row < image_rows; ++row ) {
		vbx_dma_to_vector( m->v_img + row*padded_width,
		                   y + row*m->image_width,
		                   padded_width*sizeof(input_type) );
	}

	/* The 3D dimension walks the horizontal search positions; it is set once
	 * and stays in effect for every vbx_acc_3D issued below. */
	vbx_set_3D( m->search_width,
	            m->block_height*sizeof(intermediate_type),
	            sizeof(input_type),
	            0 );

	for( shift = 0; shift < m->search_height; ++shift ) {
		/* Step 1: absolute differences accumulated per block row, giving a
		 * vector of row SADs for each horizontal position. */
		vbx_set_vl( m->block_width );
		vbx_set_2D( m->block_height,
		            sizeof(intermediate_type),
		            padded_width*sizeof(input_type),
		            m->block_width*sizeof(input_type) );
		vbx_acc_3D( VVBHU, VABSDIFF,
		            m->v_row_sad,
		            m->v_img + shift*padded_width,
		            m->v_block );

		/* Step 2: fold the row SADs (first half + second half) into one
		 * result per horizontal position. */
		vbx_set_vl( half_rows );
		vbx_set_2D( m->search_width,
		            sizeof(output_type),
		            m->block_height*sizeof(intermediate_type),
		            m->block_height*sizeof(intermediate_type) );
		vbx_acc_2D( VVHWU, VADD,
		            (vbx_uword_t*)m->v_result + shift*m->search_width,
		            m->v_row_sad,
		            m->v_row_sad + half_rows );

		/* Step 3: send this finished line of results back to the host. */
		vbx_dma_to_host( result + shift*m->search_width,
		                 m->v_result + shift*m->search_width,
		                 m->search_width*sizeof(output_type) );
	}

	return VBW_SUCCESS;
}
/**
 * VBX motion estimation on byte input.
 *
 * Similar to the scalar version, but the search scans vertically because that
 * makes it easier to align the vectors.
 * vbw_mtx_motest_byte_setup should be run prior to running this function.
 *
 * @param[out] result  host buffer receiving m->result_size bytes of results
 * @param[in]  x       reference block, rows image_width elements apart
 * @param[in]  y       search image, rows image_width elements apart
 * @param[in]  m       motion-estimation state (dimensions and vector buffers)
 * @returns VBW_SUCCESS; negative values denote an error condition,
 *          see vbw_exit_codes.h
 */
int vbw_mtx_motest_byte(output_type *result, input_type *x, input_type *y, vbw_motest_t *m)
{
	const int padded_width = m->block_width + m->search_width;
	const int image_rows   = m->block_height + m->search_height;
	int row;
	int col;

	/* Load the reference rows; each transfer is padded_width elements wide. */
	for( row = 0; row < m->block_height; ++row ) {
		vbx_dma_to_vector( m->v_block + row*padded_width,
		                   x + row*m->image_width,
		                   padded_width*sizeof(input_type) );
	}

	/* Load the search window with the same row pitch. */
	for( row = 0; row < image_rows; ++row ) {
		vbx_dma_to_vector( m->v_img + row*padded_width,
		                   y + row*m->image_width,
		                   padded_width*sizeof(input_type) );
	}

	/* Repack the reference block in place so that consecutive rows sit
	 * block_width elements apart instead of padded_width apart. */
	vbx_set_vl( m->block_width );
	vbx_set_2D( m->block_height,
	            m->block_width*sizeof(input_type),
	            padded_width*sizeof(input_type),
	            0 );
	vbx_2D( VVB, VMOV, (vbx_byte_t*)m->v_block, (vbx_byte_t*)m->v_block, 0 );

	/* Handle one horizontal search offset per iteration. */
	for( col = 0; col < m->search_width; ++col ) {
		/* Extract the block_width-wide strip starting at this column,
		 * packed the same way as the reference block. */
		vbx_set_vl( m->block_width );
		vbx_set_2D( image_rows,
		            m->block_width*sizeof(input_type),
		            padded_width*sizeof(input_type),
		            0 );
		vbx_2D( VVBU, VMOV, m->v_img_sub, m->v_img + col, 0 );

		/* Slide the whole block down the strip one packed row at a time,
		 * accumulating absolute differences into result column `col`. */
		vbx_set_vl( m->block_width*m->block_height );
		vbx_set_2D( m->search_height,
		            m->search_width*sizeof(output_type),
		            0,
		            m->block_width*sizeof(input_type) );
		vbx_acc_2D( VVBWU, VABSDIFF,
		            (vbx_uword_t*)m->v_result + col,
		            m->v_block,
		            m->v_img_sub );
	}

	/* Copy the complete result buffer back to the host. */
	vbx_dma_to_host( result, m->v_result, m->result_size );

	return VBW_SUCCESS;
}
/**
 * Track a luma block from the previous frame inside the current frame using a
 * vectorized sum-of-absolute-differences (SAD) search, then draw the result.
 *
 * The search window is centred on (*motest_x, *motest_y) and clamped so that
 * a MOTEST_BUFFER_WIDTH x MOTEST_BUFFER_HEIGHT rectangle stays inside the
 * image. The best match (lowest SAD; ties go to the candidate closest to the
 * centre of the search window) becomes the new tracked position, and the
 * matched block is stored back to *last_luma.
 *
 * @param[in,out] input_buffer  frame to search; the motion line and marker are drawn into it
 * @param[in,out] last_luma     luma of the tracked block; (re)initialised through
 *                              init_vector_motest when *last_luma is NULL or reset is non-zero
 * @param[in,out] motest_x      tracked block x position, updated to the best match
 * @param[in,out] motest_y      tracked block y position, updated to the best match
 * @param[in]     start_x       forwarded to init_vector_motest
 * @param[in]     start_y       forwarded to init_vector_motest
 * @param[in]     reset         non-zero forces re-initialisation
 * @param[in]     image_width   used to clamp the search window horizontally
 * @param[in]     image_height  used to clamp the search window vertically
 * @param[in]     image_pitch   forwarded to the luma-conversion and drawing helpers
 * @returns 0 on completion; -1 if the search is abandoned because the touch
 *          pen was detected (only when built with TOUCHSCREEN and
 *          TOUCH_INTERRUPTS_VBX). If any scratchpad allocation fails the
 *          function prints a message and does not return.
 */
int vector_motest(pixel *input_buffer, luma_type **last_luma, int *motest_x, int *motest_y, int start_x, int start_y, int reset, const int image_width, const int image_height, const int image_pitch)
{
	int y, x, starty, startx;
	/* Signed: these take part in signed offset arithmetic passed to abs(). */
	int y_min, x_min;
	int dist, dist_min;
	unsigned int sad, sad_min;

	vbx_uhalf_t *v_search_luma, *v_last_luma;
	vbx_uhalf_t *v_row_temp;
	vbx_uword_t *v_row;
	vbx_uword_t *v_sad;

	pixel color;

	if(*last_luma == NULL || reset){
		init_vector_motest(input_buffer, last_luma, motest_x, motest_y, start_x, start_y, image_pitch);
	}

	v_search_luma = vbx_sp_malloc( MOTEST_BUFFER_SIZE  * sizeof(vbx_uhalf_t) );
	v_last_luma   = vbx_sp_malloc( MOTEST_BLOCK_SIZE   * sizeof(vbx_uhalf_t) );
	v_row_temp    = vbx_sp_malloc( MOTEST_BUFFER_WIDTH * sizeof(vbx_uhalf_t) );
	v_row         = vbx_sp_malloc( MOTEST_BUFFER_WIDTH * sizeof(vbx_uword_t) );
	v_sad         = vbx_sp_malloc( MOTEST_SEARCH_SIZE  * sizeof(vbx_uword_t) );

	/* Check every allocation, not just the last one: a large early request
	 * can fail while a smaller later one still succeeds. */
	if(v_search_luma == NULL || v_last_luma == NULL || v_row_temp == NULL ||
	   v_row == NULL || v_sad == NULL){
		printf("Not enough scratchpad for motest\n");
		while(1);
	}

	/* Centre the search window on the current position, then clamp it. */
	startx = *motest_x-(MOTEST_SEARCH_WIDTH/2);
	starty = *motest_y-(MOTEST_SEARCH_HEIGHT/2);

	if(startx < 0){
		startx = 0;
	}
	if(startx > image_width-MOTEST_BUFFER_WIDTH){
		startx = image_width-MOTEST_BUFFER_WIDTH;
	}
	if(starty < 0){
		starty = 0;
	}
	if(starty > image_height-MOTEST_BUFFER_HEIGHT){
		starty = image_height-MOTEST_BUFFER_HEIGHT;
	}

	vector_rectangle_to_luma(input_buffer, v_search_luma, v_row_temp, v_row, startx, starty, MOTEST_BUFFER_WIDTH, MOTEST_BUFFER_HEIGHT, image_pitch);
	vbx_dma_to_vector(v_last_luma, *last_luma, MOTEST_BLOCK_SIZE*sizeof(vbx_uhalf_t));

	/* Vector SAD computation. The 2D setup is shared by every candidate:
	 * one accumulated word per block row, stepping a buffer row in the
	 * search window and a block row in the reference. */
	vbx_set_2D(MOTEST_BLOCK_HEIGHT, sizeof(vbx_uword_t), MOTEST_BUFFER_WIDTH*sizeof(vbx_uhalf_t), MOTEST_BLOCK_WIDTH*sizeof(vbx_uhalf_t));
	for(y = 0; y < MOTEST_SEARCH_HEIGHT; y++){
		for(x = 0; x < MOTEST_SEARCH_WIDTH; x++){
			/* Per-row SADs for candidate (x, y) into v_row. */
			vbx_set_vl(MOTEST_BLOCK_WIDTH);
			vbx_acc_2D(VVHWU, VABSDIFF, v_row, v_search_luma+(y*MOTEST_BUFFER_WIDTH)+x, v_last_luma);
			/* Fold first half + second half of the row SADs into one total.
			 * NOTE(review): assumes MOTEST_BLOCK_HEIGHT is even - confirm. */
			vbx_set_vl(MOTEST_BLOCK_HEIGHT/2);
			vbx_acc(VVWU, VADD, v_sad+(y*MOTEST_SEARCH_WIDTH)+x, v_row, v_row+MOTEST_BLOCK_HEIGHT/2);
		}
#if TOUCHSCREEN
#ifdef TOUCH_INTERRUPTS_VBX
		if (touchscreen_get_pen(pTouch)) {
			vbx_sp_free();
			return -1;
		}
#endif
#endif
	}

	/* Wait for the vector work to finish before reading v_sad from the host. */
	vbx_sync();

	/* sad_min is unsigned, so seed it with the largest unsigned value. */
	sad_min = UINT_MAX;
	y_min = *motest_y;
	x_min = *motest_x;

	for(y = 0; y < MOTEST_SEARCH_HEIGHT; y++){
		for(x = 0; x < MOTEST_SEARCH_WIDTH; x++){
			sad = v_sad[y*MOTEST_SEARCH_WIDTH+x];
			if(sad < sad_min){
				sad_min = sad;
				x_min = x+startx;
				y_min = y+starty;
			} else if(sad == sad_min) {
				/* Tie: prefer the candidate with the smaller Manhattan
				 * distance to the centre of the search window. */
				dist     = abs(x - MOTEST_SEARCH_WIDTH/2) +
				           abs(y - MOTEST_SEARCH_HEIGHT/2);
				dist_min = abs((x_min-startx) - MOTEST_SEARCH_WIDTH/2) +
				           abs((y_min-starty) - MOTEST_SEARCH_HEIGHT/2);
				if(dist < dist_min){
					x_min = x+startx;
					y_min = y+starty;
				}
			}
		}
	}

	/* Draw a green line from the old block centre to the new block centre. */
	color.r = 0;
	color.g = 255;
	color.b = 0;
	color.a = 0;
	scalar_draw_line(*motest_x+(MOTEST_BLOCK_WIDTH/2), *motest_y+(MOTEST_BLOCK_HEIGHT/2), x_min+(MOTEST_BLOCK_WIDTH/2), y_min+(MOTEST_BLOCK_HEIGHT/2), color, input_buffer, image_pitch);

	*motest_y = y_min;
	*motest_x = x_min;

	/* Copy the matched block out of the search window row by row so it
	 * becomes the reference block for the next call. */
	vbx_set_vl(MOTEST_BLOCK_WIDTH);
	for(y = 0; y < MOTEST_BLOCK_HEIGHT; y++){
		vbx(VVHU, VMOV, v_last_luma+(y*MOTEST_BLOCK_WIDTH), v_search_luma+((y+y_min-starty)*MOTEST_BUFFER_WIDTH)+(x_min-startx), 0);
	}
	vbx_dma_to_host(*last_luma, v_last_luma, MOTEST_BLOCK_SIZE*sizeof(luma_type));

	draw_motest(input_buffer, *motest_x, *motest_y, image_pitch);
	/* Simple hack to draw thicker: draw again offset by one pixel. */
	draw_motest(input_buffer, *motest_x+1, *motest_y+1, image_pitch);

	vbx_sp_free();

	return 0;
}