vbx_void_t *vbx_sp_malloc_debug( int LINE,const char *FNAME, size_t num_bytes ) { // print pretty error messages vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); if( !this_mxp || !this_mxp->init ) { VBX_PRINTF( "ERROR: failed to call _vbx_init().\n" ); VBX_FATAL(LINE,FNAME,-1); } // pad to scratchpad width to reduce occurrence of false hazards size_t padded = VBX_PAD_UP( num_bytes, this_mxp->scratchpad_alignment_bytes ); size_t freesp = (size_t)(this_mxp->scratchpad_end - this_mxp->sp); //VBX_SCRATCHPAD_END - (size_t)vbx_sp; // vbx_sp_getfree(); vbx_void_t *result = NULL; if( VBX_DEBUG_LEVEL && (num_bytes==0) ) { print_sp_malloc_null(); } else if( VBX_DEBUG_LEVEL && freesp < padded ) { print_sp_malloc_full( num_bytes, padded ); } else if( num_bytes > 0 && freesp >= padded ) { result = this_mxp->sp; this_mxp->sp += padded; #if VBX_DEBUG_SP_MALLOC printf("sp_malloc %d bytes padded to %d, sp=0x%08x\n", num_bytes, padded, this_mxp->sp); #endif } if( !result ) { VBX_FATAL(LINE,FNAME,-1); } return result; }
vbx_void_t *vbx_sp_malloc_nodebug( size_t num_bytes ) { if( VBX_DEBUG_LEVEL && 0 ) { // print pretty error messages return vbx_sp_malloc_debug( __LINE__, __FILE__, num_bytes ); } // do it, but do not print pretty error messages vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP(); // check for valid argument values if( !this_mxp || num_bytes==0 ) return NULL; // add padding and allocate // pad to scratchpad width to reduce occurrence of false hazards size_t padded = VBX_PAD_UP( num_bytes, this_mxp->scratchpad_alignment_bytes ); vbx_void_t *old_sp = this_mxp->sp; this_mxp->sp += padded; // scratchpad full if( this_mxp->sp > this_mxp->scratchpad_end ) { this_mxp->sp = old_sp; return NULL; } // success return old_sp; }
// Turn a caller-supplied buffer p into an aligned, uncached shared pointer.
//
// p is rounded up with VBX_PAD_UP to the value of VBX_PADDING(), and the
// rounded pointer is handed to vbx_remap_uncached_flush() together with
// num_bytes. Returns that function's result, or NULL when p is NULL.
//
// NOTE(review): presumably the caller over-allocated p so that num_bytes
// still fit after rounding up — confirm against the calling macro.
void *vbx_shared_alloca_nodebug( size_t num_bytes, void *p )
{
	unsigned int pad = VBX_PADDING();

	if( !p ) {
		return NULL;
	}

	void *rounded = (void *)VBX_PAD_UP( p, pad );
	return (void *)vbx_remap_uncached_flush( rounded, num_bytes );
}
// Allocate num_bytes of uncached memory aligned to VBX_PADDING().
//
// The underlying vbx_uncached_malloc() request is enlarged by one pointer
// plus twice the padding. The returned pointer is the first padding-aligned
// address at least sizeof(void*) bytes into that allocation, and the raw
// allocation address is stored in the pointer-sized slot immediately below
// it (presumably so a matching free routine can recover it — not visible
// here).
//
// Returns the aligned pointer, or NULL if the size computation would
// overflow or the underlying allocation fails.
void *vbx_shared_malloc( size_t num_bytes )
{
	unsigned int padding = VBX_PADDING();
	size_t overhead = sizeof(void *) + 2 * (size_t)padding;

#if VBX_DEBUG_MALLOC
	// size_t must not be printed with %d.
	printf("shared_malloc %lu bytes\n", (unsigned long)num_bytes);
#endif

	// Refuse requests whose total size would wrap around; a wrapped size
	// would yield a buffer far smaller than the caller expects.
	if( num_bytes > (size_t)-1 - overhead ) {
		return NULL;
	}

	char *alloced_ptr = (char *)vbx_uncached_malloc( num_bytes + overhead );
	if( !alloced_ptr ) {
		return NULL;
	}

	// Leave room for the back-pointer, then round up to the alignment.
	void *first_usable = alloced_ptr + sizeof(void *);
	void *aligned_ptr = (void *)VBX_PAD_UP( first_usable, padding );

	// Stash the raw allocation address just below the aligned block.
	*((void **)((char *)aligned_ptr - sizeof(void *))) = (void *)alloced_ptr;

	return aligned_ptr;
}
// Vector version of the image blend.
//
// Streams img_in1 and img_in2 through the scratchpad in chunks and writes
// the blended result to img_out. The four vbx() calls in the loop appear to
// compute, per element,
//     (blending_const * in1 + (256 - blending_const) * in2) >> 8
// (assuming VMULLO / VADD / VSHR have their usual MXP meanings — confirm).
//
// img_out         destination image, num_row*num_column elements
// img_in1/img_in2 source images, num_row*num_column elements each
// num_row         image height
// num_column      image width
// blending_const  weight applied to img_in1; img_in2 gets 256 - blending_const
//
// The inputs are double-buffered: while chunk "bufselect" is being computed,
// the next chunk is fetched into buffers "1-bufselect" and the previous
// result is written back from v_img1[1-bufselect].
//
// NOTE(review): the first DMA and the final write-back always move a full
// chunk_size elements; if num_row*num_column is smaller than one chunk this
// looks like it reads/writes past the images — confirm callers never pass
// such small images.
void vector_blend( output_pointer img_out, input_pointer img_in1, input_pointer img_in2, unsigned int num_row, unsigned int num_column, intermediate_type blending_const )
{
	intermediate_type *v_img1[2];	// input 1 (double-buffered); also holds the result
	input_type *v_img2[2];		// input 2 (double-buffered)
	intermediate_type *v_temp;	// holds blending_const * in1
	intermediate_type blending_const_bar = 256-blending_const;
	int j;

	vbx_mxp_t *this_mxp = VBX_GET_THIS_MXP();
	const int VBX_SCRATCHPAD_SIZE = this_mxp->scratchpad_size;
	const int VBX_WIDTH_BYTES = this_mxp->vector_lanes * sizeof(int);
	const int VBX_DMA_ALIGNMENT = this_mxp->dma_alignment_bytes;

	// Elements per chunk: scratchpad bytes divided by the bytes needed per
	// element across all five buffers (3 intermediate + 2 input), then
	// reduced by (VBX_WIDTH_BYTES-1) and padded up to the DMA alignment.
	unsigned int chunk_size = VBX_SCRATCHPAD_SIZE/((3*sizeof(intermediate_type))+(2*sizeof(input_type)));
	chunk_size = VBX_PAD_UP( chunk_size-(VBX_WIDTH_BYTES-1), VBX_DMA_ALIGNMENT );

	// The *_old copies keep the full chunk size; chunk_size / vector_length
	// are shrunk below when the final partial chunk is fetched.
	unsigned int chunk_size_old = chunk_size;
	unsigned int vector_length = chunk_size;
	unsigned int vector_length_old = vector_length;

	v_img1[0] = (intermediate_type *)vbx_sp_malloc( chunk_size*sizeof(intermediate_type) );
	v_img1[1] = (intermediate_type *)vbx_sp_malloc( chunk_size*sizeof(intermediate_type) );
	v_img2[0] = (input_type *)vbx_sp_malloc( chunk_size*sizeof(input_type) );
	v_img2[1] = (input_type *)vbx_sp_malloc( chunk_size*sizeof(input_type) );
	v_temp = (intermediate_type *)vbx_sp_malloc( chunk_size*sizeof(intermediate_type) );
	// Only the last allocation is checked.
	// NOTE(review): this relies on an earlier failure implying this one
	// fails too — confirm for the allocator in use.
	if( v_temp == NULL ) {
		VBX_EXIT(0xBADDEAD);
	}

	int bufselect = 0;

	// Prime the pipeline with the first chunk of both inputs.
	// Note: input_type-sized data is loaded into the intermediate_type buffer.
	vbx_dma_to_vector( v_img1[bufselect], img_in1, chunk_size*sizeof(input_type) );
	vbx_dma_to_vector( v_img2[bufselect], img_in2, chunk_size*sizeof(input_type) );

	// j is the element offset of the chunk being computed; it always advances
	// by the full chunk size (vector_length_old is never updated).
	for( j=0; j<num_row*num_column; j+=vector_length_old ) {
		// Uses the length of the chunk fetched on the previous iteration.
		vbx_set_vl(vector_length);

		// Write back the result of the previous (always full-size) chunk.
		if( j > 0 ) {
			vbx_dma_to_host( img_out+j-vector_length_old, v_img1[1-bufselect], chunk_size_old*sizeof(output_type) );
		}

		// Prefetch the next chunk, if there is one, into the idle buffers.
		// NOTE(review): the "-1" skips the prefetch when exactly one element
		// remains — confirm this is intended.
		if( (j+vector_length_old) < (num_row*num_column-1) ) {
			// Next chunk is the last one: shrink it to the remaining elements.
			if( (j+vector_length_old*2) >= num_row*num_column ) {
				vector_length = num_row*num_column - j - vector_length_old;
				chunk_size = vector_length;
			}
			vbx_dma_to_vector( v_img1[1-bufselect], img_in1+j+vector_length_old, 
chunk_size*sizeof(input_type) );
			vbx_dma_to_vector( v_img2[1-bufselect], img_in2+j+vector_length_old, chunk_size*sizeof(input_type) );
		}

		// v_temp  = blending_const     * in1
		// v_img1  = blending_const_bar * in2
		// v_img1 += v_temp
		// v_img1 >>= 8
		vbx( SVBHU, VMULLO, v_temp, blending_const, v_img1[bufselect] );
		vbx( SVBHU, VMULLO, v_img1[bufselect], blending_const_bar, v_img2[bufselect] );
		vbx( VVHU, VADD, v_img1[bufselect], v_img1[bufselect], v_temp );
		vbx( SVHBU, VSHR, v_img1[bufselect], 8, v_img1[bufselect] );

		// Swap the roles of the two buffer sets.
		bufselect = 1-bufselect;
	}

	// Write back the final chunk; chunk_size now holds its (possibly
	// shortened) length.
	vbx_dma_to_host( img_out+j-vector_length_old, v_img1[1-bufselect], chunk_size*sizeof(output_type) );

	vbx_sp_free();
	vbx_sync();
}