void dobj_iterate(Data_Obj *dp,void (*func)(Data_Obj *,index_t)) { dimension_t comp,col,row,frm,seq; /* offsets for sequence, frame, row, pixel, component */ dimension_t s_os, f_os, r_os, p_os, c_os; s_os=0; for(seq=0;seq<OBJ_SEQS(dp);seq++){ f_os = s_os; for(frm=0;frm<OBJ_FRAMES(dp);frm++){ r_os = f_os; for(row=0;row<OBJ_ROWS(dp);row++){ p_os = r_os; for(col=0;col<OBJ_COLS(dp);col++){ c_os = p_os; for(comp=0;comp<OBJ_COMPS(dp);comp++){ (*func)(dp,c_os); c_os += OBJ_COMP_INC(dp); } p_os += OBJ_PXL_INC(dp); } r_os += OBJ_ROW_INC(dp); } f_os += OBJ_FRM_INC(dp); } s_os += OBJ_SEQ_INC(dp); } }
void _dpair_iterate(QSP_ARG_DECL Data_Obj *dp1,Data_Obj *dp2,void (*func)(QSP_ARG_DECL Data_Obj *,index_t,Data_Obj *,index_t)) { dimension_t comp,col,row,frm,seq; /* offsets for sequence, frame, row, pixel, component */ dimension_t s_os1, f_os1, r_os1, p_os1, c_os1; dimension_t s_os2, f_os2, r_os2, p_os2, c_os2; s_os1=0; s_os2=0; for(seq=0;seq<OBJ_SEQS(dp1);seq++){ f_os1 = s_os1; f_os2 = s_os2; for(frm=0;frm<OBJ_FRAMES(dp1);frm++){ r_os1 = f_os1; r_os2 = f_os2; for(row=0;row<OBJ_ROWS(dp1);row++){ p_os1 = r_os1; p_os2 = r_os2; for(col=0;col<OBJ_COLS(dp1);col++){ c_os1 = p_os1; c_os2 = p_os2; for(comp=0;comp<OBJ_COMPS(dp1);comp++){ (*func)(QSP_ARG dp1,c_os1,dp2,c_os2); c_os1 += OBJ_COMP_INC(dp1); c_os2 += OBJ_COMP_INC(dp2); } p_os1 += OBJ_PXL_INC(dp1); p_os2 += OBJ_PXL_INC(dp2); } r_os1 += OBJ_ROW_INC(dp1); r_os2 += OBJ_ROW_INC(dp2); } f_os1 += OBJ_FRM_INC(dp1); f_os2 += OBJ_FRM_INC(dp2); } s_os1 += OBJ_SEQ_INC(dp1); s_os2 += OBJ_SEQ_INC(dp2); } }
static void _ocl_offset_data(QSP_ARG_DECL Data_Obj *dp, index_t offset) { #ifndef USE_OPENCL_SUBREGION /* The original code used subBuffers, but overlapping subregions * don't work... * So instead we use a common memory buffer, but keep track * of the starting offset (in elements). This offset has * to be passed to the kernels. */ //fprintf(stderr,"ocl_offset_data: obj %s, offset = %d\n",OBJ_NAME(dp),offset); //fprintf(stderr,"\tparent obj %s, parent offset = %d\n",OBJ_NAME(OBJ_PARENT(dp)), //OBJ_OFFSET(OBJ_PARENT(dp))); if( IS_COMPLEX(dp) ){ assert( (offset & 1) == 0 ); offset /= 2; //fprintf(stderr,"Adjusted offset (%d) for complex object %s\n",offset,OBJ_NAME(dp)); } else if( IS_QUAT(dp) ){ assert( (offset & 3) == 0 ); offset /= 4; } SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp))); SET_OBJ_OFFSET( dp, OBJ_OFFSET(OBJ_PARENT(dp)) + offset ); #else // USE_OPENCL_SUBREGION cl_mem buf; cl_mem parent_buf; cl_buffer_region reg; cl_int status; int extra_offset; parent_buf = find_parent_buf(OBJ_PARENT(dp),&extra_offset); assert( parent_buf != NULL ); reg.origin = (offset+extra_offset) * ELEMENT_SIZE(dp); // No - the region has to be big enough for all of the elements. // The safest thing is to include everything from the start // of the subregion to the end of the parent. Note that this // cannot handle negative increments!? // reg.size = OBJ_N_MACH_ELTS(dp) * ELEMENT_SIZE(dp); // p p p p p p p // p p c c c p p // p p p p p p p // p p c c c p p reg.size = OBJ_SEQ_INC(dp)*(OBJ_SEQS(dp)-1) + OBJ_FRM_INC(dp)*(OBJ_FRAMES(dp)-1) + OBJ_ROW_INC(dp)*(OBJ_ROWS(dp)-1) + OBJ_PXL_INC(dp)*(OBJ_COLS(dp)-1) + OBJ_COMP_INC(dp)*(OBJ_COMPS(dp)-1) + 1; reg.size *= ELEMENT_SIZE(dp); //fprintf(stderr,"requesting subregion of %ld bytes at offset %ld\n", //reg.size,reg.origin); buf = clCreateSubBuffer ( parent_buf, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®, &status); if( status != CL_SUCCESS ){ report_ocl_error(status, "clCreateSubBuffer"); SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp))); } else { SET_OBJ_DATA_PTR(dp,buf); } // BUG - Because this object doesn't "own" the data, the sub-buffer // won't be released when the object is destroyed, a possible memory // leak... // We need to add a special case, or make data releasing a // platform-specific function... #endif // USE_OPENCL_SUBREGION }
static int change_size(QSP_ARG_DECL Data_Obj *dst_dp,Data_Obj *src_dp ) { Dimension_Set ef, *enlargement_factor=&ef; Dimension_Set rf, *reduction_factor=&rf; Vec_Obj_Args oa1, *oap=&oa1; Dimension_Set size_ds, n_ds; Dimension_Set *size_dsp=(&size_ds), *n_dsp=(&n_ds); Data_Obj *src_ss_dp, *dst_ss_dp; dimension_t i,j,k,l,m; index_t offsets[N_DIMENSIONS]={0,0,0,0,0}; incr_t dst_incrs[N_DIMENSIONS], src_incrs[N_DIMENSIONS]; index_t dst_indices[N_DIMENSIONS]={0,0,0,0,0}, src_indices[N_DIMENSIONS]={0,0,0,0,0}; index_t dst_offset, src_offset; /* For simplicity, we don't allow size changes to be combined with conversions */ if( !dp_same_prec(dst_dp,src_dp,"change_size") ) return(-1); for(i=0;i<N_DIMENSIONS;i++){ if( OBJ_TYPE_DIM(dst_dp,i) > OBJ_TYPE_DIM(src_dp,i) ){ /* enlargement - subsample the destination */ set_dimension(enlargement_factor,i, floor( OBJ_TYPE_DIM(dst_dp,i) / OBJ_TYPE_DIM(src_dp,i) ) ); set_dimension(reduction_factor,i, 0); set_dimension(size_dsp,i, OBJ_TYPE_DIM(src_dp,i) ); set_dimension(n_dsp,i, DIMENSION(enlargement_factor,i) ); dst_incrs[i] = DIMENSION(n_dsp,i); src_incrs[i] = 1; } else { /* reduction - subsample the source */ set_dimension(reduction_factor,i, ceil( OBJ_TYPE_DIM(src_dp,i) / OBJ_TYPE_DIM(dst_dp,i) ) ); set_dimension(enlargement_factor,i, 0 ); set_dimension(size_dsp,i, floor( OBJ_TYPE_DIM(src_dp,i) / DIMENSION(reduction_factor,i) ) ); /* We don't need to do this multiple times, just pick one and do it */ /*set_dimension(n_dsp,i, DIMENSION(reduction_factor,i) ); */ set_dimension(n_dsp,i, 1); src_incrs[i] = DIMENSION(reduction_factor,i); dst_incrs[i] = 1; } } /* make the subsamples. * the column increment is expressed in columns, etc. */ dst_ss_dp=make_subsamp("chngsize_dst_obj",dst_dp,size_dsp,offsets,dst_incrs); src_ss_dp=make_subsamp("chngsize_src_obj",src_dp,size_dsp,offsets,src_incrs); clear_obj_args(oap); SET_OA_DEST(oap,dst_ss_dp); SET_OA_SRC_OBJ(oap,0, src_ss_dp); SET_OA_ARGSTYPE(oap, REAL_ARGS); SET_OA_PFDEV(oap,OBJ_PFDEV(dst_dp)); for(i=0;i<DIMENSION(n_dsp,4);i++){ /* foreach sequence to copy */ if( dst_incrs[4] > 1 ) dst_indices[4]=i; else src_indices[4]=i; for(j=0;j<DIMENSION(n_dsp,3);j++){ /* foreach frame to copy */ if( dst_incrs[3] > 1 ) dst_indices[3]=j; else src_indices[3]=j; for(k=0;k<DIMENSION(n_dsp,2);k++){ /* foreach row */ if( dst_incrs[2] > 1 ) dst_indices[2]=k; else src_indices[2]=k; for(l=0;l<DIMENSION(n_dsp,1);l++){ /* foreach col */ if( dst_incrs[1] > 1 ) dst_indices[1]=l; else src_indices[1]=l; for(m=0;m<DIMENSION(n_dsp,0);m++){ /* foreach comp */ if( dst_incrs[0] > 1 ) dst_indices[0]=m; else src_indices[0]=m; /* relocate the appropriate subsample */ dst_offset = dst_indices[0]*OBJ_COMP_INC(dst_dp) + dst_indices[1]*OBJ_PXL_INC(dst_dp) + dst_indices[2]*OBJ_ROW_INC(dst_dp) + dst_indices[3]*OBJ_FRM_INC(dst_dp) + dst_indices[4]*OBJ_SEQ_INC(dst_dp) ; ( * PF_OFFSET_DATA_FN(OBJ_PLATFORM(dst_ss_dp)) ) (QSP_ARG dst_ss_dp, dst_offset ); src_offset = src_indices[0]*OBJ_COMP_INC(src_dp) + src_indices[1]*OBJ_PXL_INC(src_dp) + src_indices[2]*OBJ_ROW_INC(src_dp) + src_indices[3]*OBJ_FRM_INC(src_dp) + src_indices[4]*OBJ_SEQ_INC(src_dp) ; ( * PF_OFFSET_DATA_FN(OBJ_PLATFORM(src_ss_dp)) ) (QSP_ARG src_ss_dp, src_offset ); // This doesn't check for cuda obj... //vmov(oap); perf_vfunc(QSP_ARG FVMOV, oap ); } } } } } delvec(dst_ss_dp); delvec(src_ss_dp); SET_OBJ_FLAG_BITS(dst_dp, DT_ASSIGNED); return(0); }