/*
 * _ocl_offset_data - make a sub-object refer to storage inside its parent.
 *
 * dp      the child object whose data pointer (and, in the default build,
 *         element offset) is set here.  OBJ_PARENT(dp) must already be set.
 * offset  starting position of the child relative to the parent.  In the
 *         default build it is halved for complex objects and quartered for
 *         quaternion objects, so it is presumably expressed in machine
 *         elements on entry - TODO confirm against callers.
 *
 * Two implementations are selected at compile time:
 *
 *  - default (USE_OPENCL_SUBREGION not defined): the child shares the
 *    parent's data pointer and records an accumulated offset that has to
 *    be passed to the kernels.
 *
 *  - USE_OPENCL_SUBREGION: an OpenCL sub-buffer is created over the parent
 *    buffer.  The original author notes that overlapping subregions do not
 *    work, which is why this is not the default.
 */
static void _ocl_offset_data(QSP_ARG_DECL  Data_Obj *dp, index_t offset)
{
#ifndef USE_OPENCL_SUBREGION
	/* The original code used subBuffers, but overlapping subregions
	 * don't work...
	 * So instead we use a common memory buffer, but keep track
	 * of the starting offset (in elements).  This offset has
	 * to be passed to the kernels.
	 */

	/* Convert the offset to units of the object's own element type:
	 * a complex element spans 2 components, a quaternion spans 4.
	 * The asserts check that the incoming offset is aligned accordingly.
	 */
	if( IS_COMPLEX(dp) ){
		assert( (offset & 1) == 0 );
		offset /= 2;
	} else if( IS_QUAT(dp) ){
		assert( (offset & 3) == 0 );
		offset /= 4;
	}

	/* Share the parent's buffer; offsets accumulate down the hierarchy. */
	SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	SET_OBJ_OFFSET( dp, OBJ_OFFSET(OBJ_PARENT(dp)) + offset );

#else // USE_OPENCL_SUBREGION

	cl_mem buf;
	cl_mem parent_buf;
	cl_buffer_region reg;
	cl_int status;
	int extra_offset;

	/* find_parent_buf also reports an additional offset through its
	 * second argument; it is added to the requested offset below.
	 */
	parent_buf = find_parent_buf(OBJ_PARENT(dp),&extra_offset);
	assert( parent_buf != NULL );

	/* Region origin in bytes. */
	reg.origin = (offset+extra_offset) * ELEMENT_SIZE(dp);

	// The region has to be big enough for all of the elements, not
	// just OBJ_N_MACH_ELTS(dp) of them, because the child may be
	// strided within the parent:
	//
	// p p p p p p p
	// p p c c c p p
	// p p p p p p p
	// p p c c c p p
	//
	// So the size is the distance from the first to the last addressed
	// element, plus one.  Note that this cannot handle negative
	// increments!?

	reg.size =	  OBJ_SEQ_INC(dp)*(OBJ_SEQS(dp)-1)
			+ OBJ_FRM_INC(dp)*(OBJ_FRAMES(dp)-1)
			+ OBJ_ROW_INC(dp)*(OBJ_ROWS(dp)-1)
			+ OBJ_PXL_INC(dp)*(OBJ_COLS(dp)-1)
			+ OBJ_COMP_INC(dp)*(OBJ_COMPS(dp)-1)
			+ 1;
	reg.size *= ELEMENT_SIZE(dp);	/* elements -> bytes */

	buf = clCreateSubBuffer(	parent_buf,
					CL_MEM_READ_WRITE,
					CL_BUFFER_CREATE_TYPE_REGION,
					&reg,
					&status);

	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateSubBuffer");
		/* Fall back to the parent's data pointer so that the
		 * object's data pointer is at least set.
		 */
		SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	} else {
		SET_OBJ_DATA_PTR(dp,buf);
	}

	// BUG - Because this object doesn't "own" the data, the sub-buffer
	// won't be released when the object is destroyed, a possible memory
	// leak...
	// We need to add a special case, or make data releasing a
	// platform-specific function...

#endif // USE_OPENCL_SUBREGION
}
/*
 * insure_ram_obj - return an object whose data lives in RAM.
 *
 * dp  the object to examine.
 *
 * If dp is already a RAM object it is returned unchanged.  Otherwise a
 * duplicate named DNAME_PREFIX + OBJ_NAME(dp) is created in the RAM data
 * area, the data are downloaded into it with gen_obj_dnload(), the
 * duplicate is flagged DT_VOLATILE, and the duplicate is returned.
 *
 * When dp is neither contiguous nor has contiguous data, the data are
 * first moved (FVMOV) into a temporary duplicate in dp's own data area
 * (named CNAME_PREFIX + OBJ_NAME(dp)); that temporary is the download
 * source and is deleted afterwards.
 *
 * Returns NO_OBJ if either duplicate cannot be created.  The global
 * curr_ap is restored to its value on entry on every return path.
 */
static Data_Obj *insure_ram_obj(QSP_ARG_DECL  Data_Obj *dp)
{
	Data_Obj *tmp_dp;
	char *tname;
	Data_Area *save_ap;
	Data_Obj *c_dp=NO_OBJ;

	if( OBJ_IS_RAM(dp) ) return dp;

	// This object lives on a different platform.
	// We create a copy in RAM, and download the data
	// using the platform download function.

	save_ap = curr_ap;
	curr_ap = ram_area_p;

	/* Buffer is sized exactly for prefix + name + terminator. */
	tname = getbuf( strlen(OBJ_NAME(dp)) + strlen(DNAME_PREFIX) + 1 );
	sprintf(tname,"%s%s",DNAME_PREFIX,OBJ_NAME(dp));
	tmp_dp = dup_obj(QSP_ARG  dp, tname);
	givbuf(tname);

	/* Restore the current data area BEFORE testing for failure, so
	 * that an early return does not leave curr_ap pointing at the
	 * RAM area.
	 */
	curr_ap = save_ap;

	if( tmp_dp == NO_OBJ ){
		// This can happen if the object is subscripted,
		// as the bracket characters are illegal in names
		return NO_OBJ;
	}

	// We can't download if the source data is not contiguous...
	//
	// We have a problem with bit precision, because the bits can
	// be non-contiguous when the long words are - any time the number
	// of columns is not evenly divided by the bits-per-word

	if( (! IS_CONTIGUOUS(dp)) && ! HAS_CONTIGUOUS_DATA(dp) ){
		Vec_Obj_Args oa1, *oap=&oa1;

		advise("object is not contiguous, and does not have contiguous data...");
		longlist(QSP_ARG  dp);

		/* Create the contiguous staging object in the same data
		 * area as the source object.
		 */
		save_ap = curr_ap;
		curr_ap = OBJ_AREA( dp );

		tname = getbuf( strlen(OBJ_NAME(dp)) + strlen(CNAME_PREFIX) + 1 );
		sprintf(tname,"%s%s",CNAME_PREFIX,OBJ_NAME(dp));
		c_dp = dup_obj(QSP_ARG  dp, tname );
		givbuf(tname);

		curr_ap = save_ap;

		if( c_dp == NO_OBJ ){
			/* Without a staging object the move below cannot be
			 * performed; discard the RAM copy created above and
			 * report failure the same way as the first dup_obj.
			 */
			delvec(QSP_ARG  tmp_dp);
			return NO_OBJ;
		}

		// Now do the move...

		setvarg2(oap,c_dp,dp);
		if( IS_BITMAP(dp) ){
			/* For bitmaps the source is passed as the source
			 * bitmap argument rather than as SRC1.
			 */
			SET_OA_SBM(oap,dp);
			SET_OA_SRC1(oap,NO_OBJ);
		}

		if( IS_REAL(dp) ){
			OA_ARGSTYPE(oap) = REAL_ARGS;
		} else if( IS_COMPLEX(dp) ){
			OA_ARGSTYPE(oap) = COMPLEX_ARGS;
		} else if( IS_QUAT(dp) ){
			OA_ARGSTYPE(oap) = QUATERNION_ARGS;
		} else {
			assert( AERROR("insure_ram_obj: bad argset type!?") );
		}

		call_vfunc( QSP_ARG  FIND_VEC_FUNC(FVMOV), oap );

		/* Download from the contiguous staging copy instead. */
		dp = c_dp;
	}

	gen_obj_dnload(QSP_ARG  tmp_dp, dp);

	/* The staging object is only needed for the download. */
	if( c_dp != NO_OBJ )
		delvec(QSP_ARG  c_dp);

	// BUG - when to delete?
	// We try using the VOLATILE flag.  This will work as long as
	// the input object is not VOLATILE!?

	SET_OBJ_FLAG_BITS(tmp_dp, DT_VOLATILE ) ;

	return tmp_dp;
}