/*
 * Wrap a captured libdc1394 frame in a data object header.
 *
 * The object is named "_frame<id>", is created with unsigned-byte precision,
 * one component, framep->size[0] columns and framep->size[1] rows.  Its data
 * pointer is set to framep->image, so the pixels are referenced, not copied.
 *
 * A warning is issued when total_bytes differs from image_bytes, because the
 * image may then carry padding.
 *
 * Returns the new object, or NULL if the object header could not be created.
 */
static Data_Obj *make_1394frame_obj(QSP_ARG_DECL  dc1394video_frame_t *framep)
{
	Dimension_Set dimset;
	Data_Obj *dp;
	char fname[32];

	/* bounded, so an unexpected id can never overrun fname */
	snprintf(fname,sizeof(fname),"_frame%d",framep->id);

	dimset.ds_dimension[0] = 1;	/* 1 or two depending on video mode (8 or 16) */
	dimset.ds_dimension[1] = framep->size[0];
	dimset.ds_dimension[2] = framep->size[1];
	dimset.ds_dimension[3] = 1;
	dimset.ds_dimension[4] = 1;

	dp = _make_dp(QSP_ARG  fname,&dimset,PREC_FOR_CODE(PREC_UBY));
	if( dp == NULL ){
		/* Without this test the macros below would dereference NULL. */
		sprintf(DEFAULT_ERROR_STRING,
			"make_1394frame_obj: error creating object %s",fname);
		warn(DEFAULT_ERROR_STRING);
		return(NULL);
	}

	/* Only one buffer?  where do we specify the index?  BUG */
	SET_OBJ_DATA_PTR(dp, framep->image);

	if( verbose )
		fprintf(stderr,"Object %s, data ptr set to 0x%lx\n",
			OBJ_NAME(dp),(long)OBJ_DATA_PTR(dp));

	if( framep->total_bytes != framep->image_bytes ){
		sprintf(DEFAULT_ERROR_STRING,"image may be padded...");
		warn(DEFAULT_ERROR_STRING);
	}

	return(dp);
}
/*
 * Recompute the data pointer of a child object from its parent.
 *
 * The child's offset is relative to the parent and is applied here as a
 * byte count: an earlier version scaled it by the element size, but the
 * offset appears to be kept in bytes already.  The stored offset itself is
 * left untouched; only the base pointer changes.
 */
static void vl2_update_offset(QSP_ARG_DECL  Data_Obj *dp )
{
	Data_Obj *parent_dp = OBJ_PARENT(dp);
	char *parent_base = (char *) OBJ_DATA_PTR(parent_dp);

	/* byte arithmetic on the parent's base address */
	SET_OBJ_DATA_PTR(dp, parent_base + OBJ_OFFSET(dp));
}
/*
 * Register a GL buffer object with OpenCL.
 *
 * Creates a cl_mem from the GL object associated with dp, using the OpenCL
 * context of the object's platform device, and stores it as the object's
 * data pointer.
 *
 * Returns 0 on success, -1 on failure (including builds without OpenGL).
 * Calls error1() if OpenGL use has been prohibited.
 */
static int ocl_register_buf(QSP_ARG_DECL  Data_Obj *dp)
{
	if( opengl_prohibited )
		error1("ocl_register_buf: Need to specify GL window BEFORE initializing OpenCL!?");

#ifdef HAVE_OPENGL
	cl_mem img;
	cl_int status;

	// Texture2D deprecated on Apple
	// Used to call clCreateFromGLTexture, but this works:
	//
	// NOTE(review): clCreateFromGLBuffer expects the name of a GL *buffer*
	// object, but the texture id is what gets passed here - confirm whether
	// OBJ_BUF_ID(dp) was intended.
	img = clCreateFromGLBuffer(
			OCLDEV_CTX( OBJ_PFDEV(dp) ),	// OCL context
			CL_MEM_READ_WRITE,		// flags
			OBJ_TEX_ID(dp),			// from glBufferData?
			&status);

	if( status != CL_SUCCESS ){
		// report the call that actually failed
		report_ocl_error(status, "clCreateFromGLBuffer");
		return -1;
	}

	// dp is a special buffer object...
	SET_OBJ_DATA_PTR(dp,img);
	return 0;

#else // ! HAVE_OPENGL

	warn("ocl_register_buf: Sorry, no OpenGL support in this build!?");
	return -1;

#endif // ! HAVE_OPENGL
}
/*
 * Point a child object at its parent's OpenCL memory, displaced by offset.
 *
 * Default build (USE_OPENCL_SUBREGION undefined):
 *   The child shares the parent's buffer, and the starting offset is
 *   recorded in the object so that it can be passed to the kernels.  The
 *   incoming offset is divided by 2 for complex objects and by 4 for
 *   quaternion objects, and must be a multiple of that factor (asserted).
 *
 * USE_OPENCL_SUBREGION build:
 *   A sub-buffer is created inside the parent buffer.  If that fails, the
 *   error is reported and the child falls back to the parent's data pointer.
 */
static void _ocl_offset_data(QSP_ARG_DECL  Data_Obj *dp, index_t offset)
{
#ifndef USE_OPENCL_SUBREGION
	/* The original code used subBuffers, but overlapping subregions
	 * don't work...
	 * So instead we use a common memory buffer, but keep track
	 * of the starting offset (in elements).  This offset has
	 * to be passed to the kernels.
	 */

	if( IS_COMPLEX(dp) ){
		assert( (offset & 1) == 0 );
		offset /= 2;
	} else if( IS_QUAT(dp) ){
		assert( (offset & 3) == 0 );
		offset /= 4;
	}

	SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	SET_OBJ_OFFSET( dp, OBJ_OFFSET(OBJ_PARENT(dp)) + offset );

#else // USE_OPENCL_SUBREGION
	cl_mem buf;
	cl_mem parent_buf;
	cl_buffer_region reg;
	cl_int status;
	int extra_offset;

	parent_buf = find_parent_buf(OBJ_PARENT(dp),&extra_offset);
	assert( parent_buf != NULL );

	reg.origin = (offset+extra_offset) * ELEMENT_SIZE(dp);

	// The region has to be big enough for all of the elements, so
	// OBJ_N_MACH_ELTS(dp) * ELEMENT_SIZE(dp) is not sufficient when the
	// child is strided within the parent:
	//
	// p p p p p p p
	// p p c c c p p
	// p p p p p p p
	// p p c c c p p
	//
	// The size is therefore the span from the first to the last element
	// addressed by the child.  Note that this cannot handle negative
	// increments!?
	reg.size =	  OBJ_SEQ_INC(dp)*(OBJ_SEQS(dp)-1)
			+ OBJ_FRM_INC(dp)*(OBJ_FRAMES(dp)-1)
			+ OBJ_ROW_INC(dp)*(OBJ_ROWS(dp)-1)
			+ OBJ_PXL_INC(dp)*(OBJ_COLS(dp)-1)
			+ OBJ_COMP_INC(dp)*(OBJ_COMPS(dp)-1)
			+ 1;
	reg.size *= ELEMENT_SIZE(dp);

	buf = clCreateSubBuffer ( parent_buf, CL_MEM_READ_WRITE,
			CL_BUFFER_CREATE_TYPE_REGION, &reg, &status);

	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateSubBuffer");
		SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	} else {
		SET_OBJ_DATA_PTR(dp,buf);
	}
	// BUG - Because this object doesn't "own" the data, the sub-buffer
	// won't be released when the object is destroyed, a possible memory
	// leak...
	// We need to add a special case, or make data releasing a
	// platform-specific function...
#endif // USE_OPENCL_SUBREGION
}
static COMMAND_FUNC( do_new_gl_buffer ) { const char *s; Data_Obj *dp; Platform_Device *pdp; Compute_Platform *cdp; dimension_t d,w,h; #ifdef HAVE_OPENGL Dimension_Set ds; int t; #endif // HAVE_OPENGL s = NAMEOF("name for GL buffer object"); cdp = pick_platform("platform"); if( cdp != NULL ) push_pfdev_context(QSP_ARG PF_CONTEXT(cdp) ); pdp = pick_pfdev("device"); if( cdp != NULL ) pop_pfdev_context(SINGLE_QSP_ARG); w = (int)HOW_MANY("width"); h = (int)HOW_MANY("height"); d = (int)HOW_MANY("depth"); /* what should the depth be??? default to 1 for now... */ if( pdp == NULL ) return; /* Make sure this name isn't already in use... */ dp = dobj_of(s); if( dp != NULL ){ sprintf(ERROR_STRING,"Data object name '%s' is already in use, can't use for GL buffer object.",s); warn(ERROR_STRING); return; } #ifdef HAVE_OPENGL // BUG need to be able to set the cuda device. // Note, however, that we don't need GL buffers on the Tesla... //set_data_area(cuda_data_area[0][0]); set_data_area( PFDEV_AREA(pdp,PFDEV_GLOBAL_AREA_INDEX) ); ds.ds_dimension[0]=d; ds.ds_dimension[1]=w; ds.ds_dimension[2]=h; ds.ds_dimension[3]=1; ds.ds_dimension[4]=1; dp = _make_dp(QSP_ARG s,&ds,PREC_FOR_CODE(PREC_UBY)); if( dp == NULL ){ sprintf(ERROR_STRING, "Error creating data_obj header for %s",s); error1(ERROR_STRING); } SET_OBJ_FLAG_BITS(dp, DT_NO_DATA); /* can't free this data */ SET_OBJ_FLAG_BITS(dp, DT_GL_BUF); /* indicate obj is a GL buffer */ SET_OBJ_DATA_PTR(dp, NULL); //fprintf(stderr,"do_new_gl_buffer: allocating gl_info for %s\n",OBJ_NAME(dp)); SET_OBJ_GL_INFO(dp, (GL_Info *) getbuf( sizeof(GL_Info) ) ); //fprintf(stderr,"do_new_gl_buffer: DONE allocating gl_info for %s\n",OBJ_NAME(dp)); glew_check(SINGLE_QSP_ARG); /* without this, we get a segmentation * violation on glGenBuffers??? */ // We need an extra field in which to store the GL identifier... // AND another extra field in which to store the associated texid. // Why is this ifdef here? These don't seem to depend // on libglew??? 
// Answer: We need libglew to bring in openGL extensions like glBindBuffer... //advise("calling glGenBuffers"); //fprintf(stderr,"OBJ_GL_INFO(%s) = 0x%lx\n",OBJ_NAME(dp),(long)OBJ_GL_INFO(dp)); //fprintf(stderr,"OBJ_BUF_ID_P(%s) = 0x%lx\n",OBJ_NAME(dp),(long)OBJ_BUF_ID_P(dp)); // BUG glGenBuffers seems to require v1.5??? glGenBuffers(1, OBJ_BUF_ID_P(dp) ); // first arg is # buffers to generate? //sprintf(ERROR_STRING,"glGenBuffers gave us buf_id = %d",OBJ_BUF_ID(dp)); //advise(ERROR_STRING); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, OBJ_BUF_ID(dp) ); // glBufferData will allocate the memory for the buffer, // but won't copy unless the pointer is non-null // How do we get the gpu memory space address? // That must be with map glBufferData(GL_PIXEL_UNPACK_BUFFER, OBJ_COMPS(dp) * OBJ_COLS(dp) * OBJ_ROWS(dp), NULL, GL_STREAM_DRAW); /* buffer arg set to 0 unbinds any previously bound buffers... * and restores client memory usage. */ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); //#endif // HAVE_LIBGLEW glGenTextures(1, OBJ_TEX_ID_P(dp) ); // makes a texture name fprintf(stderr,"new_gl_buffer: new texture name is 0x%x\n",OBJ_TEX_ID(dp)); glBindTexture(GL_TEXTURE_2D, OBJ_TEX_ID(dp) ); t = gl_pixel_type(dp); glTexImage2D( GL_TEXTURE_2D, 0, // level-of-detail - is this the same as miplevel??? OBJ_COMPS(dp), // internal format, can also be symbolic constant such as // GL_RGBA etc OBJ_COLS(dp), // width - must be 2^n+2 (border) for some n??? OBJ_ROWS(dp), // height - must be 2^m+2 (border) for some m??? 0, // border - must be 0 or 1 t, // format of pixel data GL_UNSIGNED_BYTE, // type of pixel data NULL // pixel data - null pointer means // allocate but do not copy? // - offset into PIXEL_UNPACK_BUFFER?? ); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); // Why was this here? It would seem to un-bind the target??? glBindTexture(GL_TEXTURE_2D, 0); //glFinish(); // necessary or not? 
//advise("calling platform-specific buffer registration function"); if( (*PF_REGBUF_FN(PFDEV_PLATFORM(pdp)))( QSP_ARG dp ) < 0 ){ WARN("do_new_gl_buffer: Error in platform-specific buffer registration!?"); // BUG? - should clean up here! } // Leave the buffer mapped by default //cutilSafeCall(cudaGLMapBufferObject( &OBJ_DATA_PTR(dp), OBJ_BUF_ID(dp) )); //advise("calling platform-specific buffer mapping function"); if( (*PF_MAPBUF_FN(PFDEV_PLATFORM(pdp)))( QSP_ARG dp ) < 0 ){ WARN("do_new_gl_buffer: Error in platform-specific buffer mapping!?"); // BUG? - should clean up here! } SET_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED); // propagate change to children and parents propagate_flag(dp,DT_BUF_MAPPED); #else // ! HAVE_OPENGL NO_OGL_MSG #endif // ! HAVE_OPENGL } /* end do_new_gl_buffer */
/*
 * Matrix product: dpto = dpfr1 * dpfr2, computed as one dot product (vdot)
 * per element of the destination.
 *
 * Requirements, each reported with a warning and an early return when not
 * met:
 *   - all three objects pass is_good_for_inner();
 *   - precision and type of both operands match the destination;
 *   - rows(dpto) == rows(dpfr1), cols(dpto) == cols(dpfr2),
 *     cols(dpfr1) == rows(dpfr2).
 *
 * Temporary objects (a row of dpfr1, a column of dpfr2, its row-shaped
 * equivalence, and a 1x1 subimage of dpto) are created once, re-pointed for
 * every destination element, and deleted before returning.
 */
void inner(QSP_ARG_DECL  Data_Obj *dpto,Data_Obj *dpfr1,Data_Obj *dpfr2)
{
	dimension_t i,j;
	Vec_Obj_Args oa1, *oap=&oa1;
	Dimension_Set *sizes;
	index_t dst_indices[N_DIMENSIONS]={0,0,0,0,0};
	index_t src1_indices[N_DIMENSIONS]={0,0,0,0,0};
	index_t src2_indices[N_DIMENSIONS]={0,0,0,0,0};
	Data_Obj *col_dp;

#ifdef CAUTIOUS
	clear_obj_args(oap);
#endif /* CAUTIOUS */

	/* The types and precisions should be whatever is allowed by vdot,
	 * which is float, double, real and complex...
	 */

	if( ! is_good_for_inner(dpto,"inner") ) return;
	if( ! is_good_for_inner(dpfr1,"inner") ) return;
	if( ! is_good_for_inner(dpfr2,"inner") ) return;

	/* we need to make sure that the types and precisions MATCH! */
	if( ! prec_and_type_match(dpto,dpfr1,"inner") ) return;
	if( ! prec_and_type_match(dpto,dpfr2,"inner") ) return;

	if( OBJ_ROWS(dpto) != OBJ_ROWS(dpfr1) ){
		sprintf(DEFAULT_ERROR_STRING,
	"inner: dpto %s (%d) and first operand %s (%d) must have same # rows",
			OBJ_NAME(dpto),OBJ_ROWS(dpto),OBJ_NAME(dpfr1),OBJ_ROWS(dpfr1));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}
	if( OBJ_COLS(dpto) != OBJ_COLS(dpfr2) ){
		sprintf(DEFAULT_ERROR_STRING,
	"inner: target %s (%d) and second operand %s (%d) must have same # columns",
			OBJ_NAME(dpto),OBJ_COLS(dpto),OBJ_NAME(dpfr2),OBJ_COLS(dpfr2));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}
	if( OBJ_COLS(dpfr1) != OBJ_ROWS(dpfr2) ){
		sprintf(DEFAULT_ERROR_STRING,
	"inner: # cols of operand %s (%d) must match # rows of operand %s (%d)",
			OBJ_NAME(dpfr1),OBJ_COLS(dpfr1),OBJ_NAME(dpfr2),OBJ_ROWS(dpfr2));
		NWARN(DEFAULT_ERROR_STRING);
		return;
	}

	/* The dimension set is obtained only after validation has passed, so
	 * that the early returns above do not abandon it.
	 * NOTE(review): it is still not released at the end of this function;
	 * the matching release primitive is not visible here - confirm and add.
	 */
	sizes=NEW_DIMSET;
	for(i=0;i<N_DIMENSIONS;i++)
		SET_DIMENSION(sizes,i,1);

	if( IS_COMPLEX(dpto) )	SET_OA_ARGSTYPE(oap,COMPLEX_ARGS);
	else			SET_OA_ARGSTYPE(oap,REAL_ARGS);

	/* vdot thinks its inputs have the same shape, so if we are taking the
	 * inner product of a column vector with a row vector, we have to
	 * transpose one of the inputs...
	 */

	if( OBJ_ROWS(dpfr1) > 1 )
		SET_OA_SRC1(oap,d_subscript(QSP_ARG dpfr1,0) );	/* subscript first row */
	else
		SET_OA_SRC1(oap,dpfr1);			/* object is a row */

	/* cols(dpto) == cols(dpfr2) was verified above */
	if( OBJ_COLS(dpto) > 1 )
		col_dp=c_subscript(QSP_ARG dpfr2,0);	/* first column */
	else
		col_dp=dpfr2;				/* object is a column */

	SET_OA_DEST(oap,mk_subimg(QSP_ARG dpto,0,0,"target pixel",1,1) );

	/* Reshape the column as a row so that it matches the first operand. */
	SET_DIMENSION(sizes,1,OBJ_ROWS(col_dp));
	SET_DIMENSION(sizes,0,OBJ_COMPS(col_dp));

	SET_OA_SRC2(oap,make_equivalence(QSP_ARG "_transposed_column",
					col_dp,sizes,OBJ_PREC_PTR(col_dp)) );

	for(i=0;i<OBJ_ROWS(dpto);i++){
		/* i-th row of the first operand */
		src1_indices[2]=i;
		SET_OBJ_DATA_PTR( OA_SRC1(oap), multiply_indexed_data(dpfr1,src1_indices) );
		for(j=0;j<OBJ_COLS(dpto);j++){
			dst_indices[2]=i;	/* destination row */
			dst_indices[1]=j;	/* destination column */
			SET_OBJ_DATA_PTR( OA_DEST(oap), multiply_indexed_data(dpto,dst_indices) );
			/* j-th column of the second operand */
			src2_indices[1]=j;
			SET_OBJ_DATA_PTR( OA_SRC2(oap), multiply_indexed_data(dpfr2,src2_indices) );
			vdot(QSP_ARG oap);
		}
	}

	delvec(QSP_ARG OA_SRC2(oap) );		/* "_transposed_column" */

	/* Only delete the temporaries, never the caller's operands. */
	if( OA_SRC1(oap) != dpfr1 )
		delvec(QSP_ARG OA_SRC1(oap) );
	if( col_dp != dpfr2 )
		delvec(QSP_ARG col_dp);

	delvec(QSP_ARG OA_DEST(oap) );
}
/*
 * Copy src_dp into dst_dp when the two differ in size, by integer
 * subsampling in each dimension.
 *
 * For every dimension:
 *   - enlargement (dst larger than src): the destination is subsampled with
 *     a stride of floor(dst/src), and the copy is repeated once for each
 *     phase of that stride, so every source element is replicated;
 *   - otherwise (reduction or equal size): the source is subsampled with a
 *     stride of ceil(src/dst) and copied once.
 *
 * Size changes are not combined with precision conversion.
 *
 * Returns 0 on success (dst_dp is then flagged DT_ASSIGNED), or -1 if the
 * precisions differ or a subsample object could not be created.
 */
static int change_size(QSP_ARG_DECL  Data_Obj *dst_dp,Data_Obj *src_dp )
{
	Vec_Obj_Args oa1, *oap=&oa1;
	Dimension_Set size_ds, n_ds;
	Dimension_Set *size_dsp=(&size_ds), *n_dsp=(&n_ds);
	Data_Obj *src_ss_dp, *dst_ss_dp;
	dimension_t i,j,k,l,m;
	index_t offsets[N_DIMENSIONS]={0,0,0,0,0};
	incr_t dst_incrs[N_DIMENSIONS], src_incrs[N_DIMENSIONS];
	index_t dst_indices[N_DIMENSIONS]={0,0,0,0,0}, src_indices[N_DIMENSIONS]={0,0,0,0,0};

	/* For simplicity, we don't allow size changes to be combined with conversions */

	if( !dp_same_prec(QSP_ARG dst_dp,src_dp,"change_size") )
		return(-1);

	for(i=0;i<N_DIMENSIONS;i++){
		dimension_t dst_len = OBJ_TYPE_DIM(dst_dp,i);
		dimension_t src_len = OBJ_TYPE_DIM(src_dp,i);
		dimension_t factor;

		if( dst_len > src_len ){
			/* enlargement - subsample the destination */
			factor = dst_len / src_len;		/* floor */

			SET_DIMENSION(size_dsp,i, src_len );
			SET_DIMENSION(n_dsp,i, factor );
			dst_incrs[i] = DIMENSION(n_dsp,i);
			src_incrs[i] = 1;
		} else {
			/* reduction - subsample the source */

			/* Integer ceiling.  Applying ceil() to src/dst has no
			 * effect because the quotient is already truncated;
			 * the factor then comes out too small whenever src is
			 * not a multiple of dst, and the subsample size below
			 * can exceed the destination (e.g. src 5, dst 3).
			 */
			factor = src_len / dst_len;
			if( (src_len % dst_len) != 0 )
				factor++;

			SET_DIMENSION(size_dsp,i, src_len / factor );
			/* We don't need to do this multiple times, just pick one and do it */
			SET_DIMENSION(n_dsp,i, 1);
			src_incrs[i] = factor;
			dst_incrs[i] = 1;
		}
	}

	/* make the subsamples.
	 * the column increment is expressed in columns, etc.
	 */
	dst_ss_dp=make_subsamp(QSP_ARG "chngsize_dst_obj",dst_dp,size_dsp,offsets,dst_incrs);
	src_ss_dp=make_subsamp(QSP_ARG "chngsize_src_obj",src_dp,size_dsp,offsets,src_incrs);

	/* Guard against a subsample that could not be created, so that the
	 * data pointer macros below never see a NULL object.
	 */
	if( dst_ss_dp == NULL || src_ss_dp == NULL ){
		if( dst_ss_dp != NULL ) delvec(QSP_ARG dst_ss_dp);
		if( src_ss_dp != NULL ) delvec(QSP_ARG src_ss_dp);
		return(-1);
	}

	clear_obj_args(oap);
	SET_OA_DEST(oap,dst_ss_dp);
	SET_OA_SRC_OBJ(oap,0, src_ss_dp);
	SET_OA_ARGSTYPE(oap, REAL_ARGS);
	SET_OA_PFDEV(oap,OBJ_PFDEV(dst_dp));

	/* In each dimension the loop counter selects the phase of whichever
	 * object is strided: the destination when it is subsampled
	 * (increment > 1), otherwise the source.
	 */
	for(i=0;i<DIMENSION(n_dsp,4);i++){		/* foreach sequence to copy */
		if( dst_incrs[4] > 1 )	dst_indices[4]=i;
		else			src_indices[4]=i;
		for(j=0;j<DIMENSION(n_dsp,3);j++){	/* foreach frame to copy */
			if( dst_incrs[3] > 1 )	dst_indices[3]=j;
			else			src_indices[3]=j;
			for(k=0;k<DIMENSION(n_dsp,2);k++){	/* foreach row */
				if( dst_incrs[2] > 1 )	dst_indices[2]=k;
				else			src_indices[2]=k;
				for(l=0;l<DIMENSION(n_dsp,1);l++){	/* foreach col */
					if( dst_incrs[1] > 1 )	dst_indices[1]=l;
					else			src_indices[1]=l;
					for(m=0;m<DIMENSION(n_dsp,0);m++){	/* foreach comp */
						if( dst_incrs[0] > 1 )	dst_indices[0]=m;
						else			src_indices[0]=m;

						/* relocate the appropriate subsample */
						SET_OBJ_DATA_PTR(dst_ss_dp, multiply_indexed_data(dst_dp,dst_indices) );
						SET_OBJ_DATA_PTR(src_ss_dp, multiply_indexed_data(src_dp,src_indices) );

						// This doesn't check for cuda obj...
						perf_vfunc(QSP_ARG FVMOV, oap );
					}
				}
			}
		}
	}

	delvec(QSP_ARG dst_ss_dp);
	delvec(QSP_ARG src_ss_dp);

	SET_OBJ_FLAG_BITS(dst_dp, DT_ASSIGNED);

	return(0);
}