/**
 * Fill `cv` with an OpenCL platform, a GPU device on that platform, a context
 * for the device and a profiling-enabled command queue.
 *
 * Every OpenCL status is stored in `cv.err` and passed to CHK_ERR. When DEBUG
 * is defined, the device's global memory size and maximum work-item sizes are
 * printed to std::cout.
 *
 * @param cv  OpenCL state to populate (platform, platforms, device_id,
 *            context, commands, err).
 */
void initialize_ocl(cl_vars_t& cv)
{
  // Only the first platform is requested; cv.platforms receives the platform count.
  cv.err = clGetPlatformIDs(1, &(cv.platform), &(cv.platforms));
  CHK_ERR(cv.err);

  // A single GPU device on that platform.
  cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, 1, &(cv.device_id), NULL);
  CHK_ERR(cv.err);

  cv.context = clCreateContext(0, 1, &(cv.device_id), NULL, NULL, &(cv.err));
  CHK_ERR(cv.err);

  cv.commands = clCreateCommandQueue(cv.context, cv.device_id, CL_QUEUE_PROFILING_ENABLE, &(cv.err));
  CHK_ERR(cv.err);

#ifdef DEBUG
  std::cout << "CL fill vars success" << std::endl;

  // Device info. The query statuses are now checked so that a failed query
  // is reported instead of printing indeterminate values.
  cl_ulong mem_size = 0;
  cv.err = clGetDeviceInfo(cv.device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &mem_size, NULL);
  CHK_ERR(cv.err);
  std::cout << "Global mem size: " << mem_size << std::endl;

  size_t max_work_item[3] = {0, 0, 0};
  cv.err = clGetDeviceInfo(cv.device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item), max_work_item, NULL);
  CHK_ERR(cv.err);
  std::cout << "Max work item sizes: " << max_work_item[0] << ", " << max_work_item[1] << ", " << max_work_item[2] << std::endl;
#endif
}
/* ...buffer allocation from frontal camera (object detection engine) */
/*
 * On the first buffer (app->f_width == 0) the stream dimensions are latched,
 * the object detection engine is initialized and the viewport is set up; on
 * later buffers the dimensions must match the latched ones. In both cases a
 * texture is created for the buffer, object-detection metadata is attached
 * and a weak-ref destructor is registered.
 *
 * Returns 0 on success; failures are reported through CHK_ERR with the error
 * code given as its second argument.
 */
static int objdet_input_alloc(void *data, GstBuffer *buffer)
{
    app_data_t     *app = data;
    vsink_meta_t   *vmeta = gst_buffer_get_vsink_meta(buffer);
    objdet_meta_t  *ometa;
    int             w, h;

    /* ...a buffer without video-sink metadata cannot be processed */
    CHK_ERR(vmeta, -EINVAL);

    w = vmeta->width, h = vmeta->height;

    if (app->f_width)
    {
        /* ...verify buffer dimensions are valid */
        CHK_ERR(w == app->f_width && h == app->f_height, -EINVAL);
    }
    else
    {
        /* ...check dimensions are valid */
        CHK_ERR(w && h, -EINVAL);

        /* ...set buffer dimensions */
        app->f_width = w, app->f_height = h;

        /* ...initialize object detection engine */
        CHK_ERR(app->od = objdet_engine_init(&objdet_callback, app, w, h, __pixfmt_yuv_bpp(vmeta->format), app->od_cfg), -errno);

        /* ...create a viewport for data visualization */
        texture_scale_to_window(&app->view, app->window, w, h, &app->matrix);
    }

    /* ...allocate texture to wrap the buffer */
    CHK_ERR(vmeta->priv = texture_create(w, h, vmeta->plane, vmeta->format), -errno);

    /* ...add custom buffer metadata */
    CHK_ERR(ometa = gst_buffer_add_objdet_meta(buffer), -(errno = ENOMEM));
    CHK_ERR(ometa->buf = texture_map(vmeta->priv, CL_MEM_READ_ONLY), -errno);
    GST_META_FLAG_SET(ometa, GST_META_FLAG_POOLED);

    /* ...add custom destructor to the buffer */
    gst_mini_object_weak_ref(GST_MINI_OBJECT(buffer), __destroy_od_texture, app);

    TRACE(INFO, _b("front-camera input buffer %p allocated (%p)"), buffer, ometa->buf);

    return 0;
}
/**
 * Enqueue host-to-device writes of this operand's array, one per device in
 * the partition.
 *
 * Each device receives the element range [partition.d * devId,
 * partition.d * (devId + 1)) clipped to the array dimension; when
 * partition.d is 0 every device receives the whole array. Writes are issued
 * with CL_FALSE as the blocking flag. With PROFILE defined, the write's event
 * is recorded in transfer_events.
 *
 * @param clv  OpenCL state; clv.commands[devId] is the queue used per device.
 */
void ArrayOperand::callUp(cl_vars_t clv)
{
  Array * arr = dynamic_cast<Array*>(variable);

  // One transfer per participating device.
  for(int dev = 0 ; dev < partition.nd ; ++dev)
  {
    // Element range [first_elem, last_elem) assigned to this device.
    ssize_t first_elem = 0;
    ssize_t last_elem = 0;
    if(partition.d != 0)
    {
      first_elem = partition.d * dev;
      last_elem = partition.d * (dev+1);
      // Clip the final slice to the end of the array.
      if(last_elem > arr->getDim())
      {
        last_elem = arr->getDim();
      }
    }
    else
    {
      first_elem = 0;
      last_elem = arr->getDim();
    }

    ssize_t elem_count = last_elem - first_elem;
    ssize_t byte_count = elem_count * arr->bytes_per_element;
    ssize_t byte_start = first_elem * arr->bytes_per_element;

    // Nothing to send for an empty slice.
    if(byte_count <= 0)
    {
      continue;
    }

#ifdef VERBOSE_EXECUTION
    docs.execution_ss << "TRANSFER: Calling up variable: " << variable->properties["name"];
    docs.execution_ss << "\tnbytes: " << byte_count;
    docs.execution_ss << "\tbytes_offset: " << byte_start;
    docs.execution_ss << "\tdevice id: " << dev << std::endl;
#endif

    cl_int status = CL_SUCCESS;
#ifdef PROFILE
    // Keep the event so the transfer can be examined later via transfer_events.
    transfer_prof_t tp;
    tp.devId = dev;
    tp.variable = variable;
    status = clEnqueueWriteBuffer(clv.commands[dev], arr->gpu_data[dev], CL_FALSE,
                                  byte_start, byte_count, arr->cpu_data + byte_start,
                                  0, NULL, &(tp.event));
    transfer_events.push_back(tp);
#else
    status = clEnqueueWriteBuffer(clv.commands[dev], arr->gpu_data[dev], CL_FALSE,
                                  byte_start, byte_count, arr->cpu_data + byte_start,
                                  0, NULL, NULL);
#endif
    CHK_ERR(status);
  }
}
/*
 * Recall an instrument state whose register number is given as text.
 *
 * pszName is converted with atoi(); a NULL pszName selects register 0.
 * The converted number is passed to age441x_loadFromRegister().
 * NOTE(review): CHK_ERR and RETURN_STDERR_POINTER are macros defined
 * elsewhere; presumably they record the status in StdError, jump to the
 * Error label and return a pointer describing the outcome - confirm.
 */
void* DLLEXPORT PowerMeter_RecallState_ByName( int hInstrumentHandle , char *pszName )
{
	STD_ERROR		StdError		=	{0};

	int				iRegister		=	pszName ? atoi( pszName ) : 0;

	CHK_ERR( age441x_loadFromRegister( hInstrumentHandle , iRegister ));

Error:

	RETURN_STDERR_POINTER;
}
/*
 * Fetch a single power reading through age441x_fetch() and, when `value`
 * is non-NULL, store it there.
 *
 * iChannel and timeout are not used by this function; the second argument
 * of age441x_fetch() is fixed at 1.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER come from elsewhere;
 * presumably a failing status skips to the Error label - confirm.
 */
void* DLLEXPORT PowerMeter_FetchPower( int hInstrumentHandle , int iChannel , double timeout , double *value )
{
	STD_ERROR		StdError		=	{0};

	double			lfReading		=	0;

	CHK_ERR( age441x_fetch ( hInstrumentHandle , 1 , AGE441X_MEAS_SINGLE , &lfReading ));

	/* The output pointer is optional. */
	if ( value )
	{
		*value = lfReading;
	}

Error:

	RETURN_STDERR_POINTER;
}
/*
 * Configure the marker, perform a sweep, then query the marker amplitude
 * and frequency from the hp8563e driver.
 *
 * pPosition receives the queried frequency and pValue the queried
 * amplitude; either pointer may be NULL. `channel` and `marker` are not
 * used by this function.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER are external macros;
 * presumably a failing driver call jumps to the Error label - confirm.
 */
void* DLLEXPORT SpectrumAnalyzer_PhaseNoise_GetMarker ( ViSession viInstrumentHandle , int channel , int marker , double *pPosition , double *pValue )
{
	STD_ERROR		StdError		=	{0};

	double			lfMarkerFreq	=	0.0;
	double			lfMarkerAmpl	=	0.0;

	CHK_ERR( hp8563e_confMkr( viInstrumentHandle , VI_ON , VI_OFF ));

	CHK_ERR( hp8563e_perfmSwp ( viInstrumentHandle ));

	CHK_ERR( hp8563e_queryAmpl( viInstrumentHandle , 0 , &lfMarkerAmpl ));

	CHK_ERR( hp8563e_queryFreq( viInstrumentHandle , 1 , &lfMarkerFreq ));

	/* Both outputs are optional. */
	if ( pPosition )
	{
		*pPosition = lfMarkerFreq;
	}

	if ( pValue )
	{
		*pValue = lfMarkerAmpl;
	}

Error:

	RETURN_STDERR_POINTER;
}
int setGlobalOffsets() { //only set the global offsets once. if (setGlobalOffsets_) return(0); if (matrixGraph_.get() == NULL) return(-1); MPI_Comm comm = matrixGraph_->getRowSpace()->getCommunicator(); int num_procs = fei::numProcs(comm); int local_proc = fei::localProc(comm); std::vector<int> globalOffsets; std::vector<int> globalBlkOffsets; if (reducer_.get() != NULL) { int localsize = reducer_->getLocalReducedEqns().size(); numLocalEqns_ = localsize; std::vector<int> lsizes(num_procs, 0); std::vector<int> gsizes(num_procs, 0); lsizes[local_proc] = localsize; fei::GlobalMax(comm, lsizes, gsizes); globalOffsets.resize(num_procs+1); int offset = 0; for(int p=0; p<num_procs; ++p) { globalOffsets[p] = offset; offset += gsizes[p]; } globalOffsets[num_procs] = offset; globalBlkOffsets = globalOffsets; } else { fei::SharedPtr<fei::VectorSpace> vecSpace = matrixGraph_->getRowSpace(); vecSpace->getGlobalIndexOffsets(globalOffsets); vecSpace->getGlobalBlkIndexOffsets(globalBlkOffsets); numLocalEqns_ = globalOffsets[local_proc+1]-globalOffsets[local_proc]; } CHK_ERR(linsyscore_->setGlobalOffsets(num_procs+1, &globalBlkOffsets[0], &globalOffsets[0], &globalBlkOffsets[0])); setGlobalOffsets_ = true; return(0); }
/*
 * Ask the hp8563e driver for the text of error code iError and place it in
 * StdError.pszErrorDescription, which is allocated here via FREE_CALLOC.
 *
 * pErrorMessage is not used by this function. If the description buffer
 * could not be allocated the driver is not queried.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER / FREE_CALLOC are external
 * macros; their exact behavior should be confirmed at their definitions.
 */
void* DLLEXPORT SpectrumAnalyzer_GetErrorTextMessage ( ViSession vhInstrumentHandle , int iError , char *pErrorMessage )
{
	STD_ERROR		StdError		=	{0};

	FREE_CALLOC( StdError.pszErrorDescription , STD_STRING , sizeof(char*));

	/* Without a buffer there is nowhere to put the message. */
	if ( !StdError.pszErrorDescription )
		goto Error;

	CHK_ERR( hp8563e_errorMessage ( vhInstrumentHandle , iError ,(char*)StdError.pszErrorDescription ));

Error:

	RETURN_STDERR_POINTER;
}
//---------------------------------------------------------------------------- int snl_fei::LinearSystem_General::setBCValuesOnVector(fei::Vector* vector) { if (essBCvalues_ == NULL) { return(0); } if (essBCvalues_->size() == 0) { return(0); } CHK_ERR( vector->copyIn(essBCvalues_->size(), &(essBCvalues_->indices())[0], &(essBCvalues_->coefs())[0]) ); return(0); }
/*
 * Allocate a rule through allocRule() and fill it in as a sliding-window
 * rule: `allowed` operations, waiting behavior `wait`, window length
 * `window`.
 *
 * Returns cke_Success once the rule is filled in. `err` starts as
 * cke_General.
 * NOTE(review): CHK_ERR is defined elsewhere; presumably it jumps to the
 * `error` label (possibly updating `err`) when allocRule fails - confirm.
 */
ckError ckOpLimitRule_CreateSliding(ckOpLimitRule** ruleOut, int allowed, ckTime window, ckBool wait)
{
    ckError err = cke_General;
    ckOpLimitRule* rule;

    CHK_ERR( allocRule(ruleOut) );

    /* Work on the freshly allocated rule through a local alias. */
    rule = *ruleOut;
    rule->allowedCount = allowed;
    rule->waitForSuccess = wait;
    rule->type = olrt_SlidingWindow;
    rule->data.sliding.window = window;

    err = cke_Success;

error:
    return err;
}
/**
 * Allocate the host buffer and one device buffer per OpenCL device for this
 * array.
 *
 * The byte size is dim0 * dim1 * bytes-per-element, where each dimension is
 * MYMAX(property, 1) and the element size comes from the "dtype" property.
 * The host buffer is zero-filled by calloc; each device buffer is created
 * read-write in clv.context.
 *
 * NOTE(review): the calloc result is not checked here.
 *
 * @param clv  OpenCL state providing the context and the device count.
 */
void Array::allocate(cl_vars_t clv)
{
  size_t extent0 = MYMAX(get_property_int("dim0"), 1);
  size_t extent1 = MYMAX(get_property_int("dim1"), 1);

  bytes_per_element = get_num_bytes(properties["dtype"]);
  nbytes = extent0 * extent1 * bytes_per_element;

#ifdef VERBOSE_COMPILATION
  docs.compilation_ss << "Allocating: " << nbytes << "\tFor array: " << properties["name"] << std::endl;
#endif

  // Host-side storage.
  cpu_data = (char *) calloc(nbytes, sizeof(char));

  // Device-side storage, one buffer per device.
  for(int dev = 0 ; dev < clv.num_devices ; ++dev)
  {
    cl_int status;
    gpu_data[dev] = clCreateBuffer(clv.context, CL_MEM_READ_WRITE, nbytes, NULL, &status);
    CHK_ERR(status);
  }
}
int fei::Vector_core::writeToFile(const char* filename, bool matrixMarketFormat) { int numProcs = fei::numProcs(comm_); int localProc =fei::localProc(comm_); double coef; static char mmbanner[] = "%%MatrixMarket matrix array real general"; for(int p=0; p<numProcs; ++p) { fei::Barrier(comm_); if (p != localProc) continue; FEI_OFSTREAM* outFile = NULL; if (p==0) { outFile = new FEI_OFSTREAM(filename, IOS_OUT); FEI_OFSTREAM& ofref = *outFile; if (matrixMarketFormat) { ofref << mmbanner << FEI_ENDL; ofref << eqnComm_->getGlobalOffsets()[numProcs] << " 1" << FEI_ENDL; } else { ofref << eqnComm_->getGlobalOffsets()[numProcs] << FEI_ENDL; } } else outFile = new FEI_OFSTREAM(filename, IOS_APP); FEI_OFSTREAM& ofref = *outFile; ofref.setf(IOS_SCIENTIFIC, IOS_FLOATFIELD); ofref.precision(13); for(int i=firstLocalOffset_; i<=lastLocalOffset_; ++i) { CHK_ERR( copyOut(1, &i, &coef) ); if (matrixMarketFormat) { ofref << " " << coef << FEI_ENDL; } else { ofref << i << " " << coef << FEI_ENDL; } } delete outFile; } return(0); }
//============================================================================== int load_BC_data(FEI* fei, PoissonData& poissonData) { //first, have the data object generate the BC data poissonData.calculateBCs(); int numBCNodes = poissonData.getNumBCNodes(); GlobalID* nodeIDs = poissonData.getBCNodeIDs(); int fieldID = poissonData.getBCFieldID(); double* values = poissonData.getBCValues(); std::vector<int> offsets(numBCNodes, 0); CHK_ERR( fei->loadNodeBCs(numBCNodes, nodeIDs, fieldID, &offsets[0], values) ); return(0); }
/**
 * Move every stored boundary condition into bcEqns and then empty the
 * store.
 *
 * For each (equation, value) entry in bcs_, a one-term equation is added
 * to bcEqns with the equation number as both the row and the single column
 * index and the prescribed value as the coefficient.
 *
 * @return 0 on success, or whatever CHK_ERR propagates from addEqn (in which
 *         case bcs_ is not cleared).
 */
int DirichletBCManager::finalizeBCEqns(EqnBuffer& bcEqns)
{
  for(bc_map::iterator it = bcs_.begin(); it != bcs_.end(); ++it) {
    int eqnNumber = it->first;
    double prescribed = it->second;

    CHK_ERR( bcEqns.addEqn(eqnNumber, &prescribed, &eqnNumber, 1, false) );
  }

  bcs_.clear();

  return(0);
}
/*
 * Allocate a rule through allocRule() and fill it in as a bucket rule:
 * `allowed` operations, waiting behavior `wait`, bucket `length` and
 * `epoch`.
 *
 * Returns cke_Success once the rule is filled in. `err` starts as
 * cke_General.
 * NOTE(review): CHK_ERR is defined elsewhere; presumably it jumps to the
 * `error` label (possibly updating `err`) when allocRule fails - confirm.
 */
ckError ckOpLimitRule_CreateBucket(ckOpLimitRule** ruleOut, int allowed, ckTime length, ckTime epoch, ckBool wait)
{
    ckError err = cke_General;
    ckOpLimitRule* rule;

    CHK_ERR( allocRule(ruleOut) );

    /* Work on the freshly allocated rule through a local alias. */
    rule = *ruleOut;
    rule->allowedCount = allowed;
    rule->waitForSuccess = wait;
    rule->type = olrt_Buckets;
    rule->data.bucket.length = length;
    rule->data.bucket.epoch = epoch;

    err = cke_Success;

error:
    return err;
}
/*
 * Fetch a single power reading through age441x_fetch() and, when `value`
 * is non-NULL, store it there.
 *
 * iChannel and timeout are not used by this function; the second argument
 * of age441x_fetch() is fixed at 2. The function returns a pointer to an
 * errEV produced by CALLOC_COPY from the local `ret`.
 * NOTE(review): CHK_ERR and CALLOC_COPY are external macros; presumably
 * CHK_ERR records the status in `ret` and jumps to Error - confirm.
 */
void* DLLEXPORT PowerMeter_FetchPower( int hInstrumentHandle , int iChannel , double timeout , double *value )
{
	errEV			ret				=	{0};
	errEV			*pRet			=	NULL;

	double			lfReading		=	0;

	CHK_ERR( age441x_fetch ( hInstrumentHandle , 2 , AGE441X_MEAS_SINGLE , &lfReading ));

	/* The output pointer is optional. */
	if ( value )
	{
		*value = lfReading;
	}

Error:

	CALLOC_COPY(pRet,1,sizeof(errEV),&ret,sizeof(errEV));

	return ((void*)pRet);
}
//------------------------------------------------------------------------------ int NodeCommMgr::getSendMessage(int destProc, std::vector<int>& message) { std::vector<int>::iterator rs_iter = std::lower_bound(remoteSharingProcs_.begin(), remoteSharingProcs_.end(), destProc); if (rs_iter == remoteSharingProcs_.end() || destProc != *rs_iter) { ERReturn(-1); } int idx = rs_iter - remoteSharingProcs_.begin(); int len = 0; CHK_ERR( getSendMessageLength(destProc, len) ); message.resize(len); packLocalNodesAndData(&message[0], destProc, nodesPerSharingProc_[idx], len); return(0); }
int test_Algebraic::runtests() { if (numProcs_ < 2) { CHK_ERR( serialtest1() ); CHK_ERR( serialtest2() ); } CHK_ERR( test1() ); CHK_ERR( test2() ); CHK_ERR( test3() ); CHK_ERR( test4() ); return(0); }
void runKernel(cl_vars_t cv, cl_kernel cl_kern, kernel kern, std::map<std::string, cl_mem_data> gpu_data, double* vars) { int num_arrays = kern.num_arrays; string* arrays = kern.arrays; int num_vars = kern.num_vars; cl_int err = CL_SUCCESS; //set up the out array err = clSetKernelArg(cl_kern, 0, sizeof(cl_mem), &(gpu_data["out"].array)); CHK_ERR(err); err = clSetKernelArg(cl_kern, 0 + num_arrays, sizeof(int), &(gpu_data["out"].len)); CHK_ERR(err); for (int i = 1; i < num_arrays; i++) { std::string array_name = arrays[i]; err = clSetKernelArg(cl_kern, i, sizeof(cl_mem), &(gpu_data[array_name].array)); CHK_ERR(err); err = clSetKernelArg(cl_kern, i + num_arrays, sizeof(int), &(gpu_data[array_name].len)); CHK_ERR(err); } for (int i = 0; i < num_vars; i++) { err = clSetKernelArg(cl_kern, i + 2*num_arrays, sizeof(double), &(vars[i])); CHK_ERR(err); } size_t global_work_size[1] = {gpu_data[arrays[0]].len}; size_t local_work_size[1] = {256}; adjustWorkSize(global_work_size[0], local_work_size[0]);//pad work groups global_work_size[0] = std::max(local_work_size[0], global_work_size[0]); err = clEnqueueNDRangeKernel(cv.commands, cl_kern, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); CHK_ERR(err); }
int test_MatrixGraph::runtests() { if (numProcs_ < 2) { CHK_ERR( serialtest1() ); } CHK_ERR( test1() ); CHK_ERR( test2() ); CHK_ERR( test3() ); CHK_ERR( test4() ); CHK_ERR( test5() ); CHK_ERR( test_MatrixGraph_test6(comm_, numProcs_, localProc_, path_) ); test_MatrixGraph_test7(comm_, numProcs_, localProc_); test_MatrixGraph_test8(comm_, numProcs_, localProc_); return(0); }
/**
 * Copy every stored boundary-condition value onto the diagonal of `matrix`
 * and then empty the store.
 *
 * Each entry of bcs_ maps an equation number to its prescribed value; the
 * value is written at (eqn, eqn). When the matrix graph has a reducer,
 * equations it reports as slave equations are skipped, or, if
 * throw_if_bc_slave_conflict is true, cause a std::runtime_error.
 *
 * @return 0 on success, or whatever CHK_ERR propagates from copyIn.
 * @throws std::runtime_error on a BC/slave conflict when requested.
 */
int DirichletBCManager::finalizeBCEqns(fei::Matrix& matrix,
                                       bool throw_if_bc_slave_conflict)
{
  fei::SharedPtr<fei::Reducer> reducer = matrix.getMatrixGraph()->getReducer();
  const bool haveSlaves = reducer.get()!=NULL;

  for(bc_map::iterator it = bcs_.begin(); it != bcs_.end(); ++it) {
    int eqn = it->first;

    // An equation that is also a slave-constraint equation is either an
    // error or silently skipped, depending on the caller's choice.
    if (haveSlaves && reducer->isSlaveEqn(eqn)) {
      if (!throw_if_bc_slave_conflict) {
        continue;
      }

      FEI_OSTRINGSTREAM osstr;
      osstr << "fei BCManager::finalizeBCeqns ERROR, eqn="<<eqn
            << " is both a BC eqn and slave-constraint eqn.";
      throw std::runtime_error(osstr.str());
    }

    // copyIn takes an array of row pointers; here a single 1x1 entry.
    double* rowPtr = &it->second;

    CHK_ERR( matrix.copyIn(1, &eqn, 1, &eqn, &rowPtr) );
  }

  bcs_.clear();
  return(0);
}
//---------------------------------------------------------------------------- int snl_fei::LinearSystem_General::getMatrixRow(fei::Matrix* matrix, int row, std::vector<double>& coefs, std::vector<int>& indices) { int len = 0; int err = matrix->getRowLength(row, len); if (err != 0) { coefs.resize(0); indices.resize(0); return(err); } if ((int)coefs.size() != len) { coefs.resize(len); } if ((int)indices.size() != len) { indices.resize(len); } CHK_ERR( matrix->copyOutRow(row, len, &coefs[0], &indices[0])); return(0); }
/* ...set frontal camera interface */
/*
 * Creates the camera element through the supplied camera_init callback,
 * adds it to app->pipe, syncs its state with the parent and stores it in
 * app->fr_camera. The stored frame dimensions are zeroed first.
 *
 * Returns 0 on success; a failed camera_init is reported through CHK_ERR
 * with -errno.
 */
int objdet_camera_init(app_data_t *app, camera_init_func_t camera_init)
{
    GstElement     *camera;

    /* ...clear input stream dimensions (force engine reinitialization) */
    app->f_width = 0;
    app->f_height = 0;

    /* ...create camera interface (it may be network camera or file on disk) */
    CHK_ERR(camera = camera_init(&objdet_camera_cb, app), -errno);

    /* ...add camera to a pipe */
    gst_bin_add(GST_BIN(app->pipe), camera);

    /* ...synchronize state with a parent */
    gst_element_sync_state_with_parent(camera);

    /* ...save camera-set container */
    app->fr_camera = camera;

    TRACE(INIT, _b("frontal camera initialized"));

    return 0;
}
int fei::Vector_core::copyOut(int numValues, const int* indices, double* values, int vectorIndex) const { const std::vector<CSVec*>& remote = remotelyOwned(); for(int i=0; i<numValues; ++i) { int ind = indices[i]; int local = ind - firstLocalOffset_; if (local < 0 || local >= numLocal_) { if (ind < 0) { continue; } int proc = eqnComm_->getOwnerProc(ind); int insertPoint = -1; int idx = fei::binarySearch(ind, remote[proc]->indices(), insertPoint); if (idx < 0) { FEI_CERR << "fei::Vector_core::copyOut: proc " << fei::localProc(comm_) << ", index " << ind << " not in remotelyOwned_ vec object for proc " <<proc<<FEI_ENDL; ERReturn(-1); } else { values[i] = remote[proc]->coefs()[idx]; } } else { CHK_ERR( copyOutOfUnderlyingVector(1, &ind, &(values[i]), vectorIndex) ); } } return(0); }
void compile_ocl_program(std::map<std::string, cl_kernel> &kernels, cl_vars_t &cv, const char * cl_src, std::list<std::string> knames) { cl_int err; cv.main_program = clCreateProgramWithSource(cv.context, 1, (const char **) &cl_src, NULL, &err); CHK_ERR(err); err = clBuildProgram(cv.main_program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_t len; char buffer[2048]; std::cout << "Error: Failed to build program executable " << std::endl; clGetProgramBuildInfo(cv.main_program, cv.device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); std::cout << buffer << std::endl; exit(1); } for(std::list<std::string>::iterator it = knames.begin(); it != knames.end(); it++) { cl_kernel kernel = clCreateKernel(cv.main_program, (*it).c_str(), &(err)); if(!kernel || err != CL_SUCCESS) { std::cout << "Failed to create kernel: " << (*it).c_str() << std::endl; exit(1); } #ifdef DEBUG std::cout << "Successfully compiled " << (*it).c_str() << std::endl; #endif cv.kernels.push_back(kernel); kernels[*it] = kernel; } }
/*
 * Configure the input of the selected power-meter channel through
 * age441x_configureInput() with the fixed arguments 50.0E+6 and
 * AGE441X_MEAS_DOUBLE.
 *
 * iChannel == 2 selects AGE441X_CHANNEL_B; every other value (including 1)
 * selects AGE441X_CHANNEL_A.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER are external macros;
 * presumably a failing driver call jumps to the Error label - confirm.
 */
void* DLLEXPORT PowerMeter_SetActiveChannel( int hInstrumentHandle , int iChannel )
{
	STD_ERROR		StdError		=	{0};

	/* Map the caller's channel number onto the driver's channel constant. */
	iChannel = ( iChannel == 2 ) ? AGE441X_CHANNEL_B : AGE441X_CHANNEL_A;

	CHK_ERR( age441x_configureInput ( hInstrumentHandle , iChannel, 50.0E+6, AGE441X_MEAS_DOUBLE ));

Error:

	RETURN_STDERR_POINTER;
}
/*
 * Take a power measurement in dBm on the selected channel through
 * age441x_powerMeasurement() (limits -90.0 .. 90.0) and, when `value` is
 * non-NULL, store the result there.
 *
 * iChannel == 2 selects AGE441X_CHANNEL_B; every other value (including 1)
 * selects AGE441X_CHANNEL_A. `timeout` is not used by this function. The
 * function returns a pointer to an errEV produced by CALLOC_COPY from the
 * local `ret`.
 * NOTE(review): CHK_ERR and CALLOC_COPY are external macros; presumably
 * CHK_ERR records the status in `ret` and jumps to Error - confirm.
 */
void* DLLEXPORT PowerMeter_MeasurePower( int hInstrumentHandle , int iChannel , double timeout , double *value )
{
	errEV			ret				=	{0};
	errEV			*pRet			=	NULL;

	double			lfReading		=	0;
	ViBoolean		bLimitFlag		=	0;

	/* Map the caller's channel number onto the driver's channel constant. */
	iChannel = ( iChannel == 2 ) ? AGE441X_CHANNEL_B : AGE441X_CHANNEL_A;

	CHK_ERR( age441x_powerMeasurement ( hInstrumentHandle , iChannel , AGE441X_UNIT_DBM , -90.0, 90.0 , &bLimitFlag , &lfReading ));

	/* The output pointer is optional. */
	if ( value )
	{
		*value = lfReading;
	}

	//CHK_ERR( age441x_setInitiateContinuousState ( hInstrumentHandle , iChannel , VI_TRUE ));

Error:

	CALLOC_COPY(pRet,1,sizeof(errEV),&ret,sizeof(errEV));

	return ((void*)pRet);
}
/*
 * Set the frequency dFreq on the selected power-meter channel through
 * age441x_configureFrequency().
 *
 * iChannel == 2 selects AGE441X_CHANNEL_B; every other value (including 1)
 * selects AGE441X_CHANNEL_A.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER are external macros;
 * presumably a failing driver call jumps to the Error label - confirm.
 */
void* DLLEXPORT PowerMeter_SetFrequency( int hInstrumentHandle , int iChannel , double dFreq )
{
	STD_ERROR		StdError		=	{0};

	/* Map the caller's channel number onto the driver's channel constant. */
	iChannel = ( iChannel == 2 ) ? AGE441X_CHANNEL_B : AGE441X_CHANNEL_A;

	CHK_ERR( age441x_configureFrequency ( hInstrumentHandle , iChannel , dFreq ));

Error:

	RETURN_STDERR_POINTER;
}
/*
 * Call age441x_initiate() on the selected power-meter channel.
 *
 * iChannel == 2 selects AGE441X_CHANNEL_B; every other value (including 1)
 * selects AGE441X_CHANNEL_A.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER are external macros;
 * presumably a failing driver call jumps to the Error label - confirm.
 */
void* DLLEXPORT PowerMeter_InitSweep( int hInstrumentHandle , int iChannel )
{
	STD_ERROR		StdError		=	{0};

	/* Map the caller's channel number onto the driver's channel constant. */
	iChannel = ( iChannel == 2 ) ? AGE441X_CHANNEL_B : AGE441X_CHANNEL_A;

	CHK_ERR( age441x_initiate ( hInstrumentHandle , iChannel ));

Error:

	RETURN_STDERR_POINTER;
}
/*
 * Call age441x_setInitiateContinuousState() with VI_FALSE on the selected
 * power-meter channel.
 *
 * iChannel == 2 selects AGE441X_CHANNEL_B; every other value (including 1)
 * selects AGE441X_CHANNEL_A.
 * NOTE(review): CHK_ERR / RETURN_STDERR_POINTER are external macros;
 * presumably a failing driver call jumps to the Error label - confirm.
 */
void* DLLEXPORT PowerMeter_SetSingleSweep( int hInstrumentHandle , int iChannel )
{
	STD_ERROR		StdError		=	{0};

	/* Map the caller's channel number onto the driver's channel constant. */
	iChannel = ( iChannel == 2 ) ? AGE441X_CHANNEL_B : AGE441X_CHANNEL_A;

	CHK_ERR( age441x_setInitiateContinuousState ( hInstrumentHandle , iChannel , VI_FALSE ));

Error:

	RETURN_STDERR_POINTER;
}