void S<T>::test()
{
#pragma omp parallel num_threads(n) // { dg-error "must be integral" }
  work();
}
/* * Receive callback for the /camera/depth_registered/points subscription */ std::vector<suturo_perception_msgs::PerceivedObject> SuturoPerceptionKnowledgeROSNode::receive_image_and_cloud(const sensor_msgs::ImageConstPtr& inputImage, const sensor_msgs::PointCloud2ConstPtr& inputCloud) { // process only one cloud pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud_in (new pcl::PointCloud<pcl::PointXYZRGB>()); pcl::fromROSMsg(*inputCloud,*cloud_in); logger.logInfo((boost::format("Received a new point cloud: size = %s") % cloud_in->points.size()).str()); // Gazebo sends us unorganized pointclouds! // Reorganize them to be able to compute the ROI of the objects // This workaround is only tested for gazebo 1.9! if(!cloud_in->isOrganized ()) { logger.logInfo((boost::format("Received an unorganized PointCloud: %d x %d .Convert it to a organized one ...") % cloud_in->width % cloud_in->height ).str()); pcl::PointCloud<pcl::PointXYZRGB>::Ptr org_cloud (new pcl::PointCloud<pcl::PointXYZRGB>()); org_cloud->width = 640; org_cloud->height = 480; org_cloud->is_dense = false; org_cloud->points.resize(640 * 480); for (int i = 0; i < cloud_in->points.size(); i++) { pcl::PointXYZRGB result; result.x = 0; result.y = 0; result.z = 0; org_cloud->points[i]=cloud_in->points[i]; } cloud_in = org_cloud; } cv_bridge::CvImagePtr cv_ptr; cv_ptr = cv_bridge::toCvCopy(inputImage, enc::BGR8); // Make a deep copy of the passed cv::Mat and set a new // boost pointer to it. boost::shared_ptr<cv::Mat> img(new cv::Mat(cv_ptr->image.clone())); sp.setOriginalRGBImage(img); logger.logInfo("processing..."); sp.setOriginalCloud(cloud_in); sp.processCloudWithProjections(cloud_in); logger.logInfo("Cloud processed. Lock buffer and return the results"); mutex.lock(); perceivedObjects = sp.getPerceivedObjects(); if(sp.getOriginalRGBImage()->cols != sp.getOriginalCloud()->width && sp.getOriginalRGBImage()->rows != sp.getOriginalCloud()->height) { // Adjust the ROI if the image is at 1280x1024 and the pointcloud is at 640x480 if(sp.getOriginalRGBImage()->cols == 1280 && sp.getOriginalRGBImage()->rows == 1024) { for (int i = 0; i < perceivedObjects.size(); i++) { ROI roi = perceivedObjects.at(i).get_c_roi(); roi.origin.x*=2; roi.origin.y*=2; roi.width*=2; roi.height*=2; perceivedObjects.at(i).set_c_roi(roi); } } else { logger.logError("UNSUPPORTED MIXTURE OF IMAGE AND POINTCLOUD DIMENSIONS"); } } // Execution pipeline // Each capability provides an enrichment for the // returned PerceivedObject // initialize threadpool boost::asio::io_service ioService; boost::thread_group threadpool; std::auto_ptr<boost::asio::io_service::work> work(new boost::asio::io_service::work(ioService)); // Add worker threads to threadpool for(int i = 0; i < numThreads; ++i) { threadpool.create_thread( boost::bind(&boost::asio::io_service::run, &ioService) ); } for (int i = 0; i < perceivedObjects.size(); i++) { // Initialize Capabilities ColorAnalysis ca(perceivedObjects[i]); ca.setLowerSThreshold(color_analysis_lower_s); ca.setUpperSThreshold(color_analysis_upper_s); ca.setLowerVThreshold(color_analysis_lower_v); ca.setUpperVThreshold(color_analysis_upper_v); suturo_perception_shape_detection::RandomSampleConsensus sd(perceivedObjects[i]); //suturo_perception_vfh_estimation::VFHEstimation vfhe(perceivedObjects[i]); // suturo_perception_3d_capabilities::CuboidMatcherAnnotator cma(perceivedObjects[i]); // Init the cuboid matcher with the table coefficients suturo_perception_3d_capabilities::CuboidMatcherAnnotator cma(perceivedObjects[i], sp.getTableCoefficients() ); // 
post work to threadpool ioService.post(boost::bind(&ColorAnalysis::execute, ca)); ioService.post(boost::bind(&suturo_perception_shape_detection::RandomSampleConsensus::execute, sd)); //ioService.post(boost::bind(&suturo_perception_vfh_estimation::VFHEstimation::execute, vfhe)); ioService.post(boost::bind(&suturo_perception_3d_capabilities::CuboidMatcherAnnotator::execute, cma)); // Is 2d recognition enabled? if(!recognitionDir.empty()) { // perceivedObjects[i].c_recognition_label_2d=""; suturo_perception_2d_capabilities::LabelAnnotator2D la(perceivedObjects[i], sp.getOriginalRGBImage(), object_matcher_); la.execute(); } else { // Set an empty label perceivedObjects[i].set_c_recognition_label_2d(""); } } //boost::this_thread::sleep(boost::posix_time::microseconds(1000)); // wait for thread completion. // destroy the work object to wait for all queued tasks to finish work.reset(); ioService.run(); threadpool.join_all(); std::vector<suturo_perception_msgs::PerceivedObject> perceivedObjs = *convertPerceivedObjects(&perceivedObjects); // TODO handle images in this method mutex.unlock(); return perceivedObjs; }
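// ---------------------------------------------------------------------------
// A minimal sketch of the io_service-based thread pool used above: an
// io_service kept alive by a work object, run by a boost::thread_group, with
// tasks posted via ioService.post(). doTask(), numThreads and numTasks are
// hypothetical placeholders, and std::unique_ptr replaces the deprecated
// std::auto_ptr; this is an illustration of the idiom, not the project's code.
// ---------------------------------------------------------------------------
#include <boost/asio.hpp>
#include <boost/thread.hpp>
#include <boost/bind.hpp>
#include <memory>

static void doTask(int id) { /* per-item processing would go here */ }

void run_pool(int numThreads, int numTasks)
{
  boost::asio::io_service ioService;
  // The work object keeps io_service::run() from returning while the queue is empty.
  std::unique_ptr<boost::asio::io_service::work> work(
      new boost::asio::io_service::work(ioService));

  // Worker threads all block inside run(), pulling posted handlers.
  boost::thread_group threadpool;
  for (int i = 0; i < numThreads; ++i)
    threadpool.create_thread(boost::bind(&boost::asio::io_service::run, &ioService));

  // Queue the tasks.
  for (int i = 0; i < numTasks; ++i)
    ioService.post(boost::bind(&doTask, i));

  // Destroying the work object lets run() return once all queued tasks finish.
  work.reset();
  threadpool.join_all();
}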
int main() { init(); work(); return 0; }
void HibernateBoot(char *image_filename) { long long size, imageSize, codeSize, allocSize; long mem_base; IOHibernateImageHeader _header; IOHibernateImageHeader * header = &_header; long buffer; size = ReadFileAtOffset (image_filename, header, 0, sizeof(IOHibernateImageHeader)); printf("header read size %x\n", size); imageSize = header->image1Size; codeSize = header->restore1PageCount << 12; if (kIOHibernateHeaderSignature != header->signature) { printf ("Incorrect image signature\n"); return; } if (header->encryptStart) { printf ("Resuming from Encrypted image is unsupported.\n" "Uncheck \"Use secure virtual memory\" in \"Security\" pane on system preferences.\n" "Press any key to proceed with normal boot.\n"); getc (); return; } // depends on NVRAM #if 0 { uint32_t machineSignature; size = GetProp(gChosenPH, kIOHibernateMachineSignatureKey, (char *)&machineSignature, sizeof(machineSignature)); if (size != sizeof(machineSignature)) machineSignature = 0; if (machineSignature != header->machineSignature) break; } #endif allocSize = imageSize + ((4095 + sizeof(hibernate_graphics_t)) & ~4095); mem_base = getmemorylimit() - allocSize;//TODO: lower this printf("mem_base %x\n", mem_base); if (!((long long)mem_base+allocSize<1024*bootInfo->extmem+0x100000)) { printf ("Not enough space to restore image. Press any key to proceed with normal boot.\n"); getc (); return; } bcopy(header, (void *) mem_base, sizeof(IOHibernateImageHeader)); header = (IOHibernateImageHeader *) mem_base; imageSize -= sizeof(IOHibernateImageHeader); buffer = (long)(header + 1); if (header->previewSize) { uint64_t preview_offset = header->fileExtentMapSize - sizeof(header->fileExtentMap) + codeSize; uint8_t progressSaveUnder[kIOHibernateProgressCount][kIOHibernateProgressSaveUnderSize]; ReadFileAtOffset (image_filename, (char *)buffer, sizeof(IOHibernateImageHeader), preview_offset+header->previewSize); drawPreview ((void *)(long)(buffer+preview_offset + header->previewPageListSize), &(progressSaveUnder[0][0])); previewTotalSectors = (imageSize-(preview_offset+header->previewSize))/512; previewLoadedSectors = 0; previewSaveunder = &(progressSaveUnder[0][0]); if (preview_offset+header->previewSize<imageSize) ReadFileAtOffset (image_filename, (char *)(long)(buffer+preview_offset+header->previewSize), sizeof(IOHibernateImageHeader)+preview_offset+header->previewSize, imageSize-(preview_offset+header->previewSize)); previewTotalSectors = 0; previewLoadedSectors = 0; previewSaveunder = 0; #if 0 AsereBLN: check_vga_nvidia() didn't work as expected (recursion level > 0 & return value). Unforutnaltely I cannot find a note why to switch back to text mode for nVidia cards only and because it check_vga_nvidia does not work (cards normally are behind a bridge) I will remove it completely setVideoMode( VGA_TEXT_MODE, 0 ); #endif }
// a new aggregate is to be inserted into the work queue
inline void
new_work_agg(db::node *node, db::simple_tuple *stpl)
{
   process::work work(node, stpl, process::mods::LOCAL_TUPLE | process::mods::FORCE_AGGREGATE);
   new_agg(work);
}
void EigenValuesAdvection::v_DoSolve() { int nvariables = 1; int i,dofs = GetNcoeffs(); //bool UseContCoeffs = false; Array<OneD, Array<OneD, NekDouble> > inarray(nvariables); Array<OneD, Array<OneD, NekDouble> > tmp(nvariables); Array<OneD, Array<OneD, NekDouble> > outarray(nvariables); Array<OneD, Array<OneD, NekDouble> > WeakAdv(nvariables); int npoints = GetNpoints(); int ncoeffs = GetNcoeffs(); switch (m_projectionType) { case MultiRegions::eDiscontinuous: { dofs = ncoeffs; break; } case MultiRegions::eGalerkin: case MultiRegions::eMixed_CG_Discontinuous: { //dofs = GetContNcoeffs(); //UseContCoeffs = true; break; } } cout << endl; cout << "Num Phys Points = " << npoints << endl; // phisical points cout << "Num Coeffs = " << ncoeffs << endl; // cout << "Num Cont Coeffs = " << dofs << endl; inarray[0] = Array<OneD, NekDouble>(npoints,0.0); outarray[0] = Array<OneD, NekDouble>(npoints,0.0); tmp[0] = Array<OneD, NekDouble>(npoints,0.0); WeakAdv[0] = Array<OneD, NekDouble>(ncoeffs,0.0); Array<OneD, NekDouble> MATRIX(npoints*npoints,0.0); for (int j = 0; j < npoints; j++) { inarray[0][j] = 1.0; /// Feeding the weak Advection oprator with a vector (inarray) /// Looping on inarray and changing the position of the only non-zero entry /// we simulate the multiplication by the identity matrix. /// The results stored in outarray is one of the columns of the weak advection oprators /// which are then stored in MATRIX for the futher eigenvalues calculation. switch (m_projectionType) { case MultiRegions::eDiscontinuous: { WeakDGAdvection(inarray, WeakAdv,true,true,1); m_fields[0]->MultiplyByElmtInvMass(WeakAdv[0],WeakAdv[0]); m_fields[0]->BwdTrans(WeakAdv[0],outarray[0]); Vmath::Neg(npoints,outarray[0],1); break; } case MultiRegions::eGalerkin: case MultiRegions::eMixed_CG_Discontinuous: { // Calculate -V\cdot Grad(u); for(i = 0; i < nvariables; ++i) { //Projection m_fields[i]->FwdTrans(inarray[i],WeakAdv[i]); m_fields[i]->BwdTrans_IterPerExp(WeakAdv[i],tmp[i]); //Advection operator AdvectionNonConservativeForm(m_velocity,tmp[i],outarray[i]); Vmath::Neg(npoints,outarray[i],1); //m_fields[i]->MultiplyByInvMassMatrix(WeakAdv[i],WeakAdv[i]); //Projection m_fields[i]->FwdTrans(outarray[i],WeakAdv[i]); m_fields[i]->BwdTrans_IterPerExp(WeakAdv[i],outarray[i]); } break; } } /// The result is stored in outarray (is the j-th columns of the weak advection operator). 
/// We now store it in MATRIX(j) Vmath::Vcopy(npoints,&(outarray[0][0]),1,&(MATRIX[j]),npoints); /// Set the j-th entry of inarray back to zero inarray[0][j] = 0.0; } //////////////////////////////////////////////////////////////////////////////// /// Calulating the eigenvalues of the weak advection operator stored in (MATRIX) /// using Lapack routines char jobvl = 'N'; char jobvr = 'N'; int info = 0, lwork = 3*npoints; NekDouble dum; Array<OneD, NekDouble> EIG_R(npoints); Array<OneD, NekDouble> EIG_I(npoints); Array<OneD, NekDouble> work(lwork); Lapack::Dgeev(jobvl,jobvr,npoints,MATRIX.get(),npoints,EIG_R.get(),EIG_I.get(),&dum,1,&dum,1,&work[0],lwork,info); //////////////////////////////////////////////////////// //Print Matrix FILE *mFile; mFile = fopen ("WeakAdvMatrix.txt","w"); for(int j = 0; j<npoints; j++) { for(int k = 0; k<npoints; k++) { fprintf(mFile,"%e ",MATRIX[j*npoints+k]); } fprintf(mFile,"\n"); } fclose (mFile); //////////////////////////////////////////////////////// //Output of the EigenValues FILE *pFile; pFile = fopen ("Eigenvalues.txt","w"); for(int j = 0; j<npoints; j++) { fprintf(pFile,"%e %e\n",EIG_R[j],EIG_I[j]); } fclose (pFile); cout << "\nEigenvalues : " << endl; for(int j = 0; j<npoints; j++) { cout << EIG_R[j] << "\t" << EIG_I[j] << endl; } cout << endl; }
static GstFlowReturn gst_bml_transform_transform_ip_mono (GstBaseTransform * base, GstBuffer * outbuf) { GstMapInfo info; GstBMLTransform *bml_transform = GST_BML_TRANSFORM (base); GstBMLTransformClass *klass = GST_BML_TRANSFORM_GET_CLASS (bml_transform); GstBML *bml = GST_BML (bml_transform); GstBMLClass *bml_class = GST_BML_CLASS (klass); BMLData *data, *seg_data; gpointer bm = bml->bm; guint todo, seg_size, samples_per_buffer; gboolean has_data; guint mode = 3; /*WM_READWRITE */ bml->running_time = gst_segment_to_stream_time (&base->segment, GST_FORMAT_TIME, GST_BUFFER_TIMESTAMP (outbuf)); if (GST_BUFFER_FLAG_IS_SET (outbuf, GST_BUFFER_FLAG_DISCONT)) { bml->subtick_count = (!bml->reverse) ? bml->subticks_per_tick : 1; } /* TODO(ensonic): sync on subticks ? */ if (bml->subtick_count >= bml->subticks_per_tick) { bml (gstbml_reset_triggers (bml, bml_class)); bml (gstbml_sync_values (bml, bml_class, GST_BUFFER_TIMESTAMP (outbuf))); bml (tick (bm)); bml->subtick_count = 1; } else { bml->subtick_count++; } /* don't process data in passthrough-mode */ if (gst_base_transform_is_passthrough (base)) return GST_FLOW_OK; if (!gst_buffer_map (outbuf, &info, GST_MAP_READ | GST_MAP_WRITE)) { GST_WARNING_OBJECT (base, "unable to map buffer for read & write"); return GST_FLOW_ERROR; } data = (BMLData *) info.data; samples_per_buffer = info.size / sizeof (BMLData); /* if buffer has only silence process with different mode */ if (GST_BUFFER_FLAG_IS_SET (outbuf, GST_BUFFER_FLAG_GAP)) { mode = 2; /* WM_WRITE */ } else { // buzz generates loud output gfloat fc = 32768.0; orc_scalarmultiply_f32_ns (data, data, fc, samples_per_buffer); } GST_DEBUG_OBJECT (bml_transform, " calling work(%d,%d)", samples_per_buffer, mode); todo = samples_per_buffer; seg_data = data; has_data = FALSE; while (todo) { // 256 is MachineInterface.h::MAX_BUFFER_LENGTH seg_size = (todo > 256) ? 256 : todo; has_data |= bml (work (bm, seg_data, (int) seg_size, mode)); seg_data = &seg_data[seg_size]; todo -= seg_size; } if (gstbml_fix_data ((GstElement *) bml_transform, &info, has_data)) { GST_BUFFER_FLAG_SET (outbuf, GST_BUFFER_FLAG_GAP); } else { GST_BUFFER_FLAG_UNSET (outbuf, GST_BUFFER_FLAG_GAP); } gst_buffer_unmap (outbuf, &info); return GST_FLOW_OK; }
int main() { while (work()); return 0; }
void Solve::solve(FILE *fin, FILE *fout) { int cnt = 1; for(int i = 0; i <= 10; i ++, cnt *= 2) lb[cnt] = i; fscanf(fin, "%d%d", &n, &m); for(int i = 1; i <= n; i ++) for(int j = 1; j <= m; j ++) fscanf(fin, "%d", &object[i][j].a); for(int i = 1; i <= n; i ++) for(int j = 1; j <= m; j ++) fscanf(fin, "%d", &object[i][j].d); for(int i = 1; i <= n; i ++) for(int j = 1; j <= m; j ++) fscanf(fin, "%d", &object[i][j].hp); fscanf(fin, "%d%d%d", &llx.a, &llx.d, &llx.hp); fscanf(fin, "%d", &nBaby); for(int i = 1; i <= nBaby; i ++) fscanf(fin, "%d%d%d", &baby[i].a, &baby[i].d, &baby[i].hp); for(int i = 1; i <= n; i ++) for(int j = 1; j <= m; j ++) { Stuff &lyd = object[i][j]; if(llx.a <= lyd.d) w[i][j][0] = INFINITY; else { int t1 = ceilDiv(lyd.hp, llx.a-lyd.d); int tmp = (t1 - 1) * MAX(0, lyd.a - llx.d); if(tmp >= llx.hp) w[i][j][0] = INFINITY; else w[i][j][0] = tmp; } for(int k = 1; k <= nBaby; k ++) { Stuff &bb = baby[k]; if(bb.a <= lyd.d) w[i][j][k] = INFINITY; else { int t1 = ceilDiv(lyd.hp, bb.a - lyd.d); int tmp = (t1 - 1) * MAX(0, lyd.a - bb.d); if(tmp >= bb.hp) { int t2 = ceilDiv(bb.hp, lyd.a - bb.d); tmp = t2 * MAX(0, bb.a - lyd.d); //baby died if(llx.a <= lyd.d) w[i][j][k] = INFINITY; else { int t1 = ceilDiv(lyd.hp - tmp, llx.a-lyd.d); tmp = (t1 - 1) * MAX(0, lyd.a - llx.d); if(tmp >= llx.hp) w[i][j][k] = INFINITY; else w[i][j][k] = tmp; } } else w[i][j][k] = 0; } } } upperlim = (1 << nBaby) - 1; work(fin, fout); }
int main(int argc, char **argv){ // Add some plugin searhc paths plugin_search_path=list_new(free); const char *infilename=NULL; const char *outfilename=NULL; char tmp[256]; char *assetfilename="assets.h"; int i; for (i=1;i<argc;i++){ if (strcmp(argv[i], "--help")==0){ help(NULL); return 0; } else if ((strcmp(argv[i], "--templatetagsdir")==0) || (strcmp(argv[i], "-t")==0)){ i++; if (argc<=i){ help("Missing templatedir name"); return 3; } snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", argv[i]); ONION_DEBUG("Added templatedir %s", tmp); list_add(plugin_search_path, strdup(tmp)); // dup, remember to free later. } else if ((strcmp(argv[i], "--no-orig-lines")==0) || (strcmp(argv[i], "-n")==0)){ use_orig_line_numbers=0; ONION_DEBUG("Disable original line numbers"); } else if ((strcmp(argv[i], "--asset-file")==0) || (strcmp(argv[i], "-a")==0)){ i++; if (argc<=i){ help("Missing assets file name"); return 3; } assetfilename=argv[i]; ONION_DEBUG("Assets file: %s", assetfilename); } else{ if (infilename){ if (outfilename){ help("Too many arguments"); return 1; } outfilename=argv[i]; ONION_DEBUG("Set outfilename %s", outfilename); } else{ infilename=argv[i]; ONION_DEBUG("Set infilename %s", infilename); } } } if (!infilename || !outfilename){ help("Missing input or output filename"); return 2; } if (strcmp(infilename,"-")==0){ infilename=""; } else{ char tmp2[256]; strncpy(tmp2, argv[1], sizeof(tmp2)-1); snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", dirname(tmp2)); list_add(plugin_search_path, strdup(tmp)); strncpy(tmp2, argv[1], sizeof(tmp2)-1); snprintf(tmp, sizeof(tmp), "%s/templatetags/lib%%s.so", dirname(tmp2)); list_add(plugin_search_path, strdup(tmp)); } // Default template dirs list_add_with_flags(plugin_search_path, "lib%s.so", LIST_ITEM_NO_FREE); list_add_with_flags(plugin_search_path, "templatetags/lib%s.so", LIST_ITEM_NO_FREE); char tmp2[256]; strncpy(tmp2, argv[0], sizeof(tmp2)-1); snprintf(tmp, sizeof(tmp), "%s/templatetags/lib%%s.so", dirname(tmp2)); list_add(plugin_search_path, strdup(tmp)); // dupa is ok, as im at main. strncpy(tmp2, argv[0], sizeof(tmp2)-1); snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", dirname(tmp2)); list_add(plugin_search_path, strdup(tmp)); // dupa is ok, as im at main. list_add_with_flags(plugin_search_path, "/usr/local/lib/otemplate/templatetags/lib%s.so", LIST_ITEM_NO_FREE); list_add_with_flags(plugin_search_path, "/usr/lib/otemplate/templatetags/lib%s.so", LIST_ITEM_NO_FREE); onion_assets_file *assetsfile=onion_assets_file_new(assetfilename); int error=work(infilename, outfilename, assetsfile); onion_assets_file_free(assetsfile); list_free(plugin_search_path); return error; }
int main(int argc, char** argv) { boost::program_options::options_description desc("options"); desc.add_options() ("help", "produce help message") ("topic", boost::program_options::value<std::string>(), "topic") ("broker", boost::program_options::value<std::string>(), "broker") ("schema_registry", boost::program_options::value<std::string>(), "schema_registry") ("schema_registry_port", boost::program_options::value<int>()->default_value(8081), "schema_registry_port") ; boost::program_options::variables_map vm; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); boost::program_options::notify(vm); boost::log::core::get()->set_filter(boost::log::trivial::severity >= boost::log::trivial::info); if (vm.count("help")) { std::cout << desc << std::endl; return 0; } std::string topic; if (vm.count("topic")) { topic = vm["topic"].as<std::string>(); } else { std::cout << "--topic must be specified" << std::endl; return 0; } int32_t kafka_port = 9092; std::vector<csi::kafka::broker_address> kafka_brokers; if (vm.count("broker")) { std::string s = vm["broker"].as<std::string>(); size_t last_colon = s.find_last_of(':'); if (last_colon != std::string::npos) kafka_port = atoi(s.substr(last_colon + 1).c_str()); s = s.substr(0, last_colon); // now find the brokers... size_t last_separator = s.find_last_of(','); while (last_separator != std::string::npos) { std::string host = s.substr(last_separator + 1); kafka_brokers.push_back(csi::kafka::broker_address(host, kafka_port)); s = s.substr(0, last_separator); last_separator = s.find_last_of(','); } kafka_brokers.push_back(csi::kafka::broker_address(s, kafka_port)); } else { std::cout << "--broker must be specified" << std::endl; return 0; } int32_t schema_registry_port = 8081; std::vector<csi::kafka::broker_address> schema_registrys; std::string used_schema_registry; if (vm.count("schema_registry_port")) { schema_registry_port = vm["schema_registry_port"].as<int>(); } if (vm.count("schema_registry")) { std::string s = vm["schema_registry"].as<std::string>(); size_t last_colon = s.find_last_of(':'); if (last_colon != std::string::npos) schema_registry_port = atoi(s.substr(last_colon + 1).c_str()); s = s.substr(0, last_colon); // now find the brokers... size_t last_separator = s.find_last_of(','); while (last_separator != std::string::npos) { std::string host = s.substr(last_separator + 1); schema_registrys.push_back(csi::kafka::broker_address(host, schema_registry_port)); s = s.substr(0, last_separator); last_separator = s.find_last_of(','); } schema_registrys.push_back(csi::kafka::broker_address(s, schema_registry_port)); } else { // default - assume registry is running on all kafka brokers for (std::vector<csi::kafka::broker_address>::const_iterator i = kafka_brokers.begin(); i != kafka_brokers.end(); ++i) { schema_registrys.push_back(csi::kafka::broker_address(i->host_name, schema_registry_port)); } } // right now the schema registry class cannot handle severel hosts so just stick to the first one. 
used_schema_registry = schema_registrys[0].host_name + ":" + std::to_string(schema_registrys[0].port); std::string kafka_broker_str = ""; for (std::vector<csi::kafka::broker_address>::const_iterator i = kafka_brokers.begin(); i != kafka_brokers.end(); ++i) { kafka_broker_str += i->host_name + ":" + std::to_string(i->port); if (i != kafka_brokers.end() - 1) kafka_broker_str += ", "; } BOOST_LOG_TRIVIAL(info) << "kafka broker(s): " << kafka_broker_str; BOOST_LOG_TRIVIAL(info) << "topic : " << topic; std::string schema_registrys_info; for (std::vector<csi::kafka::broker_address>::const_iterator i = schema_registrys.begin(); i != schema_registrys.end(); ++i) { schema_registrys_info += i->host_name + ":" + std::to_string(i->port); if (i != schema_registrys.end() - 1) schema_registrys_info += ", "; } BOOST_LOG_TRIVIAL(info) << "schema_registry(s) : " << schema_registrys_info; BOOST_LOG_TRIVIAL(info) << "used schema registry: " << used_schema_registry; int64_t total = 0; boost::asio::io_service fg_ios; std::auto_ptr<boost::asio::io_service::work> work(new boost::asio::io_service::work(fg_ios)); boost::thread fg(boost::bind(&boost::asio::io_service::run, &fg_ios)); csi::kafka::highlevel_producer producer(fg_ios, topic, -1, 200, 1000000); confluent::registry registry(fg_ios, used_schema_registry); confluent::codec avro_codec(registry); producer.connect(kafka_brokers); BOOST_LOG_TRIVIAL(info) << "connected to kafka"; producer.connect_forever(kafka_brokers); boost::thread do_log([&producer] { while (true) { boost::this_thread::sleep(boost::posix_time::seconds(1)); std::vector<csi::kafka::highlevel_producer::metrics> metrics = producer.get_metrics(); size_t total_queue = 0; uint32_t tx_msg_sec_total = 0; uint32_t tx_kb_sec_total = 0; for (std::vector<csi::kafka::highlevel_producer::metrics>::const_iterator i = metrics.begin(); i != metrics.end(); ++i) { total_queue += (*i).msg_in_queue; tx_msg_sec_total += (*i).tx_msg_sec; tx_kb_sec_total += (*i).tx_kb_sec; } BOOST_LOG_TRIVIAL(info) << "\t \tqueue:" << total_queue << "\t" << tx_msg_sec_total << " msg/s \t" << (tx_kb_sec_total / 1024) << "MB/s"; } }); std::cerr << "registring schemas" << std::endl; auto key_res = avro_codec.put_schema("sample.contact_info_key", sample::contact_info_key::valid_schema()); if (key_res.first!=0) { BOOST_LOG_TRIVIAL(error) << "registring sample.contact_info_key failed"; return -1; } auto val_res = avro_codec.put_schema("sample.contact_info", sample::contact_info::valid_schema()); if (val_res.first!=0) { BOOST_LOG_TRIVIAL(error) << "registring sample.contact_info failed"; return -1; } BOOST_LOG_TRIVIAL(info) << "registring schemas done"; //produce messages std::vector<boost::thread*> threads; for (int i = 0; i != 10; ++i) { threads.emplace_back(new boost::thread([&avro_codec, key_res, val_res, &producer, i] { send_messages(avro_codec, key_res.second, val_res.second, producer, i); })); } while (true) { boost::this_thread::sleep(boost::posix_time::seconds(1)); } work.reset(); fg_ios.stop(); return EXIT_SUCCESS; }
void bob::math::eig_(const blitz::Array<double,2>& A, blitz::Array<std::complex<double>,2>& V, blitz::Array<std::complex<double>,1>& D) { // Size variable const int N = A.extent(0); // Prepares to call LAPACK function // Initialises LAPACK variables const char jobvl = 'N'; // Do NOT compute left eigen-vectors const char jobvr = 'V'; // Compute right eigen-vectors int info = 0; const int lda = N; const int ldvr = N; double VL = 0; // notice we don't compute the left eigen-values const int ldvl = 1; // Initialises LAPACK arrays blitz::Array<double,2> A_lapack = bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0)); // temporary arrays to receive LAPACK's eigen-values and eigen-vectors blitz::Array<double,1> WR(D.shape()); //real part blitz::Array<double,1> WI(D.shape()); //imaginary part blitz::Array<double,2> VR(A.shape()); //right eigen-vectors // Calls the LAPACK function // A/ Queries the optimal size of the working arrays const int lwork_query = -1; double work_query; dgeev_( &jobvl, &jobvr, &N, A_lapack.data(), &lda, WR.data(), WI.data(), &VL, &ldvl, VR.data(), &ldvr, &work_query, &lwork_query, &info); // B/ Computes the eigenvalue decomposition const int lwork = static_cast<int>(work_query); boost::shared_array<double> work(new double[lwork]); dgeev_( &jobvl, &jobvr, &N, A_lapack.data(), &lda, WR.data(), WI.data(), &VL, &ldvl, VR.data(), &ldvr, work.get(), &lwork, &info); // Checks info variable if (info != 0) { throw std::runtime_error("the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed."); } // Copy results back from WR, WI => D blitz::real(D) = WR; blitz::imag(D) = WI; // Copy results back from VR => V, with two rules: // 1) If the j-th eigenvalue is real, then v(j) = VR(:,j), the j-th column of // VR. // 2) If the j-th and (j+1)-st eigenvalues form a complex conjugate pair, // then v(j) = VR(:,j) + i*VR(:,j+1) and v(j+1) = VR(:,j) - i*VR(:,j+1). blitz::Range a = blitz::Range::all(); int i=0; while (i<N) { if (std::imag(D(i)) == 0.) { //real eigen-value, consume 1 blitz::real(V(a,i)) = VR(i,a); blitz::imag(V(a,i)) = 0.; ++i; } else { //complex eigen-value, consume 2 blitz::real(V(a,i)) = VR(i,a); blitz::imag(V(a,i)) = VR(i+1,a); blitz::real(V(a,i+1)) = VR(i,a); blitz::imag(V(a,i+1)) = -VR(i+1,a); i += 2; } } }
void bob::math::eigSym_(const blitz::Array<double,2>& A, const blitz::Array<double,2>& B, blitz::Array<double,2>& V, blitz::Array<double,1>& D) { // Size variable const int N = A.extent(0); // Prepares to call LAPACK function // Initialises LAPACK variables const int itype = 1; const char jobz = 'V'; // Get both the eigenvalues and the eigenvectors const char uplo = 'U'; int info = 0; const int lda = N; const int ldb = N; // Initialises LAPACK arrays blitz::Array<double,2> A_blitz_lapack; // Tries to use V directly blitz::Array<double,2> Vt = V.transpose(1,0); const bool V_direct_use = bob::core::array::isCZeroBaseContiguous(Vt); if (V_direct_use) { A_blitz_lapack.reference(Vt); // Ugly fix for non-const transpose A_blitz_lapack = const_cast<blitz::Array<double,2>&>(A).transpose(1,0); } else // Ugly fix for non-const transpose A_blitz_lapack.reference( bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0))); double *A_lapack = A_blitz_lapack.data(); // Ugly fix for non-const transpose blitz::Array<double,2> B_blitz_lapack( bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(B).transpose(1,0))); double *B_lapack = B_blitz_lapack.data(); blitz::Array<double,1> D_blitz_lapack; const bool D_direct_use = bob::core::array::isCZeroBaseContiguous(D); if (D_direct_use) D_blitz_lapack.reference(D); else D_blitz_lapack.resize(D.shape()); double *D_lapack = D_blitz_lapack.data(); // Calls the LAPACK function // A/ Queries the optimal size of the working arrays const int lwork_query = -1; double work_query; const int liwork_query = -1; int iwork_query; dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack, &work_query, &lwork_query, &iwork_query, &liwork_query, &info); // B/ Computes the generalized eigenvalue decomposition const int lwork = static_cast<int>(work_query); boost::shared_array<double> work(new double[lwork]); const int liwork = static_cast<int>(iwork_query); boost::shared_array<int> iwork(new int[liwork]); dsygvd_( &itype, &jobz, &uplo, &N, A_lapack, &lda, B_lapack, &ldb, D_lapack, work.get(), &lwork, iwork.get(), &liwork, &info); // Checks info variable if (info != 0) throw std::runtime_error("The LAPACK function 'dsygvd' returned a non-zero value. This might be caused by a non-positive definite B matrix."); // Copy singular vectors back to V if required if (!V_direct_use) V = A_blitz_lapack.transpose(1,0); // Copy result back to sigma if required if (!D_direct_use) D = D_blitz_lapack; }
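/* ---------------------------------------------------------------------------
 * A minimal sketch of the two-phase LAPACK workspace query used above
 * (lwork = -1 first, then the real call), applied here to the plain symmetric
 * eigensolver dsyev_ rather than dsygvd_/dgeev_. The extern declaration and
 * the eig_sym() helper are assumptions for illustration only.
 * ------------------------------------------------------------------------- */
#include <vector>
#include <stdexcept>

extern "C" void dsyev_(const char* jobz, const char* uplo, const int* n,
                       double* a, const int* lda, double* w,
                       double* work, const int* lwork, int* info);

// A is an n-by-n column-major symmetric matrix; on return it holds the
// eigenvectors and w holds the eigenvalues.
void eig_sym(std::vector<double>& A, std::vector<double>& w, int n)
{
  const char jobz = 'V', uplo = 'U';
  int info = 0;

  // A/ Queries the optimal workspace size: lwork = -1 makes LAPACK write the
  //    required size into work_query without doing any computation.
  int lwork = -1;
  double work_query = 0.0;
  dsyev_(&jobz, &uplo, &n, A.data(), &n, w.data(), &work_query, &lwork, &info);

  // B/ Allocates that much workspace and performs the actual decomposition.
  lwork = static_cast<int>(work_query);
  std::vector<double> work(lwork);
  dsyev_(&jobz, &uplo, &n, A.data(), &n, w.data(), work.data(), &lwork, &info);
  if (info != 0)
    throw std::runtime_error("dsyev_ returned a non-zero info value");
}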
int main() { read(); work(); return 0; }
Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>:: Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order, const EPointType pointType ) { this->basisCardinality_ = order+1; this->basisDegree_ = order; this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() ); this->basisType_ = BASIS_FEM_FIAT; this->basisCoordinates_ = COORDINATES_CARTESIAN; const ordinal_type card = this->basisCardinality_; // points are computed in the host and will be copied Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> dofCoords("Hgrad::Line::Cn::dofCoords", card, 1); switch (pointType) { case POINTTYPE_EQUISPACED: case POINTTYPE_WARPBLEND: { // lattice ordering { const ordinal_type offset = 0; PointTools::getLattice( dofCoords, this->basisCellTopology_, order, offset, pointType ); } // topological order // { // // two vertices // dofCoords(0,0) = -1.0; // dofCoords(1,0) = 1.0; // // internal points // typedef Kokkos::pair<ordinal_type,ordinal_type> range_type; // auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL()); // const auto offset = 1; // PointTools::getLattice( pts, // this->basisCellTopology_, // order, offset, // pointType ); // } break; } case POINTTYPE_GAUSS: { // internal points only PointTools::getGaussPoints( dofCoords, order ); break; } default: { INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType), std::invalid_argument , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." ); } } this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); // form Vandermonde matrix; actually, this is the transpose of the VDM, // this matrix is used in LAPACK so it should be column major and left layout const ordinal_type lwork = card*card; Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace> vmat("Hgrad::Line::Cn::vmat", card, card), work("Hgrad::Line::Cn::work", lwork), ipiv("Hgrad::Line::Cn::ipiv", card); const double alpha = 0.0, beta = 0.0; Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval> (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE); ordinal_type info = 0; Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack; lapack.GETRF(card, card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." ); lapack.GETRI(card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), work.data(), lwork, &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." 
); // create host mirror Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> vinv("Hgrad::Line::Cn::vinv", card, card); for (ordinal_type i=0;i<card;++i) for (ordinal_type j=0;j<card;++j) vinv(i,j) = vmat(j,i); this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv); Kokkos::deep_copy(this->vinv_ , vinv); // initialize tags { const bool is_vertex_included = (pointType != POINTTYPE_GAUSS); // Basis-dependent initializations const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell ordinal_type tags[Parameters::MaxOrder+1][4]; // now we check the points for association if (is_vertex_included) { // lattice order { const auto v0 = 0; tags[v0][0] = 0; // vertex dof tags[v0][1] = 0; // vertex id tags[v0][2] = 0; // local dof id tags[v0][3] = 1; // total number of dofs in this vertex const ordinal_type iend = card - 2; for (ordinal_type i=0;i<iend;++i) { const auto e = i + 1; tags[e][0] = 1; // edge dof tags[e][1] = 0; // edge id tags[e][2] = i; // local dof id tags[e][3] = iend; // total number of dofs in this edge } const auto v1 = card -1; tags[v1][0] = 0; // vertex dof tags[v1][1] = 1; // vertex id tags[v1][2] = 0; // local dof id tags[v1][3] = 1; // total number of dofs in this vertex } // topological order // { // tags[0][0] = 0; // vertex dof // tags[0][1] = 0; // vertex id // tags[0][2] = 0; // local dof id // tags[0][3] = 1; // total number of dofs in this vertex // tags[1][0] = 0; // vertex dof // tags[1][1] = 1; // vertex id // tags[1][2] = 0; // local dof id // tags[1][3] = 1; // total number of dofs in this vertex // const ordinal_type iend = card - 2; // for (ordinal_type i=0;i<iend;++i) { // const auto ii = i + 2; // tags[ii][0] = 1; // edge dof // tags[ii][1] = 0; // edge id // tags[ii][2] = i; // local dof id // tags[ii][3] = iend; // total number of dofs in this edge // } // } } else { for (ordinal_type i=0;i<card;++i) { tags[i][0] = 1; // edge dof tags[i][1] = 0; // edge id tags[i][2] = i; // local dof id tags[i][3] = card; // total number of dofs in this edge } } ordinal_type_array_1d_host tagView(&tags[0][0], card*4); // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays: // tags are constructed on host this->setOrdinalTagData(this->tagToOrdinal_, this->ordinalToTag_, tagView, this->basisCardinality_, tagSize, posScDim, posScOrd, posDfOrd); } }
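// ---------------------------------------------------------------------------
// A minimal sketch of the GETRF/GETRI step used above to invert the
// Vandermonde matrix, written against the raw Fortran LAPACK symbols instead
// of Teuchos::LAPACK. The extern declarations and invert_in_place() are
// assumptions for illustration; the workspace size mirrors the
// lwork = card*card choice made in the constructor.
// ---------------------------------------------------------------------------
#include <vector>
#include <stdexcept>

extern "C" {
  void dgetrf_(const int* m, const int* n, double* a, const int* lda,
               int* ipiv, int* info);
  void dgetri_(const int* n, double* a, const int* lda, const int* ipiv,
               double* work, const int* lwork, int* info);
}

// Inverts the n-by-n column-major matrix A in place.
void invert_in_place(std::vector<double>& A, int n)
{
  std::vector<int> ipiv(n);
  int info = 0;

  // LU-factorize A (P*L*U overwrites A, pivot indices go to ipiv).
  dgetrf_(&n, &n, A.data(), &n, ipiv.data(), &info);
  if (info != 0) throw std::runtime_error("dgetrf_ returned nonzero info");

  // Form A^{-1} from the LU factors.
  int lwork = n * n;
  std::vector<double> work(lwork);
  dgetri_(&n, A.data(), &n, ipiv.data(), work.data(), &lwork, &info);
  if (info != 0) throw std::runtime_error("dgetri_ returned nonzero info");
}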
void PBCmgr::maintain (const int_t step , const Field* P , const AuxField** Us , const AuxField** Uf , const bool timedep) // --------------------------------------------------------------------------- // Update storage for evaluation of high-order pressure boundary // condition. Storage order for each edge represents a CCW traverse // of element boundaries. // // If the velocity field varies in time on HOPB field boundaries // (e.g. due to time-varying BCs) the local fluid acceleration will be // estimated from input velocity fields by explicit extrapolation if // timedep is true. This correction cannot be carried out at the // first timestep, since the required extrapolation cannot be done. // If the acceleration is known, (for example, a known reference frame // acceleration) it is probably better to leave timedep unset, and to // use PBCmgr::accelerate() to add in the accelerative term. Note // also that since grad P is dotted with n, the unit outward normal, // at a later stage, timedep only needs to be set if there are // wall-normal accelerative terms. NB: The default value of timedep // is 1. // // Field* master gives a list of pressure boundary conditions with // which to traverse storage areas (note this assumes equal-order // interpolations). // // No smoothing is done to high-order spatial derivatives computed here. // --------------------------------------------------------------------------- { const real_t nu = Femlib::value ("KINVIS"); const real_t invDt = 1.0 / Femlib::value ("D_T"); const int_t nTime = Femlib::ivalue ("N_TIME"); const int_t nEdge = P -> _nbound; const int_t nZ = P -> _nz; const int_t nP = Geometry::nP(); const int_t base = Geometry::baseMode(); const int_t nMode = Geometry::nModeProc(); const int_t mLo = (Geometry::procID() == 0) ? 1 : 0; const AuxField* Ux = Us[0]; const AuxField* Uy = Us[1]; const AuxField* Uz = (nZ > 1) ? Us[2] : 0; const AuxField* Nx = Uf[0]; const AuxField* Ny = Uf[1]; const vector<Boundary*>& BC = P -> _bsys -> BCs (0); register Boundary* B; register int_t i, k, q; int_t m, offset, skip, Je; // -- Roll grad P storage area up, load new level of nonlinear terms Uf. rollv (_Pnx, nTime); rollv (_Pny, nTime); for (i = 0; i < nEdge; i++) { B = BC[i]; offset = B -> dOff (); skip = B -> dSkip(); for (k = 0; k < nZ; k++) { ROOTONLY if (k == 1) continue; Veclib::copy (nP, Nx -> _plane[k] + offset, skip, _Pnx[0][i][k], 1); Veclib::copy (nP, Ny -> _plane[k] + offset, skip, _Pny[0][i][k], 1); // -- For cylindrical coordinates, N_ are radius-premultiplied. Cancel. if (Geometry::cylindrical()) { B -> divY (_Pnx[0][i][k]); B -> divY (_Pny[0][i][k]); } } } // -- Add in -nu * curl curl u. vector<real_t> work (5 * sqr(nP) + 7 * nP + Integration::OrderMax + 1); real_t *UxRe, *UxIm, *UyRe, *UyIm, *UzRe, *UzIm, *tmp; real_t* wrk = &work[0]; real_t* xr = wrk + 5*sqr(nP) + 3*nP; real_t* xi = xr + nP; real_t* yr = xi + nP; real_t* yi = yr + nP; real_t* alpha = yi + nP; for (i = 0; i < nEdge; i++) { B = BC[i]; offset = B -> dOff (); skip = B -> dSkip(); ROOTONLY { // -- Deal with 2D/zero Fourier mode terms. UxRe = Ux -> _plane[0]; UyRe = Uy -> _plane[0]; B -> curlCurl (0,UxRe,0,UyRe,0,0,0,xr,0,yr,0,wrk); Blas::axpy (nP, -nu, xr, 1, _Pnx[0][i][0], 1); Blas::axpy (nP, -nu, yr, 1, _Pny[0][i][0], 1); } for (m = mLo; m < nMode; m++) { // -- Higher modes. 
UxRe = Ux -> _plane[2 * m] ; UxIm = Ux -> _plane[2 * m + 1]; UyRe = Uy -> _plane[2 * m]; UyIm = Uy -> _plane[2 * m + 1]; UzRe = Uz -> _plane[2 * m]; UzIm = Uz -> _plane[2 * m + 1]; B -> curlCurl (m+base,UxRe,UxIm,UyRe,UyIm,UzRe,UzIm,xr,xi,yr,yi,wrk); Blas::axpy (nP, -nu, xr, 1, _Pnx[0][i][2 * m], 1); Blas::axpy (nP, -nu, xi, 1, _Pnx[0][i][2 * m + 1], 1); Blas::axpy (nP, -nu, yr, 1, _Pny[0][i][2 * m], 1); Blas::axpy (nP, -nu, yi, 1, _Pny[0][i][2 * m + 1], 1); } } if (timedep) { // -- Estimate -du / dt by backwards differentiation and add in. if (step > 1) { Je = min (step - 1, nTime); tmp = xr; Integration::StifflyStable (Je, alpha); for (i = 0; i < nEdge; i++) { B = BC[i]; offset = B -> dOff (); skip = B -> dSkip(); for (k = 0; k < nZ; k++) { ROOTONLY if (k == 1) continue; Veclib::copy (nP, Ux -> _plane[k] + offset, skip, tmp, 1); Blas::scal (nP, alpha[0], tmp, 1); for (q = 0; q < Je; q++) Blas::axpy (nP, alpha[q + 1], _Unx[q][i][k], 1, tmp, 1); Blas::axpy (nP, -invDt, tmp, 1, _Pnx[0][i][k], 1); Veclib::copy (nP, Uy -> _plane[k] + offset, skip, tmp, 1); Blas::scal (nP, alpha[0], tmp, 1); for (q = 0; q < Je; q++) Blas::axpy (nP, alpha[q + 1], _Uny[q][i][k], 1, tmp, 1); Blas::axpy (nP, -invDt, tmp, 1, _Pny[0][i][k], 1); } } } // -- Roll velocity storage area up, load new level. rollv (_Unx, nTime); rollv (_Uny, nTime); for (i = 0; i < nEdge; i++) { B = BC[i]; offset = B -> dOff (); skip = B -> dSkip(); for (k = 0; k < nZ; k++) { ROOTONLY if (k == 1) continue; Veclib::copy (nP, Ux -> _plane[k] + offset, skip, _Unx[0][i][k], 1); Veclib::copy (nP, Uy -> _plane[k] + offset, skip, _Uny[0][i][k], 1); } } }
void AutomaticThread::start(const LockHolder&) { RELEASE_ASSERT(m_isRunning); RefPtr<AutomaticThread> preserveThisForThread = this; m_hasUnderlyingThread = true; ThreadIdentifier thread = createThread( "WTF::AutomaticThread", [=] () { if (verbose) dataLog(RawPointer(this), ": Running automatic thread!\n"); RefPtr<AutomaticThread> thread = preserveThisForThread; thread->threadDidStart(); if (!ASSERT_DISABLED) { LockHolder locker(*m_lock); ASSERT(m_condition->contains(locker, this)); } auto stopImpl = [&] (const LockHolder& locker) { thread->threadIsStopping(locker); thread->m_hasUnderlyingThread = false; }; auto stopPermanently = [&] (const LockHolder& locker) { m_isRunning = false; m_isRunningCondition.notifyAll(); stopImpl(locker); }; auto stopForTimeout = [&] (const LockHolder& locker) { stopImpl(locker); }; for (;;) { { LockHolder locker(*m_lock); for (;;) { PollResult result = poll(locker); if (result == PollResult::Work) break; if (result == PollResult::Stop) return stopPermanently(locker); RELEASE_ASSERT(result == PollResult::Wait); // Shut the thread down after one second. m_isWaiting = true; bool awokenByNotify = m_waitCondition.waitFor(*m_lock, 1_s); if (verbose && !awokenByNotify && !m_isWaiting) dataLog(RawPointer(this), ": waitFor timed out, but notified via m_isWaiting flag!\n"); if (m_isWaiting) { m_isWaiting = false; if (verbose) dataLog(RawPointer(this), ": Going to sleep!\n"); // It's important that we don't release the lock until we have completely // indicated that the thread is kaput. Otherwise we'll have a a notify // race that manifests as a deadlock on VM shutdown. return stopForTimeout(locker); } } } WorkResult result = work(); if (result == WorkResult::Stop) { LockHolder locker(*m_lock); return stopPermanently(locker); } RELEASE_ASSERT(result == WorkResult::Continue); } }); detachThread(thread); }
void o3d3xx::FrameGrabber::Run() { boost::asio::io_service::work work(this->io_service_); // // setup the camera for image acquistion // std::string cam_ip; int cam_port; try { cam_ip = this->cam_->GetIP(); cam_port = std::stoi(this->cam_->GetParameter("PcicTcpPort")); } catch (const o3d3xx::error_t& ex) { LOG(ERROR) << "Could not get IP/Port of the camera: " << ex.what(); return; } LOG(INFO) << "Camera connection info: ip=" << cam_ip << ", port=" << cam_port; try { this->cam_->RequestSession(); this->cam_->SetOperatingMode(o3d3xx::Camera::operating_mode::RUN); this->cam_->CancelSession(); } catch (const o3d3xx::error_t& ex) { LOG(ERROR) << "Failed to setup camera for image acquisition: " << ex.what(); return; } // // init the asio structures // boost::asio::ip::tcp::socket sock(this->io_service_); boost::asio::ip::tcp::endpoint endpoint( boost::asio::ip::address::from_string(cam_ip), cam_port); // // Forward declare our read handlers (because they need to call // eachother). // o3d3xx::FrameGrabber::WriteHandler result_schema_write_handler; o3d3xx::FrameGrabber::ReadHandler ticket_handler; o3d3xx::FrameGrabber::ReadHandler image_handler; // // image data callback // std::size_t bytes_read = 0; std::size_t buff_sz = 0; // bytes image_handler = [&, this] (const boost::system::error_code& ec, std::size_t bytes_transferred) { if (ec) { throw o3d3xx::error_t(ec.value()); } bytes_read += bytes_transferred; //DLOG(INFO) << "Read " << bytes_read << " image bytes of " // << buff_sz; if (bytes_read == buff_sz) { DLOG(INFO) << "Got full image!"; bytes_read = 0; // 1. verify the data if (o3d3xx::verify_image_buffer(this->back_buffer_)) { DLOG(INFO) << "Image OK"; // 2. move the data to the front buffer in O(1) time complexity this->front_buffer_mutex_.lock(); this->back_buffer_.swap(this->front_buffer_); this->front_buffer_mutex_.unlock(); // 3. 
notify waiting clients this->front_buffer_cv_.notify_all(); } else { LOG(WARNING) << "Bad image!"; } // read another ticket sock.async_read_some( boost::asio::buffer(this->ticket_buffer_.data(), o3d3xx::IMG_TICKET_SZ), ticket_handler); return; } sock.async_read_some( boost::asio::buffer(&this->back_buffer_[bytes_read], buff_sz - bytes_read), image_handler); }; // // ticket callback // std::size_t ticket_bytes_read = 0; std::size_t ticket_buff_sz = o3d3xx::IMG_TICKET_SZ; this->ticket_buffer_.resize(ticket_buff_sz); ticket_handler = [&, this] (const boost::system::error_code& ec, std::size_t bytes_transferred) { if (ec) { throw o3d3xx::error_t(ec.value()); } ticket_bytes_read += bytes_transferred; DLOG(INFO) << "Read " << ticket_bytes_read << " ticket bytes of " << ticket_buff_sz; if (ticket_bytes_read == ticket_buff_sz) { DLOG(INFO) << "Got full ticket!"; ticket_bytes_read = 0; if (o3d3xx::verify_ticket_buffer(this->ticket_buffer_)) { DLOG(INFO) << "Ticket OK"; buff_sz = o3d3xx::get_image_buffer_size(this->ticket_buffer_); DLOG(INFO) << "Image buffer size: " << buff_sz; this->back_buffer_.resize(buff_sz); sock.async_read_some( boost::asio::buffer(this->back_buffer_.data(), buff_sz), image_handler); return; } LOG(WARNING) << "Bad ticket!"; } sock.async_read_some( boost::asio::buffer(&this->ticket_buffer_[ticket_bytes_read], ticket_buff_sz - ticket_bytes_read), ticket_handler); }; // // Check that our request to set the result schema was successful // result_schema_write_handler = [&, this] (const boost::system::error_code& ec, std::size_t bytes_transferred) { if (ec) { throw o3d3xx::error_t(ec.value()); } DLOG(INFO) << "Wrote: " << bytes_transferred << " bytes to camera"; std::size_t c_buff_sz = 16 + 7; std::uint8_t resp_buff[c_buff_sz]; std::size_t resp_bytes_read = boost::asio::read(sock, boost::asio::buffer(resp_buff, c_buff_sz)); if (resp_bytes_read < c_buff_sz) { LOG(ERROR) << "Error getting c_command response!"; throw o3d3xx::error_t(O3D3XX_IO_ERROR); } if (resp_buff[20] != '*') { LOG(ERROR) << "Got back bad response from camera: '" << resp_buff[20] << "'"; throw o3d3xx::error_t(O3D3XX_PCIC_BAD_REPLY); } sock.async_read_some( boost::asio::buffer( this->ticket_buffer_.data(), ticket_buff_sz), ticket_handler); }; // // connect to the sensor and start streaming in image data // try { sock.async_connect(endpoint, [&, this] (const boost::system::error_code& ec) { if (ec) { throw o3d3xx::error_t(ec.value()); } boost::asio::async_write( sock, boost::asio::buffer(this->schema_buffer_.data(), this->schema_buffer_.size()), result_schema_write_handler); }); this->io_service_.run(); } catch (const std::exception& ex) { // // In here we should discern why the exception with thrown. // // Special case the "Stop()" request from the control thread // LOG(WARNING) << "Exception: " << ex.what(); } LOG(INFO) << "Framegrabber thread done."; }
int main(int argc, char *argv[]) { const char *url; int i, threads; pthread_t *t; int *args; lList *answer_list = NULL; lListElem *spooling_context; DENTER_MAIN(TOP_LAYER, "test_berkeleydb_mt"); /* parse commandline parameters */ if (argc < 3) { ERROR((SGE_EVENT, "usage: test_berkeleydb_mt <url> <threads> [<delay>]\n")); ERROR((SGE_EVENT, " <url> = path or host:database\n")); ERROR((SGE_EVENT, " <threads> = number of threads\n")); ERROR((SGE_EVENT, " <delay> = delay after writing [ms]\n")); SGE_EXIT(NULL, 1); } url = argv[1]; threads = atoi(argv[2]); if (argc > 3) { delay = atoi(argv[3]); } /* allocate memory for pthreads and arguments */ t = (pthread_t *)malloc(threads * sizeof(pthread_t)); args = (int *)malloc(threads * sizeof(int)); DPRINTF(("writing to database %s from %d threads\n", url, threads)); /* initialize spooling */ spooling_context = spool_create_dynamic_context(&answer_list, NULL, url, NULL); answer_list_output(&answer_list); if (spooling_context == NULL) { SGE_EXIT(NULL, EXIT_FAILURE); } spool_set_default_context(spooling_context); if (!spool_startup_context(&answer_list, spooling_context, true)) { answer_list_output(&answer_list); SGE_EXIT(NULL, EXIT_FAILURE); } answer_list_output(&answer_list); /* let n threads to parallel spooling */ for (i = 0; i < threads; i++) { args[i] = i + 1; pthread_create(&(t[i]), NULL, work, (void*)(&args[i])); } /* also work in current thread */ work((void *)0); /* wait for termination of all threads */ for (i = 0; i < threads; i++) { pthread_join(t[i], NULL); } /* shutdown spooling */ spool_shutdown_context(&answer_list, spooling_context); answer_list_output(&answer_list); sge_free(&t); DEXIT; return EXIT_SUCCESS; }
int sumNumbers(TreeNode *root) {
    int ret = 0;
    // traverse all root-to-leaf paths, accumulating into ret
    work(root, ret, 0);
    return ret;
}
magma_int_t magma_ztrevc3( magma_side_t side, magma_vec_t howmany, magma_int_t *select, // logical in Fortran magma_int_t n, magmaDoubleComplex *T, magma_int_t ldt, magmaDoubleComplex *VL, magma_int_t ldvl, magmaDoubleComplex *VR, magma_int_t ldvr, magma_int_t mm, magma_int_t *mout, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info ) { #define T(i,j) ( T + (i) + (j)*ldt ) #define VL(i,j) (VL + (i) + (j)*ldvl) #define VR(i,j) (VR + (i) + (j)*ldvr) #define work(i,j) (work + (i) + (j)*n) // .. Parameters .. const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; const magmaDoubleComplex c_one = MAGMA_Z_ONE; const magma_int_t nbmin = 16, nbmax = 128; const magma_int_t ione = 1; // .. Local Scalars .. magma_int_t allv, bothv, leftv, over, rightv, somev; magma_int_t i, ii, is, j, k, ki, iv, n2, nb, nb2, version; double ovfl, remax, scale, smin, smlnum, ulp, unfl; // Decode and test the input parameters bothv = (side == MagmaBothSides); rightv = (side == MagmaRight) || bothv; leftv = (side == MagmaLeft ) || bothv; allv = (howmany == MagmaAllVec); over = (howmany == MagmaBacktransVec); somev = (howmany == MagmaSomeVec); // Set mout to the number of columns required to store the selected // eigenvectors. if ( somev ) { *mout = 0; for( j=0; j < n; ++j ) { if ( select[j] ) { *mout += 1; } } } else { *mout = n; } *info = 0; if ( ! rightv && ! leftv ) *info = -1; else if ( ! allv && ! over && ! somev ) *info = -2; else if ( n < 0 ) *info = -4; else if ( ldt < max( 1, n ) ) *info = -6; else if ( ldvl < 1 || ( leftv && ldvl < n ) ) *info = -8; else if ( ldvr < 1 || ( rightv && ldvr < n ) ) *info = -10; else if ( mm < *mout ) *info = -11; else if ( lwork < max( 1, 2*n ) ) *info = -14; if ( *info != 0 ) { magma_xerbla( __func__, -(*info) ); return *info; } // Quick return if possible. if ( n == 0 ) { return *info; } // Use blocked version (2) if sufficient workspace. // Requires 1 vector to save diagonal elements, and 2*nb vectors for x and Q*x. // (Compared to dtrevc3, rwork stores 1-norms.) // Zero-out the workspace to avoid potential NaN propagation. nb = 2; if ( lwork >= n + 2*n*nbmin ) { version = 2; nb = (lwork - n) / (2*n); nb = min( nb, nbmax ); nb2 = 1 + 2*nb; lapackf77_zlaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n ); } else { version = 1; } // Set the constants to control overflow. unfl = lapackf77_dlamch( "Safe minimum" ); ovfl = 1. / unfl; lapackf77_dlabad( &unfl, &ovfl ); ulp = lapackf77_dlamch( "Precision" ); smlnum = unfl*( n / ulp ); // Store the diagonal elements of T in working array work. for( i=0; i < n; ++i ) { *work(i,0) = *T(i,i); } // Compute 1-norm of each column of strictly upper triangular // part of T to control overflow in triangular solver. rwork[0] = 0.; for( j=1; j < n; ++j ) { rwork[j] = cblas_dzasum( j, T(0,j), ione ); } magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0; timer_start( time_total ); if ( rightv ) { // ============================================================ // Compute right eigenvectors. // iv is index of column in current block. // Non-blocked version always uses iv=1; // blocked version starts with iv=nb, goes down to 1. // (Note the "0-th" column is used to store the original diagonal.) iv = 1; if ( version == 2 ) { iv = nb; } timer_start( time_trsv ); is = *mout - 1; for( ki=n-1; ki >= 0; --ki ) { if ( somev ) { if ( ! 
select[ki] ) { continue; } } smin = max( ulp*( MAGMA_Z_ABS1( *T(ki,ki) ) ), smlnum ); // -------------------------------------------------------- // Complex right eigenvector *work(ki,iv) = c_one; // Form right-hand side. for( k=0; k < ki; ++k ) { *work(k,iv) = -(*T(k,ki)); } // Solve upper triangular system: // [ T(1:ki-1,1:ki-1) - T(ki,ki) ]*X = scale*work. for( k=0; k < ki; ++k ) { *T(k,k) -= *T(ki,ki); if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) { *T(k,k) = MAGMA_Z_MAKE( smin, 0. ); } } if ( ki > 0 ) { lapackf77_zlatrs( "Upper", "No transpose", "Non-unit", "Y", &ki, T, &ldt, work(0,iv), &scale, rwork, info ); *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. ); } // Copy the vector x or Q*x to VR and normalize. if ( ! over ) { // ------------------------------ // no back-transform: copy x to VR and normalize n2 = ki+1; blasf77_zcopy( &n2, work(0,iv), &ione, VR(0,is), &ione ); ii = blasf77_izamax( &n2, VR(0,is), &ione ) - 1; remax = 1. / MAGMA_Z_ABS1( *VR(ii,is) ); blasf77_zdscal( &n2, &remax, VR(0,is), &ione ); for( k=ki+1; k < n; ++k ) { *VR(k,is) = c_zero; } } else if ( version == 1 ) { // ------------------------------ // version 1: back-transform each vector with GEMV, Q*x. time_trsv_sum += timer_stop( time_trsv ); timer_start( time_gemv ); if ( ki > 0 ) { blasf77_zgemv( "n", &n, &ki, &c_one, VR, &ldvr, work(0, iv), &ione, work(ki,iv), VR(0,ki), &ione ); } time_gemv_sum += timer_stop( time_gemv ); ii = blasf77_izamax( &n, VR(0,ki), &ione ) - 1; remax = 1. / MAGMA_Z_ABS1( *VR(ii,ki) ); blasf77_zdscal( &n, &remax, VR(0,ki), &ione ); timer_start( time_trsv ); } else if ( version == 2 ) { // ------------------------------ // version 2: back-transform block of vectors with GEMM // zero out below vector for( k=ki+1; k < n; ++k ) { *work(k,iv) = c_zero; } // Columns iv:nb of work are valid vectors. // When the number of vectors stored reaches nb, // or if this was last vector, do the GEMM if ( (iv == 1) || (ki == 0) ) { time_trsv_sum += timer_stop( time_trsv ); timer_start( time_gemm ); nb2 = nb-iv+1; n2 = ki+nb-iv+1; blasf77_zgemm( "n", "n", &n, &nb2, &n2, &c_one, VR, &ldvr, work(0,iv ), &n, &c_zero, work(0,nb+iv), &n ); time_gemm_sum += timer_stop( time_gemm ); // normalize vectors // TODO if somev, should copy vectors individually to correct location. for( k = iv; k <= nb; ++k ) { ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1; remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) ); blasf77_zdscal( &n, &remax, work(0,nb+k), &ione ); } lapackf77_zlacpy( "F", &n, &nb2, work(0,nb+iv), &n, VR(0,ki), &ldvr ); iv = nb; timer_start( time_trsv ); } else { iv -= 1; } } // blocked back-transform // Restore the original diagonal elements of T. for( k=0; k <= ki - 1; ++k ) { *T(k,k) = *work(k,0); } is -= 1; } } timer_stop( time_trsv ); timer_stop( time_total ); timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n", time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total ); if ( leftv ) { // ============================================================ // Compute left eigenvectors. // iv is index of column in current block. // Non-blocked version always uses iv=1; // blocked version starts with iv=1, goes up to nb. // (Note the "0-th" column is used to store the original diagonal.) iv = 1; is = 0; for( ki=0; ki < n; ++ki ) { if ( somev ) { if ( ! select[ki] ) { continue; } } smin = max( ulp*MAGMA_Z_ABS1( *T(ki,ki) ), smlnum ); // -------------------------------------------------------- // Complex left eigenvector *work(ki,iv) = c_one; // Form right-hand side. 
for( k = ki + 1; k < n; ++k ) { *work(k,iv) = -MAGMA_Z_CNJG( *T(ki,k) ); } // Solve conjugate-transposed triangular system: // [ T(ki+1:n,ki+1:n) - T(ki,ki) ]**H * X = scale*work. for( k = ki + 1; k < n; ++k ) { *T(k,k) -= *T(ki,ki); if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) { *T(k,k) = MAGMA_Z_MAKE( smin, 0. ); } } if ( ki < n-1 ) { n2 = n-ki-1; lapackf77_zlatrs( "Upper", "Conjugate transpose", "Non-unit", "Y", &n2, T(ki+1,ki+1), &ldt, work(ki+1,iv), &scale, rwork, info ); *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. ); } // Copy the vector x or Q*x to VL and normalize. if ( ! over ) { // ------------------------------ // no back-transform: copy x to VL and normalize n2 = n-ki; blasf77_zcopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione ); ii = blasf77_izamax( &n2, VL(ki,is), &ione ) + ki - 1; remax = 1. / MAGMA_Z_ABS1( *VL(ii,is) ); blasf77_zdscal( &n2, &remax, VL(ki,is), &ione ); for( k=0; k < ki; ++k ) { *VL(k,is) = c_zero; } } else if ( version == 1 ) { // ------------------------------ // version 1: back-transform each vector with GEMV, Q*x. if ( ki < n-1 ) { n2 = n-ki-1; blasf77_zgemv( "n", &n, &n2, &c_one, VL(0,ki+1), &ldvl, work(ki+1,iv), &ione, work(ki, iv), VL(0,ki), &ione ); } ii = blasf77_izamax( &n, VL(0,ki), &ione ) - 1; remax = 1. / MAGMA_Z_ABS1( *VL(ii,ki) ); blasf77_zdscal( &n, &remax, VL(0,ki), &ione ); } else if ( version == 2 ) { // ------------------------------ // version 2: back-transform block of vectors with GEMM // zero out above vector // could go from (ki+1)-NV+1 to ki for( k=0; k < ki; ++k ) { *work(k,iv) = c_zero; } // Columns 1:iv of work are valid vectors. // When the number of vectors stored reaches nb, // or if this was last vector, do the GEMM if ( (iv == nb) || (ki == n-1) ) { n2 = n-(ki+1)+iv; blasf77_zgemm( "n", "n", &n, &iv, &n2, &c_one, VL(0,ki-iv+1), &ldvl, work(ki-iv+1,1 ), &n, &c_zero, work(0, nb+1), &n ); // normalize vectors for( k=1; k <= iv; ++k ) { ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1; remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) ); blasf77_zdscal( &n, &remax, work(0,nb+k), &ione ); } lapackf77_zlacpy( "F", &n, &iv, work(0,nb+1), &n, VL(0,ki-iv+1), &ldvl ); iv = 1; } else { iv += 1; } } // blocked back-transform // Restore the original diagonal elements of T. for( k = ki + 1; k < n; ++k ) { *T(k,k) = *work(k,0); } is += 1; } } return *info; } // End of ZTREVC
void ISVDMultiCD::makePass() { Epetra_LAPACK lapack; Epetra_BLAS blas; bool firstPass = (curRank_ == 0); const int numCols = A_->NumVectors(); TEUCHOS_TEST_FOR_EXCEPTION( !firstPass && (numProc_ != numCols), std::logic_error, "RBGen::ISVDMultiCD::makePass(): after first pass, numProc should be numCols"); // compute W = I - Z T Z^T from current V_ Teuchos::RCP<Epetra_MultiVector> lclAZT, lclZ; double *Z_A, *AZT_A; int Z_LDA, AZT_LDA; int oldRank = 0; double Rerr = 0.0; if (!firstPass) { // copy V_ into workZ_ lclAZT = Teuchos::rcp( new Epetra_MultiVector(::View,*workAZT_,0,curRank_) ); lclZ = Teuchos::rcp( new Epetra_MultiVector(::View,*workZ_,0,curRank_) ); { Epetra_MultiVector lclV(::View,*V_,0,curRank_); *lclZ = lclV; } // compute the Householder QR factorization of the current right basis // Vhat = W*R int info, lwork = curRank_; std::vector<double> tau(curRank_), work(lwork); info = lclZ->ExtractView(&Z_A,&Z_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector Z."); lapack.GEQRF(numCols,curRank_,Z_A,Z_LDA,&tau[0],&work[0],lwork,&info); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling GEQRF on current right basis while constructing next pass coefficients."); if (debug_) { // we just took the QR factorization of a set of orthonormal vectors // they should have an R factor which is diagonal, with unit elements (\pm 1) // check it Rerr = 0.0; for (int j=0; j<curRank_; j++) { for (int i=0; i<j; i++) { Rerr += abs(Z_A[j*Z_LDA+i]); } Rerr += abs(abs(Z_A[j*Z_LDA+j]) - 1.0); } } // compute the block representation // W = I - Z T Z^T lapack.LARFT('F','C',numCols,curRank_,Z_A,Z_LDA,&tau[0],workT_->A(),workT_->LDA()); // LARFT left upper tri block of Z unchanged // note: it should currently contain R factor of V_, which is very close to // diag(\pm 1, ..., \pm 1) // // we need to set it to: // [1 0 0 ... 0] // [ 1 0 ... 0] // [ .... ] // [ 1] // // see documentation for LARFT // for (int j=0; j<curRank_; j++) { Z_A[j*Z_LDA+j] = 1.0; for (int i=0; i<j; i++) { Z_A[j*Z_LDA+i] = 0.0; } } // compute part of A W: A Z T // put this in workAZT_ // first, A Z info = lclAZT->Multiply('N','N',1.0,*A_,*lclZ,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Z"); // second, (A Z) T (in situ, as T is upper triangular) info = lclAZT->ExtractView(&AZT_A,&AZT_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector AZ."); blas.TRMM('R','U','N','N',numCols,curRank_,1.0,workT_->A(),workT_->LDA(),AZT_A,AZT_LDA); // save oldRank: it tells us the width of Z oldRank = curRank_; curRank_ = 0; numProc_ = 0; } else { // firstPass == true curRank_ = 0; numProc_ = 0; } while (numProc_ < numCols) { // // determine lup // // want lup >= lmin // lup <= lmax // need lup <= numCols - numProc // lup <= maxBasisSize - curRank // int lup; if (curRank_ == 0) { // first step uses startRank_ // this is not affected by lmin,lmax lup = startRank_; } else { // this value minimizes overall complexity, assuming fixed rank lup = (int)(curRank_ / Teuchos::ScalarTraits<double>::squareroot(2.0)); // contrain to [lmin,lmax] lup = (lup < lmin_ ? lmin_ : lup); lup = (lup > lmax_ ? 
lmax_ : lup); } // // now cap lup via maxBasisSize and the available data // these caps apply to all lup, as a result of memory and data constraints // // available data lup = (lup > numCols - numProc_ ? numCols - numProc_ : lup); // available memory lup = (lup > maxBasisSize_ - curRank_ ? maxBasisSize_ - curRank_ : lup); // get view of new vectors { const Epetra_MultiVector Aplus(::View,*A_,numProc_,lup); Epetra_MultiVector Unew(::View,*U_,curRank_,lup); // put them in U if (firstPass) { // new vectors are just Aplus Unew = Aplus; } else { // new vectors are Aplus - (A Z T) Z_i^T // specifically, Aplus - (A Z T) Z(numProc:numProc+lup-1,1:oldRank)^T Epetra_LocalMap lclmap(lup,0,A_->Comm()); Epetra_MultiVector Zi(::View,lclmap,&Z_A[numProc_],Z_LDA,oldRank); Unew = Aplus; int info = Unew.Multiply('N','T',-1.0,*lclAZT,Zi,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Wi"); } } // perform the incremental step incStep(lup); } // compute W V = V - Z T Z^T V // Z^T V is oldRank x curRank // T Z^T V is oldRank x curRank // we need T Z^T V in a local Epetra_MultiVector if (!firstPass) { Teuchos::RCP<Epetra_MultiVector> lclV; double *TZTV_A; int TZTV_LDA; int info; Epetra_LocalMap lclmap(oldRank,0,A_->Comm()); // get pointer to current V lclV = Teuchos::rcp( new Epetra_MultiVector(::View,*V_,0,curRank_) ); // create space for T Z^T V Epetra_MultiVector TZTV(lclmap,curRank_,false); // multiply Z^T V info = TZTV.Multiply('T','N',1.0,*lclZ,*lclV,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for Z^T V."); // get pointer to data in Z^T V info = TZTV.ExtractView(&TZTV_A,&TZTV_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector TZTV."); // multiply T (Z^T V) blas.TRMM('L','U','N','N',oldRank,curRank_,1.0,workT_->A(),workT_->LDA(),TZTV_A,TZTV_LDA); // multiply V - Z (T Z^T V) info = lclV->Multiply('N','N',-1.0,*lclZ,TZTV,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for W V."); } // // compute the new residuals // we know that A V = U S // if, in addition, A^T U = V S, then have singular subspaces // check residuals A^T U - V S, scaling the i-th column by sigma[i] // { // make these static, because makePass() will be likely be called again static Epetra_LocalMap lclmap(A_->NumVectors(),0,A_->Comm()); static Epetra_MultiVector ATU(lclmap,maxBasisSize_,false); // we know that A V = U S // if, in addition, A^T U = V S, then have singular subspaces // check residuals A^T U - V S, scaling the i-th column by sigma[i] Epetra_MultiVector ATUlcl(::View,ATU,0,curRank_); Epetra_MultiVector Ulcl(::View,*U_,0,curRank_); Epetra_MultiVector Vlcl(::View,*V_,0,curRank_); // compute A^T U int info = ATUlcl.Multiply('T','N',1.0,*A_,Ulcl,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply for A^T U."); Epetra_LocalMap rankmap(curRank_,0,A_->Comm()); Epetra_MultiVector S(rankmap,curRank_,true); for (int i=0; i<curRank_; i++) { S[i][i] = sigma_[i]; } // subtract V S from A^T U info = ATUlcl.Multiply('N','N',-1.0,Vlcl,S,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::computeBasis(): Error calling Epetra_MultiVector::Multiply for V S."); 
resNorms_.resize(curRank_); ATUlcl.Norm2(&resNorms_[0]); // scale by sigmas for (int i=0; i<curRank_; i++) { if (sigma_[i] != 0.0) { resNorms_[i] /= sigma_[i]; } } } // debugging checks std::vector<double> errnorms(curRank_); if (debug_) { int info; // Check that A V = U Sigma // get pointers to current U and V, create workspace for A V - U Sigma Epetra_MultiVector work(U_->Map(),curRank_,false), curU(::View,*U_,0,curRank_), curV(::View,*V_,0,curRank_); // create local MV for sigmas Epetra_LocalMap lclmap(curRank_,0,A_->Comm()); Epetra_MultiVector curS(lclmap,curRank_,true); for (int i=0; i<curRank_; i++) { curS[i][i] = sigma_[i]; } info = work.Multiply('N','N',1.0,curU,curS,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S."); info = work.Multiply('N','N',-1.0,*A_,curV,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S - A V."); work.Norm2(&errnorms[0]); for (int i=0; i<curRank_; i++) { if (sigma_[i] != 0.0) { errnorms[i] /= sigma_[i]; } } } // update pass counter curNumPasses_++; // print out some info const Epetra_Comm *comm = &A_->Comm(); if (comm->MyPID() == 0 && verbLevel_ >= 1) { std::cout << "------------- ISVDMultiCD::makePass() -----------" << std::endl << "| Number of passes: " << curNumPasses_ << std::endl << "| Current rank: " << curRank_ << std::endl << "| Current sigmas: " << std::endl; for (int i=0; i<curRank_; i++) { std::cout << "| " << sigma_[i] << std::endl; } if (debug_) { std::cout << "|DBG US-AV norms: " << std::endl; for (int i=0; i<curRank_; i++) { std::cout << "|DBG " << errnorms[i] << std::endl; } if (!firstPass) { std::cout << "|DBG R-I norm: " << Rerr << std::endl; } } } return; }
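The residual block near the end of makePass() follows the identity stated in its comments: the current factorization satisfies A V = U S by construction, so the quality check measures how far A^T U is from V S, column by column, relative to the singular value. As a sketch of what ends up in resNorms_:

\[
\text{resNorms}[i] \;=\; \frac{\lVert A^{T} u_i - \sigma_i v_i \rVert_2}{\sigma_i},
\qquad i = 0,\dots,\text{curRank\_}-1,\ \ \sigma_i \neq 0,
\]

and the debug-only branch computes the analogous scaled column norms of U S − A V.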
// a new work item was created for the currently executing node
inline void new_work_self(db::node *node, db::simple_tuple *stpl,
                          const process::work_modifier mod = process::mods::NOTHING)
{
   process::work work(node, stpl, process::mods::LOCAL_TUPLE | mod);
   new_work(node, work);
}
// DGELSS computes the minimum norm solution to a real linear
// least squares problem: minimize 2-norm(| b - A*x |)
// using the singular value decomposition (SVD) of A.
// A is an M-by-N matrix which may be rank-deficient.
//---------------------------------------------------------
void umSOLVE_LS(const DMat& mat, const DMat& B, DMat& X)
//---------------------------------------------------------
{
  if (!mat.ok()) {umWARNING("umSOLVE_LS()", "system is empty"); return;}

  DMat A(mat);    // work with copy of input.
  int rows=A.num_rows(), cols=A.num_cols(), mmn=A.min_mn();
  int LDB=A.max_mn(), NRHS=B.num_cols();
  if (rows!=B.num_rows()) {umERROR("umSOLVE_LS(A,B)", "Inconsistent matrix sizes.");}

  DVec s(mmn);    // allocate array for singular values

  // X must be big enough to store various results.
  // Resize X so that its leading dimension = max(M,N),
  // then load the set of right hand sides.
  X.resize(LDB,NRHS, true, 0.0);
  for (int j=1; j<=NRHS; ++j)      // loop across columns
    for (int i=1; i<=rows; ++i)    // loop down rows
      X(i,j) = B(i,j);

  // RCOND is used to determine the effective rank of A.
  // Singular values S(i) <= RCOND*S(1) are treated as zero.
  // If RCOND < 0, machine precision is used instead.
  //double rcond = 1.0 / 1.0e16;
  double rcond = -1.0;

  // NBN: ACML does not use the work vector.
  int mnLo=A.min_mn(), mnHi=A.max_mn(), rank=1, info=1;
  int lwork = 10*mnLo + std::max(2*mnLo, std::max(mnHi, NRHS));
  DVec work(lwork);

  // Solve the system
  GELSS(rows, cols, NRHS, A.data(), rows, X.data(), LDB, s.data(),
        rcond, rank, work.data(), lwork, info);

  //---------------------------------------------
  // Report:
  //---------------------------------------------
  if (info == 0) {
    umLOG(1, "umSOLVE_LS reports successful LS-solution."
             "\nRCOND = %0.6e, "
             "\nOptimal length of work array was %d\n", rcond, lwork);
  } else {
    if (info < 0) {
      X = 0.0;
      umERROR("umSOLVE_LS(DMat&, DMat&)",
              "Error in input argument (%d)\nNo solution or error bounds computed.", -info);
    } else if (info > 0) {
      X = 0.0;
      umERROR("umSOLVE_LS(DMat&, DMat&)",
              "\nThe algorithm for computing the SVD failed to converge.\n"
              "\n%d off-diagonal elements of an intermediate "
              "\nbidiagonal form did not converge to zero.\n"
              "\nRCOND = %0.6e, "
              "\nOptimal length of work array was %d.\n",
              info, rcond, lwork);
    }
  }
}
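As the header comment states, DGELSS returns the minimum-norm least-squares solution through the SVD of A, discarding singular values at or below RCOND*S(1) (machine precision when RCOND < 0, as passed here). A sketch of the solution it computes for each right-hand-side column b:

\[
A = U \Sigma V^{T}, \qquad
x \;=\; \sum_{\sigma_i > \mathrm{RCOND}\,\sigma_1} \frac{u_i^{T} b}{\sigma_i}\, v_i ,
\]

i.e. x = V Sigma^+ U^T b with a truncated pseudoinverse; the solution overwrites the right-hand sides in place, which is why X is resized to max(M,N) rows before the call.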
int main() { freopen("input.in","r",stdin); work(); return 0; }
//************************************************************************
// Inversion of a matrix given by its LU decomposition
//
// Input parameters:
//     A      -  LU decomposition of the matrix (output of the
//               LUDecomposition subroutine).
//     Pivots -  permutation table produced during the LU decomposition
//               (output of the LUDecomposition subroutine).
//     N      -  matrix size
//
// Output parameters:
//     A      -  matrix inverse of the original one. Array indexed
//               [1..N, 1..N]
//
// Result:
//     True  if the original matrix is non-singular.
//     False if the original matrix is singular.
//
// -- LAPACK routine (version 3.0) --
//    Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
//    Courant Institute, Argonne National Lab, and Rice University
//    February 29, 1992
//************************************************************************
bool inverselu(ap::real_2d_array& a, const ap::integer_1d_array& pivots, int n)
{
    bool result;
    ap::real_1d_array work;
    int i;
    int iws;
    int j;
    int jb;
    int jj;
    int jp;
    int jp1;
    double v;

    result = true;

    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return result;
    }
    work.setbounds(1, n);

    //
    // Form inv(U)
    //
    if( !invtriangular(a, n, true, false) )
    {
        result = false;
        return result;
    }

    //
    // Solve the equation inv(A)*L = inv(U) for inv(A).
    //
    for(j = n; j >= 1; j--)
    {
        //
        // Copy current column of L to WORK and replace with zeros.
        //
        for(i = j+1; i <= n; i++)
        {
            work(i) = a(i,j);
            a(i,j) = 0;
        }

        //
        // Compute current column of inv(A).
        //
        if( j<n )
        {
            jp1 = j+1;
            for(i = 1; i <= n; i++)
            {
                v = ap::vdotproduct(a.getrow(i, jp1, n), work.getvector(jp1, n));
                a(i,j) = a(i,j)-v;
            }
        }
    }

    //
    // Apply column interchanges.
    //
    for(j = n-1; j >= 1; j--)
    {
        jp = pivots(j);
        if( jp!=j )
        {
            ap::vmove(work.getvector(1, n), a.getcolumn(j, 1, n));
            ap::vmove(a.getcolumn(j, 1, n), a.getcolumn(jp, 1, n));
            ap::vmove(a.getcolumn(jp, 1, n), work.getvector(1, n));
        }
    }
    return result;
}
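The routine follows the LAPACK xGETRI scheme named in its header: U is inverted in place, the system X*L = inv(U) is solved column by column from right to left (with the columns of L stashed in work), and the recorded pivots are then undone with column interchanges. As a sketch, assuming the A = P*L*U convention of the underlying LAPACK factorization:

\[
A = P\,L\,U \;\Longrightarrow\; A^{-1} = U^{-1} L^{-1} P^{-1},
\qquad X := U^{-1} L^{-1}\ \text{obtained from}\ X\,L = U^{-1},
\]

so the final loop, which swaps columns j and pivots(j) for j = n-1, ..., 1, applies the remaining right-multiplication by P^{-1}.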
void operator()(){ work(); }
void Emulator::WorkerRun() { boost::asio::io_service::work work(mIo); mIo.run(); }
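The work object in WorkerRun() is the usual Boost.Asio idiom: it keeps mIo.run() from returning while the handler queue is momentarily empty, so the worker thread stays alive for handlers posted later. A minimal self-contained sketch of the same pattern (the names here are illustrative, not part of the emulator's API):

#include <boost/asio.hpp>
#include <boost/thread.hpp>

int main()
{
    boost::asio::io_service io;

    // The work guard keeps run() from returning while the queue is empty.
    boost::asio::io_service::work guard(io);

    // Worker thread blocks inside run() until the service is stopped.
    boost::thread worker([&io]() { io.run(); });

    // Handlers posted from any thread execute on the worker thread.
    io.post([]() { /* do something */ });

    io.stop();      // forces run() to return even though the guard is alive
    worker.join();
    return 0;
}

Destroying the guard (instead of calling stop()) is the graceful alternative: run() then returns once all queued handlers have finished.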
SEXP dieharder(SEXP genS, SEXP testS, SEXP seedS, SEXP psamplesS, SEXP verbS, SEXP infileS, SEXP ntupleS) { int verb, testarg; unsigned int i; SEXP result = NULL, vec, pv, name, desc, nkps; char *inputfile; /* Setup argv to allow call of parsecl() to let dieharder set globals */ char *argv[] = { "dieharder" }; optind = 0; parsecl(1, argv); /* Parse 'our' parameters from R */ generator = INTEGER_VALUE(genS); testarg = INTEGER_VALUE(testS); diehard = rgb = sts = user = 0; if (testarg < 100) { diehard = testarg; } else if (testarg < 200) { rgb = testarg - 100; } else if (testarg < 300) { sts = testarg - 200; } else { user = testarg - 300; } Seed = (unsigned long int) INTEGER_VALUE(seedS); /* (user-select) Seed, not (save switch) seed */ psamples = INTEGER_VALUE(psamplesS); verb = INTEGER_VALUE(verbS); inputfile = (char*) CHARACTER_VALUE(infileS); ntuple = INTEGER_VALUE(ntupleS); rdh_testptr = NULL; rdh_dtestptr = NULL; /* to be safe, explicitly flag as NULL; cf test in output.c */ if (strcmp(inputfile, "") != 0) { strncpy(filename, inputfile, 128); fromfile = 1; /* flag this as file input */ } if (Seed == 0) { seed = random_seed(); } else { seed = (unsigned long int) Seed; } if (verb) { Rprintf("Dieharder called with gen=%d test=%d seed=%lu\n", generator, diehard, seed); quiet = 0; hist_flag = 1; } else { quiet = 1; /* override dieharder command-line default */ hist_flag = 0; } /* Now do the work that dieharder.c does */ startup(); work(); gsl_rng_free(rng); reset_bit_buffers(); /* And then bring our results back to R */ /* create vector of size four: [0] is vector (!!) ks_pv, [1] is pvalues vec, [2] name, [3] nkps */ PROTECT(result = allocVector(VECSXP, 4)); /* alloc vector and scalars, and set it */ PROTECT(pv = allocVector(REALSXP, rdh_dtestptr->nkps)); PROTECT(name = allocVector(STRSXP, 1)); PROTECT(nkps = allocVector(INTSXP, 1)); if (rdh_testptr != NULL && rdh_dtestptr != NULL) { for (i=0; i<rdh_dtestptr->nkps; i++) { /* there can be nkps p-values per test */ REAL(pv)[i] = rdh_testptr[i]->ks_pvalue; } PROTECT(vec = allocVector(REALSXP, rdh_testptr[0]->psamples)); /* alloc vector and set it */ for (i = 0; i < rdh_testptr[0]->psamples; i++) { REAL(vec)[i] = rdh_testptr[0]->pvalues[i]; } SET_STRING_ELT(name, 0, mkChar(rdh_dtestptr->name)); INTEGER(nkps)[0] = rdh_dtestptr->nkps; /* nb of Kuiper KS p-values in pv vector */ } else { PROTECT(vec = allocVector(REALSXP, 1)); REAL(pv)[0] = R_NaN; REAL(vec)[0] = R_NaN; SET_STRING_ELT(name, 0, mkChar("")); INTEGER(nkps)[0] = R_NaN; } /* insert vectors and scalars into result vector */ SET_VECTOR_ELT(result, 0, pv); SET_VECTOR_ELT(result, 1, vec); SET_VECTOR_ELT(result, 2, name); SET_VECTOR_ELT(result, 3, nkps); UNPROTECT(5); return result; }
extern "C" magma_int_t magma_zgetrf2_gpu( magma_int_t m, magma_int_t n, magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda, magma_int_t *ipiv, magma_queue_t queues[2], magma_int_t *info ) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= ZGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 3 BLAS version of the algorithm. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDDA (input) INTEGER The leading dimension of the array A. LDDA >= max(1,M). IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. ===================================================================== */ #define dA(i_, j_) dA, dA_offset + (i_)*nb + (j_)*nb*ldda #define dAT(i_, j_) dAT, dAT_offset + (i_)*nb*lddat + (j_)*nb #define dAP(i_, j_) dAP, (i_) + (j_)*maxm #define work(i_) (work + (i_)) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t iinfo, nb; magma_int_t maxm, maxn, mindim; magma_int_t i, j, rows, s, lddat, ldwork; magmaDoubleComplex_ptr dAT, dAP; magmaDoubleComplex *work; size_t dAT_offset; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (ldda < max(1,m)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; /* Function Body */ mindim = min(m, n); nb = magma_get_zgetrf_nb(m); s = mindim / nb; if (nb <= 1 || nb >= min(m,n)) { /* Use CPU code. */ if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, m*n )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } magma_zgetmatrix( m, n, dA(0,0), ldda, work(0), m, queues[0] ); lapackf77_zgetrf( &m, &n, work, &m, ipiv, info ); magma_zsetmatrix( m, n, work(0), m, dA(0,0), ldda, queues[0] ); magma_free_cpu( work ); } else { /* Use hybrid blocked code. 
*/ maxm = ((m + 31)/32)*32; maxn = ((n + 31)/32)*32; if ( MAGMA_SUCCESS != magma_zmalloc( &dAP, nb*maxm )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } // square matrices can be done in place; // rectangular requires copy to transpose if ( m == n ) { dAT = dA; dAT_offset = dA_offset; lddat = ldda; magmablas_ztranspose_inplace( m, dAT(0,0), lddat, queues[0] ); } else { lddat = maxn; // N-by-M dAT_offset = 0; if ( MAGMA_SUCCESS != magma_zmalloc( &dAT, lddat*maxm )) { magma_free( dAP ); *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magmablas_ztranspose( m, n, dA(0,0), ldda, dAT(0,0), lddat, queues[0] ); } ldwork = maxm; /* if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, ldwork*nb ) ) { magma_free( dAP ); if ( dA != dAT ) magma_free( dAT ); *info = MAGMA_ERR_HOST_ALLOC; return *info; } */ cl_mem work_mapped = clCreateBuffer( gContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, ldwork*nb * sizeof(magmaDoubleComplex), NULL, NULL ); work = (magmaDoubleComplex*) clEnqueueMapBuffer( queues[0], work_mapped, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, ldwork*nb * sizeof(magmaDoubleComplex), 0, NULL, NULL, NULL ); for( j=0; j < s; j++ ) { // download j-th panel magmablas_ztranspose( nb, m-j*nb, dAT(j,j), lddat, dAP(0,0), maxm, queues[0] ); clFlush( queues[0] ); magma_queue_sync( queues[0] ); magma_zgetmatrix_async( m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queues[1], NULL ); clFlush( queues[1] ); if ( j > 0 ) { magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit, n - (j+1)*nb, nb, c_one, dAT(j-1,j-1), lddat, dAT(j-1,j+1), lddat, queues[0] ); magma_zgemm( MagmaNoTrans, MagmaNoTrans, n-(j+1)*nb, m-j*nb, nb, c_neg_one, dAT(j-1,j+1), lddat, dAT(j, j-1), lddat, c_one, dAT(j, j+1), lddat, queues[0] ); } magma_queue_sync( queues[1] ); // do the cpu part rows = m - j*nb; lapackf77_zgetrf( &rows, &nb, work, &ldwork, ipiv+j*nb, &iinfo ); if ( *info == 0 && iinfo > 0 ) *info = iinfo + j*nb; for( i=j*nb; i < j*nb + nb; ++i ) { ipiv[i] += j*nb; } magmablas_zlaswp( n, dAT(0,0), lddat, j*nb + 1, j*nb + nb, ipiv, 1, queues[0] ); clFlush( queues[0] ); // upload j-th panel magma_zsetmatrix_async( m-j*nb, nb, work(0), ldwork, dAP(0,0), maxm, queues[1], NULL ); magma_queue_sync( queues[1] ); magmablas_ztranspose( m-j*nb, nb, dAP(0,0), maxm, dAT(j,j), lddat, queues[0] ); clFlush( queues[0] ); // do the small non-parallel computations (next panel update) if ( s > (j+1) ) { magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit, nb, nb, c_one, dAT(j, j ), lddat, dAT(j, j+1), lddat, queues[0] ); magma_zgemm( MagmaNoTrans, MagmaNoTrans, nb, m-(j+1)*nb, nb, c_neg_one, dAT(j, j+1), lddat, dAT(j+1, j ), lddat, c_one, dAT(j+1, j+1), lddat, queues[0] ); } else { magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit, n-s*nb, nb, c_one, dAT(j, j ), lddat, dAT(j, j+1), lddat, queues[0] ); magma_zgemm( MagmaNoTrans, MagmaNoTrans, n-(j+1)*nb, m-(j+1)*nb, nb, c_neg_one, dAT(j, j+1), lddat, dAT(j+1, j ), lddat, c_one, dAT(j+1, j+1), lddat, queues[0] ); } } magma_int_t nb0 = min( m - s*nb, n - s*nb ); if ( nb0 > 0 ) { rows = m - s*nb; magmablas_ztranspose( nb0, rows, dAT(s,s), lddat, dAP(0,0), maxm, queues[0] ); clFlush( queues[0] ); magma_queue_sync( queues[0] ); magma_zgetmatrix_async( rows, nb0, dAP(0,0), maxm, work(0), ldwork, queues[1], NULL ); magma_queue_sync( queues[1] ); // do the cpu part lapackf77_zgetrf( &rows, &nb0, work, &ldwork, ipiv+s*nb, &iinfo ); if ( (*info == 0) && (iinfo > 0) ) *info = iinfo + s*nb; for( i=s*nb; i < s*nb + nb0; ++i ) { ipiv[i] += s*nb; } magmablas_zlaswp( n, dAT(0,0), 
lddat, s*nb + 1, s*nb + nb0, ipiv, 1, queues[0] ); clFlush( queues[0] ); // upload j-th panel magma_zsetmatrix_async( rows, nb0, work(0), ldwork, dAP(0,0), maxm, queues[1], NULL ); magma_queue_sync( queues[1] ); magmablas_ztranspose( rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queues[0] ); clFlush( queues[0] ); magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit, n-s*nb-nb0, nb0, c_one, dAT(s,s), lddat, dAT(s,s)+nb0, lddat, queues[0] ); } // undo transpose if ( dA == dAT ) { magmablas_ztranspose_inplace( m, dAT(0,0), lddat, queues[0] ); } else { magmablas_ztranspose( n, m, dAT(0,0), lddat, dA(0,0), ldda, queues[0] ); magma_free( dAT ); } magma_queue_sync( queues[0] ); magma_queue_sync( queues[1] ); magma_free( dAP ); // magma_free_cpu( work ); clEnqueueUnmapMemObject( queues[0], work_mapped, work, 0, NULL, NULL ); clReleaseMemObject( work_mapped ); } return *info; } /* magma_zgetrf_gpu */
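The main loop above is the right-looking blocked LU the header describes, executed on the transposed matrix dAT so that panels are contiguous (hence the Right/Upper TRSM calls). Per iteration, once the CPU has factored the nb-wide panel and the row swaps have been applied, the trailing submatrix is updated; written on the untransposed matrix, with A21 already overwritten by L21 and A12 about to become U12, the update is, as a sketch:

\[
\begin{aligned}
A_{12} &\leftarrow L_{11}^{-1} A_{12} && \text{(TRSM: unit lower triangular solve)}\\
A_{22} &\leftarrow A_{22} - A_{21} A_{12} && \text{(GEMM: rank-}nb\text{ update)}
\end{aligned}
\]

Overlapping the panel transfer and CPU zgetrf on queues[1] with the trailing GEMM on queues[0] is the intent of the two-queue structure, so the host factorization is largely hidden behind the GPU update.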