// Compile-error test case: an OpenMP parallel region whose num_threads clause
// is given `n`. The trailing dg-error directive on the pragma line records the
// diagnostic text the compiler is expected to emit ("must be integral").
// NOTE(review): the template header and the declaration of `n` are not visible
// in this excerpt.
void S<T>::test()
{
  #pragma omp parallel num_threads(n)	// { dg-error "must be integral" }
    work();
}
/*
 * Receive callback for the /camera/depth_registered/points subscription
 *
 * Converts the incoming cloud and image, runs the perception pipeline on
 * them and lets every capability enrich the resulting perceived objects.
 *
 * @param inputImage  image message; converted to BGR8 and deep-copied
 * @param inputCloud  point cloud message; converted to a PCL XYZRGB cloud
 * @return the perceived objects converted to their message representation
 */
std::vector<suturo_perception_msgs::PerceivedObject> SuturoPerceptionKnowledgeROSNode::receive_image_and_cloud(const sensor_msgs::ImageConstPtr& inputImage, const sensor_msgs::PointCloud2ConstPtr& inputCloud)
{
  // process only one cloud
  pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud_in (new pcl::PointCloud<pcl::PointXYZRGB>());
  pcl::fromROSMsg(*inputCloud,*cloud_in);
  logger.logInfo((boost::format("Received a new point cloud: size = %s") % cloud_in->points.size()).str());

  // Gazebo sends us unorganized pointclouds!
  // Reorganize them to be able to compute the ROI of the objects
  // This workaround is only tested for gazebo 1.9!
  if(!cloud_in->isOrganized ())
  {
    logger.logInfo((boost::format("Received an unorganized PointCloud: %d x %d .Convert it to a organized one ...") % cloud_in->width % cloud_in->height ).str());

    pcl::PointCloud<pcl::PointXYZRGB>::Ptr org_cloud (new pcl::PointCloud<pcl::PointXYZRGB>());
    org_cloud->width = 640;
    org_cloud->height = 480;
    org_cloud->is_dense = false;
    org_cloud->points.resize(640 * 480);

    // Copy at most as many points as the fixed-size organized cloud can hold;
    // a larger input would otherwise be written past the end of the buffer.
    const size_t available = cloud_in->points.size();
    const size_t capacity = org_cloud->points.size();
    const size_t toCopy = (available < capacity) ? available : capacity;
    if (toCopy < available)
    {
      logger.logError("Unorganized PointCloud exceeds 640x480 points. Surplus points are dropped");
    }
    for (size_t i = 0; i < toCopy; i++) {
        org_cloud->points[i]=cloud_in->points[i];
    }

    cloud_in = org_cloud;
  }

  cv_bridge::CvImagePtr cv_ptr;
  cv_ptr = cv_bridge::toCvCopy(inputImage, enc::BGR8);

  // Make a deep copy of the passed cv::Mat and set a new
  // boost pointer to it.
  boost::shared_ptr<cv::Mat> img(new cv::Mat(cv_ptr->image.clone()));
  sp.setOriginalRGBImage(img);

  logger.logInfo("processing...");
  sp.setOriginalCloud(cloud_in);
  sp.processCloudWithProjections(cloud_in);
  logger.logInfo("Cloud processed. Lock buffer and return the results");      

  // NOTE(review): the mutex is released manually at the end of the function;
  // an exception thrown in between would leave it locked.
  mutex.lock();
  perceivedObjects = sp.getPerceivedObjects();

  // NOTE(review): this only triggers when BOTH dimensions differ; confirm
  // that a mismatch in a single dimension is meant to be ignored.
  if(sp.getOriginalRGBImage()->cols != sp.getOriginalCloud()->width
      && sp.getOriginalRGBImage()->rows != sp.getOriginalCloud()->height)
  {
    // Adjust the ROI if the image is at 1280x1024 and the pointcloud is at 640x480
    if(sp.getOriginalRGBImage()->cols == 1280 && sp.getOriginalRGBImage()->rows == 1024)
    {
      for (size_t i = 0; i < perceivedObjects.size(); i++) {
          ROI roi = perceivedObjects.at(i).get_c_roi();
          roi.origin.x*=2;
          roi.origin.y*=2;
          roi.width*=2;
          roi.height*=2;
          perceivedObjects.at(i).set_c_roi(roi);
      }
    }
    else
    {
      logger.logError("UNSUPPORTED MIXTURE OF IMAGE AND POINTCLOUD DIMENSIONS");
    }
  }

  // Execution pipeline
  // Each capability provides an enrichment for the
  // returned PerceivedObject

  // initialize threadpool
  boost::asio::io_service ioService;
  boost::thread_group threadpool;
  std::auto_ptr<boost::asio::io_service::work> work(new boost::asio::io_service::work(ioService));

  // Add worker threads to threadpool
  for(int i = 0; i < numThreads; ++i)
  {
    threadpool.create_thread(
      boost::bind(&boost::asio::io_service::run, &ioService)
      );
  }

  for (size_t i = 0; i < perceivedObjects.size(); i++) 
  {
    // Initialize Capabilities
    ColorAnalysis ca(perceivedObjects[i]);
    ca.setLowerSThreshold(color_analysis_lower_s);
    ca.setUpperSThreshold(color_analysis_upper_s);
    ca.setLowerVThreshold(color_analysis_lower_v);
    ca.setUpperVThreshold(color_analysis_upper_v);
    suturo_perception_shape_detection::RandomSampleConsensus sd(perceivedObjects[i]);
    //suturo_perception_vfh_estimation::VFHEstimation vfhe(perceivedObjects[i]);
    // suturo_perception_3d_capabilities::CuboidMatcherAnnotator cma(perceivedObjects[i]);
    // Init the cuboid matcher with the table coefficients
    suturo_perception_3d_capabilities::CuboidMatcherAnnotator cma(perceivedObjects[i], sp.getTableCoefficients() );

    // post work to threadpool (boost::bind stores a copy of each capability)
    ioService.post(boost::bind(&ColorAnalysis::execute, ca));
    ioService.post(boost::bind(&suturo_perception_shape_detection::RandomSampleConsensus::execute, sd));
    //ioService.post(boost::bind(&suturo_perception_vfh_estimation::VFHEstimation::execute, vfhe));
    ioService.post(boost::bind(&suturo_perception_3d_capabilities::CuboidMatcherAnnotator::execute, cma));

    // Is 2d recognition enabled?
    if(!recognitionDir.empty())
    {
      // perceivedObjects[i].c_recognition_label_2d="";
      // The 2D label annotator runs synchronously on the calling thread
      suturo_perception_2d_capabilities::LabelAnnotator2D la(perceivedObjects[i], sp.getOriginalRGBImage(), object_matcher_);
      la.execute();
    }
    else
    {
      // Set an empty label
      perceivedObjects[i].set_c_recognition_label_2d("");
    }
  }
  //boost::this_thread::sleep(boost::posix_time::microseconds(1000));
  // wait for thread completion.
  // destroy the work object to wait for all queued tasks to finish
  work.reset();
  ioService.run();
  threadpool.join_all();

  std::vector<suturo_perception_msgs::PerceivedObject> perceivedObjs = *convertPerceivedObjects(&perceivedObjects); // TODO handle images in this method

  mutex.unlock();

  return perceivedObjs;
}
Exemple #3
0
// Program entry point: performs the one-time setup, then runs the workload.
int main()
{
	init();   // setup step
	work();   // main workload

	return 0;
}
Exemple #4
0
/*
 * Restore path for a hibernation image stored in `image_filename`.
 *
 * Reads the IOHibernateImageHeader from offset 0 of the file, validates it,
 * reserves room for the image below getmemorylimit(), copies the header
 * there and, when the header announces a preview, loads and draws it before
 * reading the remainder of the image.
 *
 * NOTE(review): the definition continues beyond the last line visible in this
 * excerpt (the function's closing brace is not shown).
 */
void HibernateBoot(char *image_filename)
{
	long long size, imageSize, codeSize, allocSize;
	long mem_base;
	IOHibernateImageHeader _header;
	IOHibernateImageHeader * header = &_header;
	long buffer;
	
	/* Read only the header into the stack copy first. */
	size = ReadFileAtOffset (image_filename, header, 0, sizeof(IOHibernateImageHeader));
	/* NOTE(review): `size` is a long long but is printed with %x - confirm the
	   local printf handles this as intended. */
	printf("header read size %x\n", size);
		
	imageSize = header->image1Size;
	/* restore1PageCount is scaled by 4096 (<< 12) to get a byte count. */
	codeSize  = header->restore1PageCount << 12;
	if (kIOHibernateHeaderSignature != header->signature)
	{
		printf ("Incorrect image signature\n");
		return;
	}
	/* A non-zero encryptStart is treated as an encrypted image and refused. */
	if (header->encryptStart)
	{
		printf ("Resuming from Encrypted image is unsupported.\n"
				"Uncheck \"Use secure virtual memory\" in \"Security\" pane on system preferences.\n"
				"Press any key to proceed with normal boot.\n");
		getc ();
		return;
	}
// depends on NVRAM
#if 0
	{
		uint32_t machineSignature;
		size = GetProp(gChosenPH, kIOHibernateMachineSignatureKey, 
					   (char *)&machineSignature, sizeof(machineSignature));
		if (size != sizeof(machineSignature)) machineSignature = 0;
		if (machineSignature != header->machineSignature)
			break;
	}
#endif
		
	/* Image size plus sizeof(hibernate_graphics_t) rounded up to a multiple of 4096. */
	allocSize = imageSize + ((4095 + sizeof(hibernate_graphics_t)) & ~4095);

	/* Place the image directly below the reported memory limit. */
	mem_base = getmemorylimit() - allocSize;//TODO: lower this
		
	/* NOTE(review): `mem_base` is a long printed with %x. */
	printf("mem_base %x\n", mem_base);
			
	/* Bail out unless the end of the allocation lies below
	   1024*bootInfo->extmem + 0x100000. */
	if (!((long long)mem_base+allocSize<1024*bootInfo->extmem+0x100000))
	{
		printf ("Not enough space to restore image. Press any key to proceed with normal boot.\n");
		getc ();
		return;
	}

	/* Move the header to its final location and keep using that copy. */
	bcopy(header, (void *) mem_base, sizeof(IOHibernateImageHeader));
	header = (IOHibernateImageHeader *) mem_base;
		
	/* The header is already in place: the remaining payload starts right after it. */
	imageSize -= sizeof(IOHibernateImageHeader);
	buffer = (long)(header + 1);
	
	if (header->previewSize)
	{
		/* Offset of the preview data relative to `buffer`. */
		uint64_t preview_offset = header->fileExtentMapSize - sizeof(header->fileExtentMap) + codeSize;
		uint8_t progressSaveUnder[kIOHibernateProgressCount][kIOHibernateProgressSaveUnderSize];
			
		/* First read everything up to and including the preview, then draw it. */
		ReadFileAtOffset (image_filename, (char *)buffer, sizeof(IOHibernateImageHeader), preview_offset+header->previewSize);
		drawPreview ((void *)(long)(buffer+preview_offset + header->previewPageListSize), &(progressSaveUnder[0][0]));
		/* Globals set for the duration of the second read; sector size taken as 512 bytes. */
		previewTotalSectors = (imageSize-(preview_offset+header->previewSize))/512;
		previewLoadedSectors = 0;
		previewSaveunder = &(progressSaveUnder[0][0]);
		/* Read whatever follows the preview, if anything. */
		if (preview_offset+header->previewSize<imageSize)
			ReadFileAtOffset (image_filename, (char *)(long)(buffer+preview_offset+header->previewSize), 
							  sizeof(IOHibernateImageHeader)+preview_offset+header->previewSize,
							  imageSize-(preview_offset+header->previewSize));
		/* Reset the preview globals; progressSaveUnder goes out of scope below. */
		previewTotalSectors = 0;
		previewLoadedSectors = 0;
		previewSaveunder = 0;		
#if 0
		AsereBLN:
		check_vga_nvidia() didn't work as expected (recursion level > 0 & return value).
		Unforutnaltely I cannot find a note why to switch back to text mode for nVidia cards only
		and because it check_vga_nvidia does not work (cards normally are behind a bridge) I will
		remove it completely
		setVideoMode( VGA_TEXT_MODE, 0 );
#endif
	}
Exemple #5
0
 // Queue a new aggregate: wrap (node, stpl) in a work item flagged as a
 // local tuple with forced aggregation and hand it to new_agg().
    inline void new_work_agg(db::node *node, db::simple_tuple *stpl)
    {
       process::work agg_item(node,
                              stpl,
                              process::mods::LOCAL_TUPLE | process::mods::FORCE_AGGREGATE);

       new_agg(agg_item);
    }
    /**
     * Assembles the discrete advection operator by applying it to each unit
     * vector in turn, writes the assembled matrix to "WeakAdvMatrix.txt",
     * computes its eigenvalues with Lapack::Dgeev and reports them both in
     * "Eigenvalues.txt" and on standard output.
     *
     * The matrix is written to disk BEFORE the eigenvalue solve because
     * Dgeev overwrites its input matrix.
     */
    void EigenValuesAdvection::v_DoSolve()
    {
        int nvariables = 1;
        int i,dofs = GetNcoeffs();
        //bool UseContCoeffs = false;

        Array<OneD, Array<OneD, NekDouble> > inarray(nvariables);
        Array<OneD, Array<OneD, NekDouble> > tmp(nvariables);
        Array<OneD, Array<OneD, NekDouble> > outarray(nvariables);
        Array<OneD, Array<OneD, NekDouble> > WeakAdv(nvariables);

        int npoints = GetNpoints();
        int ncoeffs = GetNcoeffs();

        switch (m_projectionType)
        {
        case MultiRegions::eDiscontinuous:
            {
                dofs = ncoeffs;
                break;
            }
        case MultiRegions::eGalerkin:
        case MultiRegions::eMixed_CG_Discontinuous:
            {
                //dofs = GetContNcoeffs();
                //UseContCoeffs = true;
                break;
            }
        }

        cout << endl;
        cout << "Num Phys Points = " << npoints << endl; // physical points
        cout << "Num Coeffs      = " << ncoeffs << endl; //
        cout << "Num Cont Coeffs = " << dofs << endl;

        inarray[0]  = Array<OneD, NekDouble>(npoints,0.0);
        outarray[0] = Array<OneD, NekDouble>(npoints,0.0);
        tmp[0] = Array<OneD, NekDouble>(npoints,0.0);

        WeakAdv[0]  = Array<OneD, NekDouble>(ncoeffs,0.0);
        Array<OneD, NekDouble> MATRIX(npoints*npoints,0.0);

        for (int j = 0; j < npoints; j++)
        {
            inarray[0][j] = 1.0;

            /// Feeding the weak Advection operator with a vector (inarray)
            /// Looping on inarray and changing the position of the only non-zero entry
            /// we simulate the multiplication by the identity matrix.
            /// The results stored in outarray is one of the columns of the weak advection operators
            /// which are then stored in MATRIX for the further eigenvalues calculation.

            switch (m_projectionType)
            {
            case MultiRegions::eDiscontinuous:
                {
                    WeakDGAdvection(inarray, WeakAdv,true,true,1);

                    m_fields[0]->MultiplyByElmtInvMass(WeakAdv[0],WeakAdv[0]);

                    m_fields[0]->BwdTrans(WeakAdv[0],outarray[0]);

                    Vmath::Neg(npoints,outarray[0],1);
                    break;
                }
            case MultiRegions::eGalerkin:
            case MultiRegions::eMixed_CG_Discontinuous:
                {
                    // Calculate -V\cdot Grad(u);
                    for(i = 0; i < nvariables; ++i)
                    {
                        //Projection
                        m_fields[i]->FwdTrans(inarray[i],WeakAdv[i]);

                        m_fields[i]->BwdTrans_IterPerExp(WeakAdv[i],tmp[i]);

                        //Advection operator
                        AdvectionNonConservativeForm(m_velocity,tmp[i],outarray[i]);

                        Vmath::Neg(npoints,outarray[i],1);

                        //m_fields[i]->MultiplyByInvMassMatrix(WeakAdv[i],WeakAdv[i]);
                        //Projection
                        m_fields[i]->FwdTrans(outarray[i],WeakAdv[i]);

                        m_fields[i]->BwdTrans_IterPerExp(WeakAdv[i],outarray[i]);
                    }
                    break;
                }
            }

            /// The result held in outarray is copied into MATRIX starting at
            /// index j with stride npoints.
            Vmath::Vcopy(npoints,&(outarray[0][0]),1,&(MATRIX[j]),npoints);

            /// Set the j-th entry of inarray back to zero
            inarray[0][j] = 0.0;
        }

        ////////////////////////////////////////////////////////
        //Print Matrix (must happen before Dgeev destroys MATRIX)
        FILE *mFile = fopen ("WeakAdvMatrix.txt","w");
        if (mFile != NULL)
        {
            for(int j = 0; j<npoints; j++)
            {
                for(int k = 0; k<npoints; k++)
                {
                    fprintf(mFile,"%e ",MATRIX[j*npoints+k]);
                }
                fprintf(mFile,"\n");
            }
            fclose (mFile);
        }
        else
        {
            cout << "Unable to open WeakAdvMatrix.txt for writing" << endl;
        }

        ////////////////////////////////////////////////////////////////////////////////
        /// Calculating the eigenvalues of the weak advection operator stored in (MATRIX)
        /// using Lapack routines

        char jobvl = 'N';
        char jobvr = 'N';
        int info = 0, lwork = 3*npoints;
        NekDouble dum;

        Array<OneD, NekDouble> EIG_R(npoints);
        Array<OneD, NekDouble> EIG_I(npoints);

        Array<OneD, NekDouble> work(lwork);

        Lapack::Dgeev(jobvl,jobvr,npoints,MATRIX.get(),npoints,EIG_R.get(),EIG_I.get(),&dum,1,&dum,1,&work[0],lwork,info);

        if (info != 0)
        {
            // A non-zero info means the solve did not complete normally;
            // the values reported below may be incomplete.
            cout << "Warning: Dgeev returned info = " << info << endl;
        }

        ////////////////////////////////////////////////////////
        //Output of the EigenValues
        FILE *pFile = fopen ("Eigenvalues.txt","w");
        if (pFile != NULL)
        {
            for(int j = 0; j<npoints; j++)
            {
                fprintf(pFile,"%e %e\n",EIG_R[j],EIG_I[j]);
            }
            fclose (pFile);
        }
        else
        {
            cout << "Unable to open Eigenvalues.txt for writing" << endl;
        }

        cout << "\nEigenvalues : " << endl;
        for(int j = 0; j<npoints; j++)
        {
            cout << EIG_R[j] << "\t" << EIG_I[j] << endl;
        }
        cout << endl;
    }
Exemple #7
0
/*
 * In-place transform for the mono case: advances the machine's tick state,
 * then feeds the mapped buffer to the machine's work() in chunks of at most
 * 256 samples and updates the buffer's GAP flag from the result.
 */
static GstFlowReturn
gst_bml_transform_transform_ip_mono (GstBaseTransform * base,
    GstBuffer * outbuf)
{
  GstBMLTransform *bml_transform = GST_BML_TRANSFORM (base);
  GstBMLTransformClass *klass = GST_BML_TRANSFORM_GET_CLASS (bml_transform);
  GstBML *bml = GST_BML (bml_transform);
  GstBMLClass *bml_class = GST_BML_CLASS (klass);
  gpointer bm = bml->bm;
  GstMapInfo info;
  BMLData *data, *chunk;
  guint remaining, chunk_len, samples_per_buffer;
  gboolean has_data = FALSE;
  guint mode = 3;               /*WM_READWRITE */

  bml->running_time =
      gst_segment_to_stream_time (&base->segment, GST_FORMAT_TIME,
      GST_BUFFER_TIMESTAMP (outbuf));

  /* on a discontinuity, restart the subtick counter so that the next tick
   * decision starts from a defined state */
  if (GST_BUFFER_FLAG_IS_SET (outbuf, GST_BUFFER_FLAG_DISCONT)) {
    bml->subtick_count = bml->reverse ? 1 : bml->subticks_per_tick;
  }

  /* TODO(ensonic): sync on subticks ? */
  if (bml->subtick_count < bml->subticks_per_tick) {
    bml->subtick_count++;
  } else {
    bml (gstbml_reset_triggers (bml, bml_class));
    bml (gstbml_sync_values (bml, bml_class, GST_BUFFER_TIMESTAMP (outbuf)));
    bml (tick (bm));
    bml->subtick_count = 1;
  }

  /* don't process data in passthrough-mode */
  if (gst_base_transform_is_passthrough (base))
    return GST_FLOW_OK;

  if (!gst_buffer_map (outbuf, &info, GST_MAP_READ | GST_MAP_WRITE)) {
    GST_WARNING_OBJECT (base, "unable to map buffer for read & write");
    return GST_FLOW_ERROR;
  }
  data = (BMLData *) info.data;
  samples_per_buffer = info.size / sizeof (BMLData);

  if (!GST_BUFFER_FLAG_IS_SET (outbuf, GST_BUFFER_FLAG_GAP)) {
    /* buzz generates loud output */
    gfloat fc = 32768.0;
    orc_scalarmultiply_f32_ns (data, data, fc, samples_per_buffer);
  } else {
    /* if buffer has only silence process with different mode */
    mode = 2;                   /* WM_WRITE */
  }

  GST_DEBUG_OBJECT (bml_transform, "  calling work(%d,%d)", samples_per_buffer,
      mode);

  /* 256 is MachineInterface.h::MAX_BUFFER_LENGTH */
  for (remaining = samples_per_buffer, chunk = data; remaining != 0;
      remaining -= chunk_len, chunk += chunk_len) {
    if (remaining > 256)
      chunk_len = 256;
    else
      chunk_len = remaining;
    has_data |= bml (work (bm, chunk, (int) chunk_len, mode));
  }

  if (!gstbml_fix_data ((GstElement *) bml_transform, &info, has_data)) {
    GST_BUFFER_FLAG_UNSET (outbuf, GST_BUFFER_FLAG_GAP);
  } else {
    GST_BUFFER_FLAG_SET (outbuf, GST_BUFFER_FLAG_GAP);
  }

  gst_buffer_unmap (outbuf, &info);

  return GST_FLOW_OK;
}
Exemple #8
0
// Program entry point: calls work() repeatedly until it yields a false value.
int main()
{
    for (;;)
    {
        if (!work())
        {
            break;
        }
    }
    return 0;
}
Exemple #9
0
void Solve::solve(FILE *fin, FILE *fout)
{
	int cnt = 1;
	for(int i = 0; i <= 10; i ++, cnt *= 2)
		lb[cnt] = i;
	fscanf(fin, "%d%d", &n, &m);
	for(int i = 1; i <= n; i ++)
		for(int j = 1; j <= m; j ++)
			fscanf(fin, "%d", &object[i][j].a);
	for(int i = 1; i <= n; i ++)
		for(int j = 1; j <= m; j ++)
			fscanf(fin, "%d", &object[i][j].d);
	for(int i = 1; i <= n; i ++)
		for(int j = 1; j <= m; j ++)
			fscanf(fin, "%d", &object[i][j].hp);
	fscanf(fin, "%d%d%d", &llx.a, &llx.d, &llx.hp);
	fscanf(fin, "%d", &nBaby);
	for(int i = 1; i <= nBaby; i ++)
		fscanf(fin, "%d%d%d", &baby[i].a, &baby[i].d, &baby[i].hp);
	for(int i = 1; i <= n; i ++)
		for(int j = 1; j <= m; j ++)
		{
			Stuff &lyd = object[i][j];
			if(llx.a <= lyd.d)
				w[i][j][0] = INFINITY;
			else
			{
				int t1 = ceilDiv(lyd.hp, llx.a-lyd.d);
				int tmp = (t1 - 1) * MAX(0, lyd.a - llx.d);
				if(tmp >= llx.hp)
					w[i][j][0] = INFINITY;
				else
					w[i][j][0] = tmp;
			}
			for(int k = 1; k <= nBaby; k ++)
			{
				Stuff &bb = baby[k];
				if(bb.a <= lyd.d)
					w[i][j][k] = INFINITY;
				else
				{
					int t1 = ceilDiv(lyd.hp, bb.a - lyd.d);
					int tmp = (t1 - 1) * MAX(0, lyd.a - bb.d);
					if(tmp >= bb.hp)
					{
						int t2 = ceilDiv(bb.hp, lyd.a - bb.d);
						tmp = t2 * MAX(0, bb.a - lyd.d);
						//baby died
						if(llx.a <= lyd.d)
							w[i][j][k] = INFINITY;
						else
						{
							int t1 = ceilDiv(lyd.hp - tmp, llx.a-lyd.d);
							tmp = (t1 - 1) * MAX(0, lyd.a - llx.d);
							if(tmp >= llx.hp)
								w[i][j][k] = INFINITY;
							else
								w[i][j][k] = tmp;
						}
					}
					else
						w[i][j][k] = 0;
				}
			}
		}
	upperlim = (1 << nBaby) - 1;
	work(fin, fout);
}
Exemple #10
0
/**
 * Command line entry point of the template compiler.
 *
 * Parses the options, builds the list of plugin search path patterns
 * (each a printf-style pattern with one %s for the plugin name) and calls
 * work() on the input and output files.
 *
 * @return 0 after --help, 1 on too many arguments, 2 when the input or
 *         output file name is missing, 3 when an option lacks its value;
 *         otherwise the value returned by work().
 */
int main(int argc, char **argv){
	// Add some plugin search paths
	plugin_search_path=list_new(free);

	const char *infilename=NULL;
	const char *outfilename=NULL;
	char tmp[256];
	char tmp2[256];
	char *assetfilename="assets.h";

	int i;
	for (i=1;i<argc;i++){
		if (strcmp(argv[i], "--help")==0){
			help(NULL);
			return 0;
		}
		else if ((strcmp(argv[i], "--templatetagsdir")==0) || (strcmp(argv[i], "-t")==0)){
			i++;
			if (argc<=i){
				help("Missing templatedir name");
				return 3;
			}
			snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", argv[i]);
			ONION_DEBUG("Added templatedir %s", tmp);
			list_add(plugin_search_path, strdup(tmp)); // dup, remember to free later.
		}
		else if ((strcmp(argv[i], "--no-orig-lines")==0) || (strcmp(argv[i], "-n")==0)){
			use_orig_line_numbers=0;
			ONION_DEBUG("Disable original line numbers");
		}
		else if ((strcmp(argv[i], "--asset-file")==0) || (strcmp(argv[i], "-a")==0)){
			i++;
			if (argc<=i){
				help("Missing assets file name");
				return 3;
			}
			assetfilename=argv[i];
			ONION_DEBUG("Assets file: %s", assetfilename);
		}
		else{
			// Positional arguments: first the input file, then the output file
			if (infilename){
				if (outfilename){
					help("Too many arguments");
					return 1;
				}
				outfilename=argv[i];
				ONION_DEBUG("Set outfilename %s", outfilename);
			}
			else{
				infilename=argv[i];
				ONION_DEBUG("Set infilename %s", infilename);
			}
		}
	}
	
	if (!infilename || !outfilename){
		help("Missing input or output filename");
		return 2;
	}

	if (strcmp(infilename,"-")==0){
		infilename="";
	}
	else{
		// Search next to the input file. The parsed infilename is used rather
		// than argv[1], which may be an option. dirname() may modify its
		// argument, so it always works on a fresh, NUL-terminated copy.
		snprintf(tmp2, sizeof(tmp2), "%s", infilename);
		snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", dirname(tmp2));
		list_add(plugin_search_path, strdup(tmp));
		snprintf(tmp2, sizeof(tmp2), "%s", infilename);
		snprintf(tmp, sizeof(tmp), "%s/templatetags/lib%%s.so", dirname(tmp2));
		list_add(plugin_search_path, strdup(tmp));
	}

	// Default template dirs
	list_add_with_flags(plugin_search_path, "lib%s.so", LIST_ITEM_NO_FREE);
	list_add_with_flags(plugin_search_path, "templatetags/lib%s.so", LIST_ITEM_NO_FREE);
	// Directories relative to the executable itself
	snprintf(tmp2, sizeof(tmp2), "%s", argv[0]);
	snprintf(tmp, sizeof(tmp), "%s/templatetags/lib%%s.so", dirname(tmp2));
	list_add(plugin_search_path, strdup(tmp)); // dup is ok, as we are at main.
	snprintf(tmp2, sizeof(tmp2), "%s", argv[0]);
	snprintf(tmp, sizeof(tmp), "%s/lib%%s.so", dirname(tmp2));
	list_add(plugin_search_path, strdup(tmp)); // dup is ok, as we are at main.
	list_add_with_flags(plugin_search_path, "/usr/local/lib/otemplate/templatetags/lib%s.so", LIST_ITEM_NO_FREE);
	list_add_with_flags(plugin_search_path, "/usr/lib/otemplate/templatetags/lib%s.so", LIST_ITEM_NO_FREE);

	onion_assets_file *assetsfile=onion_assets_file_new(assetfilename);
	int error=work(infilename, outfilename, assetsfile);
	onion_assets_file_free(assetsfile);
	
	list_free(plugin_search_path);
	
	return error;
}
/**
 * Sample producer: parses the command line, registers two schemas with the
 * schema registry and then starts ten threads that call send_messages().
 *
 * Options: --topic and --broker are required; --schema_registry is optional
 * (falls back to the broker hosts); --schema_registry_port defaults to 8081.
 *
 * @return 0 after --help or when a required option is missing, -1 when a
 *         schema registration fails. After the sender threads are started
 *         the function loops forever, so the trailing cleanup and
 *         EXIT_SUCCESS are never reached.
 */
int main(int argc, char** argv)
{
    boost::program_options::options_description desc("options");
    desc.add_options()
        ("help", "produce help message")
        ("topic", boost::program_options::value<std::string>(), "topic")
        ("broker", boost::program_options::value<std::string>(), "broker")
        ("schema_registry", boost::program_options::value<std::string>(), "schema_registry")
        ("schema_registry_port", boost::program_options::value<int>()->default_value(8081), "schema_registry_port")
        ;

    boost::program_options::variables_map vm;
    boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
    boost::program_options::notify(vm);

    // Only log messages of severity info and above
    boost::log::core::get()->set_filter(boost::log::trivial::severity >= boost::log::trivial::info);

    if (vm.count("help"))
    {
        std::cout << desc << std::endl;
        return 0;
    }

    std::string topic;
    if (vm.count("topic"))
    {
        topic = vm["topic"].as<std::string>();
    }
    else
    {
        std::cout << "--topic must be specified" << std::endl;
        return 0;
    }

    // --broker is a comma separated host list; an optional ":port" after the
    // LAST colon applies to every host (default 9092).
    int32_t kafka_port = 9092;
    std::vector<csi::kafka::broker_address> kafka_brokers;
    if (vm.count("broker"))
    {
        std::string s = vm["broker"].as<std::string>();
        size_t last_colon = s.find_last_of(':');
        if (last_colon != std::string::npos)
            kafka_port = atoi(s.substr(last_colon + 1).c_str());
        // With no colon, last_colon is npos and substr keeps the whole string
        s = s.substr(0, last_colon);

        // now find the brokers...
        // Hosts are peeled off from the right, so they end up in reverse order
        size_t last_separator = s.find_last_of(',');
        while (last_separator != std::string::npos)
        {
            std::string host = s.substr(last_separator + 1);
            kafka_brokers.push_back(csi::kafka::broker_address(host, kafka_port));
            s = s.substr(0, last_separator);
            last_separator = s.find_last_of(',');
        }
        kafka_brokers.push_back(csi::kafka::broker_address(s, kafka_port));
    }
    else
    {
        std::cout << "--broker must be specified" << std::endl;
        return 0;
    }

    int32_t schema_registry_port = 8081;
    std::vector<csi::kafka::broker_address> schema_registrys;
    std::string used_schema_registry;

    if (vm.count("schema_registry_port"))
    {
        schema_registry_port = vm["schema_registry_port"].as<int>();
    }

    // --schema_registry is parsed the same way as --broker; a ":port" suffix
    // overrides --schema_registry_port.
    if (vm.count("schema_registry"))
    {
        std::string s = vm["schema_registry"].as<std::string>();
        size_t last_colon = s.find_last_of(':');
        if (last_colon != std::string::npos)
            schema_registry_port = atoi(s.substr(last_colon + 1).c_str());
        s = s.substr(0, last_colon);

        // now find the brokers...
        size_t last_separator = s.find_last_of(',');
        while (last_separator != std::string::npos)
        {
            std::string host = s.substr(last_separator + 1);
            schema_registrys.push_back(csi::kafka::broker_address(host, schema_registry_port));
            s = s.substr(0, last_separator);
            last_separator = s.find_last_of(',');
        }
        schema_registrys.push_back(csi::kafka::broker_address(s, schema_registry_port));
    }
    else
    {
        // default - assume registry is running on all kafka brokers
        for (std::vector<csi::kafka::broker_address>::const_iterator i = kafka_brokers.begin(); i != kafka_brokers.end(); ++i)
        {
            schema_registrys.push_back(csi::kafka::broker_address(i->host_name, schema_registry_port));
        }
    }

    // right now the schema registry class cannot handle several hosts so just stick to the first one.
    // (both parsing paths above always add at least one entry)
    used_schema_registry = schema_registrys[0].host_name + ":" + std::to_string(schema_registrys[0].port);


    // Build "host:port, host:port" strings for the log output below
    std::string kafka_broker_str = "";
    for (std::vector<csi::kafka::broker_address>::const_iterator i = kafka_brokers.begin(); i != kafka_brokers.end(); ++i)
    {
        kafka_broker_str += i->host_name + ":" + std::to_string(i->port);
        if (i != kafka_brokers.end() - 1)
            kafka_broker_str += ", ";
    }

    BOOST_LOG_TRIVIAL(info) << "kafka broker(s): " << kafka_broker_str;
    BOOST_LOG_TRIVIAL(info) << "topic          : " << topic;

    std::string schema_registrys_info;
    for (std::vector<csi::kafka::broker_address>::const_iterator i = schema_registrys.begin(); i != schema_registrys.end(); ++i)
    {
        schema_registrys_info += i->host_name + ":" + std::to_string(i->port);
        if (i != schema_registrys.end() - 1)
            schema_registrys_info += ", ";
    }
    BOOST_LOG_TRIVIAL(info) << "schema_registry(s)  : " << schema_registrys_info;
    BOOST_LOG_TRIVIAL(info) << "used schema registry: " << used_schema_registry;


    // NOTE(review): `total` is never read in this function.
    int64_t total = 0;
	// The work object keeps fg_ios.run() from returning while no handlers
	// are queued; the io_service runs on its own thread `fg`.
	boost::asio::io_service fg_ios;
	std::auto_ptr<boost::asio::io_service::work> work(new boost::asio::io_service::work(fg_ios));
	boost::thread fg(boost::bind(&boost::asio::io_service::run, &fg_ios));

	csi::kafka::highlevel_producer producer(fg_ios, topic, -1, 200, 1000000);
    confluent::registry            registry(fg_ios, used_schema_registry);
	confluent::codec               avro_codec(registry);

    producer.connect(kafka_brokers);
    BOOST_LOG_TRIVIAL(info) << "connected to kafka";
    producer.connect_forever(kafka_brokers);

	// Background thread: once per second, sums the producer metrics and logs
	// them. It captures `producer` by reference and never terminates.
	boost::thread do_log([&producer]
	{
		while (true)
		{
			boost::this_thread::sleep(boost::posix_time::seconds(1));

			std::vector<csi::kafka::highlevel_producer::metrics>  metrics = producer.get_metrics();

			size_t total_queue = 0;
			uint32_t tx_msg_sec_total = 0;
			uint32_t tx_kb_sec_total = 0;
			for (std::vector<csi::kafka::highlevel_producer::metrics>::const_iterator i = metrics.begin(); i != metrics.end(); ++i)
			{
				total_queue += (*i).msg_in_queue;
				tx_msg_sec_total += (*i).tx_msg_sec;
				tx_kb_sec_total += (*i).tx_kb_sec;
			}
            BOOST_LOG_TRIVIAL(info) << "\t        \tqueue:" << total_queue << "\t" << tx_msg_sec_total << " msg/s \t" << (tx_kb_sec_total / 1024) << "MB/s";
		}
	});


	std::cerr << "registring schemas" << std::endl;
	auto key_res = avro_codec.put_schema("sample.contact_info_key", sample::contact_info_key::valid_schema());

	// A non-zero `first` is treated as a failed registration.
	// NOTE(review): these early returns happen while the `fg` and `do_log`
	// threads are still running and still reference locals of this function.
	if (key_res.first!=0)
	{
        BOOST_LOG_TRIVIAL(error) << "registring sample.contact_info_key failed";
		return -1;
	}
	auto val_res = avro_codec.put_schema("sample.contact_info", sample::contact_info::valid_schema());
	if (val_res.first!=0)
	{
        BOOST_LOG_TRIVIAL(error) << "registring sample.contact_info failed";
		return -1;
	}
    BOOST_LOG_TRIVIAL(info) << "registring schemas done";
	
    //produce messages

	// The thread objects are heap allocated and never deleted or joined here.
	std::vector<boost::thread*> threads;
	
	for (int i = 0; i != 10; ++i)
	{
        threads.emplace_back(new boost::thread([&avro_codec, key_res, val_res, &producer, i]
		{
            send_messages(avro_codec, key_res.second, val_res.second, producer, i);
		}));
	}

	// Keep the main thread (and with it all captured locals) alive forever
	while (true)
	{
		boost::this_thread::sleep(boost::posix_time::seconds(1));
	}


    // Unreachable: the loop above has no exit
    work.reset();
	fg_ios.stop();
    return EXIT_SUCCESS;
}
Exemple #12
0
/**
 * Eigen-decomposition of the square matrix A through LAPACK's dgeev.
 * No argument checking is performed here.
 *
 * @param A input matrix (a transposed copy is handed to LAPACK)
 * @param V receives the right eigenvectors, one per column
 * @param D receives the (possibly complex) eigenvalues
 * @throws std::runtime_error when dgeev reports a non-zero info
 */
void bob::math::eig_(const blitz::Array<double,2>& A,
  blitz::Array<std::complex<double>,2>& V,
  blitz::Array<std::complex<double>,1>& D)
{
  // Problem size and LAPACK scalar arguments
  const int N = A.extent(0);
  const char jobvl = 'N'; // left eigenvectors are not requested
  const char jobvr = 'V'; // right eigenvectors are requested
  const int lda = N;
  const int ldvl = 1;
  const int ldvr = N;
  double VL = 0; // placeholder, the left eigenvectors are not computed
  int info = 0;

  // LAPACK works on a contiguous, transposed copy of A
  blitz::Array<double,2> A_lapack =
    bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0));

  // Raw LAPACK outputs
  blitz::Array<double,1> WR(D.shape()); // real parts of the eigenvalues
  blitz::Array<double,1> WI(D.shape()); // imaginary parts of the eigenvalues
  blitz::Array<double,2> VR(A.shape()); // right eigenvectors

  // First call: workspace size query (lwork == -1)
  double optimal_lwork;
  const int query_only = -1;
  dgeev_( &jobvl, &jobvr, &N, A_lapack.data(), &lda, WR.data(), WI.data(),
      &VL, &ldvl, VR.data(), &ldvr, &optimal_lwork, &query_only, &info);

  // Second call: the actual decomposition with the requested workspace
  const int lwork = static_cast<int>(optimal_lwork);
  boost::shared_array<double> workspace(new double[lwork]);
  dgeev_( &jobvl, &jobvr, &N, A_lapack.data(), &lda, WR.data(), WI.data(),
      &VL, &ldvl, VR.data(), &ldvr, workspace.get(), &lwork, &info);

  if (info != 0) {
    throw std::runtime_error("the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed.");
  }

  // Eigenvalues: assemble D from its real and imaginary parts
  blitz::real(D) = WR;
  blitz::imag(D) = WI;

  // Eigenvectors: a real eigenvalue j uses VR(j,:) as is; a complex
  // conjugate pair (j, j+1) uses VR(j,:) +/- i*VR(j+1,:).
  blitz::Range all = blitz::Range::all();
  for (int j = 0; j < N; ) {
    // the real part comes from VR(j,:) in both cases
    blitz::real(V(all,j)) = VR(j,all);

    if (std::imag(D(j)) == 0.) { // real eigenvalue: consumes one entry
      blitz::imag(V(all,j)) = 0.;
      j += 1;
      continue;
    }

    // complex pair: consumes two entries
    blitz::imag(V(all,j)) = VR(j+1,all);
    blitz::real(V(all,j+1)) = VR(j,all);
    blitz::imag(V(all,j+1)) = -VR(j+1,all);
    j += 2;
  }
}
Exemple #13
0
/**
 * Generalized symmetric eigen-decomposition of (A, B) through LAPACK's
 * dsygvd. No argument checking is performed here.
 *
 * V and D are used directly as LAPACK buffers when their memory layout
 * allows it; otherwise temporaries are used and copied back at the end.
 *
 * @param A first input matrix
 * @param B second input matrix
 * @param V receives the eigenvectors
 * @param D receives the eigenvalues
 * @throws std::runtime_error when dsygvd reports a non-zero info
 */
void bob::math::eigSym_(const blitz::Array<double,2>& A, const blitz::Array<double,2>& B,
  blitz::Array<double,2>& V, blitz::Array<double,1>& D)
{
  // Problem size and LAPACK scalar arguments
  const int N = A.extent(0);
  const int itype = 1;
  const char jobz = 'V'; // eigenvalues and eigenvectors
  const char uplo = 'U';
  const int lda = N;
  const int ldb = N;
  int info = 0;

  // Matrix A: work inside V's storage when the transposed view of V is
  // contiguous, otherwise on a contiguous transposed copy of A.
  // (const_cast is only needed because transpose() is non-const.)
  blitz::Array<double,2> A_work;
  blitz::Array<double,2> Vt = V.transpose(1,0);
  const bool reuse_V = bob::core::array::isCZeroBaseContiguous(Vt);
  if (!reuse_V)
  {
    A_work.reference(
      bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(A).transpose(1,0)));
  }
  else
  {
    A_work.reference(Vt);
    // element-wise copy of A^T into the storage shared with V
    A_work = const_cast<blitz::Array<double,2>&>(A).transpose(1,0);
  }
  double *A_ptr = A_work.data();

  // Matrix B: always a contiguous transposed copy
  blitz::Array<double,2> B_work(
    bob::core::array::ccopy(const_cast<blitz::Array<double,2>&>(B).transpose(1,0)));
  double *B_ptr = B_work.data();

  // Eigenvalue buffer: D itself when contiguous, a temporary otherwise
  blitz::Array<double,1> D_work;
  const bool reuse_D = bob::core::array::isCZeroBaseContiguous(D);
  if (!reuse_D)
    D_work.resize(D.shape());
  else
    D_work.reference(D);
  double *D_ptr = D_work.data();

  // First call: workspace size query (lwork == liwork == -1)
  double optimal_lwork;
  int optimal_liwork;
  const int query_lwork = -1;
  const int query_liwork = -1;
  dsygvd_( &itype, &jobz, &uplo, &N, A_ptr, &lda, B_ptr, &ldb, D_ptr,
    &optimal_lwork, &query_lwork, &optimal_liwork, &query_liwork, &info);

  // Second call: the actual decomposition with the requested workspaces
  const int lwork = static_cast<int>(optimal_lwork);
  boost::shared_array<double> workspace(new double[lwork]);
  const int liwork = static_cast<int>(optimal_liwork);
  boost::shared_array<int> iworkspace(new int[liwork]);
  dsygvd_( &itype, &jobz, &uplo, &N, A_ptr, &lda, B_ptr, &ldb, D_ptr,
    workspace.get(), &lwork, iworkspace.get(), &liwork, &info);

  if (info != 0)
  {
    throw std::runtime_error("The LAPACK function 'dsygvd' returned a non-zero value. This might be caused by a non-positive definite B matrix.");
  }

  // Copy the results back only where temporaries had to be used
  if (!reuse_V)
  {
    V = A_work.transpose(1,0);
  }
  if (!reuse_D)
  {
    D = D_work;
  }
}
Exemple #14
0
// Program entry point.
// Calls read() and then work(), in that order, and always exits with status 0.
int main() {
    read();
    work();

    return 0;
}
  // Constructor: sets up the basis of degree `order` on the line cell.
  //
  // Parameters:
  //   order     - basis degree; the cardinality is set to order+1.
  //   pointType - selects how the DoF coordinates are generated (see the switch below).
  //
  // Work done here, in order:
  //   1. fill in cardinality, degree, cell topology, basis type and coordinate system;
  //   2. compute the DoF coordinates in a HostSpace view and deep-copy them into dofCoords_;
  //   3. evaluate Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI values (alpha = beta = 0) at those
  //      points into vmat, invert vmat in place with LAPACK GETRF/GETRI, and store its
  //      transpose in vinv_;
  //   4. build one 4-entry tag per DoF and hand them to setOrdinalTagData.
  //
  // Raises (through INTREPID2_TEST_FOR_EXCEPTION): std::invalid_argument for an invalid
  // pointType, std::runtime_error when GETRF or GETRI reports a nonzero info.
  Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>::
  Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order,
                           const EPointType   pointType ) {
    this->basisCardinality_  = order+1;
    this->basisDegree_       = order;
    this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() );
    this->basisType_         = BASIS_FEM_FIAT;
    this->basisCoordinates_  = COORDINATES_CARTESIAN;

    const ordinal_type card = this->basisCardinality_;
    
    // points are computed in the host and will be copied 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      dofCoords("Hgrad::Line::Cn::dofCoords", card, 1);


    // Fill dofCoords (card x 1) according to the requested point type.
    switch (pointType) {
    case POINTTYPE_EQUISPACED:
    case POINTTYPE_WARPBLEND: {
      // lattice ordering 
      {
        // offset 0 is passed to getLattice for the whole view, so the vertex
        // tags further down assume the first and last rows are the end points.
        const ordinal_type offset = 0;
        PointTools::getLattice( dofCoords,
                                this->basisCellTopology_, 
                                order, offset, 
                                pointType );
        
      }
      // topological order
      // { 
      //   // two vertices
      //   dofCoords(0,0) = -1.0;
      //   dofCoords(1,0) =  1.0;
        
      //   // internal points
      //   typedef Kokkos::pair<ordinal_type,ordinal_type> range_type;
      //   auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL());
        
      //   const auto offset = 1;
      //   PointTools::getLattice( pts,
      //                           this->basisCellTopology_, 
      //                           order, offset, 
      //                           pointType );
      // }
      break;
    }
    case POINTTYPE_GAUSS: {
      // internal points only
      PointTools::getGaussPoints( dofCoords, 
                                  order );
      break;
    }
    default: {
      // NOTE(review): the exception fires only when isValidPointType() rejects the
      // value; a valid point type not listed above would fall through with
      // dofCoords left as allocated - confirm that no such type exists.
      INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType),
                                    std::invalid_argument , 
                                    ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." );
    }
    }

    // Publish the host-computed coordinates in the basis' own memory space.
    this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords);
    Kokkos::deep_copy(this->dofCoords_, dofCoords);
    
    // form Vandermonde matrix; actually, this is the transpose of the VDM,
    // this matrix is used in LAPACK so it should be column major and left layout
    // lwork is the workspace length handed to GETRI; work is allocated with
    // exactly that many entries.
    const ordinal_type lwork = card*card;
    // NOTE(review): ipiv is allocated with the scalar value_type and later cast to
    // ordinal_type* for LAPACK; this presumably relies on
    // sizeof(value_type) >= sizeof(ordinal_type) - confirm.
    Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace>
      vmat("Hgrad::Line::Cn::vmat", card, card), 
      work("Hgrad::Line::Cn::work", lwork),
      ipiv("Hgrad::Line::Cn::ipiv", card);

    // Evaluate the helper basis (OPERATOR_VALUE, alpha = beta = 0) at the DoF
    // coordinates; the result lands in vmat.
    const double alpha = 0.0, beta = 0.0;
    Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI::
      getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval>
      (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE);

    ordinal_type info = 0;
    Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack;

    // In-place factorization of vmat; vmat.stride_1() is passed as the leading dimension.
    lapack.GETRF(card, card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." );

    // In-place inversion from the factorization above, reusing the same pivots.
    lapack.GETRI(card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 work.data(), lwork,
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." );
    
    // create host mirror 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      vinv("Hgrad::Line::Cn::vinv", card, card);

    // vinv is the transpose of the inverted matrix, stored in SpT's array layout.
    for (ordinal_type i=0;i<card;++i) 
      for (ordinal_type j=0;j<card;++j) 
        vinv(i,j) = vmat(j,i);

    this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv);
    Kokkos::deep_copy(this->vinv_ , vinv);

    // initialize tags
    {
      // Gauss points are interior only (see the switch above), so no DoF sits on a vertex.
      const bool is_vertex_included = (pointType != POINTTYPE_GAUSS);

      // Basis-dependent initializations
      const ordinal_type tagSize  = 4;        // size of DoF tag, i.e., number of fields in the tag
      const ordinal_type posScDim = 0;        // position in the tag, counting from 0, of the subcell dim 
      const ordinal_type posScOrd = 1;        // position in the tag, counting from 0, of the subcell ordinal
      const ordinal_type posDfOrd = 2;        // position in the tag, counting from 0, of DoF ordinal relative to the subcell
      

      // One row of 4 tag fields per DoF, sized for the largest supported order.
      // NOTE(review): nothing in this constructor checks card <= Parameters::MaxOrder+1.
      ordinal_type tags[Parameters::MaxOrder+1][4];

      // now we check the points for association 
      if (is_vertex_included) {
        // lattice order
        {
          // First DoF: vertex 0.
          const auto v0 = 0;
          tags[v0][0] = 0; // vertex dof
          tags[v0][1] = 0; // vertex id
          tags[v0][2] = 0; // local dof id
          tags[v0][3] = 1; // total number of dofs in this vertex
          
          // DoFs 1 .. card-2: interior of the single edge.
          const ordinal_type iend = card - 2;
          for (ordinal_type i=0;i<iend;++i) {
            const auto e = i + 1;
            tags[e][0] = 1;    // edge dof
            tags[e][1] = 0;    // edge id
            tags[e][2] = i;    // local dof id
            tags[e][3] = iend; // total number of dofs in this edge
          }

          // Last DoF: vertex 1.
          // NOTE(review): when card == 1, v1 equals v0 and this overwrites the
          // vertex-0 tag written above - confirm order 0 is not used with these point types.
          const auto v1 = card -1;
          tags[v1][0] = 0; // vertex dof
          tags[v1][1] = 1; // vertex id
          tags[v1][2] = 0; // local dof id
          tags[v1][3] = 1; // total number of dofs in this vertex
        }

        // topological order
        // {
        //   tags[0][0] = 0; // vertex dof
        //   tags[0][1] = 0; // vertex id
        //   tags[0][2] = 0; // local dof id
        //   tags[0][3] = 1; // total number of dofs in this vertex
          
        //   tags[1][0] = 0; // vertex dof
        //   tags[1][1] = 1; // vertex id
        //   tags[1][2] = 0; // local dof id
        //   tags[1][3] = 1; // total number of dofs in this vertex
          
        //   const ordinal_type iend = card - 2;
        //   for (ordinal_type i=0;i<iend;++i) {
        //     const auto ii = i + 2;
        //     tags[ii][0] = 1;    // edge dof
        //     tags[ii][1] = 0;    // edge id
        //     tags[ii][2] = i;    // local dof id
        //     tags[ii][3] = iend; // total number of dofs in this edge
        //   }
        // }
      } else {
        // No vertex DoFs: every DoF is tagged as belonging to edge 0.
        for (ordinal_type i=0;i<card;++i) {
          tags[i][0] = 1;    // edge dof
          tags[i][1] = 0;    // edge id
          tags[i][2] = i;    // local dof id
          tags[i][3] = card; // total number of dofs in this edge
        }
      }

      // Flat view over the first card rows of the stack array above (card*4 entries).
      ordinal_type_array_1d_host tagView(&tags[0][0], card*4);

      // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
      // tags are constructed on host
      this->setOrdinalTagData(this->tagToOrdinal_,
                              this->ordinalToTag_,
                              tagView,
                              this->basisCardinality_,
                              tagSize,
                              posScDim,
                              posScOrd,
                              posDfOrd);
    }  
  }
Exemple #16
0
void PBCmgr::maintain (const int_t      step   ,
                       const Field*     P      ,
                       const AuxField** Us     ,
                       const AuxField** Uf     ,
                       const bool       timedep)
// ---------------------------------------------------------------------------
// Update storage for evaluation of high-order pressure boundary
// condition.  Storage order for each edge represents a CCW traverse
// of element boundaries.
//
// Arguments, as used in the body below:
//   step    -- timestep number; the time-derivative estimate is skipped
//              unless step > 1.
//   P       -- supplies the boundary count (_nbound), the plane count (_nz)
//              and the boundary list (_bsys -> BCs (0)).
//   Us      -- velocity fields: Us[0], Us[1], and Us[2] when nZ > 1.
//   Uf      -- nonlinear-term fields: Uf[0] and Uf[1].
//   timedep -- when true, add the -du/dt estimate and update the stored
//              velocity history (_Unx, _Uny).
//
// If the velocity field varies in time on HOPB field boundaries
// (e.g. due to time-varying BCs) the local fluid acceleration will be
// estimated from input velocity fields by explicit extrapolation if
// timedep is true.  This correction cannot be carried out at the
// first timestep, since the required extrapolation cannot be done.
// If the acceleration is known, (for example, a known reference frame
// acceleration) it is probably better to leave timedep unset, and to
// use PBCmgr::accelerate() to add in the accelerative term.  Note
// also that since grad P is dotted with n, the unit outward normal,
// at a later stage, timedep only needs to be set if there are
// wall-normal accelerative terms.  NB: The default value of timedep
// is 1.
//
// Field* master gives a list of pressure boundary conditions with
// which to traverse storage areas (note this assumes equal-order
// interpolations).
// NOTE(review): this signature has no parameter called master; P appears
// to play that role here -- confirm.
//
// No smoothing is done to high-order spatial derivatives computed here.
// ---------------------------------------------------------------------------
{
    const real_t nu    = Femlib::value ("KINVIS");
    const real_t invDt = 1.0 / Femlib::value ("D_T");
    const int_t  nTime = Femlib::ivalue ("N_TIME");
    const int_t  nEdge = P -> _nbound;
    const int_t  nZ    = P -> _nz;
    const int_t  nP    =  Geometry::nP();
    const int_t  base  =  Geometry::baseMode();
    const int_t  nMode =  Geometry::nModeProc();
    // -- Process 0 starts its higher-mode loop at 1; its mode 0 is handled
    //    separately in the ROOTONLY section below.
    const int_t  mLo   = (Geometry::procID() == 0) ? 1 : 0;

    const AuxField* Ux = Us[0];
    const AuxField* Uy = Us[1];
    // -- Uz is null when nZ <= 1.  NOTE(review): the higher-mode loop below
    //    dereferences Uz unconditionally; presumably nMode/mLo keep that loop
    //    from running in the nZ == 1 case -- confirm.
    const AuxField* Uz = (nZ > 1) ? Us[2] : 0;
    const AuxField* Nx = Uf[0];
    const AuxField* Ny = Uf[1];

    const vector<Boundary*>& BC = P -> _bsys -> BCs (0);
    register Boundary*       B;
    register int_t           i, k, q;
    int_t                    m, offset, skip, Je;

    // -- Roll grad P storage area up, load new level of nonlinear terms Uf.


    rollv (_Pnx, nTime);
    rollv (_Pny, nTime);

    for (i = 0; i < nEdge; i++) {
        B      = BC[i];
        offset = B -> dOff ();
        skip   = B -> dSkip();

        for (k = 0; k < nZ; k++) {
            // -- Plane 1 is skipped under ROOTONLY (presumably: on the root
            //    process only -- confirm against the macro definition).
            ROOTONLY if (k == 1) continue;
            // -- Gather nP edge values (start offset, stride skip) into the
            //    level-0 storage; argument order presumably BLAS-style
            //    (n, x, incx, y, incy) -- confirm.
            Veclib::copy (nP, Nx -> _plane[k] + offset, skip, _Pnx[0][i][k], 1);
            Veclib::copy (nP, Ny -> _plane[k] + offset, skip, _Pny[0][i][k], 1);

            // -- For cylindrical coordinates, N_ are radius-premultiplied. Cancel.

            if (Geometry::cylindrical()) {
                B -> divY (_Pnx[0][i][k]);
                B -> divY (_Pny[0][i][k]);
            }
        }
    }

    // -- Add in -nu * curl curl u.

    // -- Single workspace, carved up by pointer arithmetic: wrk is the start;
    //    xr, xi, yr, yi are consecutive nP-long slices beginning at
    //    5*sqr(nP) + 3*nP; alpha takes what follows yi.
    vector<real_t> work (5 * sqr(nP) + 7 * nP + Integration::OrderMax + 1);
    real_t         *UxRe, *UxIm, *UyRe, *UyIm, *UzRe, *UzIm, *tmp;
    real_t*        wrk   = &work[0];
    real_t*        xr    = wrk + 5*sqr(nP) + 3*nP;
    real_t*        xi    = xr  + nP;
    real_t*        yr    = xi  + nP;
    real_t*        yi    = yr  + nP;
    real_t*        alpha = yi  + nP;

    for (i = 0; i < nEdge; i++) {
        B      = BC[i];
        // -- offset and skip are assigned here but not read inside this loop.
        offset = B -> dOff ();
        skip   = B -> dSkip();

        ROOTONLY {			    // -- Deal with 2D/zero Fourier mode terms.
            UxRe = Ux -> _plane[0];
            UyRe = Uy -> _plane[0];

            // -- Only the Ux/Uy plane-0 inputs and the xr/yr outputs are
            //    supplied; the remaining pointer arguments are null.
            B -> curlCurl (0,UxRe,0,UyRe,0,0,0,xr,0,yr,0,wrk);

            Blas::axpy (nP, -nu, xr, 1, _Pnx[0][i][0], 1);
            Blas::axpy (nP, -nu, yr, 1, _Pny[0][i][0], 1);
        }

        for (m = mLo; m < nMode; m++) { // -- Higher modes.
            // -- Planes 2m and 2m+1 are used as the Re/Im pair of local mode m.
            UxRe = Ux -> _plane[2 * m] ;
            UxIm = Ux -> _plane[2 * m + 1];
            UyRe = Uy -> _plane[2 * m];
            UyIm = Uy -> _plane[2 * m + 1];
            UzRe = Uz -> _plane[2 * m];
            UzIm = Uz -> _plane[2 * m + 1];

            B -> curlCurl (m+base,UxRe,UxIm,UyRe,UyIm,UzRe,UzIm,xr,xi,yr,yi,wrk);

            Blas::axpy (nP, -nu, xr, 1, _Pnx[0][i][2 * m],     1);
            Blas::axpy (nP, -nu, xi, 1, _Pnx[0][i][2 * m + 1], 1);
            Blas::axpy (nP, -nu, yr, 1, _Pny[0][i][2 * m],     1);
            Blas::axpy (nP, -nu, yi, 1, _Pny[0][i][2 * m + 1], 1);
        }
    }

    if (timedep) {

        // -- Estimate -du / dt by backwards differentiation and add in.

        if (step > 1) {
            // -- Je is capped by both the steps taken so far and nTime.
            Je  = min (step - 1, nTime);
            // -- tmp aliases xr, which is no longer needed at this point.
            tmp = xr;
            Integration::StifflyStable (Je, alpha);

            for (i = 0; i < nEdge; i++) {
                B      = BC[i];
                offset = B -> dOff ();
                skip   = B -> dSkip();

                for (k = 0; k < nZ; k++) {
                    ROOTONLY if (k == 1) continue;

                    // -- tmp = alpha[0]*u_now + sum_q alpha[q+1]*_Unx[q],
                    //    then _Pnx[0] += -invDt * tmp.
                    Veclib::copy (nP, Ux -> _plane[k] + offset, skip, tmp, 1);
                    Blas::scal   (nP, alpha[0], tmp, 1);
                    for (q = 0; q < Je; q++)
                        Blas::axpy (nP, alpha[q + 1], _Unx[q][i][k], 1, tmp, 1);
                    Blas::axpy (nP, -invDt, tmp, 1, _Pnx[0][i][k], 1);

                    // -- Same combination for the y component.
                    Veclib::copy (nP, Uy -> _plane[k] + offset, skip, tmp, 1);
                    Blas::scal   (nP, alpha[0], tmp, 1);
                    for (q = 0; q < Je; q++)
                        Blas::axpy (nP, alpha[q + 1], _Uny[q][i][k], 1, tmp, 1);
                    Blas::axpy (nP, -invDt, tmp, 1, _Pny[0][i][k], 1);
                }
            }
        }

        // -- Roll velocity storage area up, load new level.

        rollv (_Unx, nTime);
        rollv (_Uny, nTime);

        for (i = 0; i < nEdge; i++) {
            B      = BC[i];
            offset = B -> dOff ();
            skip   = B -> dSkip();

            for (k = 0; k < nZ; k++) {
                ROOTONLY if (k == 1) continue;
                Veclib::copy (nP, Ux -> _plane[k] + offset, skip, _Unx[0][i][k], 1);
                Veclib::copy (nP, Uy -> _plane[k] + offset, skip, _Uny[0][i][k], 1);
            }
        }
    }
Exemple #17
0
// Creates the underlying thread for this AutomaticThread and detaches it.
//
// The unnamed LockHolder parameter is not used in the body.
// NOTE(review): it presumably exists so that callers demonstrate they hold
// *m_lock when calling - confirm against the call sites.
//
// The thread body repeats the following until it returns:
//   - under *m_lock, call poll(); on Work leave the lock scope, on Stop shut
//     down permanently, on Wait block on m_waitCondition for up to one second;
//   - if m_isWaiting is still set after that wait, stop for timeout;
//   - otherwise call work() without holding the lock; on WorkResult::Stop
//     re-acquire the lock and shut down permanently.
void AutomaticThread::start(const LockHolder&)
{
    RELEASE_ASSERT(m_isRunning);
    
    // Copied into the lambda below (capture by value) so that the thread body
    // holds its own reference to this object.
    RefPtr<AutomaticThread> preserveThisForThread = this;
    
    m_hasUnderlyingThread = true;
    
    ThreadIdentifier thread = createThread(
        "WTF::AutomaticThread",
        [=] () {
            if (verbose)
                dataLog(RawPointer(this), ": Running automatic thread!\n");
            
            RefPtr<AutomaticThread> thread = preserveThisForThread;
            thread->threadDidStart();
            
            if (!ASSERT_DISABLED) {
                LockHolder locker(*m_lock);
                ASSERT(m_condition->contains(locker, this));
            }
            
            // Common shutdown step: notify the subclass hook, then clear
            // m_hasUnderlyingThread. Callers pass the lock they already hold.
            auto stopImpl = [&] (const LockHolder& locker) {
                thread->threadIsStopping(locker);
                thread->m_hasUnderlyingThread = false;
            };
            
            // Permanent stop: additionally clears m_isRunning and wakes
            // everything waiting on m_isRunningCondition.
            auto stopPermanently = [&] (const LockHolder& locker) {
                m_isRunning = false;
                m_isRunningCondition.notifyAll();
                stopImpl(locker);
            };
            
            // Timeout stop: m_isRunning is left untouched.
            auto stopForTimeout = [&] (const LockHolder& locker) {
                stopImpl(locker);
            };
            
            for (;;) {
                {
                    LockHolder locker(*m_lock);
                    for (;;) {
                        PollResult result = poll(locker);
                        if (result == PollResult::Work)
                            break;
                        if (result == PollResult::Stop)
                            return stopPermanently(locker);
                        RELEASE_ASSERT(result == PollResult::Wait);
                        // Shut the thread down after one second.
                        m_isWaiting = true;
                        bool awokenByNotify =
                            m_waitCondition.waitFor(*m_lock, 1_s);
                        if (verbose && !awokenByNotify && !m_isWaiting)
                            dataLog(RawPointer(this), ": waitFor timed out, but notified via m_isWaiting flag!\n");
                        // The decision to stop is based on m_isWaiting, not on
                        // waitFor's return value: the flag is set just above and,
                        // if it is still set here, the thread shuts down.
                        if (m_isWaiting) {
                            m_isWaiting = false;
                            if (verbose)
                                dataLog(RawPointer(this), ": Going to sleep!\n");
                            // It's important that we don't release the lock until we have completely
                            // indicated that the thread is kaput. Otherwise we'll have a notify
                            // race that manifests as a deadlock on VM shutdown.
                            return stopForTimeout(locker);
                        }
                    }
                }
                
                // The lock scope above has ended, so work() runs without *m_lock held.
                WorkResult result = work();
                if (result == WorkResult::Stop) {
                    LockHolder locker(*m_lock);
                    return stopPermanently(locker);
                }
                RELEASE_ASSERT(result == WorkResult::Continue);
            }
        });
    detachThread(thread);
}
Exemple #18
0
void
o3d3xx::FrameGrabber::Run()
{
  boost::asio::io_service::work work(this->io_service_);

  //
  // setup the camera for image acquistion
  //
  std::string cam_ip;
  int cam_port;
  try
    {
      cam_ip = this->cam_->GetIP();
      cam_port = std::stoi(this->cam_->GetParameter("PcicTcpPort"));
    }
  catch (const o3d3xx::error_t& ex)
    {
      LOG(ERROR) << "Could not get IP/Port of the camera: "
                 << ex.what();
      return;
    }

  LOG(INFO) << "Camera connection info: ip=" << cam_ip
            << ", port=" << cam_port;

  try
    {
      this->cam_->RequestSession();
      this->cam_->SetOperatingMode(o3d3xx::Camera::operating_mode::RUN);
      this->cam_->CancelSession();
    }
  catch (const o3d3xx::error_t& ex)
    {
      LOG(ERROR) << "Failed to setup camera for image acquisition: "
                 << ex.what();
      return;
    }

  //
  // init the asio structures
  //
  boost::asio::ip::tcp::socket sock(this->io_service_);
  boost::asio::ip::tcp::endpoint endpoint(
    boost::asio::ip::address::from_string(cam_ip), cam_port);

  //
  // Forward declare our read handlers (because they need to call
  // eachother).
  //
  o3d3xx::FrameGrabber::WriteHandler result_schema_write_handler;
  o3d3xx::FrameGrabber::ReadHandler ticket_handler;
  o3d3xx::FrameGrabber::ReadHandler image_handler;

  //
  // image data callback
  //
  std::size_t bytes_read = 0;
  std::size_t buff_sz = 0; // bytes

  image_handler =
    [&, this]
    (const boost::system::error_code& ec, std::size_t bytes_transferred)
    {
      if (ec) { throw o3d3xx::error_t(ec.value()); }

      bytes_read += bytes_transferred;
      //DLOG(INFO) << "Read " << bytes_read << " image bytes of "
      //           << buff_sz;

      if (bytes_read == buff_sz)
        {
          DLOG(INFO) << "Got full image!";
          bytes_read = 0;

          // 1. verify the data
          if (o3d3xx::verify_image_buffer(this->back_buffer_))
            {
              DLOG(INFO) << "Image OK";

              // 2. move the data to the front buffer in O(1) time complexity
              this->front_buffer_mutex_.lock();
              this->back_buffer_.swap(this->front_buffer_);
              this->front_buffer_mutex_.unlock();

              // 3. notify waiting clients
              this->front_buffer_cv_.notify_all();
            }
          else
            {
              LOG(WARNING) << "Bad image!";
            }

          // read another ticket
          sock.async_read_some(
               boost::asio::buffer(this->ticket_buffer_.data(),
                                   o3d3xx::IMG_TICKET_SZ),
               ticket_handler);

          return;
        }

      sock.async_read_some(
        boost::asio::buffer(&this->back_buffer_[bytes_read],
                            buff_sz - bytes_read),
        image_handler);
    };

  //
  // ticket callback
  //
  std::size_t ticket_bytes_read = 0;
  std::size_t ticket_buff_sz = o3d3xx::IMG_TICKET_SZ;
  this->ticket_buffer_.resize(ticket_buff_sz);

  ticket_handler =
    [&, this]
    (const boost::system::error_code& ec, std::size_t bytes_transferred)
    {
      if (ec) { throw o3d3xx::error_t(ec.value()); }

      ticket_bytes_read += bytes_transferred;
      DLOG(INFO) << "Read " << ticket_bytes_read
                 << " ticket bytes of " << ticket_buff_sz;

      if (ticket_bytes_read == ticket_buff_sz)
        {
          DLOG(INFO) << "Got full ticket!";
          ticket_bytes_read = 0;

          if (o3d3xx::verify_ticket_buffer(this->ticket_buffer_))
            {
              DLOG(INFO) << "Ticket OK";

              buff_sz = o3d3xx::get_image_buffer_size(this->ticket_buffer_);
              DLOG(INFO) << "Image buffer size: " << buff_sz;
              this->back_buffer_.resize(buff_sz);

              sock.async_read_some(
                   boost::asio::buffer(this->back_buffer_.data(),
                                       buff_sz),
                   image_handler);

              return;
            }

          LOG(WARNING) << "Bad ticket!";
        }

      sock.async_read_some(
           boost::asio::buffer(&this->ticket_buffer_[ticket_bytes_read],
                               ticket_buff_sz - ticket_bytes_read),
           ticket_handler);
    };

  //
  // Check that our request to set the result schema was successful
  //
  result_schema_write_handler =
    [&, this]
    (const boost::system::error_code& ec, std::size_t bytes_transferred)
    {
      if (ec) { throw o3d3xx::error_t(ec.value()); }
      DLOG(INFO) << "Wrote: " << bytes_transferred << " bytes to camera";

      std::size_t c_buff_sz = 16 + 7;
      std::uint8_t resp_buff[c_buff_sz];
      std::size_t resp_bytes_read =
        boost::asio::read(sock, boost::asio::buffer(resp_buff, c_buff_sz));

      if (resp_bytes_read < c_buff_sz)
        {
          LOG(ERROR) << "Error getting c_command response!";
          throw o3d3xx::error_t(O3D3XX_IO_ERROR);
        }

      if (resp_buff[20] != '*')
        {
          LOG(ERROR) << "Got back bad response from camera: '"
                     << resp_buff[20] << "'";
          throw o3d3xx::error_t(O3D3XX_PCIC_BAD_REPLY);
        }

      sock.async_read_some(
        boost::asio::buffer(
          this->ticket_buffer_.data(), ticket_buff_sz),
        ticket_handler);
     };


  //
  // connect to the sensor and start streaming in image data
  //
  try
    {
      sock.async_connect(endpoint,
                         [&, this]
                         (const boost::system::error_code& ec)
                         {
                           if (ec) { throw o3d3xx::error_t(ec.value()); }

                           boost::asio::async_write(
                             sock,
                             boost::asio::buffer(this->schema_buffer_.data(),
                                                 this->schema_buffer_.size()),
                             result_schema_write_handler);
                         });

      this->io_service_.run();
    }
  catch (const std::exception& ex)
    {
      //
      // In here we should discern why the exception with thrown.
      //
      // Special case the "Stop()" request from the control thread
      //

      LOG(WARNING) << "Exception: " << ex.what();
    }

  LOG(INFO) << "Framegrabber thread done.";
}
/*
 * Multi-threaded spooling test driver.
 *
 * usage: test_berkeleydb_mt <url> <threads> [<delay>]
 *
 * Creates and starts a spooling context for <url>, runs work() in <threads>
 * additional pthreads (each receives a pointer to its 1-based index) and once
 * in the calling thread (with a NULL argument), waits for the started
 * threads and shuts the spooling context down.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a worker thread could not be
 * created. Argument and initialization errors terminate through SGE_EXIT.
 */
int main(int argc, char *argv[])
{
   const char *url;
   int i, threads;
   int started = 0;           /* number of successfully created threads */
   int ret = EXIT_SUCCESS;
   pthread_t *t;
   int *args;

   lList *answer_list = NULL;
   lListElem *spooling_context;

   DENTER_MAIN(TOP_LAYER, "test_berkeleydb_mt");

   /* parse commandline parameters */
   if (argc < 3) {
      ERROR((SGE_EVENT, "usage: test_berkeleydb_mt <url> <threads> [<delay>]\n"));
      ERROR((SGE_EVENT, "       <url>     = path or host:database\n"));
      ERROR((SGE_EVENT, "       <threads> = number of threads\n"));
      ERROR((SGE_EVENT, "       <delay>   = delay after writing [ms]\n"));
      SGE_EXIT(NULL, 1);
   }

   url = argv[1];
   threads = atoi(argv[2]);

   /* a negative count would turn into a huge size in the malloc calls below */
   if (threads < 0) {
      ERROR((SGE_EVENT, "invalid number of threads: %s\n", argv[2]));
      SGE_EXIT(NULL, 1);
   }

   if (argc > 3) {
      delay = atoi(argv[3]);
   }

   /* allocate memory for pthreads and arguments */
   t = (pthread_t *)malloc(threads * sizeof(pthread_t));
   args = (int *)malloc(threads * sizeof(int));

   /* malloc(0) may legitimately return NULL, so only check when threads > 0 */
   if (threads > 0 && (t == NULL || args == NULL)) {
      ERROR((SGE_EVENT, "out of memory allocating data for %d threads\n", threads));
      SGE_EXIT(NULL, EXIT_FAILURE);
   }

   DPRINTF(("writing to database %s from %d threads\n", url, threads));

   /* initialize spooling */
   spooling_context = spool_create_dynamic_context(&answer_list, NULL, url, NULL);
   answer_list_output(&answer_list);
   if (spooling_context == NULL) {
      SGE_EXIT(NULL, EXIT_FAILURE);
   }

   spool_set_default_context(spooling_context);

   if (!spool_startup_context(&answer_list, spooling_context, true)) {
      answer_list_output(&answer_list);
      SGE_EXIT(NULL, EXIT_FAILURE);
   }
   answer_list_output(&answer_list);

   /* let n threads to parallel spooling */
   for (i = 0; i < threads; i++) {
      args[i] = i + 1;
      if (pthread_create(&(t[i]), NULL, work, (void*)(&args[i])) != 0) {
         /* t[i] is not a valid thread id; stop here so it is never joined */
         ERROR((SGE_EVENT, "could not create thread %d\n", i + 1));
         ret = EXIT_FAILURE;
         break;
      }
      started++;
   }

   /* also work in current thread */
   work((void *)0);

   /* wait for termination of all threads that were actually started */
   for (i = 0; i < started; i++) {
      pthread_join(t[i], NULL);
   }

   /* shutdown spooling */
   spool_shutdown_context(&answer_list, spooling_context);
   answer_list_output(&answer_list);

   /* args is released only after the join: the threads hold pointers into it */
   sge_free(&t);
   sge_free(&args);

   DEXIT;
   return ret;
}
 int sumNumbers(TreeNode *root) {
     // The accumulator starts at zero and is passed to work() along with the
     // tree root and an initial running value of 0. Whatever work() leaves in
     // it is returned (presumably work() takes it by reference - confirm).
     int total = 0;
     work(root, total, 0);
     return total;
 }
Exemple #21
0
magma_int_t magma_ztrevc3(
    magma_side_t side, magma_vec_t howmany,
    magma_int_t *select,  // logical in Fortran
    magma_int_t n,
    magmaDoubleComplex *T,  magma_int_t ldt,
    magmaDoubleComplex *VL, magma_int_t ldvl,
    magmaDoubleComplex *VR, magma_int_t ldvr,
    magma_int_t mm, magma_int_t *mout,
    magmaDoubleComplex *work, magma_int_t lwork,
    double *rwork, magma_int_t *info )
{
    #define  T(i,j)  ( T + (i) + (j)*ldt )
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    #define work(i,j) (work + (i) + (j)*n)

    // .. Parameters ..
    const magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
    const magmaDoubleComplex c_one  = MAGMA_Z_ONE;
    const magma_int_t  nbmin = 16, nbmax = 128;
    const magma_int_t  ione = 1;
    
    // .. Local Scalars ..
    magma_int_t            allv, bothv, leftv, over, rightv, somev;
    magma_int_t            i, ii, is, j, k, ki, iv, n2, nb, nb2, version;
    double                 ovfl, remax, scale, smin, smlnum, ulp, unfl;
    
    // Decode and test the input parameters
    bothv  = (side == MagmaBothSides);
    rightv = (side == MagmaRight) || bothv;
    leftv  = (side == MagmaLeft ) || bothv;

    allv  = (howmany == MagmaAllVec);
    over  = (howmany == MagmaBacktransVec);
    somev = (howmany == MagmaSomeVec);

    // Set mout to the number of columns required to store the selected
    // eigenvectors.
    if ( somev ) {
        *mout = 0;
        for( j=0; j < n; ++j ) {
            if ( select[j] ) {
                *mout += 1;
            }
        }
    }
    else {
        *mout = n;
    }

    *info = 0;
    if ( ! rightv && ! leftv )
        *info = -1;
    else if ( ! allv && ! over && ! somev )
        *info = -2;
    else if ( n < 0 )
        *info = -4;
    else if ( ldt < max( 1, n ) )
        *info = -6;
    else if ( ldvl < 1 || ( leftv && ldvl < n ) )
        *info = -8;
    else if ( ldvr < 1 || ( rightv && ldvr < n ) )
        *info = -10;
    else if ( mm < *mout )
        *info = -11;
    else if ( lwork < max( 1, 2*n ) )
        *info = -14;
    
    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    // Quick return if possible.
    if ( n == 0 ) {
        return *info;
    }
    
    // Use blocked version (2) if sufficient workspace.
    // Requires 1 vector to save diagonal elements, and 2*nb vectors for x and Q*x.
    // (Compared to dtrevc3, rwork stores 1-norms.)
    // Zero-out the workspace to avoid potential NaN propagation.
    nb = 2;
    if ( lwork >= n + 2*n*nbmin ) {
        version = 2;
        nb = (lwork - n) / (2*n);
        nb = min( nb, nbmax );
        nb2 = 1 + 2*nb;
        lapackf77_zlaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n );
    }
    else {
        version = 1;
    }

    // Set the constants to control overflow.
    unfl = lapackf77_dlamch( "Safe minimum" );
    ovfl = 1. / unfl;
    lapackf77_dlabad( &unfl, &ovfl );
    ulp = lapackf77_dlamch( "Precision" );
    smlnum = unfl*( n / ulp );

    // Store the diagonal elements of T in working array work.
    for( i=0; i < n; ++i ) {
        *work(i,0) = *T(i,i);
    }

    // Compute 1-norm of each column of strictly upper triangular
    // part of T to control overflow in triangular solver.
    rwork[0] = 0.;
    for( j=1; j < n; ++j ) {
        rwork[j] = cblas_dzasum( j, T(0,j), ione );
    }

    magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0;
    timer_start( time_total );

    if ( rightv ) {
        // ============================================================
        // Compute right eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=nb, goes down to 1.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        if ( version == 2 ) {
            iv = nb;
        }
        
        timer_start( time_trsv );
        is = *mout - 1;
        for( ki=n-1; ki >= 0; --ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            smin = max( ulp*( MAGMA_Z_ABS1( *T(ki,ki) ) ), smlnum );

            // --------------------------------------------------------
            // Complex right eigenvector
            *work(ki,iv) = c_one;

            // Form right-hand side.
            for( k=0; k < ki; ++k ) {
                *work(k,iv) = -(*T(k,ki));
            }

            // Solve upper triangular system:
            // [ T(1:ki-1,1:ki-1) - T(ki,ki) ]*X = scale*work.
            for( k=0; k < ki; ++k ) {
                *T(k,k) -= *T(ki,ki);
                if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) {
                    *T(k,k) = MAGMA_Z_MAKE( smin, 0. );
                }
            }

            if ( ki > 0 ) {
                lapackf77_zlatrs( "Upper", "No transpose", "Non-unit", "Y",
                                  &ki, T, &ldt,
                                  work(0,iv), &scale, rwork, info );
                *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. );
            }

            // Copy the vector x or Q*x to VR and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VR and normalize
                n2 = ki+1;
                blasf77_zcopy( &n2, work(0,iv), &ione, VR(0,is), &ione );

                ii = blasf77_izamax( &n2, VR(0,is), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,is) );
                blasf77_zdscal( &n2, &remax, VR(0,is), &ione );

                for( k=ki+1; k < n; ++k ) {
                    *VR(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                time_trsv_sum += timer_stop( time_trsv );
                timer_start( time_gemv );
                if ( ki > 0 ) {
                    blasf77_zgemv( "n", &n, &ki, &c_one,
                                   VR, &ldvr,
                                   work(0, iv), &ione,
                                   work(ki,iv), VR(0,ki), &ione );
                }
                time_gemv_sum += timer_stop( time_gemv );
                ii = blasf77_izamax( &n, VR(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,ki) );
                blasf77_zdscal( &n, &remax, VR(0,ki), &ione );
                timer_start( time_trsv );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out below vector
                for( k=ki+1; k < n; ++k ) {
                    *work(k,iv) = c_zero;
                }

                // Columns iv:nb of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == 1) || (ki == 0) ) {
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemm );
                    nb2 = nb-iv+1;
                    n2  = ki+nb-iv+1;
                    blasf77_zgemm( "n", "n", &n, &nb2, &n2, &c_one,
                                   VR, &ldvr,
                                   work(0,iv   ), &n, &c_zero,
                                   work(0,nb+iv), &n );
                    time_gemm_sum += timer_stop( time_gemm );
                    
                    // normalize vectors
                    // TODO if somev, should copy vectors individually to correct location.
                    for( k = iv; k <= nb; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &nb2, work(0,nb+iv), &n, VR(0,ki), &ldvr );
                    iv = nb;
                    timer_start( time_trsv );
                }
                else {
                    iv -= 1;
                }
            } // blocked back-transform

            // Restore the original diagonal elements of T.
            for( k=0; k <= ki - 1; ++k ) {
                *T(k,k) = *work(k,0);
            }

            is -= 1;
        }
    }
    timer_stop( time_trsv );

    timer_stop( time_total );
    timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n",
                  time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total );

    if ( leftv ) {
        // ============================================================
        // Compute left eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=1, goes up to nb.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        is = 0;
        for( ki=0; ki < n; ++ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            smin = max( ulp*MAGMA_Z_ABS1( *T(ki,ki) ), smlnum );

            // --------------------------------------------------------
            // Complex left eigenvector
            *work(ki,iv) = c_one;

            // Form right-hand side.
            for( k = ki + 1; k < n; ++k ) {
                *work(k,iv) = -MAGMA_Z_CNJG( *T(ki,k) );
            }

            // Solve conjugate-transposed triangular system:
            // [ T(ki+1:n,ki+1:n) - T(ki,ki) ]**H * X = scale*work.
            for( k = ki + 1; k < n; ++k ) {
                *T(k,k) -= *T(ki,ki);
                if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) {
                    *T(k,k) = MAGMA_Z_MAKE( smin, 0. );
                }
            }

            if ( ki < n-1 ) {
                n2 = n-ki-1;
                lapackf77_zlatrs( "Upper", "Conjugate transpose", "Non-unit", "Y",
                                  &n2, T(ki+1,ki+1), &ldt,
                                  work(ki+1,iv), &scale, rwork, info );
                *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. );
            }

            // Copy the vector x or Q*x to VL and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VL and normalize
                n2 = n-ki;
                blasf77_zcopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione );

                ii = blasf77_izamax( &n2, VL(ki,is), &ione ) + ki - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,is) );
                blasf77_zdscal( &n2, &remax, VL(ki,is), &ione );

                for( k=0; k < ki; ++k ) {
                    *VL(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                if ( ki < n-1 ) {
                    n2 = n-ki-1;
                    blasf77_zgemv( "n", &n, &n2, &c_one,
                                   VL(0,ki+1), &ldvl,
                                   work(ki+1,iv), &ione,
                                   work(ki,  iv), VL(0,ki), &ione );
                }
                ii = blasf77_izamax( &n, VL(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,ki) );
                blasf77_zdscal( &n, &remax, VL(0,ki), &ione );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out above vector
                // could go from (ki+1)-NV+1 to ki
                for( k=0; k < ki; ++k ) {
                    *work(k,iv) = c_zero;
                }

                // Columns 1:iv of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == nb) || (ki == n-1) ) {
                    n2 = n-(ki+1)+iv;
                    blasf77_zgemm( "n", "n", &n, &iv, &n2, &c_one,
                                   VL(0,ki-iv+1), &ldvl,
                                   work(ki-iv+1,1   ), &n, &c_zero,
                                   work(0,      nb+1), &n );
                    // normalize vectors
                    for( k=1; k <= iv; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &iv, work(0,nb+1), &n, VL(0,ki-iv+1), &ldvl );
                    iv = 1;
                }
                else {
                    iv += 1;
                }
            } // blocked back-transform

            // Restore the original diagonal elements of T.
            for( k = ki + 1; k < n; ++k ) {
                *T(k,k) = *work(k,0);
            }

            is += 1;
        }
    }
    
    return *info;
}  // End of ZTREVC
  void ISVDMultiCD::makePass() {
    Epetra_LAPACK lapack;
    Epetra_BLAS   blas;

    bool firstPass = (curRank_ == 0);
    const int numCols = A_->NumVectors();
    TEUCHOS_TEST_FOR_EXCEPTION( !firstPass && (numProc_ != numCols), std::logic_error,
        "RBGen::ISVDMultiCD::makePass(): after first pass, numProc should be numCols");

    // compute W = I - Z T Z^T from current V_
    Teuchos::RCP<Epetra_MultiVector> lclAZT, lclZ;
    double *Z_A, *AZT_A;
    int Z_LDA, AZT_LDA;
    int oldRank = 0;
    double Rerr = 0.0;
    if (!firstPass) {
      // copy V_ into workZ_
      lclAZT = Teuchos::rcp( new Epetra_MultiVector(::View,*workAZT_,0,curRank_) );
      lclZ   = Teuchos::rcp( new Epetra_MultiVector(::View,*workZ_,0,curRank_) );
      {
        Epetra_MultiVector lclV(::View,*V_,0,curRank_);
        *lclZ = lclV;
      }
      // compute the Householder QR factorization of the current right basis
      // Vhat = W*R
      int info, lwork = curRank_;
      std::vector<double> tau(curRank_), work(lwork);
      info = lclZ->ExtractView(&Z_A,&Z_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector Z.");
      lapack.GEQRF(numCols,curRank_,Z_A,Z_LDA,&tau[0],&work[0],lwork,&info);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling GEQRF on current right basis while constructing next pass coefficients.");
      if (debug_) {
        // we just took the QR factorization of a set of orthonormal vectors
        // they should have an R factor which is diagonal, with unit elements (\pm 1)
        // check it
        Rerr = 0.0;
        for (int j=0; j<curRank_; j++) {
          for (int i=0; i<j; i++) {
            Rerr += abs(Z_A[j*Z_LDA+i]);
          }
          Rerr += abs(abs(Z_A[j*Z_LDA+j]) - 1.0);
        }
      }
      // compute the block representation
      // W = I - Z T Z^T
      lapack.LARFT('F','C',numCols,curRank_,Z_A,Z_LDA,&tau[0],workT_->A(),workT_->LDA());
      // LARFT left upper tri block of Z unchanged
      // note: it should currently contain R factor of V_, which is very close to
      //   diag(\pm 1, ..., \pm 1)
      //
      // we need to set it to:
      //   [1 0 0 ... 0]
      //   [  1 0 ... 0]
      //   [   ....    ]
      //   [          1]
      //
      // see documentation for LARFT
      //
      for (int j=0; j<curRank_; j++) {
        Z_A[j*Z_LDA+j] = 1.0;
        for (int i=0; i<j; i++) {
          Z_A[j*Z_LDA+i] = 0.0;
        }
      }
      // compute part of A W:  A Z T
      // put this in workAZT_
      // first, A Z
      info = lclAZT->Multiply('N','N',1.0,*A_,*lclZ,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Z");
      // second, (A Z) T (in situ, as T is upper triangular)
      info = lclAZT->ExtractView(&AZT_A,&AZT_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector AZ.");
      blas.TRMM('R','U','N','N',numCols,curRank_,1.0,workT_->A(),workT_->LDA(),AZT_A,AZT_LDA);
      // save oldRank: it tells us the width of Z
      oldRank  = curRank_;

      curRank_ = 0;
      numProc_ = 0;
    }
    else { // firstPass == true
      curRank_ = 0;
      numProc_ = 0;
    }

    while (numProc_ < numCols) {
      //
      // determine lup
      //
      // want lup >= lmin
      //      lup <= lmax
      // need lup <= numCols - numProc
      //      lup <= maxBasisSize - curRank
      //
      int lup;
      if (curRank_ == 0) {
        // first step uses startRank_
        // this is not affected by lmin,lmax
        lup = startRank_;
      }
      else {
        // this value minimizes overall complexity, assuming fixed rank
        lup = (int)(curRank_ / Teuchos::ScalarTraits<double>::squareroot(2.0));
        // contrain to [lmin,lmax]
        lup = (lup < lmin_ ? lmin_ : lup);
        lup = (lup > lmax_ ? lmax_ : lup);
      }
      //
      // now cap lup via maxBasisSize and the available data
      // these caps apply to all lup, as a result of memory and data constraints
      //
      // available data
      lup = (lup > numCols - numProc_ ? numCols - numProc_ : lup);
      // available memory
      lup = (lup > maxBasisSize_ - curRank_ ? maxBasisSize_ - curRank_ : lup);

      // get view of new vectors
      {
        const Epetra_MultiVector Aplus(::View,*A_,numProc_,lup);
        Epetra_MultiVector        Unew(::View,*U_,curRank_,lup);
        // put them in U
        if (firstPass) {
          // new vectors are just Aplus
          Unew = Aplus;
        }
        else {
          // new vectors are Aplus - (A Z T) Z_i^T
          // specifically, Aplus - (A Z T) Z(numProc:numProc+lup-1,1:oldRank)^T
          Epetra_LocalMap lclmap(lup,0,A_->Comm());
          Epetra_MultiVector Zi(::View,lclmap,&Z_A[numProc_],Z_LDA,oldRank);
          Unew = Aplus;
          int info = Unew.Multiply('N','T',-1.0,*lclAZT,Zi,1.0);
          TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
              "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Wi");
        }
      }

      // perform the incremental step
      incStep(lup);
    }

    // compute W V = V - Z T Z^T V
    // Z^T V is oldRank x curRank
    // T Z^T V is oldRank x curRank
    // we need T Z^T V in a local Epetra_MultiVector
    if (!firstPass) {
      Teuchos::RCP<Epetra_MultiVector> lclV;
      double *TZTV_A;
      int TZTV_LDA;
      int info;
      Epetra_LocalMap lclmap(oldRank,0,A_->Comm());
      // get pointer to current V
      lclV = Teuchos::rcp( new Epetra_MultiVector(::View,*V_,0,curRank_) );
      // create space for T Z^T V
      Epetra_MultiVector TZTV(lclmap,curRank_,false);
      // multiply Z^T V
      info = TZTV.Multiply('T','N',1.0,*lclZ,*lclV,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for Z^T V.");
      // get pointer to data in Z^T V
      info = TZTV.ExtractView(&TZTV_A,&TZTV_LDA);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector TZTV.");
      // multiply T (Z^T V)
      blas.TRMM('L','U','N','N',oldRank,curRank_,1.0,workT_->A(),workT_->LDA(),TZTV_A,TZTV_LDA);
      // multiply V - Z (T Z^T V)
      info = lclV->Multiply('N','N',-1.0,*lclZ,TZTV,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for W V.");
    }

    //
    // compute the new residuals
    // we know that A V = U S
    // if, in addition, A^T U = V S, then have singular subspaces
    // check residuals A^T U - V S, scaling the i-th column by sigma[i]
    //
    {
      // make these static, because makePass() will be likely be called again
      static Epetra_LocalMap lclmap(A_->NumVectors(),0,A_->Comm());
      static Epetra_MultiVector ATU(lclmap,maxBasisSize_,false);

      // we know that A V = U S
      // if, in addition, A^T U = V S, then have singular subspaces
      // check residuals A^T U - V S, scaling the i-th column by sigma[i]
      Epetra_MultiVector ATUlcl(::View,ATU,0,curRank_);
      Epetra_MultiVector Ulcl(::View,*U_,0,curRank_);
      Epetra_MultiVector Vlcl(::View,*V_,0,curRank_);
      // compute A^T U
      int info = ATUlcl.Multiply('T','N',1.0,*A_,Ulcl,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply for A^T U.");
      Epetra_LocalMap rankmap(curRank_,0,A_->Comm());
      Epetra_MultiVector S(rankmap,curRank_,true);
      for (int i=0; i<curRank_; i++) {
        S[i][i] = sigma_[i];
      }
      // subtract V S from A^T U
      info = ATUlcl.Multiply('N','N',-1.0,Vlcl,S,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
          "RBGen::ISVDMultiCD::computeBasis(): Error calling Epetra_MultiVector::Multiply for V S.");
      resNorms_.resize(curRank_);
      ATUlcl.Norm2(&resNorms_[0]);
      // scale by sigmas
      for (int i=0; i<curRank_; i++) {
        if (sigma_[i] != 0.0) {
          resNorms_[i] /= sigma_[i];
        }
      }
    }

    // debugging checks
    std::vector<double> errnorms(curRank_);
    if (debug_) {
      int info;
      // Check that A V = U Sigma
      // get pointers to current U and V, create workspace for A V - U Sigma
      Epetra_MultiVector work(U_->Map(),curRank_,false), 
                         curU(::View,*U_,0,curRank_),
                         curV(::View,*V_,0,curRank_);
      // create local MV for sigmas
      Epetra_LocalMap lclmap(curRank_,0,A_->Comm());
      Epetra_MultiVector curS(lclmap,curRank_,true);
      for (int i=0; i<curRank_; i++) {
        curS[i][i] = sigma_[i];
      }
      info = work.Multiply('N','N',1.0,curU,curS,0.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S.");
      info = work.Multiply('N','N',-1.0,*A_,curV,1.0);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error,
          "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S - A V.");
      work.Norm2(&errnorms[0]);
      for (int i=0; i<curRank_; i++) {
        if (sigma_[i] != 0.0) {
          errnorms[i] /= sigma_[i];
        }
      }
    }

    // update pass counter
    curNumPasses_++;

    // print out some info
    const Epetra_Comm *comm = &A_->Comm();
    if (comm->MyPID() == 0 && verbLevel_ >= 1) {
      std::cout 
        << "------------- ISVDMultiCD::makePass() -----------" << std::endl
        << "| Number of passes: " << curNumPasses_ << std::endl
        << "|     Current rank: " << curRank_ << std::endl
        << "|   Current sigmas: " << std::endl;
      for (int i=0; i<curRank_; i++) {
        std::cout << "|             " << sigma_[i] << std::endl;
      }
      if (debug_) {
        std::cout << "|DBG   US-AV norms: " << std::endl;
        for (int i=0; i<curRank_; i++) {
          std::cout << "|DBG          " << errnorms[i] << std::endl;
        }
        if (!firstPass) {
          std::cout << "|DBG      R-I norm: " << Rerr << std::endl;
        }
      }
    }

    return;
  }
Exemple #23
0
 // Queue a tuple that was produced on the node currently being executed.
 // The work item is tagged LOCAL_TUPLE, combined with any caller-supplied
 // modifier, and then forwarded to new_work() for that same node.
    inline void new_work_self(db::node *node, db::simple_tuple *stpl, const process::work_modifier mod = process::mods::NOTHING)
    {
       process::work local_item(node, stpl, process::mods::LOCAL_TUPLE | mod);

       new_work(node, local_item);
    }
Exemple #24
0
// DGELSS computes minimum norm solution to a real linear 
// least squares problem:   Minimize 2-norm(| b - A*x |).   
// using the singular value decomposition (SVD) of A. 
// A is an M-by-N matrix which may be rank-deficient.   
//---------------------------------------------------------
void umSOLVE_LS(const DMat& mat, const DMat& B, DMat& X)
//---------------------------------------------------------
{
  if (!mat.ok()) {umWARNING("umSOLVE_LS()", "system is empty"); return;}

  DMat A(mat);    // work with copy of input.

  int rows=A.num_rows(), cols=A.num_cols(), mmn=A.min_mn();
  int LDB=A.max_mn(), NRHS=B.num_cols();
  if (rows!=B.num_rows()) {umERROR("umSOLVE_LS(A,B)", "Inconsistant matrix sizes.");}

  DVec s(mmn);    // allocate array for singular values

  // X must be big enough to store various results.
  // Resize X so that its leading dimension = max(M,N), 
  // then load the set of right hand sides.

  X.resize(LDB,NRHS, true, 0.0);

  for (int j=1; j<=NRHS; ++j)     // loop across colums
    for (int i=1; i<=rows; ++i)   // loop down rows
      X(i,j) = B(i,j);

  // RCOND is used to determine the effective rank of A.   
  // Singular values S(i) <= RCOND*S(1) are treated as zero.   
  // If RCOND < 0, machine precision is used instead.   

//double rcond =  1.0 / 1.0e16;
  double rcond = -1.0;

  // NBN: ACML does not use the work vector.
  int mnLo=A.min_mn(), mnHi=A.max_mn(), rank=1, info=1;
  int lwork = 10*mnLo + std::max(2*mnLo, std::max(mnHi, NRHS));
  DVec work(lwork); 

  // Solve the system
  GELSS (rows, cols, NRHS, A.data(), rows, X.data(), LDB, s.data(), rcond, rank, work.data(), lwork, info);

  //---------------------------------------------
  // Report:
  //---------------------------------------------

  if (info == 0) {
    umLOG(1, "umSOLVE_LS reports successful LS-solution."
             "\nRCOND = %0.6e, "
             "\nOptimal length of work array was %d\n", rcond, lwork);
  } 
  else 
  {
    if (info < 0) { 
      X = 0.0;
      umERROR("umSOLVE_LS(DMat&, DMat&)", 
              "Error in input argument (%d)\nNo solution or error bounds computed.", -info);

    } else if (info > 0) {
      X = 0.0;
      umERROR("umSOLVE_LS(DMat&, DMat&)", 
          "\nThe algorithm for computing the SVD failed to converge.\n"
          "\n%d off-diagonal elements of an intermediate "
          "\nbidiagonal form did not converge to zero.\n "
          "\nRCOND = %0.6e, "
          "\nOptimal length of work array was %d.\n", info, rcond, lwork);
    }
  }
}
Exemple #25
0
// Program entry point: redirects standard input to the file "input.in"
// and then runs work().
// Returns 0 on success, 1 when "input.in" cannot be opened.
int main()
{
	// freopen() closes the stream even when it fails, so stdin would be
	// unusable afterwards; report the problem instead of running work()
	// on a dead stream.
	if (freopen("input.in","r",stdin) == NULL) {
		perror("input.in");
		return 1;
	}
	work();
	return 0;
}
Exemple #26
0
//************************************************************************
//Inversion of a matrix given by its LU decomposition
//
//Input parameters:
//    A       -   LU decomposition of the matrix (output of the
//                LUDecomposition subroutine).
//    Pivots  -   table of the permutations performed during the LU
//                decomposition (output of the LUDecomposition subroutine).
//    N       -   dimension of the matrix
//
//Output parameters:
//    A       -   inverse of the original matrix. Array whose elements are
//                numbered [1..N, 1..N]
//
//Result:
//    True,  if the original matrix is non-singular.
//    False, if the original matrix is singular.
//
//  -- LAPACK routine (version 3.0) --
//     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
//     Courant Institute, Argonne National Lab, and Rice University
//     February 29, 1992
//************************************************************************
bool inverselu(ap::real_2d_array& a,
               const ap::integer_1d_array& pivots,
               int n)
{
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return true;
    }

    // Scratch vector, indexed 1..n like the matrix itself.
    ap::real_1d_array work;
    work.setbounds(1, n);

    //
    // Form inv(U); a failure here is reported to the caller as "singular".
    //
    if( !invtriangular(a, n, true, false) )
    {
        return false;
    }

    //
    // Solve the equation inv(A)*L = inv(U) for inv(A),
    // one column at a time from the last column to the first.
    //
    for(int j = n; j >= 1; j--)
    {
        //
        // Copy current column of L to WORK and replace with zeros.
        //
        for(int i = j+1; i <= n; i++)
        {
            work(i) = a(i,j);
            a(i,j) = 0;
        }

        //
        // Compute current column of inv(A):
        // a(i,j) -= a(i, j+1..n) . work(j+1..n) for every row i.
        //
        if( j<n )
        {
            const int jp1 = j+1;
            for(int i = 1; i <= n; i++)
            {
                const double v = ap::vdotproduct(a.getrow(i, jp1, n), work.getvector(jp1, n));
                a(i,j) = a(i,j)-v;
            }
        }
    }

    //
    // Apply column interchanges: walking j from n-1 down to 1, swap
    // columns j and pivots(j) (through WORK) whenever they differ.
    //
    for(int j = n-1; j >= 1; j--)
    {
        const int jp = pivots(j);
        if( jp!=j )
        {
            ap::vmove(work.getvector(1, n), a.getcolumn(j, 1, n));
            ap::vmove(a.getcolumn(j, 1, n), a.getcolumn(jp, 1, n));
            ap::vmove(a.getcolumn(jp, 1, n), work.getvector(1, n));
        }
    }
    return true;
}
Exemple #27
0
		// Function-call operator: invoking the object just runs work().
		void operator()()
		{
			work();
		}
Exemple #28
0
 // Worker entry point: runs the mIo event loop on the calling thread.
 // An io_service::work object bound to mIo is held for the whole call.
 void Emulator::WorkerRun()
 {
     boost::asio::io_service::work keepAlive(mIo);

     mIo.run();
 }
Exemple #29
0
/*
 * R entry point: configure the dieharder globals from the R arguments, run
 * the selected test, and return the results as an R list.
 *
 *   genS      generator number
 *   testS     test number: <100 diehard, 100-199 rgb, 200-299 sts, else user
 *   seedS     seed; 0 means "pick a random seed"
 *   psamplesS number of p-samples
 *   verbS     non-zero for verbose output
 *   infileS   input file name; "" means no file input
 *   ntupleS   ntuple setting
 *
 * Returns a list of four elements:
 *   [0] vector of ks p-values, [1] vector of p-values of the first test,
 *   [2] test name, [3] number of ks p-values.
 * When no test results are available, [0] and [1] are length-one NaN vectors,
 * [2] is "" and [3] is NA.
 */
SEXP dieharder(SEXP genS, SEXP testS, SEXP seedS, SEXP psamplesS, SEXP verbS, SEXP infileS, SEXP ntupleS) {

    int verb, testarg, have_results;
    unsigned int i;
    SEXP result = NULL, vec, pv, name, nkps;
    char *inputfile;

    /* Setup argv to allow call of parsecl() to let dieharder set globals */
    char *argv[] = { "dieharder" };
    optind = 0;
    parsecl(1, argv);

    /* Parse 'our' parameters from R */
    generator  = INTEGER_VALUE(genS);
    testarg = INTEGER_VALUE(testS);
    diehard = rgb = sts = user = 0;
    if (testarg < 100) {
        diehard = testarg;
    } else if (testarg < 200) {
        rgb = testarg - 100;
    } else if (testarg < 300) {
        sts = testarg - 200;
    } else {
        user = testarg - 300;
    }
    Seed = (unsigned long int) INTEGER_VALUE(seedS); /* (user-select) Seed, not (save switch) seed */
    psamples = INTEGER_VALUE(psamplesS);
    verb = INTEGER_VALUE(verbS);
    inputfile = (char*) CHARACTER_VALUE(infileS);
    ntuple = INTEGER_VALUE(ntupleS);

    rdh_testptr = NULL;
    rdh_dtestptr = NULL;    /* to be safe, explicitly flag as NULL; cf test in output.c */

    if (strcmp(inputfile, "") != 0) {
        /* strncpy() does not terminate the destination when the source is
           128 characters or longer, so terminate it explicitly */
        strncpy(filename, inputfile, 128);
        filename[127] = '\0';
        fromfile = 1;       /* flag this as file input */
    }

    if (Seed == 0) {
        seed = random_seed();
    } else {
        seed = (unsigned long int) Seed;
    }

    if (verb) {
        Rprintf("Dieharder called with gen=%d test=%d seed=%lu\n", generator, diehard, seed);
        quiet = 0;
        hist_flag = 1;
    } else {
        quiet = 1;          /* override dieharder command-line default */
        hist_flag = 0;
    }

    /* Now do the work that dieharder.c does */
    startup();
    work();
    gsl_rng_free(rng);
    reset_bit_buffers();

    /* And then bring our results back to R */

    /* Both pointers were reset to NULL above; results exist only if both are set now.
       This must be decided before rdh_dtestptr is dereferenced. */
    have_results = (rdh_testptr != NULL && rdh_dtestptr != NULL);

    /* create vector of size four: [0] is vector (!!) ks_pv, [1] is pvalues vec, [2] name, [3] nkps */
    PROTECT(result = allocVector(VECSXP, 4));

    /* alloc vector and scalars, and set it;
       without results pv still needs one slot for the NaN placeholder */
    PROTECT(pv = allocVector(REALSXP, have_results ? rdh_dtestptr->nkps : 1));
    PROTECT(name = allocVector(STRSXP, 1));
    PROTECT(nkps = allocVector(INTSXP, 1));

    if (have_results) {
        for (i=0; i<rdh_dtestptr->nkps; i++) {      /* there can be nkps p-values per test */
            REAL(pv)[i] = rdh_testptr[i]->ks_pvalue;
        }
        PROTECT(vec = allocVector(REALSXP, rdh_testptr[0]->psamples)); /* alloc vector and set it */
        for (i = 0; i < rdh_testptr[0]->psamples; i++) {
            REAL(vec)[i] = rdh_testptr[0]->pvalues[i];
        }
        SET_STRING_ELT(name, 0, mkChar(rdh_dtestptr->name));
        INTEGER(nkps)[0] = rdh_dtestptr->nkps;      /* nb of Kuiper KS p-values in pv vector */
    } else {
        PROTECT(vec = allocVector(REALSXP, 1));
        REAL(pv)[0] = R_NaN;
        REAL(vec)[0] = R_NaN;
        SET_STRING_ELT(name, 0, mkChar(""));
        /* an int slot cannot hold NaN; NA_INTEGER is the integer missing value */
        INTEGER(nkps)[0] = NA_INTEGER;
    }

    /* insert vectors and scalars into result vector */
    SET_VECTOR_ELT(result, 0, pv);
    SET_VECTOR_ELT(result, 1, vec);
    SET_VECTOR_ELT(result, 2, name);
    SET_VECTOR_ELT(result, 3, nkps);

    /* result, pv, name, nkps and vec */
    UNPROTECT(5);

    return result;
}
Exemple #30
0
extern "C" magma_int_t
magma_zgetrf2_gpu(
    magma_int_t m, magma_int_t n,
    magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda,
    magma_int_t *ipiv,
    magma_queue_t queues[2],
    magma_int_t *info )
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    ZGETRF computes an LU factorization of a general M-by-N matrix A
    using partial pivoting with row interchanges.

    The factorization has the form
        A = P * L * U
    where P is a permutation matrix, L is lower triangular with unit
    diagonal elements (lower trapezoidal if m > n), and U is upper
    triangular (upper trapezoidal if m < n).

    This is the right-looking Level 3 BLAS version of the algorithm.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N).
            On entry, the M-by-N matrix to be factored.
            On exit, the factors L and U from the factorization
            A = P*L*U; the unit diagonal elements of L are not stored.

    LDDA     (input) INTEGER
            The leading dimension of the array A.  LDDA >= max(1,M).

    IPIV    (output) INTEGER array, dimension (min(M,N))
            The pivot indices; for 1 <= i <= min(M,N), row i of the
            matrix was interchanged with row IPIV(i).

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
                  or another error occured, such as memory allocation failed.
            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
                  has been completed, but the factor U is exactly
                  singular, and division by zero will occur if it is used
                  to solve a system of equations.
    =====================================================================    */

    #define  dA(i_, j_) dA,   dA_offset  + (i_)*nb       + (j_)*nb*ldda
    #define dAT(i_, j_) dAT,  dAT_offset + (i_)*nb*lddat + (j_)*nb
    #define dAP(i_, j_) dAP,               (i_)          + (j_)*maxm
    #define work(i_)   (work + (i_))

    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;

    magma_int_t iinfo, nb;
    magma_int_t maxm, maxn, mindim;
    magma_int_t i, j, rows, s, lddat, ldwork;
    magmaDoubleComplex_ptr dAT, dAP;
    magmaDoubleComplex *work;
    size_t dAT_offset;

    /* Check arguments */
    *info = 0;
    if (m < 0)
        *info = -1;
    else if (n < 0)
        *info = -2;
    else if (ldda < max(1,m))
        *info = -4;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if (m == 0 || n == 0)
        return *info;

    /* Function Body */
    mindim = min(m, n);
    nb     = magma_get_zgetrf_nb(m);
    s      = mindim / nb;

    if (nb <= 1 || nb >= min(m,n)) {
        /* Use CPU code. */
        if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, m*n )) {
            *info = MAGMA_ERR_HOST_ALLOC;
            return *info;
        }
        magma_zgetmatrix( m, n, dA(0,0), ldda, work(0), m, queues[0] );
        lapackf77_zgetrf( &m, &n, work, &m, ipiv, info );
        magma_zsetmatrix( m, n, work(0), m, dA(0,0), ldda, queues[0] );
        magma_free_cpu( work );
    }
    else {
        /* Use hybrid blocked code. */
        maxm = ((m + 31)/32)*32;
        maxn = ((n + 31)/32)*32;

        if ( MAGMA_SUCCESS != magma_zmalloc( &dAP, nb*maxm )) {
            *info = MAGMA_ERR_DEVICE_ALLOC;
            return *info;
        }

        // square matrices can be done in place;
        // rectangular requires copy to transpose
        if ( m == n ) {
            dAT = dA;
            dAT_offset = dA_offset;
            lddat = ldda;
            magmablas_ztranspose_inplace( m, dAT(0,0), lddat, queues[0] );
        }
        else {
            lddat = maxn;  // N-by-M
            dAT_offset = 0;
            if ( MAGMA_SUCCESS != magma_zmalloc( &dAT, lddat*maxm )) {
                magma_free( dAP );
                *info = MAGMA_ERR_DEVICE_ALLOC;
                return *info;
            }
            magmablas_ztranspose( m, n, dA(0,0), ldda, dAT(0,0), lddat, queues[0] );
        }

        ldwork = maxm;
        /*
        if ( MAGMA_SUCCESS != magma_zmalloc_cpu( &work, ldwork*nb ) ) {
            magma_free( dAP );
            if ( dA != dAT )
                magma_free( dAT );

            *info = MAGMA_ERR_HOST_ALLOC;
            return *info;
        }
        */
        cl_mem work_mapped = clCreateBuffer( gContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, ldwork*nb * sizeof(magmaDoubleComplex), NULL, NULL );
        work = (magmaDoubleComplex*) clEnqueueMapBuffer( queues[0], work_mapped, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, ldwork*nb * sizeof(magmaDoubleComplex), 0, NULL, NULL, NULL );

        for( j=0; j < s; j++ ) {
            // download j-th panel
            magmablas_ztranspose( nb, m-j*nb, dAT(j,j), lddat, dAP(0,0), maxm, queues[0] );
            clFlush( queues[0] );
            magma_queue_sync( queues[0] );
            magma_zgetmatrix_async( m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queues[1], NULL );
            clFlush( queues[1] );
            if ( j > 0 ) {
                magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             n - (j+1)*nb, nb,
                             c_one, dAT(j-1,j-1), lddat,
                             dAT(j-1,j+1), lddat, queues[0] );
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             n-(j+1)*nb, m-j*nb, nb,
                             c_neg_one, dAT(j-1,j+1), lddat,
                                        dAT(j,  j-1), lddat,
                             c_one,     dAT(j,  j+1), lddat, queues[0] );
            }

            magma_queue_sync( queues[1] );
            // do the cpu part
            rows = m - j*nb;
            lapackf77_zgetrf( &rows, &nb, work, &ldwork, ipiv+j*nb, &iinfo );
            if ( *info == 0 && iinfo > 0 )
                *info = iinfo + j*nb;

            for( i=j*nb; i < j*nb + nb; ++i ) {
                ipiv[i] += j*nb;
            }
            magmablas_zlaswp( n, dAT(0,0), lddat, j*nb + 1, j*nb + nb, ipiv, 1, queues[0] );
            clFlush( queues[0] );

            // upload j-th panel
            magma_zsetmatrix_async( m-j*nb, nb, work(0), ldwork, dAP(0,0), maxm, queues[1], NULL );
            magma_queue_sync( queues[1] );
            magmablas_ztranspose( m-j*nb, nb, dAP(0,0), maxm, dAT(j,j), lddat, queues[0] );
            clFlush( queues[0] );
            
            // do the small non-parallel computations (next panel update)
            if ( s > (j+1) ) {
                magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             nb, nb,
                             c_one, dAT(j, j  ), lddat,
                             dAT(j, j+1), lddat, queues[0] );
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             nb, m-(j+1)*nb, nb,
                             c_neg_one, dAT(j,   j+1), lddat,
                                        dAT(j+1, j  ), lddat,
                             c_one,     dAT(j+1, j+1), lddat, queues[0] );
            }
            else {
                magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             n-s*nb, nb,
                             c_one, dAT(j, j  ), lddat,
                             dAT(j, j+1), lddat, queues[0] );
                magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                             n-(j+1)*nb, m-(j+1)*nb, nb,
                             c_neg_one, dAT(j,   j+1), lddat,
                                        dAT(j+1, j  ), lddat,
                             c_one,     dAT(j+1, j+1), lddat, queues[0] );
            }
        }

        magma_int_t nb0 = min( m - s*nb, n - s*nb );
        if ( nb0 > 0 ) {
            rows = m - s*nb;
    
            magmablas_ztranspose( nb0, rows, dAT(s,s), lddat, dAP(0,0), maxm, queues[0] );
            clFlush( queues[0] );
            magma_queue_sync( queues[0] );
            magma_zgetmatrix_async( rows, nb0, dAP(0,0), maxm, work(0), ldwork, queues[1], NULL );
            magma_queue_sync( queues[1] );
            
            // do the cpu part
            lapackf77_zgetrf( &rows, &nb0, work, &ldwork, ipiv+s*nb, &iinfo );
            if ( (*info == 0) && (iinfo > 0) )
                *info = iinfo + s*nb;
            
            for( i=s*nb; i < s*nb + nb0; ++i ) {
                ipiv[i] += s*nb;
            }
            magmablas_zlaswp( n, dAT(0,0), lddat, s*nb + 1, s*nb + nb0, ipiv, 1, queues[0] );
            clFlush( queues[0] );
            
            // upload j-th panel
            magma_zsetmatrix_async( rows, nb0, work(0), ldwork, dAP(0,0), maxm, queues[1], NULL );
            magma_queue_sync( queues[1] );
            magmablas_ztranspose( rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queues[0] );
            clFlush( queues[0] );
    
            magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                         n-s*nb-nb0, nb0,
                         c_one, dAT(s,s),     lddat,
                         dAT(s,s)+nb0, lddat, queues[0] );
        }

        // undo transpose
        if ( dA == dAT ) {
            magmablas_ztranspose_inplace( m, dAT(0,0), lddat, queues[0] );
        }
        else {
            magmablas_ztranspose( n, m, dAT(0,0), lddat, dA(0,0), ldda, queues[0] );
            magma_free( dAT );
        }
        
        magma_queue_sync( queues[0] );
        magma_queue_sync( queues[1] );
        magma_free( dAP );
        // magma_free_cpu( work );
        clEnqueueUnmapMemObject( queues[0], work_mapped, work, 0, NULL, NULL );
        clReleaseMemObject( work_mapped );
    }

    return *info;
} /* magma_zgetrf_gpu */