Esempio n. 1
0
// Row-partitioned matrix-vector product gathered on rank 0.
//
// The rows of `mat` are split into `world_size` contiguous slices of
// `rows / world_size` rows each; the last rank additionally owns the
// `rows % world_size` leftover rows. Every rank multiplies a slice of the
// widest possible size (base + leftover rows) starting at its own first row,
// but only sends the part it owns. Rank 0 collects the pieces into `res`
// with MPI_Gatherv.
//
// mat: matrix; only dim0() and row slices via mat_rows() are used here.
// vec: vector multiplied by each row slice.
// res: receive buffer passed to MPI_Gatherv (root is rank 0).
void parallel_gemv_task(CM && mat, CV && vec, CR && res) {
  int my_rank = 0;
  int world_size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  // Work distribution: equal base share, leftovers go to the last rank.
  const int total_rows = static_cast<int>(mat.dim0());
  const int base_rows = total_rows / world_size;
  const int extra_rows = total_rows % world_size;
  const int widest_slice = base_rows + extra_rows;
  const int last_rank = world_size - 1;

  int *recv_counts = new int [world_size];
  int *recv_offsets = new int [world_size];
  const int first_row = my_rank * base_rows;

  // Local product over rows [first_row, first_row + widest_slice).
  douban::vec_container<double> partial(widest_slice);
  partial = douban::gemv(mat_rows(mat, first_row, first_row + widest_slice), vec);

  // Per-rank receive counts and displacements for the gather.
  int rk = 0;
  while (rk < world_size) {
    recv_counts[rk] = base_rows;
    recv_offsets[rk] = rk * base_rows;
    ++rk;
  }
  // With no leftover rows widest_slice equals base_rows, so this assignment
  // is a no-op in that case.
  recv_counts[last_rank] = widest_slice;

  // Only the last rank contributes the leftover rows.
  const int send_count = (my_rank == last_rank) ? widest_slice : base_rows;

  MPI_Gatherv(&partial[0], send_count, MPI_DOUBLE,
              &res[0], recv_counts, recv_offsets, MPI_DOUBLE,
              0, MPI_COMM_WORLD);

  delete [] recv_counts;
  delete [] recv_offsets;
}
Esempio n. 2
0
	bool DSN6File::readHeader()
		throw()
	{
		// first read the complete 512 bytes of header information
		char header[512];
		std::fstream::read(header, 512);

		if (gcount() != 512)
		{
			Log.error() << "DSN6File::readHeader(): File does not contain a proper DSN6 header. Aborting read." << std::endl;

			return false;
		}

		// to determine whether we have to swap bytes in the header (depending on the version of
		// the DSN6 - File and on the byte order on the machine) we try to reproduce the known value
		// of 100 in header[2*18]
		short int header_value = readHeaderValue_(header, 18);
		
		if (header_value != 100)
		{
			// try to change endianness
			swap_bytes_ = true;

			header_value = readHeaderValue_(header, 18);
			if (header_value != 100)
			{
				Log.error() << "DSN6File::readHeader(): Corrupt DSN6 header: header[16] != 100. Aborting read." << std::endl;
				
				return false;
			}
		}

		header_value = readHeaderValue_(header, 0);
		start_.x = (float)header_value;

		header_value = readHeaderValue_(header, 1);
		start_.y = (float)header_value;

		header_value = readHeaderValue_(header, 2);
		start_.z = (float)header_value;

		header_value = readHeaderValue_(header, 3);
		extent_.x = (float)header_value;

		header_value = readHeaderValue_(header, 4);
		extent_.y = (float)header_value;

		header_value = readHeaderValue_(header, 5);
		extent_.z = (float)header_value;

		header_value = readHeaderValue_(header, 6);
		sampling_rate_.x = (float)header_value;

		header_value = readHeaderValue_(header, 7);
		sampling_rate_.y = (float)header_value;

		header_value = readHeaderValue_(header, 8);
		sampling_rate_.z = (float)header_value;

		header_value = readHeaderValue_(header, 17);	
		cell_scaling_ = (float)header_value;

		header_value = readHeaderValue_(header, 9);
		crystal_dimension_.x = (float)header_value / (cell_scaling_ * sampling_rate_.x);  

		header_value = readHeaderValue_(header, 10);
		crystal_dimension_.y = (float)header_value / (cell_scaling_ * sampling_rate_.y);

		header_value = readHeaderValue_(header, 11);
		crystal_dimension_.z = (float)header_value / (cell_scaling_ * sampling_rate_.z);

		header_value = readHeaderValue_(header, 12);
		alpha_ = Angle((float)header_value / cell_scaling_, false);

		header_value = readHeaderValue_(header, 13);
		beta_  = Angle((float)header_value / cell_scaling_, false);

		header_value = readHeaderValue_(header, 14);
		gamma_ = Angle((float)header_value / cell_scaling_, false);

		header_value = readHeaderValue_(header, 15);
		prod_ = (float)header_value / 100.;

		header_value = readHeaderValue_(header, 16);
		plus_ = (float)header_value;

		// convert from grid space to cartesian coordinates (inspired by the VMD code :-) )
		Vector3 x_tmp(crystal_dimension_.x, 0., 0.);
		Vector3 y_tmp(cos(gamma_.toRadian()), sin(gamma_.toRadian()), 0.);
		y_tmp *= crystal_dimension_.y;
		Vector3 z_tmp( cos(beta_.toRadian()), 
									(cos(alpha_.toRadian()) - cos(beta_.toRadian())*cos(gamma_.toRadian())) / sin(gamma_.toRadian()),
									0.);
		z_tmp.z = sqrt(1.0 - z_tmp.x*z_tmp.x - z_tmp.y*z_tmp.y);
		z_tmp *= crystal_dimension_.z;

		origin_.x = x_tmp.x * start_.x + y_tmp.x * start_.y + z_tmp.x * start_.z;
		origin_.y = y_tmp.y * start_.y + z_tmp.y * start_.z;
		origin_.z = z_tmp.z * start_.z;

		xaxis_.x = x_tmp.x * (extent_.x - 1);
		xaxis_.y = 0.;
		xaxis_.z = 0.;

		yaxis_.x = y_tmp.x * (extent_.y - 1);
		yaxis_.y = y_tmp.y * (extent_.y - 1);
		yaxis_.z = 0.;

		zaxis_.x = z_tmp.x * (extent_.z - 1);
		zaxis_.y = z_tmp.y * (extent_.z - 1);
		zaxis_.z = z_tmp.z * (extent_.z - 1);

		// that's it. we're done
		return true;
	}
Esempio n. 3
0
// Minimizes `function` with L-BFGS.
//
// The starting point is taken from the user's variables through
// function.copy_user_to_global() and the final point is written back through
// function.copy_global_to_user() when the loop ends.
//
// function: objective; evaluate(x, &g) returns the value at x and fills the
//           gradient g.
// results:  receives the exit condition; the timing fields (startup_time,
//           function_evaluation_time, lbfgs_update_time,
//           stopping_criteria_time, backtracking_time, log_time, total_time)
//           are accumulated with +=.
//
// If the function has no scalars, the exit condition is set to
// FUNCTION_TOLERANCE and the function returns immediately.
void LBFGSSolver::solve(const Function& function,
                        SolverResults* results) const
{
	double global_start_time = wall_time();

	// Dimension of problem.
	size_t n = function.get_number_of_scalars();

	if (n == 0) {
		results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
		return;
	}

	// Current point, gradient and Hessian.
	// All scalars start as NaN until the first iteration has computed them.
	double fval   = std::numeric_limits<double>::quiet_NaN();
	double fprev  = std::numeric_limits<double>::quiet_NaN();
	double normg0 = std::numeric_limits<double>::quiet_NaN();
	double normg  = std::numeric_limits<double>::quiet_NaN();
	double normdx = std::numeric_limits<double>::quiet_NaN();

	Eigen::VectorXd x, g;

	// Copy the user state to the current point.
	function.copy_user_to_global(&x);
	// Output point of the line search.
	Eigen::VectorXd x2(n);

	// L-BFGS history.
	// s_data/y_data own the storage; s/y hold pointers into it so that the
	// history can be rotated by moving pointers instead of copying vectors.
	std::vector<Eigen::VectorXd>  s_data(this->lbfgs_history_size),
	                              y_data(this->lbfgs_history_size);
	std::vector<Eigen::VectorXd*> s(this->lbfgs_history_size),
	                              y(this->lbfgs_history_size);
	for (int h = 0; h < this->lbfgs_history_size; ++h) {
		s_data[h].resize(function.get_number_of_scalars());
		s_data[h].setZero();
		y_data[h].resize(function.get_number_of_scalars());
		y_data[h].setZero();
		s[h] = &s_data[h];
		y[h] = &y_data[h];
	}

	// rho[h] = 1 / (s[h]^T y[h]) for filled history slots, 0 for empty ones.
	Eigen::VectorXd rho(this->lbfgs_history_size);
	rho.setZero();

	// Work vectors of the two-loop recursion; r ends up as the search direction.
	Eigen::VectorXd alpha(this->lbfgs_history_size);
	alpha.setZero();
	Eigen::VectorXd q(n);
	Eigen::VectorXd r(n);

	// Needed from the previous iteration.
	Eigen::VectorXd x_prev(n), s_tmp(n), y_tmp(n);

	CheckExitConditionsCache exit_condition_cache;

	//
	// START MAIN ITERATION
	//
	results->startup_time   += wall_time() - global_start_time;
	// Default in case the loop is left without a more specific condition
	// having been stored.
	results->exit_condition = SolverResults::INTERNAL_ERROR;
	int iter = 0;
	bool last_iteration_successful = true;
	int number_of_line_search_failures = 0;
	int number_of_restarts = 0;
	while (true) {

		//
		// Evaluate function and derivatives.
		//
		double start_time = wall_time();
		// y[0] should contain the difference between the gradient
		// in this iteration and the gradient from the previous.
		// Therefore, update y before and after evaluating the
		// function.
		if (iter > 0) {
			// Negated old gradient; the new gradient is added further down.
			y_tmp = -g;
		}
		fval = function.evaluate(x, &g);

		// Largest absolute gradient component, max|g_i|.
		normg = std::max(g.maxCoeff(), -g.minCoeff());
		if (iter == 0) {
			normg0 = normg;
		}
		results->function_evaluation_time += wall_time() - start_time;

		//
		// Update history
		//
		start_time = wall_time();

		if (iter > 0 && last_iteration_successful) {
			s_tmp = x - x_prev;
			y_tmp += g;

			// The pair is stored only if s^T y is clearly positive; otherwise
			// the history is left unchanged for this iteration.
			double sTy = s_tmp.dot(y_tmp);
			if (sTy > 1e-16) {
				// Shift all pointers one step back, discarding the oldest one.
				Eigen::VectorXd* sh = s[this->lbfgs_history_size - 1];
				Eigen::VectorXd* yh = y[this->lbfgs_history_size - 1];
				for (int h = this->lbfgs_history_size - 1; h >= 1; --h) {
					s[h]   = s[h - 1];
					y[h]   = y[h - 1];
					rho[h] = rho[h - 1];
				}
				// Reuse the storage of the discarded data for the new data.
				s[0] = sh;
				y[0] = yh;

				*y[0] = y_tmp;
				*s[0] = s_tmp;
				rho[0] = 1.0 / sTy;
			}
		}

		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Test stopping criteria
		//
		start_time = wall_time();
		// The exit conditions are not tested during the first two iterations
		// (iter 0 and 1).
		if (iter > 1 && this->check_exit_conditions(fval, fprev, normg,
		                                            normg0, x.norm(), normdx,
		                                            last_iteration_successful, 
		                                            &exit_condition_cache, results)) {
			break;
		}
		if (iter >= this->maximum_iterations) {
			results->exit_condition = SolverResults::NO_CONVERGENCE;
			break;
		}

		// Give the user callback a chance to inspect the iterate and abort.
		if (this->callback_function) {
			CallbackInformation information;
			information.objective_value = fval;
			information.x = &x;
			information.g = &g;

			if (!callback_function(information)) {
				results->exit_condition = SolverResults::USER_ABORT;
				break;
			}
		}

		results->stopping_criteria_time += wall_time() - start_time;

		//
		// Compute search direction via L-BFGS two-loop recursion.
		//
		start_time = wall_time();
		bool should_restart = false;

		// Scaling of the initial inverse Hessian approximation (H0 * identity).
		double H0 = 1.0;
		if (iter > 0) {
			// If the gradient is identical two iterations in a row,
			// y will be the zero vector and H0 will be NaN. In this
			// case the line search will fail and L-BFGS will be restarted
			// with a steepest descent step.
			H0 = s[0]->dot(*y[0]) / y[0]->dot(*y[0]);

			// If isinf(H0) || isnan(H0)
			if (H0 ==  std::numeric_limits<double>::infinity() ||
			    H0 == -std::numeric_limits<double>::infinity() ||
			    H0 != H0) {
				should_restart = true;
			}
		}

		q = -g;

		// First loop, newest to oldest pair. Empty history slots hold zero
		// vectors and rho == 0, so they leave q unchanged.
		for (int h = 0; h < this->lbfgs_history_size; ++h) {
			alpha[h] = rho[h] * s[h]->dot(q);
			q = q - alpha[h] * (*y[h]);
		}

		r = H0 * q;

		// Second loop, oldest to newest pair.
		for (int h = this->lbfgs_history_size - 1; h >= 0; --h) {
			double beta = rho[h] * y[h]->dot(r);
			r = r + (*s[h]) * (alpha[h] - beta);
		}

		// If the function improves very little, the approximated Hessian
		// might be very bad. If this is the case, it is better to discard
		// the history once in a while. This allows the solver to correctly
		// solve some badly scaled problems.
		// Relative change of the function value; NaN in the first iteration
		// because fprev is still NaN, which the iter > 0 test below excludes.
		double restart_test = std::fabs(fval - fprev) /
		                      (std::fabs(fval) + std::fabs(fprev));
		// Only checked every 100th iteration.
		if (iter > 0 && iter % 100 == 0 && restart_test
		                                   < this->lbfgs_restart_tolerance) {
			should_restart = true;
		}
		// A failed line search in the previous iteration always forces a restart.
		if (! last_iteration_successful) {
			should_restart = true;
		}

		if (should_restart) {
			if (this->log_function) {
				// Restarts are only counted (and reported) when a log function
				// is set; reporting stops after the message below.
				char str[1024];
				if (number_of_restarts <= 10) {
					std::sprintf(str, "Restarting: fval = %.3e, deltaf = %.3e, max|g_i| = %.3e, test = %.3e",
								 fval, std::fabs(fval - fprev), normg, restart_test);
					this->log_function(str);
				}
				if (number_of_restarts == 10) {
					this->log_function("NOTE: No more restarts will be reported.");
				}
				number_of_restarts++;
			}
			// Steepest descent direction; the whole history is cleared.
			r = -g;
			for (int h = 0; h < this->lbfgs_history_size; ++h) {
				(*s[h]).setZero();
				(*y[h]).setZero();
			}
			rho.setZero();
			alpha.setZero();
			// H0 is not used, but its value will be printed.
			H0 = std::numeric_limits<double>::quiet_NaN();
		}

		// The direction computation is accounted for in lbfgs_update_time.
		results->lbfgs_update_time += wall_time() - start_time;

		//
		// Perform line search.
		//
		start_time = wall_time();
		double start_alpha = 1.0;
		// In the first iteration, start with a much smaller step
		// length. (heuristic used by e.g. minFunc)
		if (iter == 0) {
			double sumabsg = 0.0;
			for (size_t i = 0; i < n; ++i) {
				sumabsg += std::fabs(g[i]);
			}
			start_alpha = std::min(1.0, 1.0 / sumabsg);
		}
		double alpha_step = this->perform_linesearch(function, x, fval, g,
		                                             r, &x2, start_alpha);

		// A non-positive step length is treated as a failed line search.
		if (alpha_step <= 0) {
			if (this->log_function) {
				this->log_function("Line search failed.");
				char str[1024];
				std::sprintf(str, "%4d %+.3e %9.3e %.3e %.3e %.3e %.3e",
					iter, fval, std::fabs(fval - fprev), normg, alpha_step, H0, rho[0]);
				this->log_function(str);
			}
			// Give up after two failures in a row, or once the failure counter
			// has already passed 10. The counter is only incremented when the
			// previous iteration was successful (short-circuit evaluation).
			if (! last_iteration_successful || number_of_line_search_failures++ > 10) {
				// This happens quite seldom. Every time it has happened, the function
				// was actually converged to a solution.
				results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
				break;
			}

			// x is left unchanged; the next iteration restarts from here.
			last_iteration_successful = false;
		}
		else {
			// Record length of this step.
			normdx = alpha_step * r.norm();
			// Compute new point.
			x_prev = x;
			x = x + alpha_step * r;

			last_iteration_successful = true;
		}

		results->backtracking_time += wall_time() - start_time;

		//
		// Log the results of this iteration.
		//
		start_time = wall_time();

		// Log less frequently as the iteration count grows.
		int log_interval = 1;
		if (iter > 30) {
			log_interval = 10;
		}
		if (iter > 200) {
			log_interval = 100;
		}
		if (iter > 2000) {
			log_interval = 1000;
		}
		if (this->log_function && iter % log_interval == 0) {
			// Column header before the first row.
			if (iter == 0) {
				this->log_function("Itr       f       deltaf   max|g_i|   alpha      H0       rho");
			}

			this->log_function(
				to_string(
					std::setw(4), iter, " ",
					std::setw(10), std::setprecision(3), std::scientific, std::showpos, fval, std::noshowpos, " ",
					std::setw(9),  std::setprecision(3), std::scientific, std::fabs(fval - fprev), " ",
					std::setw(9),  std::setprecision(3), std::setprecision(3), std::scientific, normg, " ",
					std::setw(9),  std::setprecision(3), std::scientific, alpha_step, " ",
					std::setw(9),  std::setprecision(3), std::scientific, H0, " ",
					std::setw(9),  std::setprecision(3), std::scientific, rho[0]
				)
			);
		}
		results->log_time += wall_time() - start_time;

		// Remember this function value for the next iteration's deltaf and tests.
		fprev = fval;
		iter++;
	}

	// Hand the final point back to the user's variables.
	function.copy_global_to_user(x);
	results->total_time += wall_time() - global_start_time;

	// Final summary line with the last function value and gradient norm.
	if (this->log_function) {
		char str[1024];
		std::sprintf(str, " end %+.3e           %.3e", fval, normg);
		this->log_function(str);
	}
}
Esempio n. 4
0
	bool CCP4File::readHeader()
	{
		// first read the complete 1024 bytes of header information
		char header[1024];
		std::fstream::read(header, 1024);

		if (gcount() != 1024)
		{
			Log.error() << "CCP4File::readHeader(): File does not contain a proper CCP4 header. Aborting read." << std::endl;

			return false;
		}
		// Currently only data_mode=2 is allowed, which stores density values as 4-byte float values	
		Index data_mode = readBinValueasInt_(header, 3);
		
		if (data_mode != 2)
		{
			// try to change endianness
			swap_bytes_= true;

			data_mode = readBinValueasInt_(header, 3);
			if (data_mode != 2)
			{
				Log.error() << "CCP4File::readHeader(): Corrupt CCP4 header: data mode not supported, only 32-bit float supported" << std::endl;
				return false;
			}
		}
		
		//check if file claims to have symmetry reocrds stored	
		Size size_of_symops = readBinValueasInt_(header, 23);
		if (size_of_symops != 0)
		{
			offset_symops_ = size_of_symops;
		}
		
		// check internal ordering of coordinate axis
		col_axis_ = readBinValueasInt_(header, 16)-1;
		row_axis_ = readBinValueasInt_(header, 17)-1;
		sec_axis_ = readBinValueasInt_(header, 18)-1;

		extent_.x = (float)readBinValueasInt_(header, 0+col_axis_);
		extent_.y = (float)readBinValueasInt_(header, 0+row_axis_);
		extent_.z = (float)readBinValueasInt_(header, 0+sec_axis_);
		
		start_.x = (float)readBinValueasInt_(header, 4+col_axis_);
		start_.y = (float)readBinValueasInt_(header, 4+row_axis_);
		start_.z = (float)readBinValueasInt_(header, 4+sec_axis_);
		
		sampling_rate_.x = (float)readBinValueasInt_(header, 7);
		sampling_rate_.y = (float)readBinValueasInt_(header, 8);
		sampling_rate_.z = (float)readBinValueasInt_(header, 9);
		
		cell_dimension_.x = readBinValueasFloat_(header, 10);
		cell_dimension_.y = readBinValueasFloat_(header, 11);
		cell_dimension_.z = readBinValueasFloat_(header, 12);
		
		// Angle values of 0 don't make sense, set the Angles to 90 deg
		if (		readBinValueasFloat_(header, 13) == 0
				||	readBinValueasFloat_(header, 14) == 0
				||	readBinValueasFloat_(header, 15) == 0)
		{
			alpha_ = Angle(90.,false);
			beta_ = Angle(90.,false);
			gamma_ = Angle(90.,false);
		}
		else
		{
			alpha_ = Angle(readBinValueasFloat_(header, 13),false);
			beta_ = Angle(readBinValueasFloat_(header, 14),false);
			gamma_ = Angle(readBinValueasFloat_(header, 15),false);
		}	
		
		mean_density_ = readBinValueasFloat_(header, 21);
		space_group_ = readBinValueasInt_(header, 22);
		deviation_sigma_ = readBinValueasFloat_(header, 54);

		Log.info() << "Mean from file: " << mean_density_ << std::endl;
		Log.info() << "Sigma from file: " << deviation_sigma_ << std::endl;
		
		// convert from grid space to cartesian coordinates
		Vector3 scaled_axes(cell_dimension_.x/sampling_rate_.x,
												cell_dimension_.y/sampling_rate_.y,
												cell_dimension_.z/sampling_rate_.z);
		
		Vector3 x_tmp(scaled_axes.x, 0., 0.);
		
		Vector3 y_tmp(cos(gamma_.toRadian()), sin(gamma_.toRadian()), 0.);
		y_tmp *= scaled_axes.y;
		
		Vector3 z_tmp( cos(beta_.toRadian()), 
									(cos(alpha_.toRadian()) - cos(beta_.toRadian())*cos(gamma_.toRadian())) / sin(gamma_.toRadian()),
									0.);
		z_tmp.z = sqrt(1.0 - z_tmp.x*z_tmp.x - z_tmp.y*z_tmp.y);
		z_tmp *= scaled_axes.z;

		origin_.x = x_tmp.x * start_.x + y_tmp.x * start_.y + z_tmp.x * start_.z;
		origin_.y = y_tmp.y * start_.y + z_tmp.y * start_.z;
		origin_.z = z_tmp.z * start_.z;

		xaxis_.x = x_tmp.x * (extent_.x - 1);
		xaxis_.y = 0.;
		xaxis_.z = 0.;

		yaxis_.x = y_tmp.x * (extent_.y - 1);
		yaxis_.y = y_tmp.y * (extent_.y - 1);
		yaxis_.z = 0.;

		zaxis_.x = z_tmp.x * (extent_.z - 1);
		zaxis_.y = z_tmp.y * (extent_.z - 1);
		zaxis_.z = z_tmp.z * (extent_.z - 1);
		
		// that's it. we're done
		return true;
	}