/**
 * Row-partitioned matrix-vector product gathered onto rank 0.
 *
 * Each rank multiplies a band of rows of `mat` (starting at row
 * `rank * (m / nprocs)`) by `vec`; the partial results are then collected
 * with MPI_Gatherv into `res` on rank 0 of MPI_COMM_WORLD. The rows that do
 * not divide evenly (`m % nprocs`) are assigned to the last rank.
 *
 * MPI_Gatherv is a collective operation, so every rank of MPI_COMM_WORLD has
 * to call this function.
 *
 * @param mat  matrix; only `dim0()` and the row band selected through
 *             `mat_rows` are used here
 * @param vec  right-hand vector handed to `douban::gemv`
 * @param res  receive buffer; `&res[0]` is passed as the gather destination
 *             (presumably it must hold `mat.dim0()` doubles on rank 0 --
 *             confirm against callers)
 */
void parallel_gemv_task(CM && mat, CV && vec, CR && res) {
  int rank, nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  const int m = static_cast<int>(mat.dim0());
  const int load = m / nprocs;            // rows handled by every rank but the last
  const int remainder = m % nprocs;       // leftover rows, all given to the last rank
  const int max_load = load + remainder;  // rows handled by the last rank
  const int offset = rank * load;         // first row of this rank's band

  // Every rank multiplies a band of max_load rows, although only the last
  // rank sends all of them; the band never runs past row m because
  // (rank + 1) * load + remainder <= m for every valid rank.
  douban::vec_container<double> y_tmp(max_load);
  y_tmp = douban::gemv(mat_rows(mat, offset, offset + max_load), vec);

  // Receive counts and displacements for MPI_Gatherv. Vectors own the
  // storage so nothing leaks if an exception propagates out of this function.
  std::vector<int> rcounts(nprocs, load);
  std::vector<int> displs(nprocs);
  for (int i = 0; i < nprocs; ++i) {
    displs[i] = i * load;
  }
  if (remainder != 0) {
    rcounts[nprocs - 1] = max_load;
  }

  // The count sent by this rank has to match the count rank 0 expects from it.
  const int send_count = (rank == nprocs - 1) ? max_load : load;

  MPI_Gatherv(&y_tmp[0], send_count, MPI_DOUBLE,
              &res[0], &rcounts[0], &displs[0], MPI_DOUBLE,
              0, MPI_COMM_WORLD);
}
bool DSN6File::readHeader() throw() { // first read the complete 512 bytes of header information char header[512]; std::fstream::read(header, 512); if (gcount() != 512) { Log.error() << "DSN6File::readHeader(): File does not contain a proper DSN6 header. Aborting read." << std::endl; return false; } // to determine whether we have to swap bytes in the header (depending on the version of // the DSN6 - File and on the byte order on the machine) we try to reproduce the known value // of 100 in header[2*18] short int header_value = readHeaderValue_(header, 18); if (header_value != 100) { // try to change endianness swap_bytes_ = true; header_value = readHeaderValue_(header, 18); if (header_value != 100) { Log.error() << "DSN6File::readHeader(): Corrupt DSN6 header: header[16] != 100. Aborting read." << std::endl; return false; } } header_value = readHeaderValue_(header, 0); start_.x = (float)header_value; header_value = readHeaderValue_(header, 1); start_.y = (float)header_value; header_value = readHeaderValue_(header, 2); start_.z = (float)header_value; header_value = readHeaderValue_(header, 3); extent_.x = (float)header_value; header_value = readHeaderValue_(header, 4); extent_.y = (float)header_value; header_value = readHeaderValue_(header, 5); extent_.z = (float)header_value; header_value = readHeaderValue_(header, 6); sampling_rate_.x = (float)header_value; header_value = readHeaderValue_(header, 7); sampling_rate_.y = (float)header_value; header_value = readHeaderValue_(header, 8); sampling_rate_.z = (float)header_value; header_value = readHeaderValue_(header, 17); cell_scaling_ = (float)header_value; header_value = readHeaderValue_(header, 9); crystal_dimension_.x = (float)header_value / (cell_scaling_ * sampling_rate_.x); header_value = readHeaderValue_(header, 10); crystal_dimension_.y = (float)header_value / (cell_scaling_ * sampling_rate_.y); header_value = readHeaderValue_(header, 11); crystal_dimension_.z = (float)header_value / (cell_scaling_ * 
sampling_rate_.z); header_value = readHeaderValue_(header, 12); alpha_ = Angle((float)header_value / cell_scaling_, false); header_value = readHeaderValue_(header, 13); beta_ = Angle((float)header_value / cell_scaling_, false); header_value = readHeaderValue_(header, 14); gamma_ = Angle((float)header_value / cell_scaling_, false); header_value = readHeaderValue_(header, 15); prod_ = (float)header_value / 100.; header_value = readHeaderValue_(header, 16); plus_ = (float)header_value; // convert from grid space to cartesian coordinates (inspired by the VMD code :-) ) Vector3 x_tmp(crystal_dimension_.x, 0., 0.); Vector3 y_tmp(cos(gamma_.toRadian()), sin(gamma_.toRadian()), 0.); y_tmp *= crystal_dimension_.y; Vector3 z_tmp( cos(beta_.toRadian()), (cos(alpha_.toRadian()) - cos(beta_.toRadian())*cos(gamma_.toRadian())) / sin(gamma_.toRadian()), 0.); z_tmp.z = sqrt(1.0 - z_tmp.x*z_tmp.x - z_tmp.y*z_tmp.y); z_tmp *= crystal_dimension_.z; origin_.x = x_tmp.x * start_.x + y_tmp.x * start_.y + z_tmp.x * start_.z; origin_.y = y_tmp.y * start_.y + z_tmp.y * start_.z; origin_.z = z_tmp.z * start_.z; xaxis_.x = x_tmp.x * (extent_.x - 1); xaxis_.y = 0.; xaxis_.z = 0.; yaxis_.x = y_tmp.x * (extent_.y - 1); yaxis_.y = y_tmp.y * (extent_.y - 1); yaxis_.z = 0.; zaxis_.x = z_tmp.x * (extent_.z - 1); zaxis_.y = z_tmp.y * (extent_.z - 1); zaxis_.z = z_tmp.z * (extent_.z - 1); // that's it. we're done return true; }
// Minimizes `function` with a limited-memory BFGS iteration.
//
// The starting point is read with function.copy_user_to_global() and the
// final point is written back with function.copy_global_to_user(). Each pass
// of the main loop
//   1. evaluates the function and its gradient,
//   2. pushes the newest (s, y) pair into the history when the last step
//      succeeded and s'y > 1e-16,
//   3. tests the exit conditions, the iteration limit and the user callback,
//   4. builds the search direction r with the two-loop recursion (or falls
//      back to -g and clears the history on a restart),
//   5. runs the line search and moves x, and
//   6. logs a line through log_function at a thinning interval.
//
// results->exit_condition is set to INTERNAL_ERROR before the loop and is
// overwritten on the exit paths visible here (NO_CONVERGENCE, USER_ABORT,
// GRADIENT_TOLERANCE; check_exit_conditions() receives `results` and
// presumably sets it itself -- confirm). For a function with zero scalars it
// is set to FUNCTION_TOLERANCE and nothing else is touched. The timing
// fields of `results` are accumulated with +=; a section that leaves the
// loop through `break` does not add its own elapsed time.
//
// NOTE(review): s[0], y[0] and rho[0] are accessed unconditionally, so
// lbfgs_history_size looks like it has to be at least 1 -- confirm.
void LBFGSSolver::solve(const Function& function,
                        SolverResults* results) const
{
  double global_start_time = wall_time();

  // Dimension of problem.
  size_t n = function.get_number_of_scalars();

  if (n == 0) {
    results->exit_condition = SolverResults::FUNCTION_TOLERANCE;
    return;
  }

  // Current point, gradient and Hessian.
  // Everything starts out as NaN; comparisons against NaN are false, so the
  // tests further down that involve fprev do not trigger in iteration 0.
  double fval   = std::numeric_limits<double>::quiet_NaN();
  double fprev  = std::numeric_limits<double>::quiet_NaN();
  double normg0 = std::numeric_limits<double>::quiet_NaN();
  double normg  = std::numeric_limits<double>::quiet_NaN();
  double normdx = std::numeric_limits<double>::quiet_NaN();

  Eigen::VectorXd x, g;

  // Copy the user state to the current point.
  function.copy_user_to_global(&x);
  // Output argument of the line search; not read anywhere else in this function.
  Eigen::VectorXd x2(n);

  // L-BFGS history.
  // s_data/y_data own the vectors; s/y hold pointers into them so that the
  // history can be rotated by moving pointers instead of copying vectors.
  std::vector<Eigen::VectorXd> s_data(this->lbfgs_history_size),
                               y_data(this->lbfgs_history_size);
  std::vector<Eigen::VectorXd*> s(this->lbfgs_history_size),
                                y(this->lbfgs_history_size);
  for (int h = 0; h < this->lbfgs_history_size; ++h) {
    s_data[h].resize(function.get_number_of_scalars());
    s_data[h].setZero();
    y_data[h].resize(function.get_number_of_scalars());
    y_data[h].setZero();
    s[h] = &s_data[h];
    y[h] = &y_data[h];
  }

  // rho[h] = 1 / (s[h]' y[h]); zero marks an empty history slot, which makes
  // the corresponding term of the two-loop recursion vanish.
  Eigen::VectorXd rho(this->lbfgs_history_size);
  rho.setZero();

  Eigen::VectorXd alpha(this->lbfgs_history_size);
  alpha.setZero();
  Eigen::VectorXd q(n);
  Eigen::VectorXd r(n);

  // Needed from the previous iteration.
  Eigen::VectorXd x_prev(n), s_tmp(n), y_tmp(n);

  CheckExitConditionsCache exit_condition_cache;

  //
  // START MAIN ITERATION
  //
  results->startup_time   += wall_time() - global_start_time;
  results->exit_condition = SolverResults::INTERNAL_ERROR;
  int iter = 0;
  bool last_iteration_successful = true;
  int number_of_line_search_failures = 0;
  int number_of_restarts = 0;
  while (true) {

    //
    // Evaluate function and derivatives.
    //
    double start_time = wall_time();
    // y[0] should contain the difference between the gradient
    // in this iteration and the gradient from the previous.
    // Therefore, update y before and after evaluating the
    // function.
    if (iter > 0) {
      y_tmp = -g;
    }
    fval = function.evaluate(x, &g);

    // Largest absolute gradient component (infinity norm).
    normg = std::max(g.maxCoeff(), -g.minCoeff());
    if (iter == 0) {
      normg0 = normg;
    }
    results->function_evaluation_time += wall_time() - start_time;

    //
    // Update history
    //
    start_time = wall_time();

    if (iter > 0 && last_iteration_successful) {
      s_tmp = x - x_prev;
      y_tmp += g;

      // Only pairs with s'y > 1e-16 are stored; anything else is dropped and
      // the history is left as it was.
      double sTy = s_tmp.dot(y_tmp);
      if (sTy > 1e-16) {
        // Shift all pointers one step back, discarding the oldest one.
        Eigen::VectorXd* sh = s[this->lbfgs_history_size - 1];
        Eigen::VectorXd* yh = y[this->lbfgs_history_size - 1];
        for (int h = this->lbfgs_history_size - 1; h >= 1; --h) {
          s[h]   = s[h - 1];
          y[h]   = y[h - 1];
          rho[h] = rho[h - 1];
        }
        // Reuse the storage of the discarded data for the new data.
        s[0] = sh;
        y[0] = yh;

        *y[0] = y_tmp;
        *s[0] = s_tmp;
        rho[0] = 1.0 / sTy;
      }
    }

    results->lbfgs_update_time += wall_time() - start_time;

    //
    // Test stopping criteria
    //
    start_time = wall_time();
    // The exit conditions are only consulted from the third pass (iter > 1) on.
    if (iter > 1 && this->check_exit_conditions(fval, fprev, normg,
                                                normg0, x.norm(), normdx,
                                                last_iteration_successful,
                                                &exit_condition_cache,
                                                results)) {
      break;
    }
    if (iter >= this->maximum_iterations) {
      results->exit_condition = SolverResults::NO_CONVERGENCE;
      break;
    }

    // The callback gets the current objective value, point and gradient and
    // stops the solver by returning false.
    if (this->callback_function) {
      CallbackInformation information;
      information.objective_value = fval;
      information.x = &x;
      information.g = &g;

      if (!callback_function(information)) {
        results->exit_condition = SolverResults::USER_ABORT;
        break;
      }
    }

    results->stopping_criteria_time += wall_time() - start_time;

    //
    // Compute search direction via L-BFGS two-loop recursion.
    //
    start_time = wall_time();
    bool should_restart = false;

    // Scale of the initial inverse Hessian approximation H0 * I.
    double H0 = 1.0;
    if (iter > 0) {
      // If the gradient is identical two iterations in a row,
      // y will be the zero vector and H0 will be NaN. In this
      // case the line search will fail and L-BFGS will be restarted
      // with a steepest descent step.
      H0 = s[0]->dot(*y[0]) / y[0]->dot(*y[0]);

      // If isinf(H0) || isnan(H0)
      if (H0 ==  std::numeric_limits<double>::infinity() ||
          H0 == -std::numeric_limits<double>::infinity() ||
          H0 != H0) {
        should_restart = true;
      }
    }

    // First loop: newest to oldest pair.
    q = -g;

    for (int h = 0; h < this->lbfgs_history_size; ++h) {
      alpha[h] = rho[h] * s[h]->dot(q);
      q = q - alpha[h] * (*y[h]);
    }

    r = H0 * q;

    // Second loop: oldest to newest pair.
    for (int h = this->lbfgs_history_size - 1; h >= 0; --h) {
      double beta = rho[h] * y[h]->dot(r);
      r = r + (*s[h]) * (alpha[h] - beta);
    }

    // If the function improves very little, the approximated Hessian
    // might be very bad. If this is the case, it is better to discard
    // the history once in a while. This allows the solver to correctly
    // solve some badly scaled problems.
    double restart_test = std::fabs(fval - fprev) /
                          (std::fabs(fval) + std::fabs(fprev));
    if (iter > 0 && iter % 100 == 0 && restart_test
                                       < this->lbfgs_restart_tolerance) {
      should_restart = true;
    }
    if (! last_iteration_successful) {
      should_restart = true;
    }

    if (should_restart) {
      // Restarts are counted only while a log function is installed; the
      // counter is used for nothing but limiting the number of messages.
      if (this->log_function) {
        char str[1024];
        if (number_of_restarts <= 10) {
          std::sprintf(str, "Restarting: fval = %.3e, deltaf = %.3e, max|g_i| = %.3e, test = %.3e",
                       fval, std::fabs(fval - fprev), normg, restart_test);
          this->log_function(str);
        }
        if (number_of_restarts == 10) {
          this->log_function("NOTE: No more restarts will be reported.");
        }
        number_of_restarts++;
      }
      // Steepest descent direction and an empty history.
      r = -g;
      for (int h = 0; h < this->lbfgs_history_size; ++h) {
        (*s[h]).setZero();
        (*y[h]).setZero();
      }
      rho.setZero();
      alpha.setZero();
      // H0 is not used, but its value will be printed.
      H0 = std::numeric_limits<double>::quiet_NaN();
    }

    results->lbfgs_update_time += wall_time() - start_time;

    //
    // Perform line search.
    //
    start_time = wall_time();
    double start_alpha = 1.0;
    // In the first iteration, start with a much smaller step
    // length. (heuristic used by e.g. minFunc)
    if (iter == 0) {
      double sumabsg = 0.0;
      for (size_t i = 0; i < n; ++i) {
        sumabsg += std::fabs(g[i]);
      }
      start_alpha = std::min(1.0, 1.0 / sumabsg);
    }
    double alpha_step = this->perform_linesearch(function, x, fval, g,
                                                 r, &x2, start_alpha);

    // A non-positive step length is treated as a failed line search.
    if (alpha_step <= 0) {
      if (this->log_function) {
        this->log_function("Line search failed.");
        char str[1024];
        std::sprintf(str, "%4d %+.3e %9.3e %.3e %.3e %.3e %.3e",
          iter, fval, std::fabs(fval - fprev), normg, alpha_step, H0, rho[0]);
        this->log_function(str);
      }

      // Give up after two failures in a row or too many failures in total.
      // Because of short-circuit evaluation the failure counter is only
      // incremented when the previous iteration was successful.
      if (! last_iteration_successful || number_of_line_search_failures++ > 10) {
        // This happens quite seldom. Every time it has happened, the function
        // was actually converged to a solution.
        results->exit_condition = SolverResults::GRADIENT_TOLERANCE;
        break;
      }

      // x is left unchanged; the next pass restarts from the same point.
      last_iteration_successful = false;
    }
    else {
      // Record length of this step.
      normdx = alpha_step * r.norm();
      // Compute new point.
      x_prev = x;
      x = x + alpha_step * r;

      last_iteration_successful = true;
    }

    results->backtracking_time += wall_time() - start_time;

    //
    // Log the results of this iteration.
    //
    start_time = wall_time();

    // Log every iteration at first, then every 10th, 100th and 1000th.
    int log_interval = 1;
    if (iter > 30) {
      log_interval = 10;
    }
    if (iter > 200) {
      log_interval = 100;
    }
    if (iter > 2000) {
      log_interval = 1000;
    }

    if (this->log_function && iter % log_interval == 0) {
      // Column header before the first row.
      if (iter == 0) {
        this->log_function("Itr f deltaf max|g_i| alpha H0 rho");
      }

      this->log_function(
        to_string(
          std::setw(4), iter, " ",
          std::setw(10), std::setprecision(3), std::scientific, std::showpos, fval, std::noshowpos, " ",
          std::setw(9),  std::setprecision(3), std::scientific, std::fabs(fval - fprev), " ",
          std::setw(9),  std::setprecision(3), std::setprecision(3), std::scientific, normg, " ",
          std::setw(9),  std::setprecision(3), std::scientific, alpha_step, " ",
          std::setw(9),  std::setprecision(3), std::scientific, H0, " ",
          std::setw(9),  std::setprecision(3), std::scientific, rho[0]
        )
      );
    }
    results->log_time += wall_time() - start_time;

    fprev = fval;
    iter++;
  }

  // Hand the final point back to the user's variables.
  function.copy_global_to_user(x);
  results->total_time += wall_time() - global_start_time;

  if (this->log_function) {
    char str[1024];
    std::sprintf(str, " end %+.3e %.3e", fval, normg);
    this->log_function(str);
  }
}
bool CCP4File::readHeader() { // first read the complete 1024 bytes of header information char header[1024]; std::fstream::read(header, 1024); if (gcount() != 1024) { Log.error() << "CCP4File::readHeader(): File does not contain a proper CCP4 header. Aborting read." << std::endl; return false; } // Currently only data_mode=2 is allowed, which stores density values as 4-byte float values Index data_mode = readBinValueasInt_(header, 3); if (data_mode != 2) { // try to change endianness swap_bytes_= true; data_mode = readBinValueasInt_(header, 3); if (data_mode != 2) { Log.error() << "CCP4File::readHeader(): Corrupt CCP4 header: data mode not supported, only 32-bit float supported" << std::endl; return false; } } //check if file claims to have symmetry reocrds stored Size size_of_symops = readBinValueasInt_(header, 23); if (size_of_symops != 0) { offset_symops_ = size_of_symops; } // check internal ordering of coordinate axis col_axis_ = readBinValueasInt_(header, 16)-1; row_axis_ = readBinValueasInt_(header, 17)-1; sec_axis_ = readBinValueasInt_(header, 18)-1; extent_.x = (float)readBinValueasInt_(header, 0+col_axis_); extent_.y = (float)readBinValueasInt_(header, 0+row_axis_); extent_.z = (float)readBinValueasInt_(header, 0+sec_axis_); start_.x = (float)readBinValueasInt_(header, 4+col_axis_); start_.y = (float)readBinValueasInt_(header, 4+row_axis_); start_.z = (float)readBinValueasInt_(header, 4+sec_axis_); sampling_rate_.x = (float)readBinValueasInt_(header, 7); sampling_rate_.y = (float)readBinValueasInt_(header, 8); sampling_rate_.z = (float)readBinValueasInt_(header, 9); cell_dimension_.x = readBinValueasFloat_(header, 10); cell_dimension_.y = readBinValueasFloat_(header, 11); cell_dimension_.z = readBinValueasFloat_(header, 12); // Angle values of 0 don't make sense, set the Angles to 90 deg if ( readBinValueasFloat_(header, 13) == 0 || readBinValueasFloat_(header, 14) == 0 || readBinValueasFloat_(header, 15) == 0) { alpha_ = Angle(90.,false); beta_ = 
Angle(90.,false); gamma_ = Angle(90.,false); } else { alpha_ = Angle(readBinValueasFloat_(header, 13),false); beta_ = Angle(readBinValueasFloat_(header, 14),false); gamma_ = Angle(readBinValueasFloat_(header, 15),false); } mean_density_ = readBinValueasFloat_(header, 21); space_group_ = readBinValueasInt_(header, 22); deviation_sigma_ = readBinValueasFloat_(header, 54); Log.info() << "Mean from file: " << mean_density_ << std::endl; Log.info() << "Sigma from file: " << deviation_sigma_ << std::endl; // convert from grid space to cartesian coordinates Vector3 scaled_axes(cell_dimension_.x/sampling_rate_.x, cell_dimension_.y/sampling_rate_.y, cell_dimension_.z/sampling_rate_.z); Vector3 x_tmp(scaled_axes.x, 0., 0.); Vector3 y_tmp(cos(gamma_.toRadian()), sin(gamma_.toRadian()), 0.); y_tmp *= scaled_axes.y; Vector3 z_tmp( cos(beta_.toRadian()), (cos(alpha_.toRadian()) - cos(beta_.toRadian())*cos(gamma_.toRadian())) / sin(gamma_.toRadian()), 0.); z_tmp.z = sqrt(1.0 - z_tmp.x*z_tmp.x - z_tmp.y*z_tmp.y); z_tmp *= scaled_axes.z; origin_.x = x_tmp.x * start_.x + y_tmp.x * start_.y + z_tmp.x * start_.z; origin_.y = y_tmp.y * start_.y + z_tmp.y * start_.z; origin_.z = z_tmp.z * start_.z; xaxis_.x = x_tmp.x * (extent_.x - 1); xaxis_.y = 0.; xaxis_.z = 0.; yaxis_.x = y_tmp.x * (extent_.y - 1); yaxis_.y = y_tmp.y * (extent_.y - 1); yaxis_.z = 0.; zaxis_.x = z_tmp.x * (extent_.z - 1); zaxis_.y = z_tmp.y * (extent_.z - 1); zaxis_.z = z_tmp.z * (extent_.z - 1); // that's it. we're done return true; }