/**
 * Forwards a dense matrix-vector product to the BLAS gemv routine.
 * Following the BLAS gemv convention, \p dest plays the role of Y
 * (read when \p beta is non-zero, then overwritten), \p arg plays the
 * role of X, and \p alpha / \p beta are the two scalar constants.
 *
 * \param alpha  scalar constant ALPHA handed to gemv
 * \param beta   scalar constant BETA handed to gemv
 * \param dest   in/out vector; size m() (or n() when \p trans is true)
 * \param arg    input vector; size n() (or m() when \p trans is true)
 * \param trans  if true, multiply by the transpose of this matrix
 *
 * Writes "Improper input argument sizes!" to libMesh::out and calls
 * libmesh_error() when the vector sizes do not fit the matrix.
 */
void DenseMatrix<T>::_matvec_blas(T alpha,
                                  T beta,
                                  DenseVector<T>& dest,
                                  const DenseVector<T>& arg,
                                  bool trans) const
{
  // Shape check.  Without transposition we form
  //   dest (m x 1)  ~  A (m x n) * arg (n x 1),
  // and with transposition
  //   dest (n x 1)  ~  A^T (n x m) * arg (m x 1).
  const bool sizes_agree = trans
    ? ((dest.size() == this->n()) && (arg.size() == this->m()))
    : ((dest.size() == this->m()) && (arg.size() == this->n()));

  if (!sizes_agree)
    {
      libMesh::out << "Improper input argument sizes!" << std::endl;
      libmesh_error();
    }

  // gemv argument list:
  //   gemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)

  // TRANS: 't' selects the transposed product, 'n' the plain one.
  // Our storage is row-major while BLAS assumes column-major, so the
  // flag is deliberately inverted: 't' for an ordinary matvec and 'n'
  // for a transposed one.
  char TRANS[] = "t";
  TRANS[0] = trans ? 'n' : 't';

  // M, N: BLAS row and column counts.  Because of the row-major /
  // column-major swap we pass our column count as M and our row
  // count as N.
  int M = this->n();
  int N = this->m();

  // A: the matrix coefficients.  This method is const (it is called
  // from the const vector_mult()), but the BLAS interface knows
  // nothing about const, so constness is cast away here.
  std::vector<T>& matrix_data =
    const_cast< DenseMatrix<T>& >( *this ).get_values();

  // LDA: leading dimension of A as seen by BLAS; must be >= max(1, M).
  int LDA = M;

  // X: the vector being multiplied.  Constness of "arg" is cast away
  // for the same reason as above.
  std::vector<T>& arg_data =
    const_cast< DenseVector<T>& >( arg ).get_values();

  // INCX: stride between consecutive entries of X (must be non-zero).
  int INCX = 1;

  // Y: the vector "dest"; BLAS overwrites it with the updated vector.
  std::vector<T>& dest_data = dest.get_values();

  // INCY: stride between consecutive entries of Y (must be non-zero).
  int INCY = 1;

  // Everything is in place -- hand off to BLAS.
  BLASgemv_(TRANS, &M, &N, &alpha, &(matrix_data[0]), &LDA,
            &(arg_data[0]), &INCX, &beta, &(dest_data[0]), &INCY);
}
/**
 * Computes a reduced singular value decomposition of this matrix by
 * delegating to _svd_helper(), returning the singular values in
 * \p sigma and the factors in \p U and \p VT.
 *
 * \param sigma  output; resized to the number of singular values
 *               produced by _svd_helper() and filled with them
 * \param U      output; resized to (m() x min(m(),n()))
 * \param VT     output; resized to (min(m(),n()) x n())
 */
void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma,
                                  DenseMatrix<Number> & U,
                                  DenseMatrix<Number> & VT)
{
  // Job options following the DGESVD calling sequence
  //   DGESVD(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
  //          WORK, LWORK, INFO)
  //
  // JOBU / JOBVT may each be one of
  //   'A': return all columns of U (rows of V**T),
  //   'S': return only the first min(m,n) columns of U (rows of V**T),
  //   'O': overwrite A with the first min(m,n) columns (rows),
  //   'N': compute no left (right) singular vectors.
  // We request the reduced ('S') form for both factors.
  char JOBU = 'S';
  char JOBVT = 'S';

  // Lapack effectively factors A^T.  With A = U * S * V^T we have
  // A^T = V * S * U^T, so what Lapack calls "U" holds the entries of
  // V, and what it calls "VT" holds the entries of U^T.  That is why
  // VT and U trade places in the helper call below.
  std::vector<Real> singular_values;

  // Dimensions as Lapack sees them (rows and columns swapped).
  const int M = this->n();
  const int N = this->m();
  const int min_MN = (M < N) ? M : N;

  // Inside _svd_helper the first output array is sized
  // (M x min_MN) and the second (min_MN x N).  We shape U like the
  // transpose of the second and VT like the transpose of the first.
  // Lapack writes column-major data and we want the transpose of
  // what it returns, so the values should already be laid out
  // correctly and no copy is needed after the call.
  U.resize(N, min_MN);
  VT.resize(min_MN, M);

  _svd_helper(JOBU, JOBVT, singular_values, VT.get_values(), U.get_values());

  // Transfer the singular values to the caller's vector.
  const unsigned int n_sigma = cast_int<unsigned int>(singular_values.size());
  sigma.resize(n_sigma);
  for (unsigned int k = 0; k != n_sigma; ++k)
    sigma(k) = singular_values[k];
}
/**
 * Solves the (possibly rectangular) system A*x = rhs in the
 * least-squares sense by calling LAPACKgelss_ on a transposed copy
 * of this matrix.
 *
 * \param rhs    right-hand side; must have exactly m() entries
 * \param x      output; on return its storage holds n() entries
 * \param rcond  singular values S(i) <= rcond*S(1) are treated as
 *               zero; if negative, machine precision is used instead
 *
 * Calls libmesh_error_msg() if \p rhs has the wrong length, if
 * Lapack reports an illegal argument, or if the SVD iteration fails
 * to converge.
 */
void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs,
                                       DenseVector<T> & x,
                                       Real rcond) const
{
  // The right-hand side needs one entry per row of A.  Without this
  // check a longer rhs would be written past the end of the work
  // vector below, and a shorter one would be silently zero-padded.
  if (rhs.size() != this->m())
    libmesh_error_msg("Error, rhs vector has size " << rhs.size() << " but the matrix has " << this->m() << " rows.");

  // Since BLAS is expecting column-major storage, we first make a
  // transposed copy of *this and pass that to the gelss routine
  // instead of the original.  The extra copy is not completely
  // wasted: A_trans gets overwritten by Lapack, and callers would
  // usually have to copy A before the call anyway.
  DenseMatrix<T> A_trans;
  this->get_transpose(A_trans);

  // M is INTEGER
  //   The number of rows of the input matrix. M >= 0.
  //   This is actually the number of *columns* of A_trans.
  int M = A_trans.n();

  // N is INTEGER
  //   The number of columns of the matrix A. N >= 0.
  //   This is actually the number of *rows* of A_trans.
  int N = A_trans.m();

  // We'll use the min and max of (M,N) several times below.
  int max_MN = std::max(M,N);
  int min_MN = std::min(M,N);

  // NRHS is INTEGER
  //   The number of right hand sides, i.e., the number of columns
  //   of the matrices B and X. NRHS >= 0.
  // This could later be generalized to solve for multiple right-hand
  // sides...
  int NRHS = 1;

  // A is DOUBLE PRECISION array, dimension (LDA,N)
  //   On entry, the M-by-N matrix A.
  //   On exit, the first min(m,n) rows of A are overwritten with
  //   its right singular vectors, stored rowwise.
  //
  // The data vector that will be passed to Lapack.
  std::vector<T> & A_trans_vals = A_trans.get_values();

  // LDA is INTEGER
  //   The leading dimension of the array A.  LDA >= max(1,M).
  int LDA = M;

  // B is DOUBLE PRECISION array, dimension (LDB,NRHS)
  //   On entry, the M-by-NRHS right hand side matrix B.
  //   On exit, B is overwritten by the N-by-NRHS solution
  //   matrix X.  If m >= n and RANK = n, the residual
  //   sum-of-squares for the solution in the i-th column is given
  //   by the sum of squares of elements n+1:m in that column.
  //
  // Since we don't want the user's rhs vector to be overwritten by
  // the solution, we copy the rhs values into the solution vector "x"
  // now.  x needs to be long enough to hold either the (Nx1) solution
  // vector or the (Mx1) rhs, so size it to the max of those.
  x.resize(max_MN);
  for (unsigned i=0; i<rhs.size(); ++i)
    x(i) = rhs(i);

  // Make the syntax below simpler by grabbing a reference to this array.
  std::vector<T> & B = x.get_values();

  // LDB is INTEGER
  //   The leading dimension of the array B. LDB >= max(1,max(M,N)).
  //   x was just resized to max_MN entries, so that is its length.
  int LDB = max_MN;

  // S is DOUBLE PRECISION array, dimension (min(M,N))
  //   The singular values of A in decreasing order.
  //   The condition number of A in the 2-norm = S(1)/S(min(m,n)).
  std::vector<T> S(min_MN);

  // RCOND is DOUBLE PRECISION
  //   RCOND is used to determine the effective rank of A.
  //   Singular values S(i) <= RCOND*S(1) are treated as zero.
  //   If RCOND < 0, machine precision is used instead.
  Real RCOND = rcond;

  // RANK is INTEGER
  //   The effective rank of A, i.e., the number of singular values
  //   which are greater than RCOND*S(1).
  int RANK = 0;

  // LWORK is INTEGER
  //   The dimension of the array WORK. LWORK >= 1, and also:
  //   LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
  //   For good performance, LWORK should generally be larger.
  //
  //   If LWORK = -1, then a workspace query is assumed; the routine
  //   only calculates the optimal size of the WORK array, returns
  //   this value as the first entry of the WORK array, and no error
  //   message related to LWORK is issued by XERBLA.
  //
  // The factor of 3/2 is arbitrary and is used to satisfy the "should
  // generally be larger" clause.  It is applied in integer arithmetic
  // so that no floating point to int conversion is involved.
  int LWORK = (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS))) * 3 / 2;

  // WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  //   On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  std::vector<T> WORK(LWORK);

  // INFO is INTEGER
  //   = 0:  successful exit
  //   < 0:  if INFO = -i, the i-th argument had an illegal value.
  //   > 0:  the algorithm for computing the SVD failed to converge;
  //         if INFO = i, i off-diagonal elements of an intermediate
  //         bidiagonal form did not converge to zero.
  int INFO = 0;

  // LAPACKgelss_(const PetscBLASInt *, // M
  //              const PetscBLASInt *, // N
  //              const PetscBLASInt *, // NRHS
  //              PetscScalar *,        // A
  //              const PetscBLASInt *, // LDA
  //              PetscScalar *,        // B
  //              const PetscBLASInt *, // LDB
  //              PetscReal *,          // S(out) = singular values of A in decreasing order
  //              const PetscReal *,    // RCOND = tolerance for singular values
  //              PetscBLASInt *,       // RANK(out) = number of "non-zero" singular values
  //              PetscScalar *,        // WORK
  //              const PetscBLASInt *, // LWORK
  //              PetscBLASInt *);      // INFO
  LAPACKgelss_(&M, &N, &NRHS, &A_trans_vals[0], &LDA, &B[0], &LDB, &S[0], &RCOND, &RANK, &WORK[0], &LWORK, &INFO);

  // Check for errors in the Lapack call
  if (INFO < 0)
    libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
  if (INFO > 0)
    libmesh_error_msg("The algorithm for computing the SVD failed to converge!");

  // Debugging: print singular values and information about condition number:
  // libMesh::err << "RCOND=" << RCOND << std::endl;
  // libMesh::err << "Singular values: " << std::endl;
  // for (unsigned i=0; i<S.size(); ++i)
  //   libMesh::err << S[i] << std::endl;
  // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;

  // Lapack has already written the solution into B, but it will be
  // the wrong size for non-square problems, so we need to resize it
  // correctly.  The size of the solution vector should be the number
  // of columns of the original A matrix.  Unfortunately, resizing a
  // DenseVector currently also zeros it out (unlike a std::vector) so
  // we'll resize the underlying storage directly (the size is not
  // stored independently elsewhere).
  x.get_values().resize(this->n());
}
//-------------------------------------------------------- // apply_charged_surfaces //-------------------------------------------------------- void ChargeRegulatorMethodEffectiveCharge::apply_local_forces() { double * q = lammpsInterface_->atom_charge(); _atomElectricalForce_.resize(nlocal(),nsd_); double penalty = poissonSolver_->penalty_coefficient(); if (penalty <= 0.0) throw ATC_Error("ExtrinsicModelElectrostatic::apply_charged_surfaces expecting non zero penalty"); double dx[3]; const DENS_MAT & xa((interscaleManager_->per_atom_quantity("AtomicCoarseGrainingPositions"))->quantity()); // WORKSPACE - most are static SparseVector<double> dv(nNodes_); vector<SparseVector<double> > derivativeVectors; derivativeVectors.reserve(nsd_); const SPAR_MAT_VEC & shapeFunctionDerivatives((interscaleManager_->vector_sparse_matrix("InterpolateGradient"))->quantity()); DenseVector<INDEX> nodeIndices; DENS_VEC nodeValues; NODE_TO_XF_MAP::const_iterator inode; for (inode = nodeXFMap_.begin(); inode != nodeXFMap_.end(); inode++) { int node = inode->first; DENS_VEC xI = (inode->second).first; double qI = (inode->second).second; double phiI = nodalChargePotential_[node]; for (int i = 0; i < nlocal(); i++) { int atom = (atc_->internal_to_atom_map())(i); double qa = q[atom]; if (qa != 0) { double dxSq = 0.; for (int j = 0; j < nsd_; j++) { dx[j] = xa(i,j) - xI(j); dxSq += dx[j]*dx[j]; } if (dxSq < rCsq_) { // first apply pairwise coulombic interaction if (!useSlab_) { double coulForce = qqrd2e_*qI*qa/(dxSq*sqrtf(dxSq)); for (int j = 0; j < nsd_; j++) { _atomElectricalForce_(i,j) += dx[j]*coulForce; } } // second correct for FE potential induced by BCs // determine shape function derivatives at atomic location // and construct sparse vectors to store derivative data for (int j = 0; j < nsd_; j++) { shapeFunctionDerivatives[j]->row(i,nodeValues,nodeIndices); derivativeVectors.push_back(dv); for (int k = 0; k < nodeIndices.size(); k++) { derivativeVectors[j](nodeIndices(k)) = 
nodeValues(k); } } // compute greens function from charge quadrature SparseVector<double> shortFePotential(nNodes_); shortFePotential.add_scaled(greensFunctions_[node],penalty*phiI); // compute electric field induced by charge DENS_VEC efield(nsd_); for (int j = 0; j < nsd_; j++) { efield(j) = -.1*dot(derivativeVectors[j],shortFePotential); } // apply correction in atomic forces double c = qV2e_*qa; for (int j = 0; j < nsd_; j++) { if ((!useSlab_) || (j==nsd_)) { _atomElectricalForce_(i,j) -= c*efield(j); } } } } } } }
/**
 * Integrates, over one element, the squared difference between the
 * discrete solution and the reference solution for a single variable.
 * Which contributions (value, gradient, hessian) are summed depends
 * on error_norm.type() for that variable.
 *
 * \param system       system the variable belongs to
 * \param var_name     name of the variable being measured
 * \param elem         element to integrate over
 * \param Uelem        element-local solution coefficients, one per
 *                     shape function
 * \param fe           finite element object; reinit()'ed on \p elem
 * \param fine_values  reference solution, consulted only when no
 *                     exact function/functor is attached and
 *                     _equation_systems_fine is set
 * \returns the accumulated squared error (asserted non-negative)
 */
Real ExactErrorEstimator::find_squared_element_error(const System& system,
                                                     const std::string& var_name,
                                                     const Elem *elem,
                                                     const DenseVector<Number> &Uelem,
                                                     FEBase *fe,
                                                     MeshFunction *fine_values) const
{
  // Identify the system and the variable we are measuring.
  const std::string& sys_name = system.name();
  const unsigned int sys_num = system.number();
  const unsigned int var = system.variable_number(var_name);
  const unsigned int var_component = system.variable_scalar_number(var, 0);
  const Parameters& parameters = system.get_equation_systems().parameters;

  // Recompute the element-specific FE data for this element.
  fe->reinit (elem);

  // Quadrature weights, basis functions (and derivatives) and the
  // physical locations of the quadrature points.
  const std::vector<Real> & JxW = fe->get_JxW();
  const std::vector<std::vector<Real> >& phi = fe->get_phi();
  const std::vector<std::vector<RealGradient> >& dphi = fe->get_dphi();
  const std::vector<Point>& xyz = fe->get_xyz();
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
  const std::vector<std::vector<RealTensor> >& d2phi = fe->get_d2phi();
#endif

  // Loop extents: shape functions and quadrature points.
  const unsigned int n_shape = libmesh_cast_int<unsigned int>(Uelem.size());
  const unsigned int n_points = libmesh_cast_int<unsigned int>(JxW.size());

  // Decide once which terms the requested norm includes.
  const bool include_value =
    (error_norm.type(var) == L2 ||
     error_norm.type(var) == H1 ||
     error_norm.type(var) == H2);
  const bool include_gradient =
    (error_norm.type(var) == H1 ||
     error_norm.type(var) == H1_SEMINORM ||
     error_norm.type(var) == H2);
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
  const bool include_hessian =
    (error_norm.type(var) == H2_SEMINORM ||
     error_norm.type(var) == H2);
#endif

  Real squared_error = 0;

  for (unsigned int qp = 0; qp != n_points; ++qp)
    {
      // Evaluate the discrete solution (and its derivatives) at this
      // quadrature point by summing over the shape functions.
      Number uh = 0.;
      Gradient grad_uh;
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
      Tensor hess_uh;
#endif

      for (unsigned int i = 0; i != n_shape; ++i)
        {
          uh += phi[i][qp]*Uelem(i);
          grad_uh += dphi[i][qp]*Uelem(i);
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
          hess_uh += d2phi[i][qp]*Uelem(i);
#endif
        }

      // Error in the value.  The reference comes from the first
      // available source: function pointer, per-system functor, or
      // the fine-grid mesh function.
      if (include_value)
        {
          Number diff = uh;
          if (_exact_value)
            diff -= _exact_value(xyz[qp],parameters,sys_name,var_name);
          else if (_exact_values.size() > sys_num && _exact_values[sys_num])
            diff -= _exact_values[sys_num]->
              component(var_component, xyz[qp], system.time);
          else if (_equation_systems_fine)
            diff -= (*fine_values)(xyz[qp]);

          squared_error += JxW[qp]*TensorTools::norm_sq(diff);
        }

      // Error in the gradient, same source priority as above.
      if (include_gradient)
        {
          Gradient grad_diff = grad_uh;
          if (_exact_deriv)
            grad_diff -= _exact_deriv(xyz[qp],parameters,sys_name,var_name);
          else if (_exact_derivs.size() > sys_num && _exact_derivs[sys_num])
            grad_diff -= _exact_derivs[sys_num]->
              component(var_component, xyz[qp], system.time);
          else if (_equation_systems_fine)
            grad_diff -= fine_values->gradient(xyz[qp]);

          squared_error += JxW[qp]*grad_diff.size_sq();
        }

#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
      // Error in the hessian, same source priority as above.
      if (include_hessian)
        {
          Tensor hess_diff = hess_uh;
          if (_exact_hessian)
            hess_diff -= _exact_hessian(xyz[qp],parameters,sys_name,var_name);
          else if (_exact_hessians.size() > sys_num && _exact_hessians[sys_num])
            hess_diff -= _exact_hessians[sys_num]->
              component(var_component, xyz[qp], system.time);
          else if (_equation_systems_fine)
            hess_diff -= fine_values->hessian(xyz[qp]);

          squared_error += JxW[qp]*hess_diff.size_sq();
        }
#endif
    } // end of quadrature loop

  libmesh_assert_greater_equal (squared_error, 0.);

  return squared_error;
}
bool LineSearcher::MoreThuenteLineSearch( DenseVector ¶m, DenseVector &direc, DenseVector &grad, double finit, double &stepsize, std::function<double(DenseVector &, DenseVector &)> &funcgrad) { itercnt_ = 0; int brackt, stage1, uinfo = 0; double dg; double stx, fx, dgx; double sty, fy, dgy; double fxm, dgxm, fym, dgym, fm, dgm; double ftest1, dginit, dgtest; double width, prev_width; double stmin, stmax; double fval; if (stepsize < 0) { LOG(FATAL) << "Stepsize less than 0"; return false; } dginit = direc.dot(grad); if (dginit > 0) { LOG(FATAL) << "Direction not decent"; return false; } if (tparam_.size() != param.size()) { tparam_.resize(param.size()); } /* Initialize local variables. */ brackt = 0; stage1 = 1; dgtest = alpha_ * dginit; width = maxstep_ - minstep_; prev_width = 2.0 * width; stx = sty = 0.; fx = fy = finit; dgx = dgy = dginit; while (itercnt_ < maxtries_) { /* Set the minimum and maximum steps to correspond to the present interval of uncertainty. */ if (brackt) { stmin = std::min(stx, sty); stmax = std::min(stx, sty); } else { stmin = stx; stmax = stepsize + 4.0 * (stepsize - stx); } /* Clip the step in the range of [minstep_, maxstep_]. */ if (stepsize < minstep_) stepsize = minstep_; if (stepsize > maxstep_) stepsize = maxstep_; /* If an unusual termination is to occur then let stepsize be the lowest point obtained so far. */ if ((brackt && ((stepsize <= stmin || stepsize >= stmax) || (itercnt_ + 1 >= maxtries_) || uinfo != 0)) || (brackt && (stmax - stmin <= parameps_ * stmax))) { stepsize = stx; } tparam_ = param + stepsize * direc; fval = funcgrad(tparam_, grad); dg = grad.dot(direc); ftest1 = finit + stepsize * dgtest; ++itercnt_; /* Test for errors and convergence. */ if (brackt && ((stepsize <= stmin || stmax <= stepsize) || uinfo != 0)) { /* Rounding errors prevent further progress. */ return false; } if (stepsize == maxstep_ && fval <= ftest1 && dg <= dgtest) { /* The step is the maximum value. 
*/ return false; } if (stepsize == minstep_ && (ftest1 < fval || dgtest <= dg)) { /* The step is the minimum value. */ return false; } if (brackt && (stmax - stmin) <= parameps_ * stmax) { /* Relative width of the interval of uncertainty is at most xtol. */ return false; } if (maxtries_ <= itercnt_) { /* Maximum number of iteration. */ return false; } if (fval <= ftest1 && std::fabs(dg) <= beta_ * (-dginit)) { /* The sufficient decrease condition and the directional derivative * condition hold. */ param.swap(tparam_); return true; } /* In the first stage we seek a step for which the modified function has a nonpositive value and nonnegative derivative. */ if (stage1 && fval <= ftest1 && std::min(alpha_, beta_) * dginit <= dg) { stage1 = 0; } /* A modified function is used to predict the step only if we have not obtained a step for which the modified function has a nonpositive function value and nonnegative derivative, and if a lower function value has been obtained but the decrease is not sufficient. */ if (stage1 && ftest1 < fval && fval <= fx) { /* Define the modified function and derivative values. */ fm = fval - stepsize * dgtest; fxm = fx - stx * dgtest; fym = fy - sty * dgtest; dgm = dg - dgtest; dgxm = dgx - dgtest; dgym = dgy - dgtest; /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval(&stx, &fxm, &dgxm, &sty, &fym, &dgym, &stepsize, &fm, &dgm, stmin, stmax, &brackt); /* Reset the function and gradient values for f. */ fx = fxm + stx * dgtest; fy = fym + sty * dgtest; dgx = dgxm + dgtest; dgy = dgym + dgtest; } else { /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval(&stx, &fx, &dgx, &sty, &fy, &dgy, &stepsize, &fval, &dg, stmin, stmax, &brackt); } /* Force a sufficient decrease in the interval of uncertainty. 
*/ if (brackt) { if (0.66 * prev_width <= fabs(sty - stx)) { stepsize = stx + 0.5 * (sty - stx); } prev_width = width; width = std::fabs(sty - stx); } } return false; }
bool LineSearcher::BackTrackLineSearch( DenseVector ¶m, DenseVector &direc, DenseVector &grad, double finit, double &stepsize, std::function<double(DenseVector &, DenseVector &)> &funcgrad) { itercnt_ = 0; double stepupdate; double dginit = direc.dot(grad), dgtest, fval, dgval; const double stepshrink = 0.5, stepexpand = 2.1; if (dginit > 0) { LOG(FATAL) << "initial direction is not a decent direction"; return false; } if (tparam_.size() != param.size()) { tparam_.resize(param.size()); } dgtest = dginit * alpha_; while (itercnt_ < maxtries_) { tparam_ = param + stepsize * direc; fval = funcgrad(tparam_, grad); if (fval > finit + stepsize * dgtest) { stepupdate = stepshrink; } else { if (lscondtype_ == LineSearchConditionType::Armijo) { break; } dgval = direc.dot(grad); if (dgval < beta_ * dginit) { stepupdate = stepexpand; } else { if (lscondtype_ == LineSearchConditionType::Wolfe) { break; } if (dgval > -beta_ * dginit) { stepupdate = stepshrink; } else { break; } } } if (stepsize < minstep_) { LOG(ERROR) << "Small than smallest step size"; return false; } if (stepsize > maxstep_) { LOG(ERROR) << "Large than largest step size"; return false; } stepsize *= stepupdate; ++itercnt_; } if (itercnt_ >= maxtries_) { LOG(ERROR) << "Exceed Maximum number of iteration count"; return false; } param.swap(tparam_); return true; }