// Fills `result` with the product of the transposed length multivector and
// `n`, scaled by the reciprocal of lengthVec->length().
void
LOCA::TurningPoint::MooreSpence::ExtendedGroup::lTransNorm(
			const NOX::Abstract::MultiVector& n,
			NOX::Abstract::MultiVector::DenseMatrix& result) const
{
  // Normalisation factor: one over the length reported by the length vector.
  const double invLength = 1.0 / lengthVec->length();

  // result = invLength * lengthMultiVec^T * n
  n.multiply(invLength, *lengthMultiVec, result);
}
// Applies the block update
//
//   W  = op(T) * (Y1^T*X1 + Y2^T*X2)
//   X1 = X1 + Y1*W
//   X2 = X2 + Y2*W
//
// in place, where op(T) is T^T when useTranspose is set and T otherwise.
// isZeroX1 / isZeroX2 tell the routine to treat the incoming X1 / X2 as zero:
// the corresponding term is left out of the inner product and the block is
// overwritten rather than accumulated into.
void
LOCA::BorderedSolver::HouseholderQR::applyCompactWY(
                const NOX::Abstract::MultiVector::DenseMatrix& Y1,
                const NOX::Abstract::MultiVector& Y2,
                const NOX::Abstract::MultiVector::DenseMatrix& T,
                NOX::Abstract::MultiVector::DenseMatrix& X1,
                NOX::Abstract::MultiVector& X2,
                bool isZeroX1, bool isZeroX2,
                bool useTranspose) const
{
  // Both blocks are flagged as zero, so the result is zero as well.
  if (isZeroX1 && isZeroX2) {
    X1.putScalar(0.0);
    X2.init(0.0);
    return;
  }

  const Teuchos::ETransp opT =
    useTranspose ? Teuchos::TRANS : Teuchos::NO_TRANS;

  // Scale applied to existing contents: 0 overwrites a block flagged as
  // zero, 1 accumulates into a block that carries data.
  const double keepX1 = isZeroX1 ? 0.0 : 1.0;
  const double keepX2 = isZeroX2 ? 0.0 : 1.0;

  NOX::Abstract::MultiVector::DenseMatrix work(Y2.numVectors(),
                                               X2.numVectors());

  // work = Y2^T*X2 (skipped when X2 is flagged as zero)
  if (!isZeroX2)
    X2.multiply(1.0, Y2, work);

  // work = Y1^T*X1 + work; when the Y2^T*X2 term was skipped, work is
  // overwritten instead (keepX2 == 0).
  // Opportunity for optimization here since Y1 is a lower-triangular
  // matrix with unit diagonal
  if (!isZeroX1)
    work.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, Y1, X1, keepX2);

  // work = op(T)*work, with T referenced as an upper-triangular matrix
  dblas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, opT,
         Teuchos::NON_UNIT_DIAG, work.numRows(), work.numCols(), 1.0,
         T.values(), T.numRows(), work.values(), work.numRows());

  // X1 = keepX1*X1 + Y1*work
  // Opportunity for optimization here since Y1 is a lower-triangular
  // matrix with unit diagonal
  X1.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, Y1, work, keepX1);

  // X2 = keepX2*X2 + Y2*work
  X2.update(Teuchos::NO_TRANS, 1.0, Y2, work, keepX2);
}
// Computes result_p = alpha * DX^T * input_x, where DX is the multivector
// returned by getDX().  When isDXZero() reports true the product is not
// formed and result_p is simply set to zero.  Always returns Ok.
NOX::Abstract::Group::ReturnType
LOCA::MultiContinuation::ConstraintInterfaceMVDX::multiplyDX(
               double alpha,
               const NOX::Abstract::MultiVector& input_x,
               NOX::Abstract::MultiVector::DenseMatrix& result_p) const
{
  // Zero derivative: nothing to multiply, just clear the output.
  if (isDXZero()) {
    result_p.putScalar(0.0);
    return NOX::Abstract::Group::Ok;
  }

  const NOX::Abstract::MultiVector* const dx = getDX();
  input_x.multiply(alpha, *dx, result_p);

  return NOX::Abstract::Group::Ok;
}
// Applies the rank-one update defined by the vector [V1; V2] and the scalar
// beta to the block matrix [A1; A2] in place:
//
//   w  = V1^T*A1 + V2^T*A2      (a single row, one entry per column of A2)
//   A1 = A1 - beta*V1*w
//   A2 = A2 - beta*V2*w
void
LOCA::BorderedSolver::HouseholderQR::applyHouseholderVector(
               const NOX::Abstract::MultiVector::DenseMatrix& V1,
               const NOX::Abstract::MultiVector& V2,
               double beta,
               NOX::Abstract::MultiVector::DenseMatrix& A1,
               NOX::Abstract::MultiVector& A2)
{
  // Row vector holding the projection of every column onto [V1; V2]
  NOX::Abstract::MultiVector::DenseMatrix w(1, A2.numVectors());

  // w = V2^T * A2
  A2.multiply(1.0, V2, w);

  // w = w + V1^T * A1
  w.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, V1, A1, 1.0);

  // A1 = A1 - beta*V1*w
  A1.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -beta, V1, w, 1.0);

  // A2 = A2 - beta*V2*w
  A2.update(Teuchos::NO_TRANS, -beta, V2, w, 1.0);
}
// Solves the transposed turning point equations by bordering, using two
// bordered transpose solves followed by a small dense 3x3 solve.
//
// Column layout used by this routine, with m = input_x.numVectors() - 2:
//   - columns 0..m-1 hold the m right-hand sides,
//   - column m and column m+1 are two extra working columns.
// input_null is only read through its first m+1 columns.  Column m+1 of
// result_null is overwritten with *uVector before the second solve.
// result_x, result_null and result_param receive the solution in columns
// 0..m-1; the trailing columns of result_x / result_null are left holding
// intermediate quantities.
//
// NOTE(review): input_param is never read here.  The parameter right-hand
// side is taken from the incoming contents of result_param instead, so the
// caller presumably pre-loads result_param with it -- confirm against the
// caller before relying on either argument.
//
// Returns the combined status of the Jacobian evaluations and solves, or
// Failed if the 3x3 solve reports an error (after calling throwError).
NOX::Abstract::Group::ReturnType 
LOCA::TurningPoint::MooreSpence::PhippsBordering::solveTransposeContiguous(
		  Teuchos::ParameterList& params,
		  const NOX::Abstract::MultiVector& input_x,
		  const NOX::Abstract::MultiVector& input_null,
	          const NOX::Abstract::MultiVector::DenseMatrix& input_param,
		  NOX::Abstract::MultiVector& result_x,
		  NOX::Abstract::MultiVector& result_null,
	          NOX::Abstract::MultiVector::DenseMatrix& result_param) const
{
  std::string callingFunction = 
    "LOCA::TurningPoint::MooreSpence::PhippsBordering::solveTransposeContiguous()";
  NOX::Abstract::Group::ReturnType finalStatus = NOX::Abstract::Group::Ok;
  NOX::Abstract::Group::ReturnType status;

  // Index sets selecting the RHS columns [0,m), the RHS columns plus
  // column m, column m alone, and column m+1 alone.
  int m = input_x.numVectors()-2;
  std::vector<int> index_input(m);
  std::vector<int> index_input_dp(m+1);
  std::vector<int> index_null(1);
  std::vector<int> index_dp(1);
  for (int i=0; i<m; i++) {
    index_input[i] = i;
    index_input_dp[i] = i;
  }
  index_input_dp[m] = m;
  index_dp[0] = m;
  index_null[0] = m+1;

  // Scalar (bordering-row) parts of the two bordered solves
  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_1(1, m+1);
  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_2(1, m+2);

  // Create view of first m+1 columns of input_null, result_null
  Teuchos::RCP<NOX::Abstract::MultiVector> input_null_view = 
      input_null.subView(index_input_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> result_null_view = 
      result_null.subView(index_input_dp);

  // verify underlying Jacobian is valid
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Solve  |J^T v||A B| = |G -phi|
  //        |u^T 0||a b|   |0   0 |
  status =
    transposeBorderedSolver->applyInverseTranspose(params, 
						   input_null_view.get(), 
						   NULL, 
						   *result_null_view, 
						   tmp_mat_1);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> A = 
    result_null.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> B = 
    result_null.subView(index_dp);
  double b = tmp_mat_1(0,m);

  // compute (Jv)_x^T[A B u]
  // (column m+1 of result_null is overwritten with u for this purpose)
  result_null[m+1] = *uVector;
  Teuchos::RCP<NOX::Abstract::MultiVector> tmp = 
    result_null.clone(NOX::ShapeCopy);
  status = group->computeDwtJnDxMulti(result_null, *nullVector, *tmp);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							   finalStatus,
							   callingFunction);

  // compute [F 0 0] - (Jv)_x^T[A B u]
  tmp->update(1.0, input_x, -1.0);

  // verify underlying Jacobian is valid
  // (checked again because a group call was made since the first check)
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Solve  |J^T v||C D E| = |F - (Jv)_x^T A  -(Jv)_x^T B  -(Jv)_x^T u|
  //        |u^T 0||c d e|   |         0             0            0   |
  status = 
    transposeBorderedSolver->applyInverseTranspose(params, 
						   tmp.get(), 
						   NULL, 
						   result_x,
						   tmp_mat_2);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> C = 
    result_x.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> D = 
    result_x.subView(index_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> E = 
    result_x.subView(index_null);
  double d = tmp_mat_2(0, m);
  double e = tmp_mat_2(0, m+1);

  // compute (Jv)_p^T*[A B u]
  NOX::Abstract::MultiVector::DenseMatrix t1(1,m+2);
  result_null.multiply(1.0, *dJndp, t1);

  // compute f_p^T*[C D E]
  NOX::Abstract::MultiVector::DenseMatrix t2(1,m+2);
  result_x.multiply(1.0, *dfdp, t2);

  // compute f_p^T*u
  double fptu = uVector->innerProduct((*dfdp)[0]);

  // Fill coefficient arrays.  GESV reads M as a column-major 3x3 matrix,
  // so each source line below is one column of the coefficient matrix.
  double M[9];
  M[0] = st;   M[1] =  -e;   M[2] = t1(0,m+1) + t2(0,m+1);
  M[3] = 0.0;  M[4] =   st;  M[5] = fptu;
  M[6] = -b;   M[7] =  -d;   M[8] = t1(0,m) + t2(0,m);

  // Compute RHS: a column-major 3 x m array, one column per right-hand side.
  // Held in a std::vector so the storage is released on every exit path,
  // including the error path below (whether throwError throws or returns).
  std::vector<double> R(3*m);
  for (int i=0; i<m; i++) {
    R[3*i]   = tmp_mat_1(0,i);
    R[3*i+1] = tmp_mat_2(0,i);
    R[3*i+2] = result_param(0,i) - t1(0,i) - t2(0,i);
  }

  // Solve M*P = R (R is overwritten with the solution P)
  int three = 3;
  int piv[3];
  int info;
  Teuchos::LAPACK<int,double> L;
  L.GESV(three, m, M, three, piv, R.data(), three, &info);
  if (info != 0) {
    globalData->locaErrorCheck->throwError(
				    callingFunction,
				    "Solve of 3x3 coefficient matrix failed!");
    return NOX::Abstract::Group::Failed;
  }

  // Unpack the three solution rows: alpha, beta and the parameter component
  NOX::Abstract::MultiVector::DenseMatrix alpha(1,m);
  NOX::Abstract::MultiVector::DenseMatrix beta(1,m);
  for (int i=0; i<m; i++) {
    alpha(0,i)        = R[3*i];
    beta(0,i)         = R[3*i+1];
    result_param(0,i) = R[3*i+2];
  }

  // compute A = A + B*z + alpha*u (remember A is a sub-view of result_null)
  // where z is the parameter component now stored in result_param
  A->update(Teuchos::NO_TRANS, 1.0, *B, result_param, 1.0);
  A->update(Teuchos::NO_TRANS, 1.0, *uMultiVector, alpha, 1.0);

  // compute C = C + D*z + alpha*E + beta*u 
  // (remember C is a sub-view of result_x)
  C->update(Teuchos::NO_TRANS, 1.0, *D, result_param, 1.0);
  C->update(Teuchos::NO_TRANS, 1.0, *E, alpha, 1.0);
  C->update(Teuchos::NO_TRANS, 1.0, *uMultiVector, beta, 1.0);

  return finalStatus;
}