bool q_ini_bsplinebasis_3D(const long n,Matrix &BxBxB) { if(n<=0) { printf("ERROR: n should > 0!\n"); return false; } Matrix B(4,4); B.row(1) << -1 << 3 <<-3 << 1; B.row(2) << 3 <<-6 << 3 << 0; B.row(3) << -3 << 0 << 3 << 0; B.row(4) << 1 << 4 << 1 << 0; B/=6.0; Matrix T(n,4); double t_step=1.0/n; for(long i=0;i<n;i++) { double t=t_step*i; for(long j=0;j<=3;j++) T(i+1,j+1)=pow(t,3-j); } Matrix TB=T*B; Matrix BxB=KP(TB,TB); BxBxB=KP(BxB,TB); return true; }
// Assigns the default values of this function block's variables:
// KP, TN and TV are set to 1.0, and the limits LIM_L / LIM_H to -1.0E38 / +1.0E38.
// NOTE(review): presumably KP/TN/TV are the PID gain and time constants and
// LIM_L/LIM_H the output limits -- confirm against the block's interface spec.
void FORTE_FB_FT_PIDWL::setInitialValues()
{
  KP() = 1.0;
  TN() = 1.0;
  TV() = 1.0;
  LIM_L() = -1.0e38;
  LIM_H() = 1.0e38;
}
int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) { // Computes the smallest eigenvalues and the corresponding eigenvectors // of the generalized eigenvalue problem // // K X = M X Lambda // // using a Block Deflation Accelerated Conjugate Gradient algorithm. // // Note that if M is not specified, then K X = X Lambda is solved. // // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J // With the notations of this report, the coefficient beta is defined as // diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} ) // // Input variables: // // numEigen (integer) = Number of eigenmodes requested // // Q (Epetra_MultiVector) = Converged eigenvectors // The number of columns of Q must be equal to numEigen + blockSize. // The rows of Q are distributed across processors. // At exit, the first numEigen columns contain the eigenvectors requested. // // lambda (array of doubles) = Converged eigenvalues // At input, it must be of size numEigen + blockSize. // At exit, the first numEigen locations contain the eigenvalues requested. // // startingEV (integer) = Number of existing converged eigenmodes // // Return information on status of computation // // info >= 0 >> Number of converged eigenpairs at the end of computation // // // Failure due to input arguments // // info = - 1 >> The stiffness matrix K has not been specified. // info = - 2 >> The maps for the matrix K and the matrix M differ. // info = - 3 >> The maps for the matrix K and the preconditioner P differ. // info = - 4 >> The maps for the vectors and the matrix K differ. // info = - 5 >> Q is too small for the number of eigenvalues requested. // info = - 6 >> Q is too small for the computation parameters. 
// // info = - 10 >> Failure during the mass orthonormalization // // info = - 20 >> Error in LAPACK during the local eigensolve // // info = - 30 >> MEMORY // // Check the input parameters if (numEigen <= startingEV) { return startingEV; } int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize); if (info < 0) return info; int myPid = MyComm.MyPID(); // Get the weight for approximating the M-inverse norm Epetra_Vector *vectWeight = 0; if (normWeight) { vectWeight = new Epetra_Vector(View, Q.Map(), normWeight); } int knownEV = startingEV; int localVerbose = verbose*(myPid==0); // Define local block vectors // // MX = Working vectors (storing M*X if M is specified, else pointing to X) // KX = Working vectors (storing K*X) // // R = Residuals // // H = Preconditioned residuals // // P = Search directions // MP = Working vectors (storing M*P if M is specified, else pointing to P) // KP = Working vectors (storing K*P) int xr = Q.MyLength(); Epetra_MultiVector X(View, Q, numEigen, blockSize); X.Random(); int tmp; tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr; double *work1 = new (nothrow) double[tmp]; if (work1 == 0) { if (vectWeight) delete vectWeight; info = -30; return info; } memRequested += sizeof(double)*tmp/(1024.0*1024.0); highMem = (highMem > currentSize()) ? highMem : currentSize(); double *tmpD = work1; Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize); tmpD = (M) ? tmpD + xr*blockSize : tmpD; Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector MP(View, Q.Map(), (M) ? 
tmpD : P.Values(), xr, blockSize); // Define arrays // // theta = Store the local eigenvalues (size: 2*blockSize) // normR = Store the norm of residuals (size: blockSize) // // oldHtR = Store the previous H_i^T*R_i (size: blockSize) // currentHtR = Store the current H_i^T*R_i (size: blockSize) // // MM = Local mass matrix (size: 2*blockSize x 2*blockSize) // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize) // // S = Local eigenvectors (size: 2*blockSize x 2*blockSize) int lwork2; lwork2 = 5*blockSize + 12*blockSize*blockSize; double *work2 = new (nothrow) double[lwork2]; if (work2 == 0) { if (vectWeight) delete vectWeight; delete[] work1; info = -30; return info; } highMem = (highMem > currentSize()) ? highMem : currentSize(); tmpD = work2; double *theta = tmpD; tmpD = tmpD + 2*blockSize; double *normR = tmpD; tmpD = tmpD + blockSize; double *oldHtR = tmpD; tmpD = tmpD + blockSize; double *currentHtR = tmpD; tmpD = tmpD + blockSize; memset(currentHtR, 0, blockSize*sizeof(double)); double *MM = tmpD; tmpD = tmpD + 4*blockSize*blockSize; double *KK = tmpD; tmpD = tmpD + 4*blockSize*blockSize; double *S = tmpD; memRequested += sizeof(double)*lwork2/(1024.0*1024.0); // Define an array to store the residuals history if (localVerbose > 2) { resHistory = new (nothrow) double[maxIterEigenSolve*blockSize]; if (resHistory == 0) { if (vectWeight) delete vectWeight; delete[] work1; delete[] work2; info = -30; return info; } historyCount = 0; } // Miscellaneous definitions bool reStart = false; numRestart = 0; int localSize; int twoBlocks = 2*blockSize; int nFound = blockSize; int i, j; if (localVerbose > 0) { cout << endl; cout << " *|* Problem: "; if (M) cout << "K*Q = M*Q D "; else cout << "K*Q = Q D "; if (Prec) cout << " with preconditioner"; cout << endl; cout << " *|* Algorithm = DACG (block version)" << endl; cout << " *|* Size of blocks = " << blockSize << endl; cout << " *|* Number of requested eigenvalues = " << numEigen << endl; cout.precision(2); 
cout.setf(ios::scientific, ios::floatfield); cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl; cout << " *|* Norm used for convergence: "; if (normWeight) cout << "weighted L2-norm with user-provided weights" << endl; else cout << "L^2-norm" << endl; if (startingEV > 0) cout << " *|* Input converged eigenvectors = " << startingEV << endl; cout << "\n -- Start iterations -- \n"; } timeOuterLoop -= MyWatch.WallTime(); for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) { highMem = (highMem > currentSize()) ? highMem : currentSize(); if ((outerIter == 1) || (reStart == true)) { reStart = false; localSize = blockSize; if (nFound > 0) { Epetra_MultiVector X2(View, X, blockSize-nFound, nFound); Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound); Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound); // Apply the mass matrix to X timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(X2, MX2); timeMassOp += MyWatch.WallTime(); massOp += nFound; if (knownEV > 0) { // Orthonormalize X against the known eigenvectors with Gram-Schmidt // Note: Use R as a temporary work space Epetra_MultiVector copyQ(View, Q, 0, knownEV); timeOrtho -= MyWatch.WallTime(); info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values()); timeOrtho += MyWatch.WallTime(); // Exit the code if the orthogonalization did not succeed if (info < 0) { info = -10; delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; return info; } } // Apply the stiffness matrix to X timeStifOp -= MyWatch.WallTime(); K->Apply(X2, KX2); timeStifOp += MyWatch.WallTime(); stifOp += nFound; } // if (nFound > 0) } // if ((outerIter == 1) || (reStart == true)) else { // Apply the preconditioner on the residuals if (Prec != 0) { timePrecOp -= MyWatch.WallTime(); Prec->ApplyInverse(R, H); timePrecOp += MyWatch.WallTime(); precOp += blockSize; } else { memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double)); } // Compute the product H^T*R timeSearchP -= 
MyWatch.WallTime(); memcpy(oldHtR, currentHtR, blockSize*sizeof(double)); H.Dot(R, currentHtR); // Define the new search directions if (localSize == blockSize) { P.Scale(-1.0, H); localSize = twoBlocks; } // if (localSize == blockSize) else { bool hasZeroDot = false; for (j = 0; j < blockSize; ++j) { if (oldHtR[j] == 0.0) { hasZeroDot = true; break; } callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr); } if (hasZeroDot == true) { // Restart the computation when there is a null dot product if (localVerbose > 0) { cout << endl; cout << " !! Null dot product -- Restart the search space !!\n"; cout << endl; } if (blockSize == 1) { X.Random(); nFound = blockSize; } else { Epetra_MultiVector Xinit(View, X, j, blockSize-j); Xinit.Random(); nFound = blockSize - j; } // if (blockSize == 1) reStart = true; numRestart += 1; info = 0; continue; } callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values()); } // if (localSize == blockSize) timeSearchP += MyWatch.WallTime(); // Apply the mass matrix on P timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(P, MP); timeMassOp += MyWatch.WallTime(); massOp += blockSize; if (knownEV > 0) { // Orthogonalize P against the known eigenvectors // Note: Use R as a temporary work space Epetra_MultiVector copyQ(View, Q, 0, knownEV); timeOrtho -= MyWatch.WallTime(); modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values()); timeOrtho += MyWatch.WallTime(); } // Apply the stiffness matrix to P timeStifOp -= MyWatch.WallTime(); K->Apply(P, KP); timeStifOp += MyWatch.WallTime(); stifOp += blockSize; } // if ((outerIter == 1) || (reStart == true)) // Form "local" mass and stiffness matrices // Note: Use S as a temporary workspace timeLocalProj -= MyWatch.WallTime(); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr, KK, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr, MM, localSize, S); if (localSize > blockSize) { 
modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr, KK + blockSize*localSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr, KK + blockSize*localSize + blockSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr, MM + blockSize*localSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr, MM + blockSize*localSize + blockSize, localSize, S); } // if (localSize > blockSize) timeLocalProj += MyWatch.WallTime(); // Perform a spectral decomposition timeLocalSolve -= MyWatch.WallTime(); int nevLocal = localSize; info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal, S, localSize, theta, localVerbose, (blockSize == 1) ? 1: 0); timeLocalSolve += MyWatch.WallTime(); if (info < 0) { // Stop when spectral decomposition has a critical failure break; } // Check for restarting if ((theta[0] < 0.0) || (nevLocal < blockSize)) { if (localVerbose > 0) { cout << " Iteration " << outerIter; cout << "- Failure for spectral decomposition - RESTART with new random search\n"; } if (blockSize == 1) { X.Random(); nFound = blockSize; } else { Epetra_MultiVector Xinit(View, X, 1, blockSize-1); Xinit.Random(); nFound = blockSize - 1; } // if (blockSize == 1) reStart = true; numRestart += 1; info = 0; continue; } // if ((theta[0] < 0.0) || (nevLocal < blockSize)) if ((localSize == twoBlocks) && (nevLocal == blockSize)) { for (j = 0; j < nevLocal; ++j) memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double)); localSize = blockSize; } // Check the direction of eigenvectors // Note: This sign check is important for convergence for (j = 0; j < nevLocal; ++j) { double coeff = S[j + j*localSize]; if (coeff < 0.0) callBLAS.SCAL(localSize, -1.0, S + j*localSize); } // Compute the residuals timeResidual -= MyWatch.WallTime(); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, 
KX.Values(), xr, S, localSize, 0.0, R.Values(), xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } for (j = 0; j < blockSize; ++j) callBLAS.SCAL(localSize, theta[j], S + j*localSize); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr, S, localSize, 1.0, R.Values(), xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } for (j = 0; j < blockSize; ++j) callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize); timeResidual += MyWatch.WallTime(); // Compute the norms of the residuals timeNorm -= MyWatch.WallTime(); if (vectWeight) R.NormWeighted(*vectWeight, normR); else R.Norm2(normR); // Scale the norms of residuals with the eigenvalues // Count the converged eigenvectors nFound = 0; for (j = 0; j < blockSize; ++j) { normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j]; if (normR[j] < tolEigenSolve) nFound += 1; } timeNorm += MyWatch.WallTime(); // Store the residual history if (localVerbose > 2) { memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double)); historyCount += 1; } // Print information on current iteration if (localVerbose > 0) { cout << " Iteration " << outerIter << " - Number of converged eigenvectors "; cout << knownEV + nFound << endl; } if (localVerbose > 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; cout.precision(2); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i; cout.setf((fabs(theta[i]) < 0.01) ? 
ios::scientific : ios::fixed, ios::floatfield); cout << " = " << theta[i] << endl; } cout << endl; } if (nFound == 0) { // Update the spaces // Note: Use H as a temporary work space timeLocalUpdate -= MyWatch.WallTime(); memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, X.Values(), xr); memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, KX.Values(), xr); if (M) { memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, MX.Values(), xr); } if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, X.Values(), xr); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr, S + blockSize, localSize, 1.0, KX.Values(), xr); if (M) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr, S + blockSize, localSize, 1.0, MX.Values(), xr); } } // if (localSize == twoBlocks) timeLocalUpdate += MyWatch.WallTime(); // When required, monitor some orthogonalities if (verbose > 2) { if (knownEV == 0) { accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0); } else { Epetra_MultiVector copyQ(View, Q, 0, knownEV); accuracyCheck(&X, &MX, &R, ©Q, (localSize>blockSize) ? 
&P : 0); } } // if (verbose > 2) continue; } // if (nFound == 0) // Order the Ritz eigenvectors by putting the converged vectors at the beginning int firstIndex = blockSize; for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) while (firstIndex < nFound) { for (j = firstIndex; j < blockSize; ++j) { if (normR[j] < tolEigenSolve) { // Swap the j-th and firstIndex-th position callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1); callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1); callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1); break; } } // for (j = firstIndex; j < blockSize; ++j) for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) } // while (firstIndex < nFound) // Copy the converged eigenvalues memcpy(lambda + knownEV, theta, nFound*sizeof(double)); // Convergence test if (knownEV + nFound >= numEigen) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr, S, localSize, 0.0, R.Values(), xr); if (localSize > blockSize) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double)); knownEV += nFound; if (localVerbose == 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; } break; } // Store the converged eigenvalues and eigenvectors callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr, S, localSize, 0.0, Q.Values() + knownEV*xr, xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr); } knownEV += nFound; // Define the restarting 
vectors timeRestart -= MyWatch.WallTime(); int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize; double *Snew = S + nFound*localSize; memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, X.Values(), xr); memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, KX.Values(), xr); if (M) { memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, MX.Values(), xr); } if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr, Snew+blockSize, localSize, 1.0, X.Values(), xr); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr, Snew+blockSize, localSize, 1.0, KX.Values(), xr); if (M) { callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr, Snew+blockSize, localSize, 1.0, MX.Values(), xr); } } // if (localSize == twoBlocks) if (nevLocal < blockSize + nFound) { // Put new random vectors at the end of the block Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver); Xtmp.Random(); } else { nFound = 0; } // if (nevLocal < blockSize + nFound) reStart = true; timeRestart += MyWatch.WallTime(); } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) timeOuterLoop += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? highMem : currentSize(); // Clean memory delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; // Sort the eigenpairs timePostProce -= MyWatch.WallTime(); if ((info == 0) && (knownEV > 0)) { mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength()); } timePostProce += MyWatch.WallTime(); return (info == 0) ? knownEV : info; }
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const { int xrow = X.MyLength(); int xcol = X.NumVectors(); int ycol = Y.NumVectors(); int info = 0; int localVerbose = verbose*(MyComm.MyPID() == 0); double *valX = X.Values(); int NB = 3 + callLAPACK.ILAENV(1, "hetrd", "u", blkSize); int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize; int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD; bool useY = true; if (ycol % blkSize != 0) { // Allocate an extra block to store the solutions wSize += blkSize*xrow; useY = false; } if (lWorkSpace < wSize) { delete[] workSpace; workSpace = new (std::nothrow) double[wSize]; if (workSpace == 0) { info = -1; return info; } lWorkSpace = wSize; } // if (lWorkSpace < wSize) double *pointer = workSpace; // Array to store the matrix PtKP double *PtKP = pointer; pointer = pointer + blkSize*blkSize; // Array to store coefficient matrices double *coeff = pointer; pointer = pointer + blkSize*blkSize; // Workspace array double *workD = pointer; pointer = pointer + lworkD; // Array to store the eigenvalues of P^t K P double *da = pointer; pointer = pointer + blkSize; // Array to store the norms of right hand sides double *initNorm = pointer; pointer = pointer + blkSize; // Array to store the norms of residuals double *resNorm = pointer; pointer = pointer + blkSize; // Array to store the residuals double *valR = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize); // Array to store the preconditioned residuals double *valZ = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize); // Array to store the search directions double *valP = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize); // Array to store the image of the search directions double *valKP = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector KP(View, X.Map(), valKP, 
xrow, blkSize); // Pointer to store the solutions double *valSOL = (useY == true) ? Y.Values() : pointer; int iRHS; for (iRHS = 0; iRHS < xcol; iRHS += blkSize) { int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS; // Set the initial residuals to the right hand sides if (numVec < blkSize) { R.Random(); } memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double)); // Set the initial guess to zero valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL; Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize); SOL.PutScalar(0.0); int ii = 0; int iter = 0; int nFound = 0; R.Norm2(initNorm); if (localVerbose > 1) { std::cout << std::endl; std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << std::endl; if (localVerbose > 2) { std::fprintf(stderr,"\n"); for (ii = 0; ii < numVec; ++ii) { std::cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << std::endl; } std::cout << std::endl; } } // Iteration loop for (iter = 1; iter <= iterMax; ++iter) { // Apply the preconditioner if (Prec) Prec->ApplyInverse(R, Z); else Z = R; // Define the new search directions if (iter == 1) { P = Z; } else { // Compute P^t K Z callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, coeff, blkSize*blkSize); // Compute the coefficient (P^t K P)^{-1} P^t K Z callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize, 0.0, workD, blkSize); for (ii = 0; ii < blkSize; ++ii) callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize, 0.0, coeff, blkSize); // Update the search directions // Note: Use KP as a workspace memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double)); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize, 0.0, 
P.Values(), xrow); P.Update(1.0, Z, -1.0); } // if (iter == 1) K->Apply(P, KP); // Compute P^t K P callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, PtKP, blkSize*blkSize); // Eigenvalue decomposition of P^t K P callLAPACK.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info); if (info) { // Break the loop as spectral decomposition failed break; } // if (info) // Compute the pseudo-inverse of the eigenvalues for (ii = 0; ii < blkSize; ++ii) { TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative " "eigenvalue for P^T K P: da[" << ii << "] = " << da[ii] << "."); da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii]; } // for (ii = 0; ii < blkSize; ++ii) // Compute P^t R callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, coeff, blkSize*blkSize); // Compute the coefficient (P^t K P)^{-1} P^t R callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize, 0.0, workD, blkSize); for (ii = 0; ii < blkSize; ++ii) callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize, 0.0, coeff, blkSize); // Update the solutions callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize, 1.0, valSOL, xrow); // Update the residuals callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize, 1.0, R.Values(), xrow); // Check convergence R.Norm2(resNorm); nFound = 0; for (ii = 0; ii < numVec; ++ii) { if (resNorm[ii] <= tolCG*initNorm[ii]) nFound += 1; } if (localVerbose > 1) { std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1; std::cout << " -- Iteration " << iter << " -- " << 
nFound << " converged vectors\n"; if (localVerbose > 2) { std::cout << std::endl; for (ii = 0; ii < numVec; ++ii) { std::cout << " ... "; std::cout.width(5); std::cout << ii << " ... Residual = "; std::cout.precision(2); std::cout.setf(std::ios::scientific, std::ios::floatfield); std::cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << std::endl; } std::cout << std::endl; } } if (nFound == numVec) { break; } } // for (iter = 1; iter <= maxIter; ++iter) if (useY == false) { // Copy the solutions back into Y memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double)); } numSolve += nFound; if (nFound == numVec) { minIter = (iter < minIter) ? iter : minIter; maxIter = (iter > maxIter) ? iter : maxIter; sumIter += iter; } } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize) return info; }
//extern "C" SEXP mc_irf_var(SEXP varobj, SEXP nsteps, SEXP draws) { int m, p, dr=INTEGER(draws)[0], ns=INTEGER(nsteps)[0], T, df, i; SEXP AR, Y, Bhat, XR, prior, hstar, meanS, output; // Get # vars/lags/steps/draws/T/df PROTECT(AR = listElt(varobj, "ar.coefs")); PROTECT(Y = listElt(varobj, "Y")); m = INTEGER(getAttrib(AR, R_DimSymbol))[0]; //#vars p = INTEGER(getAttrib(AR, R_DimSymbol))[2]; //#lags T = nrows(Y); df = T - m*p - m - 1; UNPROTECT(2); // Put coefficients from varobj$Bhat in Bcoefs vector (m^2*p, 1) PROTECT(Bhat = coerceVector(listElt(varobj, "Bhat"), REALSXP)); Matrix bcoefs = R2Cmat(Bhat, m*p, m); bcoefs = bcoefs.AsColumn(); UNPROTECT(1); // Define X(T x m*p) subset of varobj$X and XXinv as solve(X'X) PROTECT(XR = coerceVector(listElt(varobj,"X"),REALSXP)); Matrix X = R2Cmat(XR, T, m*p), XXinv; UNPROTECT(1); // Get the correct moment matrix PROTECT(prior = listElt(varobj,"prior")); if(!isNull(prior)){ PROTECT(hstar = coerceVector(listElt(varobj,"hstar"),REALSXP)); XXinv = R2Cmat(hstar, m*p, m*p).i(); UNPROTECT(1); } else { XXinv = (X.t()*X).i(); } UNPROTECT(1); // Get the transpose of the Cholesky decomp of XXinv SymmetricMatrix XXinvSym; XXinvSym << XXinv; XXinv = Cholesky(XXinvSym); // Cholesky of covariance PROTECT(meanS = coerceVector(listElt(varobj,"mean.S"),REALSXP)); SymmetricMatrix meanSSym; meanSSym << R2Cmat(meanS, m, m); Matrix Sigmat = Cholesky(meanSSym); UNPROTECT(1); // Matricies needed for the loop ColumnVector bvec; bvec=0.0; Matrix sqrtwish, impulse(dr,m*m*ns); impulse = 0.0; SymmetricMatrix sigmadraw; sigmadraw = 0.0; IdentityMatrix I(m); GetRNGstate(); // Main Loop for (i=1; i<=dr; i++){ // Wishart/Beta draws sigmadraw << Sigmat*(T*rwish(I,df).i())*Sigmat.t(); sqrtwish = Cholesky(sigmadraw); bvec = bcoefs+KP(sqrtwish, XXinv)*rnorms(m*m*p); // IRF computation impulse.Row(i) = irf_var_from_beta(sqrtwish, bvec, ns).t(); if (!(i%1000)){ Rprintf("Monte Carlo IRF Iteration = %d\n",i); } } // end main loop PutRNGstate(); int 
dims[]={dr,ns,m*m}; PROTECT(output = C2R3D(impulse,dims)); setclass(output,"mc.irf.VAR"); UNPROTECT(1); return output; }