int Stokhos::MeanBasedPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { int myBlockRows = epetraCijk->numMyRows(); if (!use_block_apply) { EpetraExt::BlockMultiVector sg_input(View, *base_map, Input); EpetraExt::BlockMultiVector sg_result(View, *base_map, Result); for (int i=0; i<myBlockRows; i++) { mean_prec->ApplyInverse(*(sg_input.GetBlock(i)), *(sg_result.GetBlock(i))); } } else { int m = Input.NumVectors(); Epetra_MultiVector input_block( View, *base_map, Input.Values(), base_map->NumMyElements(), m*myBlockRows); Epetra_MultiVector result_block( View, *base_map, Result.Values(), base_map->NumMyElements(), m*myBlockRows); mean_prec->ApplyInverse(input_block, result_block); } return 0; }
int Stokhos::ApproxJacobiPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Jacobi Time"); #endif // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values()) { input = new Epetra_MultiVector(Input); made_copy = true; } int m = input->NumVectors(); if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m) rhs_block = Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m)); // Extract blocks EpetraExt::BlockMultiVector input_block(View, *base_map, *input); EpetraExt::BlockMultiVector result_block(View, *base_map, Result); int myBlockRows = epetraCijk->numMyRows(); result_block.PutScalar(0.0); for (int iter=0; iter<num_iter; iter++) { // Compute RHS if (iter == 0) rhs_block->Update(1.0, input_block, 0.0); else { mat_free_op->Apply(result_block, *rhs_block); rhs_block->Update(1.0, input_block, -1.0); } // Apply deterministic preconditioner for(int i=0; i<myBlockRows; i++) { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total AJ Deterministic Preconditioner Time"); #endif mean_prec->ApplyInverse(*(rhs_block->GetBlock(i)), *(result_block.GetBlock(i))); } } if (made_copy) delete input; return 0; }
bool NOX::Epetra::LinearSystemMPBD:: applyJacobianInverse(Teuchos::ParameterList ¶ms, const NOX::Epetra::Vector &input, NOX::Epetra::Vector &result) { TEUCHOS_FUNC_TIME_MONITOR("Total deterministic solve Time"); // Extract blocks EpetraExt::BlockVector input_block(View, *base_map, input.getEpetraVector()); EpetraExt::BlockVector result_block(View, *base_map, result.getEpetraVector()); result_block.PutScalar(0.0); Teuchos::ParameterList& block_solver_params = params.sublist("Deterministic Solver Parameters"); // Solve block linear systems bool final_status = true; bool status; for (int i=0; i<num_mp_blocks; i++) { NOX::Epetra::Vector nox_input(input_block.GetBlock(i), NOX::Epetra::Vector::CreateView); NOX::Epetra::Vector nox_result(result_block.GetBlock(i), NOX::Epetra::Vector::CreateView); block_solver->setJacobianOperatorForSolve(block_ops->getCoeffPtr(i)); if (precStrategy == STANDARD) block_solver->setPrecOperatorForSolve(precs[i]); else if (precStrategy == ON_THE_FLY) { block_solver->createPreconditioner(*(prec_x->GetBlock(i)), block_solver_params, false); } status = block_solver->applyJacobianInverse(block_solver_params, nox_input, nox_result); final_status = final_status && status; } return final_status; }
// Block-wise DFT product of a CV_32F image with a CV_32F template.
//
// The template spectrum is computed once.  The result matrix is then filled
// tile by tile: each tile takes a padded window of the image, transforms it,
// multiplies its spectrum with the template spectrum via mulSpectrums
// (last argument true), inverse-transforms, and copies the valid part into
// the result.
//
// _image  - input image, must be CV_32F.
// _templ  - template, must be CV_32F.
// _result - created here as CV_32F with size buf.result_size.
// Always returns true; invalid input types trigger CV_Assert.
static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
{
    ConvolveBuf buf;
    CV_Assert(_image.type() == CV_32F);
    CV_Assert(_templ.type() == CV_32F);

    buf.create(_image.size(), _templ.size());
    _result.create(buf.result_size, CV_32F);

    UMat image = _image.getUMat();
    UMat templ = _templ.getUMat();
    UMat result = _result.getUMat();

    const Size& tile_size = buf.block_size;
    const Size& window_size = buf.dft_size;

    // Pad the template on the bottom/right up to the current dimensions of
    // buf.templ_block, then transform it.  Only the first templ.rows rows
    // are flagged as non-zero for the DFT.
    UMat templ_view = templ;
    const int templ_pad_bottom = buf.templ_block.rows - templ_view.rows;
    const int templ_pad_right = buf.templ_block.cols - templ_view.cols;
    copyMakeBorder(templ_view, buf.templ_block, 0, templ_pad_bottom, 0,
                   templ_pad_right, BORDER_ISOLATED);

    dft(buf.templ_block, buf.templ_spect, 0, templ.rows);

    // Process all blocks of the result matrix
    for (int top = 0; top < result.rows; top += tile_size.height)
    {
        for (int left = 0; left < result.cols; left += tile_size.width)
        {
            // Image window starting at (left, top), clipped to the image.
            const int window_w = std::min(left + window_size.width, image.cols) - left;
            const int window_h = std::min(top + window_size.height, image.rows) - top;
            UMat image_window(image, Rect(left, top, window_w, window_h));

            // Pad the window on the bottom/right up to the block dimensions.
            const int pad_bottom = buf.image_block.rows - image_window.rows;
            const int pad_right = buf.image_block.cols - image_window.cols;
            copyMakeBorder(image_window, buf.image_block, 0, pad_bottom, 0,
                           pad_right, BORDER_ISOLATED);

            dft(buf.image_block, buf.image_spect, 0);

            mulSpectrums(buf.image_spect, buf.templ_spect, buf.result_spect, 0, true);

            dft(buf.result_spect, buf.result_data,
                cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);

            // Copy the top-left part of the inverse transform into the
            // matching tile of the result, clipped to the result bounds.
            const int tile_w = std::min(left + tile_size.width, result.cols) - left;
            const int tile_h = std::min(top + tile_size.height, result.rows) - top;

            UMat dst_tile(result, Rect(left, top, tile_w, tile_h));
            UMat src_tile(buf.result_data, Rect(0, 0, tile_w, tile_h));

            src_tile.copyTo(dst_tile);
        }
    }

    return true;
}
int Stokhos::ApproxSchurComplementPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time"); #endif // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values()) { input = new Epetra_MultiVector(Input); made_copy = true; } // Allocate temporary storage int m = input->NumVectors(); if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m) rhs_block = Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m)); if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec) tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, m*max_num_mat_vec)); j_ptr.resize(m*max_num_mat_vec); mj_indices.resize(m*max_num_mat_vec); // Extract blocks EpetraExt::BlockMultiVector input_block(View, *base_map, *input); EpetraExt::BlockMultiVector result_block(View, *base_map, Result); result_block.PutScalar(0.0); // Set right-hand-side to input_block rhs_block->Update(1.0, input_block, 0.0); // At level l, linear system has the structure // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ] // [ C_l D_l ][ u_l^l ] [ r_l^l ] for (int l=P; l>=1; l--) { // Compute D_l^{-1} r_l^l divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block); } // Solve A_0 u_0 = r_0 divide_diagonal_block(0, 1, *rhs_block, result_block); for (int l=1; l<=P; l++) { // Compute r_l^l - C_l*u_l^{l-1} multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block); // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1}) divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); } if (made_copy) delete input; 
return 0; }
int Stokhos::ApproxGaussSeidelPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Gauss-Seidel Time"); #endif // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values()) { input = new Epetra_MultiVector(Input); made_copy = true; } int m = input->NumVectors(); if (mat_vec_tmp == Teuchos::null || mat_vec_tmp->NumVectors() != m) mat_vec_tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, m)); if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m) rhs_block = Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m)); // Extract blocks EpetraExt::BlockMultiVector input_block(View, *base_map, *input); EpetraExt::BlockMultiVector result_block(View, *base_map, Result); result_block.PutScalar(0.0); int k_limit = sg_poly->size(); if (only_use_linear) k_limit = sg_poly->basis()->dimension() + 1; const Teuchos::Array<double>& norms = sg_basis->norm_squared(); rhs_block->Update(1.0, input_block, 0.0); for (Cijk_type::i_iterator i_it=Cijk->i_begin(); i_it!=Cijk->i_end(); ++i_it) { int i = index(i_it); Teuchos::RCP<Epetra_MultiVector> res_i = result_block.GetBlock(i); { // Apply deterministic preconditioner #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total AGS Deterministic Preconditioner Time"); #endif mean_prec->ApplyInverse(*(rhs_block->GetBlock(i)), *res_i); } int i_gid = epetraCijk->GRID(i); for (Cijk_type::ik_iterator k_it = Cijk->k_begin(i_it); k_it != Cijk->k_end(i_it); ++k_it) { int k = index(k_it); if (k!=0 && k<k_limit) { bool do_mat_vec = false; for (Cijk_type::ikj_iterator j_it = Cijk->j_begin(k_it); j_it != Cijk->j_end(k_it); ++j_it) { int j = index(j_it); int j_gid = epetraCijk->GCID(j); if (j_gid > i_gid) 
{ bool on_proc = epetraCijk->myGRID(j_gid); if (on_proc) { do_mat_vec = true; break; } } } if (do_mat_vec) { (*sg_poly)[k].Apply(*res_i, *mat_vec_tmp); for (Cijk_type::ikj_iterator j_it = Cijk->j_begin(k_it); j_it != Cijk->j_end(k_it); ++j_it) { int j = index(j_it); int j_gid = epetraCijk->GCID(j); double c = value(j_it); if (scale_op) { if (useTranspose) c /= norms[i_gid]; else c /= norms[j_gid]; } if (j_gid > i_gid) { bool on_proc = epetraCijk->myGRID(j_gid); if (on_proc) { rhs_block->GetBlock(j)->Update(-c, *mat_vec_tmp, 1.0); } } } } } } } // For symmetric Gauss-Seidel if (symmetric) { for (Cijk_type::i_reverse_iterator i_it= Cijk->i_rbegin(); i_it!=Cijk->i_rend(); ++i_it) { int i = index(i_it); Teuchos::RCP<Epetra_MultiVector> res_i = result_block.GetBlock(i); { // Apply deterministic preconditioner #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total AGS Deterministic Preconditioner Time"); #endif mean_prec->ApplyInverse(*(rhs_block->GetBlock(i)), *res_i); } int i_gid = epetraCijk->GRID(i); for (Cijk_type::ik_iterator k_it = Cijk->k_begin(i_it); k_it != Cijk->k_end(i_it); ++k_it) { int k = index(k_it); if (k!=0 && k<k_limit) { bool do_mat_vec = false; for (Cijk_type::ikj_iterator j_it = Cijk->j_begin(k_it); j_it != Cijk->j_end(k_it); ++j_it) { int j = index(j_it); int j_gid = epetraCijk->GCID(j); if (j_gid < i_gid) { bool on_proc = epetraCijk->myGRID(j_gid); if (on_proc) { do_mat_vec = true; break; } } } if (do_mat_vec) { (*sg_poly)[k].Apply(*res_i, *mat_vec_tmp); for (Cijk_type::ikj_iterator j_it = Cijk->j_begin(k_it); j_it != Cijk->j_end(k_it); ++j_it) { int j = index(j_it); int j_gid = epetraCijk->GCID(j); double c = value(j_it); if (scale_op) c /= norms[j_gid]; if (j_gid < i_gid) { bool on_proc = epetraCijk->myGRID(j_gid); if (on_proc) { rhs_block->GetBlock(j)->Update(-c, *mat_vec_tmp, 1.0); } } } } } } } } if (made_copy) delete input; return 0; }