/*!
 * Solve the linear system, building or reusing the preconditioner as needed.
 *
 * The preconditioner is rebuilt from baseMatrixForPreconditioner unless one is
 * already set and M_reusePreconditioner is true. When a reused preconditioner
 * leads to a failed solve (negative iteration count), it is rebuilt once and
 * the solve is attempted a second time.
 *
 * @param rhsFull                     right-hand side passed to the inner solve
 * @param solution                    vector passed to the inner solve to receive the solution
 * @param baseMatrixForPreconditioner matrix handed to buildPreconditioner();
 *                                    a warning is printed when the pointer is empty
 * @return the value returned by the last inner solveSystem() call; a negative
 *         value is treated here as a failure of the iterative solver
 */
Int SolverAztecOO::solveSystem ( const vector_type& rhsFull,
                                 vector_type&       solution,
                                 matrix_ptrtype&    baseMatrixForPreconditioner )
{
    LifeChrono chrono;

    M_displayer->leaderPrint ( "SLV- Setting up the solver ... \n" );

    if ( baseMatrixForPreconditioner.get() == 0 )
    {
        M_displayer->leaderPrint ( "SLV- Warning: baseMatrixForPreconditioner is empty \n" );
    }

    // A second attempt only makes sense when the first one ran with an old
    // preconditioner: if it is rebuilt right now there is nothing better to try.
    const bool reusingPreconditioner = isPreconditionerSet() && M_reusePreconditioner;
    if ( reusingPreconditioner )
    {
        M_displayer->leaderPrint ( "SLV- Reusing precond ... \n" );
    }
    else
    {
        buildPreconditioner ( baseMatrixForPreconditioner );
    }

    Int numIter = solveSystem ( rhsFull, solution, M_preconditioner );

    const bool firstAttemptFailed = ( numIter < 0 );
    if ( firstAttemptFailed && reusingPreconditioner )
    {
        // Time only the preconditioner rebuild, not the second solve.
        chrono.start();

        M_displayer->leaderPrint ( "SLV- Iterative solver failed, numiter = " , - numIter );
        M_displayer->leaderPrint ( "SLV- maxIterSolver = " , M_maxIter );
        M_displayer->leaderPrint ( "SLV- retrying: " );

        buildPreconditioner ( baseMatrixForPreconditioner );

        chrono.stop();
        M_displayer->leaderPrintMax ( "done in " , chrono.diff() );

        // Second and last attempt: no further retry after this one.
        numIter = solveSystem ( rhsFull, solution, M_preconditioner );

        if ( numIter < 0 )
        {
            M_displayer->leaderPrint ( " ERROR: Iterative solver failed again.\n" );
        }
    }

    // Too many iterations (or a failure with a large count): drop the
    // preconditioner so it is not reused by the next call.
    if ( std::abs (numIter) > M_maxIterForReuse )
    {
        resetPreconditioner();
    }

    return numIter;
}
double LSPredictionComputer::compute(const Point3i& currentPos, Context* context) { this->context = context; if(!context->getTrainingregion().getNumberOfElements()) { // first 4 pixels / 8 voxels in image context->contextOf(currentPos, *coefficients); // misuse coefficients vector to store neighbors for variance computation if(coefficients->cols > 1) // not the first pixel return coefficients->at<double>(coefficients->cols - 1) = mean(coefficients->colRange(0, coefficients->cols - 1))[0]; else return (1.0 + predictor->getMaxval()) / 2.0; // first pixel of image } estimate(currentPos); coefficients->create(covMat->rows + 1, 2, CV_64F); // reset to maximum size (should not need memory re-allocation) *coefficients = coefficients->rowRange(0, covMat->rows); // set used region if(border_regularization != 0.0) { // Tikhonov regularization for border and for inner pixels if(context->isBorder()) { // border covMat->diag() += Scalar(border_regularization); solveSystem(); covMat->col(covMat->cols - 2) += coefficients->col(0) * border_regularization; // correct RHS for variance estimation } else if(inner_regularization != 0.0) { // inner covMat->diag() += Scalar(inner_regularization); solveSystem(); covMat->col(covMat->cols - 2) += coefficients->col(0) * inner_regularization; // correct RHS for variance estimation } else solveSystem(); } else solveSystem(); double prediction = covMat->col(covMat->cols - 1).dot(coefficients->col(0)); // linear prediction using dot product return (prediction < 0.0 ? 0.0 : (prediction > predictor->getMaxval() ? predictor->getMaxval() : prediction)); // crop to valid value range } // end LSPredictionComputer::compute
void getSpline(float *curve, float control_X, float control_Y) { double derivatives[3]; ControlPoint points[3]; points[0].x = 0.0f; points[0].y = 0.0f; points[1].x = control_X; points[1].y = control_Y; points[2].x = 1.0f; points[2].y = 1.0f; solveSystem(derivatives, points); int i; int start = 0; if (points[0].x != 0) { start = (int) (points[0].x * 256); } for (i = 0; i < start; i++) { curve[i] = 1.0f - points[0].y; } for (i = start; i < 256; i++) { ControlPoint cur; ControlPoint next; double x = i / 256.0; int pivot = 0; int j; for (j = 0; j < 3 - 1; j++) { if (x >= points[j].x && x <= points[j + 1].x) { pivot = j; } } cur = points[pivot]; next = points[pivot + 1]; if (x <= next.x) { double x1 = cur.x; double x2 = next.x; double y1 = cur.y; double y2 = next.y; double delta = (x2 - x1); double delta2 = delta * delta; double b = (x - x1) / delta; double a = 1 - b; double ta = a * y1; double tb = b * y2; double tc = (a * a * a - a) * derivatives[pivot]; double td = (b * b * b - b) * derivatives[pivot + 1]; double y = ta + tb + (delta2 / 6) * (tc + td); if (y > 1.0f) { y = 1.0f; } if (y < 0) { y = 0; } curve[i] = (float) (1.0f - y); } else { curve[i] = 1.0f - next.y; } } }
// Q = (I - A)(I + A)^{-1} void getCayleyTransform(const double *source, int dim, double *target) { double temp1[dim * dim]; double temp2[dim * dim]; double temp3[dim * dim]; double *plusMatrix = temp1; // temp1 in use; I+A double *minusMatrix = temp2; // temp2 in use; I-A double *identity = target; // I // copy in I+A to lower triangle for inverse, and set scratch to I, // copy I-A to temp int offset = 0; for (int col = 0; col < dim; ++col) { offset = col * (dim + 1); identity[offset] = 1.0; plusMatrix[offset] = 1.0; minusMatrix[offset] = 1.0; ++offset; for (int row = col + 1; row < dim; ++row) { identity[offset] = identity[col + row * dim] = 0.0; plusMatrix[offset] = source[offset]; plusMatrix[col + row * dim] = source[col + row * dim]; minusMatrix[offset] = -source[offset]; minusMatrix[col + row * dim] = -source[col + row * dim]; ++offset; } } double *inverseMatrix = temp3; // temp3 in use; (I+Q)^{-1} int lapackResult = solveSystem(plusMatrix, dim, identity, dim, inverseMatrix); // temp1 free if (lapackResult != 0) { if (lapackResult < 0) { error("error in call to LAPACK routine 'dgesvx': argument %d illegal", -lapackResult); } else if (lapackResult <= dim ){ error("error in call to LAPACK routine 'dgesvx': factor U(%d) is exactly 0 so that U is singular", lapackResult); } else { error("error in call to LAPACK routine 'dgesvx': reciprocal condition estimate below machine tolerance", lapackResult); } } multiplyMatrices(minusMatrix, dim, dim, // temp2, temp3 free inverseMatrix, dim, dim, target); }
bool CCDivGradHypreLevelSolver::solveSystem( SAMRAIVectorReal<NDIM,double>& x, SAMRAIVectorReal<NDIM,double>& b) { IBTK_TIMER_START(t_solve_system); if (d_enable_logging) plog << d_object_name << "::solveSystem():" << std::endl; // Initialize the solver, when necessary. const bool deallocate_after_solve = !d_is_initialized; if (deallocate_after_solve) initializeSolverState(x,b); // Solve the system using the hypre solver. static const int comp = 0; const int x_idx = x.getComponentDescriptorIndex(comp); const int b_idx = b.getComponentDescriptorIndex(comp); bool converged = true; IntVector<NDIM> chkbrd_mode_id; #if (NDIM > 2) for (chkbrd_mode_id(2) = 0; chkbrd_mode_id(2) < 2; ++chkbrd_mode_id(2)) { #endif for (chkbrd_mode_id(1) = 0; chkbrd_mode_id(1) < 2; ++chkbrd_mode_id(1)) { for (chkbrd_mode_id(0) = 0; chkbrd_mode_id(0) < 2; ++chkbrd_mode_id(0)) { bool converged_mode = solveSystem(x_idx, b_idx, chkbrd_mode_id); if (d_enable_logging) { plog << d_object_name << "::solveSystem(): solver " << (converged_mode ? "converged" : "diverged") << "\n" << "chkbrd_mode_id = " << chkbrd_mode_id << "\n" << "iterations = " << d_current_its << "\n" << "residual norm = " << d_current_residual_norm << std::endl; } converged = converged && converged_mode; } } #if (NDIM > 2) } #endif // Deallocate the solver, when necessary. if (deallocate_after_solve) deallocateSolverState(); IBTK_TIMER_STOP(t_solve_system); return converged; }// solveSystem
/**
 * Runs the sub-task currently selected in `subTask` on behalf of `caller`.
 *
 * Threads whose matrix range is empty return immediately. Thread 0 refreshes
 * `bnorm` before all remaining threads meet at caller->sync().
 */
void ParallelCGCudaTask<N, T>::execute(const Thread* caller){
  /* Nothing assigned to this thread: skip allocation and computation. */
  /* NOTE(review): this return happens before caller->sync(); confirm that
     sync() does not wait for threads that left early. */
  if(cmat->getMRange(TID)->range == 0){
    return;
  }

  if(TID == 0){
    /* r is used as scratch for mul(*r, *b, *b); bnorm is Sqrt of its sum -
       presumably the Euclidean norm of b. */
    Vector<T>::mul(*r, *b, *b);
    bnorm = Sqrt(r->sum());
  }

  caller->sync();

  switch(subTask){
  case SolveSystem:
    solveSystem(caller);
    break;

  case UpdateBlocks:
    updateBlocks(caller);
    break;

  case CopyResult:
    copyResult(caller);
    break;

  case Deallocate:
    deallocate(caller);
    break;

  case Allocate:
#if 1
    /* Attempt the full allocation. On a CUDA failure the task is marked
       invalid and whatever was allocated is released before rethrowing;
       any other Exception is reported and rethrown unchanged. */
    try{
      allocate(caller);
    }catch(CUDAException& e){
      std::cerr << e.getError();
      valid = false;
      deallocate(caller);
      throw;
    }catch(Exception& e){
      std::cerr << e.getError();
      throw;
    }
#else
    allocate(caller);
#endif
    break;
  }
}
static int solveSystem_test() { // a rotation matrix double leftHandSide[] = { 0.987576802050028, -0.0130012146128751, -0.156598302900223, -0.0359106006778125, -0.988872097570814, -0.144368983527825, -0.152978720126685, 0.148198998189896, -0.977054025181777 }; double correctAnswer[] = { 0.68340774192092, 0.617022240899081, -0.40316039594569, -0.214071757425697, 0.0997551565880077, -0.314052969735081 }; int arrayLength = testMatrix3Rows * testMatrix3Columns; double rightHandSide[arrayLength]; for (int i = 0; i < arrayLength; ++i) rightHandSide[i] = testMatrix3[i]; double result[arrayLength]; solveSystem(leftHandSide, testMatrix3Rows, rightHandSide, testMatrix3Columns, result); return (allApproximatelyEqual(correctAnswer, result, testMatrix3Rows * testMatrix3Columns, TEST_TOLERANCE)); }
/** * @brief BT_i and B_j are converted to dense matrices in each process to solve the sparse system AX=B_j and afterwards do BT_i * X. X is stored as a dense matrix in AB_sol * * @param A Sparse (0,0)-block of which we want to compute the Schur complement in matrix C * @param BT_i Sparse (1,0)-block of C corresponding to T_ij. * @param B_j Sparse (0,1)-block of C corresponding to T_ij * @param T_ij Dense (1,1)-block of C * @param lld_T local leading dimension of T_ij * @param AB_sol_out Dense solution of AX=B_j (output) * @return int **/ int make_Sij_parallel_denseB(CSRdouble& A, CSRdouble& BT_i, CSRdouble& B_j, double * T_ij, int lld_T, double * AB_sol_out) { double *BT_i_dense, *B_j_dense; assert(A.nrows == BT_i.ncols); BT_i_dense=(double *) calloc(BT_i.nrows * BT_i.ncols,sizeof(double)); if ( BT_i_dense == NULL ) { printf ( "unable to allocate memory for dense matrix BT_i (required: %ld bytes)\n", BT_i.nrows * BT_i.ncols * sizeof ( double ) ); return EXIT_FAILURE; } B_j_dense=(double *) calloc(B_j.nrows * B_j.ncols,sizeof(double)); if ( B_j_dense == NULL ) { printf ( "unable to allocate memory for dense matrix B_j (required: %ld bytes)\n", B_j.nrows * B_j.ncols * sizeof ( double ) ); return EXIT_FAILURE; } CSR2dense(BT_i,BT_i_dense); CSR2dense(B_j,B_j_dense); solveSystem(A, AB_sol_out,B_j_dense, 2, B_j.ncols); printf("Processor %d finished solving system AX=B\n",iam); dgemm_("N","N",&(BT_i.nrows),&(B_j.ncols),&(BT_i.ncols),&d_negone,BT_i_dense,&(BT_i.nrows), AB_sol_out,&(A.nrows),&d_one,T_ij,&lld_T ); if(BT_i_dense != NULL) free(BT_i_dense); BT_i_dense=NULL; if(B_j_dense != NULL) free(B_j_dense); B_j_dense=NULL; return 0; }