示例#1
0
/**
 * Solve the system for rhsFull, taking care of the preconditioner first.
 *
 * The preconditioner is built from baseMatrixForPreconditioner when none is
 * set or when reuse is disabled; otherwise the existing one is reused. If the
 * solve with a reused preconditioner reports failure (negative return value),
 * the preconditioner is rebuilt and the solve is attempted exactly once more.
 * After the solve(s), the preconditioner is reset when the absolute iteration
 * count exceeds M_maxIterForReuse.
 *
 * @param rhsFull                     right-hand side handed to the inner solve
 * @param solution                    vector handed to the inner solve as the solution
 * @param baseMatrixForPreconditioner matrix used to build the preconditioner;
 *                                    a null pointer only triggers a warning
 * @return the value returned by the last inner solveSystem call; negative
 *         values are treated as failure, with the negated value printed as
 *         the iteration count
 */
Int SolverAztecOO::solveSystem ( const vector_type& rhsFull,
                                 vector_type&       solution,
                                 matrix_ptrtype&    baseMatrixForPreconditioner )

{
    LifeChrono rebuildTimer;

    M_displayer->leaderPrint ( "SLV-  Setting up the solver ...                \n" );

    if ( baseMatrixForPreconditioner.get() == 0 )
    {
        M_displayer->leaderPrint ( "SLV-  Warning: baseMatrixForPreconditioner is empty     \n" );
    }

    // A freshly built preconditioner cannot be improved by building it again,
    // so a second attempt only makes sense when an old one was reused.
    const bool rebuildFirst = !isPreconditionerSet() || !M_reusePreconditioner;

    if ( rebuildFirst )
    {
        buildPreconditioner ( baseMatrixForPreconditioner );
    }
    else
    {
        M_displayer->leaderPrint ( "SLV-  Reusing precond ...                 \n" );
    }

    Int numIter = solveSystem ( rhsFull, solution, M_preconditioner );

    const bool secondAttempt = !rebuildFirst && ( numIter < 0 );

    if ( secondAttempt )
    {
        // Only the preconditioner rebuild is timed, not the second solve.
        rebuildTimer.start();

        M_displayer->leaderPrint ( "SLV-  Iterative solver failed, numiter = " , - numIter );
        M_displayer->leaderPrint ( "SLV-  maxIterSolver = " , M_maxIter );
        M_displayer->leaderPrint ( "SLV-  retrying:          " );

        buildPreconditioner ( baseMatrixForPreconditioner );

        rebuildTimer.stop();
        M_displayer->leaderPrintMax ( "done in " , rebuildTimer.diff() );

        // One more solve with the new preconditioner; no further retries.
        numIter = solveSystem ( rhsFull, solution, M_preconditioner );

        if ( numIter < 0 )
        {
            M_displayer->leaderPrint ( " ERROR: Iterative solver failed again.\n" );
        }
    }

    // Too many iterations (successful or not): drop the preconditioner.
    if ( std::abs (numIter) > M_maxIterForReuse )
    {
        resetPreconditioner();
    }

    return numIter;
}
示例#2
0
double LSPredictionComputer::compute(const Point3i& currentPos, Context* context) {
	this->context = context;
	if(!context->getTrainingregion().getNumberOfElements()) { // first 4 pixels / 8 voxels in image
		context->contextOf(currentPos, *coefficients); // misuse coefficients vector to store neighbors for variance computation
		if(coefficients->cols > 1) // not the first pixel
			return coefficients->at<double>(coefficients->cols - 1) = mean(coefficients->colRange(0, coefficients->cols - 1))[0];
		else return (1.0 + predictor->getMaxval()) / 2.0; // first pixel of image
	}
	estimate(currentPos);
	coefficients->create(covMat->rows + 1, 2, CV_64F); // reset to maximum size (should not need memory re-allocation)
	*coefficients = coefficients->rowRange(0, covMat->rows); // set used region
	if(border_regularization != 0.0) { // Tikhonov regularization for border and for inner pixels
		if(context->isBorder()) { // border
			covMat->diag() += Scalar(border_regularization);
			solveSystem();
			covMat->col(covMat->cols - 2) += coefficients->col(0) * border_regularization; // correct RHS for variance estimation
		} else if(inner_regularization != 0.0) { // inner
			covMat->diag() += Scalar(inner_regularization);
			solveSystem();
			covMat->col(covMat->cols - 2) += coefficients->col(0) * inner_regularization; // correct RHS for variance estimation
		} else solveSystem();
	} else solveSystem();
	double prediction = covMat->col(covMat->cols - 1).dot(coefficients->col(0)); // linear prediction using dot product
	return (prediction < 0.0 ? 0.0 : (prediction > predictor->getMaxval() ? predictor->getMaxval() : prediction)); // crop to valid value range
} // end LSPredictionComputer::compute
示例#3
0
void getSpline(float *curve, float control_X, float control_Y) {
	double derivatives[3];
	ControlPoint points[3];
	points[0].x = 0.0f;
	points[0].y = 0.0f;
	points[1].x = control_X;
	points[1].y = control_Y;
	points[2].x = 1.0f;
	points[2].y = 1.0f;
    
	solveSystem(derivatives, points);
	int i;
	int start = 0;
	if (points[0].x != 0) {
		start = (int) (points[0].x * 256);
	}
	for (i = 0; i < start; i++) {
		curve[i] = 1.0f - points[0].y;
	}
	for (i = start; i < 256; i++) {
		ControlPoint cur;
		ControlPoint next;
		double x = i / 256.0;
		int pivot = 0;
		int j;
		for (j = 0; j < 3 - 1; j++) {
			if (x >= points[j].x && x <= points[j + 1].x) {
				pivot = j;
			}
		}
		cur = points[pivot];
		next = points[pivot + 1];
		if (x <= next.x) {
			double x1 = cur.x;
			double x2 = next.x;
			double y1 = cur.y;
			double y2 = next.y;
            
			double delta = (x2 - x1);
			double delta2 = delta * delta;
			double b = (x - x1) / delta;
			double a = 1 - b;
			double ta = a * y1;
			double tb = b * y2;
			double tc = (a * a * a - a) * derivatives[pivot];
			double td = (b * b * b - b) * derivatives[pivot + 1];
			double y = ta + tb + (delta2 / 6) * (tc + td);
			if (y > 1.0f) {
				y = 1.0f;
			}
			if (y < 0) {
				y = 0;
			}
			curve[i] = (float) (1.0f - y);
		} else {
			curve[i] = 1.0f - next.y;
		}
	}
}
示例#4
0
文件: matrix.c 项目: rforge/blme
// Computes the Cayley transform Q = (I - A)(I + A)^{-1} of the dim x dim
// matrix A stored in source and writes Q to target.
//
// source  dim * dim contiguous doubles holding A. Only the off-diagonal
//         entries are read: I + A and I - A are built with a unit diagonal,
//         which is exact when A has a zero diagonal (e.g. skew-symmetric A,
//         presumably the intended input).
// dim     number of rows/columns; nothing is done when dim <= 0
// target  dim * dim contiguous doubles; used as scratch for the identity
//         matrix and finally overwritten with Q
//
// Failures reported by solveSystem are raised through error().
// NOTE(review): three dim * dim arrays live on the stack, so very large dim
// may exhaust it.
void getCayleyTransform(const double *source, int dim, double *target)
{
  // Nothing to compute for an empty matrix. Returning here also avoids
  // declaring variable-length arrays of non-positive size, which is
  // undefined behavior.
  if (dim <= 0) return;

  double temp1[dim * dim];
  double temp2[dim * dim];
  double temp3[dim * dim];

  double *plusMatrix  = temp1;   // I + A
  double *minusMatrix = temp2;   // I - A
  double *identity    = target;  // I (target doubles as scratch until the end)

  for (int col = 0; col < dim; ++col) {
    int diagonal = col * (dim + 1);

    identity[diagonal]    = 1.0;
    plusMatrix[diagonal]  = 1.0;
    minusMatrix[diagonal] = 1.0;

    // Fill the two mirrored off-diagonal positions (row, col) and (col, row).
    for (int row = col + 1; row < dim; ++row) {
      int below = row + col * dim;
      int above = col + row * dim;

      identity[below] = identity[above] = 0.0;

      plusMatrix[below] = source[below];
      plusMatrix[above] = source[above];

      minusMatrix[below] = -source[below];
      minusMatrix[above] = -source[above];
    }
  }

  double *inverseMatrix = temp3;  // (I + A)^{-1}

  // Solving (I + A) X = I yields the inverse.
  int lapackResult = solveSystem(plusMatrix, dim, identity, dim, inverseMatrix);

  if (lapackResult != 0) {
    if (lapackResult < 0) {
      error("error in call to LAPACK routine 'dgesvx': argument %d illegal", -lapackResult);
    } else if (lapackResult <= dim) {
      error("error in call to LAPACK routine 'dgesvx': factor U(%d) is exactly 0 so that U is singular", lapackResult);
    } else {
      // The message has no conversion specifier, so no argument is passed.
      error("error in call to LAPACK routine 'dgesvx': reciprocal condition estimate below machine tolerance");
    }
  }

  // target no longer needs to hold I; overwrite it with (I - A)(I + A)^{-1}.
  multiplyMatrices(minusMatrix, dim, dim,
                   inverseMatrix, dim, dim,
                   target);
}
// Solve the system for component 0 of x and b by running the index-based
// solveSystem overload once for every checkerboard mode id, i.e. every
// combination of 0/1 in each of the NDIM directions.
//
// If the solver state is not initialized on entry it is initialized for this
// call and deallocated again before returning.
//
// Returns true only if every per-mode solve reported convergence.
bool
CCDivGradHypreLevelSolver::solveSystem(
    SAMRAIVectorReal<NDIM,double>& x,
    SAMRAIVectorReal<NDIM,double>& b)
{
    IBTK_TIMER_START(t_solve_system);

    if (d_enable_logging)
    {
        plog << d_object_name << "::solveSystem():" << std::endl;
    }

    // Set up solver state on demand and remember to tear it down afterwards.
    const bool temporary_state = !d_is_initialized;
    if (temporary_state)
    {
        initializeSolverState(x,b);
    }

    // Only the first vector component is solved for.
    const int component = 0;
    const int x_idx = x.getComponentDescriptorIndex(component);
    const int b_idx = b.getComponentDescriptorIndex(component);

    bool all_converged = true;
    IntVector<NDIM> mode;
#if (NDIM > 2)
    for (mode(2) = 0; mode(2) < 2; ++mode(2))
#endif
    for (mode(1) = 0; mode(1) < 2; ++mode(1))
    for (mode(0) = 0; mode(0) < 2; ++mode(0))
    {
        // Every mode is solved, even after an earlier one failed.
        const bool mode_ok = solveSystem(x_idx, b_idx, mode);
        if (d_enable_logging)
        {
            const char* const outcome = (mode_ok ? "converged" : "diverged");
            plog << d_object_name << "::solveSystem(): solver " << outcome << "\n"
                 << "chkbrd_mode_id = " << mode << "\n"
                 << "iterations = " << d_current_its << "\n"
                 << "residual norm = " << d_current_residual_norm << std::endl;
        }
        all_converged = mode_ok && all_converged;
    }

    if (temporary_state)
    {
        deallocateSolverState();
    }

    IBTK_TIMER_STOP(t_solve_system);
    return all_converged;
}// solveSystem
  /*Entry point run by each worker thread: dispatches on subTask to
    the matching member (allocate, deallocate, copyResult,
    updateBlocks or solveSystem), passing the calling thread
    through. caller is also used to synchronize before the
    dispatch.*/
  void ParallelCGCudaTask<N, T>::execute(const Thread* caller){
    /*Zero range for this task: nothing to allocate or compute.
      NOTE(review): this return happens before caller->sync()
      below; if sync() is a barrier across all threads, confirm
      that a zero-range task skipping it cannot stall the others.*/
    if(cmat->getMRange(TID)->range == 0)
      return;

    /*Only the task with TID 0 refreshes bnorm, on every call
      regardless of subTask. Presumably mul() stores the
      element-wise product b*b in r, making bnorm the 2-norm of
      b -- confirm against Vector<T>::mul.*/
    if(TID == 0){
      Vector<T>::mul(*r, *b, *b);
      bnorm = Sqrt(r->sum());
    }
    caller->sync();

    switch(subTask){
    case Allocate:
#if 1
      try{
        /*Try to allocate all the memory needed. If this fails
          with a CUDAException, mark the task invalid, release
          what was allocated and rethrow.*/
        allocate(caller);
      }catch(CUDAException& e){
        std::cerr << e.getError();
        valid = false;
        deallocate(caller);
        throw;
      }catch(Exception& e){
        /*Any other Exception is reported and rethrown without
          cleanup or invalidation.*/
        std::cerr << e.getError();
        throw;
      }
#else
      /*Disabled variant: allocate without any error handling.*/
      allocate(caller);
#endif
      break;
    case Deallocate:
      deallocate(caller);
      break;
    case CopyResult:
      copyResult(caller);
      break;
    case UpdateBlocks:
      updateBlocks(caller);
      break;
    case SolveSystem:
      solveSystem(caller);
      break;
    }
  }
示例#7
0
文件: matrix_test.c 项目: rforge/blme
// Solves leftHandSide * X = testMatrix3 with solveSystem and compares X
// against the precomputed answer. Returns the result of
// allApproximatelyEqual over all testMatrix3Rows * testMatrix3Columns
// entries, using TEST_TOLERANCE.
static int solveSystem_test() {
  // a rotation matrix
  double leftHandSide[] = {  0.987576802050028, -0.0130012146128751, -0.156598302900223,
                            -0.0359106006778125, -0.988872097570814, -0.144368983527825,
                            -0.152978720126685, 0.148198998189896, -0.977054025181777 };

  // expected solution
  double correctAnswer[] = {  0.68340774192092, 0.617022240899081, -0.40316039594569,
                             -0.214071757425697, 0.0997551565880077, -0.314052969735081 };

  int numElements = testMatrix3Rows * testMatrix3Columns;

  double rightHandSide[numElements];
  double result[numElements];

  // Work on a private copy of the fixture so the solver is never handed
  // the shared test matrix itself.
  int index = 0;
  while (index < numElements) {
    rightHandSide[index] = testMatrix3[index];
    ++index;
  }

  solveSystem(leftHandSide, testMatrix3Rows, rightHandSide, testMatrix3Columns, result);

  return (allApproximatelyEqual(correctAnswer, result, numElements, TEST_TOLERANCE));
}
示例#8
0
/**
 * @brief Updates T_ij with the Schur-complement contribution
 *        T_ij <- T_ij - BT_i * X, where X solves A X = B_j.
 *
 * BT_i and B_j are converted to dense matrices in each process, the sparse
 * system A X = B_j is solved, and BT_i * X is then subtracted from T_ij with
 * dgemm. X is stored as a dense matrix in AB_sol_out.
 *
 * @param A Sparse (0,0)-block of which we want to compute the Schur complement in matrix C
 * @param BT_i Sparse (1,0)-block of C corresponding to T_ij
 * @param B_j Sparse (0,1)-block of C corresponding to T_ij
 * @param T_ij Dense (1,1)-block of C, updated in place
 * @param lld_T local leading dimension of T_ij
 * @param AB_sol_out Dense solution of AX=B_j (output)
 * @return 0 on success, EXIT_FAILURE if a dense work buffer could not be
 *         allocated (no buffer is leaked in that case)
 **/
int make_Sij_parallel_denseB(CSRdouble& A, CSRdouble& BT_i, CSRdouble& B_j, double * T_ij, int lld_T, double * AB_sol_out) {

    assert(A.nrows == BT_i.ncols);

    // Element counts are formed in size_t so that large blocks cannot
    // overflow the integer arithmetic before the allocation.
    const size_t BT_i_elems = static_cast<size_t>(BT_i.nrows) * static_cast<size_t>(BT_i.ncols);
    const size_t B_j_elems  = static_cast<size_t>(B_j.nrows) * static_cast<size_t>(B_j.ncols);

    double *BT_i_dense = static_cast<double *>(calloc(BT_i_elems, sizeof(double)));
    if ( BT_i_dense == NULL ) {
        printf ( "unable to allocate memory for dense matrix BT_i  (required: %zu bytes)\n", BT_i_elems * sizeof ( double ) );
        return EXIT_FAILURE;
    }

    double *B_j_dense = static_cast<double *>(calloc(B_j_elems, sizeof(double)));
    if ( B_j_dense == NULL ) {
        printf ( "unable to allocate memory for dense matrix B_j  (required: %zu bytes)\n", B_j_elems * sizeof ( double ) );
        // Release the buffer that was already obtained before bailing out.
        free(BT_i_dense);
        return EXIT_FAILURE;
    }

    CSR2dense(BT_i,BT_i_dense);
    CSR2dense(B_j,B_j_dense);

    solveSystem(A, AB_sol_out,B_j_dense, 2, B_j.ncols);

    printf("Processor %d finished solving system AX=B\n",iam);

    // T_ij <- d_one * T_ij + d_negone * BT_i_dense * AB_sol_out
    dgemm_("N","N",&(BT_i.nrows),&(B_j.ncols),&(BT_i.ncols),&d_negone,BT_i_dense,&(BT_i.nrows),
           AB_sol_out,&(A.nrows),&d_one,T_ij,&lld_T
          );

    free(BT_i_dense);
    free(B_j_dense);

    return 0;
}