Пример #1
0
// Fills destination rows fromRow..toRow (both inclusive) of fScaledImage with
// a shrunken version of the source image.
//
// Each destination pixel covers a deltaX x deltaY rectangle of the source.
// The channel sums of the covered source rows are accumulated (the first and
// last row weighted by the fraction of the row that is covered) and divided
// by the rectangle area deltaX * deltaY. The horizontal part of the sum is
// delegated to RowValues(). Only bytes 0..2 of every 4-byte destination pixel
// are written; byte 3 is left as it is.
//
// The row loop stops early as soon as IsRunning() returns false.
void
Scaler::DownScaleBilinear(intType fromRow, int32 toRow)
{
	const int32 kBytesPerPixel = 4;
	const int kChannels = 3;

	BBitmap* const source = GetSrcImage();
	BBitmap* const target = fScaledImage;

	// The loops below run up to and including these values, so they are
	// treated as the index of the last column/row (hence the "+ 1.0" when
	// the scale factors are computed).
	const intType srcLastX = source->Bounds().IntegerWidth();
	const intType srcLastY = source->Bounds().IntegerHeight();
	const intType dstLastX = target->Bounds().IntegerWidth();
	const intType dstLastY = target->Bounds().IntegerHeight();

	const uchar* const srcBase = static_cast<const uchar*>(source->Bits());
	uchar* const dstBase = static_cast<uchar*>(target->Bits());
	const intType srcStride = source->BytesPerRow();
	const intType dstStride = target->BytesPerRow();

	// Size of one destination pixel measured in source pixels.
	const float deltaX = (srcLastX + 1.0) / (dstLastX + 1.0);
	const float deltaY = (srcLastY + 1.0) / (dstLastY + 1.0);
	const float deltaXY = deltaX * deltaY;

	// The horizontal span and edge weights depend only on the destination
	// column, so compute them once instead of once per row.
	DownScaleColumnData* const columns = new DownScaleColumnData[dstLastX + 1];
	for (intType x = 0; x <= dstLastX; x++) {
		DownScaleColumnData& column = columns[x];

		const float left = x * deltaX;
		const float right = left + deltaX;

		column.from = static_cast<intType>(left);
		column.to = static_cast<intType>(right);

		// Fraction of the first / last source column that lies inside the span.
		column.alpha0 = 1.0 - (left - column.from);
		column.alpha1 = right - column.to;
	}

	for (intType y = fromRow; IsRunning() && y <= toRow; y++) {
		const float top = y * deltaY;
		const float bottom = top + deltaY;

		const intType firstRow = static_cast<intType>(top);
		const intType lastRow = static_cast<intType>(bottom);

		// Fraction of the first / last source row that lies inside the span.
		const float firstRowWeight = 1.0 - (top - firstRow);
		const float lastRowWeight = bottom - lastRow;

		const uchar* const srcRowStart = srcBase + firstRow * srcStride;
		uchar* const dstRowStart = dstBase + y * dstStride;

		for (intType x = 0; x <= dstLastX; x++) {
			const DownScaleColumnData& column = columns[x];

			const intType firstCol = column.from;
			const intType lastCol = column.to;
			const float firstColWeight = column.alpha0;
			const float lastColWeight = column.alpha1;

			// Walks down the source rows covered by this destination pixel.
			const uchar* srcPixel = srcRowStart + firstCol * kBytesPerPixel;
			uchar* const dstPixel = dstRowStart + x * kBytesPerPixel;

			float pixelSum[kChannels];
			float rowSum[kChannels];

			// First covered row: partially covered.
			RowValues(rowSum, srcPixel, srcLastX, firstCol, lastCol,
				firstColWeight, lastColWeight, kBytesPerPixel);
			for (int c = 0; c < kChannels; c++)
				pixelSum[c] = firstRowWeight * rowSum[c];

			srcPixel += srcStride;

			// Rows strictly between the first and the last one: fully covered.
			for (int32 row = firstRow + 1; row < lastRow;
					row++, srcPixel += srcStride) {
				RowValues(rowSum, srcPixel, srcLastX, firstCol, lastCol,
					firstColWeight, lastColWeight, kBytesPerPixel);
				for (int c = 0; c < kChannels; c++)
					pixelSum[c] += rowSum[c];
			}

			// Last covered row: partially covered, and skipped when it would
			// lie below the last source row.
			if (lastRow <= srcLastY) {
				RowValues(rowSum, srcPixel, srcLastX, firstCol, lastCol,
					firstColWeight, lastColWeight, kBytesPerPixel);
				for (int c = 0; c < kChannels; c++)
					pixelSum[c] += lastRowWeight * rowSum[c];
			}

			// Normalize by the covered area to get the average.
			for (int c = 0; c < kChannels; c++)
				dstPixel[c] = static_cast<uchar>(pixelSum[c] / deltaXY);
		}
	}

	delete[] columns;
}
Пример #2
0
//==========================================================================
// Drops the off-processor entries of a row copy: the first RowNnz entries of
// Indices/Values are compacted in place so that only those whose local column
// index is below NumMyRows remain. Returns the number of entries kept.
static int Ifpack_ICT_KeepLocalEntries(const int NumMyRows, const int RowNnz,
                                       std::vector<int>& Indices,
                                       std::vector<double>& Values)
{
  int count = 0;
  for (int i = 0 ; i < RowNnz ; ++i) {
    if (Indices[i] < NumMyRows) {
      Indices[count] = Indices[i];
      Values[count] = Values[i];
      ++count;
    }
  }
  return(count);
}

//==========================================================================
// Computes the factor H_ of the local part of A_, one row at a time.
//
// For every local row the lower-triangular entries are loaded into a hash
// table, updated against the already computed rows of H_, filtered by
// DropTolerance_ and by a per-row entry budget derived from LevelOfFill(),
// and finally inserted into H_ together with the (square-rooted) diagonal.
// When run on more than one process, entries whose local column index is
// >= NumMyRows_ are ignored and H_ is built on a purely local (serial) map.
//
// Returns 0 on success, or the negative error code of the first failing
// Ifpack/Epetra call (a non-zero code for the insertion of the first row).
// On success IsComputed_ is set, and NumCompute_, ComputeTime_ and
// GlobalNonzeros_ are updated.
int Ifpack_ICT::Compute() 
{
  if (!IsInitialized()) 
    IFPACK_CHK_ERR(Initialize());

  Time_.ResetStartTime();
  IsComputed_ = false;

  NumMyRows_ = A_.NumMyRows();
  int Length = A_.MaxNumEntries();
  vector<int>    RowIndices(Length);
  vector<double> RowValues(Length);

  const bool distributed = (Comm().NumProc() > 1);

  if (distributed)
  {
    SerialComm_ = Teuchos::rcp(new Epetra_SerialComm);
    SerialMap_ = Teuchos::rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
    assert (SerialComm_.get() != 0);
    assert (SerialMap_.get() != 0);
  }
  else
    // non-owning handle: the row map belongs to A_
    SerialMap_ = Teuchos::rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);

  int RowNnz;
#ifdef IFPACK_FLOPCOUNTERS
  double flops = 0.0;
#endif

  H_ = Teuchos::rcp(new Epetra_CrsMatrix(Copy,*SerialMap_,0));
  if (H_.get() == 0)
    IFPACK_CHK_ERR(-5); // memory allocation error

  // get A(0,0) element and insert it (after sqrt)
  IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnz,
                                     &RowValues[0],&RowIndices[0]));

  // skip off-processor elements
  if (distributed)
    RowNnz = Ifpack_ICT_KeepLocalEntries(NumMyRows_, RowNnz,
                                         RowIndices, RowValues);

  // modify diagonal
  double diag_val = 0.0;
  for (int i = 0 ; i < RowNnz ; ++i) {
    if (RowIndices[i] == 0) {
      const double v = RowValues[i];
      diag_val = AbsoluteThreshold() * EPETRA_SGN(v) +
        RelativeThreshold() * v;
      break;
    }
  }

  // NOTE(review): unlike the other rows (see the h_ii < 0.0 guard below),
  // a negative or missing first diagonal is not protected here -- confirm
  // whether that is intended.
  diag_val = sqrt(diag_val);
  int diag_idx = 0;
  EPETRA_CHK_ERR(H_->InsertGlobalValues(0,1,&diag_val, &diag_idx));

  // The 10 is just a small constant to limit collisions as the actual keys
  // we store are the indices and not integers
  // [0..A_.MaxNumEntries()*LevelofFill()].
  Ifpack_HashTable Hash( 10 * A_.MaxNumEntries() * LevelOfFill(), 1);

  // start factorization for line 1
  for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) {

    // get row `row_i' of the matrix
    IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnz,
                                       &RowValues[0],&RowIndices[0]));

    // skip off-processor elements
    if (distributed)
      RowNnz = Ifpack_ICT_KeepLocalEntries(NumMyRows_, RowNnz,
                                           RowIndices, RowValues);

    // number of nonzeros in this row are defined as the nonzeros
    // of the matrix, plus the level of fill 
    int LOF = static_cast<int>(LevelOfFill() * RowNnz);
    if (LOF == 0) LOF = 1;

    // convert line `row_i' into hash for fast access
    Hash.reset();

    double h_ii = 0.0;
    // Smallest column index found strictly below the diagonal; stays at
    // row_i when the row has no such entry (or no entries at all), in which
    // case the elimination loop below does not run. Scanning for the minimum
    // instead of reading RowIndices[0] avoids using a stale entry of an
    // empty row and does not rely on the row copy being sorted.
    int first_col = row_i;
    for (int i = 0 ; i < RowNnz ; ++i) {
      const int col = RowIndices[i];
      if (col == row_i) {
        const double v = RowValues[i];
        h_ii = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
      }
      else if (col < row_i)
      {
        Hash.set(col, RowValues[i], true);
        if (col < first_col)
          first_col = col;
      }
    }
      
    // form element (row_i, col_j)
    // I start from the first row that has a nonzero column
    // index in row_i.
    for (int col_j = first_col ; col_j < row_i ; ++col_j) {

      double h_ij = 0.0, h_jj = 0.0;
      // note: get() returns 0.0 if col_j is not found
      h_ij = Hash.get(col_j);

      // get pointers to row `col_j'
      int* ColIndices;
      double* ColValues;
      int ColNnz;
      // Store the code first so the call is made exactly once, then bail
      // out on failure rather than walking uninitialised view pointers.
      const int view_err =
        H_->ExtractGlobalRowView(col_j, ColNnz, ColValues, ColIndices);
      IFPACK_CHK_ERR(view_err);

      for (int k = 0 ; k < ColNnz ; ++k) {
        int col_k = ColIndices[k];

        if (col_k == col_j)
          h_jj = ColValues[k];
        else {
          double xxx = Hash.get(col_k);
          if (xxx != 0.0)
          {
            h_ij -= ColValues[k] * xxx;
#ifdef IFPACK_FLOPCOUNTERS
            flops += 2.0;
#endif
          }
        }
      }

      h_ij /= h_jj;

      // entries not larger than the drop tolerance are not stored
      if (IFPACK_ABS(h_ij) > DropTolerance_)
      {
        Hash.set(col_j, h_ij);
      }
    
#ifdef IFPACK_FLOPCOUNTERS
      // only approx
      ComputeFlops_ += 2.0 * flops + 1.0;
#endif
    }

    const int size = Hash.getNumEntries();

    vector<double> AbsRow(size);
    
    // +1 because I use the extra position for diagonal in insert
    vector<int> keys(size + 1);
    vector<double> values(size + 1);

    Hash.arrayify(&keys[0], &values[0]);

    for (int i = 0 ; i < size ; ++i)
    {
      AbsRow[i] = IFPACK_ABS(values[i]);
    }

    // If the row holds more than LOF entries, find the magnitude of the
    // (LOF+1)-th largest one; only entries strictly larger than it survive.
    double cutoff = 0.0;
    if (size > LOF) {
      std::nth_element(AbsRow.begin(), AbsRow.begin() + LOF,
                       AbsRow.begin() + size, std::greater<double>());
      cutoff = AbsRow[LOF];
    }

    // the diagonal update uses all entries, including those dropped below
    for (int i = 0 ; i < size ; ++i)
    {
      h_ii -= values[i] * values[i];
    }

    // keep the argument of sqrt() non-negative
    if (h_ii < 0.0) h_ii = 1e-12;

    h_ii = sqrt(h_ii);

#ifdef IFPACK_FLOPCOUNTERS
    // only approx, + 1 == sqrt
    ComputeFlops_ += 2 * size + 1;
#endif

    double DiscardedElements = 0.0;

    // compact the surviving entries to the front of keys/values
    int count = 0;
    for (int i = 0 ; i < size ; ++i)    
    { 
      if (IFPACK_ABS(values[i]) > cutoff)
      {
        values[count] = values[i];
        keys[count] = keys[i];
        ++count;
      }
      else  
        DiscardedElements += values[i];
    }

    if (RelaxValue() != 0.0) {
      DiscardedElements *= RelaxValue();
      h_ii += DiscardedElements;
    }

    // the diagonal goes into the extra slot reserved above
    values[count] = h_ii;
    keys[count] = row_i;
    ++count;

    // A failed insertion would leave H_ without this row; report it instead
    // of continuing. IFPACK_CHK_ERR is used as for the other calls in this
    // loop; the code is stored first so the insertion happens exactly once.
    const int insert_err =
      H_->InsertGlobalValues(row_i, count, &values[0], &keys[0]);
    IFPACK_CHK_ERR(insert_err);
  }

  IFPACK_CHK_ERR(H_->FillComplete());

#if 0
  // to check the complete factorization
  Epetra_Vector LHS(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS1(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS2(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS3(Matrix().RowMatrixRowMap());
  LHS.Random();

  Matrix().Multiply(false,LHS,RHS1);
  H_->Multiply(true,LHS,RHS2);
  H_->Multiply(false,RHS2,RHS3);

  RHS1.Update(-1.0, RHS3, 1.0);
  cout << endl;
  cout << RHS1;
#endif
  // sum the per-process nonzero counts of H_ over all processes
  int MyNonzeros = H_->NumGlobalNonzeros();
  Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);

  IsComputed_ = true;
#ifdef IFPACK_FLOPCOUNTERS
  double TotalFlops; // sum across all the processors
  A_.Comm().SumAll(&flops, &TotalFlops, 1);
  ComputeFlops_ += TotalFlops;
#endif
  ++NumCompute_;
  ComputeTime_ += Time_.ElapsedTime();

  return(0);

}