void HostMatrixELL<ValueType>::AllocateELL(const int nnz, const int nrow, const int ncol, const int max_row) {

  assert( nnz   >= 0);
  assert( ncol  >= 0);
  assert( nrow  >= 0);
  assert( max_row >= 0);

  if (this->nnz_ > 0)
    this->Clear();

  if (nnz > 0) {

    assert(nnz == max_row * nrow);

    allocate_host(nnz, &this->mat_.val);
    allocate_host(nnz, &this->mat_.col);

    set_to_zero_host(nnz, this->mat_.val);
    set_to_zero_host(nnz, this->mat_.col);

    this->mat_.max_row = max_row;
    this->nrow_ = nrow;
    this->ncol_ = ncol;
    this->nnz_  = nnz;

  }

}
Exemplo n.º 2
0
void HostMatrixCOO<ValueType>::AllocateCOO(const int nnz, const int nrow, const int ncol) {

  assert( nnz >= 0);
  assert( ncol >= 0);
  assert( nrow >= 0);

  if (this->get_nnz() > 0)
    this->Clear();

  if (nnz > 0) {

    allocate_host(nnz, &this->mat_.row);
    allocate_host(nnz, &this->mat_.col);
    allocate_host(nnz, &this->mat_.val);
 
    set_to_zero_host(nnz, this->mat_.row);
    set_to_zero_host(nnz, this->mat_.col);
    set_to_zero_host(nnz, this->mat_.val);
    
    this->nrow_ = nrow;
    this->ncol_ = ncol;
    this->nnz_  = nnz;
  }

}
Exemplo n.º 3
0
void HostVector<ValueType>::Allocate(const int n) {

  assert(n >= 0);

  if (this->size_ >0)
    this->Clear();

  if (n > 0) {

    allocate_host(n, &this->vec_);

    set_to_zero_host(n, this->vec_);

    this->size_ = n;

  }

}
bool HostMatrixCOO<ValueType>::PermuteBackward(const BaseVector<int> &permutation) {

    assert(&permutation != NULL);

    // symmetric permutation only
    assert( (permutation.get_size() == this->nrow_) &&
            (permutation.get_size() == this->ncol_) );

    const HostVector<int> *cast_perm = dynamic_cast<const HostVector<int>*> (&permutation);
    assert(cast_perm != NULL);

    HostMatrixCOO<ValueType> src(this->local_backend_);
    src.AllocateCOO(this->nnz_, this->nrow_, this->ncol_);
    src.CopyFrom(*this);

    _set_omp_backend_threads(this->local_backend_, this->nnz_);

    // TODO
    // Is there a better way?
    int *pb = NULL;
    allocate_host(this->nrow_, &pb);

    #pragma omp parallel for
    for (int i=0; i<this->nrow_; ++i)
        pb [ cast_perm->vec_[i] ] = i;

    #pragma omp parallel for
    for (int i=0; i<this->nnz_; ++i) {

        this->mat_.row[i] = pb[src.mat_.row[i]];
        this->mat_.col[i] = pb[src.mat_.col[i]];

    }

    free_host(&pb);

    return true;

}
Exemplo n.º 5
0
void MixedPrecisionDC<OperatorTypeH, VectorTypeH, ValueTypeH, 
                      OperatorTypeL, VectorTypeL, ValueTypeL>::Build(void) {

  if (this->build_ == true)
    this->Clear();

  assert(this->build_ == false);
  this->build_ = true;

  assert(this->Solver_L_ != NULL);
  assert(this->op_ != NULL);

  this->op_h_ = this->op_;

  assert(this->op_->get_nrow() == this->op_->get_ncol());
  assert(this->op_->get_nrow() > 0);

  assert(this->op_l_ == NULL);
  this->op_l_ = new OperatorTypeL;
        
  this->r_l_.Allocate("r_l", this->op_l_->get_nrow());  
  this->r_h_.Allocate("r_h", this->op_h_->get_nrow());

  this->d_h_.Allocate("d_h", this->op_h_->get_nrow());
  this->d_l_.Allocate("d_l", this->op_h_->get_nrow());
  
  // TODO - ugly
  // copy the matrix

  // CSR H
  int *row_offset = NULL;
  int *col = NULL;
  ValueTypeH *val_h = NULL;

  // CSR L
  ValueTypeL *val_l = NULL;

  allocate_host(this->op_h_->get_nrow()+1, &row_offset);
  allocate_host(this->op_h_->get_nnz(),    &col);
  allocate_host(this->op_h_->get_nnz(),    &val_l);
  allocate_host(this->op_h_->get_nnz(),    &val_h);
  
  this->op_h_->CopyToCSR(row_offset, col, val_h);

  for (int i=0; i<this->op_h_->get_nnz(); ++i)
    val_l[i] = ValueTypeL( val_h[i] );
  
  this->op_l_->SetDataPtrCSR(&row_offset, &col, &val_l,
                             "Low prec Matrix", 
                             this->op_h_->get_nnz(),
                             this->op_h_->get_nrow(),
                             this->op_h_->get_ncol());
  
  // free only the h prec values
  free_host(&val_h);

  this->Solver_L_->SetOperator(*this->op_l_);
  this->Solver_L_->Build();

  this->op_l_->MoveToAccelerator();
  this->Solver_L_->MoveToAccelerator();
}
void MultiColored<OperatorType, VectorType, ValueType>::Decompose_(void) {

  LOG_DEBUG(this, "MultiColored::Decompose_()",
            " * beging");

  if (this->decomp_ == true) {

    assert(this->num_blocks_ > 0);
    assert(this->block_sizes_ != NULL);

    int *offsets = NULL;
    allocate_host(this->num_blocks_+1, &offsets);    

    offsets[0] = 0 ;
    for(int i=0; i<this->num_blocks_; ++i)
      offsets[i+1] = this->block_sizes_[i];

    // sum up
    for(int i=0; i<this->num_blocks_; ++i)
      offsets[i+1] += offsets[i];

    this->diag_solver_ = new Solver<OperatorType, VectorType, ValueType>* [this->num_blocks_];

    this->preconditioner_block_ = new OperatorType** [this->num_blocks_]; 
    for (int i=0; i<this->num_blocks_; ++i) 
      this->preconditioner_block_[i] = new OperatorType* [this->num_blocks_];

    this->x_block_    = new VectorType* [this->num_blocks_];
    this->diag_block_ = new VectorType* [this->num_blocks_];

    for(int i=0; i<this->num_blocks_; ++i)
      for(int j=0; j<this->num_blocks_; ++j) {
        this->preconditioner_block_[i][j] = new LocalMatrix<ValueType>;
        this->preconditioner_block_[i][j]->CloneBackend( *this->op_ );
        
      }

    this->preconditioner_->ExtractSubMatrices(this->num_blocks_,
                                              this->num_blocks_,
                                              offsets,
                                              offsets,
                                              this->preconditioner_block_);

    free_host(&offsets);

    int x_offset = 0;
    for (int i=0; i<this->num_blocks_; ++i) {
      this->diag_block_[i] = new VectorType;
      this->diag_block_[i]->CloneBackend(*this->op_); // clone backend
      this->diag_block_[i]->Allocate("Diagonal preconditioners blocks",
                                     this->block_sizes_[i]);
      
      this->preconditioner_block_[i][i]->ExtractDiagonal(this->diag_block_[i]);

      
      this->x_block_[i] = new VectorType; // empty vector
      
      this->x_block_[i]->CloneBackend(*this->op_); // clone backend
      this->x_block_[i]->Allocate("MultiColored Preconditioner x_block_",
                                  this->block_sizes_[i]);
      
      
      x_offset += this->block_sizes_[i];

      Jacobi<OperatorType, VectorType, ValueType> *jacobi = new Jacobi<OperatorType, VectorType, ValueType>;
      jacobi->SetOperator(*this->preconditioner_block_[i][i]);
      jacobi->Build();
      
      this->diag_solver_[i] = jacobi;
      
      this->preconditioner_block_[i][i]->Clear();
    }
    
    // Clone the format 
    // e.g. the preconditioner block matrices will have the same format as this->op_
    if (this->op_mat_format_ == true)
      for(int i=0; i<this->num_blocks_; ++i)
        for(int j=0; j<this->num_blocks_; ++j)
          this->preconditioner_block_[i][j]->ConvertTo(this->precond_mat_format_);
  
  } else {

    this->diag_.CloneBackend(*this->op_);
    this->preconditioner_->ExtractDiagonal(&this->diag_);

  }

  this->x_.CloneBackend(*this->op_); 
  this->x_.Allocate("Permuted solution vector",
                    this->op_->get_nrow());

  LOG_DEBUG(this, "MultiColored::Decompose_()",
            " * end");

}
Exemplo n.º 7
0
void HostVector<ValueType>::Assemble(const int *ii, const ValueType *v,
                                     int size, const int n) {

  assert(ii != NULL);
  assert(v != NULL);
  assert(size > 0);
  assert(n >= 0);

  _set_omp_backend_threads(this->local_backend_, this->size_);
  const int nThreads = omp_get_max_threads();

  int N = n;

  if (N == 0) {

#pragma omp parallel for
    for (int i=0; i<size; ++i) {

      assert(ii[i] >= 0);

      int val = ii[i]+1;

      if (val > N) {
#pragma omp critical
{
        N = val;
}
      }

    }

    this->Clear();
    this->Allocate(N);

  }

  if (nThreads <= 2) {

    // serial
    for (int i=0; i<size; ++i)
      this->vec_[ ii[i] ] += v[i];

  } else {

    // parallel
    ValueType **v_red;

    v_red = (ValueType **) malloc(nThreads*sizeof(ValueType*));

    for (int k=0; k<nThreads; ++k) {

      v_red[k] = NULL;
      allocate_host(N, &v_red[k]);
      set_to_zero_host(N, v_red[k]);

    }

#pragma omp parallel
{
    const int me = omp_get_thread_num();
    const int istart = size*me/nThreads;
    const int iend = size*(me+1)/nThreads;

    for (int i = istart; i < iend; i++)
      v_red[me][ii[i]] += v[i];
}

#pragma omp parallel
{
    const int me = omp_get_thread_num();
    const int istart = N*me/nThreads;
    const int iend = N*(me+1)/nThreads;

    for (int i = istart; i < iend; ++i)
      for (int k=0; k<nThreads; ++k)
        this->vec_[i] += v_red[k][i];

}

    for (int k=0; k<nThreads; ++k)
      free_host(&v_red[k]);

    free(v_red);

  }

}