void HostMatrixELL<ValueType>::AllocateELL(const int nnz, const int nrow, const int ncol, const int max_row) { assert( nnz >= 0); assert( ncol >= 0); assert( nrow >= 0); assert( max_row >= 0); if (this->nnz_ > 0) this->Clear(); if (nnz > 0) { assert(nnz == max_row * nrow); allocate_host(nnz, &this->mat_.val); allocate_host(nnz, &this->mat_.col); set_to_zero_host(nnz, this->mat_.val); set_to_zero_host(nnz, this->mat_.col); this->mat_.max_row = max_row; this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; } }
void HostMatrixCOO<ValueType>::AllocateCOO(const int nnz, const int nrow, const int ncol) { assert( nnz >= 0); assert( ncol >= 0); assert( nrow >= 0); if (this->get_nnz() > 0) this->Clear(); if (nnz > 0) { allocate_host(nnz, &this->mat_.row); allocate_host(nnz, &this->mat_.col); allocate_host(nnz, &this->mat_.val); set_to_zero_host(nnz, this->mat_.row); set_to_zero_host(nnz, this->mat_.col); set_to_zero_host(nnz, this->mat_.val); this->nrow_ = nrow; this->ncol_ = ncol; this->nnz_ = nnz; } }
void HostVector<ValueType>::Allocate(const int n) { assert(n >= 0); if (this->size_ >0) this->Clear(); if (n > 0) { allocate_host(n, &this->vec_); set_to_zero_host(n, this->vec_); this->size_ = n; } }
bool HostMatrixCOO<ValueType>::PermuteBackward(const BaseVector<int> &permutation) { assert(&permutation != NULL); // symmetric permutation only assert( (permutation.get_size() == this->nrow_) && (permutation.get_size() == this->ncol_) ); const HostVector<int> *cast_perm = dynamic_cast<const HostVector<int>*> (&permutation); assert(cast_perm != NULL); HostMatrixCOO<ValueType> src(this->local_backend_); src.AllocateCOO(this->nnz_, this->nrow_, this->ncol_); src.CopyFrom(*this); _set_omp_backend_threads(this->local_backend_, this->nnz_); // TODO // Is there a better way? int *pb = NULL; allocate_host(this->nrow_, &pb); #pragma omp parallel for for (int i=0; i<this->nrow_; ++i) pb [ cast_perm->vec_[i] ] = i; #pragma omp parallel for for (int i=0; i<this->nnz_; ++i) { this->mat_.row[i] = pb[src.mat_.row[i]]; this->mat_.col[i] = pb[src.mat_.col[i]]; } free_host(&pb); return true; }
void MixedPrecisionDC<OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL>::Build(void) { if (this->build_ == true) this->Clear(); assert(this->build_ == false); this->build_ = true; assert(this->Solver_L_ != NULL); assert(this->op_ != NULL); this->op_h_ = this->op_; assert(this->op_->get_nrow() == this->op_->get_ncol()); assert(this->op_->get_nrow() > 0); assert(this->op_l_ == NULL); this->op_l_ = new OperatorTypeL; this->r_l_.Allocate("r_l", this->op_l_->get_nrow()); this->r_h_.Allocate("r_h", this->op_h_->get_nrow()); this->d_h_.Allocate("d_h", this->op_h_->get_nrow()); this->d_l_.Allocate("d_l", this->op_h_->get_nrow()); // TODO - ugly // copy the matrix // CSR H int *row_offset = NULL; int *col = NULL; ValueTypeH *val_h = NULL; // CSR L ValueTypeL *val_l = NULL; allocate_host(this->op_h_->get_nrow()+1, &row_offset); allocate_host(this->op_h_->get_nnz(), &col); allocate_host(this->op_h_->get_nnz(), &val_l); allocate_host(this->op_h_->get_nnz(), &val_h); this->op_h_->CopyToCSR(row_offset, col, val_h); for (int i=0; i<this->op_h_->get_nnz(); ++i) val_l[i] = ValueTypeL( val_h[i] ); this->op_l_->SetDataPtrCSR(&row_offset, &col, &val_l, "Low prec Matrix", this->op_h_->get_nnz(), this->op_h_->get_nrow(), this->op_h_->get_ncol()); // free only the h prec values free_host(&val_h); this->Solver_L_->SetOperator(*this->op_l_); this->Solver_L_->Build(); this->op_l_->MoveToAccelerator(); this->Solver_L_->MoveToAccelerator(); }
void MultiColored<OperatorType, VectorType, ValueType>::Decompose_(void) { LOG_DEBUG(this, "MultiColored::Decompose_()", " * beging"); if (this->decomp_ == true) { assert(this->num_blocks_ > 0); assert(this->block_sizes_ != NULL); int *offsets = NULL; allocate_host(this->num_blocks_+1, &offsets); offsets[0] = 0 ; for(int i=0; i<this->num_blocks_; ++i) offsets[i+1] = this->block_sizes_[i]; // sum up for(int i=0; i<this->num_blocks_; ++i) offsets[i+1] += offsets[i]; this->diag_solver_ = new Solver<OperatorType, VectorType, ValueType>* [this->num_blocks_]; this->preconditioner_block_ = new OperatorType** [this->num_blocks_]; for (int i=0; i<this->num_blocks_; ++i) this->preconditioner_block_[i] = new OperatorType* [this->num_blocks_]; this->x_block_ = new VectorType* [this->num_blocks_]; this->diag_block_ = new VectorType* [this->num_blocks_]; for(int i=0; i<this->num_blocks_; ++i) for(int j=0; j<this->num_blocks_; ++j) { this->preconditioner_block_[i][j] = new LocalMatrix<ValueType>; this->preconditioner_block_[i][j]->CloneBackend( *this->op_ ); } this->preconditioner_->ExtractSubMatrices(this->num_blocks_, this->num_blocks_, offsets, offsets, this->preconditioner_block_); free_host(&offsets); int x_offset = 0; for (int i=0; i<this->num_blocks_; ++i) { this->diag_block_[i] = new VectorType; this->diag_block_[i]->CloneBackend(*this->op_); // clone backend this->diag_block_[i]->Allocate("Diagonal preconditioners blocks", this->block_sizes_[i]); this->preconditioner_block_[i][i]->ExtractDiagonal(this->diag_block_[i]); this->x_block_[i] = new VectorType; // empty vector this->x_block_[i]->CloneBackend(*this->op_); // clone backend this->x_block_[i]->Allocate("MultiColored Preconditioner x_block_", this->block_sizes_[i]); x_offset += this->block_sizes_[i]; Jacobi<OperatorType, VectorType, ValueType> *jacobi = new Jacobi<OperatorType, VectorType, ValueType>; jacobi->SetOperator(*this->preconditioner_block_[i][i]); jacobi->Build(); this->diag_solver_[i] = jacobi; this->preconditioner_block_[i][i]->Clear(); } // Clone the format // e.g. the preconditioner block matrices will have the same format as this->op_ if (this->op_mat_format_ == true) for(int i=0; i<this->num_blocks_; ++i) for(int j=0; j<this->num_blocks_; ++j) this->preconditioner_block_[i][j]->ConvertTo(this->precond_mat_format_); } else { this->diag_.CloneBackend(*this->op_); this->preconditioner_->ExtractDiagonal(&this->diag_); } this->x_.CloneBackend(*this->op_); this->x_.Allocate("Permuted solution vector", this->op_->get_nrow()); LOG_DEBUG(this, "MultiColored::Decompose_()", " * end"); }
void HostVector<ValueType>::Assemble(const int *ii, const ValueType *v, int size, const int n) { assert(ii != NULL); assert(v != NULL); assert(size > 0); assert(n >= 0); _set_omp_backend_threads(this->local_backend_, this->size_); const int nThreads = omp_get_max_threads(); int N = n; if (N == 0) { #pragma omp parallel for for (int i=0; i<size; ++i) { assert(ii[i] >= 0); int val = ii[i]+1; if (val > N) { #pragma omp critical { N = val; } } } this->Clear(); this->Allocate(N); } if (nThreads <= 2) { // serial for (int i=0; i<size; ++i) this->vec_[ ii[i] ] += v[i]; } else { // parallel ValueType **v_red; v_red = (ValueType **) malloc(nThreads*sizeof(ValueType*)); for (int k=0; k<nThreads; ++k) { v_red[k] = NULL; allocate_host(N, &v_red[k]); set_to_zero_host(N, v_red[k]); } #pragma omp parallel { const int me = omp_get_thread_num(); const int istart = size*me/nThreads; const int iend = size*(me+1)/nThreads; for (int i = istart; i < iend; i++) v_red[me][ii[i]] += v[i]; } #pragma omp parallel { const int me = omp_get_thread_num(); const int istart = N*me/nThreads; const int iend = N*(me+1)/nThreads; for (int i = istart; i < iend; ++i) for (int k=0; k<nThreads; ++k) this->vec_[i] += v_red[k][i]; } for (int k=0; k<nThreads; ++k) free_host(&v_red[k]); free(v_red); } }