// Allocate zero-initialised COO storage (row, col, val arrays of length nnz)
// and record the matrix dimensions.
//
//   nnz  - number of stored entries, must be >= 0
//   nrow - number of rows, must be >= 0
//   ncol - number of columns, must be >= 0
//
// Any existing content is released first. When nnz is 0 nothing is allocated
// and the stored dimensions are left as Clear() (or the previous state) left
// them.
void HostMatrixCOO<ValueType>::AllocateCOO(const int nnz, const int nrow, const int ncol) {

  assert( nnz >= 0);
  assert( ncol >= 0);
  assert( nrow >= 0);

  // Release whatever the matrix currently holds
  if (this->get_nnz() > 0) {
    this->Clear();
  }

  // Nothing to allocate for an empty matrix
  if (nnz <= 0) {
    return;
  }

  // Index and value arrays all have one slot per stored entry
  allocate_host(nnz, &this->mat_.row);
  allocate_host(nnz, &this->mat_.col);
  allocate_host(nnz, &this->mat_.val);

  set_to_zero_host(nnz, this->mat_.row);
  set_to_zero_host(nnz, this->mat_.col);
  set_to_zero_host(nnz, this->mat_.val);

  // Publish the new shape only after the storage exists
  this->nrow_ = nrow;
  this->ncol_ = ncol;
  this->nnz_  = nnz;

}
// Allocate zero-initialised ELL storage (val and col arrays of length nnz)
// and record the matrix dimensions and the per-row width.
//
//   nnz     - total number of stored slots, must be >= 0 and, when positive,
//             equal to max_row * nrow
//   nrow    - number of rows, must be >= 0
//   ncol    - number of columns, must be >= 0
//   max_row - number of slots per row, must be >= 0
//
// Any existing content is released first. When nnz is 0 nothing is allocated
// and no member is updated here.
void HostMatrixELL<ValueType>::AllocateELL(const int nnz, const int nrow, const int ncol, const int max_row) {

  assert( nnz >= 0);
  assert( ncol >= 0);
  assert( nrow >= 0);
  assert( max_row >= 0);

  // Release whatever the matrix currently holds
  if (this->nnz_ > 0) {
    this->Clear();
  }

  // Nothing to allocate for an empty matrix
  if (nnz <= 0) {
    return;
  }

  // ELL stores the same number of slots for every row
  assert(nnz == max_row * nrow);

  allocate_host(nnz, &this->mat_.val);
  allocate_host(nnz, &this->mat_.col);

  set_to_zero_host(nnz, this->mat_.val);
  set_to_zero_host(nnz, this->mat_.col);

  // Publish the new shape only after the storage exists
  this->mat_.max_row = max_row;
  this->nrow_ = nrow;
  this->ncol_ = ncol;
  this->nnz_  = nnz;

}
// Allocate a zero-initialised vector of n entries.
//
//   n - requested length, must be >= 0
//
// Any existing content is released first. When n is 0 nothing is allocated
// and size_ is not updated here.
void HostVector<ValueType>::Allocate(const int n) {

  assert(n >= 0);

  // Release the current buffer, if any
  if (this->size_ > 0) {
    this->Clear();
  }

  // Nothing to allocate for an empty vector
  if (n <= 0) {
    return;
  }

  allocate_host(n, &this->vec_);
  set_to_zero_host(n, this->vec_);

  this->size_ = n;

}
// Assemble (scatter-add) `size` values into this vector: for every k in
// [0, size) the value v[k] is added to entry ii[k]. Repeated indices
// accumulate.
//
//   ii   - entry indices; must be non-NULL (asserted); each index is asserted
//          to be non-negative only when the length is derived (n == 0)
//   v    - values to add; must be non-NULL
//   size - number of (index, value) pairs; must be > 0
//   n    - target vector length; must be >= 0. When 0, the length is derived
//          as max(ii) + 1 and the vector is cleared and re-allocated via
//          Allocate(). When non-zero, the vector is NOT (re)allocated here;
//          presumably the caller guarantees it already holds at least n
//          entries and that every index is < n -- TODO confirm with callers.
//
// With more than two OpenMP threads available, each chunk of the input is
// accumulated into its own temporary buffer of length N and the buffers are
// then summed into the vector, so no two threads ever write the same entry.
void HostVector<ValueType>::Assemble(const int *ii, const ValueType *v, int size, const int n) {

  assert(ii != NULL);
  assert(v != NULL);
  assert(size > 0);
  assert(n >= 0);

  _set_omp_backend_threads(this->local_backend_, this->size_);

  const int nThreads = omp_get_max_threads();

  int N = n;

  if (N == 0) {

    // Derive the vector length from the largest index. Every thread tracks
    // its own maximum and the maxima are merged under a critical section
    // that re-checks the shared value, so a smaller candidate can never
    // overwrite a larger one and N is never read unsynchronised.
#pragma omp parallel
    {
      int local_max = 0;

#pragma omp for nowait
      for (int i=0; i<size; ++i) {

        assert(ii[i] >= 0);

        const int val = ii[i]+1;
        if (val > local_max)
          local_max = val;

      }

#pragma omp critical
      {
        if (local_max > N)
          N = local_max;
      }
    }

    this->Clear();
    this->Allocate(N);

  }

  if (nThreads <= 2) {

    // serial: not worth the per-thread buffers
    for (int i=0; i<size; ++i)
      this->vec_[ ii[i] ] += v[i];

  } else {

    // parallel: one private accumulation buffer per input chunk
    ValueType **v_red = (ValueType **) malloc(nThreads*sizeof(ValueType*));
    assert(v_red != NULL);

    for (int k=0; k<nThreads; ++k) {

      v_red[k] = NULL;
      allocate_host(N, &v_red[k]);
      set_to_zero_host(N, v_red[k]);

    }

    // Accumulate chunk k of the input into buffer k. Iterating over the
    // chunk index (instead of relying on omp_get_thread_num()) guarantees
    // that every chunk is processed even if the runtime provides fewer
    // threads than omp_get_max_threads() reported.
#pragma omp parallel for
    for (int k=0; k<nThreads; ++k) {

      // 64-bit intermediate: size*(k+1) may not fit into an int
      const int istart = static_cast<int>(static_cast<long long>(size)*k/nThreads);
      const int iend   = static_cast<int>(static_cast<long long>(size)*(k+1)/nThreads);

      for (int i=istart; i<iend; ++i)
        v_red[k][ ii[i] ] += v[i];

    }

    // Sum the private buffers into the vector; each entry is owned by exactly
    // one iteration and the buffers are always added in the order 0..nThreads-1.
#pragma omp parallel for
    for (int i=0; i<N; ++i)
      for (int k=0; k<nThreads; ++k)
        this->vec_[i] += v_red[k][i];

    for (int k=0; k<nThreads; ++k)
      free_host(&v_red[k]);

    free(v_red);

  }

}