// Sparse matrix-vector product for ELL storage: y += scalar * (A * x).
// One thread per matrix row; launch with a 1D grid covering num_rows.
// Acol/Aval hold num_rows * num_cols_per_row entries addressed via ELL_IND;
// padding slots carry an out-of-range column index and are skipped.
__global__ void kernel_ell_add_spmv(const IndexType num_rows, const IndexType num_cols, const IndexType num_cols_per_row, const IndexType *Acol, const ValueType *Aval, const ValueType scalar, const ValueType *x, ValueType *y) {

  int row = blockIdx.x * blockDim.x + threadIdx.x;

  // Guard the grid tail: the launch may spawn more threads than rows.
  if (row >= num_rows)
    return;

  ValueType acc = ValueType(0.0);

  for (IndexType item = 0; item < num_cols_per_row; ++item) {

    const IndexType idx = ELL_IND(row, item, num_rows, num_cols_per_row);
    const IndexType col = Acol[idx];

    // Only accumulate genuine entries; padding columns fall outside [0, num_cols).
    if ((col >= 0) && (col < num_cols))
      acc += Aval[idx] * x[col];

  }

  y[row] += scalar * acc;

}
// Computes out = A * in on the host (OpenMP) for ELL storage.
// A negative column index marks an ELL padding slot; rows are assumed to be
// padded at the tail, so the scan of a row stops at the first negative column.
// No-op when the matrix holds no non-zeros.
void HostMatrixELL<ValueType>::Apply(const BaseVector<ValueType> &in, BaseVector<ValueType> *out) const {

  if (this->nnz_ <= 0)
    return;

  assert(in.get_size() >= 0);
  assert(out->get_size() >= 0);
  assert(in.get_size() == this->ncol_);
  assert(out->get_size() == this->nrow_);

  // The host backend only operates on host vectors.
  const HostVector<ValueType> *cast_in = dynamic_cast<const HostVector<ValueType>*>(&in);
  HostVector<ValueType> *cast_out = dynamic_cast<HostVector<ValueType>*>(out);

  assert(cast_in != NULL);
  assert(cast_out != NULL);

  _set_omp_backend_threads(this->local_backend_, this->nrow_);

#pragma omp parallel for
  for (int row = 0; row < this->nrow_; ++row) {

    ValueType acc = ValueType(0.0);

    for (int item = 0; item < this->mat_.max_row; ++item) {

      const int idx = ELL_IND(row, item, this->nrow_, this->mat_.max_row);
      const int col = this->mat_.col[idx];

      // First negative column means the remainder of the row is padding.
      if (col < 0)
        break;

      acc += this->mat_.val[idx] * cast_in->vec_[col];

    }

    cast_out->vec_[row] = acc;

  }

}