Beispiel #1
0
/**
 * Scaled ELL sparse matrix-vector product accumulated into y:
 *     y[row] += scalar * sum_n( Aval[ind(row, n)] * x[Acol[ind(row, n)]] )
 *
 * Each thread handles the single row given by its flat 1D global index
 * (blockDim.x * blockIdx.x + threadIdx.x); threads whose index is not below
 * num_rows do nothing.
 *
 * ValueType and IndexType are not declared in this snippet -- presumably
 * template parameters declared just above; confirm against the full file.
 *
 * @param num_rows          number of rows; also forwarded to ELL_IND
 * @param num_cols          upper bound (exclusive) for accepted column indices
 * @param num_cols_per_row  number of stored entries visited for every row
 * @param Acol              column index of every stored entry
 * @param Aval              value of every stored entry
 * @param scalar            factor applied to the row sum before accumulation
 * @param x                 input vector, read at the accepted column indices
 * @param y                 output vector, updated in place (y[row] += ...)
 */
__global__ void kernel_ell_add_spmv(const IndexType num_rows,
                                    const IndexType num_cols,
                                    const IndexType num_cols_per_row,
                                    const IndexType *Acol,
                                    const ValueType *Aval,
                                    const ValueType scalar,
                                    const ValueType *x,
                                    ValueType *y) {

    const int row = blockDim.x * blockIdx.x + threadIdx.x;

    // Surplus threads of the last block have no row to process.
    if (row >= num_rows)
        return;

    ValueType acc = ValueType(0.0);

    for (IndexType slot = 0; slot < num_cols_per_row; ++slot) {

        // ELL_IND is defined elsewhere; it maps (row, slot) to a flat offset
        // into Acol/Aval.
        const IndexType entry  = ELL_IND(row, slot, num_rows, num_cols_per_row);
        const IndexType column = Acol[entry];

        // Entries whose column lies outside [0, num_cols) are skipped
        // (presumably padding of short rows -- confirm against the format).
        if ((column < 0) || (column >= num_cols))
            continue;

        acc += Aval[entry] * x[column];

    }

    y[row] += scalar * acc;

}
// Host-side ELL matrix-vector product: writes (*out)[i] = sum over the stored
// entries of row i of val * in[col], for every row i in [0, nrow_).
//
// in  - input vector; must be a HostVector<ValueType> of size ncol_
// out - output vector; must be a HostVector<ValueType> of size nrow_
//
// When the matrix holds no non-zeros (nnz_ <= 0) the function returns without
// touching *out. Size and type preconditions are checked with assert() only.
void HostMatrixELL<ValueType>::Apply(const BaseVector<ValueType> &in, BaseVector<ValueType> *out) const {

  // Nothing to multiply: leave the output exactly as it was passed in.
  if (!(this->nnz_ > 0))
    return;

  assert(in.  get_size() >= 0);
  assert(out->get_size() >= 0);
  assert(in.  get_size() == this->ncol_);
  assert(out->get_size() == this->nrow_);

  // The raw vec_ arrays are only reachable through the host vector type.
  const HostVector<ValueType> *cast_in = dynamic_cast<const HostVector<ValueType>*> (&in);
  HostVector<ValueType> *cast_out      = dynamic_cast<      HostVector<ValueType>*> (out);

  assert(cast_in != NULL);
  assert(cast_out!= NULL);

  // Defined elsewhere; presumably sizes the OpenMP thread team for nrow_ rows.
  _set_omp_backend_threads(this->local_backend_, this->nrow_);

  // Rows are independent: each iteration writes only its own output element.
#pragma omp parallel for
  for (int row=0; row<this->nrow_; ++row) {

    ValueType acc = ValueType(0.0);

    for (int slot=0; slot<this->mat_.max_row; ++slot) {

      const int entry  = ELL_IND(row, slot, this->nrow_, this->mat_.max_row);
      const int column = this->mat_.col[entry];

      // A negative column index ends the row early; the remaining slots of
      // this row are not visited.
      if (column < 0)
        break;

      acc += this->mat_.val[entry] * cast_in->vec_[column];

    }

    cast_out->vec_[row] = acc;

  }

}