Esempio n. 1
0
/**
 * Adds the sum of every row of `mat` to the matching entry of `vec`.
 *
 * The columns are split statically among the OpenMP threads. Each thread sums
 * its columns into a private buffer of `mat.nrow` entries, and the private
 * buffers are then added to `vec` inside a critical section.
 *
 * Note that `vec` is incremented, not overwritten: whatever it holds on entry
 * remains part of the result.
 *
 * @param mat      matrix accessed through `nrow`, `ncol` and `colptr(col)`
 * @param vec      destination vector; must have `mat.nrow` entries
 * @param nthreads requested number of threads; values below 1 are treated as 1
 * @throws std::invalid_argument if `vec.len` differs from `mat.nrow`
 */
static void rowSums(TMat<TNumMat> mat, Vec<TNumVec> vec, int nthreads){
	if (mat.nrow != vec.len) throw std::invalid_argument("provided vector has invalid length");

	const int numRows = mat.nrow;
	const int numCols = mat.ncol;
	// Never ask OpenMP for fewer than one thread.
	const int threadCount = std::max(1, nthreads);

	#pragma omp parallel num_threads(threadCount)
	{
		// Thread-private partial sums, one slot per row, starting at zero.
		std::vector<TNumVec> partial(numRows, 0);

		// No barrier after the loop: a thread that is done can go straight
		// to merging its own buffer.
		#pragma omp for schedule(static) nowait
		for (int c = 0; c < numCols; ++c){
			const TNumMat* column = mat.colptr(c);
			for (int r = 0; r < numRows; ++r){
				partial[r] += column[r];
			}
		}

		// Only one thread at a time may fold its partial sums into vec.
		#pragma omp critical
		{
			for (int r = 0; r < numRows; ++r){
				vec[r] += partial[r];
			}
		}
	}
}
Esempio n. 2
0
/**
 * Writes the sum of every column of `mat` into the matching entry of `vec`.
 *
 * The columns are split statically among the OpenMP threads; each column is
 * summed by exactly one thread, which then overwrites its own output slot, so
 * no synchronization between threads is needed.
 *
 * @param mat      matrix accessed through `nrow`, `ncol` and `colptr(col)`
 * @param vec      destination vector; must have `mat.ncol` entries, previous
 *                 contents are overwritten
 * @param nthreads requested number of threads; values below 1 are treated as 1
 * @throws std::invalid_argument if `vec.len` differs from `mat.ncol`
 */
static void colSums(TMat<TNumMat> mat, Vec<TNumVec> vec, int nthreads){
	if (mat.ncol != vec.len) throw std::invalid_argument("provided vector has invalid length");

	TNumVec* cs = vec.ptr;
	const int nrow = mat.nrow;
	const int ncol = mat.ncol;

	#pragma omp parallel for schedule(static) num_threads(std::max(1, nthreads))
	for (int col = 0; col < ncol; ++col){
		const TNumMat* ptr = mat.colptr(col);
		// Accumulate in the output element type (as rowSums does) rather than
		// in the matrix element type: when TNumVec is wider than TNumMat the
		// running sum can no longer overflow before it is stored.
		TNumVec tmp = 0;
		for (int row = 0; row < nrow; ++row){
			tmp += ptr[row];
		}
		cs[col] = tmp;
	}
}