예제 #1
0
파일: main.cpp 프로젝트: CCJY/coliru
/**
 * Multiplies two square matrices, walking the summation index in groups of
 * four so the innermost loop has a fixed trip count.
 *
 * Both operands must be square and of the same dimension (checked with
 * assert). For every output element the products are accumulated in
 * ascending order of the summation index.
 *
 * @param matrixA left operand
 * @param matrixB right operand
 * @return a new matrix holding matrixA * matrixB
 */
matrix_t matrixMulUnrolled(const matrix_t & matrixA, const matrix_t & matrixB) {
	using index_t = decltype(matrixA.size());
	const index_t dimension = matrixA.size();

	assert(matrixB.size() == dimension);
	// Row 0 only exists for non-empty matrices.
	assert(dimension == 0 || matrixA[0].size() == dimension);
	assert(dimension == 0 || matrixB[0].size() == dimension);

	matrix_t matrixC(dimension, typename matrix_t::value_type(dimension, 0));//0ed matrix

	const index_t vec = 4;
	// First summation index that is NOT covered by a complete group of `vec`.
	// The blocked loop runs right up to this index and the tail loop starts
	// from it, so no index is skipped when dimension is a multiple of vec.
	const index_t blockedEnd = dimension - dimension % vec;

	for(index_t x{0}; x < dimension; ++x) {
		// complete groups of `vec` summation indices
		for(index_t i{0}; i < blockedEnd; i += vec)
		for(index_t y{0}; y < dimension; ++y)
		for(index_t t{0}; t < vec; ++t)
			matrixC[x][y] += matrixA[x][i+t] * matrixB[i+t][y];

		// leftover summation indices (fewer than vec)
		for(index_t i{blockedEnd}; i < dimension; ++i)
		for(index_t y{0}; y < dimension; ++y)
			matrixC[x][y] += matrixA[x][i] * matrixB[i][y];
	}

	return matrixC;
}
예제 #2
0
/**
 * Accumulates the product A * B into C using 256-bit AVX arithmetic.
 *
 * All three matrices are treated as n x n with n = A.size(); rows are reached
 * through the `data` pointer table. C is NOT cleared here: the products are
 * added to whatever C already holds.
 *
 * Columns are processed eight floats at a time; the columns left over when n
 * is not a multiple of eight are handled by a scalar loop. Unaligned
 * loads/stores are used so the rows do not have to start on a 32-byte
 * boundary.
 *
 * @param A left operand
 * @param B right operand
 * @param C accumulator / result
 */
void multi2(const matrix_t &A, const matrix_t &B, matrix_t &C)
{
    const size_t n = A.size();
    float **__restrict__ const da = A.data;
    float **__restrict__ const db = B.data;
    float **__restrict__ dc = C.data;

    const size_t chunk_size = 8;            // floats per 256-bit register
    const size_t chunks = n / chunk_size;   // complete 8-wide column groups

    // Each thread owns whole rows of C, so no two threads write the same element.
#pragma omp parallel for num_threads(8)
    for(size_t i = 0; i < n; ++i)
    {
        for(size_t k = 0; k < n; ++k)
        {
            const float c = da[i][k];
            const __m256 a_line = _mm256_set1_ps(c);   // broadcast A[i][k] to all lanes

            for(size_t j = 0; j < chunks; ++j)
            {
                const size_t col = j * chunk_size;
                const __m256 b_line = _mm256_loadu_ps(&db[k][col]);
                const __m256 c_line = _mm256_loadu_ps(&dc[i][col]);
                const __m256 r_line = _mm256_add_ps(_mm256_mul_ps(a_line, b_line), c_line);
                _mm256_storeu_ps(&dc[i][col], r_line);
            }

            // scalar tail for the last n % 8 columns
            for(size_t j = chunk_size * chunks; j < n; ++j)
            {
                dc[i][j] += c * db[k][j];
            }
        }
    }
}
예제 #3
0
/**
 * Depth-first search for `needle` in a character grid, moving between the
 * eight neighbouring cells given by the search_x / search_y offset tables.
 *
 * Cells on the current path are temporarily overwritten with '.' so they are
 * not reused. They are restored when a branch fails; on success the function
 * returns immediately, so the cells of the found path stay overwritten.
 *
 * @param matrix      grid to search (modified while searching, see above)
 * @param x           row of the cell to try
 * @param y           column of the cell to try
 * @param needle      text to look for
 * @param start_index index of the next character of needle to match
 * @param scan        when true, keep trying the following cells in row-major
 *                    order if no match starts at (x, y)
 * @return true when needle[start_index..] can be matched
 */
bool search(matrix_t &matrix, int x, int y, string needle, int start_index, bool scan) {
  // Every character is already matched. This has to be tested before the
  // bounds check: the recursive call that detects completion is made on a
  // neighbour cell, which may lie outside the grid (e.g. in a 1x1 grid).
  if (needle.size() == static_cast<size_t>(start_index)) {
    return true;
  }

  // An empty grid has no row 0 to take the width from.
  if (matrix.size() == 0) return false;

  const int x_max = static_cast<int>(matrix.size()) - 1;
  const int y_max = static_cast<int>(matrix[0].size()) - 1;

  if (x < 0 || x > x_max) return false;
  if (y < 0 || y > y_max) return false;

  if (matrix[x][y] == needle[start_index]) {
    const int original_char = matrix[x][y];
    matrix[x][y] = '.';  // mark the cell as used on the current path
    for (int i = 0; i < 8; ++i) {
      const bool found = search(matrix, x + search_x[i], y + search_y[i], needle, start_index + 1, false);
      if (found) return true;
    }
    matrix[x][y] = original_char;  // backtrack
  }

  if (scan) {
    // Advance to the next cell in row-major order.
    if (y < y_max) {
      return search(matrix, x, y + 1, needle, start_index, true);
    } else if (x < x_max) {
      return search(matrix, x + 1, 0, needle, start_index, true);
    }
  }

  return false;
}
예제 #4
0
파일: main.cpp 프로젝트: CCJY/coliru
/**
 * Reference square matrix multiplication.
 *
 * The loops are ordered row / summation index / column, so the innermost
 * loop walks one row of matrixB and one row of the result.
 *
 * @param matrixA left operand (square)
 * @param matrixB right operand (same dimension as matrixA)
 * @return a new matrix holding matrixA * matrixB
 */
matrix_t matrixMul(const matrix_t & matrixA, const matrix_t & matrixB) {
	using index_t = decltype(matrixA.size());
	const index_t dimension = matrixA.size();

	assert(matrixA.size() == dimension);
	assert(matrixA[0].size() == dimension);
	assert(matrixB.size() == dimension);
	assert(matrixB[0].size() == dimension);

	// result starts out filled with zeros
	matrix_t product(dimension, typename matrix_t::value_type(dimension, 0));

	for(index_t row = 0; row < dimension; ++row) {
		auto & outRow = product[row];
		const auto & lhsRow = matrixA[row];

		for(index_t k = 0; k < dimension; ++k) {
			const auto & rhsRow = matrixB[k];

			for(index_t col = 0; col < dimension; ++col)
				outRow[col] += lhsRow[k] * rhsRow[col];
		}
	}

	return product;
}
예제 #5
0
파일: main.cpp 프로젝트: CCJY/coliru
/**
 * Square matrix multiplication that processes the operands in 8x8 tiles.
 *
 * The part of the matrices that is covered by complete tiles is multiplied
 * tile by tile. Whatever does not fit (dimension not a multiple of 8) is
 * added afterwards by three plain loops: the missing summation indices of
 * the tiled area, the remaining rows, and the remaining columns.
 *
 * @param matrixA left operand (square)
 * @param matrixB right operand (same dimension as matrixA)
 * @return a new matrix holding matrixA * matrixB
 */
matrix_t matrixMulTiled(const matrix_t & matrixA, const matrix_t & matrixB) {
	const auto dimension = matrixA.size();

	assert(matrixA.size() == dimension);
	assert(matrixA[0].size() == dimension);
	assert(matrixB.size() == dimension);
	assert(matrixB[0].size() == dimension);

	// result starts out filled with zeros
	matrix_t product(dimension, typename matrix_t::value_type(dimension, 0));

	const size_t tile = 8;//256bit
	// Extent covered by complete tiles; 0 when the matrix is smaller than one
	// tile, in which case the tiled loops below simply do not run.
	const size_t tiledEnd = dimension - dimension % tile;

	for (size_t rowTile = 0; rowTile < tiledEnd; rowTile += tile)
	for (size_t colTile = 0; colTile < tiledEnd; colTile += tile)
	for (size_t sumTile = 0; sumTile < tiledEnd; sumTile += tile)
		for (size_t row = rowTile; row < rowTile + tile; ++row)
		for (size_t col = colTile; col < colTile + tile; ++col)
		for (size_t k = sumTile; k < sumTile + tile; ++k)
			product[row][col] += matrixA[row][k] * matrixB[k][col];

	// tiled area: summation indices beyond the last complete tile
	for (size_t row = 0; row < tiledEnd; ++row)
	for (size_t k = tiledEnd; k < dimension; ++k)
	for (size_t col = 0; col < tiledEnd; ++col)
		product[row][col] += matrixA[row][k] * matrixB[k][col];

	// rows below the tiled area, over their full width
	for (size_t row = tiledEnd; row < dimension; ++row)
	for (size_t k = 0; k < dimension; ++k)
	for (size_t col = 0; col < dimension; ++col)
		product[row][col] += matrixA[row][k] * matrixB[k][col];

	// columns right of the tiled area, for the rows not handled above
	for (size_t row = 0; row < tiledEnd; ++row)
	for (size_t k = 0; k < dimension; ++k)
	for (size_t col = tiledEnd; col < dimension; ++col)
		product[row][col] += matrixA[row][k] * matrixB[k][col];

	return product;
}
예제 #6
0
파일: mul.cpp 프로젝트: dragosht/courses
void Print(const matrix_t& m)
{
    for (unsigned i = 0; i < m.size(); i++) {
        for (unsigned j = 0; j < m[i].size(); j++) {
            cout << m[i][j] << " ";
        }
        cout << endl;
    }
}
예제 #7
0
파일: main.cpp 프로젝트: CCJY/coliru
/**
 * Performs one relaxation sweep over the interior of a square grid.
 *
 * Every interior cell of the returned copy is increased by a quarter of the
 * sum of its four direct neighbours. The neighbours are always read from the
 * unmodified input grid; border cells are copied unchanged.
 *
 * NOTE(review): because neighbours come from the input rather than from the
 * values updated during this sweep, this looks like a Jacobi-style update
 * despite the function name - confirm the intent.
 *
 * @param grid square input grid (squareness is checked with assert)
 * @return the updated copy of grid
 */
matrix_t gaussSeidelIteration(const matrix_t & grid) {
	using index_t = decltype(grid.size());
	const index_t dimension = grid.size();

	// An empty grid has no row 0 to validate and nothing to update.
	if(dimension == 0)
		return grid;

	assert(grid[0].size() == dimension);

	auto gridCopy = grid;

	// "x + 1 < dimension" instead of "x < dimension - 1" keeps the bound free
	// of unsigned subtraction.
	for(index_t x{1}; x + 1 < dimension; ++x)
	for(index_t y{1}; y + 1 < dimension; ++y)
		gridCopy[x][y] += 0.25 * (grid[x-1][y] + grid[x][y-1] + grid[x][y+1] + grid[x+1][y]);

	return gridCopy;
}
예제 #8
0
// Replaces M with its transpose.
//
// The column count is taken from the first row, so all rows are expected to
// have that length. An empty matrix is left untouched.
// NOTE(review): sizeMatrix(tM, cM, rM) is assumed to size tM to cM rows of rM
// columns - confirm against its definition.
void transposeMatrix(matrix_t & M)
{
    if (M.size() == 0)
    {
	return;   // no row to take the width from, nothing to transpose
    }

    const int rM = M.size();
    const int cM = M[0].size();   // row 0 always exists here; row 1 may not
    matrix_t tM;
    sizeMatrix(tM,cM,rM);
    for (int r=0; r<cM; r++)
    {
	for (int c=0; c<rM; c++)
	{
	    tM[r][c] = M[c][r];
	}
    }
    M = tM;
}
예제 #9
0
파일: mul.cpp 프로젝트: dragosht/courses
// Computes res = m1 * m2.
//
// m1 is rows x inner and m2 is inner x cols; the assert checks that both are
// non-empty and that the inner dimensions agree. res is resized to
// rows x cols and overwritten.
//
// res is written while m1 and m2 are still being read, so it must not be the
// same object as either operand.
void Multiply(const matrix_t& m1,
              const matrix_t& m2,
              matrix_t& res)
{

    assert(m1.size() > 0 && m2.size() > 0 && m1[0].size() == m2.size());

    const int rows = m1.size();
    const int inner = m2.size();      // shared dimension: columns of m1, rows of m2
    const int cols = m2[0].size();

    res.resize(rows);

    for (int i = 0; i < rows; i++) {
        res[i].resize(cols);

        // j walks the result columns, k the shared dimension
        for (int j = 0; j < cols; j++) {
            res[i][j] = 0;
            for (int k = 0; k < inner; k++) {
                res[i][j] += m1[i][k] * m2[k][j];
            }
        }
    }
}
예제 #10
0
// In-place Gauss-Jordan elimination with full pivoting.
//
// a : treated as an n x n matrix with n = a.size(); overwritten.
// b : right-hand sides, m = b[0].size() columns and (at least) n rows;
//     overwritten. It receives the same row operations as a.
//
// NOTE(review): the structure matches the classic textbook "gaussj" routine,
// where a ends up holding its inverse and b the solution vectors - confirm
// before relying on that.
// NOTE(review): irow / icol are only assigned inside the pivot search; if no
// entry satisfies the ">= big" test (for example when entries are NaN) they
// are used uninitialized.
void gaussj(matrix_t & a, matrix_t & b)
{
  int i,icol,irow,j,k,l,ll;
  double big,dum,pivinv;
  
  int n=a.size();
  int m=b[0].size();
  // indxr[i] / indxc[i] record the row and column of the i-th pivot;
  // ipiv[c] counts how often column c has been used as a pivot column.
  vector_t indxc(n),indxr(n),ipiv(n);
  for (j=0;j<n;j++) ipiv[j]=0;
  // One pass per column to be reduced.
  for (i=0;i<n;i++) {
    big=0.0;
    // Pivot search: largest absolute value among rows whose ipiv entry is
    // not 1 and columns whose ipiv entry is 0.
    for (j=0;j<n;j++)
      if (ipiv[j] != 1)
	for (k=0;k<n;k++) {
	  if (ipiv[k] == 0) {
	    if (fabs(a[j][k]) >= big) {
	      big=fabs(a[j][k]);
	      irow=j;
	      icol=k;
	    }
	  }
	}
    ++(ipiv[icol]);
    // Swap rows so the pivot sits on the diagonal at (icol, icol).
    if (irow != icol) {
      for (l=0;l<n;l++) SWAP(a[irow][l],a[icol][l]);
      for (l=0;l<m;l++) SWAP(b[irow][l],b[icol][l]);
    }
    indxr[i]=irow;
    indxc[i]=icol;
    // Whether error() returns is not visible here; if it does, the division
    // below is by zero.
    if (a[icol][icol] == 0.0) error("gaussj: Singular Matrix");
    pivinv=1.0/a[icol][icol];
    // The pivot is set to 1 before the row is scaled, so the diagonal entry
    // ends up as 1/pivot.
    a[icol][icol]=1.0;
    for (l=0;l<n;l++) a[icol][l] *= pivinv;
    for (l=0;l<m;l++) b[icol][l] *= pivinv;
    // Eliminate the pivot column from every other row.
    for (ll=0;ll<n;ll++)
      if (ll != icol) {
	dum=a[ll][icol];
	a[ll][icol]=0.0;
	for (l=0;l<n;l++) a[ll][l] -= a[icol][l]*dum;
	for (l=0;l<m;l++) b[ll][l] -= b[icol][l]*dum;
      }
  }
  // Undo the row interchanges on a by swapping the corresponding columns, in
  // the reverse of the order in which the pivots were chosen.
  for (l=n-1;l>=0;l--) {
    if (indxr[l] != indxc[l])
      for (k=0;k<n;k++)
	SWAP(a[k][(int)indxr[l]],a[k][(int)indxc[l]]);
  }
}
예제 #11
0
/**
 * Accumulates the product A * B into C with plain scalar arithmetic.
 *
 * All three matrices are treated as n x n with n = A.size(); rows are reached
 * through the `data` pointer table. C is not cleared here: the products are
 * added to its current contents.
 *
 * @param A left operand
 * @param B right operand
 * @param C accumulator / result
 */
void multi1(const matrix_t &A, const matrix_t &B, matrix_t &C)
{
    const size_t dim = A.size();
    float **__restrict__ const lhsRows = A.data;
    float **__restrict__ const rhsRows = B.data;
    float **__restrict__ outRows = C.data;

    // Each thread works on whole rows of C.
#pragma omp parallel for num_threads(8)
    for(size_t row = 0; row < dim; ++row)
    {
        const float *const lhs = lhsRows[row];
        float *const out = outRows[row];

        for(size_t k = 0; k < dim; ++k)
        {
            const float scale = lhs[k];
            const float *const rhs = rhsRows[k];

            for(size_t col = 0; col < dim; ++col)
            {
                out[col] += scale * rhs[col];
            }
        }
    }
}
예제 #12
0
/**
 * Fills A and B with normally distributed pseudo-random values.
 *
 * Both matrices are treated as n x n with n = A.size(). The values come from
 * a Mersenne Twister seeded with `seed` and are drawn alternately: first the
 * element of A, then the element of B at the same position. The C library
 * generator is seeded with the same value as well.
 *
 * @param A    first matrix to fill
 * @param B    second matrix to fill
 * @param seed seed for both generators
 */
void generate_matrix(matrix_t &A, matrix_t &B, unsigned int seed)
{
    srand(seed);

    const size_t dim = A.size();
    float **rowsA = A.data;
    float **rowsB = B.data;

    mt19937 engine(seed);
    normal_distribution<> gauss;

    for(size_t r = 0; r < dim; ++r)
    {
        float *const rowA = rowsA[r];
        float *const rowB = rowsB[r];

        for(size_t c = 0; c < dim; ++c)
        {
            // keep the draw order: A first, then B
            rowA[c] = gauss(engine);
            rowB[c] = gauss(engine);
        }
    }
}
예제 #13
0
  /**
   * Runs `method(A, b)` `iter` times and summarises column 0 of its results.
   *
   * For every row the mean over all runs and the standard error of the mean
   * (sample standard deviation divided by sqrt(iter)) are computed. NaN
   * results are counted as 0.
   *
   * With OpenMP enabled the runs execute in parallel and share A and b, so
   * `method` must be safe to call concurrently on the same arguments.
   *
   * The deviation divides by (iter - 1); with iter < 2 that division is by
   * zero (or by a negative number).
   * NOTE(review): the accumulation assumes make_matrix returns zero-filled
   * matrices - confirm against its definition.
   *
   * @param method solver to evaluate
   * @param A      first argument passed to method
   * @param b      second argument passed to method; its row count fixes the
   *               number of rows in the result
   * @param iter   number of runs
   * @return pair (mean, standard error); only column 0 of each is filled in
   */
  template<typename T> std::pair<matrix_t<T>, matrix_t<T>> stat_analysis
  (std::function<matrix_t<T>(matrix_t<T>&, matrix_t<T>&)> method,
   matrix_t<T> A, matrix_t<T> b, int iter)
  {
    const size_t n = b.size();
    matrix_t<T> average = make_matrix<T>(n, b[0].size());
    matrix_t<T> results = make_matrix<T>(n, iter);
    matrix_t<T> deviation = make_matrix<T>(n, b[0].size());

#if defined(_OPENMP)
#pragma omp parallel for
#endif
    for (int i = 0 ; i < iter ; i++)
      {
        // Declared inside the loop so that every thread works on its own
        // result; a single matrix shared by all threads would be a data race.
        matrix_t<T> temp = method (A, b);
        for (size_t j = 0 ; j < n ; j++)
          {
            if (isnan(temp[j][0])) temp[j][0] = 0.;
            results[j][i] = temp[j][0];   // column i belongs to run i only
          }
      }

    for (size_t i = 0 ; i < n ; i++)
      {
        // mean over all runs
        for (int j = 0 ; j < iter ; j++)
          {
            average[i][0] += results[i][j];
          }
        average[i][0] /= iter;

        // sum of squared differences from the mean
        for (int j = 0 ; j < iter ; j++)
          {
            deviation[i][0] += (results[i][j] - average[i][0])
              * (results[i][j] - average[i][0]);
          }
        deviation[i][0] /= (static_cast<T>(iter - 1));
        deviation[i][0] = sqrt(deviation[i][0])/sqrt(iter);
      }
    return std::make_pair (average, deviation);
  }
예제 #14
0
	// Returns a copy of M in which, for every intermediate index `mid` in
	// turn, each entry (row, col) is replaced by
	//     1 / (1/entry + 1/(copy[row][mid] + copy[mid][col]))
	// with two special cases: the entry becomes 0 when it is already 0 or
	// when the sum through `mid` is 0, and it becomes infinity when the sum
	// of the two reciprocals is 0. The update happens in place, so later
	// entries see the values already rewritten earlier in the same pass.
	static matrix_t dot(const matrix_t& M) {

		matrix_t combined = M;
		const size_t count = M.size();

		for (unsigned int mid = 0; mid < count; mid++) {
			for (unsigned int row = 0; row < count; row++) {
				for (unsigned int col = 0; col < count; col++) {
					auto& cell = combined[row][col];

					if (cell == 0) {
						cell = 0;
						continue;
					}

					const auto detour = combined[row][mid] + combined[mid][col];
					if (detour == 0) {
						cell = 0;
						continue;
					}

					const auto reciprocalSum = 1/cell + 1/detour;
					if (reciprocalSum == 0) {
						cell = std::numeric_limits<double>::infinity();
					}
					else {
						cell = 1/reciprocalSum;
					}
				}
			}
		}
		return combined;
	}
예제 #15
0
파일: main.cpp 프로젝트: CCJY/coliru
/**
 * Square matrix multiplication using 256-bit fused multiply-add.
 *
 * For every pair (x, i) the value matrixA[x][i] is broadcast to all eight
 * lanes and row i of matrixB is multiplied by it and added to row x of the
 * result, eight columns at a time. Columns that do not fill a complete group
 * of eight are handled by a scalar loop.
 *
 * The rows are accessed as arrays of float with unaligned loads and stores,
 * so they need no particular alignment.
 *
 * @param matrixA left operand (square, float elements)
 * @param matrixB right operand (same dimension as matrixA)
 * @return a new matrix holding matrixA * matrixB
 */
matrix_t matrixMulFMA(const matrix_t & matrixA, const matrix_t & matrixB) {
	using index_t = decltype(matrixA.size());
	const index_t dimension = matrixA.size();

	assert(matrixB.size() == dimension);
	// Row 0 only exists for non-empty matrices.
	assert(dimension == 0 || matrixA[0].size() == dimension);
	assert(dimension == 0 || matrixB[0].size() == dimension);

	matrix_t matrixC(dimension, typename matrix_t::value_type(dimension, 0));//0ed matrix

	const index_t vec = 8;// floats per 256-bit register
	// First column that is NOT part of a complete group of eight. The vector
	// loop runs right up to this column and the scalar loop starts from it,
	// so no column is skipped when dimension is a multiple of vec.
	const index_t start = dimension - dimension % vec;

	for(index_t x{0}; x < dimension; ++x)
	for(index_t i{0}; i < dimension; ++i) {
		const __m256 a = _mm256_set1_ps(matrixA[x][i]);

		for(index_t y{0}; y < start; y += vec) {
			const __m256 b = _mm256_loadu_ps(&matrixB[i][y]);
			__m256 c = _mm256_loadu_ps(&matrixC[x][y]);

			c = _mm256_fmadd_ps(a, b, c);//c = a * b + c;
			_mm256_storeu_ps(&matrixC[x][y], c);
		}

		//calculate remaining columns
		for(index_t y{start}; y < dimension; ++y)
			matrixC[x][y] += matrixA[x][i] * matrixB[i][y];
	}

	return matrixC;
}