예제 #1
0
bool ClustFlat(const ClustOptions& opts,
               const std::vector<DenseMatrix<T> >& topic_vectors,
               const MatrixType<T>& A,
               T* buf_w,
               T* buf_h)
{
    int m = opts.nmf_opts.height;
    int n = opts.nmf_opts.width;
    int k = opts.num_clusters;
    
    DenseMatrix<T> W(m, k, buf_w, m);
    
    // load matrix W with the topic vectors
    DenseMatrix<T> col_w;
    for (int c=0; c<k; ++c)
    {
        // create a view of the cth column of W
        View(col_w, W, 0, c, m, 1);
        
        // copy the next topic vector into place
        Copy(topic_vectors[c], col_w);
    }
    
    // randomly initialize matrix H
    Random rng;
    rng.SeedFromTime();
    RandomMatrix(buf_h, k, k, n, rng, T(0.5), T(0.5));
    DenseMatrix<T> H(k, n, buf_h, k);

    bool ok = NnlsHals(A, W, H, 
                       opts.nmf_opts.tol, 
                       opts.verbose,
                       opts.nmf_opts.max_iter);
    return ok;
}
예제 #2
0
// Runs a single timing comparison between gemm_gold (the CPU reference)
// and RunCleaver for square matrices of dimension matrixSize, then writes
// one result line to os: matrixSize, average CPU time, average cleaver time.
//
//   randSeed       - seed handed to srand() before the matrices are filled.
//   memAllowed     - forwarded to the SciGPUgemm::GEMMcleaver constructor.
//   transA, transB - transpose flags forwarded to both multiplications.
//   matrixSize     - used for m, n, k and for every leading dimension.
//   min, max       - forwarded to RandomMatrix when filling A, B and C.
//   alpha, beta    - GEMM scalars forwarded to both multiplications.
//   nRepeats       - number of timed iterations.
//   os             - stream receiving the result line.
template<typename T> void RunOneTest( const unsigned int randSeed,
				      const size_t memAllowed,
				      const char transA, const char transB,
				      const int matrixSize,
				      const T min, const T max,
				      const T alpha, const T beta,
				      const unsigned int nRepeats,
				      std::ostream &os ) {
  T *A, *B, *C;
  T *resBLAS, *resCleaver;

  int m, n, k, lda, ldb, ldc;
  int hA, wA, hB, wB, hC, wC;

  SciGPUgemm::GEMMcleaver testCleaver( memAllowed );

  Chronometer t_cpu, t_cleaver;

  // Initialise PRNG
  srand( randSeed );

  // Set the matrix sizes
  m = n = k = matrixSize;
  lda = wA = hA = ldb = wB = hB = ldc = wC = hC = matrixSize;

  // Element counts are computed in size_t: the int product
  // matrixSize*matrixSize overflows (undefined behaviour) once
  // matrixSize exceeds 46340, which would corrupt the allocation
  // and memcpy sizes below.
  const size_t nElemA = static_cast<size_t>( lda ) * static_cast<size_t>( wA );
  const size_t nElemB = static_cast<size_t>( ldb ) * static_cast<size_t>( wB );
  const size_t nElemC = static_cast<size_t>( ldc ) * static_cast<size_t>( wC );

  // Allocate memory
  // NOTE(review): these are raw owning pointers; if a later allocation or
  // any call below throws, the earlier buffers are leaked.
  A = new T[nElemA];
  B = new T[nElemB];
  C = new T[nElemC];

  resBLAS = new T[nElemC];
  resCleaver = new T[nElemC];

  // Fill with data
  RandomMatrix( A, lda, hA, wA, min, max );
  RandomMatrix( B, ldb, hB, wB, min, max );
  RandomMatrix( C, ldc, hC, wC, min, max );

  // Average over designated number of runs
  for( unsigned int iRepeat=0; iRepeat<nRepeats; iRepeat++ ) {
    // Both multiplications start from the same C each iteration
    memcpy( resBLAS, C, nElemC*sizeof(T) );
    memcpy( resCleaver, C, nElemC*sizeof(T) );

    // Run the CPU multiplication
    t_cpu.Start();
    gemm_gold( A, B, resBLAS,
	       alpha, beta,
	       transA, transB,
	       m, n, k, lda, ldb, ldc );
    t_cpu.Stop();

    // Run the GPU multiplication
    t_cleaver.Start();
    RunCleaver( transA, transB,
		m, n, k,
		alpha,
		A, lda, B, ldb,
		beta,
		resCleaver, ldc,
		testCleaver );
    t_cleaver.Stop();
  }

  // Output the results
  WriteStream( os, matrixSize );
  WriteStream( os, t_cpu.GetAverageTime() );
  WriteStream( os, t_cleaver.GetAverageTime() );
  os << std::endl;

  // Release memory
  delete[] A;
  delete[] B;
  delete[] C;
  delete[] resBLAS;
  delete[] resCleaver;
}