bool ClustFlat(const ClustOptions& opts, const std::vector<DenseMatrix<T> >& topic_vectors, const MatrixType<T>& A, T* buf_w, T* buf_h) { int m = opts.nmf_opts.height; int n = opts.nmf_opts.width; int k = opts.num_clusters; DenseMatrix<T> W(m, k, buf_w, m); // load matrix W with the topic vectors DenseMatrix<T> col_w; for (int c=0; c<k; ++c) { // create a view of the cth column of W View(col_w, W, 0, c, m, 1); // copy the next topic vector into place Copy(topic_vectors[c], col_w); } // randomly initialize matrix H Random rng; rng.SeedFromTime(); RandomMatrix(buf_h, k, k, n, rng, T(0.5), T(0.5)); DenseMatrix<T> H(k, n, buf_h, k); bool ok = NnlsHals(A, W, H, opts.nmf_opts.tol, opts.verbose, opts.nmf_opts.max_iter); return ok; }
template<typename T> void RunOneTest( const unsigned int randSeed, const size_t memAllowed, const char transA, const char transB, const int matrixSize, const T min, const T max, const T alpha, const T beta, const unsigned int nRepeats, std::ostream &os ) { // Routine to run a single comparision of the cleaver T *A, *B, *C; T *resBLAS, *resCleaver; int m, n, k, lda, ldb, ldc; int hA, wA, hB, wB, hC, wC; SciGPUgemm::GEMMcleaver testCleaver( memAllowed ); Chronometer t_cpu, t_cleaver; // Initialise PRNG srand( randSeed ); // Set the matrix sizes m = n = k = matrixSize; lda = wA = hA = ldb = wB = hB = ldc = wC = hC = matrixSize; // Allocate memory A = new T[lda*wA]; B = new T[ldb*wB]; C = new T[ldc*wC]; resBLAS = new T[ldc*wC]; resCleaver = new T[ldc*wC]; // Fill with data RandomMatrix( A, lda, hA, wA, min, max ); RandomMatrix( B, ldb, hB, wB, min, max ); RandomMatrix( C, ldc, hC, wC, min, max ); // Average over designated number of runs for( unsigned int iRepeat=0; iRepeat<nRepeats; iRepeat++ ) { memcpy( resBLAS, C, ldc*wC*sizeof(T) ); memcpy( resCleaver, C, ldc*wC*sizeof(T) ); // Run the CPU multiplication t_cpu.Start(); gemm_gold( A, B, resBLAS, alpha, beta, transA, transB, m, n, k, lda, ldb, ldc ); t_cpu.Stop(); // Run the GPU multiplication t_cleaver.Start(); RunCleaver( transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, resCleaver, ldc, testCleaver ); t_cleaver.Stop(); } // Output the results WriteStream( os, matrixSize ); WriteStream( os, t_cpu.GetAverageTime() ); WriteStream( os, t_cleaver.GetAverageTime() ); os << std::endl; // Release memory delete[] A; delete[] B; delete[] C; delete[] resBLAS; delete[] resCleaver; }