template <typename T>
T OneNorm(const SparseMatrix<T>& A)
{
    // Compute the maximum absolute column sum.
    const unsigned int* cols_a = A.LockedColBuffer();
    const T* data_a            = A.LockedDataBuffer();
    const unsigned int width_a = A.Width();

    T max_col_sum = T(0), col_sum;
    for (unsigned int c=0; c != width_a; ++c)
    {
        unsigned int start = cols_a[c];
        unsigned int end   = cols_a[c+1];

        col_sum = T(0);
        for (unsigned int offset=start; offset != end; ++offset)
        {
            T val = fabs(data_a[offset]);
            col_sum += val;
        }

        if (col_sum > max_col_sum)
            max_col_sum = col_sum;
    }

    return max_col_sum;
}
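// A note on the storage layout assumed above (and by the other routines in
// this listing): SparseMatrix<T> is traversed as a compressed sparse column
// (CSC) structure.  LockedColBuffer() holds Width()+1 offsets, so the nonzeros
// of column c occupy positions [cols_a[c], cols_a[c+1]) in the row-index and
// data buffers.  An illustrative sketch for a hypothetical 3x3 matrix (these
// literal buffers are examples only, not produced by any SparseMatrix call):
//
//     [ 1 0 4 ]        col_buf  = { 0, 1, 3, 5 }       // Width()+1 entries
//     [ 0 2 0 ]   =>   row_buf  = { 0, 1, 2, 0, 2 }    // row of each nonzero
//     [ 0 3 5 ]        data_buf = { 1, 2, 3, 4, 5 }    // nonzero values
//
// OneNorm scans column 2 as offsets 3..4, giving |4| + |5| = 9, the maximum
// absolute column sum for this matrix.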
template <typename T>
T FrobeniusNorm(const SparseMatrix<T>& A)
{
    // Compute the square root of the sum of the squared absolute values
    // of all elements.
    const T* data_a = A.LockedDataBuffer();
    const unsigned int size_a = A.Size();

    T sum = T(0);
    for (unsigned int i=0; i != size_a; ++i)
    {
        T val = fabs(data_a[i]);
        sum += val*val;
    }

    return sqrt(sum);
}
template <typename T>
T MaxNorm(const SparseMatrix<T>& A)
{
    // Find max( |A_ij| ).
    const T* data_a = A.LockedDataBuffer();
    const unsigned int size_a = A.Size();

    T max_norm = T(0);
    for (unsigned int i=0; i != size_a; ++i)
    {
        T val = fabs(data_a[i]);
        if (val > max_norm)
            max_norm = val;
    }

    return max_norm;
}
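// A minimal usage sketch for the three norms above.  It assumes an
// already-populated SparseMatrix<double> (how the matrix is built is outside
// this snippet) and that <iostream> is included at the top of this file, as
// the Print routine below already requires.
void PrintNorms(const SparseMatrix<double>& A)
{
    std::cout << "1-norm (max absolute column sum): " << OneNorm(A)       << std::endl;
    std::cout << "Frobenius norm:                   " << FrobeniusNorm(A) << std::endl;
    std::cout << "max norm, max(|A_ij|):            " << MaxNorm(A)       << std::endl;
}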
template <typename T>
bool WriteMatrixMarketFile(const std::string& file_path,
                           const SparseMatrix<T>& A,
                           const unsigned int precision)
{
    // Write a MatrixMarket file with no comments.  Note that the
    // MatrixMarket format uses 1-based indexing for rows and columns.
    std::ofstream outfile(file_path);
    if (!outfile)
        return false;

    unsigned int height = A.Height();
    unsigned int width  = A.Width();
    unsigned int nnz    = A.Size();

    // write the 'banner'
    outfile << MM_BANNER << " matrix coordinate real general" << std::endl;

    // write matrix dimensions and number of nonzeros
    outfile << height << " " << width << " " << nnz << std::endl;

    outfile << std::fixed;
    outfile.precision(precision);

    const unsigned int* cols_a = A.LockedColBuffer();
    const unsigned int* rows_a = A.LockedRowBuffer();
    const T* data_a = A.LockedDataBuffer();

    unsigned int width_a = A.Width();
    for (unsigned int c=0; c != width_a; ++c)
    {
        unsigned int start = cols_a[c];
        unsigned int end   = cols_a[c+1];

        for (unsigned int offset=start; offset != end; ++offset)
        {
            unsigned int r = rows_a[offset];
            T val = data_a[offset];
            outfile << r+1 << " " << c+1 << " " << val << std::endl;
        }
    }

    outfile.close();
    return true;
}
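// For reference, a sketch of the file this routine would produce for the
// hypothetical 3x3 matrix illustrated earlier, assuming MM_BANNER expands to
// the standard "%%MatrixMarket" string and precision == 6 (both are
// assumptions, not values taken from this listing):
//
//     %%MatrixMarket matrix coordinate real general
//     3 3 5
//     1 1 1.000000
//     2 2 2.000000
//     3 2 3.000000
//     1 3 4.000000
//     3 3 5.000000
//
// Rows and columns are 1-based, and entries appear in column-major order
// because the writer walks the CSC column pointers.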
template <typename T>
void Print(const SparseMatrix<T>& M)
{
    // Print a SparseMatrix to the screen.
    const unsigned int* col_buf = M.LockedColBuffer();
    const unsigned int* row_buf = M.LockedRowBuffer();
    const T* buf = M.LockedDataBuffer();

    if (0 == M.Size())
    {
        std::cout << "Matrix is empty." << std::endl;
        return;
    }

    for (unsigned int c=0; c != M.Width(); ++c)
    {
        unsigned int start = col_buf[c];
        unsigned int end   = col_buf[c+1];

        for (unsigned int offset=start; offset != end; ++offset)
        {
            assert(offset < M.Size());
            unsigned int row_index = row_buf[offset];
            T data = buf[offset];
            std::cout << "(" << row_index << ", " << c << "): "
                      << data << std::endl;
        }
    }

    std::cout << "Col indices: ";
    std::cout.flush();
    for (unsigned int i=0; i != M.Width(); ++i)
        std::cout << col_buf[i] << ", ";
    std::cout << col_buf[M.Width()] << std::endl;

    std::cout << "Row indices: ";
    std::cout.flush();
    for (unsigned int i=0; i != M.Size(); ++i)
        std::cout << row_buf[i] << ", ";
    std::cout << std::endl;

    std::cout << "Data: ";
    std::cout.flush();
    for (unsigned int i=0; i != M.Size(); ++i)
        std::cout << buf[i] << ", ";
    std::cout << std::endl;
}
//-----------------------------------------------------------------------------
void Nmf(const unsigned int kval,
         const Algorithm algorithm,
         const std::string& csv_file_w,
         const std::string& csv_file_h)
{
    if (!matrix_loaded)
        throw std::logic_error("smallk error (NMF): no matrix has been loaded.");

    if (max_iter < min_iter)
        throw std::logic_error("smallk error (NMF): min_iterations exceeds max_iterations.");

    if (0 == kval)
        throw std::logic_error("smallk error (NMF): k must be greater than 0.");

    // Check the sizes of matrix W(m, k) and matrix H(k, n) and make sure
    // they don't overflow Elemental's default signed int index type.
    if (!SizeCheck<int>(m, kval))
        throw std::logic_error("smallk error (Nmf): mxk matrix W is too large.");
    if (!SizeCheck<int>(kval, n))
        throw std::logic_error("smallk error (Nmf): kxn matrix H is too large.");

    k = kval;

    // convert to the 'NmfAlgorithm' type in nmf.hpp
    switch (algorithm)
    {
    case Algorithm::MU:
        nmf_opts.algorithm = NmfAlgorithm::MU;
        break;
    case Algorithm::HALS:
        nmf_opts.algorithm = NmfAlgorithm::HALS;
        break;
    case Algorithm::RANK2:
        nmf_opts.algorithm = NmfAlgorithm::RANK2;
        break;
    case Algorithm::BPP:
        nmf_opts.algorithm = NmfAlgorithm::BPP;
        break;
    default:
        throw std::logic_error("smallk error (NMF): unknown NMF algorithm.");
    }

    // set k == 2 for the Rank2 algorithm
    if (NmfAlgorithm::RANK2 == nmf_opts.algorithm)
        k = 2;

    ldim_w = m;
    ldim_h = k;

    if (buf_w.size() < m*k)
        buf_w.resize(m*k);
    if (buf_h.size() < k*n)
        buf_h.resize(k*n);

    // initialize matrices W and H
    bool ok;
    unsigned int height_w = m, width_w = k, height_h = k, width_h = n;

    cout << "Initializing matrix W..." << endl;
    if (csv_file_w.empty())
        ok = RandomMatrix(&buf_w[0], ldim_w, m, k, rng);
    else
        ok = LoadDelimitedFile(buf_w, height_w, width_w, csv_file_w);

    if (!ok)
    {
        std::ostringstream msg;
        msg << "smallk error (Nmf): load failed for file ";
        msg << "\"" << csv_file_w << "\"";
        throw std::runtime_error(msg.str());
    }

    if ( (height_w != m) || (width_w != k))
    {
        cerr << "\tdimensions of matrix W are " << height_w
             << " x " << width_w << endl;
        cerr << "\texpected " << m << " x " << k << endl;
        throw std::logic_error("smallk error (Nmf): non-conformant matrix W.");
    }

    cout << "Initializing matrix H..." << endl;
    if (csv_file_h.empty())
        ok = RandomMatrix(&buf_h[0], ldim_h, k, n, rng);
    else
        ok = LoadDelimitedFile(buf_h, height_h, width_h, csv_file_h);

    if (!ok)
    {
        std::ostringstream msg;
        msg << "smallk error (Nmf): load failed for file ";
        msg << "\"" << csv_file_h << "\"";
        throw std::runtime_error(msg.str());
    }

    if ( (height_h != k) || (width_h != n))
    {
        cerr << "\tdimensions of matrix H are " << height_h
             << " x " << width_h << endl;
        cerr << "\texpected " << k << " x " << n << endl;
        throw std::logic_error("smallk error (Nmf): non-conformant matrix H.");
    }

    // The ratio of projected gradient norms doesn't seem to work very well
    // with MU.  We frequently observe a 'leveling off' behavior and the
    // convergence is even slower than usual.  So for MU, use the relative
    // change in the Frobenius norm of W as the stopping criterion, which
    // always seems to behave well, even though it is on shaky theoretical
    // ground.
    if (NmfAlgorithm::MU == nmf_opts.algorithm)
        nmf_opts.prog_est_algorithm = NmfProgressAlgorithm::DELTA_FNORM;
    else
        nmf_opts.prog_est_algorithm = NmfProgressAlgorithm::PG_RATIO;

    nmf_opts.tol = nmf_tolerance;
    nmf_opts.height = m;
    nmf_opts.width = n;
    nmf_opts.k = k;
    nmf_opts.min_iter = min_iter;
    nmf_opts.max_iter = max_iter;
    nmf_opts.tolcount = 1;
    nmf_opts.max_threads = max_threads;
    nmf_opts.verbose = true;
    nmf_opts.normalize = true;

    // display all params to the user
    PrintNmfOpts(nmf_opts);

    NmfStats stats;
    Result result;
    if (is_sparse)
    {
        result = NmfSparse(nmf_opts,
                           A.Height(), A.Width(), A.Size(),
                           A.LockedColBuffer(),
                           A.LockedRowBuffer(),
                           A.LockedDataBuffer(),
                           &buf_w[0], ldim_w,
                           &buf_h[0], ldim_h,
                           stats);
    }
    else
    {
        result = Nmf(nmf_opts, &buf_a[0], ldim_a,
                     &buf_w[0], ldim_w, &buf_h[0], ldim_h, stats);
    }

    cout << "Elapsed wall clock time: ";
    cout << ElapsedTime(stats.elapsed_us) << endl;
    cout << endl;

    if (Result::OK != result)
        throw std::runtime_error("smallk error (Nmf): NMF solver failure.");

    // write the computed W and H factors to disk
    std::string outfile_w, outfile_h;
    if (outdir.empty())
    {
        outfile_w = DEFAULT_FILENAME_W;
        outfile_h = DEFAULT_FILENAME_H;
    }
    else
    {
        outfile_w = outdir + DEFAULT_FILENAME_W;
        outfile_h = outdir + DEFAULT_FILENAME_H;
    }

    cout << "Writing output files..." << endl;
    if (!WriteDelimitedFile(&buf_w[0], ldim_w, m, k, outfile_w, outprecision))
        throw std::runtime_error("smallk error (Nmf): could not write W result.");
    if (!WriteDelimitedFile(&buf_h[0], ldim_h, k, n, outfile_h, outprecision))
        throw std::runtime_error("smallk error (Nmf): could not write H result.");
}
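// The comment inside Nmf() above motivates the DELTA_FNORM progress metric
// chosen for MU.  The helper below is an illustrative sketch of what
// "relative change in the Frobenius norm of W" means, i.e. the test
// ||W_k - W_{k-1}||_F <= tol * ||W_{k-1}||_F.  It is not the library's
// internal NmfProgressAlgorithm implementation, which may differ in detail.
// It assumes <cmath> and <vector> are included, as the code above requires.
bool DeltaFnormConverged(const std::vector<double>& W_prev,
                         const std::vector<double>& W_curr,
                         const double tol)
{
    double num = 0.0, denom = 0.0;
    for (std::size_t i = 0; i < W_prev.size(); ++i)
    {
        double d = W_curr[i] - W_prev[i];
        num   += d * d;                   // accumulate ||W_k - W_{k-1}||_F^2
        denom += W_prev[i] * W_prev[i];   // accumulate ||W_{k-1}||_F^2
    }
    return std::sqrt(num) <= tol * std::sqrt(denom);
}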