// [[Rcpp::export]] List blockUpdate(mat& Rdraw, double rhodraw, mat B, int row, int col, double division){ double d1 = detR(Rdraw, row, col, 1), d2 = detR(Rdraw, row, col,-1), d0 = detR(Rdraw, row, col, 0); double a = 0.5*(d1+d2-2*d0), b = 0.5*(d1-d2), c = d0; NumericVector sol = realRoot(a,b,c); NumericVector diff = abs(rhodraw-sol); double mindiff = min(diff)/division; double rhonew = rhodraw + R::runif(-mindiff,mindiff); B.fill(rhonew); return List::create(Named("rhonew")=rhonew, Named("B")=B); }
virtual void fit(const record_array & train_data, unsigned int n_iter = 1, bool continue_fit=false) { try { unsigned int batch_size = 1000; unsigned int block_size = train_data.size / batch_size / 16; double shrink = 1 - lambda; unsigned int n_user = 0, n_movie = 0; unsigned int *shuffle_idx; unsigned int *shuffle_idx_batch; timer tmr; tmr.display_mode = 1; learning_rate_per_record = learning_rate; // Generate shuffle_idx cout << train_data.size << endl; shuffle_idx = new unsigned int[train_data.size / batch_size]; for (int i = 0; i < train_data.size / batch_size; i++) { shuffle_idx[i] = i; } //shuffle_idx_batch = new unsigned int[batch_size]; //for (int i = 0; i < batch_size; i++) { // shuffle_idx_batch[i] = i; //} if (!continue_fit) { // Calculate n_user and n_movies for (int i = 0; i < train_data.size; i++) { if (train_data[i].user > n_user) { n_user = train_data[i].user; } if (train_data[i].movie > n_movie) { n_movie = train_data[i].movie; } } // Calculate mu unsigned int cnt[6]; long long s; for (int i = 0; i < 6; i++) { cnt[i] = 0; } for (int i = 0; i < train_data.size; i++) { cnt[int(train_data[i].score)]++; } s = 0; for (int i = 0; i < 6; i++) { s += cnt[i] * i; } mu = 1.0 * s / train_data.size; // Reshape the matrix based on n_user and n_movie U.set_size(K, n_user); V.set_size(K, n_movie); A.set_size(n_user); B.set_size(n_movie); U.fill(fill::randu); V.fill(fill::randu); A.fill(fill::randu); B.fill(fill::randu); } for (int i_iter = 0; i_iter < n_iter; i_iter++) { tmr.tic(); cout << "Iter\t" << i_iter << '\t'; // Reshuffle first reshuffle(shuffle_idx, train_data.size / batch_size); #pragma omp parallel for num_threads(8) for (int i = 0; i < train_data.size / batch_size; i++) { unsigned int index_base = shuffle_idx[i] * batch_size; //reshuffle(shuffle_idx_batch, batch_size); for (int j = 0; j < batch_size; j++) { unsigned int index = index_base + j; // shuffle_idx_batch[j] do harm to the result if (index < train_data.size) { const record& rcd = train_data[index]; update(rcd); } } if (i % block_size == 0) { cout << '.'; } } if (ptr_test_data != NULL) { vector<float> result = this->predict_list(*ptr_test_data); cout << fixed; cout << setprecision(5); cout << '\t' << RMSE(*ptr_test_data, result); } cout << '\t'; tmr.toc(); cout << "\t\t"; cout << max(max(abs(U))) << '\t' << max(max(abs(V))) << '\t' << max(abs(A)) << '\t' << max(abs(B)) << endl; if (i_iter != n_iter - 1) { // Regularization U *= shrink; V *= shrink; A *= shrink; B *= shrink; learning_rate_per_record *= learning_rate_mul; } } delete[]shuffle_idx; } catch (std::bad_alloc & ba) { cout << "bad_alloc caught: " << ba.what() << endl; system("pause"); } }