/**
 * Ranks two movies against each other for the current user.
 *
 * Both ids are decremented by one before being handed to predictRating
 * (presumably 1-based ids mapped to 0-based indices -- confirm against caller).
 *
 * @param movie1 id of the first movie
 * @param movie2 id of the second movie
 * @return -1 when movie1's predicted rating is strictly greater than
 *         movie2's, otherwise 1 (equal predictions therefore yield 1)
 */
int SvdOrder::order(int movie1, int movie2)
{
    const bool firstRanksHigher =
        predictRating(movie1 - 1, currentUser) > predictRating(movie2 - 1, currentUser);
    return firstRanksHigher ? -1 : 1;
}
/** * Iteratively train each feature on the entire data set * Once sufficient progress has been made, move on */ void Svd::calculateFeatures() { double err, p, sq, rmse_last, rmse = 2.0; float cf, mf; printf("--Total Users: %d", currentMovie.dataBase()->totalUsers()); for (int f = 0; f < 5 && f < MAX_FEATURES; ++f) { printf("\n--- Calculating feature: %d ---\n", f); // Keep looping until you have passed a minimum number // of epochs or have stopped making significant progress User user(currentMovie.dataBase()); int totalUsers = currentMovie.dataBase()->totalUsers(); int movieId; for (int e = 0; (e < MIN_EPOCHS) || (rmse <= rmse_last - MIN_IMPROVEMENT); ++e) { sq = 0; rmse_last = rmse; int cacheId = 0; user.setId(1); for (int i = 0; i < totalUsers; ++i) { int custId = currentMovie.dataBase()->mapUser(user.id()); for (int v = 0; v < user.votes(); ++v) { movieId = user.movie(v); // Predict rating and calc error p = predictRating(movieId, custId, f, cache[cacheId], true); err = (1.0 * user.score(v) - p); sq += err * err; // Cache off old feature values cf = m_aCustFeatures[f][custId]; mf = m_aMovieFeatures[f][movieId]; // Cross-train the features m_aCustFeatures[f][custId] += (float)(LRATE * (err * mf - K * cf)); m_aMovieFeatures[f][movieId] += (float)(LRATE * (err * cf - K * mf)); ++cacheId; } user.next(); } rmse = sqrt(sq / MAX_RATINGS); printf(" <set x='%d' y='%f' e='%d'/>\n", e * f + e, rmse, e); } int cacheId = 0; // Cache off old predictions user.setId(6); for (int i = 0; i < currentMovie.dataBase()->totalUsers(); ++i) { for (int v = 0; v < user.votes(); ++v) { cache[cacheId] = (float)predictRating(user.movie(v), i, f, cache[cacheId], false); ++cacheId; } user.next(); } } }
/** * Iteratively train each feature on the entire data set * Once sufficient progress has been made, move on */ void SvdOrder::calculateFeatures() { qDebug() << "Training"; // Keep looping until you have passed the maximum number // of epochs or have stopped making significant progress double prevRMSE = 1e8; double RMSE = 1e7; Movie movie(currDb); double averageRating = currDb->getAverageRating(); for (unsigned int i = 0; i < MIN_EPOCHS || (i < MAX_EPOCHS && (prevRMSE - RMSE) > MIN_IMPROVEMENT); i++) { prevRMSE = RMSE; RMSE = 0; User user(currDb,6); int numUsers = currDb->totalUsers(); int totalSamples = 0; for (int j = 0; j < numUsers; j++) { for (int k = 0; k < user.votes(); k++) { int movieId = user.movie(k); float rating = user.score(k) - averageRating; int userIndex = j; int movieIndex = movieId - 1; float predict = predictRating(movieIndex, userIndex); float diff = predict - rating; RMSE += diff * diff; // Update all the feature vectors here for (unsigned int l = 0; l < NUM_FEATURES; l++) { float oldUF = userFeatures[userIndex][l]; float oldMF = movieFeatures[movieIndex][l]; userFeatures[userIndex][l] -= LRATE * (diff * oldMF * 2 + REGULARIZE * oldUF); movieFeatures[movieIndex][l] -= LRATE * (diff * oldUF * 2 + REGULARIZE * oldMF); } totalSamples++; } user.next(); } RMSE = sqrt(RMSE / totalSamples); qDebug() << "Epoch" << i + 1 << "RMSE: " << RMSE; } qDebug() << "Done with training"; }
void MFModel::updateVectorsWithOneRating(int userID, int itemID, float rating){ addUser(userID); addItem(itemID); vector<float> &userFactors = userVectors[userID]->latentVector; vector<float> &itemFactors = itemVectors[itemID]->latentVector; float eui = rating - predictRating(userID, itemID); //cout << rating << " " << predictRating(userID, itemID) << " "; //cout << userID << itemID << " eui: " << eui << endl;// //for(int i = 0; i < latentLen; i++){ for(int i = 0; i < latentLen; i++){ //cout << "fGama " << fGama << " fLambda: " << fLambda << endl; userFactors[i] += fGama*(eui*itemFactors[i] - fLambda*userFactors[i]); //userVectors[userID]->latentVector[i] += fGama*(eui*itemVectors[itemID]->latentVector[i] - fLambda*userVectors[userID]->latentVector[i]); //cout << "userFactors: " << userFactors[i] << endl; itemFactors[i] += fGama*(eui*userFactors[i] - fLambda*itemFactors[i]); //itemVectors[itemID]->latentVector[i] += fGama*(eui*userVectors[userID]->latentVector[i] - fLambda*itemVectors[itemID]->latentVector[i]); } //cout << userID << " " << userFactors[0] << endl; }
/**
 * Predicts the rating of the current movie for the given user.
 *
 * The user id is translated through the database's mapUser, the current
 * movie is decremented by one to form its index, and the data set's average
 * rating is added to the value returned by predictRating.
 *
 * @param user user id as understood by the database's mapUser
 * @return average rating plus predictRating for the (movie, user) pair
 */
double SvdOrder::determine(int user)
{
    const int movieIdx = currentMovie - 1;
    const int userIdx = currDb->mapUser(user);

    return currDb->getAverageRating() + predictRating(movieIdx, userIdx);
}
void SvdOrder::calculateFeaturesByOrder() { qDebug() << "Training"; // Keep looping until you have passed the maximum number // of epochs or have stopped making significant progress double prevErr = 1e8; double err = 1e7; Movie movie(currDb); float PROGRESS_INTERVAL = 1; clock_t referTime = clock(); int numTestsPerUser = 1000; for (unsigned int i = 0; i < MIN_EPOCHS || (i < MAX_EPOCHS && (prevErr - err) > MIN_IMPROVEMENT); i++) { prevErr = err; err = 0; User user(currDb,6); int numUsers = currDb->totalUsers(); user.setId(6); for (int j = 0; j < numUsers; j++) { if (((float) (clock() - referTime)) / CLOCKS_PER_SEC > PROGRESS_INTERVAL) { qDebug() << j << err / (j + 1); referTime = clock(); } float userErr = 0; int numTests = 0; // Only process some of the movies per user per epoch. Otherwise, // the algorithm takes up alot of time per epoch int userIndex = j; int userVotes = user.votes(); int thisUserTests = numTestsPerUser; if (thisUserTests > userVotes * userVotes) thisUserTests = userVotes * userVotes; for (int testI = 0; testI < thisUserTests; testI++) { int m1 = rand() % userVotes; int m2 = rand() % userVotes; int movieIndex1 = user.movie(m1) - 1; float rating1 = user.score(m1); int movieIndex2 = user.movie(m2) - 1; float rating2 = user.score(m2); // Only do tests on movies that are not the same if (rating1 == rating2) continue; numTests ++; float predict1 = predictRating(movieIndex1, userIndex); float predict2 = predictRating(movieIndex2, userIndex); // This value is positive only when the prediction // order was correct float ratingDiff = (rating1 - rating2)/fabs(rating1 - rating2); float x = ratingDiff * (predict1 - predict2); if (x < 1) { // If the prediction was not correct to some // threshold (in this case 1), move the user factors // accordingly to fix it for (unsigned int fI = 0; fI < NUM_FEATURES; fI++) { float oldUF = userFeatures[userIndex][fI]; float oldMF1 = movieFeatures[movieIndex1][fI]; float oldMF2 = movieFeatures[movieIndex2][fI]; 
userFeatures[userIndex][fI] -= LRATE2 * (-ratingDiff * (oldMF1 - oldMF2) + REGULARIZE2 * oldUF); movieFeatures[movieIndex1][fI] -= LRATE2 * (-ratingDiff * oldUF + REGULARIZE2 * oldMF1); movieFeatures[movieIndex2][fI] -= LRATE2 * (ratingDiff * oldUF + REGULARIZE2 * oldMF2); } if (x < 0) userErr += 1; } } err += userErr / (numTests + 1); user.next(); } err /= numUsers; qDebug() << "Epoch" << i + 1 << "Error: " << err; } qDebug() << "Done with training"; }
int main(){ // Load Movie data loadMovieData(); // printf("test %f %f %f %f\n", userOffset[5], userOffset[num_users - 1], movieOffset[5], movieOffset[num_movies - 1]); // Set up offsets and implicit data userOffset = calloc(num_users, sizeof(float)); movieOffset = calloc(num_movies, sizeof(float)); userImplicitData = calloc(num_users*2, sizeof(float)); if (userOffset == NULL || movieOffset == NULL || userImplicitData == NULL) { printf("Malloc failed\n"); return -1; } loadData("../stats/user_offset_reg2.dta", userOffset); loadData("../stats/movie_offset_reg.dta", movieOffset); loadUserImplicit("../stats/user_implicit_2.dta", userImplicitData); // Initialize features // Initialize random seed srand (time(NULL)); initializeUserFeatures(); initializeMovieFeatures(); initializeImplicitMovies("../../implicit/user_implicit_movies.dta"); //printf("test %f\n", userImplicitMovies[num_users - 1][2]); initializeImplicitFeatures(); printf("\n--------------Training --------------\n"); int user, movie, line_number; float rating, predict, err; float total_err; for (int i = 1; i <= epochs; i++) { total_err = 0; for (int j = 0; j < num_lines; j++) { line_number = j * 3; user = movie_data[line_number]; movie = movie_data[line_number + 1]; rating = (float)movie_data[line_number + 2]; // printf("User %d Movie %d Rating %d Baseling %f\n", user, movie, rating, baseline); getImplicitC(user); predict = predictRating(user, movie); err = rating - predict; total_err += err * err; updateFeatures(user, movie, err); updateBaseline(user, movie, err); updateImplicitFeatures(user, err); } // Update gammas by factor gamma1 *= gamma_step; gamma2 *= gamma_step; printf("Epoch %d RMSE: %f\n", i, sqrt(total_err / num_lines)); } printf("-----------Saving features-----------\n"); saveOffsets(); saveUserFeatures("f010_e020/user_features.dta"); saveMovieFeatures("f010_e020/movie_features.dta"); saveImplicitFeatures("f010_e020/implicit_features.dta"); free(userOffset); free(movieOffset); free(movie_data); 
return 0; }
void model(const double &global_averageRating, const int &userTotalRating[], const int &userRatingCount[], const int &movieTotalRating[], const int movieRatingCount[]) { double bu[SIZE_USER] = {0}; double bi[SIZE_MOVIE] = {0}; for(int i = 0; i < SIZE_USER; i++) { bu[i] = global_averageRating - getMean(userTotalRating[i], userRatingCount[i]); } for(int i = 0; i < SIZE_MOVIE; i++) { bi[i] = global_averageRating - getMean(movieTotalRating[i], movieRatingCount[i]); } //start modeling double learnRate = 0.005; double lambda = 0.02; double pui = 0.0; //predict value user u to movie i double currentRmse = 0.0; double prevRmse = 10000000; float userBase[SIZE_USER][50] = {0}; float movieBase[SIZE_MOVIE][50] = {0}; ofstream result; result.open (OUTPUT_FILE); for(int i = 0; i < 460; i++) { rmseProcess(testMatrix[i],predictRating(userTotalRating[i],userRatingCount[i],bi[i],bu[i])); } for(int step = 0; step < 50; ++step) { long double rmse = 0.0; int n = 0; for(int i = 0; i < SIZE_USER; i++) { double sqrtNub = 0.0; if(userRatingCount[i] != 0) { sqrtNub = (1.0) / sqrt(userRatingCount[i]); } for(int j = 0; j <userRatingCount[i]; j++) { int movieID = movieIndex[j]; //this movieID int rating = dataMatrix[i][j]; //this user rate this movie, the rating double bui = global_totalRating - bu[i] - bi[j]; pui = predictRating(userTotalRating[i], userRatingCount[i],bi[j],bu[i]); result << userIndex[i] << "::" << movieIndex[j] << "::" << dataMatrix[i][j] << "::" << timeMatrix[i][j] << "::"<< pui << endl; double eui = rating - pui; //error about current rating and predicet rating rmse += eui * eui; bu[i] += learnRate * (eui - lambda * bu[j]); bi[j] += learnRate * (eui - lambda * bi[j]); } } currentRmse = sqrt( rmse / n); if(currentRmse >= prevRmse) { break;//if the rmse of test set begin to increase, then break } else { prevRmse = currentRmse; } for(int i = 0; i < 460; i++) { rmseProcess(testMatrix[i],predictRating(userTotalRating[i],userRatingCount[i],bi[i],bu[i])); } } for(int i = 0; i < 
460; i++) { rmseProcess(testMatrix[i],predictRating(userTotalRating[i],userRatingCount[i],bi[i],bu[i])); } }