/** * Iteratively train each feature on the entire data set * Once sufficient progress has been made, move on */ void SvdOrder::calculateFeatures() { qDebug() << "Training"; // Keep looping until you have passed the maximum number // of epochs or have stopped making significant progress double prevRMSE = 1e8; double RMSE = 1e7; Movie movie(currDb); double averageRating = currDb->getAverageRating(); for (unsigned int i = 0; i < MIN_EPOCHS || (i < MAX_EPOCHS && (prevRMSE - RMSE) > MIN_IMPROVEMENT); i++) { prevRMSE = RMSE; RMSE = 0; User user(currDb,6); int numUsers = currDb->totalUsers(); int totalSamples = 0; for (int j = 0; j < numUsers; j++) { for (int k = 0; k < user.votes(); k++) { int movieId = user.movie(k); float rating = user.score(k) - averageRating; int userIndex = j; int movieIndex = movieId - 1; float predict = predictRating(movieIndex, userIndex); float diff = predict - rating; RMSE += diff * diff; // Update all the feature vectors here for (unsigned int l = 0; l < NUM_FEATURES; l++) { float oldUF = userFeatures[userIndex][l]; float oldMF = movieFeatures[movieIndex][l]; userFeatures[userIndex][l] -= LRATE * (diff * oldMF * 2 + REGULARIZE * oldUF); movieFeatures[movieIndex][l] -= LRATE * (diff * oldUF * 2 + REGULARIZE * oldMF); } totalSamples++; } user.next(); } RMSE = sqrt(RMSE / totalSamples); qDebug() << "Epoch" << i + 1 << "RMSE: " << RMSE; } qDebug() << "Done with training"; }
void Test::quickdatabase() { DataBase db; QVERIFY(db.load()); QuickDatabase iv(&db); // Quick check a int overflow case iv.has(479906, 252); User user(&db, 6); for (int i = 0; i < db.totalUsers(); i += db.totalUsers() / 10) { int userNumber = db.mapUser(user.id()); for (int j = 0; j < db.totalMovies(); ++j) { int m = user.seenMovie(j); QVERIFY(iv.has(userNumber, j) == (m != -1)); } user.next(); } }
/**
 * Reads the feature matrices back from a binary file.
 *
 * The file is expected to hold currDb->totalMovies() * NUM_FEATURES floats
 * for the movie features followed by currDb->totalUsers() * NUM_FEATURES
 * floats for the user features, as raw in-memory floats (the layout
 * saveFeatures() writes).
 *
 * If the file cannot be opened, or ends before all values were read, a
 * warning is logged and the function returns; features that were not read
 * keep their previous values.
 *
 * @param filename path of the file to read
 */
void SvdOrder::loadFeatures(QString filename)
{
    QFile in(filename);
    if (!in.open(QFile::ReadOnly)) {
        qWarning() << "SvdOrder::loadFeatures: cannot open" << filename
                   << "-" << in.errorString();
        return;
    }

    const qint64 valueSize = sizeof(float);

    const int numMovies = currDb->totalMovies();
    for (int i = 0; i < numMovies; i++) {
        for (unsigned int j = 0; j < NUM_FEATURES; j++) {
            if (in.read(reinterpret_cast<char *>(&movieFeatures[i][j]), valueSize) != valueSize) {
                qWarning() << "SvdOrder::loadFeatures: short read of movie features from"
                           << filename << "at movie" << i;
                return;
            }
        }
    }

    const int numUsers = currDb->totalUsers();
    for (int i = 0; i < numUsers; i++) {
        for (unsigned int j = 0; j < NUM_FEATURES; j++) {
            if (in.read(reinterpret_cast<char *>(&userFeatures[i][j]), valueSize) != valueSize) {
                qWarning() << "SvdOrder::loadFeatures: short read of user features from"
                           << filename << "at user" << i;
                return;
            }
        }
    }
}
/**
 * Writes the feature matrices to a binary file.
 *
 * First currDb->totalMovies() * NUM_FEATURES movie feature values are
 * written, then currDb->totalUsers() * NUM_FEATURES user feature values,
 * each as a raw in-memory float (the layout loadFeatures() reads).
 *
 * If the file cannot be opened or a write fails, a warning is logged and
 * the function returns; the file may then be incomplete.
 *
 * @param filename path of the file to write
 */
void SvdOrder::saveFeatures(QString filename)
{
    QFile out(filename);
    if (!out.open(QFile::WriteOnly)) {
        qWarning() << "SvdOrder::saveFeatures: cannot open" << filename
                   << "-" << out.errorString();
        return;
    }

    const qint64 valueSize = sizeof(float);

    const int numMovies = currDb->totalMovies();
    for (int i = 0; i < numMovies; i++) {
        for (unsigned int j = 0; j < NUM_FEATURES; j++) {
            if (out.write(reinterpret_cast<const char *>(&movieFeatures[i][j]), valueSize) != valueSize) {
                qWarning() << "SvdOrder::saveFeatures: failed writing movie features to"
                           << filename << "-" << out.errorString();
                return;
            }
        }
    }

    const int numUsers = currDb->totalUsers();
    for (int i = 0; i < numUsers; i++) {
        for (unsigned int j = 0; j < NUM_FEATURES; j++) {
            if (out.write(reinterpret_cast<const char *>(&userFeatures[i][j]), valueSize) != valueSize) {
                qWarning() << "SvdOrder::saveFeatures: failed writing user features to"
                           << filename << "-" << out.errorString();
                return;
            }
        }
    }

    // Writes may be buffered; flush so that a late failure (e.g. disk full)
    // is reported instead of being lost when the file is closed.
    if (!out.flush()) {
        qWarning() << "SvdOrder::saveFeatures: failed flushing" << filename
                   << "-" << out.errorString();
    }
}
void SvdOrder::calculateFeaturesByOrder() { qDebug() << "Training"; // Keep looping until you have passed the maximum number // of epochs or have stopped making significant progress double prevErr = 1e8; double err = 1e7; Movie movie(currDb); float PROGRESS_INTERVAL = 1; clock_t referTime = clock(); int numTestsPerUser = 1000; for (unsigned int i = 0; i < MIN_EPOCHS || (i < MAX_EPOCHS && (prevErr - err) > MIN_IMPROVEMENT); i++) { prevErr = err; err = 0; User user(currDb,6); int numUsers = currDb->totalUsers(); user.setId(6); for (int j = 0; j < numUsers; j++) { if (((float) (clock() - referTime)) / CLOCKS_PER_SEC > PROGRESS_INTERVAL) { qDebug() << j << err / (j + 1); referTime = clock(); } float userErr = 0; int numTests = 0; // Only process some of the movies per user per epoch. Otherwise, // the algorithm takes up alot of time per epoch int userIndex = j; int userVotes = user.votes(); int thisUserTests = numTestsPerUser; if (thisUserTests > userVotes * userVotes) thisUserTests = userVotes * userVotes; for (int testI = 0; testI < thisUserTests; testI++) { int m1 = rand() % userVotes; int m2 = rand() % userVotes; int movieIndex1 = user.movie(m1) - 1; float rating1 = user.score(m1); int movieIndex2 = user.movie(m2) - 1; float rating2 = user.score(m2); // Only do tests on movies that are not the same if (rating1 == rating2) continue; numTests ++; float predict1 = predictRating(movieIndex1, userIndex); float predict2 = predictRating(movieIndex2, userIndex); // This value is positive only when the prediction // order was correct float ratingDiff = (rating1 - rating2)/fabs(rating1 - rating2); float x = ratingDiff * (predict1 - predict2); if (x < 1) { // If the prediction was not correct to some // threshold (in this case 1), move the user factors // accordingly to fix it for (unsigned int fI = 0; fI < NUM_FEATURES; fI++) { float oldUF = userFeatures[userIndex][fI]; float oldMF1 = movieFeatures[movieIndex1][fI]; float oldMF2 = movieFeatures[movieIndex2][fI]; 
userFeatures[userIndex][fI] -= LRATE2 * (-ratingDiff * (oldMF1 - oldMF2) + REGULARIZE2 * oldUF); movieFeatures[movieIndex1][fI] -= LRATE2 * (-ratingDiff * oldUF + REGULARIZE2 * oldMF1); movieFeatures[movieIndex2][fI] -= LRATE2 * (ratingDiff * oldUF + REGULARIZE2 * oldMF2); } if (x < 0) userErr += 1; } } err += userErr / (numTests + 1); user.next(); } err /= numUsers; qDebug() << "Epoch" << i + 1 << "Error: " << err; } qDebug() << "Done with training"; }
int main(int argc, char *argv[]){ script_timer("Total", false); if(argv[1] && strstr(argv[1],"deb")!=NULL) debug = true; // Set debug to true if argv[1] contains "deb" if(debug) fprintf(stderr, "Debug mode on.\n"); if(argv[1] && strstr(argv[1],"off")!=NULL) full_output = false; // Disable full RMSE output if argv[1] contains "off" DataBase db; db.load(); if(db.checkDB()) fprintf(stderr, "checkDB OK\n"); else fprintf(stderr, "DB Corrupt.\n"); db.setTitles(); Movie movies(&db); User users(&db); movies.setId(1); users.setId(6); fprintf(stderr, "db.totalUsers()=%d\n", db.totalUsers()); fprintf(stderr, "db.totalMovies()=%d\n", db.totalMovies()); fprintf(stderr, "db.totalVotes()=%d\n", db.totalVotes()); /* db.loadPreProcessor("data/somemodel"); // Load a preprocessor built using Algorithm::buildPreProcessor("data/somemodel") */ Average avg(&db); //avg.runProbe(); //avg.runQualifying("none", true); avg.buildPreProcessor("data_average"); /* Globals globals(&db); globals.setAverages(10); globals.setVariances(); globals.setThetas(); globals.runProbe(); //globals.runQualifying("none", true); */ #define TRAIN_SIMU true Matrix_Factorization *mf = new Matrix_Factorization(&db); mf->training(); // mf->cache("data_mf_simu"); //mf->runProbe(); //mf->runQualifying("none", true); mf->buildPreProcessor("data_mf"); // User_KNN * uknn = new User_KNN(&db); // uknn->setup(); // uknn->loadUserFeatures("data_mf_simu.users.cache"); //uknn->runProbe(); //uknn->runQualifying("none", true); // uknn->buildPreProcessor("data_uknn"); Blend blend(&db); // blend.setUp(3, "data_average", "data_mf_simu", "data_uknn"); blend.setUp(2, "data_average", "data_mf"); blend.runProbe(); //blend.runQualifying("none", true); /* Blend_Partial blendpartial(&db); blendpartial.setUp(3, "data/average", "data/mf_simu", "data/uknn"); blendpartial.runProbe_partial(); blendpartial.runQualifying("none", true); */ /* KNN knn(&db); knn.setup(); knn.runProbe(); E E E knn.runQualifying("none", true); */ script_timer("Total", 
true); fprintf(stderr, "\n"); print_timer_summary_map(); }