예제 #1
0
/**
 * Train the user and movie feature vectors by gradient descent over every
 * rating reachable through the User iterator.
 *
 * Each epoch walks totalUsers() users and, for every vote of the current
 * user, compares predictRating() with the rating centred on the database
 * average, then moves every feature of that user/movie pair against the
 * error (with a REGULARIZE penalty on the feature's own value).
 *
 * Epochs repeat while fewer than MIN_EPOCHS have run, or while fewer than
 * MAX_EPOCHS have run and the last epoch improved the RMSE by more than
 * MIN_IMPROVEMENT. The RMSE of each epoch is logged through qDebug().
 * If an epoch visits no ratings at all, training stops instead of dividing
 * by zero.
 */
void SvdOrder::calculateFeatures()
{
    qDebug() << "Training";

    // Seed the two error values so that (prevRMSE - RMSE) is large before
    // any real epoch has produced a measurement.
    double prevRMSE = 1e8;
    double RMSE     = 1e7;

    // NOTE(review): `movie` is never read in this function. It is kept only
    // in case constructing it has side effects -- confirm and remove.
    Movie movie(currDb);

    const double averageRating = currDb->getAverageRating();
    const int numUsers = currDb->totalUsers();

    for (unsigned int i = 0;
         i < MIN_EPOCHS ||
            (i < MAX_EPOCHS && (prevRMSE - RMSE) > MIN_IMPROVEMENT);
         i++)
    {
        prevRMSE = RMSE;
        RMSE = 0;

        // A fresh iterator per epoch restarts the walk over the users.
        // NOTE(review): 6 is presumably the first user id -- confirm.
        User user(currDb,6);
        int totalSamples = 0;

        for (int j = 0; j < numUsers; j++)
        {
            for (int k = 0; k < user.votes(); k++)
            {
                int movieId = user.movie(k);
                float rating = user.score(k) - averageRating;

                // Users are indexed by visiting order, movies by (id - 1).
                int userIndex = j;
                int movieIndex = movieId - 1;

                float predict = predictRating(movieIndex, userIndex);

                float diff = predict - rating;
                RMSE += diff * diff;

                // Update all the feature vectors here. Both updates use the
                // values from before this step, hence the saved copies.
                for (unsigned int l = 0; l < NUM_FEATURES; l++)
                {
                    float oldUF = userFeatures[userIndex][l];
                    float oldMF = movieFeatures[movieIndex][l];

                    userFeatures[userIndex][l] -= LRATE *
                                        (diff * oldMF * 2 + REGULARIZE * oldUF);
                    movieFeatures[movieIndex][l] -= LRATE *
                                        (diff * oldUF * 2 + REGULARIZE * oldMF);
                }
                totalSamples++;
            }
            user.next();
        }

        // Without any samples the mean is undefined (0 / 0 would yield NaN
        // and later epochs could not change that), so stop here.
        if (totalSamples == 0)
        {
            qDebug() << "No ratings to train on; stopping";
            break;
        }

        RMSE = sqrt(RMSE / totalSamples);
        qDebug() << "Epoch" << i + 1 << "RMSE: " << RMSE;
    }

    qDebug() << "Done with training";
}
/**
 * Cross-checks QuickDatabase::has() against User::seenMovie().
 *
 * For each sampled user, every movie index j in [0, totalMovies()) must
 * satisfy: has(userNumber, j) is true exactly when seenMovie(j) != -1.
 * The loop counter advances by a tenth of the user count, so about ten
 * users are checked; the users themselves are visited consecutively via
 * user.next().
 */
void Test::quickdatabase()
{
    DataBase db;
    QVERIFY(db.load());
    QuickDatabase iv(&db);

    // Quick check of an int overflow case. The return value is discarded;
    // the call only has to complete.
    iv.has(479906, 252);

    const int totalUsers = db.totalUsers();
    const int totalMovies = db.totalMovies();

    // The stride must be at least 1: with fewer than ten users the integer
    // division yields 0 and the loop below would never terminate.
    const int stride = (totalUsers >= 10) ? totalUsers / 10 : 1;

    User user(&db, 6);
    for (int i = 0; i < totalUsers; i += stride) {
        int userNumber = db.mapUser(user.id());
        for (int j = 0; j < totalMovies; ++j) {
            int m = user.seenMovie(j);
            QVERIFY(iv.has(userNumber, j) == (m != -1));
        }
        user.next();
    }
}
예제 #3
0
/**
 * Read the movie and user feature matrices from a raw binary file.
 *
 * The file is consumed as totalMovies() * NUM_FEATURES values followed by
 * totalUsers() * NUM_FEATURES values, each sizeof(float) bytes, in the same
 * order that saveFeatures() writes them.
 *
 * If the file cannot be opened, nothing is modified. If the data runs out
 * early, a warning is logged and loading stops; entries read before that
 * point have already been overwritten.
 *
 * @param filename Path of the feature file to read.
 */
void SvdOrder :: loadFeatures(QString filename)
{
    QFile in(filename);
    if (!in.open(QFile :: ReadOnly))
    {
        qWarning() << "loadFeatures: cannot open" << filename;
        return;
    }

    const qint64 valueSize = sizeof(float);

    for (int i = 0; i < currDb->totalMovies(); i++)
    {
        for (unsigned int j = 0; j < NUM_FEATURES; j++)
        {
            // read() reports the byte count actually delivered (or -1).
            if (in.read(reinterpret_cast<char*>(&movieFeatures[i][j]),
                        valueSize) != valueSize)
            {
                qWarning() << "loadFeatures: truncated movie features in"
                           << filename;
                return;
            }
        }
    }

    for (int i = 0; i < currDb->totalUsers(); i++)
    {
        for (unsigned int j = 0; j < NUM_FEATURES; j++)
        {
            if (in.read(reinterpret_cast<char*>(&userFeatures[i][j]),
                        valueSize) != valueSize)
            {
                qWarning() << "loadFeatures: truncated user features in"
                           << filename;
                return;
            }
        }
    }
}
예제 #4
0
/**
 * Write the movie and user feature matrices to a raw binary file.
 *
 * Output order is totalMovies() * NUM_FEATURES values followed by
 * totalUsers() * NUM_FEATURES values, each written as sizeof(float) bytes,
 * matching what loadFeatures() reads back.
 *
 * If the file cannot be opened, or a write comes up short, a warning is
 * logged and the function returns early; the file may then be incomplete.
 *
 * @param filename Path of the feature file to create or overwrite.
 */
void SvdOrder :: saveFeatures(QString filename)
{
    QFile out(filename);
    if (!out.open(QFile :: WriteOnly))
    {
        qWarning() << "saveFeatures: cannot open" << filename;
        return;
    }

    const qint64 valueSize = sizeof(float);

    for (int i = 0; i < currDb->totalMovies(); i++)
    {
        for (unsigned int j = 0; j < NUM_FEATURES; j++)
        {
            // write() reports the byte count actually accepted (or -1).
            if (out.write(reinterpret_cast<const char*>(&movieFeatures[i][j]),
                          valueSize) != valueSize)
            {
                qWarning() << "saveFeatures: failed writing movie features to"
                           << filename;
                return;
            }
        }
    }

    for (int i = 0; i < currDb->totalUsers(); i++)
    {
        for (unsigned int j = 0; j < NUM_FEATURES; j++)
        {
            if (out.write(reinterpret_cast<const char*>(&userFeatures[i][j]),
                          valueSize) != valueSize)
            {
                qWarning() << "saveFeatures: failed writing user features to"
                           << filename;
                return;
            }
        }
    }
}
예제 #5
0
/**
 * Train the feature vectors on rating *order* rather than rating value.
 *
 * For every user, random pairs of that user's votes are drawn. When the two
 * ratings differ, the predicted scores should be separated by at least 1 in
 * the same direction; if they are not, the user's features and both movies'
 * features are nudged to widen the gap. Pairs whose predicted order is
 * outright wrong are counted as errors.
 *
 * The epoch loop runs while fewer than MIN_EPOCHS are done, or while fewer
 * than MAX_EPOCHS are done and the error dropped by more than
 * MIN_IMPROVEMENT. Progress is logged through qDebug().
 */
void SvdOrder::calculateFeaturesByOrder()
{
    qDebug() << "Training";

    // Seeded so that the first improvement check sees a large drop.
    double lastEpochErr = 1e8;
    double epochErr     = 1e7;
    Movie movie(currDb);

    // Minimum number of seconds between two progress lines.
    float reportEverySec = 1;
    clock_t lastReport = clock();

    // Upper bound on the number of vote pairs drawn per user per epoch.
    int pairBudget = 1000;

    for (unsigned int epoch = 0;
         epoch < MIN_EPOCHS ||
            (epoch < MAX_EPOCHS && (lastEpochErr - epochErr) > MIN_IMPROVEMENT);
         epoch++)
    {
        lastEpochErr = epochErr;
        epochErr = 0;

        User user(currDb,6);
        int userCount = currDb->totalUsers();

        user.setId(6);

        for (int userIndex = 0; userIndex < userCount; userIndex++)
        {
            // Emit a running average of the error at most once per interval.
            if (((float) (clock() - lastReport)) / CLOCKS_PER_SEC >
                    reportEverySec)
            {
                qDebug() << userIndex << epochErr / (userIndex + 1);
                lastReport = clock();
            }

            float wrongOrder = 0;
            int comparedPairs = 0;

            // Sampling keeps an epoch affordable: never draw more pairs than
            // the budget, nor more than votes * votes for this user.
            int voteCount = user.votes();
            int allPairs = voteCount * voteCount;
            int pairsToTry = (pairBudget > allPairs) ? allPairs : pairBudget;

            for (int attempt = 0; attempt < pairsToTry; attempt++)
            {
                int pickA = rand() % voteCount;
                int pickB = rand() % voteCount;

                int movieA = user.movie(pickA) - 1;
                float scoreA = user.score(pickA);

                int movieB = user.movie(pickB) - 1;
                float scoreB = user.score(pickB);

                // Equal ratings say nothing about order, so skip the pair.
                if (scoreA == scoreB)
                    continue;
                comparedPairs++;

                float predA = predictRating(movieA, userIndex);
                float predB = predictRating(movieB, userIndex);

                // +1 when A is rated above B, -1 otherwise; the margin is
                // positive only if the predictions agree with that order.
                float direction = (scoreA - scoreB)/fabs(scoreA - scoreB);
                float margin = direction * (predA - predB);

                // Already separated by the threshold of 1: nothing to fix.
                if (!(margin < 1))
                    continue;

                for (unsigned int f = 0; f < NUM_FEATURES; f++)
                {
                    // Snapshot first: all three updates use the old values.
                    float userF   = userFeatures[userIndex][f];
                    float movieFA = movieFeatures[movieA][f];
                    float movieFB = movieFeatures[movieB][f];

                    userFeatures[userIndex][f] -=
                        LRATE2 * (-direction * (movieFA - movieFB)
                                 + REGULARIZE2 * userF);

                    movieFeatures[movieA][f] -=
                        LRATE2 * (-direction * userF
                                 + REGULARIZE2 * movieFA);

                    movieFeatures[movieB][f] -=
                        LRATE2 * (direction * userF
                                 + REGULARIZE2 * movieFB);
                }

                // A negative margin means the predicted order was reversed.
                if (margin < 0)
                    wrongOrder += 1;
            }

            // The +1 keeps the division defined when no pair was compared.
            epochErr += wrongOrder / (comparedPairs + 1);
            user.next();
        }

        epochErr /= userCount;
        qDebug() << "Epoch" << epoch + 1 << "Error: " << epochErr;
    }

    qDebug() << "Done with training";
}
예제 #6
0
int main(int argc, char *argv[]){
	script_timer("Total", false);
	if(argv[1] && strstr(argv[1],"deb")!=NULL) debug = true;	//	Set debug to true if argv[1] contains "deb"
	if(debug) fprintf(stderr, "Debug mode on.\n");
	if(argv[1] && strstr(argv[1],"off")!=NULL) full_output = false;	//	Disable full RMSE output if argv[1] contains "off"

	DataBase db;
	db.load();
	if(db.checkDB()) fprintf(stderr, "checkDB OK\n");
	else fprintf(stderr, "DB Corrupt.\n");
	db.setTitles();
	Movie movies(&db);
	User users(&db);
	movies.setId(1);
	users.setId(6);

	fprintf(stderr, "db.totalUsers()=%d\n", db.totalUsers());
	fprintf(stderr, "db.totalMovies()=%d\n", db.totalMovies());
	fprintf(stderr, "db.totalVotes()=%d\n", db.totalVotes());

/*
	db.loadPreProcessor("data/somemodel");	//	Load a preprocessor built using Algorithm::buildPreProcessor("data/somemodel")
*/

	Average avg(&db);
	//avg.runProbe();
	//avg.runQualifying("none", true);
	avg.buildPreProcessor("data_average");

/*
	Globals globals(&db);
	globals.setAverages(10);
	globals.setVariances();
	globals.setThetas();
	globals.runProbe();
	//globals.runQualifying("none", true);
*/

	#define TRAIN_SIMU true
	Matrix_Factorization *mf = new Matrix_Factorization(&db);
	mf->training();
//	mf->cache("data_mf_simu");
	//mf->runProbe();
	//mf->runQualifying("none", true);
	mf->buildPreProcessor("data_mf");


//	User_KNN * uknn = new User_KNN(&db);
//	uknn->setup();
//	uknn->loadUserFeatures("data_mf_simu.users.cache");
	//uknn->runProbe();
	//uknn->runQualifying("none", true);
//	uknn->buildPreProcessor("data_uknn");


	Blend blend(&db);
//	blend.setUp(3, "data_average", "data_mf_simu", "data_uknn");
	blend.setUp(2, "data_average", "data_mf");
	blend.runProbe();
	//blend.runQualifying("none", true);

/*
	Blend_Partial blendpartial(&db);
	blendpartial.setUp(3, "data/average", "data/mf_simu", "data/uknn");
	blendpartial.runProbe_partial();
	blendpartial.runQualifying("none", true);
*/
/*
	KNN knn(&db);
	knn.setup();
	knn.runProbe();
E
E
E
	knn.runQualifying("none", true);
*/
	script_timer("Total", true);
	fprintf(stderr, "\n");
	print_timer_summary_map();
}