// Trains a collaborative filter on one dataset and measures its accuracy on
// another.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the random number generator; when omitted,
//                a seed is derived from the process id and the current time
//   <train>      training set, read with loadData
//   <test>       test set, read with loadData
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// Prints the value returned by trainAndTest as "MSE" and the value it stores
// through its last parameter as "MAE". Calls ThrowError on an unknown flag, a
// missing dataset, or leftover arguments.
void transacc(GArgReader& args)
{
	// Parse options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid transacc option: ", args.peek()); // previously named the wrong command
	}

	// Load the data
	if(args.size() < 1)
		ThrowError("No training set specified.");
	GMatrix* pTrain = loadData(args.pop_string());
	Holder<GMatrix> hTrain(pTrain);
	if(args.size() < 1)
		ThrowError("No test set specified.");
	GMatrix* pTest = loadData(args.pop_string());
	Holder<GMatrix> hTest(pTest);

	// Instantiate the recommender
	GRand prng(seed);
	GCollaborativeFilter* pModel = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Train on the training set and evaluate on the test set
	double mae = 0.0;
	double mse = pModel->trainAndTest(*pTrain, *pTest, &mae);
	cout << "MSE=" << mse << ", MAE=" << mae << "\n";
}
// Computes precision-recall data for a collaborative filter and prints it.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the random number generator; when omitted,
//                a seed is derived from the process id and the current time
//   [-ideal]     forwarded as the second argument of
//                GCollaborativeFilter::precisionRecall
//   <dataset>    dataset, read with loadData
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// Calls ThrowError on an unknown flag, a missing dataset, or leftover
// arguments.
void precisionRecall(GArgReader& args)
{
	unsigned int rngSeed = getpid() * static_cast<unsigned int>(time(NULL));
	bool useIdeal = false;

	// Consume the leading flags
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
		{
			rngSeed = args.pop_uint();
			continue;
		}
		if(args.if_pop("-ideal"))
		{
			useIdeal = true;
			continue;
		}
		ThrowError("Invalid option: ", args.peek());
	}

	// The dataset comes next
	if(args.size() < 1)
		ThrowError("No dataset specified.");
	GMatrix* pDataset = loadData(args.pop_string());
	Holder<GMatrix> hDataset(pDataset);

	// Whatever remains describes the recommender
	GRand prng(rngSeed);
	GCollaborativeFilter* pFilter = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hFilter(pFilter);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Build the precision-recall table and print it
	GMatrix* pCurve = pFilter->precisionRecall(*pDataset, useIdeal);
	Holder<GMatrix> hCurve(pCurve);
	pCurve->deleteColumn(2); // the false-positive rate column is not needed in the output
	pCurve->print(cout);
}
// Cross-validates a collaborative filter on a dataset and prints the error.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the random number generator; when omitted,
//                a seed is derived from the process id and the current time
//   [-folds n]   number of folds (default 2, must be at least 2)
//   <dataset>    dataset, read with loadData
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// The value returned by GCollaborativeFilter::crossValidate is printed as
// "MSE" (and its square root as "RMSE"); the value stored through the last
// parameter is printed as "MAE". Calls ThrowError on an unknown flag, too few
// folds, a missing dataset, or leftover arguments.
void crossValidate(GArgReader& args)
{
	unsigned int rngSeed = getpid() * static_cast<unsigned int>(time(NULL));
	size_t foldCount = 2;

	// Consume the leading flags
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
		{
			rngSeed = args.pop_uint();
			continue;
		}
		if(args.if_pop("-folds"))
		{
			foldCount = args.pop_uint();
			continue;
		}
		ThrowError("Invalid crossvalidate option: ", args.peek());
	}
	if(foldCount <= 1)
		ThrowError("There must be at least 2 folds.");

	// The dataset comes next
	if(args.size() < 1)
		ThrowError("No dataset specified.");
	GMatrix* pDataset = loadData(args.pop_string());
	Holder<GMatrix> hDataset(pDataset);

	// Whatever remains describes the recommender
	GRand prng(rngSeed);
	GCollaborativeFilter* pFilter = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hFilter(pFilter);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Run the cross-validation and report the errors
	double mae;
	const double mse = pFilter->crossValidate(*pDataset, foldCount, &mae);
	const double rmse = sqrt(mse);
	cout << "RMSE=" << rmse;
	cout << ", MSE=" << mse;
	cout << ", MAE=" << mae << "\n";
}
// Trains a collaborative filter on one dataset and measures its accuracy on
// another.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the model's random number generator; when
//                omitted, a seed is derived from the process id and the
//                current time
//   <train>      training set, read with loadData
//   <test>       test set, read with loadData
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// Prints the value returned by trainAndTest as "MSE" and the value it stores
// through its last parameter as "MAE". Throws Ex on an unknown flag, a missing
// dataset, or leftover arguments.
void GRecommenderLib::transacc(GArgReader& args)
{
	// Parse options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			throw Ex("Invalid transacc option: ", args.peek()); // previously named the wrong command
	}

	// Load the data
	if(args.size() < 1)
		throw Ex("No training set specified.");
	GMatrix train;
	loadData(train, args.pop_string());
	if(args.size() < 1)
		throw Ex("No test set specified.");
	GMatrix test;
	loadData(test, args.pop_string());

	// Instantiate the recommender
	GCollaborativeFilter* pModel = InstantiateAlgorithm(args);
	std::unique_ptr<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		throw Ex("Superfluous argument: ", args.peek());
	pModel->rand().setSeed(seed);

	// Train on the training set and evaluate on the test set
	double mae = 0.0;
	double mse = pModel->trainAndTest(train, test, &mae);
	cout << "MSE=" << mse << ", MAE=" << mae << "\n";
}
// Computes precision-recall data for a collaborative filter and prints it.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the model's random number generator; when
//                omitted, a seed is derived from the process id and the
//                current time
//   [-ideal]     forwarded as the second argument of
//                GCollaborativeFilter::precisionRecall
//   <dataset>    dataset, read with loadData
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// Throws Ex on an unknown flag, a missing dataset, or leftover arguments.
void GRecommenderLib::precisionRecall(GArgReader& args)
{
	unsigned int rngSeed = getpid() * static_cast<unsigned int>(time(NULL));
	bool useIdeal = false;

	// Consume the leading flags
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
		{
			rngSeed = args.pop_uint();
			continue;
		}
		if(args.if_pop("-ideal"))
		{
			useIdeal = true;
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// The dataset comes next
	if(args.size() < 1)
		throw Ex("No dataset specified.");
	GMatrix dataset;
	loadData(dataset, args.pop_string());

	// Whatever remains describes the recommender; seed it only once the
	// argument list is known to be fully consumed
	std::unique_ptr<GCollaborativeFilter> hFilter(InstantiateAlgorithm(args));
	if(args.size() > 0)
		throw Ex("Superfluous argument: ", args.peek());
	hFilter->rand().setSeed(rngSeed);

	// Build the precision-recall table and print it
	std::unique_ptr<GMatrix> hCurve(hFilter->precisionRecall(dataset, useIdeal));
	hCurve->deleteColumns(2, 1); // the false-positive rate column is not needed in the output
	hCurve->print(cout);
}
// Predicts the missing values of an ARFF dataset with a collaborative filter
// and prints the completed dataset to stdout.
//
// Expected arguments, in order:
//   [-seed n]    optional seed for the random number generator; when omitted,
//                a seed is derived from the process id and the current time
//   <dataset>    ARFF file, read with GMatrix::loadArff
//   <algorithm>  remaining arguments, consumed by InstantiateAlgorithm
//
// The data is passed through a GNormalize + GNominalToCat chain (unknown
// values preserved), every known element is handed to the filter as a
// (row, column, value) triple, each unknown element is replaced by the
// filter's prediction, and the result is transformed back and printed with
// the original relation. Calls ThrowError on an unknown flag, a missing
// dataset, or leftover arguments.
void fillMissingValues(GArgReader& args)
{
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Load the data and the filter
	if(args.size() < 1)
		ThrowError("No dataset specified.");
	GMatrix* pDataOrig = GMatrix::loadArff(args.pop_string());
	Holder<GMatrix> hDataOrig(pDataOrig);
	sp_relation pOrigRel = pDataOrig->relation();
	GRand prng(seed);
	GCollaborativeFilter* pModel = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Convert to all normalized real values
	GNominalToCat* pNtc = new GNominalToCat();
	GTwoWayTransformChainer filter(new GNormalize(), pNtc);
	pNtc->preserveUnknowns();
	filter.train(*pDataOrig);
	GMatrix* pData = filter.transformBatch(*pDataOrig);
	Holder<GMatrix> hData(pData);
	// NOTE(review): the original called hDataOrig.release() here. Holder::release()
	// presumably hands ownership back to the caller without deleting (as
	// std::unique_ptr::release() does) -- TODO confirm -- in which case the matrix
	// leaked. The holder now simply frees it when this function returns.

	// Convert to 3-column form
	GMatrix* pMatrix = new GMatrix(0, 3);
	Holder<GMatrix> hMatrix(pMatrix);
	size_t dims = pData->cols();
	for(size_t i = 0; i < pData->rows(); i++)
	{
		const double* pRow = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(pRow[j] != UNKNOWN_REAL_VALUE)
			{
				double* pVec = pMatrix->newRow();
				pVec[0] = static_cast<double>(i);
				pVec[1] = static_cast<double>(j);
				pVec[2] = pRow[j];
			}
		}
	}

	// Train the collaborative filter. The training matrix stays owned by hMatrix
	// until scope exit: that fixes the same suspected release() leak and remains
	// safe even if the model were to keep a reference to its training data.
	pModel->train(*pMatrix);

	// Predict values for missing elements
	for(size_t i = 0; i < pData->rows(); i++)
	{
		double* pRow = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(pRow[j] == UNKNOWN_REAL_VALUE)
				pRow[j] = pModel->predict(i, j);
			GAssert(pRow[j] != UNKNOWN_REAL_VALUE);
		}
	}

	// Convert the data back to its original form. The result is assumed to be a
	// newly allocated matrix owned by the caller (like transformBatch's result),
	// so hold it; it was never freed before.
	GMatrix* pOut = filter.untransformBatch(*pData);
	Holder<GMatrix> hOut(pOut);
	pOut->setRelation(pOrigRel);
	pOut->print(cout);
}
// Predicts the missing values of an ARFF dataset with a collaborative filter
// and prints the completed dataset to stdout.
//
// Expected arguments, in order:
//   [-seed n]        optional seed for the model's random number generator;
//                    when omitted, a seed is derived from the process id and
//                    the current time
//   [-nonormalize]   skip the GNormalize step and use only GNominalToCat
//   <dataset>        ARFF file, read with GMatrix::loadArff
//   [-ignore attrs]  attribute list (parsed by parseAttributeList) naming
//                    columns to delete before processing
//   <algorithm>      remaining arguments, consumed by InstantiateAlgorithm
//
// Every known element of the transformed data is handed to the filter as a
// (row, column, value) triple, each unknown element is replaced by the
// filter's prediction, and the result is transformed back and printed with a
// clone of the (post-deletion) original relation. Throws Ex on an unknown
// flag, a missing dataset, or leftover arguments.
void GRecommenderLib::fillMissingValues(GArgReader& args)
{
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	bool normalize = true;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-nonormalize"))
			normalize = false;
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Load the data and the filter
	if(args.size() < 1)
		throw Ex("No dataset specified.");
	GMatrix dataOrig;
	dataOrig.loadArff(args.pop_string());

	// Parse params
	vector<size_t> ignore;
	while(args.next_is_flag())
	{
		if(args.if_pop("-ignore"))
			parseAttributeList(ignore, args, dataOrig.cols());
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Throw out the ignored attributes, highest index first so that the
	// indices still to be deleted are not shifted by earlier deletions
	std::sort(ignore.begin(), ignore.end());
	for(auto it = ignore.rbegin(); it != ignore.rend(); ++it)
		dataOrig.deleteColumns(*it, 1);
	GRelation* pOrigRel = dataOrig.relation().clone();
	std::unique_ptr<GRelation> hOrigRel(pOrigRel);
	GCollaborativeFilter* pModel = InstantiateAlgorithm(args);
	std::unique_ptr<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		throw Ex("Superfluous argument: ", args.peek());
	pModel->rand().setSeed(seed);

	// Convert to all normalized real values. hNtc owns the GNominalToCat until
	// (and unless) a chainer adopts it, so it is no longer leaked when
	// -nonormalize is given.
	std::unique_ptr<GNominalToCat> hNtc(new GNominalToCat());
	GNominalToCat* pNtc = hNtc.get();
	GIncrementalTransform* pFilter = pNtc;
	std::unique_ptr<GIncrementalTransformChainer> hChainer;
	if(normalize)
	{
		// The chainer is presumed to take ownership of both transforms it is
		// given (the original code already relied on this for the GNormalize)
		// -- TODO confirm.
		GIncrementalTransformChainer* pChainer = new GIncrementalTransformChainer(new GNormalize(), pNtc);
		hChainer.reset(pChainer);
		hNtc.release();
		pFilter = pChainer;
	}
	pNtc->preserveUnknowns();
	pFilter->train(dataOrig);
	GMatrix* pData = pFilter->transformBatch(dataOrig);
	std::unique_ptr<GMatrix> hData(pData);

	// Convert to 3-column form
	GMatrix* pMatrix = new GMatrix(0, 3);
	std::unique_ptr<GMatrix> hMatrix(pMatrix);
	size_t dims = pData->cols();
	for(size_t i = 0; i < pData->rows(); i++)
	{
		GVec& row = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(row[j] != UNKNOWN_REAL_VALUE)
			{
				GVec& vec = pMatrix->newRow();
				vec[0] = (double)i;
				vec[1] = (double)j;
				vec[2] = row[j];
			}
		}
	}

	// Train the collaborative filter. The original called hMatrix.release()
	// here, which gives up ownership without deleting and so leaked the matrix.
	// It now stays owned until scope exit, which uses no more memory than the
	// leak did and remains safe even if the model were to keep a reference to
	// its training data.
	pModel->train(*pMatrix);

	// Predict values for missing elements
	for(size_t i = 0; i < pData->rows(); i++)
	{
		GVec& row = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(row[j] == UNKNOWN_REAL_VALUE)
				row[j] = pModel->predict(i, j);
			GAssert(row[j] != UNKNOWN_REAL_VALUE);
		}
	}

	// Convert the data back to its original form. The result is assumed to be a
	// newly allocated matrix owned by the caller (like transformBatch's result),
	// so hold it; it was never freed before.
	GMatrix* pOut = pFilter->untransformBatch(*pData);
	std::unique_ptr<GMatrix> hOut(pOut);
	pOut->setRelation(hOrigRel.release());
	pOut->print(cout);
}