/**
 * Demo driver for the set helpers.
 *
 * Streams the result of each helper call to standard output, one result per
 * line, in this order: setCross, setIntersect, setMinus, setUnion (each on
 * the operands SetFactory(0) and SetFactory(1)), then setCardinality on
 * SetFactory(1) with add(2) and add(3) chained onto it.
 *
 * @return always 0.
 */
int main() {
    using std::cout;
    using std::endl;

    // Binary helpers, each applied to freshly built operands.
    cout << setCross(SetFactory(0), SetFactory(1)) << endl;
    cout << setIntersect(SetFactory(0), SetFactory(1)) << endl;
    cout << setMinus(SetFactory(0), SetFactory(1)) << endl;
    cout << setUnion(SetFactory(0), SetFactory(1)) << endl;

    // Cardinality of a factory-built operand after two chained add() calls.
    cout << setCardinality(SetFactory(1).add(2).add(3)) << endl;

    return 0;
}
void compJaccSimAccu(Data& data, Params& params) { std::vector<ModelMF> mfModels; std::vector<ModelMF> origModels; for (int i = 1; i < 3; i++) { std::string prefix = "mf_0_0_10_" + std::to_string(i); ModelMF fullModel(params, params.seed); fullModel.loadFacs(prefix.c_str()); mfModels.push_back(fullModel); std::string uFName = "uFac_" + std::to_string(fullModel.nUsers) + "_10_0.txt"; std::string iFName = "iFac_" + std::to_string(fullModel.nItems+1) + "_10_0.txt"; ModelMF origModel(params, uFName.c_str(), iFName.c_str(), params.seed); origModels.push_back(origModel); } /* for (int i = 0; i < 2; i++) { std::string prefix = "mfrand_" + std::to_string(i) + "_0_20"; ModelMF fullModel(params, params.seed); fullModel.loadFacs(prefix.c_str()); mfModels.push_back(fullModel); std::string uFName = "uFac_" + std::to_string(fullModel.nUsers) + "_20_" + std::to_string(i) + ".txt"; std::string iFName = "iFac_" + std::to_string(fullModel.nItems) + "_20_" + std::to_string(i) + ".txt"; ModelMF origModel(params, uFName.c_str(), iFName.c_str(), params.seed); origModels.push_back(origModel); } */ int nModels = mfModels.size(); std::unordered_set<int> invalidUsers; std::unordered_set<int> invalidItems; std::string modelSign = mfModels[0].modelSignature(); std::cout << "\nModel sign: " << modelSign << std::endl; std::string prefix = std::string(params.prefix) + "_" + modelSign + "_invalUsers.txt"; std::vector<int> invalUsersVec = readVector(prefix.c_str()); prefix = std::string(params.prefix) + "_" + modelSign + "_invalItems.txt"; std::vector<int> invalItemsVec = readVector(prefix.c_str()); for (auto v: invalUsersVec) { invalidUsers.insert(v); } for (auto v: invalItemsVec) { invalidItems.insert(v); } for (int i = 0; i < nModels; i++) { auto& fullModel = mfModels[i]; auto& origModel = origModels[i]; std::cout << "Model: " << i << std::endl; std::cout << "Train RMSE: " << fullModel.RMSE(data.trainMat, invalidUsers, invalidItems, origModel) << std::endl; std::cout << "Test RMSE: " << 
fullModel.RMSE(data.testMat, invalidUsers, invalidItems, origModel) << std::endl; std::cout << "Val RMSE: " << fullModel.RMSE(data.valMat, invalidUsers, invalidItems, origModel) << std::endl; //std::cout << "Full RMSE: " << fullModel.fullLowRankErr(data, invalidUsers, // invalidItems, origModel) << std::endl; } std::vector<double> epsilons = {0.025, 0.05, 0.1, 0.25, 0.5, 1.0}; //std::vector<double> epsilons = {0.5}; int nUsers = data.trainMat->nrows; int nItems = data.trainMat->ncols; std::cout << "nModels: " << nModels << std::endl; for (auto&& epsilon: epsilons) { std::cout << "epsilon: " << epsilon << std::endl; std::vector<std::vector<float>> itemsJacSims(nItems); std::vector<std::vector<float>> itemAccuCount(nItems); std::vector<std::vector<float>> itemPearsonCorr(nItems); #pragma omp parallel for for (int item = 0; item < nItems; item++) { if (invalidItems.count(item) > 0) { continue; } std::vector<std::unordered_set<int>> modelAccuItems(nModels); std::vector<std::vector<float>> itemErr(nModels); std::vector<float> itemErrMean(nModels, 0); std::vector<bool> ratedUsers(nUsers, false); int count = 0; for (int uu = data.trainMat->colptr[item]; uu < data.trainMat->colptr[item+1]; uu++) { ratedUsers[data.trainMat->colind[uu]] = true; } for (int user = 0; user < nUsers; user++) { if (invalidUsers.count(user) > 0) { continue; } if (ratedUsers[user]) { continue; } for (int i = 0; i < nModels; i++) { auto& mfModel = mfModels[i]; auto& origModel = origModels[i]; float r_ui_est = mfModel.estRating(user, item); float r_ui = origModel.estRating(user, item); float diff = fabs(r_ui - r_ui_est); itemErr[i].push_back(diff); itemErrMean[i] += diff; if (diff <= epsilon) { modelAccuItems[i].insert(user); } } count++; } for (int i = 0; i < nModels; i++) { itemErrMean[i] /= count; } for (int i = 0; i < nModels; i++) { itemAccuCount[item].push_back(modelAccuItems[i].size()); for (int j = i+1; j < nModels; j++) { //compute overlap between model i and model j float intersectCount = 
(float) setIntersect(modelAccuItems[i], modelAccuItems[j]); float unionCount = (float) setUnion(modelAccuItems[i], modelAccuItems[j]); float jacSim = 0; if (unionCount > 0) { jacSim = intersectCount/unionCount; } itemsJacSims[item].push_back(jacSim); //compute correlation between model i & j for the item itemPearsonCorr[item].push_back(pearsonCorr(itemErr[i], itemErr[j], itemErrMean[i], itemErrMean[j])); } } } std::string opFName = std::string(params.prefix) + "_" + std::to_string(epsilon) + "_modelsJacSim.txt"; std::string opFName2 = std::string(params.prefix) + "_" + std::to_string(epsilon) + "_modelsPearson.txt"; std::cout << "Writing... " << opFName << std::endl; std::ofstream opFile(opFName.c_str()); std::ofstream opFile2(opFName2.c_str()); for (int item = 0; item < nItems; item++) { opFile << item << " "; for (auto&& sim: itemsJacSims[item]) { opFile << sim << " "; } for (auto&& accu: itemAccuCount[item]) { opFile << accu << " "; } opFile << std::endl; opFile2 << item << " "; for (auto&& corr: itemPearsonCorr[item]) { opFile2 << corr << " "; } opFile2 << std::endl; } opFile.close(); opFile2.close(); } }