// General-purpose constructor PlanningSystem::PlanningSystem(Agent& agent, TransitionModel& transition, ObservationModel& observation, ContentmentModel& contentment, Mentor* oracle, size_t actionDimensions, size_t populationSize, size_t planRefinementIters, size_t burnInIters, size_t maxPlanLen, double discount, double explore, GRand& r) : self(agent), transitionModel(transition), observationModel(observation), contentmentModel(contentment), mentor(oracle), tutor(nullptr), actionDims(actionDimensions), burnIn(burnInIters), discountFactor(discount), explorationRate(explore), rand(r), randomPlan(1, actionDims) { GAssert(randomPlan[0].size() == actionDims); if(populationSize < 2) throw Ex("The population size must be at least 2"); refinementIters = populationSize * planRefinementIters; maxPlanLength = maxPlanLen; for(size_t i = 0; i < populationSize; i++) { GMatrix* p = new GMatrix(0, actionDims); plans.push_back(p); for(size_t j = std::min(maxPlanLen, rand.next(maxPlanLen) + 2); j > 0; j--) { // Add a random action vector to the end GVec& newActions = p->newRow(); newActions.fillUniform(rand); } } }
/// Loads a dataset from szFilename, choosing the parser by the file extension
/// (compared case-insensitively with _stricmp):
///  - ".sparse": the file is parsed as JSON into a GSparseMatrix, and every
///    stored element becomes one row of a new 3-column matrix holding
///    (row index, column index, value).
///  - ".arff": delegates to GMatrix::loadArff.
/// Any other extension is reported through ThrowError.
///
/// Returns a heap-allocated matrix. The ".sparse" path hands back an object
/// created here with new, so the caller is responsible for deleting it.
GMatrix* loadData(const char* szFilename)
{
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	const char* szExt = szFilename + pd.extStart;
	if(_stricmp(szExt, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		GSparseMatrix sm(doc.root());

		// Hold the result in a smart pointer while it is being filled, so it
		// is not leaked if appending a row (or walking the sparse matrix) throws.
		std::unique_ptr<GMatrix> hData(new GMatrix(0, 3));
		for(size_t i = 0; i < sm.rows(); i++)
		{
			GSparseMatrix::Iter rowEnd = sm.rowEnd(i);
			for(GSparseMatrix::Iter it = sm.rowBegin(i); it != rowEnd; it++)
			{
				double* pVec = hData->newRow();
				pVec[0] = static_cast<double>(i);
				pVec[1] = static_cast<double>(it->first);
				pVec[2] = it->second;
			}
		}

		// Success: hand ownership to the caller
		return hData.release();
	}
	else if(_stricmp(szExt, ".arff") == 0)
		return GMatrix::loadArff(szFilename);
	else
	{
		ThrowError("Unsupported file format: ", szExt);
		// NOTE(review): ThrowError presumably never returns, in which case this
		// statement only exists to give every path a return value -- confirm.
		return NULL;
	}
}
/// Fills data from the file named szFilename. The file extension (matched
/// case-insensitively with _stricmp) selects the format:
///  - ".sparse": the file is parsed as JSON into a GSparseMatrix; data is
///    resized to 0 rows by 3 columns and receives one row per stored element
///    holding (row index, column index, value).
///  - ".arff": data.loadArff does all the work.
/// Throws Ex for any other extension.
void GRecommenderLib::loadData(GMatrix& data, const char* szFilename)
{
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	const char* szExt = szFilename + pd.extStart;

	// Deal with everything that is not a sparse file first
	const bool isSparse = (_stricmp(szExt, ".sparse") == 0);
	if(!isSparse)
	{
		if(_stricmp(szExt, ".arff") != 0)
			throw Ex("Unsupported file format: ", szExt);
		data.loadArff(szFilename);
		return;
	}

	// Sparse file: flatten each stored element into a 3-column row
	GDom doc;
	doc.loadJson(szFilename);
	GSparseMatrix sm(doc.root());
	data.resize(0, 3);
	for(size_t r = 0; r < sm.rows(); r++)
	{
		GSparseMatrix::Iter stop = sm.rowEnd(r);
		GSparseMatrix::Iter cur = sm.rowBegin(r);
		while(cur != stop)
		{
			GVec& triple = data.newRow();
			triple[0] = static_cast<double>(r);
			triple[1] = static_cast<double>(cur->first);
			triple[2] = cur->second;
			cur++;
		}
	}
}
/// Replaces the specified plan with a new one. void PlanningSystem::replace(size_t childIndex) { double d = rand.uniform(); if(d < 0.2) { // Clone a random parent (asexual reproduction) size_t randomPlanIndex = rand.next(plans.size() - 1); if(randomPlanIndex >= childIndex) randomPlanIndex++; GMatrix& randPlan = *plans[randomPlanIndex]; GMatrix* pPlanCopy = new GMatrix(randPlan); delete(plans[childIndex]); plans[childIndex] = pPlanCopy; } else if(d < 0.7) { // Cross-over (sexual reproduction) GMatrix& mother = *plans[rand.next(plans.size())]; GMatrix& father = *plans[rand.next(plans.size())]; size_t crossOverPoint = rand.next(mother.rows()); GMatrix* pChild = new GMatrix(0, mother.cols()); for(size_t i = 0; i < crossOverPoint; i++) pChild->newRow().copy(mother[i]); for(size_t i = crossOverPoint; i < father.rows(); i++) pChild->newRow().copy(father[i]); delete(plans[childIndex]); plans[childIndex] = pChild; } else { // Interpolation/extrapolation GMatrix& mother = *plans[rand.next(plans.size())]; GMatrix& father = *plans[rand.next(plans.size())]; size_t len = std::min(mother.rows(), father.rows()); GMatrix* pChild = new GMatrix(len, mother.cols()); double alpha = rand.uniform() * 2.0; for(size_t i = 0; i < len; i++) { GVec& a = mother[i]; GVec& b = father[i]; GVec& c = (*pChild)[i]; for(size_t j = 0; j < c.size(); j++) c[j] = alpha * a[j] + (1.0 - alpha) * b[j]; c.clip(0.0, 1.0); } delete(plans[childIndex]); plans[childIndex] = pChild; } }
/// Command-line entry point that fills in the missing values of an ARFF
/// dataset with predictions from a collaborative filter and prints the
/// completed dataset to cout.
///
/// Arguments consumed from args, in order:
///  - optional flag "-seed <uint>" (the seed otherwise defaults to
///    getpid() * time(NULL)); any other flag is reported with ThrowError,
///  - the ARFF filename,
///  - the algorithm description consumed by InstantiateAlgorithm.
/// Anything left over afterwards is reported with ThrowError.
///
/// All intermediate matrices stay owned by their holders until the function
/// returns. The previous code called release() on two holders and never freed
/// the final output, which leaked them.
void fillMissingValues(GArgReader& args)
{
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Load the data and the filter
	GMatrix* pDataOrig = GMatrix::loadArff(args.pop_string());
	Holder<GMatrix> hDataOrig(pDataOrig);
	sp_relation pOrigRel = pDataOrig->relation();
	GRand prng(seed);
	GCollaborativeFilter* pModel = InstantiateAlgorithm(prng, args);
	Holder<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		ThrowError("Superfluous argument: ", args.peek());

	// Convert to all normalized real values
	GNominalToCat* pNtc = new GNominalToCat();
	// NOTE(review): the chainer is presumably the owner of both transforms
	// passed to it (nothing else here frees them) -- confirm.
	GTwoWayTransformChainer filter(new GNormalize(), pNtc);
	pNtc->preserveUnknowns();
	filter.train(*pDataOrig);
	GMatrix* pData = filter.transformBatch(*pDataOrig);
	Holder<GMatrix> hData(pData);
	// The original data is no longer needed past this point. It remains owned
	// by hDataOrig and is freed at scope exit. (Calling release() on the holder
	// presumably only gives up ownership without deleting, which leaked it.)

	// Convert to 3-column form: one (row, column, value) triple per known element
	GMatrix* pMatrix = new GMatrix(0, 3);
	Holder<GMatrix> hMatrix(pMatrix);
	size_t dims = pData->cols();
	for(size_t i = 0; i < pData->rows(); i++)
	{
		double* pRow = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(*pRow != UNKNOWN_REAL_VALUE)
			{
				double* pVec = pMatrix->newRow();
				pVec[0] = static_cast<double>(i);
				pVec[1] = static_cast<double>(j);
				pVec[2] = *pRow;
			}
			pRow++;
		}
	}

	// Train the collaborative filter. The training matrix stays owned by
	// hMatrix, so it is freed at scope exit and remains valid in case the model
	// still refers to it while predicting.
	pModel->train(*pMatrix);

	// Predict values for missing elements
	for(size_t i = 0; i < pData->rows(); i++)
	{
		double* pRow = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(*pRow == UNKNOWN_REAL_VALUE)
				*pRow = pModel->predict(i, j);
			GAssert(*pRow != UNKNOWN_REAL_VALUE);
			pRow++;
		}
	}

	// Convert the data back to its original form.
	// NOTE(review): untransformBatch presumably returns a caller-owned matrix,
	// like transformBatch above -- confirm. It is held so it gets freed.
	GMatrix* pOut = filter.untransformBatch(*pData);
	Holder<GMatrix> hOut(pOut);
	pOut->setRelation(pOrigRel);
	pOut->print(cout);
}
/// Command-line entry point that fills in the missing values of an ARFF
/// dataset with predictions from a collaborative filter and prints the
/// completed dataset to cout.
///
/// Arguments consumed from args, in order:
///  - optional flags "-seed <uint>" (the seed otherwise defaults to
///    getpid() * time(NULL)) and "-nonormalize" (skip the GNormalize stage),
///  - the ARFF filename,
///  - optional flag "-ignore <attribute list>" naming columns to drop,
///  - the algorithm description consumed by InstantiateAlgorithm.
/// Throws Ex for an unrecognized flag or for leftover arguments.
///
/// Every heap object created here is owned by a smart pointer until the
/// function returns. The previous code leaked the training matrix (via
/// unique_ptr::release()), the final output matrix, and -- when -nonormalize
/// was given -- the GNominalToCat transform.
void GRecommenderLib::fillMissingValues(GArgReader& args)
{
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	bool normalize = true;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-nonormalize"))
			normalize = false;
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Load the data and the filter
	GMatrix dataOrig;
	dataOrig.loadArff(args.pop_string());

	// Parse params
	vector<size_t> ignore;
	while(args.next_is_flag())
	{
		if(args.if_pop("-ignore"))
			parseAttributeList(ignore, args, dataOrig.cols());
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Throw out the ignored attributes, highest index first so that the
	// remaining indices are not shifted by earlier deletions
	std::sort(ignore.begin(), ignore.end());
	for(size_t i = ignore.size(); i > 0; i--)
		dataOrig.deleteColumns(ignore[i - 1], 1);
	GRelation* pOrigRel = dataOrig.relation().clone();
	std::unique_ptr<GRelation> hOrigRel(pOrigRel);
	GCollaborativeFilter* pModel = InstantiateAlgorithm(args);
	std::unique_ptr<GCollaborativeFilter> hModel(pModel);
	if(args.size() > 0)
		throw Ex("Superfluous argument: ", args.peek());
	pModel->rand().setSeed(seed);

	// Convert to all normalized real values
	std::unique_ptr<GNominalToCat> hNtc(new GNominalToCat());
	GNominalToCat* pNtc = hNtc.get();
	GIncrementalTransform* pFilter = pNtc;
	std::unique_ptr<GIncrementalTransformChainer> hChainer;
	if(normalize)
	{
		hChainer.reset(new GIncrementalTransformChainer(new GNormalize(), pNtc));
		// NOTE(review): the chainer is presumably the owner of both transforms
		// passed to it (nothing else frees the GNormalize) -- confirm. Ownership
		// of pNtc is therefore handed over here; without a chainer hNtc keeps it.
		hNtc.release();
		pFilter = hChainer.get();
	}
	pNtc->preserveUnknowns();
	pFilter->train(dataOrig);
	GMatrix* pData = pFilter->transformBatch(dataOrig);
	std::unique_ptr<GMatrix> hData(pData);

	// Convert to 3-column form: one (row, column, value) triple per known element
	GMatrix* pMatrix = new GMatrix(0, 3);
	std::unique_ptr<GMatrix> hMatrix(pMatrix);
	size_t dims = pData->cols();
	for(size_t i = 0; i < pData->rows(); i++)
	{
		GVec& row = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(row[j] != UNKNOWN_REAL_VALUE)
			{
				GVec& vec = pMatrix->newRow();
				vec[0] = (double)i;
				vec[1] = (double)j;
				vec[2] = row[j];
			}
		}
	}

	// Train the collaborative filter. The training matrix stays owned by
	// hMatrix: unique_ptr::release() would only give up ownership without
	// deleting (a leak), and keeping the matrix alive until scope exit is safe
	// even if the model still refers to it while predicting.
	pModel->train(*pMatrix);

	// Predict values for missing elements
	for(size_t i = 0; i < pData->rows(); i++)
	{
		GVec& row = pData->row(i);
		for(size_t j = 0; j < dims; j++)
		{
			if(row[j] == UNKNOWN_REAL_VALUE)
				row[j] = pModel->predict(i, j);
			GAssert(row[j] != UNKNOWN_REAL_VALUE);
		}
	}

	// Convert the data back to its original form.
	// NOTE(review): untransformBatch presumably returns a caller-owned matrix,
	// like transformBatch above -- confirm. It is held so it gets freed.
	std::unique_ptr<GMatrix> hOut(pFilter->untransformBatch(*pData));
	// NOTE(review): setRelation presumably takes ownership of the relation,
	// which is why it is released from its holder here -- confirm.
	hOut->setRelation(hOrigRel.release());
	hOut->print(cout);
}