void SwingPendulumTest::testOffPACSwingPendulum2() { Random<double>* random = new Random<double>; RLProblem<double>* problem = new SwingPendulum<double>; Hashing<double>* hashing = new MurmurHashing<double>(random, 1000000); Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10, true); StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector, problem->getDiscreteActions()); double alpha_v = 0.1 / projector->vectorNorm(); double alpha_w = .005 / projector->vectorNorm(); double gamma = 0.99; Trace<double>* critice = new AMaxTrace<double>(projector->dimension()); Trace<double>* criticeML = new MaxLengthTrace<double>(critice, 1000); GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, 0.4, criticeML); double alpha_u = 0.5 / projector->vectorNorm(); PolicyDistribution<double>* target = new BoltzmannDistribution<double>(random, problem->getDiscreteActions(), projector->dimension()); Trace<double>* actore = new AMaxTrace<double>(projector->dimension()); Trace<double>* actoreML = new MaxLengthTrace<double>(actore, 1000); Traces<double>* actoreTraces = new Traces<double>(); actoreTraces->push_back(actoreML); ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, 0.4, target, actoreTraces); /*Policy<double>* behavior = new RandomPolicy<double>( &problem->getActions());*/ Policy<double>* behavior = new BoltzmannDistribution<double>(random, problem->getDiscreteActions(), projector->dimension()); OffPolicyControlLearner<double>* control = new OffPAC<double>(behavior, critic, actor, toStateAction, projector); RLAgent<double>* agent = new LearnerAgent<double>(control); RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 200, 1); sim->setTestEpisodesAfterEachRun(true); sim->run(); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete criticeML; delete critic; delete actore; delete actoreML; delete actoreTraces; delete actor; delete behavior; delete target; delete control; delete agent; delete sim; }
void Run(std::string iArgument1, std::string iArgument2) { if (iArgument2=="GeneralStats") GeneralStats(iArgument1); else if (iArgument2=="") { std::string Instance = iArgument1; int PopSize = 16; int MaxHamming = 3; int RCLLength = 3; double MutationRate = 0.1; double TransmitionRate = 0.2; int MaxNbGenerations = 200; double InfMeanDiff = 0.002; Traces ExecTraces; ExecTraces.Initialize(PopSize, MaxNbGenerations); GRASP myGRASP(Instance, PopSize, MaxHamming, RCLLength, MutationRate, TransmitionRate, MaxNbGenerations, InfMeanDiff, &ExecTraces); ExecTraces._BeginPopBuilt_UserTime = get_wall_time(); ExecTraces._BeginPopBuilt_CPUTime = get_cpu_time(); myGRASP.Construction(); ExecTraces._EndPopBuilt_CPUTime = get_cpu_time(); ExecTraces._EndPopBuilt_UserTime = get_wall_time(); Localisation * TmpBestLoc = myGRASP.GetBestLocalisation(); if (TmpBestLoc) ExecTraces._GRASPBestCost = TmpBestLoc->GetLocalisationCost(); ExecTraces._BeginGenetic_UserTime = get_wall_time(); ExecTraces._BeginGenetic_CPUTime = get_cpu_time(); myGRASP.GeneticAlgorithm(); ExecTraces._EndGenetic_CPUTime = get_cpu_time(); ExecTraces._EndGenetic_UserTime = get_wall_time(); TmpBestLoc = myGRASP.GetBestLocalisation(); if (TmpBestLoc) ExecTraces._GeneticBestCost = TmpBestLoc->GetLocalisationCost(); std::cout << "===== Parameters\n"; std::cout << "Size of the population: " << PopSize << "\n"; std::cout << "Maximal Hamming Distance: " << MaxHamming << "\n"; std::cout << "Length of RCl (restricted candidates list): " << RCLLength << "\n"; std::cout << "Mutation rate: " << MutationRate << "\n"; std::cout << "Transmition rate: " << TransmitionRate << "\n"; std::cout << "Maximal number of generation in genetic algorithm: " << MaxNbGenerations << "\n"; std::cout << "Mean difference of the costs of the population from which stop: " << InfMeanDiff << "\n"; std::cout << "\n"; std::cout << "===== Result of the metaheuristic\n"; myGRASP.PrintBestLocalisation(); ExecTraces.PostTreatment(); TmpBestLoc = 0; } }
void SwingPendulumTest::testOffPACOnPolicySwingPendulum() { Random<double>* random = new Random<double>; RLProblem<double>* problem = new SwingPendulum<double>; Hashing<double>* hashing = new MurmurHashing<double>(random, 1000); Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10, true); StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector, problem->getDiscreteActions()); double alpha_v = 0.1 / projector->vectorNorm(); double alpha_w = .0001 / projector->vectorNorm(); double gamma = 0.99; double lambda = 0.4; Trace<double>* critice = new ATrace<double>(projector->dimension()); GTDLambda<double>* critic = new GTDLambda<double>(alpha_v, alpha_w, gamma, lambda, critice); double alpha_u = 0.5 / projector->vectorNorm(); PolicyDistribution<double>* acting = new BoltzmannDistribution<double>(random, problem->getDiscreteActions(), projector->dimension()); Trace<double>* actore = new ATrace<double>(projector->dimension()); Traces<double>* actoreTraces = new Traces<double>(); actoreTraces->push_back(actore); ActorOffPolicy<double>* actor = new ActorLambdaOffPolicy<double>(alpha_u, gamma, lambda, acting, actoreTraces); OffPolicyControlLearner<double>* control = new OffPAC<double>(acting, critic, actor, toStateAction, projector); RLAgent<double>* agent = new LearnerAgent<double>(control); RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 10, 5); sim->setTestEpisodesAfterEachRun(true); sim->run(); sim->computeValueFunction(); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore; delete actoreTraces; delete actor; delete acting; delete control; delete agent; delete sim; }
std::set<std::string> Traces::names() // ---------------------------------------------------------------------------- // Return the names of all traces in all trace groups // ---------------------------------------------------------------------------- { std::set<std::string> ret; std::map<std::string, Traces *>::iterator it; for (it = groups.begin(); it != groups.end(); it++) { Traces * group = (*it).second; std::set<std::string> names = group->groupTraceNames(); ret.insert(names.begin(), names.end()); } return ret; }
void NAOTest::testTrain() { // OffLine { Random<float>* random = new Random<float>; RLProblem<float>* problem = new MountainCar<float>(random); Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000); Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10, 10); StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector, problem->getDiscreteActions()); double alpha_v = 0.05 / projector->vectorNorm(); double alpha_w = 0.0001 / projector->vectorNorm(); double lambda = 0.0; //0.4; double gamma = 0.99; Trace<float>* critice = new ATrace<float>(projector->dimension()); OffPolicyTD<float>* critic = new GTDLambda<float>(alpha_v, alpha_w, gamma, lambda, critice); double alpha_u = 1.0 / projector->vectorNorm(); PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random, problem->getDiscreteActions(), projector->dimension()); Trace<float>* actore = new ATrace<float>(projector->dimension()); Traces<float>* actoreTraces = new Traces<float>(); actoreTraces->push_back(actore); ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(alpha_u, gamma, lambda, target, actoreTraces); Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions()); OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor, toStateAction, projector); RLAgent<float>* agent = new LearnerAgent<float>(control); Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 100, 1); //sim->setVerbose(false); sim->run(); control->persist("NAOTest_x32_M.bin"); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore; delete actoreTraces; delete actor; delete behavior; delete target; delete control; delete agent; delete sim; } // OnLine { Random<double>* random = new Random<double>; RLProblem<double>* problem = new SwingPendulum<double>(random); Hashing<double>* hashing = new MurmurHashing<double>(random, 1000); Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10, false); StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector, problem->getContinuousActions()); double alpha_v = 0.1 / projector->vectorNorm(); double alpha_u = 0.001 / projector->vectorNorm(); double alpha_r = .0001; double gamma = 1.0; double lambda = 0.5; Trace<double>* critice = new ATrace<double>(projector->dimension()); TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice); PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random, problem->getContinuousActions(), 0, 1.0, projector->dimension()); Range<double> policyRange(-2.0, 2.0); Range<double> problemRange(-2.0, 2.0); PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>( problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange); Trace<double>* actore1 = new ATrace<double>(projector->dimension()); Trace<double>* actore2 = new ATrace<double>(projector->dimension()); Traces<double>* actoreTraces = new Traces<double>(); actoreTraces->push_back(actore1); actoreTraces->push_back(actore2); ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting, actoreTraces); OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor, projector, toStateAction, alpha_r); RLAgent<double>* agent = new LearnerAgent<double>(control); Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 100, 1); sim->run(); control->persist("NAOTest_x32_S.bin"); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore1; delete actore2; delete actoreTraces; delete actor; delete policyDistribution; delete acting; delete control; delete agent; delete sim; } }
void NAOTest::testEvaluate() { { Random<float>* random = new Random<float>; RLProblem<float>* problem = new MountainCar<float>(random); Hashing<float>* hashing = new MurmurHashing<float>(random, 1000000); Projector<float>* projector = new TileCoderHashing<float>(hashing, problem->dimension(), 10, 10, true); StateToStateAction<float>* toStateAction = new StateActionTilings<float>(projector, problem->getDiscreteActions()); Trace<float>* critice = new ATrace<float>(projector->dimension()); OffPolicyTD<float>* critic = new GTDLambda<float>(0, 0, 0, 0, critice); PolicyDistribution<float>* target = new BoltzmannDistribution<float>(random, problem->getDiscreteActions(), projector->dimension()); Trace<float>* actore = new ATrace<float>(projector->dimension()); Traces<float>* actoreTraces = new Traces<float>(); actoreTraces->push_back(actore); ActorOffPolicy<float>* actor = new ActorLambdaOffPolicy<float>(0, 0, 0, target, actoreTraces); Policy<float>* behavior = new RandomPolicy<float>(random, problem->getDiscreteActions()); OffPolicyControlLearner<float>* control = new OffPAC<float>(behavior, critic, actor, toStateAction, projector); RLAgent<float>* agent = new ControlAgent<float>(control); Simulator<float>* sim = new Simulator<float>(agent, problem, 5000, 10, 10); control->reset(); control->resurrect("NAOTest_x32_M.bin"); sim->runEvaluate(10, 10); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore; delete actoreTraces; delete actor; delete behavior; delete target; delete control; delete agent; delete sim; } // OnLine { Random<double>* random = new Random<double>; RLProblem<double>* problem = new SwingPendulum<double>(random); Hashing<double>* hashing = new MurmurHashing<double>(random, 1000); Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10, false); StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector, problem->getContinuousActions()); Trace<double>* critice = new ATrace<double>(projector->dimension()); TDLambda<double>* critic = new TDLambda<double>(0, 0, 0, critice); PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random, problem->getContinuousActions(), 0, 1.0, projector->dimension()); Range<double> policyRange(-2.0, 2.0); Range<double> problemRange(-2.0, 2.0); PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>( problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange); Trace<double>* actore1 = new ATrace<double>(projector->dimension()); Trace<double>* actore2 = new ATrace<double>(projector->dimension()); Traces<double>* actoreTraces = new Traces<double>(); actoreTraces->push_back(actore1); actoreTraces->push_back(actore2); ActorOnPolicy<double>* actor = new ActorLambda<double>(0, 0, 0, acting, actoreTraces); OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor, projector, toStateAction, 0); RLAgent<double>* agent = new ControlAgent<double>(control); Simulator<double>* sim = new Simulator<double>(agent, problem, 5000, 10, 10); control->reset(); control->resurrect("NAOTest_x32_S.bin"); sim->run(); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore1; delete actore2; delete actoreTraces; delete actor; delete policyDistribution; delete acting; delete control; delete agent; delete sim; } }
void SwingPendulumTest::testOnPolicySwingPendulum() { Random<double>* random = new Random<double>; RLProblem<double>* problem = new SwingPendulum<double>; Hashing<double>* hashing = new MurmurHashing<double>(random, 1000); Projector<double>* projector = new TileCoderHashing<double>(hashing, problem->dimension(), 10, 10, false); StateToStateAction<double>* toStateAction = new StateActionTilings<double>(projector, problem->getContinuousActions()); double alpha_v = 0.1 / projector->vectorNorm(); double alpha_u = 0.001 / projector->vectorNorm(); double alpha_r = .0001; double gamma = 1.0; double lambda = 0.5; Trace<double>* critice = new ATrace<double>(projector->dimension()); TDLambda<double>* critic = new TDLambda<double>(alpha_v, gamma, lambda, critice); PolicyDistribution<double>* policyDistribution = new NormalDistributionScaled<double>(random, problem->getContinuousActions(), 0, 1.0, projector->dimension()); Range<double> policyRange(-2.0, 2.0); Range<double> problemRange(-2.0, 2.0); PolicyDistribution<double>* acting = new ScaledPolicyDistribution<double>( problem->getContinuousActions(), policyDistribution, &policyRange, &problemRange); Trace<double>* actore1 = new ATrace<double>(projector->dimension()); Trace<double>* actore2 = new ATrace<double>(projector->dimension()); Traces<double>* actoreTraces = new Traces<double>(); actoreTraces->push_back(actore1); actoreTraces->push_back(actore2); ActorOnPolicy<double>* actor = new ActorLambda<double>(alpha_u, gamma, lambda, acting, actoreTraces); OnPolicyControlLearner<double>* control = new AverageRewardActorCritic<double>(critic, actor, projector, toStateAction, alpha_r); RLAgent<double>* agent = new LearnerAgent<double>(control); RLRunner<double>* sim = new RLRunner<double>(agent, problem, 5000, 100, 10); sim->setVerbose(true); sim->run(); sim->runEvaluate(100); sim->computeValueFunction(); delete random; delete problem; delete hashing; delete projector; delete toStateAction; delete critice; delete critic; delete actore1; delete actore2; delete actoreTraces; delete actor; delete policyDistribution; delete acting; delete control; delete agent; delete sim; }
void BFReference::CalcSignalReference(const std::string &datFile, const std::string &bgFile, Mask &mask, int traceFrame) { Image bfImg; Image bfBkgImg; bfImg.SetImgLoadImmediate (false); bfBkgImg.SetImgLoadImmediate (false); bool loaded = bfImg.LoadRaw(datFile.c_str()); bool bgLoaded = bfBkgImg.LoadRaw(bgFile.c_str()); if (!loaded) { ION_ABORT("*Error* - No beadfind file found, did beadfind run? are files transferred? (" + datFile + ")"); } if (!bgLoaded) { ION_ABORT("*Error* - No beadfind background file found, did beadfind run? are files transferred? (" + bgFile + ")"); } const RawImage *raw = bfImg.GetImage(); assert(raw->cols == GetNumCol()); assert(raw->rows == GetNumRow()); assert(raw->cols == mask.W()); assert(raw->rows == mask.H()); bfImg.FilterForPinned(&mask, MaskEmpty, false); bfBkgImg.FilterForPinned(&mask, MaskEmpty, false); // bfImg.XTChannelCorrect(&mask); bfImg.XTChannelCorrect(); // bfBkgImg.XTChannelCorrect(&mask); bfBkgImg.XTChannelCorrect(); Traces trace; trace.Init(&bfImg, &mask, FRAMEZERO, FRAMELAST, FIRSTDCFRAME,LASTDCFRAME); bfImg.Close(); Traces bgTrace; bgTrace.Init(&bfBkgImg, &mask, FRAMEZERO, FRAMELAST, FIRSTDCFRAME,LASTDCFRAME); bfBkgImg.Close(); if (mDoRegionalBgSub) { trace.SetMeshDist(0); bgTrace.SetMeshDist(0); } trace.SetT0Step(mRegionXSize); bgTrace.SetT0Step(mRegionXSize); trace.CalcT0(true); size_t numWells = trace.GetNumRow() * trace.GetNumCol(); for (size_t i = 0; i < numWells; i++) { trace.SetT0(max(trace.GetT0(i) - 3, 0.0f), i); } bgTrace.SetT0(trace.GetT0()); trace.T0DcOffset(0,4); trace.FillCriticalFrames(); trace.CalcReference(mRegionXSize,mRegionYSize,trace.mGridMedian); bgTrace.T0DcOffset(0,4); bgTrace.FillCriticalFrames(); bgTrace.CalcReference(mRegionXSize,mRegionYSize,bgTrace.mGridMedian); int length = GetNumRow() * GetNumCol(); mBfMetric.resize(length, std::numeric_limits<double>::signaling_NaN()); vector<double> rawTrace(trace.GetNumFrames()); vector<double> bgRawTrace(bgTrace.GetNumFrames()); int pinned =0, excluded = 0; for (int i = 0; i < length; i++) { if (mask[i] & MaskExclude || mask[i] & MaskPinned) { continue; if (mask[i] & MaskExclude) { excluded++; } else if (mask[i] & MaskPinned) { pinned++; } } trace.GetTraces(i, rawTrace.begin()); bgTrace.GetTraces(i, bgRawTrace.begin()); mBfMetric[i] = 0; for (int s = 3; s < 15; s++) { mBfMetric[i] += rawTrace[s] - bgRawTrace[s]; } } cout << "Pinned: " << pinned << " excluded: " << excluded << endl; for (int i = 0; i < length; i++) { if (mask[i] & MaskExclude || mask[i] & MaskPinned || mask[i] & MaskIgnore) { mWells[i] = Exclude; } else { mask[i] = MaskIgnore; } } cout << "Filling reference. " << endl; FillInReference(mWells, mBfMetric, mGrid, mMinQuantile, mMaxQuantile, mNumEmptiesPerRegion); for (int i = 0; i < length; i++) { if (mWells[i] == Reference) { mask[i] = MaskEmpty; } } bfImg.Close(); }
void BFReference::CalcSignalReference2(const std::string &datFile, const std::string &bgFile, Mask &mask, int traceFrame) { Image bfImg; Image bfBkgImg; bfImg.SetImgLoadImmediate (false); bfBkgImg.SetImgLoadImmediate (false); bool loaded = bfImg.LoadRaw(datFile.c_str()); bool bgLoaded = bfBkgImg.LoadRaw(bgFile.c_str()); if (!loaded) { ION_ABORT("*Error* - No beadfind file found, did beadfind run? are files transferred? (" + datFile + ")"); } if (!bgLoaded) { ION_ABORT("*Error* - No beadfind background file found, did beadfind run? are files transferred? (" + bgFile + ")"); } const RawImage *raw = bfImg.GetImage(); assert(raw->cols == GetNumCol()); assert(raw->rows == GetNumRow()); assert(raw->cols == mask.W()); assert(raw->rows == mask.H()); int StartFrame = bfImg.GetFrame(-663); //5 int EndFrame = bfImg.GetFrame(350); //20 int NNinnerx = 1, NNinnery = 1, NNouterx = 12, NNoutery = 8; cout << "DC start frame: " << StartFrame << " end frame: " << EndFrame << endl; bfImg.FilterForPinned(&mask, MaskEmpty, false); bfImg.XTChannelCorrect(); // bfImg.XTChannelCorrect(&mask); Traces trace; trace.Init(&bfImg, &mask, FRAMEZERO, FRAMELAST, FIRSTDCFRAME,LASTDCFRAME); bfImg.Normalize(StartFrame, EndFrame); if (mDoRegionalBgSub) { trace.SetMeshDist(0); } trace.CalcT0(true); if (mDoRegionalBgSub) { GridMesh<float> grid; grid.Init(raw->rows, raw->cols, mRegionYSize, mRegionXSize); int numBin = grid.GetNumBin(); int rowStart = -1, rowEnd = -1, colStart = -1, colEnd = -1; for (int binIx = 0; binIx < numBin; binIx++) { cout << "BG Subtract Region: " << binIx << endl; grid.GetBinCoords(binIx, rowStart, rowEnd, colStart, colEnd); Region reg; reg.row = rowStart; reg.h = rowEnd - rowStart; reg.col = colStart; reg.w = colEnd - colStart; bfImg.BackgroundCorrectRegion(&mask, reg, MaskAll, MaskEmpty, NNinnerx, NNinnery, NNouterx, NNoutery, NULL); } } else { bfImg.BackgroundCorrect(&mask, MaskEmpty, MaskEmpty, NNinnerx, NNinnery, NNouterx, NNoutery, NULL); } int length = GetNumRow() * GetNumCol(); mBfMetric.resize(length, std::numeric_limits<double>::signaling_NaN()); for (int wIx = 0; wIx < length; wIx++) { if (mask[wIx] & MaskExclude || mask[wIx] & MaskPinned) continue; int t0 = (int)trace.GetT0(wIx); mBfMetric[wIx] = 0; float zSum = 0; int count = 0; for (int fIx = min(t0-20, 0); fIx < t0-10; fIx++) { zSum += bfImg.At(wIx,fIx); count ++; } for (int fIx = t0+3; fIx < t0+15; fIx++) { mBfMetric[wIx] += (bfImg.At(wIx,fIx) - (zSum / count)); } } bfImg.Close(); for (int i = 0; i < length; i++) { if (mask[i] & MaskExclude || mWells[i] == Exclude) { mWells[i] = Exclude; } else { mask[i] = MaskIgnore; } } cout << "Filling reference. " << endl; FillInReference(mWells, mBfMetric, mGrid, mMinQuantile, mMaxQuantile, mNumEmptiesPerRegion); for (int i = 0; i < length; i++) { if (mWells[i] == Reference) { mask[i] = MaskEmpty; } } }