Scheduler
rlBackupAlgorithm.cpp — Doxygen source listing: lazily-cached greedy action / max-Q lookup for a reinforcement-learning backup algorithm.
Go to the documentation of this file.
1 
10 #include "rlBackupAlgorithm.h"
11 
12 #include <cassert>
13 #include <iostream>
14 #include <memory>
15 
16 #include <utils/randomGenerator.h>
17 
18 #include <mdp/context.h>
19 #include <mdp/mdpConfiguration.h>
20 #include <mdp/stateSpace.h>
21 
23 
24 using namespace Mdp;
25 
27  : context(c)
28  , actionValues(av)
29  , bestQ(c->stateSpace->size())
30  , bestAction(c->stateSpace->size())
31  , needsUpdate(std::vector<bool>(c->stateSpace->size(), true))
32 {
33 }
34 
36 {
37  static size_t size = context->stateSpace->size();
38  needsUpdate = std::vector<bool>(size, true);
39 }
40 
42 {
43  if (needsUpdate[state])
44  {
45  updateBestActionAndQ(state);
46  needsUpdate[state] = false;
47  }
48  return bestQ[state];
49 }
50 
52 {
53  /*if ( state >= actionValues->size())
54  std::cout << "state is "<<state<<" and actionValue size is "<<actionValues->size()<<" \n";
55  assert(state < actionValues->size());*/
56  double Q = actionValues->getValue(state, 0);
57  double candidate;
58  action_t action = 0;
59  for (size_t i = 1; i < context->actionSpace->size(); i++)
60  {
61  candidate = actionValues->getValue(state, i);
62  if (candidate >= Q)
63  {
64  const double eps = 0.0000001;//TODO: how to choose this value?
65  if ((candidate < Q + eps) && (candidate > Q - eps) && context->randomGenerator->drawUniform(0.0, 2.0) > 1.0)
66  continue;
67  Q = candidate;
68  action = i;
69  }
70  }
71  bestQ[state] = Q;
72  bestAction[state] = action;
73 }
74 
75 std::pair<action_t, double> RlBackupAlgorithm::getBestActionAndQ(state_t state)
76 {
77  updateIfNeeded(state);
78  return std::pair<action_t, double>(bestAction[state], bestQ[state]);
79 }
80 
82 {
83  if (needsUpdate[state])
84  {
85  updateBestActionAndQ(state);
86  needsUpdate[state] = false;
87  }
88 }
89 
90 
92 {
93  updateIfNeeded(state);
94  return bestAction[state];
95 }
96 
98 {
// Reads the learning-rate configuration from the "reinforcementLearning"
// section: base alpha, its decay speed, and which decay mode (hyperbolic or
// stepwise) is enabled; alpha0 preserves the initial value for decay formulas.
// NOTE(review): the function's signature (Doxygen source line 97) was lost in
// extraction and the member index does not name it — presumably an
// init/configure method of this class; confirm against the header.
99  alpha = context->conf->getDoubleValue("reinforcementLearning","alpha");
100  alphaDecaySpeed = context->conf->getDoubleValue("reinforcementLearning","alphaDecaySpeed");
// Both decay flags default to false when absent from the configuration.
101  if (context->conf->getBoolValue("reinforcementLearning", "alphaHyperbolicDecay", false))
102  hyperbolic = true;
103  if (context->conf->getBoolValue("reinforcementLearning", "alphaStepwiseDecay", false))
104  {
105  stepwise = true;
// Stepwise decay additionally needs the step period.
106  stepLength = context->conf->getUnsignedLongLongIntValue("reinforcementLearning", "alphaStepLength");
107  }
// Remember the starting alpha so decay can be computed relative to it.
108  alpha0 = alpha;
109 }
110 
111 
112 
114 {
// Applies the configured learning-rate decay (called per update, presumably).
// NOTE(review): this listing is incomplete — the signature (Doxygen line 113)
// and the statements at Doxygen lines 117, 122 and 125 were lost in
// extraction. Do not modify without consulting the real source.
115  if (hyperbolic)
116  {
// NOTE(review): missing line 117 — presumably the hyperbolic update of
// `alpha` from alpha0/alphaDecaySpeed; confirm against the real file.
118  }
119  if (stepwise)
120  {
121  stepwiseCounter++;
// NOTE(review): missing line 122 — presumably `if (stepwiseCounter == stepLength)`
// (or >=) guarding the step; confirm against the real file.
123  {
124  stepwiseCounter = 0;
// NOTE(review): missing line 125 — presumably the stepwise reduction of
// `alpha`; confirm against the real file.
126  }
127  }
128 }
129 
130 
131 
132 
133 
134 
135 
virtual void notifyUpdateNeeded()
virtual double getValue(state_t state, action_t action)=0
std::vector< double > bestQ
unsigned long long stepwiseCounter
virtual void updateBestActionAndQ(state_t state)
virtual void updateIfNeeded(state_t state)
std::vector< action_t > bestAction
size_t action_t
Definition: action_impl.h:18
virtual action_t getBestAction(state_t state)
virtual std::pair< action_t, double > getBestActionAndQ(state_t state)
std::vector< bool > needsUpdate
Definition: action.h:18
RlBackupAlgorithm(std::shared_ptr< Context > c, ActionValuesFunction *av)
size_t state_t
Definition: state.h:19
std::shared_ptr< Context > context
ActionValuesFunction * actionValues
virtual double getMaxQ(state_t state)
unsigned long long int stepLength