Scheduler
watkinsQLambda.cpp
Go to the documentation of this file.
1 
10 #include "watkinsQLambda.h"
11 
12 #include <cassert>
13 
14 #include <mdp/context.h>
15 #include <utils/randomGenerator.h>
16 
17 using namespace Mdp;
18 
19 void WatkinsQLambda::updateActionValues(state_t previousState, state_t /*nextState*/, action_t previousAction, double reward)
20 {
21  std::pair<double, action_t> pair = watkinsBestQ(previousState, previousAction);
22  double newQ = pair.first;
23  action_t astar = pair.second;
24  double delta = previousReward + discountFactor * newQ;
26 
27 
29  for (size_t i = 0; i < stateSize; i++)
30  {
31  for (size_t j = 0; j < actionSize; j++)
32  {
33  double av = actionValues->getValue(i, j);
34  double newValue = av + alpha*delta*e[i][j];
35  tabularAv->updateValue(i, j, newValue);
36  if (astar == previousAction)
37  {
38  e[i][j] *= discountFactor*lambda;
39  }
40  else
41  {
42  e[i][j] = 0.0;
43  }
44  }
45  }
46  updateState(previousState, previousAction, reward);
47 }
48 
49 
50 
51 std::pair<double, action_t> WatkinsQLambda::watkinsBestQ(state_t state, action_t nextAction)
52 {
53  double Q = actionValues->getValue(state, 0);
54  double candidate;
55  action_t action = 0;
56  for (size_t i = 1; i < actionSize; i++)
57  {
58  candidate = actionValues->getValue(state, i);
59  static const double lowerMargin = 0.99999;//TODO: how to choose those
60  static const double upperMargin = 1.00001;
61  if ((candidate > Q*lowerMargin) && (candidate < Q*upperMargin))
62  {
63  if (action == nextAction)
64  {
65  continue;
66  }
67  else if (i == nextAction)
68  {
69  Q = candidate;
70  action = i;
71  continue;
72  }
73  else
74  {
75  if (context->randomGenerator->drawUniform(0.0, 2.0) > 1.0)
76  continue;
77  Q = candidate;
78  action = i;
79  }
80  }
81  else if (candidate > Q)
82  {
83  Q = candidate;
84  action = i;
85  }
86  }
87  return std::pair<double, action_t>(Q, action);
88 }
89 
90 
91 
92 
93 
94 
95 
96 
97 
98 
99 
100 
101 
virtual double getValue(state_t state, action_t action)=0
void updateState(state_t previousState, action_t previousAction, double reward)
void updateValue(state_t state, action_t action, double value)
virtual void updateActionValues(state_t previousState, state_t mextState, action_t previousAction, double reward)
std::vector< std::vector< double > > e
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18
size_t state_t
Definition: state.h:19
std::shared_ptr< Context > context
ActionValuesFunction * actionValues
Definition: reward.py:1