Scheduler
naiveQLambda.cpp
Go to the documentation of this file.
1 
10 #include "naiveQLambda.h"
11 
12 #include <mdp/context.h>
13 
14 using namespace Mdp;
15 
16 void NaiveQLambda::updateActionValues(state_t previousState, state_t /*nextState*/, action_t previousAction, double reward)
17 {
18  double delta = previousReward + discountFactor * getMaxQ(previousState);
20  delta -= av;
21 
23  for (size_t i = 0; i < stateSize; i++)
24  {
25  for (size_t j = 0; j < actionSize; j++)
26  {
27  double update = alpha*delta*e[i][j];
28  double newValue = actionValues->getValue(i, j) + update;
29  tabularAv->updateValue(i, j, newValue);
30  e[i][j] *= discountFactor*lambda;
31  }
32  }
33  updateState(previousState, previousAction, reward);
34 }
35 
36 
37 
38 
virtual double getValue(state_t state, action_t action)=0
void updateState(state_t previousState, action_t previousAction, double reward)
void updateValue(state_t state, action_t action, double value)
virtual void updateActionValues(state_t previousState, state_t nextState, action_t previousAction, double reward)
std::vector< std::vector< double > > e
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18
size_t state_t
Definition: state.h:19
ActionValuesFunction * actionValues
virtual double getMaxQ(state_t state)
Definition: reward.py:1