21 std::pair<double, action_t> pair = watkinsBestQ(previousState, previousAction);
22 double newQ = pair.first;
34 double newValue = av +
alpha*delta*
e[
i][j];
36 if (astar == previousAction)
51 std::pair<double, action_t> WatkinsQLambda::watkinsBestQ(
state_t state,
action_t nextAction)
59 static const double lowerMargin = 0.99999;
60 static const double upperMargin = 1.00001;
61 if ((candidate > Q*lowerMargin) && (candidate < Q*upperMargin))
63 if (action == nextAction)
67 else if (
i == nextAction)
75 if (
context->randomGenerator->drawUniform(0.0, 2.0) > 1.0)
81 else if (candidate > Q)
87 return std::pair<double, action_t>(Q, action);
virtual double getValue(state_t state, action_t action)=0
void updateState(state_t previousState, action_t previousAction, double reward)
void updateValue(state_t state, action_t action, double value)
virtual void updateActionValues(state_t previousState, state_t mextState, action_t previousAction, double reward)
std::vector< std::vector< double > > e
state_t previousPreviousState
std::shared_ptr< Context > context
ActionValuesFunction * actionValues
action_t previousPreviousAction