// NOTE(review): fragment only — the opening of the enclosing definition is not
// visible here. The leading integers (25, 33, 34) look like line numbers carried
// over from a source listing rather than part of the code — confirm.
// Looks like a member-initializer entry: `tabularAv` is initialised from the
// `actionValues` argument.
25 , tabularAv(actionValues)
// Read `alpha` as a double from the configuration reached through
// context->conf, using the keys "reinforcementLearning" and "alpha".
33 alpha =
context->conf->getDoubleValue(
"reinforcementLearning",
"alpha");
// Read `discountFactor` as a double from the same configuration object, using
// the keys "mdp" and "discountFactor".
34 discountFactor =
context->conf->getDoubleValue(
"mdp",
"discountFactor");
// NOTE(review): fragment only — `Q` and `newState` are defined outside the
// visible lines; `Q` is presumably the current estimate for
// (previousState, previousAction) — confirm. The leading integers (49, 51) look
// like listing line numbers, not code.
// Compute the updated estimate; algebraically this is
//   newQ = Q + alpha * (reward + discountFactor * getMaxQ(newState) - Q),
// which has the form of the standard Q-learning update.
49 double newQ = Q +
alpha*(reward - Q + discountFactor*
getMaxQ(newState));
// Store newQ for the (previousState, previousAction) pair through tabularAv.
51 tabularAv->
updateValue(previousState, previousAction, newQ);
// NOTE(review): the lines below are bare signatures (no bodies, no terminating
// semicolons); they read like a member index rather than compilable
// declarations. Comments describe only what each signature itself shows.
// Pure virtual (=0): takes a (state, action) pair and returns a double.
virtual double getValue(state_t state, action_t action)=0
// Virtual, no parameters, no return value; body not visible here.
virtual void updateAlpha()
// Takes a (state, action) pair and a double `value`; returns nothing.
void updateValue(state_t state, action_t action, double value)
// Overrides a base-class virtual; receives the previous state, the next state,
// the previous action, and a reward. Returns nothing.
void updateActionValues(state_t previousState, state_t nextState, action_t previousAction, double reward) override
// std::shared_ptr handle to a Context.
std::shared_ptr< Context > context
// Raw pointer to an ActionValuesFunction; ownership is not evident from this listing.
ActionValuesFunction * actionValues
// Virtual: takes a state and returns a double.
virtual double getMaxQ(state_t state)
// Constructor taking a std::shared_ptr<Context> and a raw TabularActionValues pointer.
QLearning(std::shared_ptr< Context > context, TabularActionValues *actionValues)