Scheduler
qLearning.cpp
Go to the documentation of this file.
1 
10 #include "qLearning.h"
11 
12 #include <cassert>
13 #include <iostream>
14 
15 #include <mdp/context.h>
16 #include <mdp/mdpConfiguration.h>
17 
18 
19 
20 using namespace Mdp;
21 
22 
23 QLearning::QLearning(std::shared_ptr<Context> c, TabularActionValues *actionValues)
24  : RlBackupAlgorithm(c, actionValues)
25  , tabularAv(actionValues)
26 {
27 }
28 
30 {
31  assert(context != nullptr);
32  assert(actionValues != nullptr);
33  alpha = context->conf->getDoubleValue("reinforcementLearning","alpha");
34  discountFactor = context->conf->getDoubleValue("mdp","discountFactor");
35  alphaDecaySpeed = context->conf->getDoubleValue("reinforcementLearning", "alphaDecaySpeed");
36  initAlpha();
37 }
38 
39 
41 {
42 }
43 
44 
45 
46 void QLearning::updateActionValues(state_t previousState, state_t newState, action_t previousAction, double reward)
47 {
48  double Q = actionValues->getValue(previousState, previousAction);
49  double newQ = Q + alpha*(reward - Q + discountFactor*getMaxQ(newState));
50  updateAlpha();
51  tabularAv->updateValue(previousState, previousAction, newQ);
52 }
53 
54 
55 
56 
57 
58 
59 
60 
61 
62 
63 
virtual double getValue(state_t state, action_t action)=0
void end() override
Definition: qLearning.cpp:40
void init() override
Definition: qLearning.cpp:29
void updateValue(state_t state, action_t action, double value)
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18
void updateActionValues(state_t previousState, state_t nextState, action_t previousAction, double reward) override
Definition: qLearning.cpp:46
size_t state_t
Definition: state.h:19
std::shared_ptr< Context > context
ActionValuesFunction * actionValues
virtual double getMaxQ(state_t state)
Definition: reward.py:1
QLearning(std::shared_ptr< Context > context, TabularActionValues *actionValues)
Definition: qLearning.cpp:23