Scheduler
rlBackupAlgorithm.h
Go to the documentation of this file.
1 
10 #ifndef RL_BACKUP_ALGORITHM_H
11 #define RL_BACKUP_ALGORITHM_H
12 
13 #include <vector>
14 #include <utility>
15 
16 #include <mdp/state.h>
17 #include <mdp/action_impl.h>
18 #include <mdp/stateSpace.h>
19 #include <mdp/actionSpace.h>
21 
// Forward declaration of Utils::RandomGenerator (no definition is pulled in here).
// NOTE(review): nothing in the visible declarations of this header refers to
// RandomGenerator - confirm whether this forward declaration is still needed.
22 namespace Utils
23 {
24  class RandomGenerator;
25 }
26 
// Namespace Mdp: declares the abstract interface for RL "backup" algorithms that
// update an ActionValuesFunction, plus helper state shared by implementations.
27 namespace Mdp
28 {
// Forward declaration; in this header Context is only used through
// std::shared_ptr<Context>, so the full definition is not required here.
29 struct Context;
30 
// NOTE(review): this listing jumps from line 30 to line 32, so the class-head line
// is missing from the extract. The constructor/destructor names below show the
// class is RlBackupAlgorithm; whether it derives from a base class cannot be
// determined from here.
32 {
33 public:
// Constructor taking a shared Context and a non-smart pointer to an
// ActionValuesFunction. Presumably these are stored in the `context` and
// `actionValues` members - confirm in the implementation file.
// NOTE(review): std::shared_ptr is provided by <memory>, which is not among the
// includes visible in this listing (line 20 is also absent from it) - confirm it
// is included directly or transitively.
34  RlBackupAlgorithm(std::shared_ptr<Context> c, ActionValuesFunction *av);
// Virtual destructor with an empty inline body, so deleting through a pointer to
// this class is well defined. The ';' after the body is a redundant empty
// declaration.
35  virtual ~RlBackupAlgorithm(){};
// Pure virtual: every concrete algorithm must provide its own initialisation.
36  virtual void init() = 0;
// Pure virtual: the update step each concrete algorithm must implement.
// Parameters are the previous state, the next state, the previous action and a
// reward; presumably one observed transition - the exact update rule is defined
// by the derived class.
37  virtual void updateActionValues(state_t previousState,
38  state_t nextState,
39  action_t previousAction,
40  double reward) = 0;
// Pure virtual: finalisation hook that every concrete algorithm must implement.
41  virtual void end()=0;
42 
// The original author's FIXME below applies to the five virtual helpers that
// follow (getMaxQ ... notifyUpdateNeeded).
43  /*FIXME: first of all, this is only valid for tabular AVs.
44  Also, the notifyUpdateNeeded thing is bad design.
45  Also, do they need to be public?*/
// Returns a double for the given state; presumably the largest action value
// stored for it (see bestQ) - confirm in the implementation file.
46  virtual double getMaxQ(state_t state);
// Returns an (action, value) pair for the given state; presumably the entries of
// bestAction and bestQ for that state - confirm in the implementation file.
47  virtual std::pair<action_t, double> getBestActionAndQ(state_t state);
// Returns an action for the given state; presumably the greedy action.
48  virtual action_t getBestAction(state_t state);
// Presumably recomputes the cached best action and value for `state` - confirm.
49  virtual void updateBestActionAndQ(state_t state);
// Presumably marks the cached values as stale (see needsUpdate) - confirm.
50  virtual void notifyUpdateNeeded();
51 protected:
// Shared handle to the Context; defaults to null.
52  std::shared_ptr<Context> context{nullptr};
// Raw pointer to the action-values function; defaults to null.
// NOTE(review): ownership is not expressed by the type - presumably non-owning,
// since the destructor body above is empty; confirm against the caller.
53  ActionValuesFunction *actionValues{nullptr};
// Step-size state. Defaults as written: alpha = -1.0, alpha0 = 0.1,
// alphaCounter = 1.0, alphaDecaySpeed = 1.0.
// presumably alpha is the current learning rate derived from alpha0 by
// initAlpha()/updateAlpha(), with -1.0 meaning "not initialised yet" - TODO confirm
54  double alpha{-1.0};
55  double alpha0{0.1};
56  double alphaCounter{1.0};
57  double alphaDecaySpeed{1.0};
// Both flags default to false; presumably they select how alpha decays - confirm.
58  bool hyperbolic{false};
59  bool stepwise{false};
// Both counters default to 0; presumably used by the stepwise schedule - confirm.
60  unsigned long long stepwiseCounter{0};
61  unsigned long long int stepLength{0};
// Overridable hooks; presumably set and advance `alpha` from the fields above.
62  virtual void initAlpha();
63  virtual void updateAlpha();
// Three caches, empty by default (no initialiser is given here).
// presumably parallel vectors indexed by state - confirm where they are resized
64  std::vector<double> bestQ;
65  std::vector<action_t> bestAction;
66  std::vector<bool> needsUpdate;
// Presumably refreshes the cache entry for `state` when needsUpdate says it is
// stale - confirm in the implementation file.
67  virtual void updateIfNeeded(state_t state);
68 };
69 
70 
71 }
72 
73 
74 
75 
76 #endif
std::vector< double > bestQ
std::vector< action_t > bestAction
size_t action_t
Definition: action_impl.h:18
std::vector< bool > needsUpdate
Definition: action.h:18
size_t state_t
Definition: state.h:19
Definition: reward.py:1
Definition: context.h:16