Scheduler
reinforcedLearning.h
Go to the documentation of this file.
1 
10 #ifndef REINFORCEDLEARNING_H
11 #define REINFORCEDLEARNING_H
12 
14 
15 #include <utility>
16 #include <vector>
17 
18 #include <mdp/state.h>
19 #include <mdp/action_impl.h>
20 
21 #include <utils/record.h>
22 
24 
26 
27 #include "actionValuesRecord.h"
28 
29 namespace Mdp
30 {
31 struct Context; // forward declaration; used below only as std::shared_ptr<Context>
32 class RlBackupAlgorithm; // forward declaration; not referenced by any declaration visible in this listing
33 // NOTE(review): the class head that belongs before the "{" below (original line 35) is not visible in this listing;
34 // judging by the constructor name it is "class ReinforcedLearning" - confirm against the real header.
36 {
37 public:
38  static constexpr const char *configKey = "reinforcedLearning"; // NOTE(review): presumably the configuration key for this component - confirm at the use site
39 public:
40  ReinforcedLearning(std::shared_ptr<Context> context); // NOTE(review): single-argument and not `explicit`; no direct #include <memory> is visible in this listing
42  void initializeModel();
43  void updateModel();
44  void end(); // NOTE(review): presumably invoked once when a run finishes - confirm in the implementation file
45 protected:
46  size_t S{0}; // starts at 0; NOTE(review): presumably the number of states - confirm
47  size_t A{0}; // starts at 0; NOTE(review): presumably the number of actions - confirm
48  void initializePolicy();
49  //void initializeActionValues();
51  void updatePolicy(state_t state);
52  void updateLongTermReward(double reward, double discountFactor); // the `discountFactor` parameter hides the member of the same name declared below
56  //double epsilon{0.1};
57  double discountFactor{0.1}; // in-class default 0.1
58  double alpha{0.1}; // in-class default 0.1; NOTE(review): presumably the learning rate - confirm
59  double alphaDecaySpeed{0.99}; // in-class default 0.99; how it is applied to alpha is not visible here
60  //double epsilonDecaySpeed{0.99};
65  //std::vector<std::vector<std::vector<double>>> actionValuesRecord;
67  void printAVRecord();
68  void updateEpsilon(); // NOTE(review): still declared although the epsilon members above are commented out - check whether it is still needed
69  long long unsigned int epsilonTimeOut{0}; // starts at 0
70  void updateActualDiscountedReward(double reward);
73  void printActionValuesToFile(std::string folder); // takes `folder` by value; no direct #include <string> is visible in this listing
75  double longTermReward{0.0}; // starts at 0.0
77  void printStateSpace();
78 };
79 
80 
81 
82 
83 
84 
85 } // namespace Mdp
86 
87 
88 #endif
ActionValuesFunction * actionValues
RlBackupAlgorithm * backupAlgo
ActionValuesRecord actionValuesRecord
action_t getBestActionFromInitialPolicy(state_t s)
void updatePolicy(state_t state)
void updateLongTermReward(double reward, double discountFactor)
RlBackupAlgorithm * getBackupAlgorithm()
static constexpr const char * configKey
action_t getBestAction(state_t state)
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18
ActionSelectionStrategy * actionSelectionStrategy
void epsilonGreedyPolicyUpdate(state_t state)
void printActionValuesToFile(std::string folder)
size_t state_t
Definition: state.h:19
ReinforcedLearning(std::shared_ptr< Context > context)
Definition: reward.py:1
void updateActualDiscountedReward(double reward)
std::shared_ptr< Context > context
long long unsigned int epsilonTimeOut