Scheduler
mdpModel.cpp
#include "mdpModel.h"

#include <cassert>
#include <iostream>
#include <fstream>

#include <utils/randomGenerator.h>

#include "action.h"
#include "action_impl.h"
#include "actionSpace.h"
#include "constraintList.h"
#include "context.h"
#include "horizon.h"
#include "mdpConfiguration.h"
#include "policy.h"
#include "rewards.h"
#include "stateSpace.h"
#include "transitionMatrix.h"

using namespace Mdp;

MdpModel::MdpModel(std::shared_ptr<StateSpace> stateSpace,
                   std::shared_ptr<ActionSpace> actionSpace,
                   std::shared_ptr<MdpConfiguration> conf)
{
    constructContext(stateSpace, actionSpace, conf);
}

void MdpModel::setLearningStrategy()
{
    assert(context);
    learningStrategy = context->conf->getLearningStrategyFromFile(context);
}

void MdpModel::constructContext(std::shared_ptr<StateSpace> stateSpace,
                                std::shared_ptr<ActionSpace> actionSpace,
                                std::shared_ptr<MdpConfiguration> conf)
{
    context = std::make_shared<Context>();
    context->stateSpace = stateSpace;
    context->actionSpace = actionSpace;
    size_t s = context->stateSpace->size();
    size_t a = context->actionSpace->size();
    context->matrix = std::make_shared<TransitionMatrix>(s, a);
    context->rewards = std::make_shared<Rewards>(s, a);
    context->constraintList = std::make_shared<ConstraintList>();
    context->randomGenerator = std::make_shared<Utils::RandomGenerator>();
    context->randomGenerator->seed(conf->getIntValue("mdp", "seed"));
    context->policy = std::make_shared<Policy>(s, a, context->randomGenerator);
    context->conf = conf;
    context->horizon = std::shared_ptr<Horizon>(context->conf->getHorizonFromFile());
    // Default to a uniform distribution over the s states.
    context->horizon->initialStateDistribution = std::vector<double>(s, 1.0 / s);
}
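
/*
 * Configuration note: construction and init() read their settings through
 * MdpConfiguration. The ini-style layout sketched below is an assumption
 * about the file format; only the section name "mdp" and the keys "seed"
 * and "recordHistory" appear in this file. getLearningStrategyFromFile()
 * and getHorizonFromFile() read further entries not visible here.
 *
 *     [mdp]
 *     seed = 42              ; fed to RandomGenerator::seed()
 *     recordHistory = true   ; optional; init() defaults it to false
 */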

MdpModel::~MdpModel()
{
}

// Call this function first.
void MdpModel::init()
{
    context->stateSpace->updateCurrentState();
    initializeModel();  // implemented by the concrete subclass
    recordHistory = context->conf->getBoolValue("mdp", "recordHistory", false);
}

// Call this function at the end.
void MdpModel::end()
{
}

// Similar to selectAction(), but without updating the model.
Action *MdpModel::selectActionWithoutUpdate()
{
    return selectAction(false);
}

// Returns the optimal action for the current timestep.
Action *MdpModel::selectAction(bool updateModel)
{
    context->stateSpace->updateCurrentState();
    if (updateModel)
        this->updateModel();  // dispatch to the subclass's model update
    state_t state = context->stateSpace->getState();
    action_t action = context->policy->getAction(state);
    context->actionSpace->updateLastAction(action);
    if (recordHistory)
        record(state, action, context->stateSpace->getReward());
    return context->actionSpace->getAction(action);
}
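
/*
 * A minimal usage sketch (not part of this file). MdpModel is abstract, so a
 * concrete subclass must supply initializeModel() and updateModel(). MyModel,
 * makeStateSpace(), makeActionSpace() and loadConf() are hypothetical
 * stand-ins; only the MdpModel member calls are taken from this file.
 *
 * @code
 * class MyModel : public Mdp::MdpModel
 * {
 * public:
 *     using MdpModel::MdpModel;
 *     void initializeModel() override { }  // set up initial estimates
 *     void updateModel() override { }      // refine estimates after each step
 * };
 *
 * int main()
 * {
 *     MyModel model(makeStateSpace(), makeActionSpace(), loadConf("mdp.conf"));
 *     model.init();                          // call this first
 *     for (int t = 0; t < 100; t++)
 *     {
 *         Action *a = model.selectAction();  // also updates the model
 *         // ... apply *a to the controlled system ...
 *     }
 *     model.end();                           // call this at the end
 *     model.printReportsToFile("reports");
 *     return 0;
 * }
 * @endcode
 */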

// Print reports to file.
void MdpModel::printReportsToFile(std::string folder)
{
    std::ofstream file;
    file.open(folder + "/transitionMatrix.txt", std::ios_base::app);
    context->matrix->print(file);
    file.close();

    std::ofstream policyFile;
    policyFile.open(folder + "/policy.txt", std::ios_base::app);
    context->policy->print(policyFile);
    policyFile.close();

    std::ofstream summaryFile;
    summaryFile.open(folder + "/mdpSummary.txt", std::ios_base::app);
    printSummary(summaryFile);
    summaryFile.close();
}

void MdpModel::printPolicy(std::ostream& stream)
{
    context->policy->print(stream);
}

void MdpModel::record(state_t state, action_t action, double reward)
{
    if (recordHistory)
    {
        stateHistory.push_back(state);
        actionHistory.push_back(action);
        rewardHistory.push_back(reward);
    }
}

void MdpModel::printSummary(std::ostream& stream)
{
    stream << "Summary of MDP\n";
    stream << "state statistics:\n";
    stream << "distribution of states visited:\n";
    // Count how often each state appears in the recorded history.
    std::vector<int> stateCount(context->stateSpace->size());
    for (size_t i = 0; i < stateHistory.size(); i++)
    {
        stateCount[stateHistory[i]]++;
    }
    int sum = 0;
    for (size_t i = 0; i < stateCount.size(); i++)
    {
        sum += stateCount[i];
    }
    // Print each state's share of the visits (assumes a non-empty history).
    for (size_t i = 0; i < stateCount.size(); i++)
    {
        stream << i << ": " << ((double)stateCount[i]) / ((double)sum) << "\n";
    }
}
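
/*
 * For illustration, with three states and a recorded history the summary
 * written above takes this shape (the proportions are made up):
 *
 *     Summary of MDP
 *     state statistics:
 *     distribution of states visited:
 *     0: 0.5
 *     1: 0.3
 *     2: 0.2
 */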

void MdpModel::setConstraintList(std::shared_ptr<ConstraintList> list)
{
    context->constraintList = list;
}

void MdpModel::setRewards(std::shared_ptr<Rewards> rewards)
{
    context->rewards = rewards;
}