Scheduler
testRL.cpp
#include <iostream>
#include <memory>
#include <vector>

#include <mdp/mdpModel.h>
#include <mdp/stateSpaceBuilder.h>
#include <mdp/actionSpaceBuilder.h>
#include <mdp/mdpConfiguration.h>
#include <mdp/action.h>

#include "rlTestDomainModel.h"
#include "rlTestActions.h"
#include "rlTestDimensions.h"

int main()
{
    Mdp::StateSpaceBuilder stateSpaceBuilder;
    Mdp::ActionSpaceBuilder actionSpaceBuilder;

    std::shared_ptr<RlTestDomainModel> domainModel = std::make_shared<RlTestDomainModel>();

    // Both builders work through the same domain model: the state space
    // reads the current state from it, the actions act on it.
    stateSpaceBuilder.setDomainModel(domainModel);
    actionSpaceBuilder.setDomainModel(domainModel);

    // Two state dimensions, presumably the x (12 cells) and y (4 cells)
    // extents of the grid world.
    stateSpaceBuilder.addDimension(new RlDim(12));
    stateSpaceBuilder.addDimension(new RlDim(4));

    Mdp::StateSpace *stateSpace = stateSpaceBuilder.getStateSpace();   // call last

    // One action per movement direction.
    actionSpaceBuilder.addAction(new GoUp);
    actionSpaceBuilder.addAction(new GoDown);
    actionSpaceBuilder.addAction(new GoLeft);
    actionSpaceBuilder.addAction(new GoRight);

    Mdp::ActionSpace *actionSpace = actionSpaceBuilder.getActionSpace();

    Mdp::MdpConfiguration *config = new Mdp::MdpConfiguration("configuration.conf");
    Mdp::MdpModel mdpModel(stateSpace, actionSpace, config);
    mdpModel.init();   // init() must be called before the first action selection

    const int nbOfEpisodes = 500;
    const int averageHorizon = 10;
    double history[averageHorizon];   // circular buffer of the last 10 episode rewards
    for (int i = 0; i < nbOfEpisodes; i++)
    {
        double reward = 0.0;
        int steps = 0;

        // First action of the episode: select without updating the model,
        // presumably because there is no previous transition to learn from yet.
        Mdp::Action *action = mdpModel.selectActionWithoutUpdate();
        action->performAction();
        reward += domainModel->measureReward();

        // Act until the goal cell (11, 0) is reached.
        while (domainModel->xpos != 11 || domainModel->ypos != 0)
        {
            steps++;
            Mdp::Action *action = mdpModel.selectAction();
            action->performAction();
            reward += domainModel->measureReward();
            //std::cout << "landed in xpos "<<domainModel->xpos<<" ypos "<<domainModel->ypos<<" \n";
        }
        mdpModel.end();
        //std::cout << "episode "<<i<<": total reward is "<<reward<<" reached in "<<steps<<" steps\n";

        // Report the reward averaged over the last averageHorizon episodes.
        history[i % averageHorizon] = reward;
        if (i >= averageHorizon)
        {
            double average = 0;
            for (int j = 0; j < averageHorizon; j++)
            {
                average += history[j];
            }
            std::cout << "episode " << i << ": average reward: " << average / averageHorizon << "\n";
        }
        //mdpModel.printPolicy(std::cout);

        // Reset the agent to the start cell for the next episode.
        domainModel->xpos = 0;
        domainModel->ypos = 0;
    }

#if 0
    // Disabled: greedy rollout of the learned policy with position tracing.
    while (domainModel->xpos != 11 || domainModel->ypos != 0)
    {
        std::cout << "xpos: " << domainModel->xpos << " ypos: " << domainModel->ypos << "\n";
        Mdp::Action *action = mdpModel.selectAction();
        action->performAction();
    }
#endif
    return 0;
}
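
Seen end to end, the listing follows the MdpModel calling protocol implied by its documentation: init() is called once before any action is selected; the first action of each episode comes from selectActionWithoutUpdate(), presumably because there is no prior transition to learn from yet; every subsequent step uses selectAction(), which updates the model and returns the next action; and end() closes the episode. On the builder side, getStateSpace() and getActionSpace() are only called after every dimension and action has been registered.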
Referenced symbols:

Mdp::StateSpaceBuilder: builds a domain-specific state space.
    void setDomainModel(std::shared_ptr<DomainModel> model): sets the domain model used by the state space to determine the current state.
    void addDimension(StateSpaceDimension *dimension): adds a dimension to the state space.
    std::shared_ptr<StateSpace> getStateSpace(): call this function last!
Mdp::ActionSpaceBuilder: builds the action space.
    void setDomainModel(std::shared_ptr<DomainModel> model): sets the domain model used by the actions to act on the environment.
    void addAction(Action *action): adds an action to the action space.
    std::shared_ptr<ActionSpace> getActionSpace(): gets the action space.
Mdp::Action: virtual void performAction() = 0.
Mdp::MdpModel: the main class of the MDP framework (mdpModel.h:35).
    void init(): call this function first (mdpModel.cpp:72).
    Action *selectAction(bool updateModel = true): returns the optimal action for the current timestep (mdpModel.cpp:89).
    Action *selectActionWithoutUpdate(): similar to selectAction(), but without updating the model (mdpModel.cpp:84).
    void end(): call this function at the end (mdpModel.cpp:79).
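
The domain-specific pieces included by the listing (rlTestDomainModel.h, rlTestActions.h, rlTestDimensions.h) are not shown here. For orientation, below is a minimal self-contained stand-in for the environment that main() implies: a 12x4 grid matching RlDim(12) and RlDim(4), the goal at (11, 0), and four single-cell moves. The reward values and the clamped moves are assumptions, not the real RlTestDomainModel:

#include <iostream>

// Hypothetical stand-in for the test domain: a 12x4 grid with the goal at
// (11, 0), matching the RlDim(12)/RlDim(4) dimensions and the loop
// condition in main(). Reward scheme is an assumption.
struct GridWorld
{
    int xpos = 0;
    int ypos = 0;

    bool atGoal() const { return xpos == 11 && ypos == 0; }

    // Assumed reward: -1 per step until the goal cell is reached.
    double measureReward() const { return atGoal() ? 0.0 : -1.0; }

    // One cell per action, clamped to the grid, mirroring GoUp/GoDown/GoLeft/GoRight.
    void goUp()    { if (ypos < 3)  ypos++; }
    void goDown()  { if (ypos > 0)  ypos--; }
    void goLeft()  { if (xpos > 0)  xpos--; }
    void goRight() { if (xpos < 11) xpos++; }
};

int main()
{
    GridWorld world;
    double reward = 0.0;
    while (!world.atGoal())   // shortest path: straight right along y == 0
    {
        world.goRight();
        reward += world.measureReward();
    }
    std::cout << "total reward: " << reward << "\n";   // prints -10
    return 0;
}

Under these assumptions an optimal policy earns -10 per episode (ten penalized steps, with the eleventh landing on the goal), which is the kind of plateau the moving-average printout in the listing is meant to reveal.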