std::shared_ptr<RlTestDomainModel> domainModel = std::make_shared<RlTestDomainModel>();
// ... (lines elided in this excerpt: framework setup)
const int nbOfEpisodes = 500;
const int averageHorizon = 10;
double history[averageHorizon];
for (int i = 0; i < nbOfEpisodes; i++)
{
    double reward = 0;
    // ... (elided: presumably the first action of the episode is selected and performed)
    reward += domainModel->measureReward();
    while (domainModel->xpos != 11 || domainModel->ypos != 0)
    {
        // ... (elided: presumably the next action is selected and performed)
        reward += domainModel->measureReward();
    }
    // record the per-episode reward in a circular buffer
    history[i % averageHorizon] = reward;
    if (i >= averageHorizon)
    {
        double average = 0;
        for (int j = 0; j < averageHorizon; j++)
        {
            average += history[j];
        }
        std::cout << "episode " << i << ": average cost: " << average / averageHorizon << "\n";
    }
    // reset the agent to the start position for the next episode
    domainModel->xpos = 0;
    domainModel->ypos = 0;
}
// ... (elided: setup for a final demonstration run)
while (domainModel->xpos != 11 || domainModel->ypos != 0)
{
    std::cout << "xpos: " << domainModel->xpos << " ypos: " << domainModel->ypos << "\n";
    // ... (elided: presumably the chosen action is performed)
}
void setDomainModel(std::shared_ptr< DomainModel > model)
Sets the domain model used by the state space to determine the current state.
void setDomainModel(std::shared_ptr< DomainModel > model)
Sets the domain model used by the actions to act on the environment.
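Both overloads are typically given the same model instance, so the state the agent observes and the state the actions modify stay in sync. A minimal sketch, assuming stateSpace and actionSpace handles obtained from the getter functions below and that RlTestDomainModel derives from DomainModel:

    std::shared_ptr<DomainModel> model = std::make_shared<RlTestDomainModel>();
    stateSpace->setDomainModel(model);  // the state space reads the current state from the model
    actionSpace->setDomainModel(model); // the actions apply their effects to the same model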
virtual void performAction()=0
Performs this action on the environment; pure virtual, to be implemented by domain-specific actions.
void addAction(Action *action)
Adds an action to the action space.
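To illustrate how performAction() and addAction() fit together, a sketch of a hypothetical action for the grid world in the example; the MoveRight class, its constructor, and the assumption that the action space takes ownership of the raw pointer are all inventions for this illustration:

    // hypothetical action that moves the agent one cell to the right
    class MoveRight : public Action
    {
    public:
        explicit MoveRight(std::shared_ptr<RlTestDomainModel> m) : model(std::move(m)) {}
        void performAction() override { model->xpos += 1; } // act on the environment
    private:
        std::shared_ptr<RlTestDomainModel> model;
    };

    actionSpace->addAction(new MoveRight(domainModel));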
std::shared_ptr< StateSpace > getStateSpace()
Returns the state space; call this function last, after the space has been fully configured (e.g., after all dimensions have been added).
std::shared_ptr< ActionSpace > getActionSpace()
Returns the action space.
void addDimension(StateSpaceDimension *dimension)
Adds a dimension to the state space.
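A sketch of how state-space construction might fit together, assuming addDimension() and getStateSpace() live on the same object and that StateSpaceDimension describes one state variable; the constructor arguments are deliberately left as placeholders, since this excerpt does not show its signature:

    // hypothetical: one dimension per state variable of the grid world
    builder->addDimension(new StateSpaceDimension(/* xpos range; signature not shown in this excerpt */));
    builder->addDimension(new StateSpaceDimension(/* ypos range; signature not shown in this excerpt */));
    std::shared_ptr<StateSpace> stateSpace = builder->getStateSpace(); // call this last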
Action * selectAction(bool updateModel=true)
Returns the optimal action for the current time step; when updateModel is true (the default), the model is updated as well.
The main class of the MDP framework.
Action * selectActionWithoutUpdate()
Similar to selectAction(), but without updating the model.
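Taken together, the two selection functions support a learn-then-evaluate pattern; a sketch, again with a hypothetical mdp object:

    // during learning: select an action and update the model
    Action *a = mdp.selectAction();
    a->performAction();

    // during evaluation: exploit what was learned without changing it
    Action *greedy = mdp.selectActionWithoutUpdate();
    greedy->performAction();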
void init()
Initializes the framework; call this function first.
void end()
Call this function at the end.
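Putting the lifecycle notes together, the expected call order would be something like the following sketch (the mdp object and the loop body are assumptions):

    mdp.init(); // first
    for (int i = 0; i < nbOfEpisodes; i++)
    {
        // ... run one episode as in the example listing ...
    }
    mdp.end();  // last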
Builds a domain-specific state space.