MdpModel is the main class of the MDP framework. The relevant header excerpt (forward declarations and key signatures):

    class LearningStrategy;
    class MdpConfiguration;

    MdpModel(std::shared_ptr<StateSpace> stateSpace,
             std::shared_ptr<ActionSpace> actionSpace,
             std::shared_ptr<MdpConfiguration> conf);

    void setRewards(std::shared_ptr<Rewards> rewards);

    virtual void constructContext(std::shared_ptr<StateSpace> stateSpace,
                                  std::shared_ptr<ActionSpace> actionSpace,
                                  std::shared_ptr<MdpConfiguration> conf);
Member function and data member documentation:

virtual void setLearningStrategy()
    Sets up the learning strategy (virtual; overridable by subclasses).

virtual void constructContext(std::shared_ptr<StateSpace> stateSpace, std::shared_ptr<ActionSpace> actionSpace, std::shared_ptr<MdpConfiguration> conf)
    Builds the shared context from the state space, action space, and configuration.

void printPolicy(std::ostream &stream)
    Prints the current policy to the given stream.

std::vector<action_t> actionHistory
    The actions taken so far, one per timestep.

LearningStrategy *learningStrategy
    The learning strategy in use.

void record(state_t state, action_t action, double reward)
    Records an observed (state, action, reward) triple for the current timestep.

Action *selectAction(bool updateModel = true)
    Returns the optimal action for the current timestep.
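Taken together, selectAction() and record() imply a per-timestep decision loop. A minimal sketch, assuming hypothetical application helpers applyToEnvironment(), observeState(), computeReward(), and toActionId() (an Action*-to-action_t mapping) that are not part of this API:

    const int horizon = 1000;                  // hypothetical episode length
    for (int t = 0; t < horizon; ++t) {
        Action *a = model.selectAction();      // optimal action; also updates the model
        applyToEnvironment(a);                 // hypothetical: actuate the chosen action
        state_t s = observeState();            // hypothetical: read the resulting state
        double  r = computeReward(s);          // hypothetical: reward for this step
        model.record(s, toActionId(a), r);     // toActionId() is a hypothetical mapping
    }

To query the policy without changing the model, pass selectAction(false) or call selectActionWithoutUpdate() below.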
std::shared_ptr<Context> context
    The shared context built by constructContext().

std::vector<state_t> stateHistory
    The states observed so far, one per timestep.

std::vector<double> rewardHistory
    The rewards received so far, one per timestep.

Action *selectActionWithoutUpdate()
    Similar to selectAction(), but does not update the model.
void setConstraintList(std::shared_ptr<ConstraintList> list)
    Sets the constraint list used by the model.

MdpModel(std::shared_ptr<StateSpace> stateSpace, std::shared_ptr<ActionSpace> actionSpace, std::shared_ptr<MdpConfiguration> conf)
    Constructs the model from a state space, an action space, and a configuration.

void printSummary(std::ostream &stream)
    Prints a summary of the model to the given stream.

void init()
    Call this function first, before using the rest of the interface.
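A typical setup might look like the sketch below. How the state space, action space, and configuration are constructed is application-specific and assumed here; only the MdpModel calls come from this API:

    auto states  = std::make_shared<StateSpace>(/* ... */);        // hypothetical construction
    auto actions = std::make_shared<ActionSpace>(/* ... */);       // hypothetical construction
    auto conf    = std::make_shared<MdpConfiguration>(/* ... */);  // hypothetical construction

    MdpModel model(states, actions, conf);
    model.init();    // documented as the first call to make on the model

Whether setRewards() and setConstraintList() belong before or after init() is not stated here; treat their placement as an assumption when adapting this sketch.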
void printReportsToFile(std::string folder)
    Prints reports to files in the given folder.

void end()
    Call this function at the end of the run.

void setRewards(std::shared_ptr<Rewards> rewards)
    Sets the rewards used by the model.
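At the end of a run, end() closes out the model and the print members emit diagnostics. A sketch of a plausible wrap-up; the ordering of the print calls relative to end(), and the folder name, are assumptions:

    model.printSummary(std::cout);          // run summary to a stream
    model.printPolicy(std::cout);           // learned policy to a stream
    model.printReportsToFile("reports");    // "reports" is a hypothetical folder name
    model.end();                            // documented: call this at the end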