MdpModel::MdpModel(std::shared_ptr<StateSpace> stateSpace,
                   std::shared_ptr<ActionSpace> actionSpace,
                   std::shared_ptr<MdpConfiguration> conf)
{
    // ...
}
void MdpModel::constructContext(std::shared_ptr<StateSpace> stateSpace,
                                std::shared_ptr<ActionSpace> actionSpace,
                                std::shared_ptr<MdpConfiguration> conf)
{
    context = std::make_shared<Context>();
    context->stateSpace = stateSpace;
    context->actionSpace = actionSpace;
    size_t s = context->stateSpace->size();
    size_t a = context->actionSpace->size();
    context->matrix = std::make_shared<TransitionMatrix>(s, a);
    context->rewards = std::make_shared<Rewards>(s, a);
    context->constraintList = std::make_shared<ConstraintList>();
    context->randomGenerator = std::make_shared<Utils::RandomGenerator>();
    context->randomGenerator->seed(conf->getIntValue("mdp", "seed"));
    context->policy = std::make_shared<Policy>(s, a, context->randomGenerator);
    context->conf = conf;  // assumed: implied by the use of context->conf below
    context->horizon = std::shared_ptr<Horizon>(context->conf->getHorizonFromFile());
    context->horizon->initialStateDistribution = std::vector<double>(s, 1.0 / s);
}
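Two details in constructContext are worth calling out: the random generator is seeded from section "mdp", key "seed" of the configuration, so a fixed seed gives reproducible runs, and std::vector<double>(s, 1.0/s) sets the horizon's initial state distribution to uniform, giving each of the s states probability 1/s (for s = 4, each state starts with probability 0.25).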
context->stateSpace->updateCurrentState();
// ...
context->stateSpace->updateCurrentState();
context->actionSpace->updateLastAction(action);
return context->actionSpace->getAction(action);
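These lines read like the tail of selectAction: the current state is refreshed, the chosen action index is stored as the last action, and the matching Action object is returned. A caller-side sketch of one decision step, assuming model is an instance of a concrete MdpModel subclass; executeInEnvironment, currentState, and chosenAction are hypothetical placeholders, not part of this API:

Action *a = model.selectAction();                  // updates the model, then picks an action
double reward = executeInEnvironment(a);           // hypothetical: act and observe the reward
model.record(currentState, chosenAction, reward);  // presumably appends to the history vectors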
std::ofstream file;  // assumed declaration (implied by the open call below)
file.open(folder + "/transitionMatrix.txt", std::ios_base::app);
// ... (elided; presumably the transition matrix is written here)
std::ofstream policyFile;
policyFile.open(folder + "/policy.txt", std::ios_base::app);
context->policy->print(policyFile);
// ...
std::ofstream summaryFile;
summaryFile.open(folder + "/mdpSummary.txt", std::ios_base::app);
// ... (elided; presumably the summary is written here)
void MdpModel::printPolicy(std::ostream &stream)
{
    context->policy->print(stream);
}
stream << "Summary of MDP\n";
stream << "state statistics:\n";
stream << "distribution of states visited:\n";
std::vector<int> stateCount(context->stateSpace->size());
// ... (elided: stateCount is filled in from the recorded history)
int sum = 0;  // assumed declaration (implied by its use below)
for (size_t i = 0; i < stateCount.size(); i++) {
    sum += stateCount[i];
}
for (size_t i = 0; i < stateCount.size(); i++) {
    stream << i << ": " << ((double)stateCount[i]) / ((double)sum) << "\n";
}
void MdpModel::setConstraintList(std::shared_ptr<ConstraintList> list)
{
    context->constraintList = list;
}
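This replaces the default-constructed (empty) ConstraintList that constructContext installs.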
virtual void setLearningStrategy()
virtual void constructContext(std::shared_ptr<StateSpace> stateSpace, std::shared_ptr<ActionSpace> actionSpace, std::shared_ptr<MdpConfiguration> conf)
virtual void updateModel() = 0
void printPolicy(std::ostream &stream)
std::vector<action_t> actionHistory
LearningStrategy *learningStrategy
virtual void initializeModel() = 0
void record(state_t state, action_t action, double reward)
Action *selectAction(bool updateModel = true)
Returns the optimal action for the current timestep.
std::shared_ptr<Context> context
std::vector<state_t> stateHistory
std::vector<double> rewardHistory
Action *selectActionWithoutUpdate()
Similar to selectAction, but without updating the model.
void setConstraintList(std::shared_ptr<ConstraintList> list)
MdpModel(std::shared_ptr<StateSpace> stateSpace, std::shared_ptr<ActionSpace> actionSpace, std::shared_ptr<MdpConfiguration> conf)
void printSummary(std::ostream &stream)
void init()
Call this function first (see the lifecycle sketch after this list).
void printReportsToFile(std::string folder)
Prints the transition matrix, policy, and summary reports to files in the given folder.
void end()
Call this function at the end.
void setRewards(std::shared_ptr<Rewards> rewards)
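Taken together, the members suggest the following lifecycle. A sketch under assumptions: MyModel stands for a hypothetical concrete subclass implementing the pure virtuals initializeModel and updateModel, the MdpConfiguration constructor taking a file path is assumed, and horizonLength is a placeholder for however many timesteps the run lasts:

auto conf = std::make_shared<MdpConfiguration>("mdp.conf");  // assumed constructor
MyModel model(stateSpace, actionSpace, conf);                // spaces built beforehand

model.init();                                  // call this first
for (int t = 0; t < horizonLength; ++t) {
    Action *a = model.selectAction();          // per-step loop as sketched earlier
    // ... execute a, observe the reward, then model.record(...)
}
model.end();                                   // call this at the end
model.printReportsToFile("reports");           // transitionMatrix.txt, policy.txt, mdpSummary.txt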