47 ,
S(c->stateSpace->size())
48 ,
A(c->actionSpace->size())
50 , rewardRecord(c->conf,
"rewardRecord")
53 assert(
context->conf !=
nullptr);
70 assert(
context->conf !=
nullptr);
83 size_t S =
context->stateSpace->size();
84 for (
size_t s = 0; s <
S; s++)
86 std::vector<size_t> *vect =
context->stateSpace->factorize(s);
87 for (
size_t i = 0;
i < vect->size();
i++)
89 std::cerr << (*vect)[
i] <<
" ";
98 std::string str =
context->conf->getStringValue(
"reinforcementLearning",
"actionSelectionStrategy");
99 if (!str.compare(
"epsilonGreedy"))
101 double epsilon =
context->conf->getRlEpsilonFromFile();
102 double epsilonDecaySpeed =
context->conf->getDoubleValue(
103 "reinforcementLearning",
"epsilonDecaySpeed");
104 long long unsigned int epsilonTimeout =
context->conf->getUnsignedLongLongIntValue(
105 "reinforcementLearning",
"epsilonTimeOut");
108 else if (!str.compare(
"greedy"))
112 else if (!str.compare(
"Gibbs"))
114 double temperature =
context->conf->getDoubleValue(
"reinforcementLearning",
"GibbsTemperature");
115 double tempDecaySpeed =
context->conf->getDoubleValue(
"reinforcementLearning",
"GibbsTempDecaySpeed");
116 double tempStepSize =
context->conf->getDoubleValue(
"reinforcementLearning",
"GibbsTempStepSize");
121 throw std::invalid_argument(
"invalid value for actionSelectionStrategy");
128 std::string initStr =
context->conf->getStringValue(
"reinforcementLearning",
"initialPolicy");
129 if (!initStr.compare(
"uniform"))
131 context->policy->initializeUniformly();
133 else if (!initStr.compare(
"fromFile"))
136 context->policy->initializeFromFile(
"configuration/initialPolicy");
140 throw std::invalid_argument(
"initial policy not defined");
146 std::string str =
context->conf->getStringValue(
"reinforcementLearning",
"algo");
172 throw std::runtime_error(
"Reinforcement Learning algorithm lookup failed");
182 if (reward == -HUGE_VAL)
205 static long long int counter = 0;
214 static const bool updatePolicy =
context->conf->getBoolValue(
"reinforcementLearning",
"updatePolicy",
true);
228 context->policy->update(state, policy);
233 size_t S =
context->stateSpace->size();
234 size_t A =
context->actionSpace->size();
235 static std::vector<std::vector<double>> init(S, std::vector<double>(A));
236 static bool valid =
false;
237 static std::vector<action_t> bestAction(S);
241 std::string
filename =
"configuration/initialPolicy";
243 stream.open(filename);
244 if (!stream.is_open())
245 throw std::runtime_error(
"cannot open file");
247 std::vector<std::vector<double>>
pol;
249 while(std::getline(stream, line))
252 std::vector<double> row;
254 for (
size_t i = 0;
i < elements.size();
i++)
256 row.push_back(std::stod(elements[
i]));
261 for (
size_t s = 0; s <
S; s++)
264 bestValue = init[s][0];
265 for (
size_t a = 1; a <
A; a++)
267 if (init[s][a] > bestValue)
269 bestValue = init[s][a];
276 return bestAction[s];
302 std::ofstream normalized;
303 file.open(folder +
"/rlfile.txt", std::ios_base::app);
304 normalized.open(folder +
"/rlfilenormalized.txt", std::ios_base::app);
305 for (
unsigned int i = 0;
i <
S;
i++)
307 bool allEqual =
true;
308 unsigned int maxIndex = 0;
310 for (
size_t j = 1; j <
A; j++)
317 double eps = 0.0000001;
322 for (
unsigned int j = 0; j <
A; j++)
325 normalized << ((allEqual ==
true) ? 1 : ((j == maxIndex) ? 1 : 0) ) <<
" ";
virtual std::vector< double > generatePolicy(const std::vector< double > &, action_t bestAction)=0
Utils::Record rewardRecord
virtual void notifyUpdateNeeded()
virtual double getValue(state_t state, action_t action)=0
ActionValuesFunction * actionValues
RlBackupAlgorithm * backupAlgo
static constexpr const char * configKey
ActionValuesRecord actionValuesRecord
virtual std::vector< double > getValues(state_t state)=0
void printToFile(std::string folder) const
action_t getBestActionFromInitialPolicy(state_t s)
static constexpr const char * configKey
virtual void updateActionValues(state_t previousState, state_t nextState, action_t previousAction, double reward)=0
void updatePolicy(state_t state)
void updateLongTermReward(double reward, double discountFactor)
RlBackupAlgorithm * getBackupAlgorithm()
void add(double time, double element)
void recordActionValues(ActionValuesFunction *actionValues, state_t state, action_t action)
action_t getBestAction(state_t state)
static const constexpr char * configKey
virtual action_t getBestAction(state_t state)
double actualDiscountedReward
ActionSelectionStrategy * actionSelectionStrategy
static constexpr const char * configKey
void epsilonGreedyPolicyUpdate(state_t state)
static constexpr const char * configKey
void printActionValuesToFile(std::string folder)
ReinforcedLearning(std::shared_ptr< Context > context)
void updateActualDiscountedReward(double reward)
std::shared_ptr< Context > context
static std::vector< std::string > split(std::string str, char delimiter)
void initializeActionSelectionStrategy()