29 , bestQ(c->stateSpace->size())
30 , bestAction(c->stateSpace->size())
31 , needsUpdate(std::vector<bool>(c->stateSpace->size(), true))
37 static size_t size =
context->stateSpace->size();
59 for (
size_t i = 1;
i <
context->actionSpace->size();
i++)
64 const double eps = 0.0000001;
65 if ((candidate < Q + eps) && (candidate > Q - eps) &&
context->randomGenerator->drawUniform(0.0, 2.0) > 1.0)
99 alpha =
context->conf->getDoubleValue(
"reinforcementLearning",
"alpha");
101 if (
context->conf->getBoolValue(
"reinforcementLearning",
"alphaHyperbolicDecay",
false))
103 if (
context->conf->getBoolValue(
"reinforcementLearning",
"alphaStepwiseDecay",
false))
106 stepLength =
context->conf->getUnsignedLongLongIntValue(
"reinforcementLearning",
"alphaStepLength");
virtual void notifyUpdateNeeded()
virtual double getValue(state_t state, action_t action)=0
virtual void updateAlpha()
std::vector< double > bestQ
unsigned long long stepwiseCounter
virtual void updateBestActionAndQ(state_t state)
virtual void updateIfNeeded(state_t state)
std::vector< action_t > bestAction
virtual action_t getBestAction(state_t state)
virtual std::pair< action_t, double > getBestActionAndQ(state_t state)
std::vector< bool > needsUpdate
RlBackupAlgorithm(std::shared_ptr< Context > c, ActionValuesFunction *av)
std::shared_ptr< Context > context
ActionValuesFunction * actionValues
virtual double getMaxQ(state_t state)
unsigned long long int stepLength