18 , epsilonDecaySpeed(eDecaySpeed)
19 , epsilonTimeout(eTimeout)
26 static size_t nbActions = actionValues.size();
27 static std::vector<double> policy(nbActions);
28 static const size_t nbActionsMinusOne = nbActions - 1;
29 const double bestActionProba = 1.0 -
epsilon;
30 const double otherActionProba =
epsilon / (double) nbActionsMinusOne;
33 policy[
i] = ((
i == bestAction) ? bestActionProba : otherActionProba);
41 static long long unsigned int counter = 0;
long long unsigned int epsilonTimeout
virtual std::vector< double > generatePolicy(const std::vector< double > &, action_t bestAction) override
EpsilonGreedy(double epsilon, double epsilonDecaySpeed, long long unsigned int epsilonTimeout)