21 , tempDecaySpeed(tDecaySpeed)
22 , tempStepSize(tStepSize)
28 std::vector<double> policy = std::vector<double>(actionValues.size());
30 size_t maxCandidate = 0;
31 double maxCandidateValue = -std::numeric_limits<double>::infinity();
32 for (
size_t i = 0;
i < policy.size();
i++)
39 if (var > maxCandidateValue)
41 maxCandidateValue = var;
46 for (
size_t i = 0;
i < policy.size();
i++)
48 if (sum <= 0.0 && sum >= 0.0)
49 policy[
i] = (
i == maxCandidate) ? 1.0 : 0.0;
58 void GibbsActionSelection::updateTemperature()
60 static long long unsigned int counter = 0;
61 if (counter ++>= tempStepSize)
std::vector< double > generatePolicy(const std::vector< double > &, action_t bestAction) override
GibbsActionSelection(double temperature, double tempDecaySpeed, double tempStepSize)