Scheduler
gibbsActionSelection.cpp
Go to the documentation of this file.
1 
10 #include "gibbsActionSelection.h"
11 
12 #include <cmath>
13 #include <stdexcept>
14 #include <iostream>
15 #include <limits>
16 
17 using namespace Mdp;
18 
19 GibbsActionSelection::GibbsActionSelection(double t, double tDecaySpeed, double tStepSize)
20  : temperature(t)
21  , tempDecaySpeed(tDecaySpeed)
22  , tempStepSize(tStepSize)
23 {
24 }
25 
26 std::vector<double> GibbsActionSelection::generatePolicy(const std::vector<double>& actionValues, action_t /*bestAction*/)
27 {
28  std::vector<double> policy = std::vector<double>(actionValues.size());
29  double sum = 0.0;
30  size_t maxCandidate = 0;
31  double maxCandidateValue = -std::numeric_limits<double>::infinity();
32  for (size_t i = 0; i < policy.size(); i++)
33  {
34  double var = actionValues[i]/temperature;
35 
36  policy[i] = exp(var);
37  //std::cerr << "policy["<<i<<"] is "<<policy[i]<<" = exp("<<var<<")\n";
38  sum += policy[i];
39  if (var > maxCandidateValue)
40  {
41  maxCandidateValue = var;
42  maxCandidate = i;
43  }
44  }
45  //std::cerr << "sum is "<< sum <<"\n";
46  for (size_t i = 0; i < policy.size(); i++)
47  {
48  if (sum <= 0.0 && sum >= 0.0) /*FIXME maybe we can compare the value of the max to the value of the secondmax*/
49  policy[i] = (i == maxCandidate) ? 1.0 : 0.0;
50  else
51  policy[i] /= sum;
52  //std::cerr << "policy[" << i << "] is "<< policy[i] <<"\n";
53  }
54  updateTemperature();
55  return policy;
56 }
57 
58 void GibbsActionSelection::updateTemperature()
59 {
60  static long long unsigned int counter = 0;
61  if (counter ++>= tempStepSize)
62  {
63  temperature *= tempDecaySpeed;
64  counter = 0;
65  }
66 }
67 
68 
69 
std::vector< double > generatePolicy(const std::vector< double > &, action_t bestAction) override
GibbsActionSelection(double temperature, double tempDecaySpeed, double tempStepSize)
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18