Scheduler
actionValuesRecord.cpp
Go to the documentation of this file.
1 
10 #include "actionValuesRecord.h"
11 
12 #include <iostream>
13 #include <algorithm>
14 #include <cassert>
15 #include <memory>
16 #include <fstream>
17 
18 #include <mdp/mdpConfiguration.h>
19 
20 using namespace Mdp;
21 
22 ActionValuesRecord::ActionValuesRecord(std::shared_ptr<MdpConfiguration> c,
23  size_t stateSize,
24  size_t actionSize)
25  : conf(c)
26  , S(stateSize)
27  , A(actionSize)
28 {
29 }
30 
32 {
    // NOTE(review): the signature line of this definition (listing line 31) is
    // absent from this listing, so the function name cannot be confirmed here.
    //
    // Reads the "reinforcementLearning" configuration section and, when
    // recording is switched on, allocates the in-memory buffers.
    // Recording is off unless "recordActionValues" is set (default: false).
33  enableRecordingOfActionValues = conf->getBoolValue("reinforcementLearning", "recordActionValues", false);
34  if (!enableRecordingOfActionValues)
35  return;
    // Despite the helper's name, its result is stored as the list of states to record.
36  statesToRecord = getListOfActionsToRecordFromFile();
    // Unlike the two boolean options, no default value is passed for the chunk size.
37  chunkSize = conf->getIntValue("reinforcementLearning", "actionValueRecordChunkSize");
    // These checks are assert()s, so they disappear when NDEBUG is defined.
38  assert(statesToRecord.size() > 0);
39  assert(chunkSize > 0);
40  std::cout << "S is "<<S <<"\n";
41  std::cout << "A is "<< A <<"\n";
42  std::cout << "chunkSize is " << chunkSize <<"\n";
    // avChunk[state][action][slot] holds a pair of doubles; every slot of the
    // S x A x chunkSize buffer starts out as (NAN, NAN).
43  avChunk = std::vector<std::vector<std::vector<std::pair<double, double>>>>(S,
44  std::vector<std::vector<std::pair<double, double>>>(A,
45  std::vector<std::pair<double, double>>(chunkSize, std::pair<double, double>(NAN, NAN))));
    // "recordOnlyLastAction" defaults to true.
46  recordOnlyLastAction = conf->getBoolValue("reinforcementLearning", "recordOnlyLastAction", true);
    // lastValueRecorded[state][action] starts as NAN for every pair.
47  lastValueRecorded = std::vector<std::vector<double>>(S, std::vector<double>(A, NAN));
48 }
49 
50 
51 
53 {
    // NOTE(review): the signature line of this definition (listing line 52) is
    // absent from this listing, so the function name cannot be confirmed here.
    // Does nothing when recording is disabled; otherwise flushes the buffers.
54  /*if recording only last value, we need to write a datapoint at the end,
55  otherwise the curve on the graph stops in the middle of the figure
56  instead of going to the end*/
57  if (!enableRecordingOfActionValues)
58  return;
    // In last-action mode, first re-record the last known value of each recorded pair.
59  if (recordOnlyLastAction)
60  recordLastDataPoint();
    // Then write every (state, action) chunk that is still buffered.
61  writeAllChunksToDisk();
62 }
63 
65  state_t state,
66  action_t action)
67 {
    // NOTE(review): the first signature line (listing line 64) is absent from
    // this listing. The `actionValues` pointer used below is declared there;
    // presumably this is ActionValuesRecord::recordActionValues - confirm
    // against the header.
    //
    // Entry point for recording: a no-op when recording is disabled, otherwise
    // dispatches on the recordOnlyLastAction option.
68  /*NOTE: enabling this consumes a LOT of resources*/
69  if (!enableRecordingOfActionValues)
70  return;
    // assert() only: this check disappears when NDEBUG is defined.
71  assert(statesToRecord.size() > 0);
    // Either record just the value of the given (state, action) pair,
    // or snapshot the value of every pair.
72  if (recordOnlyLastAction)
73  recordLastActionValue(actionValues->getValue(state, action), state, action);
74  else
75  recordAllActionValues(actionValues);
76 }
77 
78 void ActionValuesRecord::recordAllActionValues(ActionValuesFunction *actionValues)
79 {
80  if (!enableRecordingOfActionValues)
81  return;
82  /*For the moment, the avChunk contains all actions.
83  And we only write to file those that are in the actionsToRecord.
84  But maybe we should not even put in avRecord those actions that we don't want.*/
85  static long long unsigned int index = 0;
86  for (size_t s = 0; s < S; s++)
87  {
88  for (size_t a = 0; a < A; a++)
89  {
90  avChunk[s][a][index] = std::pair<double, double>(NAN, actionValues->getValue(s, a));
91  }
92  }
93  index++;
94  if (index == chunkSize)
95  {
96  writeAllChunksToDisk();
97  index = 0;
98  }
99 }
100 
101 void ActionValuesRecord::recordLastActionValue(double value, state_t s, action_t a)
102 {
103  if (!enableRecordingOfActionValues)
104  return;
105  static std::vector<std::vector<unsigned long long int>> indexes = std::vector<std::vector<unsigned long long int>>(
106  S, std::vector<unsigned long long int>(A, 0));
107  static unsigned long long int counter = 0;
108  unsigned long long int index = indexes[s][a]++;
109  avChunk[s][a][index] = std::pair<double, double>(counter, value);
110  if (indexes[s][a] == chunkSize)
111  {
112  indexes[s][a] = 0;
113  writeChunkToDisk(s, a);
114  }
115  lastValueRecorded[s][a] = value;
116  counter++;
117 }
118 
119 std::vector<int> ActionValuesRecord::getListOfActionsToRecordFromFile()
120 {
121  std::vector<std::string> str = conf->getStringList("reinforcementLearning", "actionValuesRecorded");
122  std::vector<int> states;
123  for (size_t i = 0; i < str.size(); i++)
124  {
125  states.push_back(std::stoi(str[i]));
126  }
127  std::sort(states.begin(), states.end());
128  return states;
129 }
130 
131 void ActionValuesRecord::writeAllChunksToDisk()
132 {
133  if (!enableRecordingOfActionValues)
134  return;
135  for (size_t s = 0; s < S; s++)
136  {
137  for (size_t a = 0; a < A; a++)
138  {
139  writeChunkToDisk(s, a);
140  }
141  }
142 }
143 
144 void ActionValuesRecord::writeChunkToDisk(state_t s, action_t a)
145 {
146  if (!enableRecordingOfActionValues)
147  return;
148  if (std::find(statesToRecord.begin(), statesToRecord.end(), s) == statesToRecord.end())
149  return;
150  std::string filename = conf->getFilePrefix() + "actionValuesForState";
151  filename += std::to_string(s) + "action" + std::to_string(a);
152  std::cerr << "writing chunk to "<< filename<<" \n";
153  std::ofstream file;
154  file.open(filename, std::ios_base::app);
155  int counter = 0;
156  for (size_t index = 0; index < avChunk[s][a].size(); index++)
157  {
158  if (std::isnan(avChunk[s][a][index].second))
159  break;
160  if (counter == 5)
161  {
162  counter = 0;
163 
164  counter++;
165  if (!std::isnan(avChunk[s][a][index].first)) //maybe there's a better way, like putting the if out of loop
166  file << avChunk[s][a][index].first << " ";
167  file << avChunk[s][a][index].second << "\n";
168  }
169  counter++;
170  }
171  file.close();
172  /*at this point, the chunk contains the previous values. Let's update it to contain only one value: the last one*/
173  for (size_t index = 0; index < chunkSize; index++)
174  {
175  avChunk[s][a][index] = avChunk[s][a][chunkSize-1]; /*TODO optimize...*/
176  }
177 }
178 
179 
180 
181 void ActionValuesRecord::recordLastDataPoint()
182 {
183  if (!enableRecordingOfActionValues)
184  return;
185  for (size_t i = 0; i < statesToRecord.size(); i++)
186  {
187  state_t s = statesToRecord[i];
188  for (action_t a = 0; a < A; a++)
189  {
190  if (s >= lastValueRecorded.size())
191  {
192  std::cerr << "s is " << s <<"\n";
193  std::cerr << "size of vector: " << lastValueRecorded.size() <<"\n";
194  throw std::invalid_argument("invalid state to record");
195  }
196  recordLastActionValue(lastValueRecorded[s][a], s, a);
197  }
198  }
199 }
200 
201 
202 
203 
virtual double getValue(state_t state, action_t action)=0
const size_t A
const size_t S
void recordActionValues(ActionValuesFunction *actionValues, state_t state, action_t action)
string filename
Definition: aging.py:5
size_t action_t
Definition: action_impl.h:18
Definition: action.h:18
size_t state_t
Definition: state.h:19
ActionValuesRecord(std::shared_ptr< MdpConfiguration > conf, size_t stateSize, size_t actionSize)