// NOTE(review): this is a fragment of a source listing. The enclosing function
// header, braces and some statements are not visible here. Presumably this is
// the body of the ActionValuesRecord constructor; confirm against the full file.
//
// Read the "recordActionValues" switch from the "reinforcementLearning"
// configuration section. `false` is passed as the third argument (presumably
// the fallback when the key is absent; confirm getBoolValue's contract).
33 enableRecordingOfActionValues = conf->getBoolValue(
"reinforcementLearning",
"recordActionValues",
false);
// Guard on the switch. The guarded statement (listing line 35) is not visible;
// presumably an early exit so nothing below is set up when recording is off.
34 if (!enableRecordingOfActionValues)
// Load the sorted list of integers to record; the result is stored in statesToRecord.
36 statesToRecord = getListOfActionsToRecordFromFile();
// Number of samples held per (state, action) buffer, read from configuration.
37 chunkSize = conf->getIntValue(
"reinforcementLearning",
"actionValueRecordChunkSize");
// Both settings must be non-empty / positive. These are assert()s only, so they
// vanish when the build defines NDEBUG.
38 assert(statesToRecord.size() > 0);
39 assert(chunkSize > 0);
// Diagnostic dump of the dimensions to standard output.
40 std::cout <<
"S is "<<S <<
"\n";
41 std::cout <<
"A is "<< A <<
"\n";
42 std::cout <<
"chunkSize is " << chunkSize <<
"\n";
// Allocate the sample buffer as S x A x chunkSize pairs, every pair initialised
// to (NAN, NAN).
43 avChunk = std::vector<std::vector<std::vector<std::pair<double, double>>>>(S,
44 std::vector<std::vector<std::pair<double, double>>>(A,
45 std::vector<std::pair<double, double>>(chunkSize, std::pair<double, double>(NAN, NAN))));
// Recording mode switch; `true` is passed as the third argument (presumably the
// default; confirm).
46 recordOnlyLastAction = conf->getBoolValue(
"reinforcementLearning",
"recordOnlyLastAction",
true);
// S x A table of the most recently recorded value per (state, action), NAN until set.
47 lastValueRecorded = std::vector<std::vector<double>>(S, std::vector<double>(A, NAN));
// NOTE(review): the header of this function is not visible in the listing
// (possibly the destructor or a finalisation routine; confirm).
//
// Guard on the recording switch; the guarded statement (listing line 58) is not
// visible, presumably an early exit.
57 if (!enableRecordingOfActionValues)
// In "only last action" mode, first record one final data point per tracked entry.
// Listing lines 59-61 are consecutive, so the `if` appears to govern only the
// recordLastDataPoint() call.
59 if (recordOnlyLastAction)
60 recordLastDataPoint();
// Write every buffered chunk out.
61 writeAllChunksToDisk();
// NOTE(review): function header not visible here. This body uses `actionValues`,
// `state` and `action`, which matches the recordActionValues(actionValues, state,
// action) signature listed elsewhere in this listing; confirm.
//
// Guard on the recording switch; the guarded statement (listing line 70) is not
// visible, presumably an early exit.
69 if (!enableRecordingOfActionValues)
// Debug-only sanity check that there is something to record.
71 assert(statesToRecord.size() > 0);
// Dispatch on recording mode: either record just the value of the given
// (state, action) pair...
72 if (recordOnlyLastAction)
73 recordLastActionValue(actionValues->
getValue(state, action), state, action);
// ...or record the whole table. Listing line 74 is not visible; presumably an
// `else`, otherwise both calls would run in "only last action" mode. Confirm.
75 recordAllActionValues(actionValues);
// NOTE(review): function header not visible here; presumably the body of
// recordAllActionValues(actionValues). Confirm.
//
// Guard on the recording switch; the guarded statement is not visible,
// presumably an early exit.
80 if (!enableRecordingOfActionValues)
// Write position inside every chunk. Being a function-local static, it is
// initialised once and shared by every call (and every object) using this function.
85 static long long unsigned int index = 0;
// Snapshot the value of every (state, action) pair into slot `index`.
86 for (
size_t s = 0; s < S; s++)
88 for (
size_t a = 0; a < A; a++)
// The pair's first component is NAN here; only the second carries the value.
90 avChunk[s][a][index] = std::pair<double, double>(NAN, actionValues->
getValue(s, a));
// Once the chunk is full, flush everything. The statements that advance and
// reset `index` (listing lines 91-93 / 97-100) are not visible; confirm.
94 if (index == chunkSize)
96 writeAllChunksToDisk();
// Append one sample for a single (state, action) pair to its chunk buffer and
// remember it as the latest value recorded for that pair.
//
// value - the action value to store
// s     - state index (first index into avChunk / lastValueRecorded)
// a     - action index (second index into avChunk / lastValueRecorded)
//
// NOTE(review): braces and some statements of this body are missing from the listing.
101 void ActionValuesRecord::recordLastActionValue(
double value,
state_t s,
action_t a)
// Guard on the recording switch; the guarded statement (listing line 104) is not
// visible, presumably an early exit.
103 if (!enableRecordingOfActionValues)
// Per-(state, action) write positions. This is a function-local static, so it is
// sized from S and A on the first call only and then shared by all later calls,
// including calls made on other ActionValuesRecord objects.
105 static std::vector<std::vector<unsigned long long int>> indexes = std::vector<std::vector<unsigned long long int>>(
106 S, std::vector<unsigned long long int>(A, 0));
// Value stored as the first component of each sample. No update of `counter` is
// visible in this listing. TODO confirm where (or whether) it is advanced.
107 static unsigned long long int counter = 0;
// Take the current slot for this pair and advance its write position.
108 unsigned long long int index = indexes[s][a]++;
109 avChunk[s][a][index] = std::pair<double, double>(
counter, value);
// When this pair's chunk is full, write it out. Listing line 112 is not visible;
// presumably it resets indexes[s][a], otherwise the next write would be out of
// range. Confirm.
110 if (indexes[s][a] == chunkSize)
113 writeChunkToDisk(s, a);
// Remember the most recent value for this pair.
115 lastValueRecorded[s][a] = value;
// Read the "actionValuesRecorded" string list from the "reinforcementLearning"
// configuration section, convert each entry to int and sort the result ascending.
//
// Despite "Actions" in the name, the local is called `states` and the caller
// visible in this listing stores the result in statesToRecord.
//
// std::stoi throws std::invalid_argument / std::out_of_range for entries that are
// not valid ints; nothing in the visible code catches that.
//
// NOTE(review): braces and the return statement are missing from the listing;
// presumably the sorted `states` vector is returned. Confirm.
119 std::vector<int> ActionValuesRecord::getListOfActionsToRecordFromFile()
121 std::vector<std::string> str = conf->getStringList(
"reinforcementLearning",
"actionValuesRecorded");
122 std::vector<int> states;
// Convert every configured entry.
123 for (
size_t i = 0;
i < str.size();
i++)
125 states.push_back(std::stoi(str[
i]));
127 std::sort(states.begin(), states.end());
// Call writeChunkToDisk(s, a) for every state index in [0, S) and every action
// index in [0, A).
//
// NOTE(review): braces and the statement guarded by the first `if` are missing
// from the listing; presumably an early exit when recording is disabled.
131 void ActionValuesRecord::writeAllChunksToDisk()
133 if (!enableRecordingOfActionValues)
135 for (
size_t s = 0; s < S; s++)
137 for (
size_t a = 0; a < A; a++)
139 writeChunkToDisk(s, a);
// NOTE(review): function header not visible here; the body uses `s` and `a` and
// is presumably writeChunkToDisk(s, a). Several statements (guarded exits, the
// declaration and closing of `file`, loop-control statements) are also missing.
//
// Guard on the recording switch; guarded statement not visible.
146 if (!enableRecordingOfActionValues)
// Linear search: is `s` one of the configured entries? The statement taken when
// it is not found is not visible; presumably the chunk is skipped.
148 if (std::find(statesToRecord.begin(), statesToRecord.end(), s) == statesToRecord.end())
// Output file name: <file prefix>actionValuesForState<s>action<a>
150 std::string
filename = conf->getFilePrefix() +
"actionValuesForState";
151 filename += std::to_string(s) +
"action" + std::to_string(a);
// Progress message on standard error.
152 std::cerr <<
"writing chunk to "<< filename<<
" \n";
// Open in append mode so successive chunks accumulate in the same file.
154 file.open(filename, std::ios_base::app);
// Walk the buffered samples for this (s, a) pair.
156 for (
size_t index = 0; index < avChunk[s][a].size(); index++)
// A NaN value marks a slot without data. What happens then (listing lines
// 159-164) is not visible; presumably the slot is skipped or the loop stops.
158 if (std::isnan(avChunk[s][a][index].second))
// Emit "<first> <second>\n", omitting "<first> " when the first component is NaN.
165 if (!std::isnan(avChunk[s][a][index].first))
166 file << avChunk[s][a][index].first <<
" ";
167 file << avChunk[s][a][index].second <<
"\n";
// After writing: every slot is overwritten with a copy of the last slot
// (index chunkSize-1). NOTE(review): the intent is not evident from this listing;
// a reset to (NAN, NAN) might have been expected. Confirm before relying on it.
173 for (
size_t index = 0; index < chunkSize; index++)
175 avChunk[s][a][index] = avChunk[s][a][chunkSize-1];
// Re-record the last known value (lastValueRecorded[s][a]) through
// recordLastActionValue for the configured entries in statesToRecord.
//
// Throws std::invalid_argument("invalid state to record") when `s` is not a valid
// first index into lastValueRecorded.
//
// NOTE(review): braces, the declarations of `s` and `a`, and any loop over `a`
// are missing from the listing. Presumably s = statesToRecord[i] and `a` ranges
// over the actions. Confirm.
181 void ActionValuesRecord::recordLastDataPoint()
// Guard on the recording switch; guarded statement not visible.
183 if (!enableRecordingOfActionValues)
185 for (
size_t i = 0;
i < statesToRecord.size();
i++)
// Reject entries that would index past lastValueRecorded; log details to
// standard error before throwing.
190 if (s >= lastValueRecorded.size())
192 std::cerr <<
"s is " << s <<
"\n";
193 std::cerr <<
"size of vector: " << lastValueRecorded.size() <<
"\n";
194 throw std::invalid_argument(
"invalid state to record");
196 recordLastActionValue(lastValueRecorded[s][a], s, a);
// NOTE(review): the three lines below are bare signature summaries (no bodies,
// no terminating semicolons), presumably cross-reference residue from the
// documentation listing rather than compilable code.
//
// Pure virtual accessor returning the value for a (state, action) pair; the code
// in this listing calls it through an `actionValues` pointer.
virtual double getValue(state_t state, action_t action)=0
// Entry point that records action values for the given state/action.
void recordActionValues(ActionValuesFunction *actionValues, state_t state, action_t action)
// Constructor taking the configuration object plus two sizes (presumably the
// S and A dimensions used throughout; confirm).
ActionValuesRecord(std::shared_ptr< MdpConfiguration > conf, size_t stateSize, size_t actionSize)