linearProgramming.cpp

#include "linearProgramming.h"

#include <cassert>
#include <iostream>
#include <stdexcept>

#include <mdp/constraintList.h>
#include <mdp/horizon.h>
#include <mdp/mdpConfiguration.h>
#include <mdp/policy.h>
#include <mdp/rewards.h>
#include <mdp/transitionMatrix.h>

#include "glpkImplementation.h"


using namespace Mdp;

void LinearProgramming::solve(Policy *policy, Rewards *rewards, ConstraintList *constraintList,
                              TransitionMatrix *matrix, Horizon *horizon)
{
    /*See references:
    [1] "Robustness of policies in constrained Markov Decision Processes" by Zadorojniy and Shwartz
    [2] "Constrained Markov Decision Processes" by Altman
    [3] "GNU Linear Programming Kit reference manual (November 2015)"
    */

    const size_t S = policy->getNbOfStates();
    const size_t A = policy->getNbOfActions();

    const size_t nbCol = S*A;
    columns = std::vector<double>(nbCol); // these are the rho(x,u) of [1], or x1 x2 x3 of [3]
    coeffs = std::vector<double>(nbCol);  // c(x,u) of [1]
    objFunc = 0.0;

    prepareParameters(rewards, constraintList, matrix, horizon);
    //removeRedundantEqualityConstraint(0);
    //printParams();
    LpImplementation *solver = new GlpkImplementation(LpImplementation::LpAlgo::interiorPoint);
    objFunc = solver->solve(columns, coeffs, eqCoeffs, eqValue, ineqCoeffs, ineqValue);
    updatePolicy(policy);
    delete solver; // the GlpkImplementation was heap-allocated above; free it to avoid a leak
}
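
/* A note on the data handed to LpImplementation::solve() above (my reading of this file;
 * the solver itself is declared in glpkImplementation.h and is not shown here): the
 * variables are flattened state-major with index state*A + action. For example, with
 * S = 2 states and A = 2 actions the four columns are ordered
 *   (x0,u0), (x0,u1), (x1,u0), (x1,u1),
 * so columns[1*A + 0] holds rho(x1,u0) of [1]. solve() is expected to fill `columns`
 * with the optimal variable values and to return the optimal objective, which is what
 * objFunc and updatePolicy() rely on below.
 */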


void LinearProgramming::printParams()
{
    std::cout << "printing coefficients\n";
    for (size_t i = 0; i < coeffs.size(); i++)
    {
        std::cout << coeffs[i] << " ";
    }
    std::cout << "\n";

    std::cout << "printing eqCoeffs\n";
    for (size_t i = 0; i < eqCoeffs.size(); i++)
    {
        for (size_t j = 0; j < eqCoeffs[i].size(); j++)
        {
            std::cout << eqCoeffs[i][j] << " ";
        }
        std::cout << "\n";
    }
    std::cout << "eqValue:";
    for (size_t i = 0; i < eqValue.size(); i++)
    {
        std::cout << eqValue[i] << " ";
    }
    std::cout << "\n";

    std::cout << "printing ineqCoeffs\n";
    for (size_t i = 0; i < ineqCoeffs.size(); i++)
    {
        for (size_t j = 0; j < ineqCoeffs[i].size(); j++)
        {
            std::cout << ineqCoeffs[i][j] << " ";
        }
        std::cout << "\n";
    }
    std::cout << "ineqValue:";
    for (size_t i = 0; i < ineqValue.size(); i++)
    {
        std::cout << ineqValue[i] << " ";
    }
    std::cout << "\n";

    std::cout << "variables: ";
    for (size_t i = 0; i < columns.size(); i++)
    {
        std::cout << columns[i] << " ";
    }
    std::cout << "\n";
    std::cout << "objective function: " << objFunc << "\n";
}


void LinearProgramming::prepareParametersForDiscountedCost(Rewards *rewards, ConstraintList *constraintList,
                                                           TransitionMatrix *matrix, double discount, std::vector<double> initialState)
{
    const size_t S = rewards->getNbOfStates();
    const size_t A = rewards->getNbOfActions();
    const size_t nbCol = S*A;

    // Objective coefficients: minus sign because the LP of [1] works with costs, not rewards.
    for (state_t i = 0; i < S; i++)
    {
        for (action_t j = 0; j < A; j++)
        {
            coeffs[i*A + j] = -rewards->getReward(i, j);
        }
    }

    // Equality constraints: S flow-balance rows, one normalization row,
    // and one row per user-supplied equality constraint.
    const size_t nbEqConst = S + 1 + constraintList->equalityConstraints.size();
    eqCoeffs = std::vector<std::vector<double>>(nbEqConst, std::vector<double>(nbCol));
    eqValue = std::vector<double>(nbEqConst);
    for (state_t i = 0; i < S; i++) // i is the x of 4.3 in [2]
    {
        for (state_t j = 0; j < S; j++)
        {
            for (action_t k = 0; k < A; k++)
            {
                const double delta = (i == j) ? 1.0 : 0.0;
                eqCoeffs[i][j*A + k] = delta - discount*matrix->get(j, i, k); // eq. 4.3 in [2]
            }
        }
        eqValue[i] = (1.0 - discount)*initialState[i];
    }

    // Normalization row: the occupation measure must sum to one.
    for (size_t j = 0; j < nbCol; j++)
    {
        eqCoeffs[S][j] = 1.0;
    }
    eqValue[S] = 1.0;

    // One row per user-supplied equality constraint.
    for (size_t i = S + 1; i < nbEqConst; i++)
    {
        for (state_t j = 0; j < S; j++)
        {
            for (action_t k = 0; k < A; k++)
            {
                // Although the accessor is called getReward(), it returns the constraint's
                // cost coefficient here, not a reward.
                eqCoeffs[i][j*A + k] = constraintList->equalityConstraints[i - S - 1]->getReward(j, k);
            }
        }
        eqValue[i] = constraintList->equalityValues[i - S - 1];
    }

    // Inequality constraints: one row per user-supplied inequality constraint.
    // Remember that rho(y,u) >= 0 ([1]) is taken care of by the glpk solver.
    const size_t nbIneqConst = constraintList->inequalityConstraints.size();
    ineqCoeffs = std::vector<std::vector<double>>(nbIneqConst, std::vector<double>(nbCol));
    ineqValue = std::vector<double>(nbIneqConst);
    for (size_t i = 0; i < nbIneqConst; i++)
    {
        for (state_t j = 0; j < S; j++)
        {
            for (action_t k = 0; k < A; k++)
            {
                ineqCoeffs[i][j*A + k] = constraintList->inequalityConstraints[i]->getReward(j, k);
            }
        }
        ineqValue[i] = constraintList->inequalityValues[i];
    }
}
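
/* For reference, the program assembled above is the occupation-measure LP for the
 * discounted constrained MDP (eq. 4.3 of [2]); a sketch, writing alpha for the
 * discount factor, beta for initialState and P(x,y,u) for matrix->get(x, y, u):
 *
 *   optimize    sum_{x,u} c(x,u) rho(x,u)
 *   subject to  sum_u rho(y,u) - alpha sum_{x,u} P(x,y,u) rho(x,u) = (1 - alpha) beta(y)   for every state y,
 *               sum_{x,u} rho(x,u) = 1,
 *               one additional row per ConstraintList equality/inequality entry,
 *               rho >= 0  (left to the solver, as noted above).
 *
 * Whether the objective is minimized or maximized is decided by the LpImplementation;
 * this file only flips rewards into costs via the minus sign on coeffs.
 */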


void LinearProgramming::updatePolicy(Policy *policy)
{
    const size_t S = policy->getNbOfStates();
    const size_t A = policy->getNbOfActions();

    // Total occupation mass per state.
    std::vector<double> sum(S, 0.0);
    for (state_t i = 0; i < S; i++)
    {
        for (action_t j = 0; j < A; j++)
        {
            sum[i] += columns[i*A + j];
        }
    }

    const double epsilon = 1e-9;
    for (state_t i = 0; i < S; i++)
    {
        if (sum[i] < epsilon)
        {
            // The state carries numerically no occupation mass: fall back to a
            // uniform distribution over actions.
            std::vector<double> distribution(A, 1.0/A);
            policy->update(i, distribution);
        }
        else
        {
            // Normalize this state's occupation measure into action probabilities.
            std::vector<double> distribution(A, 0.0);
            for (action_t j = 0; j < A; j++)
            {
                distribution[j] = columns[i*A + j] / sum[i];
            }
            policy->update(i, distribution);
        }
    }
}
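
/* updatePolicy() above converts the optimal occupation measure back into a randomized
 * policy: pi(u|x) = rho(x,u) / sum_{u'} rho(x,u'). For states whose total occupation
 * mass is below epsilon the conditional is undefined, so a uniform distribution over
 * the A actions is used as a fallback.
 */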


void LinearProgramming::prepareParameters(Rewards *rewards, ConstraintList *constraintList,
                                          TransitionMatrix *matrix, Horizon *horizon)
{
    if (!horizon->finiteHorizon)
    {
        prepareParametersForDiscountedCost(rewards, constraintList, matrix,
                                           horizon->discountFactor, horizon->initialStateDistribution);
    }
    else
    {
        throw std::runtime_error("Cost horizon type not supported");
    }
}


void LinearProgramming::removeRedundantEqualityConstraint(size_t index)
{
    eqCoeffs.erase(eqCoeffs.begin() + index);
    eqValue.erase(eqValue.begin() + index);
}
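
/* Why one equality row is redundant (a note; not spelled out in the sources): assuming
 * initialState sums to one and every row of the transition matrix sums to one, adding
 * up the S flow-balance rows built in prepareParametersForDiscountedCost() gives
 * (1 - alpha) * sum_{x,u} rho(x,u) = (1 - alpha), which is exactly the normalization
 * row sum rho = 1. The S+1 equality rows are therefore linearly dependent, and the
 * commented-out removeRedundantEqualityConstraint(0) call in solve() would drop one of
 * them before handing the program to the solver.
 */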
241 
242 
243 
244 
245 
246 
247 
248 
249 
250 
251 
252 
253 
254 
255 
256 
257 
258 
259 
260 
261 
262 
263 
264 
265 
266 
267 
268 
269 
270 
271 
272 
273 
274 
275 
276 
277 
278 
279 
280 
281 
282 
283 
284 
285 
286 
287 
288 
289 
290 
291 
292 
293 
294 
295 
296 
297 
298 
299 
300 
301 
302 
303 
304 
305 
306 
307 
308 
309 
310 
311 
312 
313 
314 
315 
316 
317 
318 
319 
320 
321 
322 
323 
324 
325 
326 
327 
328 
329 
330 
331 
332 
333 
334 
335 
336 
337 