41 const size_t nbCol = S*
A;
42 columns = std::vector<double>(nbCol);
43 coeffs = std::vector<double>(nbCol);
46 prepareParameters(rewards, constraintList, matrix, horizon);
50 objFunc = solver->
solve(columns, coeffs, eqCoeffs, eqValue, ineqCoeffs, ineqValue);
56 void LinearProgramming::printParams()
58 std::cout <<
"printing coefficients\n";
59 for (
size_t i = 0;
i < coeffs.size();
i++)
61 std::cout << coeffs[
i]<<
" ";
64 std::cout <<
"printing eqCoeffs\n";
65 for (
size_t i = 0;
i < eqCoeffs.size();
i++)
67 for (
size_t j = 0; j < eqCoeffs[
i].size(); j++ )
69 std::cout <<eqCoeffs[
i][j]<<
" ";
73 std::cout <<
"eqValue:";
74 for (
size_t i = 0;
i < eqValue.size();
i++)
76 std::cout << eqValue[
i]<<
" ";
80 std::cout <<
"printing ineqCoeffs\n";
81 for (
size_t i = 0;
i < ineqCoeffs.size();
i++)
83 for (
size_t j = 0; j < ineqCoeffs[
i].size(); j++ )
85 std::cout <<ineqCoeffs[
i][j]<<
" ";
89 std::cout <<
"ineqValue:";
90 for (
size_t i = 0;
i < ineqValue.size();
i++)
92 std::cout << ineqValue[
i]<<
" ";
95 std::cout <<
"variables: ";
96 for (
size_t i = 0;
i < columns.size();
i++)
98 std::cout << columns[
i]<<
" ";
101 std::cout <<
"objective function: "<<objFunc<<
"\n";
105 void LinearProgramming::prepareParametersForDiscountedCost(
Rewards *rewards,
ConstraintList *constraintList,
106 TransitionMatrix *matrix,
double discount, std::vector<double> initialState)
110 const size_t nbCol = S*
A;
118 size_t nbEqConst = S + 1;
120 eqCoeffs = std::vector<std::vector<double>>(nbEqConst, std::vector<double>(nbCol));
121 eqValue = std::vector<double>(nbEqConst);
131 eqCoeffs[
i][j*A+k] = delta - discount*matrix->
get(j,
i, k);
134 eqValue[
i] = (1.0-discount)*initialState[
i];
137 for (
size_t i = S;
i < S+1;
i++)
139 for (
size_t j = 0; j < nbCol; j++)
141 eqCoeffs[
i][j] = 1.0;
146 for (
size_t i = S+1;
i < nbEqConst;
i++ )
159 size_t nbIneqConst = 0;
161 ineqCoeffs = std::vector<std::vector<double>>(nbIneqConst, std::vector<double>(nbCol));
162 ineqValue = std::vector<double>(nbIneqConst);
164 for (
size_t i = 0;
i < nbIneqConst;
i++ )
187 void LinearProgramming::updatePolicy(
Policy *policy)
191 std::vector<double> sum(S, 0.0);
196 sum[
i] += columns[
i*A + j];
199 const double epsilon = 0.000000001;
202 if (sum[
i] < epsilon)
204 std::vector<double> vector(A, 1.0/A);
209 std::vector<double> vector(A, 0.0);
212 vector[j] = columns[
i * A + j] / sum[
i];
226 prepareParametersForDiscountedCost(rewards, constraintList, matrix,
231 throw std::runtime_error(
"Cost horizon type not supported");
236 void LinearProgramming::removeRedundantEqualityConstraint(
size_t index)
238 eqCoeffs.erase(eqCoeffs.begin()+index);
239 eqValue.erase( eqValue.begin()+index);
std::vector< Rewards * > equalityConstraints
void update(state_t state, const std::vector< double > &vector)
std::vector< double > equalityValues
std::vector< double > inequalityValues
double get(state_t from, state_t to, action_t action)
virtual double solve(std::vector< double > &variables, std::vector< double > coeffs, std::vector< std::vector< double >> eqCoeffs, std::vector< double > eqValue, std::vector< std::vector< double >> ineqCoeffs, std::vector< double > ineqValue)=0
std::vector< double > initialStateDistribution
std::vector< Rewards * > inequalityConstraints
double getReward(state_t, action_t)
void solve(Policy *policy, Rewards *rewards, ConstraintList *constraintList, TransitionMatrix *matrix, Horizon *horizon)