	@article{sharma2007trajectory,
	  title     = {Trajectory-Based Modified Policy Iteration},
	  author    = {R. Sharma and M. Gopal},
	  abstract  = {This paper presents a new problem-solving approach
that generates optimal policies for finite-state stochastic
sequential decision-making problems with high data efficiency. The
proposed algorithm iteratively builds and improves an approximate
Markov Decision Process (MDP) model, along with approximate
cost-to-go values, by generating finite-length trajectories through
the state space. The approach creates a synergy between the evolving
approximate model and the approximate cost-to-go values to produce a
sequence of improving policies that converges to the optimal policy
through an intelligent and structured search of the policy space. It
modifies the policy update step of policy iteration to yield speedy
and stable convergence to the optimal policy. We apply the algorithm
to a non-holonomic mobile robot control problem and compare its
performance with other Reinforcement Learning (RL) approaches:
a) Q-learning, b) Watkins' Q(λ), and c) SARSA(λ).},
	  journal   = {International Journal of Computer and Information Engineering},
	  volume    = {1},
	  number    = {12},
	  year      = {2007},
	  pages     = {4055--4060},
	  ee        = {https://publications.waset.org/pdf/14894},
	  url       = {https://publications.waset.org/vol/12},
	  bibsource = {https://publications.waset.org/},
	  issn      = {1307-6892},
	  publisher = {World Academy of Science, Engineering and Technology},
	  index     = {Open Science Index 12, 2007},
	}
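
A minimal, runnable Python sketch of the idea described in the abstract:
modified policy iteration run on an MDP model estimated from sampled
trajectories. This is an illustration under assumed details, not the
authors' exact algorithm: the MDP size, the number of rollouts, the
trajectory length, and the number of partial-evaluation backups (m_eval)
are arbitrary choices here, and uniformly random rollouts stand in for
the paper's structured search of the policy space.

# Illustrative sketch only; see hedging note above.
import numpy as np

rng = np.random.default_rng(0)
n_states, n_actions, gamma, m_eval = 6, 3, 0.95, 5

# Ground-truth MDP, unknown to the learner: transitions P[a, s, s']
# and per-step costs C[s, a] (costs are minimized, matching the
# abstract's cost-to-go framing).
P = rng.dirichlet(np.ones(n_states), size=(n_actions, n_states))
C = rng.uniform(0.0, 1.0, size=(n_states, n_actions))

# --- Build an approximate model from finite-length trajectories ---
counts = np.ones((n_actions, n_states, n_states))  # +1 smoothing
for _ in range(200):                               # 200 rollouts (assumed)
    s = rng.integers(n_states)
    for _ in range(30):                            # trajectory length 30 (assumed)
        a = rng.integers(n_actions)
        s_next = rng.choice(n_states, p=P[a, s])
        counts[a, s, s_next] += 1
        s = s_next
P_hat = counts / counts.sum(axis=2, keepdims=True)

# --- Modified policy iteration on the approximate model ---
# Partial evaluation (m_eval backups) replaces the exact policy
# evaluation of classical policy iteration.
V = np.zeros(n_states)
policy = np.zeros(n_states, dtype=int)
for _ in range(100):
    for _ in range(m_eval):
        V = np.array([C[s, policy[s]] + gamma * P_hat[policy[s], s] @ V
                      for s in range(n_states)])
    # Greedy improvement step over the estimated model.
    Q = C + gamma * np.einsum('ast,t->sa', P_hat, V)
    new_policy = Q.argmin(axis=1)
    if np.array_equal(new_policy, policy):
        break
    policy = new_policy

print("greedy policy on the learned model:", policy)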