WASET
@article{ali2023trpo,
  title     = {Robot Movement Using the {Trust Region Policy Optimization}},
  author    = {Ali, Romisaa},
  journal   = {International Journal of Mechanical and Mechatronics Engineering},
  volume    = {17},
  number    = {10},
  year      = {2023},
  pages     = {394--399},
  abstract  = {The Policy Gradient approach is a subset of the Deep Reinforcement Learning (DRL) combines Deep Neural Networks (DNN) with Reinforcement Learning (RL). This approach finds the optimal policy of robot movement, based on the experience it gains from interaction with its environment. Unlike previous policy gradient algorithms, which were unable to handle the two types of error variance and bias introduced by the DNN model due to over- or underestimation, this algorithm is capable of handling both types of error variance and bias. This article will discuss the state-of-the-art SOTA policy gradient technique, trust region policy optimization (TRPO), by applying this method in various environments compared to another policy gradient method, the Proximal Policy Optimization (PPO), to explain their robust optimization, using this SOTA to gather experience data during various training phases after observing the impact of hyper-parameters on neural network performance.},
  ee        = {https://publications.waset.org/pdf/10013320},
  url       = {https://publications.waset.org/vol/202},
  bibsource = {https://publications.waset.org/},
  issn      = {1307-6892},
  publisher = {World Academy of Science, Engineering and Technology},
  index     = {Open Science Index 202, 2023},
}