class Pendulum_1(OpenAIGym):
    """Wrapper that creates and runs the Pendulum-v1 environment from OpenAI Gym.

    Parameters: rendering/video flags, an initial value, a name, a seed, links
    and a namespace, all forwarded to ``OpenAIGym``; the environment id is
    fixed to 'Pendulum-v1'.
    Links: the first link supplies the action applied to the environment.

    Layout of ``self.value`` once ``parse_obs`` and ``process_values`` have run
    (indices 0-2 come straight from ``obs[0]``):

    0  cos(theta) - +1 is up, -1 is down, 0 is left and right
    1  sin(theta) - +1 is left, -1 is right, 0 is up and down
    2  theta dot  - +dot is anti-clockwise, -dot is clockwise
    3  theta dot / 8.0 (added here) - normalised to +/- 1
    4  100 - 10 * (signed angle / pi) (added here) - equals 100 when
       cos(theta) is 1; the angle takes the sign of sin(theta)

    Environment reward: -(theta^2 + 0.1*theta_dt^2 + 0.001*action^2); it is
    stored in ``self.reward`` with its sign flipped.
    """

    def __init__(self, render=False, render_mode="rgb_array", video_wrap=False, value=0, name="Pendulum_1",
                 seed=None, links=None, new_name=True, namespace=None, **cargs):
        """Initialise the base wrapper with the environment id 'Pendulum-v1'.

        Every named argument, plus any extra keyword arguments collected in
        ``cargs``, is passed through unchanged to ``OpenAIGym.__init__``.
        """
        super().__init__(
            'Pendulum-v1',
            render=render,
            render_mode=render_mode,
            video_wrap=video_wrap,
            value=value,
            name=name,
            seed=seed,
            links=links,
            new_name=new_name,
            namespace=namespace,
            **cargs
        )

    def process_hierarchy_values(self):
        """Store the current value of the first link as the hierarchy output."""
        action_source = self.links[0]
        self.hierarchy_values = action_source.get_value()

    def process_actions(self):
        """Do nothing: the hierarchy value is passed to the environment as is."""

    def apply_actions_get_obs(self):
        """Step the environment with the hierarchy value and return the result."""
        # The hierarchy value is wrapped in a one-element list for env.step().
        action = [self.hierarchy_values]
        return self.env.step(action)

    def parse_obs(self):
        """Split ``self.obs`` into value, reward, done and info by position."""
        # NOTE(review): self.obs is assigned outside this class - presumably
        # the result of apply_actions_get_obs(); confirm against OpenAIGym.
        step_result = self.obs
        self.value = step_result[0]
        # The reward is kept with its sign inverted.
        self.reward = -step_result[1]
        self.done = step_result[2]
        self.info = step_result[3]

    def process_values(self):
        """Append the two derived entries (indices 3 and 4) to ``self.value``."""
        observation = self.obs[0]

        # Index 3: angular velocity scaled down by 8.0.
        scaled_velocity = observation[2] / 8.0
        self.value = np.append(self.value, scaled_velocity)

        # Index 4: recover the angle from its cosine, give it the sign of the
        # sine, express it as a fraction of pi and map it to 100 - 10 * fraction.
        # A disabled alternative mapping was:
        #   100 - (10 * math.copysign(1 - abs(fraction), fraction))
        signed_angle = math.copysign(math.acos(observation[0]), observation[1])
        fraction = signed_angle / math.pi
        theta = 100 - (10 * fraction)
        self.value = np.append(self.value, theta)

    class Factory:
        """Factory whose ``create`` method builds a ``Pendulum_1`` instance."""

        def create(self, namespace=None, seed=None, gym_name=None):
            """Return a new ``Pendulum_1`` built from namespace, seed and gym_name."""
            # NOTE(review): gym_name is not a named parameter of
            # Pendulum_1.__init__, so it reaches OpenAIGym via **cargs -
            # confirm the base class accepts it.
            return Pendulum_1(namespace=namespace, seed=seed, gym_name=gym_name)