From 1ce1848736fe12a0a9d58b405083d0796c74af0c Mon Sep 17 00:00:00 2001
From: Hayden Shively <17186559+haydenshively@users.noreply.github.com>
Date: Thu, 1 Apr 2021 21:52:50 -0500
Subject: [PATCH] Scale window rewards between 0 and 10 (#340)

* Scale window rewards between 0 and 10

* Fix scaling on dial-turn env
---
 metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py    | 2 +-
 metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py | 2 +-
 metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py
index fbf466ba4..b9f8cb651 100644
--- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py
+++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py
@@ -131,7 +131,7 @@ def compute_reward(self, action, obs):
         tcp_opened = 0
         object_grasped = reach
 
-        reward = reward_utils.hamacher_product(reach, in_place)
+        reward = 10 * reward_utils.hamacher_product(reach, in_place)
 
         return (reward,
                 tcp_to_obj,
diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py
index 8613bd104..2e6e65894 100644
--- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py
+++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py
@@ -140,7 +140,7 @@ def compute_reward(self, actions, obs):
         tcp_opened = 0
         object_grasped = reach
 
-        reward = reward_utils.hamacher_product(reach, in_place)
+        reward = 10 * reward_utils.hamacher_product(reach, in_place)
 
         return (reward,
                 tcp_to_obj,
diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py
index 524b78a71..e830961b6 100644
--- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py
+++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py
@@ -130,7 +130,7 @@ def compute_reward(self, actions, obs):
         tcp_opened = 0
         object_grasped = reach
 
-        reward = reward_utils.hamacher_product(reach, in_place)
+        reward = 10 * reward_utils.hamacher_product(reach, in_place)
         return (reward,
                 tcp_to_obj,
                 tcp_opened,