forked from tangger/lerobot
Fix linter issue
@@ -224,7 +224,7 @@ def act_with_policy(
     logging.info("make_policy")

     ### Instantiate the policy in both the actor and learner processes
-    ### To avoid sending a SACPolicy object through the port, we create a policy intance
+    ### To avoid sending a SACPolicy object through the port, we create a policy instance
     ### on both sides, the learner sends the updated parameters every n steps to update the actor's parameters
     # TODO: At some point we should just need make sac policy
     policy: SACPolicy = make_policy(
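Note: the comment in this hunk describes the actor/learner split — each process builds its own SACPolicy and the learner streams updated weights instead of pickling the policy object over the wire. A minimal sketch of that pattern, with illustrative names (in lerobot the actual transport is the gRPC stream in the learner_service hunks further down):

    import io
    import torch

    def serialize_params(policy: torch.nn.Module) -> bytes:
        # Learner side: flatten the state dict into bytes for the wire.
        buf = io.BytesIO()
        torch.save(policy.state_dict(), buf)
        return buf.getvalue()

    def load_params(policy: torch.nn.Module, raw: bytes) -> None:
        # Actor side: rebuild the state dict and update the local policy in place.
        policy.load_state_dict(torch.load(io.BytesIO(raw), weights_only=True))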
@@ -278,7 +278,7 @@ def act_with_policy(
         # Increment total steps counter for intervention rate
         episode_total_steps += 1

-        # NOTE: We overide the action if the intervention is True, because the action applied is the intervention action
+        # NOTE: We override the action if the intervention is True, because the action applied is the intervention action
         if "is_intervention" in info and info["is_intervention"]:
             # TODO: Check the shape
             # NOTE: The action space for demonstration before hand is with the full action space
@@ -269,7 +269,7 @@ if __name__ == "__main__":
    new_repo_id = args.repo_id + "_cropped_resized"
    new_dataset_root = Path(str(dataset.root) + "_cropped_resized")

-   croped_resized_dataset = convert_lerobot_dataset_to_cropper_lerobot_dataset(
+   cropped_resized_dataset = convert_lerobot_dataset_to_cropper_lerobot_dataset(
        original_dataset=dataset,
        crop_params_dict=rois,
        new_repo_id=new_repo_id,
@@ -262,7 +262,7 @@ class GamepadController(InputController):
            elif event.button == 6:
                self.close_gripper_command = True

-           # LT button (7) for openning gripper
+           # LT button (7) for opening gripper
            elif event.button == 7:
                self.open_gripper_command = True

@@ -421,45 +421,44 @@ class GamepadControllerHID(InputController):
        try:
            # Read data from the gamepad
            data = self.device.read(64)
-           if data:
-               # Interpret gamepad data - this will vary by controller model
-               # These offsets are for the Logitech RumblePad 2
-               if len(data) >= 8:
-                   # Normalize joystick values from 0-255 to -1.0-1.0
-                   self.left_x = (data[1] - 128) / 128.0
-                   self.left_y = (data[2] - 128) / 128.0
-                   self.right_x = (data[3] - 128) / 128.0
-                   self.right_y = (data[4] - 128) / 128.0
+           # Interpret gamepad data - this will vary by controller model
+           # These offsets are for the Logitech RumblePad 2
+           if data and len(data) >= 8:
+               # Normalize joystick values from 0-255 to -1.0-1.0
+               self.left_x = (data[1] - 128) / 128.0
+               self.left_y = (data[2] - 128) / 128.0
+               self.right_x = (data[3] - 128) / 128.0
+               self.right_y = (data[4] - 128) / 128.0

-                   # Apply deadzone
-                   self.left_x = 0 if abs(self.left_x) < self.deadzone else self.left_x
-                   self.left_y = 0 if abs(self.left_y) < self.deadzone else self.left_y
-                   self.right_x = 0 if abs(self.right_x) < self.deadzone else self.right_x
-                   self.right_y = 0 if abs(self.right_y) < self.deadzone else self.right_y
+               # Apply deadzone
+               self.left_x = 0 if abs(self.left_x) < self.deadzone else self.left_x
+               self.left_y = 0 if abs(self.left_y) < self.deadzone else self.left_y
+               self.right_x = 0 if abs(self.right_x) < self.deadzone else self.right_x
+               self.right_y = 0 if abs(self.right_y) < self.deadzone else self.right_y

-                   # Parse button states (byte 5 in the Logitech RumblePad 2)
-                   buttons = data[5]
+               # Parse button states (byte 5 in the Logitech RumblePad 2)
+               buttons = data[5]

-                   # Check if RB is pressed then the intervention flag should be set
-                   self.intervention_flag = data[6] in [2, 6, 10, 14]
+               # Check if RB is pressed then the intervention flag should be set
+               self.intervention_flag = data[6] in [2, 6, 10, 14]

-                   # Check if RT is pressed
-                   self.open_gripper_command = data[6] in [8, 10, 12]
+               # Check if RT is pressed
+               self.open_gripper_command = data[6] in [8, 10, 12]

-                   # Check if LT is pressed
-                   self.close_gripper_command = data[6] in [4, 6, 12]
+               # Check if LT is pressed
+               self.close_gripper_command = data[6] in [4, 6, 12]

-                   # Check if Y/Triangle button (bit 7) is pressed for saving
-                   # Check if X/Square button (bit 5) is pressed for failure
-                   # Check if A/Cross button (bit 4) is pressed for rerecording
-                   if buttons & 1 << 7:
-                       self.episode_end_status = "success"
-                   elif buttons & 1 << 5:
-                       self.episode_end_status = "failure"
-                   elif buttons & 1 << 4:
-                       self.episode_end_status = "rerecord_episode"
-                   else:
-                       self.episode_end_status = None
+               # Check if Y/Triangle button (bit 7) is pressed for saving
+               # Check if X/Square button (bit 5) is pressed for failure
+               # Check if A/Cross button (bit 4) is pressed for rerecording
+               if buttons & 1 << 7:
+                   self.episode_end_status = "success"
+               elif buttons & 1 << 5:
+                   self.episode_end_status = "failure"
+               elif buttons & 1 << 4:
+                   self.episode_end_status = "rerecord_episode"
+               else:
+                   self.episode_end_status = None

        except OSError as e:
            logging.error(f"Error reading from gamepad: {e}")
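Note: the joystick math in this hunk — recentre a 0-255 HID byte around 128, scale to -1.0..1.0, then zero out small values — can be read as one helper. A sketch for reference, not part of the commit:

    def axis_value(raw: int, deadzone: float = 0.1) -> float:
        # Map a raw HID axis byte (0-255, 128 = centre) to -1.0..1.0, then
        # suppress values inside the deadzone so a resting stick reads as 0.
        value = (raw - 128) / 128.0
        return 0.0 if abs(value) < deadzone else value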
@@ -618,7 +617,7 @@ def teleoperate_delta_inverse_kinematics(robot, controller, fps=10, bounds=None,
        # Process input events
        controller.update()

-       # Get currrent robot state
+       # Get current robot state
        joint_positions = robot.follower_arms["main"].read("Present_Position")
        current_ee_pos = kinematics.fk_gripper_tip(joint_positions)

@@ -635,7 +634,7 @@ def teleoperate_delta_inverse_kinematics(robot, controller, fps=10, bounds=None,
            desired_ee_pos[:3, 3] = np.clip(desired_ee_pos[:3, 3], bounds["min"], bounds["max"])

        # Only send commands if there's actual movement
-       if any([abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]]):
+       if any(abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]):
            # Compute joint targets via inverse kinematics
            target_joint_state = kinematics.ik(joint_positions, desired_ee_pos, position_only=True)

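Note: this and the identical change in the next hunk are the same linter fix (ruff's C419-style rule): passing any() a generator expression lets it short-circuit at the first value over the threshold instead of materializing the whole list first. A small illustration:

    deltas = [0.0, 0.5, 0.0]
    any([abs(v) > 0.001 for v in deltas])  # builds the full list, then scans it
    any(abs(v) > 0.001 for v in deltas)    # stops at the first True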
@@ -678,7 +677,7 @@ def teleoperate_gym_env(env, controller, fps: int = 30):
        action = np.array([delta_x, delta_y, delta_z])

        # Skip if no movement
-       if any([abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]]):
+       if any(abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]):
            # Step the environment - pass action as a tensor with intervention flag
            action_tensor = torch.from_numpy(action.astype(np.float32))
            obs, reward, terminated, truncated, info = env.step((action_tensor, False))
@@ -106,7 +106,7 @@ class HILSerlRobotEnv(gym.Env):
        - The action space is defined as a Tuple where:
            • The first element is a Box space representing joint position commands. It is defined as relative (delta)
              or absolute, based on the configuration.
-           • ThE SECONd element is a Discrete space (with 2 values) serving as a flag for intervention (teleoperation).
+           • The second element is a Discrete space (with 2 values) serving as a flag for intervention (teleoperation).
        """
        example_obs = self.robot.capture_observation()

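Note: the docstring corresponds to an action space along these lines (a sketch using the gymnasium API; the Box bounds and joint count are placeholders that in practice come from the robot configuration):

    import numpy as np
    import gymnasium as gym

    action_space = gym.spaces.Tuple(
        (
            # Joint position commands, relative (delta) or absolute per config.
            gym.spaces.Box(low=-1.0, high=1.0, shape=(6,), dtype=np.float32),
            # Intervention flag: 0 = policy action, 1 = human teleoperation.
            gym.spaces.Discrete(2),
        )
    )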
@@ -384,7 +384,7 @@ class ActionRepeatWrapper(gym.Wrapper):
 class RewardWrapper(gym.Wrapper):
     def __init__(self, env, reward_classifier, device: torch.device = "cuda"):
         """
-        Wrapper to add reward prediction to the environment, it use a trained classifer.
+        Wrapper to add reward prediction to the environment, it use a trained classifier.

         cfg.
             env: The environment to wrap
@@ -414,7 +414,7 @@ class RewardWrapper(gym.Wrapper):
                if self.reward_classifier is not None
                else 0.0
            )
-           info["Reward classifer frequency"] = 1 / (time.perf_counter() - start_time)
+           info["Reward classifier frequency"] = 1 / (time.perf_counter() - start_time)

            if success == 1.0:
                terminated = True
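Note: the renamed info key follows the timing idiom used across these wrappers: take time.perf_counter() before the classifier call and report the inverse of the elapsed time as an effective frequency in Hz. As a generic helper (illustrative only, not lerobot's API):

    import time

    def call_with_hz(fn, *args):
        # Return fn's result plus its effective call frequency in Hz.
        start = time.perf_counter()
        out = fn(*args)
        return out, 1.0 / (time.perf_counter() - start)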
@@ -784,11 +784,11 @@ class ResetWrapper(gym.Wrapper):
            while time.perf_counter() - start_time < self.reset_time_s:
                self.robot.teleop_step()

-           log_say("Manual reseting of the environment done.", play_sounds=True)
+           log_say("Manual reset of the environment done.", play_sounds=True)
        return super().reset(seed=seed, options=options)


-class BatchCompitableWrapper(gym.ObservationWrapper):
+class BatchCompatibleWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)

@@ -823,10 +823,7 @@ class GripperPenaltyWrapper(gym.RewardWrapper):

    def step(self, action):
        self.last_gripper_state = self.unwrapped.robot.follower_arms["main"].read("Present_Position")[-1]
-       if isinstance(action, tuple):
-           gripper_action = action[0][-1]
-       else:
-           gripper_action = action[-1]
+       gripper_action = action[0][-1] if isinstance(action, tuple) else action[-1]
        obs, reward, terminated, truncated, info = self.env.step(action)
        gripper_penalty = self.reward(reward, gripper_action)

@@ -1279,7 +1276,7 @@ def make_robot_env(cfg) -> gym.vector.VectorEnv:
    )
    if cfg.wrapper.ee_action_space_params is None and cfg.wrapper.joint_masking_action_space is not None:
        env = JointMaskingActionSpace(env=env, mask=cfg.wrapper.joint_masking_action_space)
-   env = BatchCompitableWrapper(env=env)
+   env = BatchCompatibleWrapper(env=env)

    return env

@@ -1492,7 +1489,7 @@ def main(cfg: EnvConfig):
    alpha = 1.0

    num_episode = 0
-   sucesses = []
+   successes = []
    while num_episode < 20:
        start_loop_s = time.perf_counter()
        # Sample a new random action from the robot's action space.
@@ -1503,15 +1500,15 @@ def main(cfg: EnvConfig):
        # Execute the step: wrap the NumPy action in a torch tensor.
        obs, reward, terminated, truncated, info = env.step((torch.from_numpy(smoothed_action), False))
        if terminated or truncated:
-           sucesses.append(reward)
+           successes.append(reward)
            env.reset()
            num_episode += 1

        dt_s = time.perf_counter() - start_loop_s
        busy_wait(1 / cfg.fps - dt_s)

-   logging.info(f"Success after 20 steps {sucesses}")
-   logging.info(f"success rate {sum(sucesses) / len(sucesses)}")
+   logging.info(f"Success after 20 steps {successes}")
+   logging.info(f"success rate {sum(successes) / len(successes)}")


 if __name__ == "__main__":
@@ -319,7 +319,7 @@ def add_actor_information_and_train(
    batch_size: int = batch_size // 2  # We will sample from both replay buffer

    logging.info("Starting learner thread")
-   interaction_message, transition = None, None
+   interaction_message = None
    optimization_step = resume_optimization_step if resume_optimization_step is not None else 0
    interaction_step_shift = resume_interaction_step if resume_interaction_step is not None else 0

@@ -654,7 +654,7 @@ def start_learner_server(

    shutdown_event.wait()
    logging.info("[LEARNER] Stopping gRPC server...")
-   server.stop(learner_service.STUTDOWN_TIMEOUT)
+   server.stop(learner_service.SHUTDOWN_TIMEOUT)
    logging.info("[LEARNER] gRPC server stopped")


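Note: server.stop(grace) is the standard grpc shutdown call: the server immediately stops accepting new RPCs, in-flight RPCs get `grace` seconds before being cancelled, and the call returns an event that is set once shutdown completes. A minimal sketch of the pattern (port and worker count are placeholders):

    from concurrent import futures

    import grpc

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=3))
    server.add_insecure_port("[::]:50051")
    server.start()

    # stop() is non-blocking; wait on the returned event for a clean exit.
    server.stop(10).wait()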
@@ -719,7 +719,7 @@ def save_training_checkpoint(
    # Update the "last" symlink
    update_last_checkpoint(checkpoint_dir)

-   # TODO : temporarly save replay buffer here, remove later when on the robot
+   # TODO : temporary save replay buffer here, remove later when on the robot
    # We want to control this with the keyboard inputs
    dataset_dir = os.path.join(cfg.output_dir, "dataset")
    if os.path.exists(dataset_dir) and os.path.isdir(dataset_dir):
@@ -889,7 +889,7 @@ def load_training_state(
    training_state_path = os.path.join(checkpoint_dir, TRAINING_STATE_DIR, "training_state.pt")
    interaction_step = 0
    if os.path.exists(training_state_path):
-       training_state = torch.load(training_state_path, weights_only=False)
+       training_state = torch.load(training_state_path, weights_only=False)  # nosec B614: Safe usage of torch.load
        interaction_step = training_state.get("interaction_step", 0)

    logging.info(f"Resuming from step {step}, interaction step {interaction_step}")

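Note: the `# nosec B614` comment silences Bandit's torch.load warning, which flags unpickling as potential arbitrary code execution; here the checkpoint is local, trusted training state. When a file could come from an untrusted source, weights_only=True is the safer call, since it restricts deserialization to tensors and plain containers. A sketch of the stricter variant:

    import torch

    # Load a checkpoint without allowing arbitrary pickled objects.
    state = torch.load("training_state.pt", weights_only=True)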
@@ -8,13 +8,13 @@ from lerobot.scripts.server.network_utils import receive_bytes_in_chunks, send_b

 MAX_MESSAGE_SIZE = 4 * 1024 * 1024  # 4 MB
 MAX_WORKERS = 3  # Stream parameters, send transitions and interactions
-STUTDOWN_TIMEOUT = 10
+SHUTDOWN_TIMEOUT = 10


 class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
     def __init__(
         self,
-        shutdown_event: Event,
+        shutdown_event: Event,  # type: ignore
         parameters_queue: Queue,
         seconds_between_pushes: float,
         transition_queue: Queue,
@@ -26,7 +26,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
        self.transition_queue = transition_queue
        self.interaction_message_queue = interaction_message_queue

-   def StreamParameters(self, request, context):
+   def StreamParameters(self, request, context):  # noqa: N802
        # TODO: authorize the request
        logging.info("[LEARNER] Received request to stream parameters from the Actor")

@@ -48,7 +48,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
        logging.info("[LEARNER] Stream parameters finished")
        return hilserl_pb2.Empty()

-   def SendTransitions(self, request_iterator, _context):
+   def SendTransitions(self, request_iterator, _context):  # noqa: N802
        # TODO: authorize the request
        logging.info("[LEARNER] Received request to receive transitions from the Actor")

@@ -62,7 +62,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
        logging.debug("[LEARNER] Finished receiving transitions")
        return hilserl_pb2.Empty()

-   def SendInteractions(self, request_iterator, _context):
+   def SendInteractions(self, request_iterator, _context):  # noqa: N802
        # TODO: authorize the request
        logging.info("[LEARNER] Received request to receive interactions from the Actor")

@@ -76,5 +76,5 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
        logging.debug("[LEARNER] Finished receiving interactions")
        return hilserl_pb2.Empty()

-   def Ready(self, request, context):
+   def Ready(self, request, context):  # noqa: N802
        return hilserl_pb2.Empty()

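Note: the `# noqa: N802` markers suppress pep8-naming's "function name should be lowercase" rule. gRPC servicer methods must match the RPC names declared in the .proto service, which are conventionally CamelCase, so renaming them to satisfy the linter would break the service binding. Schematically (the proto text here is an assumed example, not quoted from the repo):

    # service LearnerService {
    #   rpc Ready (Empty) returns (Empty);
    # }

    class LearnerService:  # stands in for the generated servicer base class
        def Ready(self, request, context):  # noqa: N802 - must match the proto RPC name
            ...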
@@ -219,50 +219,3 @@ def make_maniskill(
        env = ManiskillMockGripperWrapper(env, nb_discrete_actions=3)

    return env
-
-
-# @parser.wrap()
-# def main(cfg: TrainPipelineConfig):
-#     """Main function to run the ManiSkill environment."""
-#     # Create the ManiSkill environment
-#     env = make_maniskill(cfg.env, n_envs=1)
-
-#     # Reset the environment
-#     obs, info = env.reset()
-
-#     # Run a simple interaction loop
-#     sum_reward = 0
-#     for i in range(100):
-#         # Sample a random action
-#         action = env.action_space.sample()
-
-#         # Step the environment
-#         start_time = time.perf_counter()
-#         obs, reward, terminated, truncated, info = env.step(action)
-#         step_time = time.perf_counter() - start_time
-#         sum_reward += reward
-#         # Log information
-
-#         # Reset if episode terminated
-#         if terminated or truncated:
-#             logging.info(f"Step {i}, reward: {sum_reward}, step time: {step_time}s")
-#             sum_reward = 0
-#             obs, info = env.reset()
-
-#     # Close the environment
-#     env.close()
-
-
-# if __name__ == "__main__":
-#     logging.basicConfig(level=logging.INFO)
-#     main()
-
-if __name__ == "__main__":
-    import draccus
-
-    config = ManiskillEnvConfig()
-    draccus.set_config_type("json")
-    draccus.dump(
-        config=config,
-        stream=open(file="run_config.json", mode="w"),
-    )