Fix linter issue

This commit is contained in:
AdilZouitine
2025-04-22 10:37:08 +02:00
parent 0030ff3f74
commit c5845ee203
14 changed files with 64 additions and 1667 deletions

View File

@@ -224,7 +224,7 @@ def act_with_policy(
logging.info("make_policy")
### Instantiate the policy in both the actor and learner processes
### To avoid sending a SACPolicy object through the port, we create a policy intance
### To avoid sending a SACPolicy object through the port, we create a policy instance
### on both sides, the learner sends the updated parameters every n steps to update the actor's parameters
# TODO: At some point we should just need make sac policy
policy: SACPolicy = make_policy(
@@ -278,7 +278,7 @@ def act_with_policy(
# Increment total steps counter for intervention rate
episode_total_steps += 1
# NOTE: We overide the action if the intervention is True, because the action applied is the intervention action
# NOTE: We override the action if the intervention is True, because the action applied is the intervention action
if "is_intervention" in info and info["is_intervention"]:
# TODO: Check the shape
# NOTE: The action space for demonstration before hand is with the full action space

View File

@@ -269,7 +269,7 @@ if __name__ == "__main__":
new_repo_id = args.repo_id + "_cropped_resized"
new_dataset_root = Path(str(dataset.root) + "_cropped_resized")
croped_resized_dataset = convert_lerobot_dataset_to_cropper_lerobot_dataset(
cropped_resized_dataset = convert_lerobot_dataset_to_cropper_lerobot_dataset(
original_dataset=dataset,
crop_params_dict=rois,
new_repo_id=new_repo_id,

View File

@@ -262,7 +262,7 @@ class GamepadController(InputController):
elif event.button == 6:
self.close_gripper_command = True
# LT button (7) for openning gripper
# LT button (7) for opening gripper
elif event.button == 7:
self.open_gripper_command = True
@@ -421,45 +421,44 @@ class GamepadControllerHID(InputController):
try:
# Read data from the gamepad
data = self.device.read(64)
if data:
# Interpret gamepad data - this will vary by controller model
# These offsets are for the Logitech RumblePad 2
if len(data) >= 8:
# Normalize joystick values from 0-255 to -1.0-1.0
self.left_x = (data[1] - 128) / 128.0
self.left_y = (data[2] - 128) / 128.0
self.right_x = (data[3] - 128) / 128.0
self.right_y = (data[4] - 128) / 128.0
# Interpret gamepad data - this will vary by controller model
# These offsets are for the Logitech RumblePad 2
if data and len(data) >= 8:
# Normalize joystick values from 0-255 to -1.0-1.0
self.left_x = (data[1] - 128) / 128.0
self.left_y = (data[2] - 128) / 128.0
self.right_x = (data[3] - 128) / 128.0
self.right_y = (data[4] - 128) / 128.0
# Apply deadzone
self.left_x = 0 if abs(self.left_x) < self.deadzone else self.left_x
self.left_y = 0 if abs(self.left_y) < self.deadzone else self.left_y
self.right_x = 0 if abs(self.right_x) < self.deadzone else self.right_x
self.right_y = 0 if abs(self.right_y) < self.deadzone else self.right_y
# Apply deadzone
self.left_x = 0 if abs(self.left_x) < self.deadzone else self.left_x
self.left_y = 0 if abs(self.left_y) < self.deadzone else self.left_y
self.right_x = 0 if abs(self.right_x) < self.deadzone else self.right_x
self.right_y = 0 if abs(self.right_y) < self.deadzone else self.right_y
# Parse button states (byte 5 in the Logitech RumblePad 2)
buttons = data[5]
# Parse button states (byte 5 in the Logitech RumblePad 2)
buttons = data[5]
# Check if RB is pressed then the intervention flag should be set
self.intervention_flag = data[6] in [2, 6, 10, 14]
# Check if RB is pressed then the intervention flag should be set
self.intervention_flag = data[6] in [2, 6, 10, 14]
# Check if RT is pressed
self.open_gripper_command = data[6] in [8, 10, 12]
# Check if RT is pressed
self.open_gripper_command = data[6] in [8, 10, 12]
# Check if LT is pressed
self.close_gripper_command = data[6] in [4, 6, 12]
# Check if LT is pressed
self.close_gripper_command = data[6] in [4, 6, 12]
# Check if Y/Triangle button (bit 7) is pressed for saving
# Check if X/Square button (bit 5) is pressed for failure
# Check if A/Cross button (bit 4) is pressed for rerecording
if buttons & 1 << 7:
self.episode_end_status = "success"
elif buttons & 1 << 5:
self.episode_end_status = "failure"
elif buttons & 1 << 4:
self.episode_end_status = "rerecord_episode"
else:
self.episode_end_status = None
# Check if Y/Triangle button (bit 7) is pressed for saving
# Check if X/Square button (bit 5) is pressed for failure
# Check if A/Cross button (bit 4) is pressed for rerecording
if buttons & 1 << 7:
self.episode_end_status = "success"
elif buttons & 1 << 5:
self.episode_end_status = "failure"
elif buttons & 1 << 4:
self.episode_end_status = "rerecord_episode"
else:
self.episode_end_status = None
except OSError as e:
logging.error(f"Error reading from gamepad: {e}")
@@ -618,7 +617,7 @@ def teleoperate_delta_inverse_kinematics(robot, controller, fps=10, bounds=None,
# Process input events
controller.update()
# Get currrent robot state
# Get current robot state
joint_positions = robot.follower_arms["main"].read("Present_Position")
current_ee_pos = kinematics.fk_gripper_tip(joint_positions)
@@ -635,7 +634,7 @@ def teleoperate_delta_inverse_kinematics(robot, controller, fps=10, bounds=None,
desired_ee_pos[:3, 3] = np.clip(desired_ee_pos[:3, 3], bounds["min"], bounds["max"])
# Only send commands if there's actual movement
if any([abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]]):
if any(abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]):
# Compute joint targets via inverse kinematics
target_joint_state = kinematics.ik(joint_positions, desired_ee_pos, position_only=True)
@@ -678,7 +677,7 @@ def teleoperate_gym_env(env, controller, fps: int = 30):
action = np.array([delta_x, delta_y, delta_z])
# Skip if no movement
if any([abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]]):
if any(abs(v) > 0.001 for v in [delta_x, delta_y, delta_z]):
# Step the environment - pass action as a tensor with intervention flag
action_tensor = torch.from_numpy(action.astype(np.float32))
obs, reward, terminated, truncated, info = env.step((action_tensor, False))

View File

@@ -106,7 +106,7 @@ class HILSerlRobotEnv(gym.Env):
- The action space is defined as a Tuple where:
• The first element is a Box space representing joint position commands. It is defined as relative (delta)
or absolute, based on the configuration.
• ThE SECONd element is a Discrete space (with 2 values) serving as a flag for intervention (teleoperation).
• The second element is a Discrete space (with 2 values) serving as a flag for intervention (teleoperation).
"""
example_obs = self.robot.capture_observation()
@@ -384,7 +384,7 @@ class ActionRepeatWrapper(gym.Wrapper):
class RewardWrapper(gym.Wrapper):
def __init__(self, env, reward_classifier, device: torch.device = "cuda"):
"""
Wrapper to add reward prediction to the environment, it use a trained classifer.
Wrapper to add reward prediction to the environment; it uses a trained classifier.
cfg.
env: The environment to wrap
@@ -414,7 +414,7 @@ class RewardWrapper(gym.Wrapper):
if self.reward_classifier is not None
else 0.0
)
info["Reward classifer frequency"] = 1 / (time.perf_counter() - start_time)
info["Reward classifier frequency"] = 1 / (time.perf_counter() - start_time)
if success == 1.0:
terminated = True
@@ -784,11 +784,11 @@ class ResetWrapper(gym.Wrapper):
while time.perf_counter() - start_time < self.reset_time_s:
self.robot.teleop_step()
log_say("Manual reseting of the environment done.", play_sounds=True)
log_say("Manual reset of the environment done.", play_sounds=True)
return super().reset(seed=seed, options=options)
class BatchCompitableWrapper(gym.ObservationWrapper):
class BatchCompatibleWrapper(gym.ObservationWrapper):
def __init__(self, env):
super().__init__(env)
@@ -823,10 +823,7 @@ class GripperPenaltyWrapper(gym.RewardWrapper):
def step(self, action):
self.last_gripper_state = self.unwrapped.robot.follower_arms["main"].read("Present_Position")[-1]
if isinstance(action, tuple):
gripper_action = action[0][-1]
else:
gripper_action = action[-1]
gripper_action = action[0][-1] if isinstance(action, tuple) else action[-1]
obs, reward, terminated, truncated, info = self.env.step(action)
gripper_penalty = self.reward(reward, gripper_action)
@@ -1279,7 +1276,7 @@ def make_robot_env(cfg) -> gym.vector.VectorEnv:
)
if cfg.wrapper.ee_action_space_params is None and cfg.wrapper.joint_masking_action_space is not None:
env = JointMaskingActionSpace(env=env, mask=cfg.wrapper.joint_masking_action_space)
env = BatchCompitableWrapper(env=env)
env = BatchCompatibleWrapper(env=env)
return env
@@ -1492,7 +1489,7 @@ def main(cfg: EnvConfig):
alpha = 1.0
num_episode = 0
sucesses = []
successes = []
while num_episode < 20:
start_loop_s = time.perf_counter()
# Sample a new random action from the robot's action space.
@@ -1503,15 +1500,15 @@ def main(cfg: EnvConfig):
# Execute the step: wrap the NumPy action in a torch tensor.
obs, reward, terminated, truncated, info = env.step((torch.from_numpy(smoothed_action), False))
if terminated or truncated:
sucesses.append(reward)
successes.append(reward)
env.reset()
num_episode += 1
dt_s = time.perf_counter() - start_loop_s
busy_wait(1 / cfg.fps - dt_s)
logging.info(f"Success after 20 steps {sucesses}")
logging.info(f"success rate {sum(sucesses) / len(sucesses)}")
logging.info(f"Success after 20 steps {successes}")
logging.info(f"success rate {sum(successes) / len(successes)}")
if __name__ == "__main__":

View File

@@ -319,7 +319,7 @@ def add_actor_information_and_train(
batch_size: int = batch_size // 2 # We will sample from both replay buffer
logging.info("Starting learner thread")
interaction_message, transition = None, None
interaction_message = None
optimization_step = resume_optimization_step if resume_optimization_step is not None else 0
interaction_step_shift = resume_interaction_step if resume_interaction_step is not None else 0
@@ -654,7 +654,7 @@ def start_learner_server(
shutdown_event.wait()
logging.info("[LEARNER] Stopping gRPC server...")
server.stop(learner_service.STUTDOWN_TIMEOUT)
server.stop(learner_service.SHUTDOWN_TIMEOUT)
logging.info("[LEARNER] gRPC server stopped")
@@ -719,7 +719,7 @@ def save_training_checkpoint(
# Update the "last" symlink
update_last_checkpoint(checkpoint_dir)
# TODO : temporarly save replay buffer here, remove later when on the robot
# TODO : temporarily save replay buffer here, remove later when on the robot
# We want to control this with the keyboard inputs
dataset_dir = os.path.join(cfg.output_dir, "dataset")
if os.path.exists(dataset_dir) and os.path.isdir(dataset_dir):
@@ -889,7 +889,7 @@ def load_training_state(
training_state_path = os.path.join(checkpoint_dir, TRAINING_STATE_DIR, "training_state.pt")
interaction_step = 0
if os.path.exists(training_state_path):
training_state = torch.load(training_state_path, weights_only=False)
training_state = torch.load(training_state_path, weights_only=False) # nosec B614: Safe usage of torch.load
interaction_step = training_state.get("interaction_step", 0)
logging.info(f"Resuming from step {step}, interaction step {interaction_step}")

View File

@@ -8,13 +8,13 @@ from lerobot.scripts.server.network_utils import receive_bytes_in_chunks, send_b
MAX_MESSAGE_SIZE = 4 * 1024 * 1024 # 4 MB
MAX_WORKERS = 3 # Stream parameters, send transitions and interactions
STUTDOWN_TIMEOUT = 10
SHUTDOWN_TIMEOUT = 10
class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
def __init__(
self,
shutdown_event: Event,
shutdown_event: Event, # type: ignore
parameters_queue: Queue,
seconds_between_pushes: float,
transition_queue: Queue,
@@ -26,7 +26,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
self.transition_queue = transition_queue
self.interaction_message_queue = interaction_message_queue
def StreamParameters(self, request, context):
def StreamParameters(self, request, context): # noqa: N802
# TODO: authorize the request
logging.info("[LEARNER] Received request to stream parameters from the Actor")
@@ -48,7 +48,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
logging.info("[LEARNER] Stream parameters finished")
return hilserl_pb2.Empty()
def SendTransitions(self, request_iterator, _context):
def SendTransitions(self, request_iterator, _context): # noqa: N802
# TODO: authorize the request
logging.info("[LEARNER] Received request to receive transitions from the Actor")
@@ -62,7 +62,7 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
logging.debug("[LEARNER] Finished receiving transitions")
return hilserl_pb2.Empty()
def SendInteractions(self, request_iterator, _context):
def SendInteractions(self, request_iterator, _context): # noqa: N802
# TODO: authorize the request
logging.info("[LEARNER] Received request to receive interactions from the Actor")
@@ -76,5 +76,5 @@ class LearnerService(hilserl_pb2_grpc.LearnerServiceServicer):
logging.debug("[LEARNER] Finished receiving interactions")
return hilserl_pb2.Empty()
def Ready(self, request, context):
def Ready(self, request, context): # noqa: N802
return hilserl_pb2.Empty()

View File

@@ -219,50 +219,3 @@ def make_maniskill(
env = ManiskillMockGripperWrapper(env, nb_discrete_actions=3)
return env
# @parser.wrap()
# def main(cfg: TrainPipelineConfig):
# """Main function to run the ManiSkill environment."""
# # Create the ManiSkill environment
# env = make_maniskill(cfg.env, n_envs=1)
# # Reset the environment
# obs, info = env.reset()
# # Run a simple interaction loop
# sum_reward = 0
# for i in range(100):
# # Sample a random action
# action = env.action_space.sample()
# # Step the environment
# start_time = time.perf_counter()
# obs, reward, terminated, truncated, info = env.step(action)
# step_time = time.perf_counter() - start_time
# sum_reward += reward
# # Log information
# # Reset if episode terminated
# if terminated or truncated:
# logging.info(f"Step {i}, reward: {sum_reward}, step time: {step_time}s")
# sum_reward = 0
# obs, info = env.reset()
# # Close the environment
# env.close()
# if __name__ == "__main__":
# logging.basicConfig(level=logging.INFO)
# main()
if __name__ == "__main__":
import draccus
config = ManiskillEnvConfig()
draccus.set_config_type("json")
draccus.dump(
config=config,
stream=open(file="run_config.json", mode="w"),
)