diff --git a/docs/source/hilserl.mdx b/docs/source/hilserl.mdx
index f6bac1ffa..08301556f 100644
--- a/docs/source/hilserl.mdx
+++ b/docs/source/hilserl.mdx
@@ -518,7 +518,7 @@ During the online training, press `space` to take over the policy and `space` ag
 Start the recording process, an example of the config file can be found [here](https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/env_config_so100.json):
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so100.json
+python -m lerobot.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so100.json
 ```
 
 During recording:
@@ -549,7 +549,7 @@ Note: If you already know the crop parameters, you can skip this step and just s
 Use the `crop_dataset_roi.py` script to interactively select regions of interest in your camera images:
 
 ```bash
-python -m lerobot.scripts.rl.crop_dataset_roi --repo-id username/pick_lift_cube
+python -m lerobot.rl.crop_dataset_roi --repo-id username/pick_lift_cube
 ```
 
 1. For each camera view, the script will display the first frame
@@ -618,7 +618,7 @@ Before training, you need to collect a dataset with labeled examples. The `recor
 To collect a dataset, you need to modify some parameters in the environment configuration based on HILSerlRobotEnvConfig.
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/reward_classifier_train_config.json
+python -m lerobot.rl.gym_manipulator --config_path src/lerobot/configs/reward_classifier_train_config.json
 ```
 
 **Key Parameters for Data Collection**
@@ -764,7 +764,7 @@ or set the argument in the json config file.
 Run `gym_manipulator.py` to test the model.
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config.json
+python -m lerobot.rl.gym_manipulator --config_path path/to/env_config.json
 ```
 
 The reward classifier will automatically provide rewards based on the visual input from the robot's cameras.
@@ -777,7 +777,7 @@ The reward classifier will automatically provide rewards based on the visual inp
 2. **Collect a dataset**:
 
    ```bash
-   python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
+   python -m lerobot.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
    ```
 
 3. **Train the classifier**:
@@ -788,7 +788,7 @@ The reward classifier will automatically provide rewards based on the visual inp
 4. **Test the classifier**:
 
    ```bash
-   python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
+   python -m lerobot.rl.gym_manipulator --config_path src/lerobot/configs/env_config.json
    ```
 
 ### Training with Actor-Learner
@@ -810,7 +810,7 @@ Create a training configuration file (example available [here](https://huggingfa
 First, start the learner server process:
 
 ```bash
-python -m lerobot.scripts.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python -m lerobot.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
 ```
 
 The learner:
@@ -825,7 +825,7 @@ The learner:
 In a separate terminal, start the actor process with the same configuration:
 
 ```bash
-python -m lerobot.scripts.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python -m lerobot.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
 ```
 
 The actor:
diff --git a/docs/source/hilserl_sim.mdx b/docs/source/hilserl_sim.mdx
index 77191fde3..e2dddd9ed 100644
--- a/docs/source/hilserl_sim.mdx
+++ b/docs/source/hilserl_sim.mdx
@@ -91,7 +91,7 @@ Important parameters:
 To run the environment, set mode to null:
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
+python -m lerobot.rl.gym_manipulator --config_path path/to/gym_hil_env.json
 ```
 
 ### Recording a Dataset
@@ -118,7 +118,7 @@ To collect a dataset, set the mode to `record` whilst defining the repo_id and n
 ```
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.json
+python -m lerobot.rl.gym_manipulator --config_path path/to/gym_hil_env.json
 ```
 
 ### Training a Policy
@@ -126,13 +126,13 @@ python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/gym_hil_env.j
 To train a policy, checkout the configuration example available [here](https://huggingface.co/datasets/lerobot/config_examples/resolve/main/rl/gym_hil/train_config.json) and run the actor and learner servers:
 
 ```bash
-python -m lerobot.scripts.rl.actor --config_path path/to/train_gym_hil_env.json
+python -m lerobot.rl.actor --config_path path/to/train_gym_hil_env.json
 ```
 
 In a different terminal, run the learner server:
 
 ```bash
-python -m lerobot.scripts.rl.learner --config_path path/to/train_gym_hil_env.json
+python -m lerobot.rl.learner --config_path path/to/train_gym_hil_env.json
 ```
 
 The simulation environment provides a safe and repeatable way to develop and test your Human-In-the-Loop reinforcement learning components before deploying to real robots.
diff --git a/docs/source/il_sim.mdx b/docs/source/il_sim.mdx
index 6a615620b..9b7d7c111 100644
--- a/docs/source/il_sim.mdx
+++ b/docs/source/il_sim.mdx
@@ -61,14 +61,14 @@ Then we can run this command to start:
 
 ```bash
-python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
+python -m lerobot.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
 ```
 
 ```bash
-mjpython -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
+mjpython -m lerobot.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json
 ```
@@ -198,14 +198,14 @@ Then you can run this command to visualize your trained policy
 
 ```bash
-python -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
+python -m lerobot.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
 ```
 
 ```bash
-mjpython -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
+mjpython -m lerobot.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json
 ```
diff --git a/src/lerobot/scripts/rl/actor.py b/src/lerobot/rl/actor.py
similarity index 99%
rename from src/lerobot/scripts/rl/actor.py
rename to src/lerobot/rl/actor.py
index baa284c4a..d1e709253 100644
--- a/src/lerobot/scripts/rl/actor.py
+++ b/src/lerobot/rl/actor.py
@@ -24,7 +24,7 @@ Examples of usage:
 - Start an actor server for real robot training with human-in-the-loop intervention:
 
 ```bash
-python -m lerobot.scripts.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python -m lerobot.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so100.json
 ```
 
 **NOTE**: The actor server requires a running learner server to connect to. Ensure the learner
@@ -64,12 +64,6 @@ from lerobot.policies.factory import make_policy
 from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.processor import TransitionKey
 from lerobot.robots import so100_follower  # noqa: F401
-from lerobot.scripts.rl.gym_manipulator import (
-    create_transition,
-    make_processors,
-    make_robot_env,
-    step_env_and_process_transition,
-)
 from lerobot.teleoperators import gamepad, so101_leader  # noqa: F401
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.transport import services_pb2, services_pb2_grpc
@@ -96,6 +90,13 @@ from lerobot.utils.utils import (
     init_logging,
 )
 
+from .gym_manipulator import (
+    create_transition,
+    make_processors,
+    make_robot_env,
+    step_env_and_process_transition,
+)
+
 ACTOR_SHUTDOWN_TIMEOUT = 30
 
 # Main entry point
diff --git a/src/lerobot/scripts/rl/crop_dataset_roi.py b/src/lerobot/rl/crop_dataset_roi.py
similarity index 100%
rename from src/lerobot/scripts/rl/crop_dataset_roi.py
rename to src/lerobot/rl/crop_dataset_roi.py
diff --git a/src/lerobot/scripts/rl/eval_policy.py b/src/lerobot/rl/eval_policy.py
similarity index 97%
rename from src/lerobot/scripts/rl/eval_policy.py
rename to src/lerobot/rl/eval_policy.py
index aa97483b6..7cec66800 100644
--- a/src/lerobot/scripts/rl/eval_policy.py
+++ b/src/lerobot/rl/eval_policy.py
@@ -25,12 +25,13 @@ from lerobot.robots import (  # noqa: F401
     make_robot_from_config,
     so100_follower,
 )
-from lerobot.scripts.rl.gym_manipulator import make_robot_env
 from lerobot.teleoperators import (
     gamepad,  # noqa: F401
     so101_leader,  # noqa: F401
 )
 
+from .gym_manipulator import make_robot_env
+
 logging.basicConfig(level=logging.INFO)
diff --git a/src/lerobot/scripts/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py
similarity index 100%
rename from src/lerobot/scripts/rl/gym_manipulator.py
rename to src/lerobot/rl/gym_manipulator.py
diff --git a/src/lerobot/scripts/rl/learner.py b/src/lerobot/rl/learner.py
similarity index 99%
rename from src/lerobot/scripts/rl/learner.py
rename to src/lerobot/rl/learner.py
index 5d9953827..6441ba55f 100644
--- a/src/lerobot/scripts/rl/learner.py
+++ b/src/lerobot/rl/learner.py
@@ -25,7 +25,7 @@ Examples of usage:
 - Start a learner server for training:
 
 ```bash
-python -m lerobot.scripts.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
+python -m lerobot.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so100.json
 ```
 
 **NOTE**: Start the learner server before launching the actor server. The learner opens a gRPC server
@@ -73,7 +73,6 @@ from lerobot.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.policies.factory import make_policy
 from lerobot.policies.sac.modeling_sac import SACPolicy
 from lerobot.robots import so100_follower  # noqa: F401
-from lerobot.scripts.rl import learner_service
 from lerobot.teleoperators import gamepad, so101_leader  # noqa: F401
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.transport import services_pb2_grpc
@@ -100,6 +99,8 @@ from lerobot.utils.utils import (
 )
 from lerobot.utils.wandb_utils import WandBLogger
 
+from .learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService
+
 LOG_PREFIX = "[LEARNER]"
@@ -639,7 +640,7 @@ def start_learner(
     # TODO: Check if its useful
     _ = ProcessSignalHandler(False, display_pid=True)
 
-    service = learner_service.LearnerService(
+    service = LearnerService(
         shutdown_event=shutdown_event,
         parameters_queue=parameters_queue,
         seconds_between_pushes=cfg.policy.actor_learner_config.policy_parameters_push_frequency,
@@ -649,7 +650,7 @@
 
     server = grpc.server(
-        ThreadPoolExecutor(max_workers=learner_service.MAX_WORKERS),
+        ThreadPoolExecutor(max_workers=MAX_WORKERS),
         options=[
             ("grpc.max_receive_message_length", MAX_MESSAGE_SIZE),
             ("grpc.max_send_message_length", MAX_MESSAGE_SIZE),
@@ -670,7 +671,7 @@
     shutdown_event.wait()
 
     logging.info("[LEARNER] Stopping gRPC server...")
-    server.stop(learner_service.SHUTDOWN_TIMEOUT)
+    server.stop(SHUTDOWN_TIMEOUT)
     logging.info("[LEARNER] gRPC server stopped")
diff --git a/src/lerobot/scripts/rl/learner_service.py b/src/lerobot/rl/learner_service.py
similarity index 100%
rename from src/lerobot/scripts/rl/learner_service.py
rename to src/lerobot/rl/learner_service.py
diff --git a/tests/rl/test_actor.py b/tests/rl/test_actor.py
index f078b4602..aa9913bb2 100644
--- a/tests/rl/test_actor.py
+++ b/tests/rl/test_actor.py
@@ -65,7 +65,7 @@ def close_service_stub(channel, server):
 
 @require_package("grpc")
 def test_establish_learner_connection_success():
-    from lerobot.scripts.rl.actor import establish_learner_connection
+    from lerobot.rl.actor import establish_learner_connection
 
     """Test successful connection establishment."""
     stub, _servicer, channel, server = create_learner_service_stub()
@@ -82,7 +82,7 @@
 
 @require_package("grpc")
 def test_establish_learner_connection_failure():
-    from lerobot.scripts.rl.actor import establish_learner_connection
+    from lerobot.rl.actor import establish_learner_connection
 
     """Test connection failure."""
     stub, servicer, channel, server = create_learner_service_stub()
@@ -101,7 +101,7 @@
 
 @require_package("grpc")
 def test_push_transitions_to_transport_queue():
-    from lerobot.scripts.rl.actor import push_transitions_to_transport_queue
+    from lerobot.rl.actor import push_transitions_to_transport_queue
     from lerobot.transport.utils import bytes_to_transitions
     from tests.transport.test_transport_utils import assert_transitions_equal
@@ -137,7 +137,7 @@
 @require_package("grpc")
 @pytest.mark.timeout(3)  # force cross-platform watchdog
 def test_transitions_stream():
-    from lerobot.scripts.rl.actor import transitions_stream
+    from lerobot.rl.actor import transitions_stream
 
     """Test transitions stream functionality."""
     shutdown_event = Event()
@@ -169,7 +169,7 @@ def test_transitions_stream():
 @require_package("grpc")
 @pytest.mark.timeout(3)  # force cross-platform watchdog
 def test_interactions_stream():
-    from lerobot.scripts.rl.actor import interactions_stream
+    from lerobot.rl.actor import interactions_stream
     from lerobot.transport.utils import bytes_to_python_object, python_object_to_bytes
 
     """Test interactions stream functionality."""
diff --git a/tests/rl/test_actor_learner.py b/tests/rl/test_actor_learner.py
index b2a7a5d5f..43a6b0957 100644
--- a/tests/rl/test_actor_learner.py
+++ b/tests/rl/test_actor_learner.py
@@ -90,13 +90,13 @@ def cfg():
 @require_package("grpc")
 @pytest.mark.timeout(10)  # force cross-platform watchdog
 def test_end_to_end_transitions_flow(cfg):
-    from lerobot.scripts.rl.actor import (
+    from lerobot.rl.actor import (
         establish_learner_connection,
         learner_service_client,
         push_transitions_to_transport_queue,
         send_transitions,
     )
-    from lerobot.scripts.rl.learner import start_learner
+    from lerobot.rl.learner import start_learner
     from lerobot.transport.utils import bytes_to_transitions
     from tests.transport.test_transport_utils import assert_transitions_equal
@@ -152,12 +152,12 @@ def test_end_to_end_transitions_flow(cfg):
 @require_package("grpc")
 @pytest.mark.timeout(10)
 def test_end_to_end_interactions_flow(cfg):
-    from lerobot.scripts.rl.actor import (
+    from lerobot.rl.actor import (
         establish_learner_connection,
         learner_service_client,
         send_interactions,
     )
-    from lerobot.scripts.rl.learner import start_learner
+    from lerobot.rl.learner import start_learner
     from lerobot.transport.utils import bytes_to_python_object, python_object_to_bytes
 
     """Test complete interactions flow from actor to learner."""
@@ -226,8 +226,8 @@ def test_end_to_end_interactions_flow(cfg):
 @pytest.mark.parametrize("data_size", ["small", "large"])
 @pytest.mark.timeout(10)
 def test_end_to_end_parameters_flow(cfg, data_size):
-    from lerobot.scripts.rl.actor import establish_learner_connection, learner_service_client, receive_policy
-    from lerobot.scripts.rl.learner import start_learner
+    from lerobot.rl.actor import establish_learner_connection, learner_service_client, receive_policy
+    from lerobot.rl.learner import start_learner
    from lerobot.transport.utils import bytes_to_state_dict, state_to_bytes
 
     """Test complete parameter flow from learner to actor, with small and large data."""
diff --git a/tests/rl/test_learner_service.py b/tests/rl/test_learner_service.py
index f5e1e8d48..b0e61165a 100644
--- a/tests/rl/test_learner_service.py
+++ b/tests/rl/test_learner_service.py
@@ -50,7 +50,7 @@ def create_learner_service_stub(
 ):
     import grpc
 
-    from lerobot.scripts.rl.learner_service import LearnerService
+    from lerobot.rl.learner_service import LearnerService
     from lerobot.transport import services_pb2_grpc  # generated from .proto
 
     """Fixture to start a LearnerService gRPC server and provide a connected stub."""