Initial commit

This commit is contained in:
Ury Zhilinsky
2024-12-23 13:38:06 -08:00
commit 385780ecc3
121 changed files with 15572 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
# THIS DOCKERFILE DOES NOT YET WORK
# Dockerfile for the CALVIN benchmark.
# Build the container:
# docker build . -t calvin -f examples/calvin/Dockerfile
# Run the container:
# docker run --rm -it --network=host -v .:/app --privileged --gpus all calvin /bin/bash
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04@sha256:2d913b09e6be8387e1a10976933642c73c840c0b735f0bf3c28d97fc9bc422e0

# bash (not sh) so `source` works in RUN steps; pipefail so piped failures abort the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# NOTE(review): this bakes DEBIAN_FRONTEND into the runtime environment; scoping it to the
# apt step (`RUN DEBIAN_FRONTEND=noninteractive apt-get ...`) would keep it build-only.
ENV DEBIAN_FRONTEND=noninteractive

# OS packages needed to build/run CALVIN (GL/OSMesa stack for headless rendering).
# Sorted alphabetically; apt lists removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    ffmpeg \
    g++ \
    git \
    libgl1-mesa-glx \
    libgles2-mesa-dev \
    libglew-dev \
    libglfw3-dev \
    libglib2.0-0 \
    libosmesa6-dev \
    libsm6 \
    libxext6 \
    libxrender1 \
    make \
    unzip \
    wget && \
    rm -rf /var/lib/apt/lists/*

# Install miniconda; delete the installer in the same layer so it does not persist in the image.
ENV CONDA_DIR=/opt/conda
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p $CONDA_DIR && \
    rm ~/miniconda.sh
ENV PATH=$CONDA_DIR/bin:$PATH

# Submodules don't work with calvin because it internally parses git metadata.
# So we have to clone it directly.
RUN git clone --recurse-submodules https://github.com/mees/calvin.git /root/calvin

# --yes: conda prompts for confirmation otherwise, which fails in a non-interactive build.
RUN conda create --yes -n calvin python=3.8 && \
    conda clean -afy
RUN source /opt/conda/bin/activate calvin && \
    pip install --no-cache-dir setuptools==57.5.0 && \
    cd /root/calvin && \
    ./install.sh && \
    pip install --no-cache-dir \
    imageio[ffmpeg] \
    moviepy \
    numpy==1.23.0 \
    tqdm \
    tyro \
    websockets \
    msgpack

ENV PYTHONPATH=/app:/app/packages/openpi-client/src

# Download CALVIN dataset, see https://github.com/mees/calvin/blob/main/dataset/download_data.sh
RUN mkdir -p /datasets && cd /datasets && \
    wget http://calvin.cs.uni-freiburg.de/dataset/calvin_debug_dataset.zip && \
    unzip calvin_debug_dataset.zip && \
    rm calvin_debug_dataset.zip

WORKDIR /app
CMD ["/bin/bash", "-c", "source /opt/conda/bin/activate calvin && python examples/calvin/main.py"]

47
examples/calvin/README.md Normal file
View File

@@ -0,0 +1,47 @@
# CALVIN Benchmark
This example runs the CALVIN benchmark: https://github.com/mees/calvin
## With Docker
```bash
export SERVER_ARGS="--env CALVIN"
docker compose -f examples/calvin/compose.yml up --build
```
## Without Docker
Terminal window 1:
```bash
cd $OPENPI_ROOT
conda create -n calvin python=3.8
conda activate calvin
git clone --recurse-submodules https://github.com/mees/calvin.git
cd calvin
pip install setuptools==57.5.0
./install.sh
pip install imageio[ffmpeg] moviepy numpy==1.23.0 tqdm tyro websockets msgpack
export PYTHONPATH=$PYTHONPATH:$OPENPI_ROOT/packages/openpi-client/src
# Download CALVIN dataset, see https://github.com/mees/calvin/blob/main/dataset/download_data.sh
export CALVIN_DATASETS_DIR=~/datasets
export CALVIN_DATASET=calvin_debug_dataset
mkdir -p $CALVIN_DATASETS_DIR && cd $CALVIN_DATASETS_DIR
wget http://calvin.cs.uni-freiburg.de/dataset/$CALVIN_DATASET.zip
unzip $CALVIN_DATASET.zip
rm $CALVIN_DATASET.zip
# Run the simulation
cd $OPENPI_ROOT
python examples/calvin/main.py --args.calvin_data_path=$CALVIN_DATASETS_DIR/$CALVIN_DATASET
```
Terminal window 2:
```bash
# Run the server
uv run scripts/serve_policy.py --env CALVIN
```

View File

@@ -0,0 +1,46 @@
# Run with:
# docker compose -f examples/calvin/compose.yml up --build
services:
  # CALVIN simulation runtime; connects to the policy server over the host network.
  runtime:
    image: calvin
    depends_on:
      - openpi_server
    build:
      context: ../..
      dockerfile: examples/calvin/Dockerfile
    init: true
    tty: true
    network_mode: host
    privileged: true
    volumes:
      - $PWD:/app
      - ../../data:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Policy inference server; configured via the SERVER_ARGS env var (see README).
  openpi_server:
    image: openpi_server
    build:
      context: ../..
      dockerfile: scripts/serve_policy.Dockerfile
    init: true
    tty: true
    network_mode: host
    volumes:
      - $PWD:/app
    environment:
      - SERVER_ARGS
    # Comment out this block if not running on a machine with GPUs.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

175
examples/calvin/main.py Normal file
View File

@@ -0,0 +1,175 @@
"""Runs a model in a CALVIN simulation environment."""
import collections
from dataclasses import dataclass
import logging
import pathlib
import time
from calvin_agent.evaluation.multistep_sequences import get_sequences
from calvin_agent.evaluation.utils import get_env_state_for_initial_condition
import calvin_env
from calvin_env.envs.play_table_env import get_env
import hydra
import imageio
import numpy as np
from omegaconf import OmegaConf
from openpi_client import websocket_client_policy as _websocket_client_policy
import tqdm
import tyro
# Timestamp captured at import time; not referenced elsewhere in this file — TODO confirm intended use.
DATE_TIME = time.strftime("%Y_%m_%d-%H_%M_%S")


@dataclass
class Args:
    """Command-line arguments for the CALVIN evaluation client (parsed by tyro)."""

    #################################################################################################################
    # Model server parameters
    #################################################################################################################
    host: str = "0.0.0.0"
    port: int = 8000
    replan_steps: int = 5
    #################################################################################################################
    # CALVIN environment-specific parameters
    #################################################################################################################
    calvin_data_path: str = "/datasets/calvin_debug_dataset"  # Path to CALVIN dataset for loading validation tasks
    max_subtask_steps: int = 360  # Max number of steps per subtask
    num_trials: int = 1000  # Number of rollouts per task
    #################################################################################################################
    # Utils
    #################################################################################################################
    video_out_path: str = "data/calvin/videos"  # Path to save videos
    num_save_videos: int = 5  # Number of videos to be logged per task
    video_temp_subsample: int = 5  # Temporal subsampling to make videos shorter
    seed: int = 7  # Random Seed (for reproducibility)
def main(args: Args) -> None:
    """Run CALVIN long-horizon evaluation against a remote policy server.

    For each evaluation sequence, resets the environment to the task's initial
    condition, then executes subtasks one at a time. Actions are queried from
    the websocket policy server in chunks and replanned every
    ``args.replan_steps`` steps. An episode ends early when a subtask fails.
    Logs running success statistics and saves the first ``args.num_save_videos``
    rollout videos under ``args.video_out_path``.
    """
    # Set random seed
    np.random.seed(args.seed)

    # Initialize CALVIN environment (headless).
    env = get_env(pathlib.Path(args.calvin_data_path) / "validation", show_gui=False)

    # Get CALVIN eval task set
    task_definitions, task_instructions, task_reward = _get_calvin_tasks_and_reward(args.num_trials)

    client = _websocket_client_policy.WebsocketClientPolicy(args.host, args.port)

    # Start evaluation.
    episode_solved_subtasks = []  # per-episode count of consecutively solved subtasks
    per_subtask_success = collections.defaultdict(list)  # subtask name -> list of 0/1 outcomes
    for i, (initial_state, task_sequence) in enumerate(tqdm.tqdm(task_definitions)):
        logging.info(f"Starting episode {i+1}...")
        logging.info(f"Task sequence: {task_sequence}")

        # Reset env to initial position for task
        robot_obs, scene_obs = get_env_state_for_initial_condition(initial_state)
        env.reset(robot_obs=robot_obs, scene_obs=scene_obs)

        rollout_images = []
        solved_subtasks = 0
        for subtask in task_sequence:
            start_info = env.get_info()
            action_plan = collections.deque()
            obs = env.get_obs()
            done = False
            for _ in range(args.max_subtask_steps):
                img = obs["rgb_obs"]["rgb_static"]
                wrist_img = obs["rgb_obs"]["rgb_gripper"]
                rollout_images.append(img.transpose(2, 0, 1))
                if not action_plan:
                    # Finished executing previous action chunk -- compute new chunk.
                    # Prepare observations dict
                    element = {
                        "observation/rgb_static": img,
                        "observation/rgb_gripper": wrist_img,
                        "observation/state": obs["robot_obs"],
                        "prompt": str(task_instructions[subtask][0]),
                    }
                    # Query model to get action
                    action_chunk = client.infer(element)["actions"]
                    assert (
                        len(action_chunk) >= args.replan_steps
                    ), f"We want to replan every {args.replan_steps} steps, but policy only predicts {len(action_chunk)} steps."
                    action_plan.extend(action_chunk[: args.replan_steps])
                action = action_plan.popleft()
                # Round gripper action since env expects gripper_action in (-1, 1)
                action[-1] = 1 if action[-1] > 0 else -1
                # Step environment
                obs, _, _, current_info = env.step(action)
                # check if current step solves a task
                current_task_info = task_reward.get_task_info_for_set(start_info, current_info, {subtask})
                if len(current_task_info) > 0:
                    done = True
                    solved_subtasks += 1
                    break
            per_subtask_success[subtask].append(int(done))
            if not done:
                # Subtask execution failed --> stop episode
                break
        episode_solved_subtasks.append(solved_subtasks)

        if len(episode_solved_subtasks) < args.num_save_videos:
            # Save rollout video. Ensure the output directory exists first
            # (fix: imageio does not create missing parent directories).
            pathlib.Path(args.video_out_path).mkdir(parents=True, exist_ok=True)
            idx = len(episode_solved_subtasks)
            imageio.mimwrite(
                pathlib.Path(args.video_out_path) / f"rollout_{idx}.mp4",
                [np.asarray(x) for x in rollout_images[:: args.video_temp_subsample]],
                fps=50 // args.video_temp_subsample,
            )

        # Print current performance after each episode
        logging.info(f"Solved subtasks: {solved_subtasks}")
        _calvin_print_performance(episode_solved_subtasks, per_subtask_success)

    # Log final performance
    logging.info(f"results/avg_num_subtasks: {np.mean(episode_solved_subtasks)}")
    for i in range(1, 6):
        # Compute fraction of episodes that have *at least* i successful subtasks.
        # (fix: wrap the Python list in np.array -- `list >= int` raises TypeError.)
        logging.info(
            f"results/avg_success_len_{i}: {np.sum(np.array(episode_solved_subtasks) >= i) / len(episode_solved_subtasks)}"
        )
    for key in per_subtask_success:
        logging.info(f"results/avg_success__{key}: {np.mean(per_subtask_success[key])}")
def _get_calvin_tasks_and_reward(num_sequences):
    """Build the CALVIN evaluation task set.

    Returns a tuple ``(eval_sequences, val_annotations, task_oracle)``:
    the sampled evaluation sequences, the validation language annotations,
    and the task oracle used to score subtask completion.
    """
    eval_sequences = get_sequences(num_sequences)
    # The calvin_models config tree lives two levels above the calvin_env package.
    calvin_conf_dir = pathlib.Path(calvin_env.__file__).absolute().parents[2] / "calvin_models" / "conf"
    val_annotations = OmegaConf.load(calvin_conf_dir / "annotations/new_playtable_validation.yaml")
    oracle_cfg = OmegaConf.load(calvin_conf_dir / "callbacks/rollout/tasks/new_playtable_tasks.yaml")
    task_oracle = hydra.utils.instantiate(oracle_cfg)
    return eval_sequences, val_annotations, task_oracle
def _calvin_print_performance(episode_solved_subtasks, per_subtask_success):
# Compute avg success rate per task length
logging.info("#####################################################")
logging.info(f"Avg solved subtasks: {np.mean(episode_solved_subtasks)}\n")
logging.info("Per sequence_length avg success:")
for i in range(1, 6):
# Compute fraction of episodes that have *at least* i successful subtasks
logging.info(f"{i}: {np.sum(np.array(episode_solved_subtasks) >= i) / len(episode_solved_subtasks) * 100}%")
logging.info("\n Per subtask avg success:")
for key in per_subtask_success:
logging.info(f"{key}: \t\t\t {np.mean(per_subtask_success[key]) * 100}%")
logging.info("#####################################################")
if __name__ == "__main__":
    # Enable INFO logging so per-episode progress from main() is visible,
    # then parse CLI flags from main's annotated signature via tyro.
    logging.basicConfig(level=logging.INFO)
    tyro.cli(main)