Initial commit
This commit is contained in:
65
examples/calvin/Dockerfile
Normal file
65
examples/calvin/Dockerfile
Normal file
@@ -0,0 +1,65 @@
|
||||
# THIS DOCKERFILE DOES NOT YET WORK
# Dockerfile for the CALVIN benchmark.

# Build the container:
# docker build . -t calvin -f examples/calvin/Dockerfile

# Run the container:
# docker run --rm -it --network=host -v .:/app --privileged --gpus all calvin /bin/bash

FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04@sha256:2d913b09e6be8387e1a10976933642c73c840c0b735f0bf3c28d97fc9bc422e0
SHELL ["/bin/bash", "-c"]

# Build-time only: keeps apt from prompting without leaking the setting into
# the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# System packages. ca-certificates is listed explicitly because the HTTPS
# downloads below (wget, git clone) need it and --no-install-recommends is set.
# The apt lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        ffmpeg \
        g++ \
        git \
        libgl1-mesa-glx \
        libgles2-mesa-dev \
        libglew-dev \
        libglfw3-dev \
        libglib2.0-0 \
        libosmesa6-dev \
        libsm6 \
        libxext6 \
        libxrender1 \
        make \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Install miniconda (the installer is deleted in the same layer).
ENV CONDA_DIR=/opt/conda
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p "$CONDA_DIR" && \
    rm ~/miniconda.sh
ENV PATH=$CONDA_DIR/bin:$PATH

# Submodules don't work with calvin because it internally parses git metadata.
# So we have to clone it directly.
RUN git clone --recurse-submodules https://github.com/mees/calvin.git /root/calvin

# -y: there is no terminal to answer conda's confirmation prompt during a build.
RUN conda create -y -n calvin python=3.8 && \
    conda clean -afy

# Install CALVIN and the client-side Python dependencies into the conda env.
# The cd stays inside this RUN because the env activation only lives in this shell.
RUN source /opt/conda/bin/activate calvin && \
    pip install --no-cache-dir setuptools==57.5.0 && \
    cd /root/calvin && \
    ./install.sh && \
    pip install --no-cache-dir \
        "imageio[ffmpeg]" \
        moviepy \
        numpy==1.23.0 \
        tqdm \
        tyro \
        websockets \
        msgpack

# The repository is expected to be mounted at /app at run time.
ENV PYTHONPATH=/app:/app/packages/openpi-client/src

# Download CALVIN dataset, see https://github.com/mees/calvin/blob/main/dataset/download_data.sh
RUN mkdir -p /datasets && cd /datasets && \
    wget http://calvin.cs.uni-freiburg.de/dataset/calvin_debug_dataset.zip && \
    unzip calvin_debug_dataset.zip && \
    rm calvin_debug_dataset.zip

WORKDIR /app

# exec replaces the wrapper shell so python receives signals directly.
CMD ["/bin/bash", "-c", "source /opt/conda/bin/activate calvin && exec python examples/calvin/main.py"]
|
||||
47
examples/calvin/README.md
Normal file
47
examples/calvin/README.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# CALVIN Benchmark
|
||||
|
||||
This example runs the CALVIN benchmark: https://github.com/mees/calvin
|
||||
|
||||
## With Docker
|
||||
|
||||
```bash
|
||||
export SERVER_ARGS="--env CALVIN"
|
||||
docker compose -f examples/calvin/compose.yml up --build
|
||||
```
|
||||
|
||||
## Without Docker
|
||||
|
||||
Terminal window 1:
|
||||
|
||||
```bash
|
||||
cd $OPENPI_ROOT
|
||||
conda create -n calvin python=3.8
|
||||
conda activate calvin
|
||||
|
||||
git clone --recurse-submodules https://github.com/mees/calvin.git
|
||||
cd calvin
|
||||
pip install setuptools==57.5.0
|
||||
./install.sh
|
||||
|
||||
pip install imageio[ffmpeg] moviepy numpy==1.23.0 tqdm tyro websockets msgpack
|
||||
export PYTHONPATH=$PYTHONPATH:$OPENPI_ROOT/packages/openpi-client/src
|
||||
|
||||
# Download CALVIN dataset, see https://github.com/mees/calvin/blob/main/dataset/download_data.sh
|
||||
export CALVIN_DATASETS_DIR=~/datasets
|
||||
export CALVIN_DATASET=calvin_debug_dataset
|
||||
mkdir -p $CALVIN_DATASETS_DIR && cd $CALVIN_DATASETS_DIR
|
||||
wget http://calvin.cs.uni-freiburg.de/dataset/$CALVIN_DATASET.zip
|
||||
unzip $CALVIN_DATASET.zip
|
||||
rm $CALVIN_DATASET.zip
|
||||
|
||||
# Run the simulation
|
||||
cd $OPENPI_ROOT
|
||||
python examples/calvin/main.py --args.calvin_data_path=$CALVIN_DATASETS_DIR/$CALVIN_DATASET
|
||||
```
|
||||
|
||||
Terminal window 2:
|
||||
|
||||
```bash
|
||||
# Run the server
|
||||
uv run scripts/serve_policy.py --env CALVIN
|
||||
```
|
||||
46
examples/calvin/compose.yml
Normal file
46
examples/calvin/compose.yml
Normal file
@@ -0,0 +1,46 @@
|
||||
# Run with:
# docker compose -f examples/calvin/compose.yml up --build
services:
  # CALVIN simulation client; runs the default command of examples/calvin/Dockerfile.
  runtime:
    build:
      context: ../..
      dockerfile: examples/calvin/Dockerfile
    image: calvin
    depends_on:
      - openpi_server
    init: true
    tty: true
    privileged: true
    # Host networking is shared with openpi_server below.
    network_mode: host
    volumes:
      - $PWD:/app
      - ../../data:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

  # Policy server that the runtime service depends on.
  openpi_server:
    build:
      context: ../..
      dockerfile: scripts/serve_policy.Dockerfile
    image: openpi_server
    init: true
    tty: true
    network_mode: host
    volumes:
      - $PWD:/app
    environment:
      # No value given: SERVER_ARGS is taken from the environment of the
      # shell that invokes docker compose.
      - SERVER_ARGS

    # Comment out this block if not running on a machine with GPUs.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
|
||||
175
examples/calvin/main.py
Normal file
175
examples/calvin/main.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""Runs a model in a CALVIN simulation environment."""
|
||||
|
||||
import collections
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
import pathlib
|
||||
import time
|
||||
|
||||
from calvin_agent.evaluation.multistep_sequences import get_sequences
|
||||
from calvin_agent.evaluation.utils import get_env_state_for_initial_condition
|
||||
import calvin_env
|
||||
from calvin_env.envs.play_table_env import get_env
|
||||
import hydra
|
||||
import imageio
|
||||
import numpy as np
|
||||
from omegaconf import OmegaConf
|
||||
from openpi_client import websocket_client_policy as _websocket_client_policy
|
||||
import tqdm
|
||||
import tyro
|
||||
|
||||
# Local timestamp string, computed once when this module is imported.
# NOTE(review): not referenced anywhere in the visible part of this file -- confirm it is still needed.
DATE_TIME = time.strftime("%Y_%m_%d-%H_%M_%S")
|
||||
|
||||
|
||||
@dataclass
class Args:
    """Command-line options for the CALVIN evaluation client (parsed by tyro)."""

    # ---------------------------------------------------------------------
    # Model server parameters
    # ---------------------------------------------------------------------
    host: str = "0.0.0.0"  # Host passed to the websocket policy client
    port: int = 8000  # Port passed to the websocket policy client
    replan_steps: int = 5  # Actions executed from each predicted chunk before querying again

    # ---------------------------------------------------------------------
    # CALVIN environment-specific parameters
    # ---------------------------------------------------------------------
    calvin_data_path: str = "/datasets/calvin_debug_dataset"  # CALVIN dataset root; its "validation" subfolder is loaded
    max_subtask_steps: int = 360  # Step budget for each subtask
    num_trials: int = 1000  # Number of evaluation sequences to roll out

    # ---------------------------------------------------------------------
    # Utils
    # ---------------------------------------------------------------------
    video_out_path: str = "data/calvin/videos"  # Directory that rollout videos are written to
    num_save_videos: int = 5  # Threshold on the episode count that limits how many rollout videos are written
    video_temp_subsample: int = 5  # Keep every n-th frame so videos are shorter

    seed: int = 7  # Seed for numpy's global random generator
|
||||
|
||||
|
||||
def main(args: Args) -> None:
    """Evaluate a remotely served policy on CALVIN multi-step task sequences.

    For every evaluation sequence the environment is reset to the sequence's
    initial condition and the subtasks are attempted in order. Actions are
    requested from the policy server in chunks; the first ``args.replan_steps``
    actions of each chunk are executed before a new chunk is requested. An
    episode ends at the first subtask that is not solved within
    ``args.max_subtask_steps`` steps. Running statistics are logged after each
    episode and a final summary is logged at the end.

    Args:
        args: Parsed command-line options, see ``Args``.

    Raises:
        AssertionError: If the policy returns fewer than ``args.replan_steps``
            actions in a chunk.
    """
    # Set random seed
    np.random.seed(args.seed)

    # Initialize CALVIN environment
    env = get_env(pathlib.Path(args.calvin_data_path) / "validation", show_gui=False)

    # Get CALVIN eval task set
    task_definitions, task_instructions, task_reward = _get_calvin_tasks_and_reward(args.num_trials)

    client = _websocket_client_policy.WebsocketClientPolicy(args.host, args.port)

    # Make sure the video directory exists before the first video is written.
    video_dir = pathlib.Path(args.video_out_path)
    video_dir.mkdir(parents=True, exist_ok=True)

    # Start evaluation.
    episode_solved_subtasks = []
    per_subtask_success = collections.defaultdict(list)
    for i, (initial_state, task_sequence) in enumerate(tqdm.tqdm(task_definitions)):
        logging.info(f"Starting episode {i+1}...")
        logging.info(f"Task sequence: {task_sequence}")

        # Reset env to initial position for task
        robot_obs, scene_obs = get_env_state_for_initial_condition(initial_state)
        env.reset(robot_obs=robot_obs, scene_obs=scene_obs)

        rollout_images = []
        solved_subtasks = 0
        for subtask in task_sequence:
            start_info = env.get_info()
            action_plan = collections.deque()

            obs = env.get_obs()
            done = False
            for _ in range(args.max_subtask_steps):
                img = obs["rgb_obs"]["rgb_static"]
                wrist_img = obs["rgb_obs"]["rgb_gripper"]
                # Keep frames in their original (height, width, channel) layout:
                # they are written with imageio below, which expects channels last.
                rollout_images.append(img)

                if not action_plan:
                    # Finished executing previous action chunk -- compute new chunk
                    # Prepare observations dict
                    element = {
                        "observation/rgb_static": img,
                        "observation/rgb_gripper": wrist_img,
                        "observation/state": obs["robot_obs"],
                        "prompt": str(task_instructions[subtask][0]),
                    }

                    # Query model to get action
                    action_chunk = client.infer(element)["actions"]
                    assert (
                        len(action_chunk) >= args.replan_steps
                    ), f"We want to replan every {args.replan_steps} steps, but policy only predicts {len(action_chunk)} steps."
                    action_plan.extend(action_chunk[: args.replan_steps])

                action = action_plan.popleft()

                # Round gripper action since env expects gripper_action in (-1, 1)
                action[-1] = 1 if action[-1] > 0 else -1

                # Step environment
                obs, _, _, current_info = env.step(action)

                # check if current step solves a task
                current_task_info = task_reward.get_task_info_for_set(start_info, current_info, {subtask})
                if len(current_task_info) > 0:
                    done = True
                    solved_subtasks += 1
                    break

            per_subtask_success[subtask].append(int(done))
            if not done:
                # Subtask execution failed --> stop episode
                break

        episode_solved_subtasks.append(solved_subtasks)
        # The list already contains the current episode, so "<=" saves exactly
        # the first `num_save_videos` rollouts.
        if len(episode_solved_subtasks) <= args.num_save_videos:
            # Save rollout video.
            idx = len(episode_solved_subtasks)
            imageio.mimwrite(
                video_dir / f"rollout_{idx}.mp4",
                [np.asarray(x) for x in rollout_images[:: args.video_temp_subsample]],
                fps=50 // args.video_temp_subsample,
            )

        # Print current performance after each episode
        logging.info(f"Solved subtasks: {solved_subtasks}")
        _calvin_print_performance(episode_solved_subtasks, per_subtask_success)

    # Log final performance
    logging.info(f"results/avg_num_subtasks: : {np.mean(episode_solved_subtasks)}")
    # Convert once: comparing a plain Python list with an int raises TypeError.
    solved_counts = np.array(episode_solved_subtasks)
    for i in range(1, 6):
        # Compute fraction of episodes that have *at least* i successful subtasks
        logging.info(
            f"results/avg_success_len_{i}: {np.sum(solved_counts >= i) / len(episode_solved_subtasks)}"
        )
    for key in per_subtask_success:
        logging.info(f"results/avg_success__{key}: {np.mean(per_subtask_success[key])}")
|
||||
|
||||
|
||||
def _get_calvin_tasks_and_reward(num_sequences):
    """Load what is needed to run and score CALVIN evaluation sequences.

    Configuration files are read from the ``calvin_models/conf`` directory,
    which is located relative to the installed ``calvin_env`` package.

    Args:
        num_sequences: Number of evaluation sequences requested from
            ``get_sequences``.

    Returns:
        A tuple ``(eval_sequences, val_annotations, task_oracle)``: the result
        of ``get_sequences``, the loaded validation annotation config, and the
        object instantiated by hydra from the task config.
    """
    # calvin_env's module file sits three levels below the directory that holds calvin_models.
    calvin_root = pathlib.Path(calvin_env.__file__).absolute().parents[2]
    conf_root = calvin_root / "calvin_models" / "conf"

    oracle_cfg = OmegaConf.load(conf_root / "callbacks/rollout/tasks/new_playtable_tasks.yaml")
    oracle = hydra.utils.instantiate(oracle_cfg)
    annotations = OmegaConf.load(conf_root / "annotations/new_playtable_validation.yaml")

    return get_sequences(num_sequences), annotations, oracle
|
||||
|
||||
|
||||
def _calvin_print_performance(episode_solved_subtasks, per_subtask_success):
|
||||
# Compute avg success rate per task length
|
||||
logging.info("#####################################################")
|
||||
logging.info(f"Avg solved subtasks: {np.mean(episode_solved_subtasks)}\n")
|
||||
|
||||
logging.info("Per sequence_length avg success:")
|
||||
for i in range(1, 6):
|
||||
# Compute fraction of episodes that have *at least* i successful subtasks
|
||||
logging.info(f"{i}: {np.sum(np.array(episode_solved_subtasks) >= i) / len(episode_solved_subtasks) * 100}%")
|
||||
|
||||
logging.info("\n Per subtask avg success:")
|
||||
for key in per_subtask_success:
|
||||
logging.info(f"{key}: \t\t\t {np.mean(per_subtask_success[key]) * 100}%")
|
||||
logging.info("#####################################################")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Show INFO-level messages; all progress and results are reported through logging.
    logging.basicConfig(level=logging.INFO)
    # tyro builds the command-line interface from main's signature and calls it.
    tyro.cli(main)
|
||||
Reference in New Issue
Block a user