eval update

This commit is contained in:
yuanmengqi
2025-06-07 13:19:22 +00:00
parent 4ade4114da
commit c57b1d4e7a
6 changed files with 38 additions and 17 deletions

View File

@@ -1,7 +1,3 @@
"""Script to run end-to-end evaluation on the benchmark.
Utils and basic architecture are credited to https://github.com/web-arena-x/webarena/blob/main/run.py.
"""
from __future__ import annotations
import argparse
import datetime