refactor&fix: update README and main.py for improved configuration and task status handling
@@ -2,9 +2,9 @@
 # Do not write any secret keys or sensitive information here.
 
 # Monitor configuration
-TASK_CONFIG_PATH=../evaluation_examples/test_all.json
+TASK_CONFIG_PATH=../evaluation_examples/test.json
 EXAMPLES_BASE_PATH=../evaluation_examples/examples
-RESULTS_BASE_PATH=../results_operator_aws
+RESULTS_BASE_PATH=../results
 ACTION_SPACE=pyautogui
 OBSERVATION_TYPE=screenshot
 MODEL_NAME=computer-use-preview
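For context, the monitor reads these values at startup with python-dotenv (see the main.py hunks below). A minimal sketch, assuming python-dotenv is installed and a `.env` file sits in the working directory, for checking what a given `.env` resolves to; the two variables printed are just the ones whose defaults changed:

```python
# Minimal sketch: confirm which paths the monitor will use after editing .env.
# Assumes python-dotenv is installed and .env sits in the working directory.
import os
from dotenv import load_dotenv

load_dotenv()
print(os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test.json"))
print(os.getenv("RESULTS_BASE_PATH", "../results"))
```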
@@ -19,10 +19,13 @@ The monitor can be configured by editing the `.env` file in the monitor director
 
 | Variable | Description | Default Value |
 |----------|-------------|---------------|
-| TASK_CONFIG_PATH | Path to the task configuration JSON file | ../evaluation_examples/test_small.json |
-| EXAMPLES_BASE_PATH | Base path for task example files | ../evaluation_examples/examples |
-| RESULTS_BASE_PATH | Base path for execution results | ../results_operator_aws/pyautogui/screenshot/computer-use-preview |
-| MAX_STEPS | Maximum steps to display for a task | 50 |
+| TASK_CONFIG_PATH | Path to the task configuration file | ../evaluation_examples/test.json |
+| EXAMPLES_BASE_PATH | Base path for example files | ../evaluation_examples/examples |
+| RESULTS_BASE_PATH | Base path for storing results | ../results |
+| ACTION_SPACE | Action space type (e.g., pyautogui, keyboard) | pyautogui |
+| OBSERVATION_TYPE | Type of observation (e.g., screenshot, video) | screenshot |
+| MODEL_NAME | Name of the model to use for task execution | computer-use-preview |
+| MAX_STEPS | Maximum steps to display for a task | 150 |
 | FLASK_PORT | Port for the web server | 80 |
 | FLASK_HOST | Host address for the web server | 0.0.0.0 |
 | FLASK_DEBUG | Enable debug mode (true/false) | false |
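The RESULTS_BASE_PATH default shrinks from the fully nested path to a plain base directory because main.py (below) appends ACTION_SPACE, OBSERVATION_TYPE, and MODEL_NAME itself. A quick sketch of the resulting layout under the new defaults:

```python
import os

# With the new defaults, the monitor resolves the same nested directory
# that the old RESULTS_BASE_PATH default spelled out by hand:
# ../results/pyautogui/screenshot/computer-use-preview
RESULTS_PATH = os.path.join("../results", "pyautogui", "screenshot", "computer-use-preview")
print(RESULTS_PATH)
```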
@@ -30,13 +33,16 @@ The monitor can be configured by editing the `.env` file in the monitor director
 For example:
 ```bash
 # .env
-TASK_CONFIG_PATH=../evaluation_examples/test_small.json
+TASK_CONFIG_PATH=../evaluation_examples/test.json
 EXAMPLES_BASE_PATH=../evaluation_examples/examples
-RESULTS_BASE_PATH=../results_operator_aws/pyautogui/screenshot/computer-use-preview
-MAX_STEPS=50
+RESULTS_BASE_PATH=../results
+ACTION_SPACE=pyautogui
+OBSERVATION_TYPE=screenshot
+MODEL_NAME=computer-use-preview
+MAX_STEPS=150
 FLASK_PORT=80
 FLASK_HOST=0.0.0.0
-FLASK_DEBUG=false
+FLASK_DEBUG=true
 ```
 
 ## Running with Docker
@@ -12,8 +12,11 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()
 
-# {task_type}_{task_id}: status_dict
+# {task_type}_{task_id}: (status_dict, timestamp)
+# For "Done" status, we need to verify it for a period to ensure it doesn't change to "Error"
 TASK_STATUS_CACHE = {}
+# Time in seconds to consider "Done" status as stable (default: 30s)
+DONE_STABILITY_PERIOD = int(os.getenv("DONE_STABILITY_PERIOD", "30"))
 
 app = Flask(__name__)
 
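Cache entries change shape here from a bare status dict to a (status_dict, timestamp) tuple, and the verification window is configurable via DONE_STABILITY_PERIOD. A small sketch of the intended entry shape; the "chrome_abc123" key is a made-up {task_type}_{task_id} value for illustration:

```python
import time

# Entry shape after this change: cache_key -> (status_dict, timestamp).
TASK_STATUS_CACHE = {}
TASK_STATUS_CACHE["chrome_abc123"] = ({"status": "Done"}, time.time())

status_dict, cached_at = TASK_STATUS_CACHE["chrome_abc123"]
elapsed = time.time() - cached_at
print(status_dict["status"], f"cached {elapsed:.1f}s ago")
```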
@@ -26,14 +29,14 @@ if MONITOR_IN_DOCKER:
     RESULTS_BASE_PATH = "/app/results"
 else:
     # Load configuration from environment variables
-    TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test_small.json")
+    TASK_CONFIG_PATH = os.getenv("TASK_CONFIG_PATH", "../evaluation_examples/test.json")
     EXAMPLES_BASE_PATH = os.getenv("EXAMPLES_BASE_PATH", "../evaluation_examples/examples")
     RESULTS_BASE_PATH = os.getenv("RESULTS_BASE_PATH", "../results")
 
 ACTION_SPACE=os.getenv("ACTION_SPACE", "pyautogui")
 OBSERVATION_TYPE=os.getenv("OBSERVATION_TYPE", "screenshot")
 MODEL_NAME=os.getenv("MODEL_NAME", "computer-use-preview")
-MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
+MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
 
 RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
 
@@ -177,9 +180,24 @@ def get_task_status_brief(task_type, task_id):
     # Generate cache key based on task type and ID
     cache_key = f"{task_type}_{task_id}"
 
     # Check if the status is already cached
+    current_time = time.time()
+    last_cache_time = None
     if cache_key in TASK_STATUS_CACHE:
-        return TASK_STATUS_CACHE[cache_key]
+        cached_status, cached_time = TASK_STATUS_CACHE[cache_key]
+        last_cache_time = cached_time
+        # If cached status is "Done", check if it's within the stability period
+        if cached_status["status"].startswith("Done"):
+            # If within stability period, recalculate status to ensure it's correct
+            if current_time - cached_time < DONE_STABILITY_PERIOD:
+                # Status is still in verification period, refresh it
+                pass
+            else:
+                # Status is stable, return from cache
+                return cached_status
+        else:
+            # For non-Done status (like Error), just return from cache
+            return cached_status
 
     result_dir = os.path.join(RESULTS_PATH, task_type, task_id)
 
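The lookup now has three outcomes: recompute when nothing is cached, recompute while a "Done" result is still inside the stability window, and trust the cache otherwise. A self-contained sketch of that decision logic; cached_or_none and the sample key are hypothetical stand-ins, not the monitor's actual helpers:

```python
import time

DONE_STABILITY_PERIOD = 30
TASK_STATUS_CACHE = {}

def cached_or_none(cache_key, now):
    # Returning None means: fall through and recompute from the result directory.
    if cache_key not in TASK_STATUS_CACHE:
        return None
    cached_status, cached_time = TASK_STATUS_CACHE[cache_key]
    if cached_status["status"].startswith("Done"):
        if now - cached_time < DONE_STABILITY_PERIOD:
            return None  # "Done" still in its verification window: refresh it
        return cached_status  # "Done" has held for the full window: stable
    return cached_status  # non-Done terminal states (e.g. "Error") are trusted as-is

start = time.time()
TASK_STATUS_CACHE["chrome_task1"] = ({"status": "Done"}, start)
assert cached_or_none("chrome_task1", start + 5) is None       # still verifying
assert cached_or_none("chrome_task1", start + 60) is not None  # stable now
```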
@@ -293,7 +311,8 @@ def get_task_status_brief(task_type, task_id):
 
     # Cache the status if it is done or error
     if status.startswith("Done") or status == "Error":
-        TASK_STATUS_CACHE[cache_key] = status_dict
+        current_time = last_cache_time if last_cache_time else current_time
+        TASK_STATUS_CACHE[cache_key] = (status_dict, current_time)
 
     return status_dict
 
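One design note on this write-back: last_cache_time (when set) is reused instead of a fresh timestamp, so the stability window counts from the first time "Done" was observed. Otherwise each refresh inside the window would reset the clock and "Done" could never become stable. A tiny worked check under that assumption; the timestamps are illustrative:

```python
# first_seen: when "Done" was first cached; now: a later poll that recomputed it.
first_seen = 1000.0
now = 1020.0

# The write-back keeps the original timestamp, as in the hunk above.
cache_time = first_seen if first_seen else now
assert cache_time == 1000.0  # the 30s window keeps counting from first sight
```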