Add multiple new modules and tools to enhance the functionality and extensibility of the Maestro project (#333)
* Added a **pyproject.toml** file to define project metadata and dependencies. * Added **run\_maestro.py** and **osworld\_run\_maestro.py** to provide the main execution logic. * Introduced multiple new modules, including **Evaluator**, **Controller**, **Manager**, and **Sub-Worker**, supporting task planning, state management, and data analysis. * Added a **tools module** containing utility functions and tool configurations to improve code reusability. * Updated the **README** and documentation with usage examples and module descriptions. These changes lay the foundation for expanding the Maestro project’s functionality and improving the user experience. Co-authored-by: Hiroid <guoliangxuan@deepmatrix.com>
This commit is contained in:
121
mm_agents/maestro/utils/README.md
Normal file
121
mm_agents/maestro/utils/README.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Maestro Utilities
|
||||
|
||||
This directory contains various utility functions for the Maestro project to improve code reusability and maintainability.
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
mm_agents/maestro/utils/
|
||||
├── README.md # This document
|
||||
├── file_utils.py # File operation utilities
|
||||
├── id_utils.py # ID generation utilities
|
||||
└── common_utils.py # Other common utilities
|
||||
```
|
||||
|
||||
## file_utils.py - File Operation Utilities
|
||||
|
||||
### File Locking Mechanism
|
||||
|
||||
```python
|
||||
from gui_agents.utils.file_utils import locked
|
||||
|
||||
# Cross-platform file lock, supports Windows and Unix systems
|
||||
with locked(file_path, "w") as f:
|
||||
f.write("content")
|
||||
```
|
||||
|
||||
### Safe JSON Operations
|
||||
|
||||
```python
|
||||
from gui_agents.utils.file_utils import safe_write_json, safe_read_json
|
||||
|
||||
# Safely write JSON file (atomic operation)
|
||||
safe_write_json(file_path, data)
|
||||
|
||||
# Safely read JSON file
|
||||
data = safe_read_json(file_path, default={})
|
||||
```
|
||||
|
||||
### Safe Text Operations
|
||||
|
||||
```python
|
||||
from gui_agents.utils.file_utils import safe_write_text, safe_read_text
|
||||
|
||||
# Safely write text file (UTF-8 encoding)
|
||||
safe_write_text(file_path, content)
|
||||
|
||||
# Safely read text file (automatic encoding detection)
|
||||
content = safe_read_text(file_path)
|
||||
```
|
||||
|
||||
### File Management Tools
|
||||
|
||||
```python
|
||||
from gui_agents.utils.file_utils import ensure_directory, backup_file
|
||||
|
||||
# Ensure directory exists
|
||||
ensure_directory(path)
|
||||
|
||||
# Create file backup
|
||||
backup_path = backup_file(file_path, ".backup")
|
||||
```
|
||||
|
||||
## id_utils.py - ID Generation Utilities
|
||||
|
||||
### UUID Generation
|
||||
|
||||
```python
|
||||
from gui_agents.utils.id_utils import generate_uuid, generate_short_id
|
||||
|
||||
# Generate complete UUID
|
||||
uuid_str = generate_uuid() # "550e8400-e29b-41d4-a716-446655440000"
|
||||
|
||||
# Generate short ID
|
||||
short_id = generate_short_id("task", 8) # "task550e8400"
|
||||
```
|
||||
|
||||
### Timestamp ID
|
||||
|
||||
```python
|
||||
from gui_agents.utils.id_utils import generate_timestamp_id
|
||||
|
||||
# Timestamp-based ID
|
||||
ts_id = generate_timestamp_id("event") # "event1755576661494"
|
||||
```
|
||||
|
||||
### Hash ID
|
||||
|
||||
```python
|
||||
from gui_agents.utils.id_utils import generate_hash_id
|
||||
|
||||
# Content hash-based ID
|
||||
hash_id = generate_hash_id("some content", "hash", 8) # "hasha1b2c3d4"
|
||||
```
|
||||
|
||||
### Composite ID
|
||||
|
||||
```python
|
||||
from gui_agents.utils.id_utils import generate_composite_id
|
||||
|
||||
# Composite ID (prefix + timestamp + UUID)
|
||||
composite_id = generate_composite_id("task", True, True, "_") # "task_1755576661494_550e8400"
|
||||
```
|
||||
|
||||
## Usage in NewGlobalState
|
||||
|
||||
The new `NewGlobalState` class has been refactored to use these utility functions:
|
||||
|
||||
```python
|
||||
from gui_agents.utils.file_utils import safe_write_json, safe_read_json
|
||||
from gui_agents.utils.id_utils import generate_uuid
|
||||
|
||||
class NewGlobalState:
|
||||
def __init__(self, ...):
|
||||
self.task_id = task_id or f"task-{generate_uuid()[:8]}"
|
||||
|
||||
def set_task(self, task_data):
|
||||
safe_write_json(self.task_path, task_data)
|
||||
|
||||
def get_task(self):
|
||||
return safe_read_json(self.task_path, {})
|
||||
```
|
||||
0
mm_agents/maestro/utils/__init__.py
Normal file
0
mm_agents/maestro/utils/__init__.py
Normal file
339
mm_agents/maestro/utils/analyze_display.py
Normal file
339
mm_agents/maestro/utils/analyze_display.py
Normal file
@@ -0,0 +1,339 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Display.json analyzer - Extract and analyze execution statistics from display.json files
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
def extract_cost_value(cost_str: str) -> tuple:
    """Extract numeric value and currency symbol from a cost string.

    Handles the symbol appearing either after the number ("0.000343¥")
    or before it ("$1.50").

    Args:
        cost_str: Cost string with an optional currency symbol.

    Returns:
        Tuple of (float value, currency symbol). Falls back to
        (0.0, "\u00a5") when no parseable number is present.
    """
    # Symbol class covers half-width yen (U+00A5), full-width yuan (U+FFE5),
    # dollar, euro and pound; the symbol may precede or follow the digits.
    match = re.search(r'([\u00a5\uffe5$€£]*)([\d.]+)([\u00a5\uffe5$€£]*)', cost_str)
    if match:
        try:
            value = float(match.group(2))
        except ValueError:
            # e.g. "1.2.3" matches [\d.]+ but is not a valid float
            return 0.0, "\u00a5"
        # Prefer a leading symbol, then a trailing one, then default to yuan
        currency = match.group(1) or match.group(3) or "\u00a5"
        return value, currency
    return 0.0, "\u00a5"
|
||||
|
||||
|
||||
def convert_currency_to_yuan(value: float, currency: str) -> float:
    """Convert a cost value in the given currency to yuan (CNY).

    Args:
        value: Cost value.
        currency: Currency symbol (half- or full-width yen/yuan, $, €, £).

    Returns:
        Value converted to yuan; unknown symbols are passed through at 1.0.
    """
    # Static, approximate rates — replace with live rates in production.
    # Both Unicode widths of the yen/yuan sign are listed explicitly so
    # neither silently shadows the other as a duplicate dict key.
    conversion_rates = {
        "\u00a5": 1.0,   # half-width yen/yuan sign
        "\uffe5": 1.0,   # full-width yuan sign
        "$": 7.2,        # USD to CNY (approximate)
        "€": 7.8,        # EUR to CNY (approximate)
        "£": 9.1,        # GBP to CNY (approximate)
    }

    rate = conversion_rates.get(currency, 1.0)
    return value * rate
|
||||
|
||||
|
||||
def analyze_display_json(file_path: str) -> Dict:
    """Analyze a single display.json file and extract execution statistics.

    Three layouts are supported:
      * agents3 mode — a 'main_loop_completed' entry under operations.controller
        carries step count and duration; tokens/cost are summed over all modules.
      * fast mode — operations.agent (+ optional operations.grounding) entries;
        duration comes from 'total_execution_time_fast' under operations.other.
      * normal mode — tokens/cost counted only for a whitelist of operations;
        steps = number of hardware operations; duration from
        'total_execution_time' under operations.other.

    Args:
        file_path: Path to the display.json file.

    Returns:
        Dict with action_count, total_duration, token counts, total_cost
        (in yuan) and currency_symbol; empty dict if the file is unreadable.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return {}

    action_count = 0
    total_duration = 0
    total_input_tokens = 0
    total_output_tokens = 0
    total_tokens = 0
    total_cost = 0.0
    currency_symbol = "¥"  # All costs are normalized to yuan

    operations = data.get('operations', {})

    def _accumulate(operation):
        """Add one operation's tokens and (yuan-normalized) cost to the totals."""
        nonlocal total_input_tokens, total_output_tokens, total_tokens, total_cost
        tokens = operation.get('tokens', [0, 0, 0])
        if isinstance(tokens, list) and len(tokens) >= 3:
            total_input_tokens += tokens[0]
            total_output_tokens += tokens[1]
            total_tokens += tokens[2]
        cost_value, currency = extract_cost_value(operation.get('cost', '0¥'))
        total_cost += convert_currency_to_yuan(cost_value, currency)

    # agents3 format: the controller reports a main_loop_completed summary.
    # (Previously this detection loop was duplicated by a second identical
    # extraction loop; a single pass suffices.)
    is_agents3 = False
    for operation in operations.get('controller', []):
        if operation.get('operation') == 'main_loop_completed':
            is_agents3 = True
            action_count = operation.get('step_count', 0)
            total_duration = int(operation.get('duration', 0))
            break

    if is_agents3:
        # Tokens/cost are summed over every operation of every module
        for module_operations in operations.values():
            if isinstance(module_operations, list):
                for operation in module_operations:
                    _accumulate(operation)

    elif 'agent' in operations:
        # Fast mode: count fast_planning_execution steps across agent+grounding.
        # .get() — 'grounding' may be absent (previously a KeyError).
        ops_list = list(operations['agent'])
        ops_list.extend(operations.get('grounding', []))
        for operation in ops_list:
            if operation.get('operation') == 'fast_planning_execution':
                action_count += 1
            _accumulate(operation)

        for operation in operations.get('other', []):
            if operation.get('operation') == 'total_execution_time_fast':
                total_duration = int(operation.get('duration', 0))
                break

    else:
        # Normal mode: only a whitelist of operations carries tokens/cost
        token_cost_operations = {
            'formulate_query', 'retrieve_narrative_experience', 'retrieve_knowledge',
            'knowledge_fusion', 'subtask_planner', 'generated_dag', 'reflection',
            'episode_summarization', 'narrative_summarization', 'Worker.retrieve_episodic_experience',
            'action_plan', 'grounding_model_response'
        }

        # Every hardware operation counts as one executed step
        action_count = len(operations.get('hardware', []))

        for module_operations in operations.values():
            if isinstance(module_operations, list):
                for operation in module_operations:
                    if operation.get('operation', '') in token_cost_operations:
                        _accumulate(operation)

        for operation in operations.get('other', []):
            if operation.get('operation') == 'total_execution_time':
                total_duration = int(operation.get('duration', 0))
                break

    return {
        'action_count': action_count,
        'total_duration': total_duration,
        'total_input_tokens': total_input_tokens,
        'total_output_tokens': total_output_tokens,
        'total_tokens': total_tokens,
        'total_cost': total_cost,
        'currency_symbol': currency_symbol
    }
|
||||
|
||||
|
||||
def analyze_folder(folder_path: str) -> List[Dict]:
    """Recursively analyze every display.json found under *folder_path*.

    Args:
        folder_path: Root directory to search.

    Returns:
        One analysis dict per readable display.json, each annotated with
        its 'file_path'; empty list when none are found.
    """
    search_pattern = os.path.join(folder_path, "**", "display.json")
    found = glob.glob(search_pattern, recursive=True)

    if not found:
        print(f"No display.json files found in {folder_path}")
        return []

    print(f"Found {len(found)} display.json files")

    collected: List[Dict] = []
    for path in found:
        print(f"Analyzing: {path}")
        stats = analyze_display_json(path)
        # Unreadable files yield an empty dict and are skipped
        if stats:
            stats['file_path'] = path
            collected.append(stats)

    return collected
|
||||
|
||||
|
||||
def aggregate_results(results: List[Dict]) -> Dict:
    """Aggregate per-file statistics into a single summary.

    Args:
        results: List of per-file dicts produced by analyze_display_json.

    Returns:
        Aggregated statistics dict (empty dict for empty input). The step
        total is published under both 'total_fast_actions' (historical key)
        and 'action_count' — format_output_line() reads 'action_count', so
        without the second key the aggregated step count always printed 0.
    """
    if not results:
        return {}

    step_total = sum(r['action_count'] for r in results)
    # Duration uses max rather than sum — presumably the runs overlap in
    # wall-clock time; TODO confirm if files are produced sequentially.
    total_duration = max(r['total_duration'] for r in results)
    total_input_tokens = sum(r['total_input_tokens'] for r in results)
    total_output_tokens = sum(r['total_output_tokens'] for r in results)
    total_tokens = sum(r['total_tokens'] for r in results)
    total_cost = sum(r['total_cost'] for r in results)

    # Use the currency symbol from the first result, or default to ¥
    currency_symbol = results[0].get('currency_symbol', '¥')

    return {
        'total_fast_actions': step_total,
        'action_count': step_total,  # consumed by format_output_line()
        'total_duration': total_duration,
        'total_input_tokens': total_input_tokens,
        'total_output_tokens': total_output_tokens,
        'total_tokens': total_tokens,
        'total_cost': total_cost,
        'currency_symbol': currency_symbol
    }
|
||||
|
||||
|
||||
def format_output_line(stats: Dict) -> str:
    """Format statistics into a single output line.

    Args:
        stats: Statistics dictionary (per-file or aggregated).

    Returns:
        "steps, duration, (in_tokens, out_tokens, total_tokens), cost<sym>",
        or "No data available" for an empty dict.
    """
    if not stats:
        return "No data available"

    # Aggregated dicts historically exposed the step total only under
    # 'total_fast_actions', so accept either key instead of printing 0.
    steps = stats.get('action_count', stats.get('total_fast_actions', 0))
    duration = stats.get('total_duration', 0)
    tokens = (stats.get('total_input_tokens', 0),
              stats.get('total_output_tokens', 0),
              stats.get('total_tokens', 0))
    cost = stats.get('total_cost', 0.0)

    return f"{steps}, {duration}, {tokens}, {cost:.4f}{stats.get('currency_symbol', '¥')}"
|
||||
|
||||
|
||||
def main():
    """CLI entry point: aggregate statistics from display.json files.

    Usage: python analyze_display.py <folder_path>
    """
    import sys

    if len(sys.argv) < 2:
        print("Usage: python analyze_display.py <folder_path>")
        print("Example: python analyze_display.py lybicguiagents/runtime")
        return

    folder_path = sys.argv[1]
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        return

    # Collect per-file stats, then fold them into one summary line
    per_file = analyze_folder(folder_path)
    if not per_file:
        print("No valid display.json files found")
        return

    summary = aggregate_results(per_file)

    divider = "-" * 80
    print("\nStatistics:")
    print(divider)
    print("Steps, Duration (seconds), (Input Tokens, Output Tokens, Total Tokens), Cost")
    print(divider)
    print(format_output_line(summary))
    print(divider)


if __name__ == "__main__":
    main()
|
||||
577
mm_agents/maestro/utils/common_utils.py
Normal file
577
mm_agents/maestro/utils/common_utils.py
Normal file
@@ -0,0 +1,577 @@
|
||||
import json
|
||||
import re
|
||||
from typing import List
|
||||
import time
|
||||
import tiktoken
|
||||
import numpy as np
|
||||
import os
|
||||
import platform
|
||||
import io
|
||||
from PIL import Image
|
||||
import logging
|
||||
|
||||
from typing import Tuple, List, Union, Dict, Optional
|
||||
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
import pickle
|
||||
|
||||
|
||||
class Node(BaseModel):
    """A single subtask node in the planner's DAG (see parse_dag)."""
    name: str  # short subtask identifier
    info: str  # human-readable subtask description
    # New fields for failed task analysis
    assignee_role: Optional[str] = None  # presumably the role assigned to execute this node — TODO confirm
    error_type: Optional[str] = None  # Error type: UI_ERROR, EXECUTION_ERROR, PLANNING_ERROR, etc.
    error_message: Optional[str] = None  # Specific error message
    failure_count: Optional[int] = 0  # Failure count
    last_failure_time: Optional[str] = None  # Last failure time
    suggested_action: Optional[str] = None  # Suggested repair action
|
||||
|
||||
|
||||
class Dag(BaseModel):
    """Directed acyclic graph of subtasks, parsed from LLM JSON output by parse_dag."""
    nodes: List[Node]
    edges: List[List[Node]]  # each inner list presumably holds a [from, to] Node pair — TODO confirm with producer
|
||||
|
||||
class SafeLoggingFilter(logging.Filter):
    """Logging filter that neutralizes %-format mismatches before they raise.

    A record whose message has a different number of '%s' placeholders than
    supplied arguments — or whose interpolation raises — gets its message
    replaced with a safe diagnostic string. Records are never dropped.
    """

    def filter(self, record):
        """Always return True; rewrite the record in place if formatting would fail."""
        try:
            has_args = hasattr(record, 'msg') and hasattr(record, 'args') and record.args
            if has_args:
                try:
                    message = record.msg
                    if isinstance(message, str) and '%s' in message:
                        expected = message.count('%s')
                        supplied = len(record.args)
                        if expected != supplied:
                            # Placeholder/argument count mismatch — defuse it
                            head = message[:100]
                            tail = '...' if len(str(message)) > 100 else ''
                            record.msg = f"[Format mismatch prevented] Msg: {head}{tail}, Args count: {supplied}"
                            record.args = ()
                            return True

                    # Dry-run the interpolation to surface latent errors early
                    _ = record.msg % record.args
                except (TypeError, ValueError):
                    head = str(record.msg)[:100]
                    tail = '...' if len(str(record.msg)) > 100 else ''
                    record.msg = f"[Logging format error prevented] Original message: {head}{tail}, Args: {record.args}"
                    record.args = ()
                    return True
        except Exception as e:
            # Filtering itself must never break logging
            record.msg = f"[Logging filter error: {e}] Original message could not be processed safely"
            record.args = ()
        return True
|
||||
|
||||
class ImageDataFilter(logging.Filter):
    """
    Custom log filter that strips image/binary payloads out of log records.

    Designed for multimodal LLM API logging, where prompts may embed large
    base64-encoded screenshots that would otherwise flood the logs.
    """

    # Substrings identifying base64-encoded image payloads.
    # NOTE: matched case-insensitively (see _contains_image_data).
    IMAGE_INDICATORS = [
        'data:image',     # data URL format
        'iVBORw0KGgo',    # PNG base64 beginning
        '/9j/',           # JPEG base64 beginning
        'R0lGOD',         # GIF base64 beginning
        'UklGR',          # WEBP base64 beginning
        'Qk0',            # BMP base64 beginning
    ]

    # Magic bytes identifying common image containers
    BINARY_HEADERS = [
        b'\xff\xd8\xff',          # JPEG file header
        b'\x89PNG\r\n\x1a\n',     # PNG file header
        b'GIF87a',                # GIF87a file header
        b'GIF89a',                # GIF89a file header
        b'RIFF',                  # WEBP/WAV file header
        b'BM',                    # BMP file header
    ]

    def filter(self, record):
        """Scrub image data from the record's msg and args; never drop the record."""
        try:
            if hasattr(record, 'msg') and record.msg:
                record.msg = self._filter_message(record.msg)

            if hasattr(record, 'args') and record.args:
                record.args = self._filter_args(record.args)

        except Exception as e:
            # Filtering must never break logging itself
            record.msg = f"[Log filter error: {e}] Original message may contain image data"
            record.args = ()

        return True

    def _filter_message(self, msg):
        """Replace oversized image/binary messages with a short placeholder."""
        msg_str = str(msg)

        # Messages above ~5KB may embed encoded payloads
        if len(msg_str) > 5000:
            if self._contains_image_data(msg_str):
                return f"[LLM Call Log] Contains image data (size: {len(msg_str)} characters) - filtered"

            if self._contains_binary_data(msg_str):
                return f"[LLM Call Log] Contains binary data (size: {len(msg_str)} characters) - filtered"

        return msg

    def _filter_args(self, args):
        """Return args as a tuple with large image/binary members replaced by placeholders."""
        filtered_args = []

        for arg in args:
            if isinstance(arg, (bytes, bytearray)):
                # Binary data larger than 1KB gets summarized
                if len(arg) > 1000:
                    if self._is_image_binary(arg):
                        filtered_args.append(f"[Image binary data filtered, size: {len(arg)} bytes]")
                    else:
                        filtered_args.append(f"[Binary data filtered, size: {len(arg)} bytes]")
                else:
                    filtered_args.append(arg)

            elif isinstance(arg, str):
                # Strings larger than 5KB are checked for image payloads
                if len(arg) > 5000:
                    if self._contains_image_data(arg):
                        filtered_args.append(f"[Image string data filtered, size: {len(arg)} characters]")
                    else:
                        filtered_args.append(arg)
                else:
                    filtered_args.append(arg)

            else:
                # Other data types pass through unchanged
                filtered_args.append(arg)

        return tuple(filtered_args)

    def _contains_image_data(self, text):
        """Case-insensitive check for known image-data markers in *text*."""
        text_lower = text.lower()
        # BUGFIX: several indicators contain uppercase characters (e.g.
        # 'iVBORw0KGgo'), so they could never match the lowercased text;
        # lowercase both sides of the comparison.
        return any(indicator.lower() in text_lower for indicator in self.IMAGE_INDICATORS)

    def _contains_binary_data(self, text):
        """Heuristic: a high ratio of non-ASCII characters suggests binary data."""
        non_ascii_count = sum(1 for char in text if ord(char) > 127)
        non_ascii_ratio = non_ascii_count / len(text) if len(text) > 0 else 0

        # More than 10% non-ASCII is treated as (likely) binary content
        return non_ascii_ratio > 0.1

    def _is_image_binary(self, data):
        """Return True if *data* starts with a known image file header."""
        if len(data) < 10:
            return False

        return any(data.startswith(header) for header in self.BINARY_HEADERS)
|
||||
|
||||
NUM_IMAGE_TOKEN = 1105 # Value set of screen of size 1920x1080 for openai vision
|
||||
|
||||
def calculate_tokens(messages, num_image_token=NUM_IMAGE_TOKEN) -> Tuple[int, int]:
    """Estimate (input_tokens, output_tokens) for an LLM message exchange.

    The last message is treated as the model output; all earlier messages
    are input. Each message is assumed to be an OpenAI-style dict whose
    content[0] is a text part, with any additional content part counted as
    one image costing *num_image_token* tokens — TODO confirm against callers.

    Args:
        messages: List of message dicts; message["content"][0]["text"] must exist.
        num_image_token: Flat token cost charged per attached image.

    Returns:
        Tuple of (input text + image tokens, output text tokens).
    """

    num_input_images = 0
    output_message = messages[-1]

    input_message = messages[:-1]

    input_string = """"""
    for message in input_message:
        input_string += message["content"][0]["text"] + "\n"
        if len(message["content"]) > 1:
            # Any extra content part is assumed to be an image attachment
            num_input_images += 1

    input_text_tokens = get_input_token_length(input_string)

    input_image_tokens = num_image_token * num_input_images

    output_tokens = get_input_token_length(output_message["content"][0]["text"])

    return (input_text_tokens + input_image_tokens), output_tokens
|
||||
|
||||
def parse_dag(text):
    """Parse a Dag object out of raw LLM output.

    Extraction order: <json>…</json> tags, then ```json fences, then bare
    ``` fences, then the whole text. If strict JSON parsing fails, two
    repairs are attempted (single→double quotes, extracting the outermost
    {...}). The DAG may live under a "dag" key, at the top level, or under
    any top-level key whose value has "nodes" and "edges".

    Args:
        text: Raw LLM response text.

    Returns:
        A validated Dag instance, or None on any parse/validation failure.
    """
    logger = logging.getLogger("desktopenv.agent")

    def _extract(pattern):
        # Return the first capture group of `pattern` in text, stripped
        m = re.search(pattern, text, re.DOTALL)
        return m.group(1).strip() if m else None

    # 1) look for <json>…</json>
    json_str = _extract(r"<json>(.*?)</json>")
    # 2) fallback to ```json … ```
    if json_str is None:
        json_str = _extract(r"```json\s*(.*?)\s*```")
    if json_str is None:
        # 3) try other possible code block formats
        json_str = _extract(r"```\s*(.*?)\s*```")

    # 4) if still not found, try to parse the entire text
    if json_str is None:
        logger.warning("JSON markers not found, attempting to parse entire text")
        json_str = text.strip()

    # Log the extracted JSON string (truncated)
    logger.debug(f"Extracted JSON string: {json_str[:100]}...")

    try:
        # Try to parse as JSON directly
        payload = json.loads(json_str)
    except json.JSONDecodeError as e:
        logger.error(f"JSON parsing error: {e}")

        # Try to fix common JSON format issues
        try:
            # Repair 1: replace single quotes with double quotes
            fixed_json = json_str.replace("'", "\"")
            payload = json.loads(fixed_json)
            logger.info("Successfully fixed JSON by replacing single quotes with double quotes")
        except json.JSONDecodeError:
            # Repair 2: find and extract a possible JSON object
            try:
                # Look for content between the outermost { and }
                match = re.search(r"\{(.*)\}", json_str, re.DOTALL)
                if match:
                    fixed_json = "{" + match.group(1) + "}"
                    payload = json.loads(fixed_json)
                    logger.info("Successfully fixed JSON by extracting JSON object")
                else:
                    logger.error("Unable to fix JSON format")
                    return None
            except Exception:
                logger.error("All JSON fixing attempts failed")
                return None

    # Check if payload contains the expected "dag" key
    if "dag" not in payload:
        logger.warning("'dag' key not found in JSON, attempting to use entire JSON object")
        # If no dag key, try to use the entire payload
        try:
            # Check if payload directly conforms to the Dag structure
            if "nodes" in payload and "edges" in payload:
                return Dag(**payload)
            else:
                # Iterate through top-level keys to find a possible dag structure
                for key, value in payload.items():
                    if isinstance(value, dict) and "nodes" in value and "edges" in value:
                        logger.info(f"Found DAG structure in key '{key}'")
                        return Dag(**value)

                logger.error("Could not find valid DAG structure in JSON")
                return None
        except ValidationError as e:
            logger.error(f"Data structure validation error: {e}")
            return None

    # Normal case: use the value of the "dag" key
    try:
        return Dag(**payload["dag"])
    except ValidationError as e:
        logger.error(f"DAG data structure validation error: {e}")
        return None
    except Exception as e:
        logger.error(f"Unknown error parsing DAG: {e}")
        return None
|
||||
|
||||
|
||||
def parse_single_code_from_string(input_string):
    """Extract the first executable snippet (or control command) from LLM output.

    Resolution order: a bare WAIT/DONE/FAIL command, the first fenced code
    block (with a trailing command split onto its own entry), the first
    `obj.method(...)` style call, the first non-empty line, else "fail".
    """
    text = input_string.strip()
    control_commands = ["WAIT", "DONE", "FAIL"]
    if text in control_commands:
        return text

    fenced_blocks = re.findall(r"```(?:\w+\s+)?(.*?)```", text, re.DOTALL)
    extracted = []
    for block in fenced_blocks:
        block = block.strip()
        if block in control_commands:
            extracted.append(block.strip())
        elif block.split("\n")[-1] in control_commands:
            # Code followed by a command on its own last line: keep both
            parts = block.split("\n")
            if len(parts) > 1:
                extracted.append("\n".join(parts[:-1]))
            extracted.append(parts[-1])
        else:
            extracted.append(block)
    if extracted:
        return extracted[0]

    # Function call with balanced parentheses and quotes, e.g. agent.click(...)
    call_match = re.search(r"(\w+\.\w+\((?:[^()]*|\([^()]*\))*\))", text)
    if call_match:
        return call_match.group(1)

    non_empty = [line.strip() for line in text.splitlines() if line.strip()]
    if non_empty:
        return non_empty[0]
    return "fail"
|
||||
|
||||
|
||||
def get_input_token_length(input_string):
    """Return the number of GPT-4 tokenizer tokens in *input_string*."""
    encoder = tiktoken.encoding_for_model("gpt-4")
    return len(encoder.encode(input_string))
|
||||
|
||||
def parse_screenshot_analysis(action_plan: str) -> str:
    """Parse the "(Screenshot Analysis)" section out of a worker LLM response.

    Args:
        action_plan: The raw LLM response text.

    Returns:
        The section body, cut at the earliest following section marker,
        or "" when the marker is absent or parsing fails.
    """
    try:
        marker = "(Screenshot Analysis)"
        if marker not in action_plan:
            return ""
        begin = action_plan.find(marker)

        # Cut at whichever known follow-up section appears first
        stop = len(action_plan)
        for section in ("(Next Action)", "(Grounded Action)", "(Previous action verification)"):
            pos = action_plan.find(section, begin + 1)
            if pos != -1 and pos < stop:
                stop = pos

        return action_plan[begin + len(marker):stop].strip()
    except Exception:
        return ""
|
||||
|
||||
def parse_technician_screenshot_analysis(command_plan: str) -> str:
    """Parse the "(Screenshot Analysis)" section out of a technician LLM response.

    Args:
        command_plan: The raw LLM response text.

    Returns:
        The section body up to "(Next Action)" (the only follow-up marker
        in technician responses), or "" when absent or on failure.
    """
    try:
        header = "(Screenshot Analysis)"
        head_pos = command_plan.find(header)
        if head_pos == -1:
            return ""

        boundary = command_plan.find("(Next Action)", head_pos + 1)
        if boundary == -1:
            boundary = len(command_plan)

        return command_plan[head_pos + len(header):boundary].strip()
    except Exception:
        return ""
|
||||
|
||||
def sanitize_code(code):
    """Promote the first double-quoted string in multi-line code to triple quotes.

    A newline inside a single pair of double quotes is a syntax error when
    the snippet is executed; wrapping the first quoted span in triple
    quotes keeps it valid. Single-line code is returned unchanged.
    """
    if "\n" not in code:
        return code

    # Capture the outermost double-quoted spans, newlines included
    quoted_spans = re.findall(r'(".*?")', code, flags=re.DOTALL)
    if quoted_spans:
        # Rewrite only the first occurrence
        first_span = quoted_spans[0]
        code = code.replace(first_span, f'"""{first_span[1:-1]}"""', 1)
    return code
|
||||
|
||||
|
||||
def extract_first_agent_function(code_string):
    """Return the first ``agent.<method>(...)`` call found in *code_string*.

    The pattern tolerates quoted arguments (parentheses inside string
    literals do not terminate the match). Returns None when no call exists.
    """
    call_pattern = r'agent\.[a-zA-Z_]+\((?:[^()\'"]|\'[^\']*\'|"[^"]*")*\)'
    match = re.search(call_pattern, code_string)
    return match.group(0) if match else None
|
||||
|
||||
|
||||
def load_knowledge_base(kb_path: str) -> Dict:
    """Load a JSON knowledge base from *kb_path*.

    Args:
        kb_path: Path to the JSON file.

    Returns:
        The parsed dictionary, or an empty dict when the file is missing,
        unreadable, or not valid JSON (best-effort semantics).
    """
    try:
        # Explicit UTF-8 keeps reads consistent across platforms; without it,
        # Windows falls back to the locale encoding and can fail on non-ASCII.
        with open(kb_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading knowledge base: {e}")
        return {}
|
||||
|
||||
|
||||
def clean_empty_embeddings(embeddings: Dict) -> Dict:
    """Drop entries whose value is empty, scalar-shaped, or an error string.

    Mutates *embeddings* in place and returns the same dict.
    """
    def _is_bad(value) -> bool:
        arr = np.array(value)
        if arr.size == 0 or arr.shape == ():
            return True
        if isinstance(value, list) and value and isinstance(value[0], str) and value[0].startswith('Error:'):
            return True
        return isinstance(value, str) and value.startswith('Error:')

    for key in [k for k, v in embeddings.items() if _is_bad(v)]:
        del embeddings[key]
    return embeddings
|
||||
|
||||
|
||||
def load_embeddings(embeddings_path: str) -> Dict:
    """Load a pickled embeddings dict and purge empty/error entries.

    Returns an empty dict when the file is missing, unreadable, or not a
    valid pickle — treated as "no embeddings yet" rather than an error.
    """
    try:
        with open(embeddings_path, "rb") as f:
            loaded = pickle.load(f)
        return clean_empty_embeddings(loaded)
    except Exception:
        # NOTE(review): any failure is reported as an empty file; the
        # underlying exception text is intentionally not surfaced here.
        print(f"Empty embeddings file: {embeddings_path}")
        return {}
|
||||
|
||||
|
||||
def save_embeddings(embeddings_path: str, embeddings: Dict):
    """Pickle *embeddings* to *embeddings_path*, creating parent dirs first.

    Failures are reported to stdout rather than raised (best-effort
    persistence).
    """
    try:
        import os
        os.makedirs(os.path.dirname(embeddings_path), exist_ok=True)
        with open(embeddings_path, "wb") as handle:
            pickle.dump(embeddings, handle)
    except Exception as exc:
        print(f"Error saving embeddings: {exc}")
|
||||
|
||||
def agent_log_to_string(agent_log: List[Dict]) -> str:
    """Render agent log entries as one text block for LLM consumption.

    Args:
        agent_log: Log entries; each may carry "id", "type" and "content".

    Returns:
        A formatted multi-line string, or a placeholder when the log is empty.
    """
    if not agent_log:
        return "No agent log entries yet."

    lines = ["[AGENT LOG]"]
    lines.extend(
        f"[Entry {entry.get('id', 'N/A')} - {entry.get('type', 'N/A').capitalize()}] "
        f"{entry.get('content', '')}"
        for entry in agent_log
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
def show_task_completion_notification(task_status: str, error_message: str = ""):
    """Show a platform-native popup for task completion status.

    Args:
        task_status: One of 'success', 'failed', 'completed', 'error';
            any other value falls back to a generic completion message.
        error_message: Detail text shown when *task_status* is 'error'
            (truncated to 100 characters).
    """
    # Resolve title/message/dialog type up front so the except handler below
    # can always reference them (the original raised NameError if an
    # exception fired before `message` was assigned inside the try block).
    if task_status == "success":
        title, message, dialog_type = "Maestro", "Task Completed Successfully", "info"
    elif task_status == "failed":
        title, message, dialog_type = "Maestro", "Task Failed/Rejected", "error"
    elif task_status == "error":
        detail = error_message[:100] if error_message else "Unknown error"
        title, message, dialog_type = "Maestro Error", f"Task Execution Error: {detail}", "error"
    else:
        # Covers "completed" and any unknown status.
        title, message, dialog_type = "Maestro", "Task Execution Completed", "info"

    try:
        current_platform = platform.system()

        # NOTE(review): title/message are interpolated into shell commands
        # below; text containing quotes could break or inject the command.
        # Acceptable for these internal fixed strings — confirm before
        # passing untrusted text through error_message.
        if current_platform == "Darwin":
            # macOS
            os.system(
                f'osascript -e \'display dialog "{message}" with title "{title}" buttons "OK" default button "OK"\''
            )
        elif current_platform == "Linux":
            # Linux
            if dialog_type == "error":
                os.system(
                    f'zenity --error --title="{title}" --text="{message}" --width=300 --height=150'
                )
            else:
                os.system(
                    f'zenity --info --title="{title}" --text="{message}" --width=200 --height=100'
                )
        elif current_platform == "Windows":
            # Windows
            os.system(
                f'msg %username% "{message}"'
            )
        else:
            print(f"\n[{title}] {message}")

    except Exception as e:
        print(f"\n[Agents3] Failed to show notification: {e}")
        print(f"[Agents3] {message}")
|
||||
|
||||
def screenshot_bytes_to_pil_image(screenshot_bytes: bytes) -> Optional[Image.Image]:
    """Decode raw screenshot bytes into a PIL Image at its original size.

    Args:
        screenshot_bytes: Encoded image bytes (e.g. obs["screenshot"]).

    Returns:
        The decoded PIL Image object.

    Raises:
        RuntimeError: If the bytes cannot be decoded as an image.
    """
    try:
        # PIL reads the format and dimensions straight from the byte stream.
        return Image.open(io.BytesIO(screenshot_bytes))
    except Exception as exc:
        raise RuntimeError(f"Failed to convert screenshot bytes to PIL Image: {exc}")
|
||||
|
||||
281
mm_agents/maestro/utils/display_viewer.py
Normal file
281
mm_agents/maestro/utils/display_viewer.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Display Viewer - Used to display operation records in display.json file in chronological order
|
||||
|
||||
Usage:
|
||||
python -m lybicguiagents.gui_agents.utils.display_viewer --file /path/to/display.json [--output text|json] [--filter module1,module2]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
|
||||
|
||||
def load_display_json(file_path: str) -> Dict:
    """Load a display.json file, exiting the process on any failure.

    Tries UTF-8 first and falls back to GB2312 for legacy files.

    Args:
        file_path: Path to display.json file.

    Returns:
        Parsed JSON data.
    """
    def _parse(encoding: str) -> Dict:
        with open(file_path, 'r', encoding=encoding) as handle:
            return json.load(handle)

    try:
        try:
            return _parse('utf-8')
        except UnicodeDecodeError:
            print(
                f"Warning: Failed to decode '{file_path}' with utf-8, retrying with GB2312..."
            )
            return _parse('gb2312')
    except FileNotFoundError:
        print(f"Error: File '{file_path}' does not exist")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: File '{file_path}' is not a valid JSON format")
        sys.exit(1)
    except Exception as e:
        print(f"Error: An error occurred while reading file '{file_path}': {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def flatten_operations(data: Dict) -> List[Dict]:
    """Merge per-module operation lists into one timestamp-sorted list.

    Each operation dict gains a "module" key naming its source module
    (entries are tagged in place).

    Args:
        data: Parsed display.json data.

    Returns:
        All operations across modules, ordered by ascending timestamp.
    """
    merged: List[Dict] = []
    for module_name, ops in data.get("operations", {}).items():
        for record in ops:
            record["module"] = module_name
            merged.append(record)

    # Records without a timestamp sort as time 0.
    merged.sort(key=lambda record: record.get("timestamp", 0))
    return merged
|
||||
|
||||
|
||||
def format_timestamp(timestamp: float) -> str:
    """Format a UNIX timestamp as local 'YYYY-MM-DD HH:MM:SS.mmm'."""
    moment = datetime.datetime.fromtimestamp(timestamp)
    # strftime yields microseconds; drop the last three digits → milliseconds.
    return moment.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||
|
||||
|
||||
def format_duration(duration: float) -> str:
    """Render a duration in seconds with an appropriate unit (μs/ms/s)."""
    if duration >= 1:
        return f"{duration:.2f}s"
    if duration >= 0.001:
        return f"{duration * 1000:.2f}ms"
    return f"{duration * 1000000:.2f}μs"
|
||||
|
||||
|
||||
def format_tokens(tokens: List[int]) -> str:
    """Format [input, output, total] token counts; 'N/A' when incomplete."""
    if tokens and len(tokens) >= 3:
        return f"in:{tokens[0]} out:{tokens[1]} total:{tokens[2]}"
    return "N/A"
|
||||
|
||||
|
||||
def truncate_text(text: str, max_length: int = 100) -> str:
    """Truncate *text* to *max_length* characters, appending '...' when cut.

    Dict/list input is stringified first; falsy input yields "".
    """
    if not text:
        return ""

    rendered = str(text) if isinstance(text, (dict, list)) else text
    if len(rendered) <= max_length:
        return rendered

    # Reserve three characters for the ellipsis.
    # NOTE(review): assumes max_length >= 4; smaller values slice oddly.
    return rendered[:max_length - 3] + "..."
|
||||
|
||||
|
||||
def find_latest_display_json() -> Optional[str]:
    """Locate the newest runtime/<timestamp>/display.json relative to cwd.

    Looks for a "runtime" directory in the current directory, then in the
    parent. Returns the path as a string, or None when nothing is found.
    """
    runtime_dir = Path("runtime")
    if not (runtime_dir.exists() and runtime_dir.is_dir()):
        candidate = Path("..") / "runtime"
        if candidate.exists() and candidate.is_dir():
            runtime_dir = candidate
        else:
            return None

    run_dirs = [entry for entry in runtime_dir.iterdir() if entry.is_dir()]
    if not run_dirs:
        return None

    # Timestamp-named folders sort lexicographically == chronologically.
    display_file = sorted(run_dirs)[-1] / "display.json"
    return str(display_file) if display_file.exists() else None
|
||||
|
||||
|
||||
def main():
    """CLI entry point: render display.json operation logs chronologically.

    Accepts either one --file or a --dir to scan recursively; writes a
    text or JSON report next to each processed display.json.
    """
    parser = argparse.ArgumentParser(
        description=
        "Display operation records in display.json file in chronological order")
    parser.add_argument("--file", help="Path to display.json file")
    parser.add_argument("--dir", help="Path to directory containing display.json files (recursive)")
    parser.add_argument("--output",
                        choices=["text", "json"],
                        default="text",
                        help="Output format (default: text)")
    parser.add_argument(
        "--filter",
        help="Modules to filter, separated by commas (e.g., manager,worker)")

    args = parser.parse_args()

    # The two input modes are mutually exclusive.
    if args.file and args.dir:
        print("Error: --file and --dir cannot be used together")
        sys.exit(1)

    def process_one_file(file_path: str):
        """Render one display.json and write the report beside it."""
        # Load data
        data = load_display_json(file_path)
        # Flatten and sort operations
        operations = flatten_operations(data)
        # Handle module filtering
        filter_modules = None
        if args.filter:
            filter_modules = [module.strip() for module in args.filter.split(",")]
        # Generate output content
        output_content = ""
        if args.output == "json":
            # Filter operations if modules are specified
            if filter_modules:
                filtered_ops = [op for op in operations if op["module"] in filter_modules]
            else:
                filtered_ops = operations
            output_content = json.dumps(filtered_ops, indent=2, ensure_ascii=False)
        else:
            # Generate text format output
            output_lines = []
            for i, op in enumerate(operations):
                # Skip modules that don't match the filter if a filter is specified
                # (note: i still counts skipped entries, so numbering keeps
                # the original chronological index)
                if filter_modules and op["module"] not in filter_modules:
                    continue
                module = op["module"]
                operation = op.get("operation", "unknown")
                timestamp = format_timestamp(op.get("timestamp", 0))
                # Output basic information
                output_lines.append(f"{i+1:3d} | {timestamp} | {module:10} | {operation}")
                # Output detailed information
                if "duration" in op:
                    output_lines.append(f" └─ Duration: {format_duration(op['duration'])}")
                if "tokens" in op:
                    output_lines.append(f" └─ Tokens: {format_tokens(op['tokens'])}")
                if "cost" in op:
                    output_lines.append(f" └─ Cost: {op['cost']}")
                if "content" in op:
                    content = op["content"]
                    output_lines.append(f" └─ Content: {content}")
                if "status" in op:
                    output_lines.append(f" └─ Status: {op['status']}")
                output_lines.append("")
            output_content = "\n".join(output_lines)
        # Write output to file (next to the input, named by output format)
        input_path = Path(file_path)
        output_filename = f"display_viewer_output_{args.output}.txt"
        output_path = input_path.parent / output_filename
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(output_content)
            print(f"Output written to: {output_path}")
        except Exception as e:
            print(f"Error writing output file: {e}")
            sys.exit(1)

    # Directory mode: process every display.json found recursively.
    if args.dir:
        for root, dirs, files in os.walk(args.dir):
            for file in files:
                if file == "display.json":
                    file_path = os.path.join(root, file)
                    print(f"Processing: {file_path}")
                    process_one_file(file_path)
        return

    # Single-file mode; fall back to the newest runtime display.json.
    file_path = args.file
    if not file_path:
        file_path = find_latest_display_json()
        if not file_path:
            print(
                "Error: Cannot find display.json file, please specify file path using --file parameter"
            )
            sys.exit(1)
        print(f"Using the latest display.json file: {file_path}")
    process_one_file(file_path)
|
||||
|
||||
|
||||
# Script entry point; usage examples kept below as a no-op string.
if __name__ == "__main__":
    """
    python display_viewer.py --file
    python display_viewer.py --dir
    """
    main()
|
||||
53
mm_agents/maestro/utils/embedding_manager.py
Normal file
53
mm_agents/maestro/utils/embedding_manager.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
from ..utils.common_utils import (
|
||||
load_embeddings,
|
||||
save_embeddings,
|
||||
)
|
||||
import os
|
||||
|
||||
# List all embeddings' keys and their shapes
|
||||
# List all embeddings' keys and their shapes
def list_embeddings(embeddings_path: str):
    """Summarize stored embeddings: shape plus the first five values per key.

    Returns an empty dict when the file does not exist.
    """
    if not os.path.exists(embeddings_path):
        print(f"[EmbeddingManager] File not found: {embeddings_path}")
        return {}

    summary = {}
    for key, value in load_embeddings(embeddings_path).items():
        vec = np.array(value)
        summary[key] = {'shape': vec.shape, 'preview': vec.flatten()[:5].tolist()}
    return summary
|
||||
|
||||
# Delete a specific embedding by key
|
||||
# Delete a specific embedding by key
def delete_embedding(embeddings_path: str, key: str) -> bool:
    """Remove one embedding entry by key and persist the result.

    Returns True when the entry existed and was deleted, False otherwise.
    """
    if not os.path.exists(embeddings_path):
        print(f"[EmbeddingManager] File not found: {embeddings_path}")
        return False

    embeddings = load_embeddings(embeddings_path)
    if key not in embeddings:
        print(f"[EmbeddingManager] Key not found: {key}")
        return False

    embeddings.pop(key)
    save_embeddings(embeddings_path, embeddings)
    print(f"[EmbeddingManager] Deleted embedding for key: {key}")
    return True
|
||||
|
||||
def delete_empty_shape_embeddings(embeddings_path: str) -> int:
    """Delete all embeddings whose value is empty (shape==0), shape==(), or content is error string, and return the number deleted."""
    if not os.path.exists(embeddings_path):
        print(f"[EmbeddingManager] File not found: {embeddings_path}")
        return 0

    embeddings = load_embeddings(embeddings_path)

    def _is_invalid(value) -> bool:
        # Empty arrays, bare scalars, and 'Error:' payloads are all junk.
        vec = np.array(value)
        if vec.size == 0 or vec.shape == ():
            return True
        if isinstance(value, list) and value and isinstance(value[0], str) and value[0].startswith('Error:'):
            return True
        return isinstance(value, str) and value.startswith('Error:')

    stale_keys = [k for k, v in embeddings.items() if _is_invalid(v)]
    for key in stale_keys:
        del embeddings[key]
        print(f"[EmbeddingManager] Deleted empty or error embedding for key: {key}")

    # Persist only when something actually changed.
    if stale_keys:
        save_embeddings(embeddings_path, embeddings)
    return len(stale_keys)
|
||||
170
mm_agents/maestro/utils/file_utils.py
Normal file
170
mm_agents/maestro/utils/file_utils.py
Normal file
@@ -0,0 +1,170 @@
|
||||
# file_utils.py
|
||||
import json
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ========= File Lock Tools =========
|
||||
@contextmanager
def locked(path: Path, mode: str):
    """File lock context manager for cross-platform compatibility.

    Opens *path* in *mode* (text modes forced to UTF-8), holds an exclusive
    lock for the duration of the ``with`` body, then unlocks and closes.

    Args:
        path: File to open and lock.
        mode: Standard ``open`` mode string; 'b' selects binary mode.

    Yields:
        The open, locked file object.
    """
    if os.name == "nt":
        # Windows implementation
        import msvcrt
        import time as _t

        # Always use UTF-8 encoding for text files on Windows
        if 'b' in mode:
            f = open(path, mode)
        else:
            f = open(path, mode, encoding="utf-8")
        try:
            # msvcrt has no blocking lock, so poll the non-blocking variant
            # until the 1-byte region is acquired.
            while True:
                try:
                    msvcrt.locking(f.fileno(), msvcrt.LK_NBLCK, 1)
                    break
                except OSError:
                    _t.sleep(0.01)
            yield f
        finally:
            # msvcrt locks/unlocks the region at the CURRENT file position,
            # so rewind to offset 0 (where the lock was taken) before
            # releasing.
            f.seek(0)
            msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
            f.close()
    else:
        # Unix-like systems implementation
        import fcntl

        # Always use UTF-8 encoding for text files on Unix-like systems
        if 'b' in mode:
            f = open(path, mode)
        else:
            f = open(path, mode, encoding="utf-8")
        try:
            # flock blocks until the exclusive lock is granted.
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            yield f
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            f.close()
|
||||
|
||||
# ========= Safe JSON Operations =========
|
||||
def safe_json_dump(data: Any, file_handle, **kwargs) -> None:
    """Safely dump JSON data with proper encoding handling.

    Defaults to human-readable, non-ASCII-preserving output; retries in
    ASCII-escaped mode if the target stream rejects the characters.
    """
    options = dict(kwargs)
    options.setdefault('ensure_ascii', False)
    options.setdefault('indent', 2)

    try:
        json.dump(data, file_handle, **options)
    except UnicodeEncodeError as exc:
        # The stream's encoding could not represent the data; escape instead.
        logger.warning(f"UnicodeEncodeError during JSON dump: {exc}. Falling back to ASCII mode.")
        options['ensure_ascii'] = True
        json.dump(data, file_handle, **options)
|
||||
|
||||
def safe_json_load(file_handle) -> Any:
    """Safely load JSON data with proper encoding handling.

    Falls back through common encodings when the stream's default decoding
    fails; returns {} when nothing works.
    """
    try:
        return json.load(file_handle)
    except UnicodeDecodeError as exc:
        logger.warning(f"UnicodeDecodeError during JSON load: {exc}. Attempting recovery.")
        file_handle.seek(0)
        raw = file_handle.read()

        # Try common encodings
        for encoding in ('utf-8-sig', 'latin1', 'cp1252'):
            try:
                text = raw.decode(encoding) if isinstance(raw, bytes) else raw
                return json.loads(text)
            except (UnicodeDecodeError, json.JSONDecodeError):
                continue

        logger.error("Failed to decode JSON with all attempted encodings. Returning empty data.")
        return {}
|
||||
|
||||
def safe_write_json(path: Path, data: Any) -> None:
    """Safely write JSON data to file with atomic operation.

    Writes to a sibling ``.tmp`` file under an exclusive lock, fsyncs, then
    atomically renames over *path*. The temp file is removed on failure and
    the exception re-raised.
    """
    tmp = path.with_suffix(".tmp")
    try:
        with locked(tmp, "w") as f:
            safe_json_dump(data, f)
            f.flush()
            # Ensure bytes hit disk before the rename makes them visible.
            os.fsync(f.fileno())
        tmp.replace(path)
    except Exception as exc:
        logger.error(f"Failed to write JSON to {path}: {exc}")
        if tmp.exists():
            try:
                tmp.unlink()
            except Exception:
                pass
        raise
|
||||
|
||||
def safe_read_json(path: Path, default: Any = None) -> Any:
    """Safely read JSON data from file.

    Args:
        path: JSON file to read under an exclusive file lock.
        default: Value returned on failure; when left as None the fallback
            is an empty list (historical behavior callers rely on).
    """
    try:
        with locked(path, "r") as handle:
            return safe_json_load(handle)
    except Exception as exc:
        logger.warning(f"Failed to read JSON from {path}: {exc}")
    return [] if default is None else default
|
||||
|
||||
# ========= Safe Text File Operations =========
|
||||
def safe_write_text(path: Path, content: str) -> None:
    """Safely write text to file with UTF-8 encoding.

    Characters the encoder rejects are replaced on retry rather than raised.
    """
    try:
        path.write_text(content, encoding='utf-8')
    except UnicodeEncodeError as exc:
        logger.warning(f"UnicodeEncodeError writing to {path}: {exc}. Using error handling.")
        path.write_text(content, encoding='utf-8', errors='replace')
|
||||
|
||||
def safe_read_text(path: Path) -> str:
    """Safely read text from file with proper encoding handling.

    Tries UTF-8 first, then a short list of common fallbacks; as a last
    resort decodes UTF-8 with replacement characters.
    """
    try:
        return path.read_text(encoding='utf-8')
    except UnicodeDecodeError as exc:
        logger.warning(f"UnicodeDecodeError reading {path}: {exc}. Trying alternative encodings.")
        for fallback in ('utf-8-sig', 'latin1', 'cp1252', 'gbk'):
            try:
                return path.read_text(encoding=fallback)
            except UnicodeDecodeError:
                continue

        logger.error(f"Failed to decode {path} with all encodings. Using error replacement.")
        return path.read_text(encoding='utf-8', errors='replace')
|
||||
|
||||
# ========= File Management Utilities =========
|
||||
def ensure_directory(path: Path) -> None:
    """Ensure directory exists, create if necessary.

    Missing parent directories are created too; an already-existing
    directory is a silent no-op.
    """
    path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def safe_file_operation(operation_name: str, file_path: Path, operation_func, *args, **kwargs):
    """Generic safe file operation wrapper with error handling.

    Runs ``operation_func(*args, **kwargs)``, logging (then re-raising) any
    failure with *operation_name*/*file_path* context for diagnostics.
    Note: *file_path* is used only for log messages, not passed to the func.
    """
    try:
        return operation_func(*args, **kwargs)
    except FileNotFoundError:
        logger.error(f"{operation_name}: File not found: {file_path}")
        raise
    except PermissionError:
        logger.error(f"{operation_name}: Permission denied: {file_path}")
        raise
    except Exception as exc:
        logger.error(f"{operation_name}: Unexpected error with {file_path}: {exc}")
        raise
|
||||
|
||||
def backup_file(file_path: Path, backup_suffix: str = ".backup") -> Path:
    """Create a backup of a file.

    Copies *file_path* (metadata included, via ``shutil.copy2``) to a
    sibling whose suffix gains *backup_suffix*; a missing source is a
    no-op. Returns the backup path either way.
    """
    backup_path = file_path.with_suffix(file_path.suffix + backup_suffix)
    try:
        if file_path.exists():
            import shutil
            shutil.copy2(file_path, backup_path)
            logger.info(f"Backup created: {backup_path}")
        return backup_path
    except Exception as exc:
        logger.error(f"Failed to create backup of {file_path}: {exc}")
        raise
||||
69
mm_agents/maestro/utils/id_utils.py
Normal file
69
mm_agents/maestro/utils/id_utils.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# id_utils.py
|
||||
import uuid
|
||||
import time
|
||||
import hashlib
|
||||
from typing import Optional
|
||||
|
||||
# Module-level counter for sequential IDs
|
||||
_sequential_counter = 1
|
||||
|
||||
def generate_uuid() -> str:
    """Return a freshly generated random UUID4 in canonical string form."""
    return f"{uuid.uuid4()}"
|
||||
|
||||
def generate_short_id(prefix: str = "", length: int = 8) -> str:
    """Generate a short random ID with optional prefix.

    Uses the first *length* hex characters of a UUID4.
    """
    token = uuid.uuid4().hex[:length]
    return prefix + token if prefix else token
|
||||
|
||||
def generate_timestamp_id(prefix: str = "") -> str:
    """Generate an ID from the current time in whole milliseconds."""
    millis = int(time.time() * 1000)
    return f"{prefix}{millis}"
|
||||
|
||||
def generate_hash_id(content: str, prefix: str = "", length: int = 8) -> str:
    """Generate a deterministic ID from the MD5 digest of *content*.

    Note: MD5 serves as a short identity hash here, not for security.
    """
    digest = hashlib.md5(content.encode('utf-8')).hexdigest()
    return f"{prefix}{digest[:length]}"
|
||||
|
||||
def generate_sequential_id(prefix: str = "", start: int = 1) -> str:
    """Generate sequential ID (not thread-safe, use with caution).

    A *start* other than 1 resets the shared module counter before issuing
    the next value, so repeated calls with the same non-default start will
    repeat that value.
    """
    global _sequential_counter
    if start != 1:  # Reset counter if different start value
        _sequential_counter = start

    issued = _sequential_counter
    _sequential_counter += 1
    return f"{prefix}{issued}" if prefix else str(issued)
|
||||
|
||||
def generate_composite_id(prefix: str = "", include_timestamp: bool = True,
                          include_uuid: bool = True, separator: str = "_") -> str:
    """Generate a composite ID joining prefix, ms timestamp and short UUID.

    Components are included in that order, skipping any that are disabled
    or empty, and joined with *separator*.
    """
    components = []
    if prefix:
        components.append(prefix)
    if include_timestamp:
        components.append(str(int(time.time() * 1000)))
    if include_uuid:
        components.append(str(uuid.uuid4())[:8])
    return separator.join(components)
|
||||
|
||||
def validate_id_format(id_string: str, expected_prefix: Optional[str] = None,
                       min_length: int = 1, max_length: int = 100) -> bool:
    """Validate ID format and constraints.

    Checks type, non-emptiness, inclusive length bounds, and an optional
    required prefix.
    """
    if not isinstance(id_string, str) or not id_string:
        return False
    if not (min_length <= len(id_string) <= max_length):
        return False
    return not expected_prefix or id_string.startswith(expected_prefix)
|
||||
27
mm_agents/maestro/utils/image_axis_utils.py
Normal file
27
mm_agents/maestro/utils/image_axis_utils.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def pad_to_square(image: Image.Image,
                  fill_color=(0, 0, 0),
                  padding: int = 0) -> Image.Image:
    """Center *image* on a square canvas, then add a uniform border.

    First make it a square (side = max dimension, *fill_color* background,
    image centered), then expand *padding* pixels around it.
    """
    width, height = image.size
    if width == height:
        square = image.copy()
    else:
        side = max(width, height)
        square = Image.new(image.mode, (side, side), fill_color)
        # Integer centering; odd differences bias one pixel toward top/left.
        square.paste(image, ((side - width) // 2, (side - height) // 2))

    if padding <= 0:
        return square

    bordered_side = square.size[0] + 2 * padding
    bordered = Image.new(square.mode, (bordered_side, bordered_side),
                         fill_color)
    bordered.paste(square, (padding, padding))
    return bordered
|
||||
Reference in New Issue
Block a user