Refactor the compare_python_pure_text function for improved normalization and error handling. Update the JSON example to clarify the instruction for extracting Python code from Colab, and rename the output files for consistency.

2025-07-03 13:50:21 +00:00
parent bdaf37e0e5
commit cb4bed20a0
2 changed files with 69 additions and 39 deletions
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -470,37 +470,66 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):

 def compare_python_pure_text(py_file_path, gold_file_path):
    if not py_file_path or not gold_file_path:
-        return 0
+        return 0.0

-    # first, change the suffix of gold_file from .txt to .py
-    print("py_file_path: ")
-    print(py_file_path)
-    print("gold_file_path: ")
-    print(gold_file_path)
+    def _normalize(text):
+        """
+        Minimal normalization - only handle basic formatting:
+        - Skip obvious file metadata (encoding, shebang) at the beginning
+        - Expand tabs to 4 spaces and strip trailing whitespace
+        - Remove empty lines
+        
+        This preserves any content that shouldn't be there (like markdown)
+        so it can be detected as an error.
+        """
+        lines = text.splitlines()
+        result_lines = []
+        i = 0
+        
+        # Only skip obvious metadata at the very beginning
+        while i < len(lines) and i < 3:  # Check only first 3 lines
+            stripped = lines[i].strip()
+            
+            if (stripped.startswith('#!') or
+                stripped.startswith('# -*- coding:') or
+                stripped.startswith('# coding:') or
+                stripped.startswith('# coding=')):
+                i += 1
+                continue
+            
+            break
+        
+        # Process all remaining lines with minimal filtering
+        while i < len(lines):
+            line = lines[i]
+            stripped = line.strip()
+            
+            if stripped:  # Keep all non-empty lines
+                normalized = line.expandtabs(4).rstrip()
+                result_lines.append(normalized)
+            
+            i += 1
+        
+        return '\n'.join(result_lines)

-    # gold_file_path = gold_file_path.replace('.txt', '.py')
-    def remove_whitespace(text):
-        return ''.join(text.split())
-
-    with open(py_file_path, 'r') as file1:
-        content1 = file1.read()
-    with open(gold_file_path, 'r') as file2:
-        content2 = file2.read()
-    content1_no_whitespace = remove_whitespace(content1)
-    content2_no_whitespace = remove_whitespace(content2)
-    if content1_no_whitespace == content2_no_whitespace:
-        return 1
-    else:
-        return 0
-
-if __name__ == '__main__':
-    print(check_direct_json_object([], rules={
-                "relativeTime": {
-                  "from": "5th next month"
-                },
-                "expected": {
-                    "start": "SEA",
-                    "end": "NYC",
-                    "time": "{DoW}, {Month} {DayD}, {Year}",
-                    "category": "Miles"
-                }}))
+    try:
+        with open(py_file_path, 'r', encoding='utf-8') as file1:
+            user_content = file1.read()
+        with open(gold_file_path, 'r', encoding='utf-8') as file2:
+            gold_content = file2.read()
+        
+        # Apply the same normalization to both files before comparing
+        user_normalized = _normalize(user_content)
+        gold_normalized = _normalize(gold_content)
+        
+        if user_normalized == gold_normalized:
+            return 1.0
+        else:
+            return 0.0
+            
+    except (FileNotFoundError, IOError, UnicodeDecodeError) as e:
+        logger.debug(f"compare_python_pure_text: Error reading files - {e}")
+        return 0.0
+    except Exception as e:
+        logger.debug(f"compare_python_pure_text: Unexpected error - {e}")
+        return 0.0