Refactor the compare_python_pure_text function for improved normalization and error handling, and remove the leftover `__main__` debug block. Update the JSON example to clarify the instruction for extracting Python code from Colab, point it at the new notebook URL, and rename the output files for consistency.

2025-07-03 13:50:21 +00:00
parent bdaf37e0e5
commit cb4bed20a0
2 changed files with 69 additions and 39 deletions
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -470,37 +470,66 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):

 def compare_python_pure_text(py_file_path, gold_file_path):
    if not py_file_path or not gold_file_path:
-        return 0
+        return 0.0

-    # first, change the suffix of gold_file from .txt to .py
-    print("py_file_path: ")
-    print(py_file_path)
-    print("gold_file_path: ")
-    print(gold_file_path)
+    def _normalize(text):
+        """
+        Minimal normalization - only handle basic formatting:
+        - Skip obvious file metadata (shebang, encoding declaration) within the first 3 lines
+        - Expand tabs to 4 spaces and strip trailing whitespace (leading indentation is kept)
+        - Remove empty lines
+        
+        This preserves any content that shouldn't be there (like markdown)
+        so it can be detected as an error.
+        """
+        lines = text.splitlines()
+        result_lines = []
+        i = 0
+        
+        # Only skip obvious metadata at the very beginning
+        while i < len(lines) and i < 3:  # Check only first 3 lines
+            stripped = lines[i].strip()
+            
+            if (stripped.startswith('#!') or
+                stripped.startswith('# -*- coding:') or
+                stripped.startswith('# coding:') or
+                stripped.startswith('# coding=')):
+                i += 1
+                continue
+            
+            break
+        
+        # Process all remaining lines with minimal filtering
+        while i < len(lines):
+            line = lines[i]
+            stripped = line.strip()
+            
+            if stripped:  # Keep all non-empty lines
+                normalized = line.expandtabs(4).rstrip()
+                result_lines.append(normalized)
+            
+            i += 1
+        
+        return '\n'.join(result_lines)

-    # gold_file_path = gold_file_path.replace('.txt', '.py')
-    def remove_whitespace(text):
-        return ''.join(text.split())
-
-    with open(py_file_path, 'r') as file1:
-        content1 = file1.read()
-    with open(gold_file_path, 'r') as file2:
-        content2 = file2.read()
-    content1_no_whitespace = remove_whitespace(content1)
-    content2_no_whitespace = remove_whitespace(content2)
-    if content1_no_whitespace == content2_no_whitespace:
-        return 1
-    else:
-        return 0
-
-if __name__ == '__main__':
-    print(check_direct_json_object([], rules={
-                "relativeTime": {
-                  "from": "5th next month"
-                },
-                "expected": {
-                    "start": "SEA",
-                    "end": "NYC",
-                    "time": "{DoW}, {Month} {DayD}, {Year}",
-                    "category": "Miles"
-                }}))
+    try:
+        with open(py_file_path, 'r', encoding='utf-8') as file1:
+            user_content = file1.read()
+        with open(gold_file_path, 'r', encoding='utf-8') as file2:
+            gold_content = file2.read()
+        
+        # Apply the same normalization to both files before comparing
+        user_normalized = _normalize(user_content)
+        gold_normalized = _normalize(gold_content)
+        
+        if user_normalized == gold_normalized:
+            return 1.0
+        else:
+            return 0.0
+            
+    except (FileNotFoundError, IOError, UnicodeDecodeError) as e:
+        logger.debug(f"compare_python_pure_text: Error reading files - {e}")
+        return 0.0
+    except Exception as e:
+        logger.debug(f"compare_python_pure_text: Unexpected error - {e}")
+        return 0.0
--- a/evaluation_examples/examples/multi_apps/dd60633f-2c72-42ba-8547-6f2c8cb0fdb0.json
+++ b/evaluation_examples/examples/multi_apps/dd60633f-2c72-42ba-8547-6f2c8cb0fdb0.json
@@ -1,7 +1,7 @@
 {
  "id": "dd60633f-2c72-42ba-8547-6f2c8cb0fdb0",
  "snapshot": "multiapps",
-  "instruction": "I ran some simple code demos on the currently open google colab, and I think the effect is pretty good. Please help me extract the code in all code boxes, merge it into a \"task.py\" file and store it in the local Home directory.",
+  "instruction": "Please extract all Python code and comments from Karpathy's GPT colab code cells (skip markdown parts), merge into \"gpt_dev_pure_code.py\" in Home directory. Include all Python code and # comments from code cells, but exclude markdown docstrings and file headers.",
  "source": "authors",
  "config": [
    {
@@ -27,7 +27,7 @@
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
-          "https://colab.research.google.com/github/stanfordnlp/dspy/blob/main/intro.ipynb#scrollTo=L1BHAoL_LRd7"
+          "https://colab.research.google.com/drive/1JMLa53HDuA-i7ZBmqV7ZnA3c_fvtXnx-?usp=sharing#scrollTo=h5hjCcLDr2WC"
        ]
      }
    }
@@ -42,14 +42,15 @@
    "func": "compare_python_pure_text",
    "result": {
      "type": "vm_file",
-      "path": "/home/user/colab.py",
-      "dest": "colab.py"
+      "path": "/home/user/gpt_dev_pure_code.py",
+      "dest": "gpt_dev_pure_code.py"
    },
    "expected": {
      "type": "cloud_file",
-      "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/dd60633f-2c72-42ba-8547-6f2c8cb0fdb0/Colab%20Gold.py",
-      "dest": "colab_Gold.py"
+      "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/dd60633f-2c72-42ba-8547-6f2c8cb0fdb0/gpt_dev_pure_code_gold.py",
+      "dest": "gpt_dev_pure_code_gold.py"
    }
  },
-  "proxy": true
+  "proxy": true,
+  "env_may_change": true
 }