feat: Migrate OSWorld files to HuggingFace cache with comprehensive documentation

- Add detailed README for file cache repository
- Implement migration script with retry logic and browser simulation
- Support automatic file type detection and deduplication
- Ensure reliable hosting for OSWorld evaluation files
This commit is contained in:
Timothyxxx
2025-05-28 04:29:37 +08:00
parent a845824f06
commit 34748567a5
317 changed files with 12630 additions and 12008 deletions

View File

@@ -10,11 +10,11 @@
"files": [
{
"path": "/home/user/Desktop/restaurants.txt",
"url": "https://drive.google.com/uc?id=1IehFLJPZcFv8Ujk31ExbyGLji9AylmmJ&export=download"
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92/restaurants.txt"
},
{
"path": "/home/user/Desktop/MUST_VISIT.xlsx",
"url": "https://drive.google.com/uc?id=1fXmjvZcwkIcckMIAXi3Hv_JAbVWpgs_l&export=download"
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92/MUST_VISIT.xlsx"
}
]
}
@@ -91,7 +91,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1MV6jBvRbbYwPqeFTd_nX40xzyltNhphl&export=download",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/d1acdb87-bb67-4f30-84aa-990e56a09c92/MUST_VISIT_gold.xlsx",
"dest": "MUST_VISIT-gt.xlsx"
},
"options": {
@@ -102,21 +102,34 @@
"sheet_idx1": "ENSheet1",
"rules": [
{
"range": ["A1:A6", "D1:D6"],
"range": [
"A1:A6",
"D1:D6"
],
"type": "exact_match"
},
{
"range": ["B1:B6"],
"range": [
"B1:B6"
],
"type": "fuzzy_match",
"threshold": 85,
"normalization": [
["Rd", "Road"],
["St", "Street"]
[
"Rd",
"Road"
],
[
"St",
"Street"
]
],
"ignore_case": true
},
{
"range": ["C1:C6"],
"range": [
"C1:C6"
],
"type": "includes",
"trim_leadings": "+ ",
"ignore_chars": " ()-"
@@ -126,4 +139,4 @@
]
}
}
}
}