feat: Migrate OSWorld files to HuggingFace cache with comprehensive documentation

- Add detailed README for file cache repository
- Implement migration script with retry logic and browser simulation
- Support automatic file type detection and deduplication
- Ensure reliable hosting for OSWorld evaluation files
This commit is contained in:
Timothyxxx
2025-05-28 04:29:37 +08:00
parent a845824f06
commit 34748567a5
317 changed files with 12630 additions and 12008 deletions

View File

@@ -1,96 +1,158 @@
{
"id": "7e287123-70ca-47b9-8521-47db09b69b14",
"snapshot": "libreoffice_calc",
"instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for 2019~2023 in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": ["mkdir", "-p", "/home/user/Documents/Fundings/ecs", "/home/user/Documents/Fundings/grf"]
}
},
{
"type": "download",
"parameters": {
"files": [
{"path": "/home/user/Documents/Fundings/ecs/ecs15.pdf", "url": "https://drive.google.com/uc?id=1FTiT3mLlkehe2yWVdSMWr1w4ltLtQZUy&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs16.pdf", "url": "https://drive.google.com/uc?id=1DMzZyhDey3lDsQ7fcPiJm9AYGECZms3q&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs17.pdf", "url": "https://drive.google.com/uc?id=1TgAWk7FiV8fNrG2L3-Eu7BDccqtXebXY&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs23.pdf", "url": "https://drive.google.com/uc?id=11DVxH4eRjECUxZNbUhYNJhgD0Y5WoN8r&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs22.pdf", "url": "https://drive.google.com/uc?id=17IgyJADA65F40kH79S90QgEzPa7IERXx&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs21.pdf", "url": "https://drive.google.com/uc?id=1kB4eFfLisPXKOirGUHbbcOyf73t7MVqL&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs20.pdf", "url": "https://drive.google.com/uc?id=179j9tD1xRSgd9COM7rzErO6FLYO2sc_a&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs19.pdf", "url": "https://drive.google.com/uc?id=1ogZUTfKjvQhg58GXtVUxe1U8VvHU-3ap&export=download"},
{"path": "/home/user/Documents/Fundings/ecs/ecs18.pdf", "url": "https://drive.google.com/uc?id=1MfuvRhAnhMEMbxn5js2ffWqMHJx2fsd2&export=download"},
{"path": "/home/user/Documents/Fundings/grf/customer-information-sheet-for-inward-payments-to-hong-kong.pdf", "url": "https://drive.google.com/uc?id=1s-H3an7HLBM9ku6d6Hcdj1qkSwKAHngU&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf15.pdf", "url": "https://drive.google.com/uc?id=1rRQXo9XHnCVTG8XqNAv0SJwPTW36MMbm&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf16.pdf", "url": "https://drive.google.com/uc?id=18ljRqkdyXEZ464E0dpKjaEa2NFexyw3I&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf17.pdf", "url": "https://drive.google.com/uc?id=1VrqOnyhpOkMpyIJ6YMrAhixpahjYonOd&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf18.pdf", "url": "https://drive.google.com/uc?id=182CLDUr372-jpAiY4YvSbGNXF9TsWxzA&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf19.pdf", "url": "https://drive.google.com/uc?id=1YkJtjlklKN0NmLiI2Hi4f_dKtTm5SPxT&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf20.pdf", "url": "https://drive.google.com/uc?id=1a7Uc7VCMlEX6fy-5oqE6i1YLitBe7gaf&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf21.pdf", "url": "https://drive.google.com/uc?id=1s8km4Wle4lc5PkbUQfivBFK0IJQgxMiB&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf22.pdf", "url": "https://drive.google.com/uc?id=1HTEb1uK7LNvbVyeXgO8WemCPPgiASKiy&export=download"},
{"path": "/home/user/Documents/Fundings/grf/grf23.pdf", "url": "https://drive.google.com/uc?id=1XM-jZlfu_i4waDZHb8Z6Vr5b3LgULTtP&export=download"}
]
}
}
],
"trajectory": "trajectories/7e287123-70ca-47b9-8521-47db09b69b14",
"related_apps": [
"libreoffice_calc",
"os"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user/Desktop",
"/home/user/Desktop/GRF-p5y.xlsx"
]
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": [
"/home/user/Desktop/GRF-p5y.xlsx",
"/home/user/Desktop/GRF-p5y-Sheet1.csv"
],
"dest": [
"GRF-p5y.xlsx",
"GRF-p5y-Sheet1.csv"
],
"multi": true
},
"expected": {
"type": "cloud_file",
"path": [
"https://drive.google.com/uc?id=1fDM4Y-WuFCnfksPLgynj-WSmzbqn2TcV&export=download",
"https://drive.google.com/uc?id=1waThupubGOJop0FU0b0yhT6QnjNYkLGy&export=download"
],
"dest": [
"GRF-p5y-gt.xlsx",
"GRF-p5y-gt-Sheet1.csv"
],
"multi": true
},
"options": {
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
}
]
}
}
}
"id": "7e287123-70ca-47b9-8521-47db09b69b14",
"snapshot": "libreoffice_calc",
"instruction": "I am an assistant professor of CS at HKU, I want to apply for the General Research Fund next year, I need to get some insights, so I need you to help me to organise the data. First please help me to organise the pass rate of the GRF applications of the CS departments of each school for 2019~2023 in percentage form with 2 decimal digits in a table, which I can use subsequently. Set the headers as \"Year\", \"#Applied\", \"#Supported\", and \"Success Rate\". The materials are saved under Documents/Fundings. And please save the result table as \"GRF-p5y.xlsx\" on my desktop.",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": [
"mkdir",
"-p",
"/home/user/Documents/Fundings/ecs",
"/home/user/Documents/Fundings/grf"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"path": "/home/user/Documents/Fundings/ecs/ecs15.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs15.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs16.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs16.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs17.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs17.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs23.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs23.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs22.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs22.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs21.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs21.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs20.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs20.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs19.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs19.pdf"
},
{
"path": "/home/user/Documents/Fundings/ecs/ecs18.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/ecs18.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/customer-information-sheet-for-inward-payments-to-hong-kong.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/customer-information-sheet-for-inward-payments-to-hong-kong.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf15.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf15.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf16.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf16.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf17.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf17.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf18.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf18.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf19.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf19.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf20.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf20.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf21.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf21.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf22.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf22.pdf"
},
{
"path": "/home/user/Documents/Fundings/grf/grf23.pdf",
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/grf23.pdf"
}
]
}
}
],
"trajectory": "trajectories/7e287123-70ca-47b9-8521-47db09b69b14",
"related_apps": [
"libreoffice_calc",
"os"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user/Desktop",
"/home/user/Desktop/GRF-p5y.xlsx"
]
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": [
"/home/user/Desktop/GRF-p5y.xlsx",
"/home/user/Desktop/GRF-p5y-Sheet1.csv"
],
"dest": [
"GRF-p5y.xlsx",
"GRF-p5y-Sheet1.csv"
],
"multi": true
},
"expected": {
"type": "cloud_file",
"path": [
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/GRF-p5y.bak.xlsx",
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/7e287123-70ca-47b9-8521-47db09b69b14/GRF-p5y.bak-Sheet1.csv"
],
"dest": [
"GRF-p5y-gt.xlsx",
"GRF-p5y-gt-Sheet1.csv"
],
"multi": true
},
"options": {
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
}
]
}
}
}