feat: Migrate OSWorld files to HuggingFace cache with comprehensive documentation

- Add detailed README for file cache repository
- Implement migration script with retry logic and browser simulation
- Support automatic file type detection and deduplication
- Ensure reliable hosting for OSWorld evaluation files
This commit is contained in:
Timothyxxx
2025-05-28 04:29:37 +08:00
parent a845824f06
commit 34748567a5
317 changed files with 12630 additions and 12008 deletions

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1YhHNlRsL7lJBsqRYctz4CmEoD1g8oAm0&export=download&authuser=0&confirm=t&uuid=16776039-9eae-4ee7-ae0b-8b2d71cb25e1&at=APZUnTWVT6sfD3MQEADssAEc4Pwn:1706622286569",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/0810415c-bde4-4443-9047-d5f70165a697/Novels_Intro_Packet.docx",
"path": "C:\\Users\\User\\Novels_Intro_Packet.docx"
}
]
@@ -55,7 +55,7 @@
"func": "compare_line_spacing",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=95ca5e2e-7fb3-4084-9f7b-a608a8277322&at=APZUnTXFO_571vyDp_r_LskPfq-j:1706796981024",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/0810415c-bde4-4443-9047-d5f70165a697/Novels_Intro_Packet_Gold.docx",
"dest": "Novels_Intro_Packet_Gold.docx"
},
"result": {

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1FkorQBeTJ5L2jLuvu4YxHSlBMK4VEEG6&export=download&authuser=0&confirm=t&uuid=cc63dc0b-bae7-4ef6-a40d-e2da721976ef&at=APZUnTWyPZlZPFlqGTWAWXWmS04c:1704976667765",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/0b17a146-2934-46c7-8727-73ff6b6483e8/H2O_Factsheet_WA.docx",
"path": "C:\\Users\\User\\H2O_Factsheet_WA.docx"
}
]
@@ -60,7 +60,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1dM_FSTGDWxSW64VEth_wKMYNkvw0y_tq&export=download&authuser=0&confirm=t&uuid=342f41e2-f48f-41ff-8942-f7dfe5de1dba&at=APZUnTXHfskcX3tvmrSbzCOyQIgb:1704976694506",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/0b17a146-2934-46c7-8727-73ff6b6483e8/H2O_Factsheet_WA_Gold.docx",
"dest": "H2O_Factsheet_WA_Gold.docx"
}
}

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1fIHNzFm8JabWoLKOnxrFM722fQ1d_huK&export=download&authuser=0&confirm=t&uuid=d11a8dda-1e4e-4dc9-b05c-e6b47624dbf0&at=APZUnTVG0ViFnKJa00314wVr3uP9:1704185871014",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/0e763496-b6bb-4508-a427-fad0b6c3e195/Dublin_Zoo_Intro.docx",
"path": "C:\\Users\\User\\Dublin_Zoo_Intro.docx"
}
]

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1P8QodvDF-3S50rx6UmW4M2D4Kr-p_Q-h&export=download&authuser=0&confirm=t&uuid=eea70a33-4c3f-4e90-885d-dd3df0d605bc&at=APZUnTX7ISvBhOICNrPLoqK0m3G-:1704971931660",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/3ef2b351-8a84-4ff2-8724-d86eae9b842e/Constitution_Template_With_Guidelines.docx",
"path": "C:\\Users\\User\\Constitution_Template_With_Guidelines.docx"
}
]

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1cn3CaA_4ds1WY8SgBT3TvjRunxocCBiu&export=download&authuser=0&confirm=t&uuid=622f5ae1-6f21-4f31-8a3e-e4ead6ea6bc3&at=APZUnTVKOV40Ww5PovU7at2ELzb9:1704949558060",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.docx",
"path": "C:\\Users\\User\\View_Person_Organizational_Summary.docx"
}
]
@@ -30,7 +30,7 @@
"func": "compare_pdfs",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Spn-Gw7D-bRvV4udNQoGNEOViUqf6bL0&export=download&authuser=0&confirm=t&uuid=dcc0eb01-89ed-4852-a7cb-d0400d977ac8&at=APZUnTX57XnHwmb-y3m4JdNkvu6z:1706328786805",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold.pdf"
},
"result": {

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?id=1cK1AMt_qKVAPp6EndSFG8y8r7KOPsqC1&export=download",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx",
"path": "C:\\Users\\User\\HK_train_record.docx"
}
]
@@ -60,7 +60,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1wZ5CKxCD3biB4mFFlrBInZO-bzo36vVG&export=download",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
"dest": "HK_train_record_Gold.docx"
}
}

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?id=1akFeAURJiqnK9wGNlRgPoPuQ6vRmnUPe&export=download",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/b21acd93-60fd-4127-8a43-2f5178f4a830/CCHU9045_Course_Outline_2019-20.docx",
"path": "C:\\Users\\User\\CCHU9045_Course_Outline_2019-20.docx"
}
]
@@ -55,7 +55,7 @@
"func": "compare_line_spacing",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=16LN7uYSSXk_xwgc4IZXnN2Z1nCmPJfLm&export=download",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/b21acd93-60fd-4127-8a43-2f5178f4a830/CCHU9045_Course_Outline_2019-20_Gold.docx",
"dest": "CCHU9045_Course_Outline_2019-20_Gold.docx"
},
"result": {

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1rcwueBf1TtBJu6NftUpzqFDVRSZzRCGT&export=download&authuser=0&confirm=t&uuid=a71c0649-c1b5-468a-8bad-aa59b11b510c&at=APZUnTVQ0F2KgX7sa-DqAXDhQ3Vu:1710854515082",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/e528b65e-1107-4b8c-8988-490e4fece599/Geography_And_Magical_Realism.docx",
"path": "C:\\Users\\User\\Geography_And_Magical_Realism.docx"
}
]
@@ -55,7 +55,7 @@
"func": "compare_docx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1QM2Ql81_Rex-qa7xTjUvikSb6OGtc4JC&export=download&authuser=0&confirm=t&uuid=6d24454d-3526-4694-9bf6-adc6bc0afd78&at=APZUnTWptHSOEAum1ZUXQQYED2Lb:1710855675026",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/e528b65e-1107-4b8c-8988-490e4fece599/Geography_And_Magical_Realism%20(1).docx",
"dest": "Geography_And_Magical_Realism_Gold.docx"
},
"result": {

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1sDufDSC4foI379-Jikya9WK7FBUSqgrt&export=download&authuser=0&confirm=t&uuid=0abd82d6-2b2c-49bc-af5e-49bfe1c99278&at=APZUnTURIqTNJcIHBcMP2BxEaGXr:1704174850900",
"url": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/ecc2413d-8a48-416e-a3a2-d30106ca36cb/Sample_Statutory_Declaration.docx",
"path": "C:\\Users\\User\\Sample_Statutory_Declaration.docx"
}
]
@@ -55,7 +55,7 @@
"func": "compare_docx_files",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1ELPKQ9NWsmotA9XfYD-DJxAubGNLmRQL&export=download&authuser=0&confirm=t&uuid=7a57cdbd-479c-4ed8-83b3-b8373b0fdfe7&at=APZUnTXtTFD_P8_5gwAV2TSZLoCV:1706854266276",
"path": "https://huggingface.co/datasets/xlangai/windows_osworld_file_cache/resolve/main/word/ecc2413d-8a48-416e-a3a2-d30106ca36cb/Sample_Statutory_Declaration_Gold.docx",
"dest": "Sample_Statutory_Declaration_Gold.docx"
},
"result": {