Initial commit
This commit is contained in:
25
scripts/download.py
Normal file
25
scripts/download.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import argparse
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
parser = argparse.ArgumentParser(description="Download files from a Hugging Face dataset repository.")
|
||||
parser.add_argument("--repo_id", type=str, default="PeterJinGo/wiki-18-e5-index", help="Hugging Face repository ID")
|
||||
parser.add_argument("--save_path", type=str, required=True, help="Local directory to save files")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
repo_id = "PeterJinGo/wiki-18-e5-index"
|
||||
for file in ["part_aa", "part_ab"]:
|
||||
hf_hub_download(
|
||||
repo_id=repo_id,
|
||||
filename=file, # e.g., "e5_Flat.index"
|
||||
repo_type="dataset",
|
||||
local_dir=args.save_path,
|
||||
)
|
||||
|
||||
repo_id = "PeterJinGo/wiki-18-corpus"
|
||||
hf_hub_download(
|
||||
repo_id=repo_id,
|
||||
filename="wiki-18.jsonl.gz",
|
||||
repo_type="dataset",
|
||||
local_dir=args.save_path,
|
||||
)
|
||||
Reference in New Issue
Block a user