Initial commit

This commit is contained in:
PeterGriffinJin
2025-02-28 15:16:19 +00:00
commit 068516be64
207 changed files with 33063 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
# Launches index_builder.py to build a retrieval index over a corpus.
# Edit the four settings below before running.
corpus_file=/your/corpus/jsonl/file   # corpus input, in JSONL format
save_dir=/the/path/to/save/index      # forwarded as --save_dir
retriever_name=e5                     # used for naming the index
retriever_model=intfloat/e5-base-v2   # forwarded as --model_path

# Every expansion is double-quoted so that a path containing spaces or glob
# characters reaches index_builder.py as one unmodified argument.
# The CUDA_VISIBLE_DEVICES prefix applies to this single command only; it
# lists device indices 0-7 - adjust it to match the machine.
# NOTE(review): the meaning of each flag is defined by index_builder.py,
# which is not visible here - confirm values against that script.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python index_builder.py \
  --retrieval_method "$retriever_name" \
  --model_path "$retriever_model" \
  --corpus_path "$corpus_file" \
  --save_dir "$save_dir" \
  --use_fp16 \
  --max_length 256 \
  --batch_size 512 \
  --pooling_method mean \
  --faiss_type Flat \
  --save_embedding