Add local sparse retriever, ANN dense retriever, and online search engine

This commit is contained in:
PeterGriffinJin
2025-04-07 18:20:43 +00:00
parent 0b26e614f7
commit ba152349fd
8 changed files with 470 additions and 7 deletions

View File

@@ -20,7 +20,9 @@ parser = argparse.ArgumentParser(description="Launch the local faiss retriever."
# Command-line options for the retriever server. Each option has a default,
# so the script can be launched with no arguments at all.
# Location of the prebuilt index file and of the JSONL corpus.
parser.add_argument("--index_path", type=str, default="/home/peterjin/mnt/index/wiki-18/e5_Flat.index", help="Corpus indexing file.")
parser.add_argument("--corpus_path", type=str, default="/home/peterjin/mnt/data/retrieval-corpus/wiki-18.jsonl", help="Local corpus file.")
# Number of passages returned per query (forwarded to Config as retrieval_topk).
parser.add_argument("--topk", type=int, default=3, help="Number of retrieved passages for one query.")
# NOTE(review): `--retriever_model` is registered twice in this excerpt (here
# and two statements below). This looks like the pre-change and post-change
# sides of a diff hunk shown together; if both calls were live, argparse would
# reject the second as a conflicting option string. Confirm only one remains.
parser.add_argument("--retriever_model", type=str, default="intfloat/e5-base-v2", help="Name of the retriever model.")
# Short retriever identifier; forwarded to Config as retrieval_method.
parser.add_argument("--retriever_name", type=str, default="e5", help="Name of the retriever model.")
# Model identifier or path; forwarded to Config as retrieval_model_path.
parser.add_argument("--retriever_model", type=str, default="intfloat/e5-base-v2", help="Path of the retriever model.")
# Boolean flag: False unless --faiss_gpu is given on the command line.
parser.add_argument('--faiss_gpu', action='store_true', help='Use GPU for computation')
# Parses sys.argv at import time; the resulting namespace is read when Config is built.
args = parser.parse_args()
@@ -335,11 +337,11 @@ app = FastAPI()
# 1) Build a config (could also parse from arguments).
# In real usage, you'd parse your CLI arguments or environment variables.
config = Config(
retrieval_method = "e5", # or "dense"
retrieval_method = args.retriever_name, # or "dense"
index_path=args.index_path,
corpus_path=args.corpus_path,
retrieval_topk=args.topk,
faiss_gpu=True,
faiss_gpu=args.faiss_gpu,
retrieval_model_path=args.retriever_model,
retrieval_pooling_method="mean",
retrieval_query_max_length=256,