Add more data processing code

This commit is contained in:
PeterGriffinJin
2025-03-31 12:58:04 +00:00
parent 95d16f4548
commit 716cd73977
3 changed files with 230 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
# Runs the train-merge and test-merge data-processing scripts over a
# comma-separated list of QA data sources.
#
# WORK_DIR is a placeholder: replace it with the directory that contains
# scripts/data_process/ before running. Both invocations receive the same
# --local_dir ($WORK_DIR/data/nq_hotpotqa_train).
#
# All expansions are double-quoted so a WORK_DIR containing spaces or glob
# characters is passed to python as a single, literal argument.
WORK_DIR=your/work/dir
LOCAL_DIR="$WORK_DIR/data/nq_hotpotqa_train"

## Process the multi-dataset search-format train file.
DATA=nq,hotpotqa
python "$WORK_DIR/scripts/data_process/qa_search_train_merge.py" \
  --local_dir "$LOCAL_DIR" \
  --data_sources "$DATA"

## Process the multi-dataset search-format test file.
# DATA is intentionally reassigned: the test run uses a different source list.
DATA=nq,triviaqa,popqa,hotpotqa,2wikimultihopqa,musique,bamboogle
python "$WORK_DIR/scripts/data_process/qa_search_test_merge.py" \
  --local_dir "$LOCAL_DIR" \
  --data_sources "$DATA"