选项平衡后的第一次试跑,约70%正确率
This commit is contained in:
3942
results/20250602_1706/claude-sonnet-4-20250514.json
Normal file
3942
results/20250602_1706/claude-sonnet-4-20250514.json
Normal file
File diff suppressed because it is too large
Load Diff
12
results/20250602_1706/claude-sonnet-4-20250514_metrics.json
Normal file
12
results/20250602_1706/claude-sonnet-4-20250514_metrics.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"timestamp": "2025-06-02T17:15:23.726253",
|
||||
"metrics": {
|
||||
"accuracy": 0.700507614213198,
|
||||
"precision_micro": 0.6934673366834171,
|
||||
"recall_micro": 0.700507614213198,
|
||||
"f1_micro": 0.696969696969697,
|
||||
"precision_macro": 0.7072180484244438,
|
||||
"recall_macro": 0.7009183673469388,
|
||||
"f1_macro": 0.69833034513671
|
||||
}
|
||||
}
|
||||
3942
results/20250602_1706/deepseek-chat.json
Normal file
3942
results/20250602_1706/deepseek-chat.json
Normal file
File diff suppressed because one or more lines are too long
12
results/20250602_1706/deepseek-chat_metrics.json
Normal file
12
results/20250602_1706/deepseek-chat_metrics.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"timestamp": "2025-06-02T17:13:08.707748",
|
||||
"metrics": {
|
||||
"accuracy": 0.6700507614213198,
|
||||
"precision_micro": 0.676923076923077,
|
||||
"recall_micro": 0.6700507614213198,
|
||||
"f1_micro": 0.673469387755102,
|
||||
"precision_macro": 0.6899114693446089,
|
||||
"recall_macro": 0.6705102040816326,
|
||||
"f1_macro": 0.6754210676562946
|
||||
}
|
||||
}
|
||||
3942
results/20250602_1706/gpt-4o.json
Normal file
3942
results/20250602_1706/gpt-4o.json
Normal file
File diff suppressed because it is too large
Load Diff
12
results/20250602_1706/gpt-4o_metrics.json
Normal file
12
results/20250602_1706/gpt-4o_metrics.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"timestamp": "2025-06-02T17:10:06.316348",
|
||||
"metrics": {
|
||||
"accuracy": 0.5482233502538071,
|
||||
"precision_micro": 0.5618556701030928,
|
||||
"recall_micro": 0.5532994923857868,
|
||||
"f1_micro": 0.5575447570332481,
|
||||
"precision_macro": 0.5779088050314465,
|
||||
"recall_macro": 0.5536734693877551,
|
||||
"f1_macro": 0.5600088997453159
|
||||
}
|
||||
}
|
||||
3942
results/20250602_1706/qwen-max-2025-01-25.json
Normal file
3942
results/20250602_1706/qwen-max-2025-01-25.json
Normal file
File diff suppressed because it is too large
Load Diff
12
results/20250602_1706/qwen-max-2025-01-25_metrics.json
Normal file
12
results/20250602_1706/qwen-max-2025-01-25_metrics.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"timestamp": "2025-06-02T17:09:24.216653",
|
||||
"metrics": {
|
||||
"accuracy": 0.6446700507614214,
|
||||
"precision_micro": 0.6336633663366337,
|
||||
"recall_micro": 0.649746192893401,
|
||||
"f1_micro": 0.6416040100250626,
|
||||
"precision_macro": 0.6388760049474336,
|
||||
"recall_macro": 0.6501020408163265,
|
||||
"f1_macro": 0.64232342205538
|
||||
}
|
||||
}
|
||||
5
results/20250602_1706/summary.csv
Normal file
5
results/20250602_1706/summary.csv
Normal file
@@ -0,0 +1,5 @@
|
||||
Model,accuracy,precision_micro,recall_micro,f1_micro,precision_macro,recall_macro,f1_macro,Data Count
|
||||
qwen-max-2025-01-25,0.6446700507614214,0.6336633663366337,0.649746192893401,0.6416040100250626,0.6388760049474336,0.6501020408163265,0.64232342205538,197
|
||||
gpt-4o,0.5482233502538071,0.5618556701030928,0.5532994923857868,0.5575447570332481,0.5779088050314465,0.5536734693877551,0.5600088997453159,197
|
||||
deepseek-chat,0.6700507614213198,0.676923076923077,0.6700507614213198,0.673469387755102,0.6899114693446089,0.6705102040816326,0.6754210676562946,197
|
||||
claude-sonnet-4-20250514,0.700507614213198,0.6934673366834171,0.700507614213198,0.696969696969697,0.7072180484244438,0.7009183673469388,0.69833034513671,197
|
||||
|
98
results/20250602_1706/summary.json
Normal file
98
results/20250602_1706/summary.json
Normal file
@@ -0,0 +1,98 @@
|
||||
{
|
||||
"timestamp": "2025-06-02T17:15:23.737185",
|
||||
"models_count": 4,
|
||||
"models": {
|
||||
"qwen-max-2025-01-25": {
|
||||
"metrics": {
|
||||
"accuracy": 0.6446700507614214,
|
||||
"precision_micro": 0.6336633663366337,
|
||||
"recall_micro": 0.649746192893401,
|
||||
"f1_micro": 0.6416040100250626,
|
||||
"precision_macro": 0.6388760049474336,
|
||||
"recall_macro": 0.6501020408163265,
|
||||
"f1_macro": 0.64232342205538
|
||||
},
|
||||
"data_count": 197
|
||||
},
|
||||
"gpt-4o": {
|
||||
"metrics": {
|
||||
"accuracy": 0.5482233502538071,
|
||||
"precision_micro": 0.5618556701030928,
|
||||
"recall_micro": 0.5532994923857868,
|
||||
"f1_micro": 0.5575447570332481,
|
||||
"precision_macro": 0.5779088050314465,
|
||||
"recall_macro": 0.5536734693877551,
|
||||
"f1_macro": 0.5600088997453159
|
||||
},
|
||||
"data_count": 197
|
||||
},
|
||||
"deepseek-chat": {
|
||||
"metrics": {
|
||||
"accuracy": 0.6700507614213198,
|
||||
"precision_micro": 0.676923076923077,
|
||||
"recall_micro": 0.6700507614213198,
|
||||
"f1_micro": 0.673469387755102,
|
||||
"precision_macro": 0.6899114693446089,
|
||||
"recall_macro": 0.6705102040816326,
|
||||
"f1_macro": 0.6754210676562946
|
||||
},
|
||||
"data_count": 197
|
||||
},
|
||||
"claude-sonnet-4-20250514": {
|
||||
"metrics": {
|
||||
"accuracy": 0.700507614213198,
|
||||
"precision_micro": 0.6934673366834171,
|
||||
"recall_micro": 0.700507614213198,
|
||||
"f1_micro": 0.696969696969697,
|
||||
"precision_macro": 0.7072180484244438,
|
||||
"recall_macro": 0.7009183673469388,
|
||||
"f1_macro": 0.69833034513671
|
||||
},
|
||||
"data_count": 197
|
||||
}
|
||||
},
|
||||
"comparison": {
|
||||
"accuracy": {
|
||||
"qwen-max-2025-01-25": 0.6446700507614214,
|
||||
"gpt-4o": 0.5482233502538071,
|
||||
"deepseek-chat": 0.6700507614213198,
|
||||
"claude-sonnet-4-20250514": 0.700507614213198
|
||||
},
|
||||
"precision_micro": {
|
||||
"qwen-max-2025-01-25": 0.6336633663366337,
|
||||
"gpt-4o": 0.5618556701030928,
|
||||
"deepseek-chat": 0.676923076923077,
|
||||
"claude-sonnet-4-20250514": 0.6934673366834171
|
||||
},
|
||||
"recall_micro": {
|
||||
"qwen-max-2025-01-25": 0.649746192893401,
|
||||
"gpt-4o": 0.5532994923857868,
|
||||
"deepseek-chat": 0.6700507614213198,
|
||||
"claude-sonnet-4-20250514": 0.700507614213198
|
||||
},
|
||||
"f1_micro": {
|
||||
"qwen-max-2025-01-25": 0.6416040100250626,
|
||||
"gpt-4o": 0.5575447570332481,
|
||||
"deepseek-chat": 0.673469387755102,
|
||||
"claude-sonnet-4-20250514": 0.696969696969697
|
||||
},
|
||||
"precision_macro": {
|
||||
"qwen-max-2025-01-25": 0.6388760049474336,
|
||||
"gpt-4o": 0.5779088050314465,
|
||||
"deepseek-chat": 0.6899114693446089,
|
||||
"claude-sonnet-4-20250514": 0.7072180484244438
|
||||
},
|
||||
"recall_macro": {
|
||||
"qwen-max-2025-01-25": 0.6501020408163265,
|
||||
"gpt-4o": 0.5536734693877551,
|
||||
"deepseek-chat": 0.6705102040816326,
|
||||
"claude-sonnet-4-20250514": 0.7009183673469388
|
||||
},
|
||||
"f1_macro": {
|
||||
"qwen-max-2025-01-25": 0.64232342205538,
|
||||
"gpt-4o": 0.5600088997453159,
|
||||
"deepseek-chat": 0.6754210676562946,
|
||||
"claude-sonnet-4-20250514": 0.69833034513671
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
results/20250602_1706/summary.xlsx
Normal file
BIN
results/20250602_1706/summary.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user