选项平衡后的第一次试跑,约70%正确率

This commit is contained in:
lzy
2025-06-02 17:18:30 +08:00
parent 7a725bc003
commit 3984ec002e
12 changed files with 16742 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-06-02T17:15:23.726253",
"metrics": {
"accuracy": 0.700507614213198,
"precision_micro": 0.6934673366834171,
"recall_micro": 0.700507614213198,
"f1_micro": 0.696969696969697,
"precision_macro": 0.7072180484244438,
"recall_macro": 0.7009183673469388,
"f1_macro": 0.69833034513671
}
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-06-02T17:13:08.707748",
"metrics": {
"accuracy": 0.6700507614213198,
"precision_micro": 0.676923076923077,
"recall_micro": 0.6700507614213198,
"f1_micro": 0.673469387755102,
"precision_macro": 0.6899114693446089,
"recall_macro": 0.6705102040816326,
"f1_macro": 0.6754210676562946
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-06-02T17:10:06.316348",
"metrics": {
"accuracy": 0.5482233502538071,
"precision_micro": 0.5618556701030928,
"recall_micro": 0.5532994923857868,
"f1_micro": 0.5575447570332481,
"precision_macro": 0.5779088050314465,
"recall_macro": 0.5536734693877551,
"f1_macro": 0.5600088997453159
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-06-02T17:09:24.216653",
"metrics": {
"accuracy": 0.6446700507614214,
"precision_micro": 0.6336633663366337,
"recall_micro": 0.649746192893401,
"f1_micro": 0.6416040100250626,
"precision_macro": 0.6388760049474336,
"recall_macro": 0.6501020408163265,
"f1_macro": 0.64232342205538
}
}

View File

@@ -0,0 +1,5 @@
Model,accuracy,precision_micro,recall_micro,f1_micro,precision_macro,recall_macro,f1_macro,Data Count
qwen-max-2025-01-25,0.6446700507614214,0.6336633663366337,0.649746192893401,0.6416040100250626,0.6388760049474336,0.6501020408163265,0.64232342205538,197
gpt-4o,0.5482233502538071,0.5618556701030928,0.5532994923857868,0.5575447570332481,0.5779088050314465,0.5536734693877551,0.5600088997453159,197
deepseek-chat,0.6700507614213198,0.676923076923077,0.6700507614213198,0.673469387755102,0.6899114693446089,0.6705102040816326,0.6754210676562946,197
claude-sonnet-4-20250514,0.700507614213198,0.6934673366834171,0.700507614213198,0.696969696969697,0.7072180484244438,0.7009183673469388,0.69833034513671,197
1 Model accuracy precision_micro recall_micro f1_micro precision_macro recall_macro f1_macro Data Count
2 qwen-max-2025-01-25 0.6446700507614214 0.6336633663366337 0.649746192893401 0.6416040100250626 0.6388760049474336 0.6501020408163265 0.64232342205538 197
3 gpt-4o 0.5482233502538071 0.5618556701030928 0.5532994923857868 0.5575447570332481 0.5779088050314465 0.5536734693877551 0.5600088997453159 197
4 deepseek-chat 0.6700507614213198 0.676923076923077 0.6700507614213198 0.673469387755102 0.6899114693446089 0.6705102040816326 0.6754210676562946 197
5 claude-sonnet-4-20250514 0.700507614213198 0.6934673366834171 0.700507614213198 0.696969696969697 0.7072180484244438 0.7009183673469388 0.69833034513671 197

View File

@@ -0,0 +1,98 @@
{
"timestamp": "2025-06-02T17:15:23.737185",
"models_count": 4,
"models": {
"qwen-max-2025-01-25": {
"metrics": {
"accuracy": 0.6446700507614214,
"precision_micro": 0.6336633663366337,
"recall_micro": 0.649746192893401,
"f1_micro": 0.6416040100250626,
"precision_macro": 0.6388760049474336,
"recall_macro": 0.6501020408163265,
"f1_macro": 0.64232342205538
},
"data_count": 197
},
"gpt-4o": {
"metrics": {
"accuracy": 0.5482233502538071,
"precision_micro": 0.5618556701030928,
"recall_micro": 0.5532994923857868,
"f1_micro": 0.5575447570332481,
"precision_macro": 0.5779088050314465,
"recall_macro": 0.5536734693877551,
"f1_macro": 0.5600088997453159
},
"data_count": 197
},
"deepseek-chat": {
"metrics": {
"accuracy": 0.6700507614213198,
"precision_micro": 0.676923076923077,
"recall_micro": 0.6700507614213198,
"f1_micro": 0.673469387755102,
"precision_macro": 0.6899114693446089,
"recall_macro": 0.6705102040816326,
"f1_macro": 0.6754210676562946
},
"data_count": 197
},
"claude-sonnet-4-20250514": {
"metrics": {
"accuracy": 0.700507614213198,
"precision_micro": 0.6934673366834171,
"recall_micro": 0.700507614213198,
"f1_micro": 0.696969696969697,
"precision_macro": 0.7072180484244438,
"recall_macro": 0.7009183673469388,
"f1_macro": 0.69833034513671
},
"data_count": 197
}
},
"comparison": {
"accuracy": {
"qwen-max-2025-01-25": 0.6446700507614214,
"gpt-4o": 0.5482233502538071,
"deepseek-chat": 0.6700507614213198,
"claude-sonnet-4-20250514": 0.700507614213198
},
"precision_micro": {
"qwen-max-2025-01-25": 0.6336633663366337,
"gpt-4o": 0.5618556701030928,
"deepseek-chat": 0.676923076923077,
"claude-sonnet-4-20250514": 0.6934673366834171
},
"recall_micro": {
"qwen-max-2025-01-25": 0.649746192893401,
"gpt-4o": 0.5532994923857868,
"deepseek-chat": 0.6700507614213198,
"claude-sonnet-4-20250514": 0.700507614213198
},
"f1_micro": {
"qwen-max-2025-01-25": 0.6416040100250626,
"gpt-4o": 0.5575447570332481,
"deepseek-chat": 0.673469387755102,
"claude-sonnet-4-20250514": 0.696969696969697
},
"precision_macro": {
"qwen-max-2025-01-25": 0.6388760049474336,
"gpt-4o": 0.5779088050314465,
"deepseek-chat": 0.6899114693446089,
"claude-sonnet-4-20250514": 0.7072180484244438
},
"recall_macro": {
"qwen-max-2025-01-25": 0.6501020408163265,
"gpt-4o": 0.5536734693877551,
"deepseek-chat": 0.6705102040816326,
"claude-sonnet-4-20250514": 0.7009183673469388
},
"f1_macro": {
"qwen-max-2025-01-25": 0.64232342205538,
"gpt-4o": 0.5600088997453159,
"deepseek-chat": 0.6754210676562946,
"claude-sonnet-4-20250514": 0.69833034513671
}
}
}

Binary file not shown.