格式转换

2025-05-29 20:18:57 +08:00
parent 1156bfdd7c
commit 6c87af5614
14 changed files with 11996 additions and 13 deletions
--- a/eval_framework/config/config.yaml
+++ b/eval_framework/config/config.yaml
@@ -2,22 +2,25 @@
 api:
  key: "sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
  base_url: "https://vip.apiyi.com/v1"
-  temperature: 0
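+  temperature: -1 # sentinel: -1 means the request is sent without a temperature argument, so the model's default temperature applies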
  max_retries: 10
  # 支持多个模型
  models:
    - "qwen-max-2025-01-25"
    - "gpt-4o"
+    - "deepseek-chat"
+    - "claude-sonnet-4-20250514"
+    - "deepseek-r1"
  # 或者使用单个模型（向后兼容）
  # model: "qwen-max-2025-01-25"

-# 系统提示词
-system_prompt: "You are an expert in the field of materials science, adept at answering questions related to fundamental aspects of materials science, including material structure, properties, processing, and applications."
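+system_prompt: None # YAML reads this as the string "None" (not null); llm_client.py omits the system message when system_prompt == 'None'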

 # 评估配置
 evaluation:
-  max_workers: 8
-  input_file: "/home/ubuntu/50T/LYT/MatBench/layer1/ALL-merge/merged.json"
+  max_workers: 20
+  # input_file: "/home/ubuntu/50T/LYT/MatBench/layer1/ALL-merge/merged.json"
+  input_file: "/home/ubuntu/50T/LYT/MatBench/layer2/PGEE/code/stepz_final_choice_questions.json"
  # 输出配置
  output:
    base_dir: "results"
--- a/eval_framework/main.py
+++ b/eval_framework/main.py
@@ -144,7 +144,7 @@ def main():
            logger.info(f"Evaluating model {i}/{len(models)}: {model_name}")
            
            try:
-                model_result = evaluate_single_model(model_name, data[:10], config, output_dir)
+                model_result = evaluate_single_model(model_name, data, config, output_dir)
                all_results[model_name] = model_result
                
                # 打印当前模型的结果
--- a/eval_framework/src/pycache/init.cpython-311.pyc
+++ b/eval_framework/src/pycache/init.cpython-311.pyc
--- a/eval_framework/src/pycache/data_loader.cpython-311.pyc
+++ b/eval_framework/src/pycache/data_loader.cpython-311.pyc
--- a/eval_framework/src/pycache/evaluator.cpython-311.pyc
+++ b/eval_framework/src/pycache/evaluator.cpython-311.pyc
--- a/eval_framework/src/pycache/llm_client.cpython-311.pyc
+++ b/eval_framework/src/pycache/llm_client.cpython-311.pyc
--- a/eval_framework/src/pycache/metrics.cpython-311.pyc
+++ b/eval_framework/src/pycache/metrics.cpython-311.pyc
--- a/eval_framework/src/pycache/utils.cpython-311.pyc
+++ b/eval_framework/src/pycache/utils.cpython-311.pyc
--- a/eval_framework/src/evaluator.py
+++ b/eval_framework/src/evaluator.py
@@ -51,7 +51,7 @@ class Evaluator:

        # 格式化选择项
        formatted_choices = " ".join([f"({lbl}) {txt}" for lbl, txt in zip(label, text)])
-        user_input = f"{question} {formatted_choices}. {prompt}"
+        user_input = f"{prompt} \n {question} {formatted_choices}"
        
        # 获取LLM响应
        llm_answer = self.llm_client.get_response(user_input, self.system_prompt)
--- a/eval_framework/src/llm_client.py
+++ b/eval_framework/src/llm_client.py
@@ -48,14 +48,27 @@ class LLMClient:
        retries = 0
        while retries < self.max_retries:
            try:
-                response = self.client.chat.completions.create(
-                    model=self.model,
-                    messages=[
+                if system_prompt == 'None':
+                    messages = [
+                        {"role": "user", "content": user_input}
+                    ]
+                else:
+                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_input}
-                    ],
-                    temperature=self.temperature
-                )
+                    ]
+
+                if self.temperature == -1:
+                    response = self.client.chat.completions.create(
+                        model=self.model,
+                        messages=messages,
+                    )
+                else:
+                    response = self.client.chat.completions.create(
+                        model=self.model,
+                        messages=messages,
+                        temperature=self.temperature
+                    )
                answer = response.choices[0].message.content
                return answer