Compare commits
7 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 18737fe2f4 | |
| | 39dc1e9f06 | |
| | e23c48ef60 | |
| | 839d6589e0 | |
| | f4568b6dcb | |
| | aade9e11cb | |
| | 0f781f5679 | |
126
layer1/ALL-merge/classify.py
Normal file
@@ -0,0 +1,126 @@
import json
from openai import OpenAI
import time
import os
from tqdm import tqdm
import pandas as pd
import re

API_KEY = "sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_DEEPSEEK_V3 = "deepseek-chat"
CATEGORIES = ['Physics', 'Chemistry', 'Biological', 'Unknown']

# Load the JSON data
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

def classify_question(idx, total_len, question, options):
    prompt = f"""
Please classify the given question into one of these four categories: 'Physics', 'Chemistry', 'Biological' or 'Unknown'.\n
Please format your response by wrapping the category name with the tags [CATEGORY] and [\CATEGORY]. For example, your response should look like one of these:\n
- [CATEGORY]Physics[\CATEGORY]
- [CATEGORY]Chemistry[\CATEGORY]
- [CATEGORY]Biological[\CATEGORY]
- [CATEGORY]Unknown[\CATEGORY]
Question: {question}\n
Options: {options}\n
"""
    client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
    # Retry mechanism
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=MODEL_DEEPSEEK_V3,
                messages=[
                    {"role": "system", "content": "You are a helpful educational assistant."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                stream=False,
            )

            classification = response.choices[0].message.content.strip()
            extracted_category = string_extraction(idx, total_len, classification)
            if extracted_category in CATEGORIES:
                return extracted_category
            else:
                print(f"Invalid category '{extracted_category}' returned. Retrying. {attempt + 1}/{max_retries}")
                continue

        except Exception as e:
            print(f"Error on attempt {attempt + 1}/{max_retries}: {e}")
            if attempt == max_retries - 1:
                return 'Error'  # Return an error once the retry limit is reached

        # Wait before retrying
        time.sleep(2)

    return 'Error'

def string_extraction(idx, total_len, classification):
    pattern = r'\[CATEGORY\](.*?)\[\\CATEGORY\]'
    match = re.search(pattern, classification)
    # Fall back to 'Unknown' before printing, so a missing match cannot crash here
    extracted = match.group(1) if match else 'Unknown'
    print(f"{idx + 1}/{total_len}: {extracted}")
    return extracted

def main():
    # Load the data
    file_path = '/home/ubuntu/50T/fsy/MatBench/layer1/ALL-merge/merged.json'  # Replace with the path to your JSON file
    data = load_data(file_path)
    data_length = len(data)
    # Result list
    results = []

    # Process each question
    for i, item in enumerate(tqdm(data, desc="Classifying questions")):
        question = item.get('question', '')
        text = item['choices']['text']
        label = item['choices']['label']
        formatted_choices = " ".join([f"({lbl}) {txt}" for lbl, txt in zip(label, text)])
        # correct_answer = item.get('correct_answer', '')

        classification = classify_question(i, data_length, question, formatted_choices)
        # Attach the classification result
        item_with_classification = item.copy()
        item_with_classification['subject_category'] = classification
        results.append(item_with_classification)

        # Every 100 questions, save an interim snapshot
        if (i + 1) % 100 == 0:
            # with open('interim_results.json', 'w', encoding='utf-8') as f:
            #     json.dump(results, f, ensure_ascii=False, indent=4)

            # Optional: analyse the interim results
            categories = {'Physics': 0, 'Chemistry': 0, 'Biological': 0, 'Unknown': 0}
            for item in results:
                # .get guards against the 'Error' fallback, which is not pre-seeded
                cat = item.get('subject_category', 'Unknown')
                categories[cat] = categories.get(cat, 0) + 1

            print(f"Processed {i+1} questions. Current distribution:")
            for category, count in categories.items():
                print(f"{category}: {count}")

        # Handle API rate limits
        time.sleep(0.5)

    # Save the final results
    with open('/home/ubuntu/50T/fsy/MatBench/layer1/ALL-merge/merged_classified.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    # Analyse the results
    df = pd.DataFrame(results)
    category_counts = df['subject_category'].value_counts()
    print("\nFinal distribution of questions by category:")
    print(category_counts)

    print("\nTask completed. Results saved to 'merged_classified.json'")

if __name__ == "__main__":
    main()
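Worth noting: this first script closes its tag with a backslash ([\CATEGORY]), while the multi-threaded rewrites below switch to a forward slash ([/CATEGORY]); each script's regex matches only its own variant. A minimal standalone sketch (the `extract` helper is illustrative, not part of the commit) of how this pattern and its 'Unknown' fallback behave:

```python
import re

# The pattern from classify.py: the doubled backslash matches a literal '\'
# in the closing tag, so a '/'-style closing tag will NOT match.
PATTERN = r'\[CATEGORY\](.*?)\[\\CATEGORY\]'

def extract(reply: str) -> str:
    """Return the tagged category, or 'Unknown' when the tags are absent."""
    match = re.search(PATTERN, reply)
    return match.group(1) if match else 'Unknown'

assert extract(r"[CATEGORY]Physics[\CATEGORY]") == 'Physics'   # well-formed reply
assert extract("[CATEGORY]Physics[/CATEGORY]") == 'Unknown'    # wrong tag style -> fallback
```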
3214
layer1/ALL-merge/classify_muti.log
Normal file
File diff suppressed because it is too large
157
layer1/ALL-merge/classify_muti.py
Normal file
@@ -0,0 +1,157 @@
import json
from openai import OpenAI
import time
import os
from tqdm import tqdm
import pandas as pd
import re
import concurrent.futures
import threading

API_KEY = "sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_DEEPSEEK_V3 = "deepseek-chat"
CATEGORIES = ['Physics', 'Chemistry', 'Biological', 'Unknown']

# Thread-local storage for OpenAI clients
local = threading.local()

# Locks for thread-safe operations
write_lock = threading.Lock()
progress_lock = threading.Lock()
processed_count = 0
category_counts = {'Physics': 0, 'Chemistry': 0, 'Biological': 0, 'Unknown': 0}

# Load the JSON data
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

def get_client():
    """Get thread-local OpenAI client"""
    if not hasattr(local, 'client'):
        local.client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
    return local.client

def classify_question(idx, total_len, question, options):
    prompt = f"""
Please classify the given question into one of these four categories: 'Physics', 'Chemistry', 'Biological' or 'Unknown'.\n
Please format your response by wrapping the category name with the tags [CATEGORY] and [/CATEGORY]. For example, your response should look like one of these:\n
- [CATEGORY]Physics[/CATEGORY]
- [CATEGORY]Chemistry[/CATEGORY]
- [CATEGORY]Biological[/CATEGORY]
- [CATEGORY]Unknown[/CATEGORY]
Question: {question}\n
Options: {options}\n
"""

    client = get_client()
    # Retry mechanism
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=MODEL_DEEPSEEK_V3,
                messages=[
                    {"role": "system", "content": "You are a helpful educational assistant."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                stream=False,
            )
            classification = response.choices[0].message.content.strip()
            extracted_category = string_extraction(idx, total_len, classification)
            if extracted_category in CATEGORIES:
                return extracted_category
            else:
                with progress_lock:
                    print(f"Invalid category '{extracted_category}' returned. Retrying. {attempt + 1}/{max_retries}")
                continue
        except Exception as e:
            with progress_lock:
                print(f"Error on attempt {attempt + 1}/{max_retries}: {e}")
            if attempt == max_retries - 1:
                return 'Error'  # Return an error once the retry limit is reached

        # Wait before retrying
        time.sleep(2)

    return 'Error'

def string_extraction(idx, total_len, classification):
    pattern = r'\[CATEGORY\](.*?)\[\/CATEGORY\]'
    match = re.search(pattern, classification)
    extracted = match.group(1) if match else 'Unknown'

    with progress_lock:
        print(f"{idx + 1}/{total_len}: {extracted}")

    return extracted

def process_item(args):
    idx, total_len, item = args
    question = item.get('question', '')
    text = item['choices']['text']
    label = item['choices']['label']
    formatted_choices = " ".join([f"({lbl}) {txt}" for lbl, txt in zip(label, text)])

    classification = classify_question(idx, total_len, question, formatted_choices)

    # Attach the classification result
    item_with_classification = item.copy()
    item_with_classification['subject_category'] = classification

    # Update global counters
    global processed_count
    with write_lock:
        processed_count += 1
        # .get guards against the 'Error' fallback, which is not pre-seeded in the dict
        category_counts[classification] = category_counts.get(classification, 0) + 1

        # Every 100 questions, print interim statistics
        if processed_count % 100 == 0:
            print(f"\nProcessed {processed_count} questions. Current distribution:")
            for category, count in category_counts.items():
                print(f"{category}: {count}")

    # API rate-limit handling - a shorter sleep, since multithreading already spaces out requests
    time.sleep(0.1)

    return item_with_classification

def main():
    # Load the data
    file_path = '/home/ubuntu/50T/fsy/MatBench/layer1/ALL-merge/merged.json'
    data = load_data(file_path)
    data_length = len(data)

    results = []

    # Build the argument list
    args_list = [(i, data_length, item) for i, item in enumerate(data)]

    # Thread count; tune to the actual API limits and server capacity
    num_threads = 10

    print(f"Starting classification with {num_threads} threads...")

    # Parallel processing with a ThreadPoolExecutor
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # tqdm shows progress as results come back
        results = list(tqdm(executor.map(process_item, args_list), total=data_length, desc="Classifying questions"))

    # Save the final results
    with open('/home/ubuntu/50T/fsy/MatBench/layer1/ALL-merge/merged_classified.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    # Analyse the results
    df = pd.DataFrame(results)
    category_counts_final = df['subject_category'].value_counts()
    print("\nFinal distribution of questions by category:")
    print(category_counts_final)

    print("\nTask completed. Results saved to '/home/ubuntu/50T/fsy/MatBench/layer1/ALL-merge/merged_classified.json'")

if __name__ == "__main__":
    main()
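One property the multi-threaded version quietly relies on: `concurrent.futures.Executor.map` yields results in submission order, not completion order, so `results` stays index-aligned with `data` even with 10 workers racing. A small sketch of that guarantee, with a hypothetical `work` function standing in for `process_item`:

```python
import concurrent.futures
import random
import time

def work(i):
    # Sleep a random amount so completion order differs from submission order
    time.sleep(random.uniform(0, 0.02))
    return i

with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    out = list(executor.map(work, range(200)))

# map() buffers out-of-order completions and yields them in input order,
# so out[i] always corresponds to input i.
assert out == list(range(200))
```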
2289
layer2/PGEE/code/classify_muti.log
Normal file
File diff suppressed because it is too large
169
layer2/PGEE/code/classify_muti.py
Normal file
@@ -0,0 +1,169 @@
import json
from openai import OpenAI
import time
import os
from tqdm import tqdm
import pandas as pd
import re
import concurrent.futures
import threading

API_KEY = "sk-oYh3Xrhg8oDY2gW02c966f31C84449Ad86F9Cd9dF6E64a8d"
BASE_URL = "https://vip.apiyi.com/v1"
MODEL_DEEPSEEK_V3 = "deepseek-chat"
CATEGORIES = [
    'Atomic Structure and Interatomic Bonding',
    'The Structure of Solids',
    'Imperfections in Solids',
    'Mechanical Properties of Metals',
    'Dislocations and Strengthening Mechanisms',
    'Failure',
    'Phase Transformations: Development of Microstructure and Alteration of Mechanical Properties',
    'Applications and Processing of Materials',
    'Corrosion and Degradation of Materials',
    'Functional Properties of Materials',
    'Unknown',
]
FILE_PATH = '/home/ubuntu/50T/fsy/A/MatBench/layer2/PGEE/code/stepz_final_choice_questions_filtered_only_hard.json'
OUTPUT_PATH = '/home/ubuntu/50T/fsy/A/MatBench/layer2/PGEE/code/stepz_classified_only_hard.json'

# Thread-local storage for OpenAI clients
local = threading.local()

# Locks for thread-safe operations
write_lock = threading.Lock()
progress_lock = threading.Lock()
processed_count = 0
category_counts = {category: 0 for category in CATEGORIES}

# Load the JSON data
def load_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

def get_client():
    """Get thread-local OpenAI client"""
    if not hasattr(local, 'client'):
        local.client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
    return local.client

def classify_question(idx, total_len, question, answer):
    prompt = f"""
Given a question and its answer from the field of Materials Science fundamentals, identify which chapter or category of Materials Science the question belongs to. Choose from the following 10 categories:

-- Atomic Structure and Interatomic Bonding
-- The Structure of Solids
-- Imperfections in Solids
-- Mechanical Properties of Metals
-- Dislocations and Strengthening Mechanisms
-- Failure
-- Phase Transformations: Development of Microstructure and Alteration of Mechanical Properties
-- Applications and Processing of Materials
-- Corrosion and Degradation of Materials
-- Functional Properties of Materials

QUESTIONS: {question}\n
ANSWER: {answer}\n

Provide your response by enclosing the category name within [CATEGORY] and [/CATEGORY] tags. For example: [CATEGORY]Atomic Structure and Interatomic Bonding[/CATEGORY]

Analyze both the question and answer carefully to determine the most appropriate category based on the question and options.
"""

    client = get_client()
    # Retry mechanism
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=MODEL_DEEPSEEK_V3,
                messages=[
                    {"role": "system", "content": "You are a helpful educational assistant."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                stream=False,
            )
            classification = response.choices[0].message.content.strip()
            extracted_category = string_extraction(idx, total_len, classification)
            if extracted_category in CATEGORIES:
                return extracted_category
            else:
                with progress_lock:
                    print(f"Invalid category '{extracted_category}' returned. Retrying. {attempt + 1}/{max_retries}")
                continue
        except Exception as e:
            with progress_lock:
                print(f"Error on attempt {attempt + 1}/{max_retries}: {e}")
            if attempt == max_retries - 1:
                return 'Error'  # Return an error once the retry limit is reached

        # Wait before retrying
        time.sleep(2)

    return 'Error'

def string_extraction(idx, total_len, classification):
    pattern = r'\[CATEGORY\](.*?)\[\/CATEGORY\]'
    match = re.search(pattern, classification)
    extracted = match.group(1) if match else 'Unknown'

    with progress_lock:
        print(f"{idx + 1}/{total_len}: {extracted}")

    return extracted

def process_item(args):
    idx, total_len, item = args
    question = item.get('question', '')
    text = item['choices']['text']
    label = item['choices']['label']
    # answer = item.get('correct_option', '')
    formatted_choices = " ".join([f"({lbl}) {txt}" for lbl, txt in zip(label, text)])

    classification = classify_question(idx, total_len, question, formatted_choices)

    # Attach the classification result
    item_with_classification = item.copy()
    item_with_classification['subject_category'] = classification

    # Update global counters
    global processed_count
    with write_lock:
        processed_count += 1
        # .get guards against the 'Error' fallback, which is not pre-seeded in the dict
        category_counts[classification] = category_counts.get(classification, 0) + 1

        # Every 100 questions, print interim statistics
        if processed_count % 100 == 0:
            print(f"\nProcessed {processed_count} questions. Current distribution:")
            for category, count in category_counts.items():
                print(f"{category}: {count}")

    # API rate-limit handling - a shorter sleep, since multithreading already spaces out requests
    time.sleep(0.1)

    return item_with_classification

def main():
    # Load the data
    data = load_data(FILE_PATH)
    data_length = len(data)

    results = []

    # Build the argument list
    args_list = [(i, data_length, item) for i, item in enumerate(data)]

    # Thread count; tune to the actual API limits and server capacity
    num_threads = 20

    print(f"Starting classification with {num_threads} threads...")

    # Parallel processing with a ThreadPoolExecutor
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # tqdm shows progress as results come back
        results = list(tqdm(executor.map(process_item, args_list), total=data_length, desc="Classifying questions"))

    # Save the final results
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    # Analyse the results
    df = pd.DataFrame(results)
    category_counts_final = df['subject_category'].value_counts()
    print("\nFinal distribution of questions by category:")
    print(category_counts_final)

    print(f"\nTask completed. Results saved to {OUTPUT_PATH}")

if __name__ == "__main__":
    main()
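Both multi-threaded scripts share the same `threading.local()` idiom: `get_client` lazily builds one OpenAI client per worker thread, so threads never share a client while repeated calls within one thread reuse the same instance. A minimal sketch of that behaviour, with a hypothetical `FakeClient` in place of the real client:

```python
import threading

local = threading.local()
constructed = []  # records one entry per client actually built

class FakeClient:
    """Stand-in for OpenAI(...), just to observe constructions."""
    def __init__(self):
        constructed.append(threading.current_thread().name)

def get_client():
    # Same lazy, per-thread pattern as the scripts above
    if not hasattr(local, 'client'):
        local.client = FakeClient()
    return local.client

def task():
    for _ in range(5):   # five calls per thread...
        get_client()     # ...but only the first constructs a client

threads = [threading.Thread(target=task) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

assert len(constructed) == 4  # exactly one client per worker thread
```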
1653
layer2/PGEE/code/classify_muti_only_hard.log
Normal file
File diff suppressed because it is too large
40222
layer2/PGEE/code/stepz_classified.json
Normal file
File diff suppressed because it is too large
29062
layer2/PGEE/code/stepz_classified_only_hard.json
Normal file
File diff suppressed because it is too large