feat: 新增 refine_metadata 脚本,更新 extract_instructions_v2

This commit is contained in:
2026-03-04 10:43:14 +08:00
parent e4b039fc02
commit ac3f38ed58
2 changed files with 544 additions and 3 deletions

View File

@@ -85,6 +85,13 @@ SOFTWARE_CONFIG = {
{"type": "sleep", "parameters": {"seconds": 5}}
]
},
"jade": {
"snapshot": "jade",
"config": [
{"type": "launch", "parameters": {"command": ["C:\\JADE\\jade 6.5\\MDI Jade 6.5\\jade6.5.exe"]}},
{"type": "sleep", "parameters": {"seconds": 5}}
]
},
}
# Default config for unknown software
@@ -522,8 +529,12 @@ async def main():
global stats, FORCE_REGENERATE
stats = ProcessingStats()
# Parse --force flag
# Parse the --force flag and the optional "--software <name>" filter (value is the argument that follows it)
FORCE_REGENERATE = "--force" in sys.argv
software_filter = None
for i, arg in enumerate(sys.argv):
if arg == "--software" and i + 1 < len(sys.argv):
software_filter = sys.argv[i + 1]
if not API_KEY:
logger.error("OPENAI_API_KEY environment variable not set.")
@@ -536,9 +547,10 @@ async def main():
logger.info(f"Please put software PDF tutorials into subfolders in: {INPUT_FOLDER}")
return
# Find files
# Find files; when --software is given, only walk that subfolder of INPUT_FOLDER
search_folder = INPUT_FOLDER / software_filter if software_filter else INPUT_FOLDER
files = []
for root, _, filenames in os.walk(INPUT_FOLDER):
for root, _, filenames in os.walk(search_folder):
for f in filenames:
if Path(f).suffix.lower() in SUPPORTED_EXTENSIONS:
files.append(os.path.join(root, f))