diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index daba009..f0ffafe 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -474,13 +474,18 @@ class DesktopEnv(gym.Env): if "expected" in self.evaluator: assert len(self.metric) == len(self.expected_getter), "The number of metrics and expected getters must be the same" for idx, metric in enumerate(self.metric): - try: - config = self.evaluator["result"][idx] - result_state = self.result_getter[idx](self, config) - except FileNotFoundError: - logger.error("File not found!") - if self.metric_conj == 'and': - return 0 + # Skip result state extraction if result_getter is None (e.g., for vllm_eval) + if self.result_getter[idx] is not None: + try: + config = self.evaluator["result"][idx] + result_state = self.result_getter[idx](self, config) + except FileNotFoundError: + logger.error("File not found!") + if self.metric_conj == 'and': + return 0 + else: + # For evaluators that don't need result state (e.g., vllm_eval) + result_state = None if "expected" in self.evaluator and self.expected_getter and self.evaluator["expected"]: expected_state = self.expected_getter[idx](self, self.evaluator["expected"][idx]) @@ -498,11 +503,16 @@ class DesktopEnv(gym.Env): return sum(results) / len(results) if self.metric_conj == 'and' else max(results) else: # Single metric to evaluate whether the task is successfully completed - try: - result_state = self.result_getter(self, self.evaluator["result"]) - except FileNotFoundError: - logger.error("File not found!") - return 0 + # For evaluators like vllm_eval that don't need result_getter, skip result state extraction + if self.result_getter is not None: + try: + result_state = self.result_getter(self, self.evaluator["result"]) + except FileNotFoundError: + logger.error("File not found!") + return 0 + else: + # For evaluators that don't need result state (e.g., vllm_eval) + result_state = None if "expected" in self.evaluator and self.expected_getter and self.evaluator["expected"]: expected_state = self.expected_getter(self, self.evaluator["expected"])