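Fix HealthBench: read the grader model name from the OC_JUDGE_MODEL environment variable instead of hard-coding 'gpt-4.1-2025-04-14'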

2025-05-30 16:03:24 +08:00 · 2025-05-29 07:12:42 +00:00 · 2025-05-29 07:12:42 +00:00 · 6aabba778d
commit 6aabba778d
parent 7687f8aa3c
1 changed file with 2 additions and 1 deletion
--- a/opencompass/datasets/healthbench/healthbench.py
+++ b/opencompass/datasets/healthbench/healthbench.py
@@ -1,5 +1,6 @@
 import hashlib
 import json
+import os
 import re
 from collections import defaultdict
 from typing import Literal
@@ -260,7 +261,7 @@ class HealthBenchEvaluator(BaseEvaluator):
        self.n_repeats = n_repeats
        self.n_threads = n_threads
        self.subset_name = subset_name
-        self.grader_model = ChatCompletionSampler(model='gpt-4.1-2025-04-14', system_message=OPENAI_SYSTEM_MESSAGE_API, max_tokens=2048,)   # noqa: E501
+        self.grader_model = ChatCompletionSampler(model=os.environ['OC_JUDGE_MODEL'], system_message=OPENAI_SYSTEM_MESSAGE_API, max_tokens=2048,)   # noqa: E501

    def grade_sample(self, prompt: list[dict[str, str]], response_text: str, example_tags: list[str], rubric_items: list[RubricItem], ) -> tuple[dict, str, list[dict]]:  # noqa: E501
        # construct and grade the sample