mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
fix bench
This commit is contained in:
parent
7687f8aa3c
commit
6aabba778d
@@ -1,5 +1,6 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from typing import Literal
|
||||
@@ -260,7 +261,7 @@ class HealthBenchEvaluator(BaseEvaluator):
|
||||
self.n_repeats = n_repeats
|
||||
self.n_threads = n_threads
|
||||
self.subset_name = subset_name
|
||||
- self.grader_model = ChatCompletionSampler(model='gpt-4.1-2025-04-14', system_message=OPENAI_SYSTEM_MESSAGE_API, max_tokens=2048,) # noqa: E501
|
||||
+ self.grader_model = ChatCompletionSampler(model=os.environ['OC_JUDGE_MODEL'], system_message=OPENAI_SYSTEM_MESSAGE_API, max_tokens=2048,) # noqa: E501
|
||||
|
||||
def grade_sample(self, prompt: list[dict[str, str]], response_text: str, example_tags: list[str], rubric_items: list[RubricItem], ) -> tuple[dict, str, list[dict]]: # noqa: E501
|
||||
# construct and grade the sample
|
||||
|
Loading…
Reference in New Issue
Block a user