mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Update] Fix LLM Judge metrics calculation & Add reasoning content concat to OpenAI SDK
This commit is contained in:
parent
75e7834b59
commit
65ff602cf5
@ -37,7 +37,6 @@ def get_final_results(judged_answers,
|
||||
is_correct = is_correct_count / count
|
||||
is_incorrect = is_incorrect_count / count
|
||||
is_given_attempted = is_correct + is_incorrect
|
||||
loose_accuracy = is_correct / count
|
||||
accuracy_given_attempted = (is_correct / is_given_attempted
|
||||
if is_given_attempted > 0 else 0)
|
||||
attempted_judge_ratio = attempted_judge_count / count
|
||||
@ -46,7 +45,7 @@ def get_final_results(judged_answers,
|
||||
(accuracy_given_attempted + is_correct) if
|
||||
(accuracy_given_attempted + is_correct) > 0 else 0)
|
||||
result = {
|
||||
metric_name: loose_accuracy * 100,
|
||||
metric_name: is_correct * 100,
|
||||
f'{metric_name}_given_attempted': accuracy_given_attempted * 100,
|
||||
'f1': f1,
|
||||
'attempted_ratio': attempted_judge_ratio * 100,
|
||||
|
@ -531,8 +531,7 @@ class OpenAI(BaseAPIModel):
|
||||
|
||||
class OpenAISDK(OpenAI):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
def __init__(self,
|
||||
path: str = 'gpt-3.5-turbo',
|
||||
max_seq_len: int = 16384,
|
||||
query_per_second: int = 1,
|
||||
@ -551,7 +550,7 @@ class OpenAISDK(OpenAI):
|
||||
extra_body: Dict | None = None,
|
||||
verbose: bool = False,
|
||||
status_code_mappings: dict = {},
|
||||
):
|
||||
think_tag: str = '</think>'):
|
||||
super().__init__(
|
||||
path,
|
||||
max_seq_len,
|
||||
@ -596,6 +595,7 @@ class OpenAISDK(OpenAI):
|
||||
if self.verbose:
|
||||
self.logger.info(f'Used openai_client: {self.openai_client}')
|
||||
self.status_code_mappings = status_code_mappings
|
||||
self.think_tag = think_tag
|
||||
|
||||
def _generate(self,
|
||||
input: PromptList | str,
|
||||
@ -670,6 +670,12 @@ class OpenAISDK(OpenAI):
|
||||
num_retries += 1
|
||||
# Continue to retry instead of returning empty response
|
||||
continue
|
||||
# If the model has reasoning_content, concat it
|
||||
# with the content
|
||||
if hasattr(responses.choices[0].message, 'reasoning_content'):
|
||||
return (responses.choices[0].message.reasoning_content +
|
||||
self.think_tag +
|
||||
responses.choices[0].message.content)
|
||||
|
||||
return responses.choices[0].message.content
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user