This commit is contained in:
MaiziXiao 2025-04-15 03:19:04 +00:00
parent 75e7834b59
commit a484de0f25
2 changed files with 28 additions and 23 deletions

View File

@@ -37,7 +37,6 @@ def get_final_results(judged_answers,
is_correct = is_correct_count / count is_correct = is_correct_count / count
is_incorrect = is_incorrect_count / count is_incorrect = is_incorrect_count / count
is_given_attempted = is_correct + is_incorrect is_given_attempted = is_correct + is_incorrect
loose_accuracy = is_correct / count
accuracy_given_attempted = (is_correct / is_given_attempted accuracy_given_attempted = (is_correct / is_given_attempted
if is_given_attempted > 0 else 0) if is_given_attempted > 0 else 0)
attempted_judge_ratio = attempted_judge_count / count attempted_judge_ratio = attempted_judge_count / count
@@ -46,7 +45,7 @@ def get_final_results(judged_answers,
(accuracy_given_attempted + is_correct) if (accuracy_given_attempted + is_correct) if
(accuracy_given_attempted + is_correct) > 0 else 0) (accuracy_given_attempted + is_correct) > 0 else 0)
result = { result = {
metric_name: loose_accuracy * 100, metric_name: is_correct * 100,
f'{metric_name}_given_attempted': accuracy_given_attempted * 100, f'{metric_name}_given_attempted': accuracy_given_attempted * 100,
'f1': f1, 'f1': f1,
'attempted_ratio': attempted_judge_ratio * 100, 'attempted_ratio': attempted_judge_ratio * 100,

View File

@@ -531,8 +531,7 @@ class OpenAI(BaseAPIModel):
class OpenAISDK(OpenAI): class OpenAISDK(OpenAI):
def __init__( def __init__(self,
self,
path: str = 'gpt-3.5-turbo', path: str = 'gpt-3.5-turbo',
max_seq_len: int = 16384, max_seq_len: int = 16384,
query_per_second: int = 1, query_per_second: int = 1,
@@ -551,7 +550,7 @@ class OpenAISDK(OpenAI):
extra_body: Dict | None = None, extra_body: Dict | None = None,
verbose: bool = False, verbose: bool = False,
status_code_mappings: dict = {}, status_code_mappings: dict = {},
): think_tag: str = '</think>'):
super().__init__( super().__init__(
path, path,
max_seq_len, max_seq_len,
@@ -596,6 +595,7 @@ class OpenAISDK(OpenAI):
if self.verbose: if self.verbose:
self.logger.info(f'Used openai_client: {self.openai_client}') self.logger.info(f'Used openai_client: {self.openai_client}')
self.status_code_mappings = status_code_mappings self.status_code_mappings = status_code_mappings
self.think_tag = think_tag
def _generate(self, def _generate(self,
input: PromptList | str, input: PromptList | str,
@@ -670,6 +670,12 @@ class OpenAISDK(OpenAI):
num_retries += 1 num_retries += 1
# Continue to retry instead of returning empty response # Continue to retry instead of returning empty response
continue continue
# If the model has reasoning_content, concat it
# with the content
if hasattr(responses.choices[0].message, 'reasoning_content'):
return (responses.choices[0].message.reasoning_content +
self.think_tag +
responses.choices[0].message.content)
return responses.choices[0].message.content return responses.choices[0].message.content