diff --git a/configs/eval_corebench_2409_base_objective.py b/configs/eval_corebench_2409_base_objective.py index 9c904365..d5d7a387 100644 --- a/configs/eval_corebench_2409_base_objective.py +++ b/configs/eval_corebench_2409_base_objective.py @@ -81,7 +81,7 @@ core_summary_groups = [ ['drop', 'accuracy'], ['math', 'accuracy'], ['gsm8k', 'accuracy'], - ['mathbench-t (average)', 'naive_average'] + ['mathbench-t (average)', 'naive_average'], ['GPQA_diamond', 'accuracy'], ['openai_humaneval', 'humaneval_pass@1'], ['IFEval', 'Prompt-level-strict-accuracy'], @@ -101,7 +101,7 @@ summarizer = dict( ['drop', 'accuracy'], ['math', 'accuracy'], ['gsm8k', 'accuracy'], - ['mathbench-t (average)', 'naive_average'] + ['mathbench-t (average)', 'naive_average'], ['GPQA_diamond', 'accuracy'], ['openai_humaneval', 'humaneval_pass@1'], ['IFEval', 'Prompt-level-strict-accuracy'], @@ -185,4 +185,4 @@ eval = dict( # PART 5 Utils Configuaration # ####################################################################### base_exp_dir = 'outputs/corebench_2409_objective/' -work_dir = osp.join(base_exp_dir, 'chat_objective') +work_dir = osp.join(base_exp_dir, 'base_objective') diff --git a/opencompass/models/bailing_api_oc.py b/opencompass/models/bailing_api_oc.py index 54e0d502..d4368a36 100644 --- a/opencompass/models/bailing_api_oc.py +++ b/opencompass/models/bailing_api_oc.py @@ -13,7 +13,6 @@ try: from retrying import retry except ImportError: retry = None - print('please install retrying by `pip install retrying`') from opencompass.utils.prompt import PromptList diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index 4a07dee3..aff2579a 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -601,6 +601,10 @@ class OpenAISDK(OpenAI): if self.verbose: self.logger.info( 'Successfully get response from OpenAI API') + try: + self.logger.info(responses) + except Exception as e: # noqa F841 + pass return responses.choices[0].message.content except Exception as e: self.logger.error(e)