From f66b0b347a8993628e122c9e39af4369b75e1354 Mon Sep 17 00:00:00 2001 From: Linchen Xiao Date: Wed, 2 Apr 2025 12:03:45 +0800 Subject: [PATCH] [Update] Requirements update (#1993) --- .../datasets/TheoremQA/number_utils.py | 2 +- opencompass/datasets/TheoremQA/utils.py | 2 +- opencompass/models/claude_sdk_api.py | 27 +++++++++++++++---- opencompass/models/openai_api.py | 15 ++++++++--- requirements/extra.txt | 4 +-- 5 files changed, 36 insertions(+), 14 deletions(-) diff --git a/opencompass/datasets/TheoremQA/number_utils.py b/opencompass/datasets/TheoremQA/number_utils.py index 12f6e6dc..fd93fe66 100644 --- a/opencompass/datasets/TheoremQA/number_utils.py +++ b/opencompass/datasets/TheoremQA/number_utils.py @@ -48,7 +48,7 @@ def clean_units(pred_str: str): def number_it(num): - from latex2sympy2 import latex2sympy + from latex2sympy2_extended import latex2sympy if isinstance(num, (int, float)): return num diff --git a/opencompass/datasets/TheoremQA/utils.py b/opencompass/datasets/TheoremQA/utils.py index a4f32b2b..ca9c2661 100644 --- a/opencompass/datasets/TheoremQA/utils.py +++ b/opencompass/datasets/TheoremQA/utils.py @@ -17,7 +17,7 @@ def time_limit(seconds: float): def extract_theoremqa_answer(pred: str, answer_flag: bool = True): - from latex2sympy2 import latex2sympy + from latex2sympy2_extended import latex2sympy if any([option in pred.lower() for option in ['yes', 'true']]): pred = 'True' diff --git a/opencompass/models/claude_sdk_api.py b/opencompass/models/claude_sdk_api.py index 8cbf98ef..173047f1 100644 --- a/opencompass/models/claude_sdk_api.py +++ b/opencompass/models/claude_sdk_api.py @@ -33,6 +33,7 @@ class ClaudeSDK(BaseAPIModel): max_seq_len: int = 2048, meta_template: Optional[Dict] = None, temperature: Optional[float] = 0.0, + thinking: Optional[Dict] = None, retry: int = 2, ): super().__init__(path=path, @@ -49,6 +50,7 @@ class ClaudeSDK(BaseAPIModel): self.anthropic = Anthropic(api_key=key) self.model = path self.temperature = temperature + self.thinking = thinking def generate( self, @@ -108,11 +110,26 @@ class ClaudeSDK(BaseAPIModel): while num_retries < self.retry: self.wait() try: - responses = self.anthropic.messages.create( - model=self.model, - max_tokens=max_out_len, - temperature=self.temperature, - messages=messages) + api_params = { + 'model': self.model, + 'max_tokens': max_out_len, + 'temperature': self.temperature, + 'messages': messages, + } + + if self.thinking is not None: + api_params['thinking'] = self.thinking + api_params['stream'] = True + + responses = self.anthropic.messages.create(**api_params) + + # Handle new response format + for content in responses.content: + if content.type == 'text': + return content.text + + # If no text type content is found, return the first + # content (backward compatibility) return responses.content[0].text except Exception as e: self.logger.error(e) diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index f46de71c..7b2c2c53 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -652,7 +652,6 @@ class OpenAISDK(OpenAI): self.logger.info('Start calling OpenAI API') responses = self.openai_client.chat.completions.create( **query_data, timeout=timeout) # timeout in seconds - if self.verbose: self.logger.info( 'Successfully get response from OpenAI API') @@ -660,10 +659,18 @@ class OpenAISDK(OpenAI): self.logger.info(responses) except Exception: pass # noqa F841 - if not responses.choices: + + # Check if response is empty or content is empty + if not responses.choices or not responses.choices[ + 0].message.content: self.logger.error( - 'Response is empty, it is an internal server error \ - from the API provider.') + 'API response is empty, it might be due to excessive ' + 'input length or an internal server error ' + 'from your API provider.') + num_retries += 1 + # Continue to retry instead of returning empty response + continue + return responses.choices[0].message.content except (BadRequestError, APIStatusError) as e: diff --git a/requirements/extra.txt b/requirements/extra.txt index fd3f7a2f..fa90a34c 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -11,12 +11,10 @@ faiss_gpu==1.7.2 -e git+https://github.com/open-compass/human-eval.git#egg=human-eval # IFEval langdetect -# TheoremQA -latex2sympy2==1.9.1 # Lawbench, leval ltp # Math -math-verify +math-verify[antlr4_11_0] # Taco, apps Dataset pyext # Law Bench