[Update] Minor updates: switch to latex2sympy2_extended, add Claude `thinking` option, retry on empty OpenAI responses

This commit is contained in:
MaiziXiao 2025-04-01 11:24:02 +00:00
parent 330a6e5ca7
commit cf6084bb77
5 changed files with 36 additions and 14 deletions

View File

@ -48,7 +48,7 @@ def clean_units(pred_str: str):
def number_it(num): def number_it(num):
from latex2sympy2 import latex2sympy from latex2sympy2_extended import latex2sympy
if isinstance(num, (int, float)): if isinstance(num, (int, float)):
return num return num

View File

@ -17,7 +17,7 @@ def time_limit(seconds: float):
def extract_theoremqa_answer(pred: str, answer_flag: bool = True): def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
from latex2sympy2 import latex2sympy from latex2sympy2_extended import latex2sympy
if any([option in pred.lower() for option in ['yes', 'true']]): if any([option in pred.lower() for option in ['yes', 'true']]):
pred = 'True' pred = 'True'

View File

@ -33,6 +33,7 @@ class ClaudeSDK(BaseAPIModel):
max_seq_len: int = 2048, max_seq_len: int = 2048,
meta_template: Optional[Dict] = None, meta_template: Optional[Dict] = None,
temperature: Optional[float] = 0.0, temperature: Optional[float] = 0.0,
thinking: Optional[Dict] = None,
retry: int = 2, retry: int = 2,
): ):
super().__init__(path=path, super().__init__(path=path,
@ -49,6 +50,7 @@ class ClaudeSDK(BaseAPIModel):
self.anthropic = Anthropic(api_key=key) self.anthropic = Anthropic(api_key=key)
self.model = path self.model = path
self.temperature = temperature self.temperature = temperature
self.thinking = thinking
def generate( def generate(
self, self,
@ -108,11 +110,26 @@ class ClaudeSDK(BaseAPIModel):
while num_retries < self.retry: while num_retries < self.retry:
self.wait() self.wait()
try: try:
responses = self.anthropic.messages.create( api_params = {
model=self.model, 'model': self.model,
max_tokens=max_out_len, 'max_tokens': max_out_len,
temperature=self.temperature, 'temperature': self.temperature,
messages=messages) 'messages': messages,
}
if self.thinking is not None:
api_params['thinking'] = self.thinking
api_params['stream'] = True
responses = self.anthropic.messages.create(**api_params)
# Handle new response format
for content in responses.content:
if content.type == 'text':
return content.text
# If no text type content is found, return the first
# content (backward compatibility)
return responses.content[0].text return responses.content[0].text
except Exception as e: except Exception as e:
self.logger.error(e) self.logger.error(e)

View File

@ -652,7 +652,6 @@ class OpenAISDK(OpenAI):
self.logger.info('Start calling OpenAI API') self.logger.info('Start calling OpenAI API')
responses = self.openai_client.chat.completions.create( responses = self.openai_client.chat.completions.create(
**query_data, timeout=timeout) # timeout in seconds **query_data, timeout=timeout) # timeout in seconds
if self.verbose: if self.verbose:
self.logger.info( self.logger.info(
'Successfully get response from OpenAI API') 'Successfully get response from OpenAI API')
@ -660,10 +659,18 @@ class OpenAISDK(OpenAI):
self.logger.info(responses) self.logger.info(responses)
except Exception: except Exception:
pass # noqa F841 pass # noqa F841
if not responses.choices:
# Check if response is empty or content is empty
if not responses.choices or not responses.choices[
0].message.content:
self.logger.error( self.logger.error(
'Response is empty, it is an internal server error \ 'API response is empty, it might be due to excessive '
from the API provider.') 'input length or an internal server error '
'from your API provider.')
num_retries += 1
# Continue to retry instead of returning empty response
continue
return responses.choices[0].message.content return responses.choices[0].message.content
except (BadRequestError, APIStatusError) as e: except (BadRequestError, APIStatusError) as e:

View File

@ -11,12 +11,10 @@ faiss_gpu==1.7.2
-e git+https://github.com/open-compass/human-eval.git#egg=human-eval -e git+https://github.com/open-compass/human-eval.git#egg=human-eval
# IFEval # IFEval
langdetect langdetect
# TheoremQA
latex2sympy2==1.9.1
# Lawbench, leval # Lawbench, leval
ltp ltp
# Math # Math
math-verify math-verify[antlr4_11_0]
# Taco, apps Dataset # Taco, apps Dataset
pyext pyext
# Law Bench # Law Bench