Mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00
[Update] Minor updates
This commit is contained in:
parent: 330a6e5ca7
commit: cf6084bb77
@@ -48,7 +48,7 @@ def clean_units(pred_str: str):
|
|||||||
|
|
||||||
|
|
||||||
def number_it(num):
|
def number_it(num):
|
||||||
from latex2sympy2 import latex2sympy
|
from latex2sympy2_extended import latex2sympy
|
||||||
if isinstance(num, (int, float)):
|
if isinstance(num, (int, float)):
|
||||||
return num
|
return num
|
||||||
|
|
||||||
|
@@ -17,7 +17,7 @@ def time_limit(seconds: float):
|
|||||||
|
|
||||||
|
|
||||||
def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
|
def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
|
||||||
from latex2sympy2 import latex2sympy
|
from latex2sympy2_extended import latex2sympy
|
||||||
|
|
||||||
if any([option in pred.lower() for option in ['yes', 'true']]):
|
if any([option in pred.lower() for option in ['yes', 'true']]):
|
||||||
pred = 'True'
|
pred = 'True'
|
||||||
|
@@ -33,6 +33,7 @@ class ClaudeSDK(BaseAPIModel):
|
|||||||
max_seq_len: int = 2048,
|
max_seq_len: int = 2048,
|
||||||
meta_template: Optional[Dict] = None,
|
meta_template: Optional[Dict] = None,
|
||||||
temperature: Optional[float] = 0.0,
|
temperature: Optional[float] = 0.0,
|
||||||
|
thinking: Optional[Dict] = None,
|
||||||
retry: int = 2,
|
retry: int = 2,
|
||||||
):
|
):
|
||||||
super().__init__(path=path,
|
super().__init__(path=path,
|
||||||
@@ -49,6 +50,7 @@ class ClaudeSDK(BaseAPIModel):
|
|||||||
self.anthropic = Anthropic(api_key=key)
|
self.anthropic = Anthropic(api_key=key)
|
||||||
self.model = path
|
self.model = path
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
|
self.thinking = thinking
|
||||||
|
|
||||||
def generate(
|
def generate(
|
||||||
self,
|
self,
|
||||||
@@ -108,11 +110,26 @@ class ClaudeSDK(BaseAPIModel):
|
|||||||
while num_retries < self.retry:
|
while num_retries < self.retry:
|
||||||
self.wait()
|
self.wait()
|
||||||
try:
|
try:
|
||||||
responses = self.anthropic.messages.create(
|
api_params = {
|
||||||
model=self.model,
|
'model': self.model,
|
||||||
max_tokens=max_out_len,
|
'max_tokens': max_out_len,
|
||||||
temperature=self.temperature,
|
'temperature': self.temperature,
|
||||||
messages=messages)
|
'messages': messages,
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.thinking is not None:
|
||||||
|
api_params['thinking'] = self.thinking
|
||||||
|
api_params['stream'] = True
|
||||||
|
|
||||||
|
responses = self.anthropic.messages.create(**api_params)
|
||||||
|
|
||||||
|
# Handle new response format
|
||||||
|
for content in responses.content:
|
||||||
|
if content.type == 'text':
|
||||||
|
return content.text
|
||||||
|
|
||||||
|
# If no text type content is found, return the first
|
||||||
|
# content (backward compatibility)
|
||||||
return responses.content[0].text
|
return responses.content[0].text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(e)
|
self.logger.error(e)
|
||||||
|
@@ -652,7 +652,6 @@ class OpenAISDK(OpenAI):
|
|||||||
self.logger.info('Start calling OpenAI API')
|
self.logger.info('Start calling OpenAI API')
|
||||||
responses = self.openai_client.chat.completions.create(
|
responses = self.openai_client.chat.completions.create(
|
||||||
**query_data, timeout=timeout) # timeout in seconds
|
**query_data, timeout=timeout) # timeout in seconds
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
'Successfully get response from OpenAI API')
|
'Successfully get response from OpenAI API')
|
||||||
@@ -660,10 +659,18 @@ class OpenAISDK(OpenAI):
|
|||||||
self.logger.info(responses)
|
self.logger.info(responses)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # noqa F841
|
pass # noqa F841
|
||||||
if not responses.choices:
|
|
||||||
|
# Check if response is empty or content is empty
|
||||||
|
if not responses.choices or not responses.choices[
|
||||||
|
0].message.content:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
'Response is empty, it is an internal server error \
|
'API response is empty, it might be due to excessive '
|
||||||
from the API provider.')
|
'input length or an internal server error '
|
||||||
|
'from your API provider.')
|
||||||
|
num_retries += 1
|
||||||
|
# Continue to retry instead of returning empty response
|
||||||
|
continue
|
||||||
|
|
||||||
return responses.choices[0].message.content
|
return responses.choices[0].message.content
|
||||||
|
|
||||||
except (BadRequestError, APIStatusError) as e:
|
except (BadRequestError, APIStatusError) as e:
|
||||||
|
@@ -11,12 +11,10 @@ faiss_gpu==1.7.2
|
|||||||
-e git+https://github.com/open-compass/human-eval.git#egg=human-eval
|
-e git+https://github.com/open-compass/human-eval.git#egg=human-eval
|
||||||
# IFEval
|
# IFEval
|
||||||
langdetect
|
langdetect
|
||||||
# TheoremQA
|
|
||||||
latex2sympy2==1.9.1
|
|
||||||
# Lawbench, leval
|
# Lawbench, leval
|
||||||
ltp
|
ltp
|
||||||
# Math
|
# Math
|
||||||
math-verify
|
math-verify[antlr4_11_0]
|
||||||
# Taco, apps Dataset
|
# Taco, apps Dataset
|
||||||
pyext
|
pyext
|
||||||
# Law Bench
|
# Law Bench
|
||||||
|
Loading…
Reference in New Issue
Block a user