OpenCompass/opencompass/datasets/healthbench/sampler/claude_sampler.py
2025-05-15 08:50:05 +00:00

104 lines
4.1 KiB
Python

import os
import time
import anthropic
from .. import common
from ..types import MessageList, SamplerBase, SamplerResponse
# System prompt matching the one LMSYS Chatbot Arena uses for Claude, with
# the {currentDateTime} placeholder frozen to 2024-04-01 so evaluation runs
# are reproducible regardless of when they execute.
CLAUDE_SYSTEM_MESSAGE_LMSYS = (
'The assistant is Claude, created by Anthropic. The current date is '
"{currentDateTime}. Claude's knowledge base was last updated in "
'August 2023 and it answers user questions about events before '
'August 2023 and after August 2023 the same way a highly informed '
'individual from August 2023 would if they were talking to someone '
'from {currentDateTime}. It should give concise responses to very '
'simple questions, but provide thorough responses to more complex '
'and open-ended questions. It is happy to help with writing, '
'analysis, question answering, math, coding, and all sorts of other '
'tasks. It uses markdown for coding. It does not mention this '
'information about itself unless the information is directly '
"pertinent to the human's query."
).format(currentDateTime='2024-04-01')
# reference: https://github.com/lm-sys/FastChat/blob/7899355ebe32117fdae83985cf8ee476d2f4243f/fastchat/conversation.py#L894
class ClaudeCompletionSampler(SamplerBase):
def __init__(
    self,
    model: str,
    system_message: str | None = None,
    temperature: float = 0.0,  # default in Anthropic example
    max_tokens: int = 4096,
):
    """Configure a sampler over the Anthropic messages API.

    Args:
        model: Anthropic model identifier to query.
        system_message: Optional system prompt sent via the ``system=`` field.
        temperature: Sampling temperature (Anthropic example default).
        max_tokens: Upper bound on tokens generated per response.
    """
    self.model = model
    self.system_message = system_message
    self.temperature = temperature
    self.max_tokens = max_tokens
    self.image_format = 'base64'
    # The SDK client picks up ANTHROPIC_API_KEY from the environment on its
    # own; the attribute below just mirrors it for inspection.
    self.client = anthropic.Anthropic()
    self.api_key = os.environ.get('ANTHROPIC_API_KEY')  # please set your API_KEY
def _handle_image(
self,
image: str,
encoding: str = 'base64',
format: str = 'png',
fovea: int = 768,
):
new_image = {
'type': 'image',
'source': {
'type': encoding,
'media_type': f'image/{format}',
'data': image,
},
}
return new_image
def _handle_text(self, text):
return {'type': 'text', 'text': text}
def _pack_message(self, role, content):
return {'role': str(role), 'content': content}
def __call__(self, message_list: MessageList) -> SamplerResponse:
    """Sample one completion from the Anthropic messages API.

    Retries forever with exponential backoff on rate-limit errors; any
    other exception propagates to the caller.

    Args:
        message_list: Conversation so far; must contain only ``user`` and
            ``assistant`` roles (Anthropic takes the system prompt via the
            separate ``system=`` field, not in ``messages``).

    Returns:
        SamplerResponse with the first content block's text and the full
        message list actually sent (system message prepended for record).

    Raises:
        ValueError: If ``message_list`` contains roles other than
            user/assistant.
    """
    # Validate once up front; the original re-ran this check on every
    # retry iteration even though the input cannot change.
    if not common.has_only_user_assistant_messages(message_list):
        raise ValueError(f'Claude sampler only supports user and assistant messages, got {message_list}')

    # Build the request arguments once. The two branches in the original
    # duplicated every argument except the optional `system=` field, and
    # computed `claude_input_messages` only after the API call.
    request_kwargs = {
        'model': self.model,
        'max_tokens': self.max_tokens,
        'temperature': self.temperature,
        'messages': message_list,
    }
    if self.system_message:
        request_kwargs['system'] = self.system_message
        # Record the system prompt alongside the user/assistant turns so
        # the metadata reflects everything that was actually sent.
        claude_input_messages: MessageList = [{'role': 'system', 'content': self.system_message}] + message_list
    else:
        claude_input_messages = message_list

    trial = 0
    while True:
        try:
            response_message = self.client.messages.create(**request_kwargs)
            response_text = response_message.content[0].text
            return SamplerResponse(
                response_text=response_text,
                response_metadata={},
                actual_queried_message_list=claude_input_messages,
            )
        except anthropic.RateLimitError as e:
            # Exponential backoff: 1s, 2s, 4s, ... with unbounded retries.
            exception_backoff = 2**trial
            print(
                f'Rate limit exception so wait and retry {trial} after {exception_backoff} sec',
                e,
            )
            time.sleep(exception_backoff)
            trial += 1
        # unknown error shall throw exception