diff --git a/configs/api_examples/eval_api_rendu.py b/configs/api_examples/eval_api_rendu.py
new file mode 100644
index 00000000..2b2a2647
--- /dev/null
+++ b/configs/api_examples/eval_api_rendu.py
@@ -0,0 +1,39 @@
+from mmengine.config import read_base
+from opencompass.models import Rendu
+from opencompass.partitioners import NaivePartitioner
+from opencompass.runners.local_api import LocalAPIRunner
+from opencompass.tasks import OpenICLInferTask
+
+with read_base():
+    from ..summarizers.medium import summarizer
+    from ..datasets.ceval.ceval_gen import ceval_datasets
+
+datasets = [
+    *ceval_datasets
+]
+
+models = [
+    dict(
+        abbr='Rendu',
+        type=Rendu,
+        path='rendu',
+        key='xxxxxx',
+        url='xxxxxx',
+        generation_kwargs={
+            'temperature': 0.1,
+            'top_p': 0.9,
+        },
+        query_per_second=10,
+        max_out_len=2048,
+        max_seq_len=2048,
+        batch_size=8),
+]
+
+infer = dict(partitioner=dict(type=NaivePartitioner),
+             runner=dict(
+                 type=LocalAPIRunner,
+                 max_num_workers=1,
+                 concurrent_users=1,
+                 task=dict(type=OpenICLInferTask)), )
+
+work_dir = 'outputs/api_rendu/'
diff --git a/opencompass/models/__init__.py b/opencompass/models/__init__.py
index ab75a4ab..0d384fed 100644
--- a/opencompass/models/__init__.py
+++ b/opencompass/models/__init__.py
@@ -35,6 +35,7 @@ from .openai_api import OpenAI  # noqa: F401
 from .openai_api import OpenAISDK  # noqa: F401
 from .pangu_api import PanGu  # noqa: F401
 from .qwen_api import Qwen  # noqa: F401
+from .rendu_api import Rendu  # noqa: F401
 from .sensetime_api import SenseTime  # noqa: F401
 from .stepfun_api import StepFun  # noqa: F401
 from .turbomind import TurboMindModel  # noqa: F401
diff --git a/opencompass/models/rendu_api.py b/opencompass/models/rendu_api.py
new file mode 100644
index 00000000..a88741a7
--- /dev/null
+++ b/opencompass/models/rendu_api.py
@@ -0,0 +1,177 @@
+import time
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, List, Optional, Union
+
+import requests
+
+from opencompass.utils.prompt import PromptList
+
+from .base_api import BaseAPIModel
+
+PromptType = Union[PromptList, str]
+
+
+class Rendu(BaseAPIModel):
+    """Model wrapper around Rendu.
+
+    Documentation:
+
+    Args:
+        path (str): The name of the Rendu model, e.g. `rendu`.
+        key (str): Authorization key.
+        url (str): Model url.
+        query_per_second (int): The maximum queries allowed per second
+            between two consecutive calls of the API. Defaults to 1.
+        max_seq_len (int): Unused here.
+        meta_template (Dict, optional): The model's meta prompt
+            template if needed, in case the requirement of injecting or
+            wrapping of any meta instructions.
+        retry (int): Number of retries if the API call fails. Defaults to 2.
+    """
+    is_api: bool = True
+
+    def __init__(self,
+                 path: str,
+                 key: str,
+                 url: str,
+                 query_per_second: int = 2,
+                 max_seq_len: int = 2048,
+                 meta_template: Optional[Dict] = None,
+                 retry: int = 2,
+                 generation_kwargs: Dict = {
+                     'temperature': 0.7,
+                     'top_p': 0.9,
+                 }):
+        super().__init__(path=path,
+                         max_seq_len=max_seq_len,
+                         query_per_second=query_per_second,
+                         meta_template=meta_template,
+                         retry=retry,
+                         generation_kwargs=generation_kwargs)
+
+        self.url = url
+        self.key = key
+        self.model = path
+        self.headers = {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer ' + self.key,
+        }
+
+    def generate(
+        self,
+        inputs: List[PromptType],
+        max_out_len: int = 512,
+    ) -> List[str]:
+        """Generate results given a list of inputs.
+
+        Args:
+            inputs (List[PromptType]): A list of strings or PromptDicts.
+                The PromptDict should be organized in OpenCompass'
+                API format.
+            max_out_len (int): The maximum length of the output.
+
+        Returns:
+            List[str]: A list of generated strings.
+        """
+        with ThreadPoolExecutor() as executor:
+            results = list(
+                executor.map(self._generate, inputs,
+                             [max_out_len] * len(inputs)))
+        self.flush()
+        return results
+
+    def _generate(
+        self,
+        input: PromptType,
+        max_out_len: int = 512,
+    ) -> str:
+        """Generate results given an input.
+
+        Args:
+            input (PromptType): A string or PromptDict.
+                The PromptDict should be organized in OpenCompass'
+                API format.
+            max_out_len (int): The maximum length of the output.
+
+        Returns:
+            str: The generated string.
+        """
+        assert isinstance(input, (str, PromptList))
+
+        if isinstance(input, str):
+            messages = [{'role': 'user', 'content': input}]
+        else:
+            # Map OpenCompass roles onto the API's chat roles and merge
+            # consecutive turns from the same role into a single message.
+            messages = []
+            msg_buffer, last_role = [], None
+            for item in input:
+                item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
+                if item['role'] != last_role and last_role is not None:
+                    messages.append({
+                        'content': '\n'.join(msg_buffer),
+                        'role': last_role
+                    })
+                    msg_buffer = []
+                msg_buffer.append(item['prompt'])
+                last_role = item['role']
+            messages.append({
+                'content': '\n'.join(msg_buffer),
+                'role': last_role
+            })
+
+        data = {
+            'model': self.model,
+            'messages': messages,
+        }
+        data.update(self.generation_kwargs)
+
+        max_num_retries = 0
+        while max_num_retries < self.retry:
+            self.acquire()
+            try:
+                raw_response = requests.request('POST',
+                                                url=self.url,
+                                                headers=self.headers,
+                                                json=data)
+            except Exception as err:
+                print('Request Error: {}'.format(err))
+                self.release()
+                time.sleep(2)
+                max_num_retries += 1
+                continue
+
+            response = raw_response.json()
+            self.release()
+
+            if response is None:
+                print('Connection error, reconnect.')
+                # On a connection error, frequent requests will cause a
+                # continuously unstable network, so wait here to slow
+                # down the requests.
+                self.wait()
+                max_num_retries += 1
+                continue
+
+            if raw_response.status_code == 200:
+                msg = response['choices'][0]['message']['content']
+                return msg
+
+            if raw_response.status_code == 403:
+                print('Request rejected: invalid api_key.')
+            elif raw_response.status_code == 400:
+                print(messages, response)
+                print('Request failed, status code:',
+                      raw_response.status_code)
+                msg = 'The request was rejected due to high-risk content'
+                return msg
+            elif raw_response.status_code == 429:
+                print(messages, response)
+                print('Request failed, status code:',
+                      raw_response.status_code)
+                # Rate limited; back off before retrying.
+                time.sleep(5)
+
+            max_num_retries += 1
+
+        raise RuntimeError(raw_response)
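
A minimal way to exercise the new config, assuming OpenCompass' standard
`run.py` entry point at the repository root (replace the placeholder `key`
and `url` values in the config first):

    python run.py configs/api_examples/eval_api_rendu.py

Per `work_dir`, results are written under `outputs/api_rendu/`.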