# OpenCompass/opencompass/models/openai_api.py


import json
import os
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from typing import Dict, List, Optional, Union

import requests

from opencompass.registry import MODELS
from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]

OPENAI_API_BASE = 'https://api.openai.com/v1/chat/completions'


@MODELS.register_module()
class OpenAI(BaseAPIModel):
    """Model wrapper around OpenAI's models.

    Args:
        path (str): The name of OpenAI's model.
        max_seq_len (int): The maximum allowed sequence length of a model.
            Note that the length of prompt + generated tokens shall not
            exceed this value. Defaults to 2048.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        retry (int): Number of retries if the API call fails. Defaults to 2.
        key (str or List[str]): OpenAI key(s). In particular, when it
            is set to "ENV", the key will be fetched from the environment
            variable $OPENAI_API_KEY, as the openai package does by default.
            If it's a list, the keys will be used in a round-robin manner.
            Defaults to 'ENV'.
        org (str or List[str], optional): OpenAI organization(s). If not
            specified, OpenAI uses the default organization bound to each API
            key. If specified, the orgs will be posted with each request in
            a round-robin manner. Defaults to None.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case meta instructions need to be
            injected into or wrapped around the prompt.
        openai_api_base (str): The base url of OpenAI's API. Defaults to
            'https://api.openai.com/v1/chat/completions'.
        temperature (float, optional): What sampling temperature to use.
            If not None, will override the temperature in the `generate()`
            call. Defaults to None.
    """
    is_api: bool = True

    def __init__(self,
                 path: str,
                 max_seq_len: int = 2048,
                 query_per_second: int = 1,
                 retry: int = 2,
                 key: Union[str, List[str]] = 'ENV',
                 org: Optional[Union[str, List[str]]] = None,
                 meta_template: Optional[Dict] = None,
                 openai_api_base: str = OPENAI_API_BASE,
                 temperature: Optional[float] = None):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         meta_template=meta_template,
                         query_per_second=query_per_second,
                         retry=retry)
        # Lazy import so tiktoken is only required when this model is used.
        import tiktoken
        self.tiktoken = tiktoken
        self.temperature = temperature

        if isinstance(key, str):
            self.keys = [os.getenv('OPENAI_API_KEY') if key == 'ENV' else key]
        else:
            self.keys = key
        self.key_ctr = 0
        if isinstance(org, str):
            self.orgs = [org]
        else:
            self.orgs = org
        self.org_ctr = 0
        self.url = openai_api_base
        # A single shared lock guards the round-robin key/org counters.
        # Creating a fresh Lock() at each use site (as before) provided no
        # mutual exclusion across worker threads.
        self.lock = Lock()

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
        temperature: float = 0.7,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptLists.
                Each PromptList should be organized in OpenCompass' API
                format.
            max_out_len (int): The maximum length of the output.
            temperature (float): What sampling temperature to use,
                between 0 and 2. Higher values like 0.8 will make the output
                more random, while lower values like 0.2 will make it more
                focused and deterministic. Defaults to 0.7.

        Returns:
            List[str]: A list of generated strings.
        """
        if self.temperature is not None:
            temperature = self.temperature

        with ThreadPoolExecutor() as executor:
            # map() yields results in input order, so outputs stay aligned
            # with their prompts even though requests run concurrently.
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs),
                             [temperature] * len(inputs)))
        return results

    def _generate(self, input: PromptType, max_out_len: int,
                  temperature: float) -> str:
        """Generate results given an input.

        Args:
            input (PromptType): A string or PromptList.
                The PromptList should be organized in OpenCompass' API
                format.
            max_out_len (int): The maximum length of the output.
            temperature (float): What sampling temperature to use,
                between 0 and 2. Higher values like 0.8 will make the output
                more random, while lower values like 0.2 will make it more
                focused and deterministic.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                if item['role'] == 'HUMAN':
                    msg['role'] = 'user'
                elif item['role'] == 'BOT':
                    msg['role'] = 'assistant'
                elif item['role'] == 'SYSTEM':
                    msg['role'] = 'system'
                messages.append(msg)

        # The context window of gpt-3.5-turbo is 4097 tokens; budgeting
        # against 4000 leaves a small margin for chat-format overhead.
        max_out_len = min(max_out_len, 4000 - self.get_token_len(str(input)))
        if max_out_len <= 0:
            return ''
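        # For example, a 3600-token prompt leaves a budget of 400 output
        # tokens, so a requested max_out_len of 512 is clipped to 400.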
        max_num_retries = 0
        while max_num_retries < self.retry:
            self.wait()

            if hasattr(self, 'keys'):
                # Rotate API keys round-robin under the shared lock, since
                # multiple worker threads reach this point concurrently.
                with self.lock:
                    self.key_ctr += 1
                    if self.key_ctr == len(self.keys):
                        self.key_ctr = 0
                    header = {
                        'Authorization': f'Bearer {self.keys[self.key_ctr]}',
                        'content-type': 'application/json',
                    }

            if self.orgs:
                with self.lock:
                    self.org_ctr += 1
                    if self.org_ctr == len(self.orgs):
                        self.org_ctr = 0
                    header['OpenAI-Organization'] = self.orgs[self.org_ctr]

            try:
                data = dict(
                    model=self.path,
                    messages=messages,
                    max_tokens=max_out_len,
                    n=1,
                    stop=None,
                    temperature=temperature,
                )
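                # The payload mirrors OpenAI's chat completions request
                # body, e.g. (values illustrative):
                # {"model": "gpt-3.5-turbo",
                #  "messages": [{"role": "user", "content": "..."}],
                #  "max_tokens": 512, "n": 1, "stop": null,
                #  "temperature": 0.7}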
                raw_response = requests.post(self.url,
                                             headers=header,
                                             data=json.dumps(data))
            except requests.ConnectionError:
                self.logger.error('Got connection error, retrying...')
                continue
            try:
                response = raw_response.json()
            except requests.JSONDecodeError:
                # Without this retry, `response` would be undefined below.
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
                max_num_retries += 1
                continue
            try:
                return response['choices'][0]['message']['content'].strip()
            except KeyError:
                if 'error' in response:
                    self.logger.error('Found error message in response: %s',
                                      str(response['error']))

            max_num_retries += 1

        raise RuntimeError('Calling OpenAI failed after retrying for '
                           f'{max_num_retries} times. Check the logs for '
                           'details.')

    def get_token_len(self, prompt: str) -> int:
        """Get lengths of the tokenized string, computed with the tiktoken
        encoding that matches the model name in `self.path`.

        Args:
            prompt (str): Input string.

        Returns:
            int: Length of the input tokens.
        """
        enc = self.tiktoken.encoding_for_model(self.path)
        return len(enc.encode(prompt))
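
# A minimal usage sketch (illustrative, not part of this module; the model
# name and generation parameters below are assumptions):
#
#   model = OpenAI(path='gpt-3.5-turbo', key='ENV', query_per_second=1)
#   outputs = model.generate(['Say hello.'], max_out_len=64, temperature=0.2)
#   print(outputs[0])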