OpenCompass/opencompass/models/lightllm_api.py

import json
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional

import numpy as np
import requests

from opencompass.registry import MODELS
from opencompass.utils.logging import get_logger

from .base import BaseModel
from .base_api import TokenBucket


@MODELS.register_module()
class LightllmAPI(BaseModel):

    is_api: bool = True

    def __init__(
            self,
            path: str = 'LightllmAPI',
            url: str = 'http://localhost:8080/generate',
            meta_template: Optional[Dict] = None,
            max_workers_per_task: int = 2,
            rate_per_worker: int = 2,
            retry: int = 2,
            generation_kwargs: Optional[Dict] = None,
    ):
        """Set up a client for a Lightllm HTTP endpoint.

        Args:
            path (str): Forwarded to ``BaseModel.__init__``.
            url (str): Endpoint that every request is POSTed to.
            meta_template (Dict, optional): Forwarded to
                ``BaseModel.__init__`` and also stored on the instance.
            max_workers_per_task (int): Size of the thread pool used by
                ``generate`` and ``get_ppl``.
            rate_per_worker (int): First argument handed to ``TokenBucket``
                (presumably the allowed request rate -- confirm against
                ``TokenBucket``).
            retry (int): Number of unusable responses tolerated per input
                before a ``RuntimeError`` is raised.
            generation_kwargs (Dict, optional): Sent verbatim as the
                ``parameters`` field of every request. Its
                ``max_new_tokens`` entry (default 1024) becomes
                ``self.max_out_len``. ``None`` is treated as an empty dict.
        """
        # A ``dict()`` default in the signature would be created once and
        # shared by every instance; build a fresh dict per instance instead.
        # This also makes an explicit ``None`` safe for the ``.get`` below.
        if generation_kwargs is None:
            generation_kwargs = {}

        super().__init__(path=path,
                         meta_template=meta_template,
                         generation_kwargs=generation_kwargs)
        self.logger = get_logger()
        self.url = url
        self.retry = retry
        self.generation_kwargs = generation_kwargs
        self.max_out_len = self.generation_kwargs.get('max_new_tokens', 1024)
        self.meta_template = meta_template
        self.max_workers_per_task = max_workers_per_task
        # The meaning of the second positional flag is defined by
        # TokenBucket, which is not part of this file.
        self.token_bucket = TokenBucket(rate_per_worker, False)

    def generate(self, inputs: List[str], max_out_len: int,
                 **kwargs) -> List[str]:
        """Fan ``_generate`` out over all inputs with a thread pool.

        Args:
            inputs (List[str]): Prompts to process; one call to
                ``_generate`` is made per item.
            max_out_len (int): Accepted for interface compatibility but not
                read here; ``self.max_out_len`` is forwarded to
                ``_generate`` instead.

        Returns:
            List[str]: One result per input, in the same order as ``inputs``.
        """
        pool_size = self.max_workers_per_task
        with ThreadPoolExecutor(max_workers=pool_size) as pool:
            # One copy of the instance-level limit per input, matching the
            # two-argument signature of ``_generate``.
            out_lens = [self.max_out_len] * len(inputs)
            outputs = pool.map(self._generate, inputs, out_lens)
            return list(outputs)

    def _generate(self, input: str, max_out_len: int) -> str:
        """POST a single prompt to the endpoint and return the generated text.

        Args:
            input (str): Prompt sent as the ``inputs`` field of the payload.
            max_out_len (int): Not read by this method; the request carries
                ``self.generation_kwargs`` as its ``parameters``.

        Returns:
            str: The ``generated_text`` field of the JSON response, or its
            first element when that field is a list.

        Raises:
            RuntimeError: Once ``self.retry`` responses could not be decoded
                as JSON or lacked a ``generated_text`` key.
        """
        header = {'content-type': 'application/json'}
        max_num_retries = 0
        while max_num_retries < self.retry:
            self.wait()
            try:
                self.logger.debug(f'input: {input}')
                data = dict(inputs=input, parameters=self.generation_kwargs)
                raw_response = requests.post(self.url,
                                             headers=header,
                                             data=json.dumps(data))
            except requests.ConnectionError:
                # Connection failures do not consume the retry budget: the
                # request is simply re-sent on the next loop iteration
                # (after going through ``self.wait()`` again).
                self.logger.error('Got connection error, retrying...')
                continue
            try:
                response = raw_response.json()
                generated_text = response['generated_text']
                if isinstance(generated_text, list):
                    generated_text = generated_text[0]
                self.logger.debug(f'generated_text: {generated_text}')
                return generated_text
            except requests.JSONDecodeError:
                # The payload must be referenced by a %s placeholder;
                # passing it as a bare extra argument makes the logging
                # module fail to format the record and drop the payload.
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
            except KeyError:
                self.logger.error(f'KeyError. Response: {str(response)}')
            max_num_retries += 1

        raise RuntimeError('Calling LightllmAPI failed after retrying for '
                           f'{max_num_retries} times. Check the logs for '
                           'details.')

    def get_ppl(self, inputs: List[str], max_out_len: int,
                **kwargs) -> np.ndarray:
        """Score every input with ``_get_ppl`` using a thread pool.

        Args:
            inputs (List[str]): Prompts to score; one call to ``_get_ppl``
                is made per item.
            max_out_len (int): Accepted for interface compatibility but not
                read here; ``self.max_out_len`` is forwarded to ``_get_ppl``
                instead.

        Returns:
            np.ndarray: One value per input, in the same order as
            ``inputs``, each being whatever ``_get_ppl`` returned for it.
        """

        with ThreadPoolExecutor(
                max_workers=self.max_workers_per_task) as executor:
            results = list(
                executor.map(self._get_ppl, inputs,
                             [self.max_out_len] * len(inputs)))
        return np.array(results)

    def _get_ppl(self, input: str, max_out_len: int) -> float:
        """POST a single prompt and return its mean negative log-probability.

        Despite the name, the returned value is the cross-entropy loss
        (negative mean of the per-token log-probabilities), not its
        exponential.

        Args:
            input (str): Prompt sent as the ``inputs`` field of the payload.
            max_out_len (int): Not read by this method; the request carries
                ``self.generation_kwargs`` as its ``parameters``.

        Returns:
            float: ``-sum(logprobs) / len(logprobs)`` over the prompt tokens
            after the first one, or ``0.0`` when no log-probabilities are
            available.

        Raises:
            AssertionError: If the response lacks ``prompt_token_ids`` or
                ``prompt_logprobs``.
            RuntimeError: Once ``self.retry`` responses could not be decoded
                as JSON.
        """
        header = {'content-type': 'application/json'}
        max_num_retries = 0
        while max_num_retries < self.retry:
            self.wait()
            try:
                data = dict(inputs=input, parameters=self.generation_kwargs)
                raw_response = requests.post(self.url,
                                             headers=header,
                                             data=json.dumps(data))
            except requests.ConnectionError:
                # Connection failures do not consume the retry budget: the
                # request is simply re-sent on the next loop iteration
                # (after going through ``self.wait()`` again).
                self.logger.error('Got connection error, retrying...')
                continue

            try:
                response = raw_response.json()
            except requests.JSONDecodeError:
                # The payload must be referenced by a %s placeholder;
                # passing it as a bare extra argument makes the logging
                # module fail to format the record and drop the payload.
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
                max_num_retries += 1
                continue

            # Raised explicitly rather than through ``assert`` so the check
            # still runs under ``python -O``; the exception type is kept so
            # existing callers see the same failure.
            if ('prompt_token_ids' not in response
                    or 'prompt_logprobs' not in response):
                raise AssertionError(
                    'prompt_token_ids and prompt_logprobs must be in the '
                    'output. Please consider adding '
                    '--return_all_prompt_logprobs argument when starting '
                    f'lightllm service. Response: {str(response)}')

            # The first token id is skipped so the remaining ids pair up
            # with the entries of ``prompt_logprobs``.
            prompt_token_ids = response['prompt_token_ids'][1:]
            # Element 1 of each entry is looked up by the stringified token
            # id below (presumably a {token_id: logprob} mapping -- confirm
            # against the lightllm response format).
            prompt_logprobs = [
                item[1] for item in response['prompt_logprobs']
            ]
            logprobs = [
                item[str(token_id)]
                for token_id, item in zip(prompt_token_ids, prompt_logprobs)
            ]
            if not logprobs:
                return 0.0
            return -sum(logprobs) / len(logprobs)

        raise RuntimeError('Calling LightllmAPI failed after retrying for '
                           f'{max_num_retries} times. Check the logs for '
                           'details.')

    def wait(self):
        """Acquire a token from the rate limiter before sending a query.

        Delegates to ``self.token_bucket.get_token()`` and hands back its
        result; any blocking behaviour is defined by the token bucket itself.
        """
        bucket = self.token_bucket
        return bucket.get_token()

    def get_token_len(self, prompt: str) -> int:
        """Estimate the token count of ``prompt`` without a tokenizer.

        Each maximal run of ASCII letters/digits counts as one token and
        each character in the range U+4E00 to U+9FFF counts as one token;
        everything else is ignored. Override this method when a more
        accurate length is needed.

        Args:
            prompt (str): Input string.

        Returns:
            int: Estimated number of tokens in ``prompt``.
        """
        # A run matched by this pattern never contains whitespace, so every
        # match is exactly one "word".
        english_count = len(re.findall(r'[A-Za-z0-9]+', prompt))

        # Chinese text is counted per character, so add up the run lengths.
        chinese_runs = re.findall(r'[\u4e00-\u9FFF]+', prompt)
        chinese_count = sum(map(len, chinese_runs))

        return english_count + chinese_count
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`import json`
Fix LightllmApi ppl test (#951) 2024-03-08 12:04:44 +08:00			`import re`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`from concurrent.futures import ThreadPoolExecutor`
			`from typing import Dict, List, Optional`

Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`import numpy as np`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`import requests`

			`from opencompass.registry import MODELS`
			`from opencompass.utils.logging import get_logger`

Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00			`from .base import BaseModel`
			`from .base_api import TokenBucket`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00

			`@MODELS.register_module()`
Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00			`class LightllmAPI(BaseModel):`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00
			`is_api: bool = True`

			`def __init__(`
[Bug] Update api with generation_kargs (#614) * update api * update generation_kwargs impl --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-22 10:02:57 +08:00			`self,`
			`path: str = 'LightllmAPI',`
			`url: str = 'http://localhost:8080/generate',`
			`meta_template: Optional[Dict] = None,`
fix LightllmApi workers bug (#1113) 2024-04-30 22:09:22 +08:00			`max_workers_per_task: int = 2,`
Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00			`rate_per_worker: int = 2,`
[Bug] Update api with generation_kargs (#614) * update api * update generation_kwargs impl --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-22 10:02:57 +08:00			`retry: int = 2,`
			`generation_kwargs: Optional[Dict] = dict(),`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`):`

			`super().__init__(path=path,`
			`meta_template=meta_template,`
[Bug] Update api with generation_kargs (#614) * update api * update generation_kwargs impl --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-22 10:02:57 +08:00			`generation_kwargs=generation_kwargs)`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`self.logger = get_logger()`
			`self.url = url`
Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00			`self.retry = retry`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`self.generation_kwargs = generation_kwargs`
			`self.max_out_len = self.generation_kwargs.get('max_new_tokens', 1024)`
Fix LightllmApi ppl test (#951) 2024-03-08 12:04:44 +08:00			`self.meta_template = meta_template`
fix LightllmApi workers bug (#1113) 2024-04-30 22:09:22 +08:00			`self.max_workers_per_task = max_workers_per_task`
Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00			`self.token_bucket = TokenBucket(rate_per_worker, False)`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00
			`def generate(self, inputs: List[str], max_out_len: int,`
			`**kwargs) -> List[str]:`
			`"""Generate results given a list of inputs.`

			`Args:`
			`inputs (List[str]): A list of strings or PromptDicts.`
			`The PromptDict should be organized in OpenCompass'`
			`API format.`
			`max_out_len (int): The maximum length of the output.`

			`Returns:`
			`List[str]: A list of generated strings.`
			`"""`

fix LightllmApi workers bug (#1113) 2024-04-30 22:09:22 +08:00			`with ThreadPoolExecutor(`
			`max_workers=self.max_workers_per_task) as executor:`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`results = list(`
			`executor.map(self._generate, inputs,`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`[self.max_out_len] * len(inputs)))`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`return results`

			`def _generate(self, input: str, max_out_len: int) -> str:`
			`max_num_retries = 0`
			`while max_num_retries < self.retry:`
			`self.wait()`
			`header = {'content-type': 'application/json'}`
			`try:`
[Sync] add OC16 entry (#1171) 2024-05-17 16:50:58 +08:00			`self.logger.debug(f'input: {input}')`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`data = dict(inputs=input, parameters=self.generation_kwargs)`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`raw_response = requests.post(self.url,`
			`headers=header,`
			`data=json.dumps(data))`
			`except requests.ConnectionError:`
			`self.logger.error('Got connection error, retrying...')`
			`continue`
			`try:`
			`response = raw_response.json()`
[Fix] Fix lightllmapi list bug (#635) 2023-11-24 14:24:13 +08:00			`generated_text = response['generated_text']`
			`if isinstance(generated_text, list):`
			`generated_text = generated_text[0]`
[Sync] add OC16 entry (#1171) 2024-05-17 16:50:58 +08:00			`self.logger.debug(f'generated_text: {generated_text}')`
[Fix] Fix lightllmapi list bug (#635) 2023-11-24 14:24:13 +08:00			`return generated_text`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`except requests.JSONDecodeError:`
			`self.logger.error('JsonDecode error, got',`
			`str(raw_response.content))`
Add LightllmApi KeyError log & Update doc (#816) * Add LightllmApi KeyError log * Update LightllmApi doc 2024-01-18 22:23:38 +08:00			`except KeyError:`
			`self.logger.error(f'KeyError. Response: {str(response)}')`
[Feature] Support Lightllm API (#613) * [Feature] Support Lightllm api * formatting & renaming --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-11-21 19:18:40 +08:00			`max_num_retries += 1`

			`raise RuntimeError('Calling LightllmAPI failed after retrying for '`
			`f'{max_num_retries} times. Check the logs for '`
			`'details.')`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00
			`def get_ppl(self, inputs: List[str], max_out_len: int,`
			`**kwargs) -> List[float]:`
			`"""Generate results given a list of inputs.`

			`Args:`
			`inputs (List[str]): A list of strings or PromptDicts.`
			`The PromptDict should be organized in OpenCompass'`
			`API format.`
			`max_out_len (int): The maximum length of the output.`

			`Returns:`
			`List[str]: A list of generated strings.`
			`"""`

fix LightllmApi workers bug (#1113) 2024-04-30 22:09:22 +08:00			`with ThreadPoolExecutor(`
			`max_workers=self.max_workers_per_task) as executor:`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`results = list(`
			`executor.map(self._get_ppl, inputs,`
			`[self.max_out_len] * len(inputs)))`
			`return np.array(results)`

			`def _get_ppl(self, input: str, max_out_len: int) -> float:`
			`max_num_retries = 0`
			`if max_out_len is None:`
			`max_out_len = 1`
			`while max_num_retries < self.retry:`
			`self.wait()`
			`header = {'content-type': 'application/json'}`
			`try:`
			`data = dict(inputs=input, parameters=self.generation_kwargs)`
			`raw_response = requests.post(self.url,`
			`headers=header,`
			`data=json.dumps(data))`
			`except requests.ConnectionError:`
			`self.logger.error('Got connection error, retrying...')`
			`continue`
			`try:`
			`response = raw_response.json()`

			`assert ('prompt_token_ids' in response and 'prompt_logprobs'`
Add LightllmApi KeyError log & Update doc (#816) * Add LightllmApi KeyError log * Update LightllmApi doc 2024-01-18 22:23:38 +08:00			`in response), f'prompt_token_ids and prompt_logprobs \`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00			`must be in the output. \`
			`Please consider adding \`
			`--return_all_prompt_logprobs argument \`
Add LightllmApi KeyError log & Update doc (#816) * Add LightllmApi KeyError log * Update LightllmApi doc 2024-01-18 22:23:38 +08:00			`when starting lightllm service. Response: {str(response)}'`
Update LightllmApi and Fix mmlu bug (#738) * Update LightllmApi and Fix mmlu bug * checkout mmlu_gen_a484b3.py --------- Co-authored-by: Leymore <zfz-960727@163.com> 2023-12-27 13:49:08 +08:00
			`prompt_token_ids = response['prompt_token_ids'][1:]`
			`prompt_logprobs = [`
			`item[1] for item in response['prompt_logprobs']`
			`]`
			`logprobs = [`
			`item[str(token_id)] for token_id, item in zip(`
			`prompt_token_ids, prompt_logprobs)`
			`]`
			`if len(logprobs) == 0:`
			`return 0.0`
			`ce_loss = -sum(logprobs) / len(logprobs)`
			`return ce_loss`
			`except requests.JSONDecodeError:`
			`self.logger.error('JsonDecode error, got',`
			`str(raw_response.content))`
			`max_num_retries += 1`
			`raise RuntimeError('Calling LightllmAPI failed after retrying for '`
			`f'{max_num_retries} times. Check the logs for '`
			`'details.')`
Support prompt template for LightllmApi. Update LightllmApi token bucket. (#945) 2024-03-06 15:33:53 +08:00
			`def wait(self):`
			`"""Wait till the next query can be sent.`

			`Applicable in both single-thread and multi-thread environments.`
			`"""`
			`return self.token_bucket.get_token()`
Fix LightllmApi ppl test (#951) 2024-03-08 12:04:44 +08:00
			`def get_token_len(self, prompt: str) -> int:`
			`"""Get lengths of the tokenized string. Only English and Chinese`
			`characters are counted for now. Users are encouraged to override this`
			`method if more accurate length is needed.`

			`Args:`
			`prompt (str): Input string.`

			`Returns:`
			`int: Length of the input tokens`
			`"""`

			`english_parts = re.findall(r'[A-Za-z0-9]+', prompt)`
			`chinese_parts = re.findall(r'[\u4e00-\u9FFF]+', prompt)`

			`# Count English words`
			`english_count = sum(len(part.split()) for part in english_parts)`

			`# Count Chinese words`
			`chinese_count = sum(len(part) for part in chinese_parts)`

			`return english_count + chinese_count`