From 1e44541730c70a0ee6072e1fd6e9bd90037fad80 Mon Sep 17 00:00:00 2001 From: Tong Gao Date: Mon, 17 Jul 2023 15:59:10 +0800 Subject: [PATCH] [Enhancement] Test linting in CI and fix existing linting errors (#69) * [Enhancement] Test linting in CI * fix linting --- .codespellrc | 2 +- .github/workflows/lint.yml | 23 ++++ docs/en/_static/image/logo_icon.svg | 2 +- docs/zh_cn/_static/image/logo_icon.svg | 2 +- docs/zh_cn/advanced_guides/new_dataset.md | 76 ++++++------- docs/zh_cn/conf.py | 15 +-- docs/zh_cn/prompt/overview.md | 2 +- opencompass/datasets/ceval.py | 24 ++-- opencompass/datasets/flores.py | 5 +- opencompass/datasets/summedits.py | 3 +- opencompass/models/huggingface.py | 9 +- opencompass/openicl/__init__.py | 10 +- opencompass/openicl/icl_evaluator/__init__.py | 8 +- .../icl_evaluator/icl_aucroc_evaluator.py | 3 +- .../icl_evaluator/icl_base_evaluator.py | 2 +- .../openicl/icl_evaluator/icl_hf_evaluator.py | 12 +- .../openicl/icl_inferencer/__init__.py | 8 +- .../icl_inferencer/icl_clp_inferencer.py | 13 +-- .../icl_inferencer/icl_ppl_inferencer.py | 4 +- opencompass/openicl/icl_retriever/__init__.py | 18 +-- opencompass/openicl/utils/__init__.py | 2 +- opencompass/openicl/utils/api_service.py | 104 ------------------ opencompass/utils/types.py | 4 +- requirements/runtime.txt | 24 ++-- setup.py | 3 + 25 files changed, 153 insertions(+), 225 deletions(-) create mode 100644 .github/workflows/lint.yml delete mode 100644 opencompass/openicl/utils/api_service.py diff --git a/.codespellrc b/.codespellrc index 60e189e2..79abef2c 100644 --- a/.codespellrc +++ b/.codespellrc @@ -2,4 +2,4 @@ skip = *.ipynb count = quiet-level = 3 -ignore-words-list = nd, ans, ques +ignore-words-list = nd, ans, ques, rouge diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..f14e053c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: lint + +on: [push, pull_request] + +concurrency: + group: ${{ 
github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + - name: Install pre-commit hook + run: | + pip install pre-commit + pre-commit install + - name: Linting + run: pre-commit run --all-files diff --git a/docs/en/_static/image/logo_icon.svg b/docs/en/_static/image/logo_icon.svg index a881e852..c46dd3b5 100644 --- a/docs/en/_static/image/logo_icon.svg +++ b/docs/en/_static/image/logo_icon.svg @@ -28,4 +28,4 @@ - \ No newline at end of file + diff --git a/docs/zh_cn/_static/image/logo_icon.svg b/docs/zh_cn/_static/image/logo_icon.svg index a881e852..c46dd3b5 100644 --- a/docs/zh_cn/_static/image/logo_icon.svg +++ b/docs/zh_cn/_static/image/logo_icon.svg @@ -28,4 +28,4 @@ - \ No newline at end of file + diff --git a/docs/zh_cn/advanced_guides/new_dataset.md b/docs/zh_cn/advanced_guides/new_dataset.md index 2b631cdb..7b21e2cf 100644 --- a/docs/zh_cn/advanced_guides/new_dataset.md +++ b/docs/zh_cn/advanced_guides/new_dataset.md @@ -3,56 +3,56 @@ 尽管 OpenCompass 已经包含了大多数常用数据集,用户在支持新数据集的时候需要完成以下几个步骤: 1. 
在 `opencompass/datasets` 文件夹新增数据集脚本 `mydataset.py`, 该脚本需要包含: - - - 数据集及其加载方式,需要定义一个 `MyDataset` 类,实现数据集加载方法 `load`,该方法为静态方法,需要返回 `datasets.Dataset` 类型的数据。这里我们使用 huggingface dataset 作为数据集的统一接口,避免引入额外的逻辑。具体示例如下: - ```python - import datasets - from .base import BaseDataset + - 数据集及其加载方式,需要定义一个 `MyDataset` 类,实现数据集加载方法 `load`,该方法为静态方法,需要返回 `datasets.Dataset` 类型的数据。这里我们使用 huggingface dataset 作为数据集的统一接口,避免引入额外的逻辑。具体示例如下: - class MyDataset(BaseDataset): + ```python + import datasets + from .base import BaseDataset - @staticmethod - def load(**kwargs) -> datasets.Dataset: - pass - ``` + class MyDataset(BaseDataset): - - (可选)如果 OpenCompass 已有的评测器不能满足需要,需要用户定义 `MyDatasetlEvaluator` 类,实现评分方法 `score`,需要根据输入的 `predictions` 和 `references` 列表,得到需要的字典。由于一个数据集可能存在多种 metric,需要返回一个 metrics 以及对应 scores 的相关字典。具体示例如下: + @staticmethod + def load(**kwargs) -> datasets.Dataset: + pass + ``` - ```python - from opencompass.openicl.icl_evaluator import BaseEvaluator + - (可选)如果 OpenCompass 已有的评测器不能满足需要,需要用户定义 `MyDatasetlEvaluator` 类,实现评分方法 `score`,需要根据输入的 `predictions` 和 `references` 列表,得到需要的字典。由于一个数据集可能存在多种 metric,需要返回一个 metrics 以及对应 scores 的相关字典。具体示例如下: - class MyDatasetlEvaluator(BaseEvaluator): + ```python + from opencompass.openicl.icl_evaluator import BaseEvaluator - def score(self, predictions: List, references: List) -> dict: - pass + class MyDatasetlEvaluator(BaseEvaluator): - ``` + def score(self, predictions: List, references: List) -> dict: + pass - - (可选)如果 OpenCompass 已有的后处理方法不能满足需要,需要用户定义 `mydataset_postprocess` 方法,根据输入的字符串得到相应后处理的结果。具体示例如下: + ``` - ```python - def mydataset_postprocess(text: str) -> str: - pass - ``` + - (可选)如果 OpenCompass 已有的后处理方法不能满足需要,需要用户定义 `mydataset_postprocess` 方法,根据输入的字符串得到相应后处理的结果。具体示例如下: + + ```python + def mydataset_postprocess(text: str) -> str: + pass + ``` 2. 
在定义好数据集加载、评测以及数据后处理等方法之后,需要在配置文件中新增以下配置: - ```python - from opencompass.datasets import MyDataset, MyDatasetlEvaluator, mydataset_postprocess - - mydataset_eval_cfg = dict( - evaluator=dict(type=MyDatasetlEvaluator), - pred_postprocessor=dict(type=mydataset_postprocess)) + ```python + from opencompass.datasets import MyDataset, MyDatasetlEvaluator, mydataset_postprocess - mydataset_datasets = [ - dict( - type=MyDataset, - ..., - reader_cfg=..., - infer_cfg=..., - eval_cfg=mydataset_eval_cfg) - ] - ``` + mydataset_eval_cfg = dict( + evaluator=dict(type=MyDatasetlEvaluator), + pred_postprocessor=dict(type=mydataset_postprocess)) - 配置好数据集之后,其他需要的配置文件直接参考[快速上手](../get_started.md)教程即可。 + mydataset_datasets = [ + dict( + type=MyDataset, + ..., + reader_cfg=..., + infer_cfg=..., + eval_cfg=mydataset_eval_cfg) + ] + ``` + + 配置好数据集之后,其他需要的配置文件直接参考[快速上手](../get_started.md)教程即可。 diff --git a/docs/zh_cn/conf.py b/docs/zh_cn/conf.py index a1544924..3338097f 100644 --- a/docs/zh_cn/conf.py +++ b/docs/zh_cn/conf.py @@ -29,14 +29,12 @@ author = 'OpenCompass Authors' # The full version, including alpha/beta/rc tags # version_file = '../../opencompass/version.py' - # def get_version(): # with open(version_file, 'r') as f: # exec(compile(f.read(), version_file, 'exec')) # return locals()['__version__'] - -release = "0.5.0" +release = '0.5.0' # -- General configuration --------------------------------------------------- @@ -141,14 +139,16 @@ latex_elements = { # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, 'manual'), + (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, + 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
-man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)] +man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], + 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -156,8 +156,9 @@ man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1) # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (root_doc, 'opencompass', 'OpenCompass Documentation', author, 'OpenCompass Authors', - 'AGI evaluation toolbox and benchmark.', 'Miscellaneous'), + (root_doc, 'opencompass', 'OpenCompass Documentation', author, + 'OpenCompass Authors', 'AGI evaluation toolbox and benchmark.', + 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- diff --git a/docs/zh_cn/prompt/overview.md b/docs/zh_cn/prompt/overview.md index 37673dfc..f25707b0 100644 --- a/docs/zh_cn/prompt/overview.md +++ b/docs/zh_cn/prompt/overview.md @@ -1 +1 @@ -# Prompt 概括 \ No newline at end of file +# Prompt 概括 diff --git a/opencompass/datasets/ceval.py b/opencompass/datasets/ceval.py index ac1a8015..9b45f118 100644 --- a/opencompass/datasets/ceval.py +++ b/opencompass/datasets/ceval.py @@ -12,20 +12,20 @@ class CEvalDataset(BaseDataset): @staticmethod def load(path: str, name: str): - dev_dataset = load_dataset( - 'csv', - data_files=osp.join(path, 'dev', f'{name}_dev.csv'), - split='train') - val_dataset = load_dataset( - 'csv', - data_files=osp.join(path, 'val', f'{name}_val.csv'), - split='train') + dev_dataset = load_dataset('csv', + data_files=osp.join(path, 'dev', + f'{name}_dev.csv'), + split='train') + val_dataset = load_dataset('csv', + data_files=osp.join(path, 'val', + f'{name}_val.csv'), + split='train') val_dataset = val_dataset.add_column('explanation', [''] * len(val_dataset)) - test_dataset = load_dataset( - 'csv', - data_files=osp.join(path, 'test', f'{name}_test.csv'), - split='train') + 
test_dataset = load_dataset('csv', + data_files=osp.join( + path, 'test', f'{name}_test.csv'), + split='train') test_dataset = test_dataset.add_column( 'answer', [''] * len(test_dataset)).add_column('explanation', diff --git a/opencompass/datasets/flores.py b/opencompass/datasets/flores.py index a2292fec..3c3c03b8 100644 --- a/opencompass/datasets/flores.py +++ b/opencompass/datasets/flores.py @@ -16,8 +16,9 @@ class FloresFirst100Dataset(BaseDataset): 'dev': load_dataset(path='facebook/flores', name=name, split='dev'), 'devtest': - load_dataset( - path='facebook/flores', name=name, split='devtest[:100]') + load_dataset(path='facebook/flores', + name=name, + split='devtest[:100]') }) diff --git a/opencompass/datasets/summedits.py b/opencompass/datasets/summedits.py index ea8ab750..37927726 100644 --- a/opencompass/datasets/summedits.py +++ b/opencompass/datasets/summedits.py @@ -3,6 +3,7 @@ import json from datasets import Dataset from opencompass.registry import LOAD_DATASET + from .base import BaseDataset @@ -17,4 +18,4 @@ class SummeditsDataset_V2(BaseDataset): line = json.loads(line) line['label'] = 'BA'[line['label']] dataset.append(line) - return Dataset.from_list(dataset) \ No newline at end of file + return Dataset.from_list(dataset) diff --git a/opencompass/models/huggingface.py b/opencompass/models/huggingface.py index b905e5b8..c49866c8 100644 --- a/opencompass/models/huggingface.py +++ b/opencompass/models/huggingface.py @@ -142,7 +142,8 @@ class HuggingFace(BaseModel): tokens = self.tokenizer.batch_encode_plus(inputs, padding=True, truncation=True, - max_length=self.max_seq_len - max_out_len) + max_length=self.max_seq_len - + max_out_len) tokens = { k: torch.tensor(np.array(tokens[k]), device=self.model.device) for k in tokens if k in ['input_ids', 'attention_mask'] @@ -180,10 +181,10 @@ class HuggingFace(BaseModel): input_ids = self.tokenizer(inputs, truncation=True, - max_length=self.max_seq_len - max_out_len)['input_ids'] + 
max_length=self.max_seq_len - + max_out_len)['input_ids'] input_ids = torch.tensor(input_ids, device=self.model.device) - outputs = self.model.generate(input_ids, - max_new_tokens=max_out_len) + outputs = self.model.generate(input_ids, max_new_tokens=max_out_len) if not self.extract_pred_after_decode: outputs = outputs[:, input_ids.shape[1]:] diff --git a/opencompass/openicl/__init__.py b/opencompass/openicl/__init__.py index 107406c3..fd96cb59 100644 --- a/opencompass/openicl/__init__.py +++ b/opencompass/openicl/__init__.py @@ -1,5 +1,5 @@ -from .icl_dataset_reader import DatasetReader -from .icl_evaluator import * -from .icl_prompt_template import PromptTemplate -from .icl_retriever import * -from .icl_inferencer import * +from .icl_dataset_reader import DatasetReader # noqa +from .icl_evaluator import * # noqa +from .icl_inferencer import * # noqa +from .icl_prompt_template import PromptTemplate # noqa +from .icl_retriever import * # noqa diff --git a/opencompass/openicl/icl_evaluator/__init__.py b/opencompass/openicl/icl_evaluator/__init__.py index fc74ccac..b81dbc15 100644 --- a/opencompass/openicl/icl_evaluator/__init__.py +++ b/opencompass/openicl/icl_evaluator/__init__.py @@ -1,5 +1,5 @@ -from .icl_aucroc_evaluator import AUCROCEvaluator -from .icl_base_evaluator import BaseEvaluator -from .icl_em_evaluator import EMEvaluator +from .icl_aucroc_evaluator import AUCROCEvaluator # noqa +from .icl_base_evaluator import BaseEvaluator # noqa +from .icl_em_evaluator import EMEvaluator # noqa from .icl_hf_evaluator import * # noqa -from .icl_toxic_evaluator import ToxicEvaluator +from .icl_toxic_evaluator import ToxicEvaluator # noqa diff --git a/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py b/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py index 827a3bbe..4e86789d 100644 --- a/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py +++ b/opencompass/openicl/icl_evaluator/icl_aucroc_evaluator.py @@ -1,4 +1,5 @@ from typing import List + 
import numpy as np from sklearn.metrics import roc_auc_score @@ -10,7 +11,7 @@ from .icl_base_evaluator import BaseEvaluator @ICL_EVALUATORS.register_module() class AUCROCEvaluator(BaseEvaluator): """Calculate AUC-ROC scores and accuracy according the prediction. - + For some dataset, the accuracy cannot reveal the difference between models because of the saturation. AUC-ROC scores can further exam model abilities to distinguish different labels. More details can refer to diff --git a/opencompass/openicl/icl_evaluator/icl_base_evaluator.py b/opencompass/openicl/icl_evaluator/icl_base_evaluator.py index 14fa8a20..1ec8273c 100644 --- a/opencompass/openicl/icl_evaluator/icl_base_evaluator.py +++ b/opencompass/openicl/icl_evaluator/icl_base_evaluator.py @@ -1,8 +1,8 @@ """Base Evaluator.""" -from typing import List class BaseEvaluator: + def __init__(self) -> None: pass diff --git a/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py b/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py index 46acdd32..d270a639 100644 --- a/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py +++ b/opencompass/openicl/icl_evaluator/icl_hf_evaluator.py @@ -1,4 +1,5 @@ from typing import List + import evaluate from opencompass.registry import ICL_EVALUATORS @@ -54,9 +55,12 @@ class HuggingfaceEvaluator(BaseEvaluator): dict: calculated scores. """ if len(predictions) != len(references): - return {'error': 'predictions and references have different ' + return { + 'error': + 'predictions and references have different ' f'length. len(predictions): {len(predictions)}, ' - f'len(references): {len(references)}'} + f'len(references): {len(references)}' + } metric = evaluate.load(self.metric) scores = metric.compute(**self._preprocess(predictions, references)) return self._postprocess(scores) @@ -103,7 +107,7 @@ class AccEvaluator(HuggingfaceEvaluator): Returns: dict: postprocessed scores. 
""" - scores["accuracy"] *= 100 + scores['accuracy'] *= 100 return scores @@ -150,7 +154,7 @@ class MccEvaluator(AccEvaluator): Returns: dict: postprocessed scores. """ - scores["matthews_correlation"] *= 100 + scores['matthews_correlation'] *= 100 return scores diff --git a/opencompass/openicl/icl_inferencer/__init__.py b/opencompass/openicl/icl_inferencer/__init__.py index fe36bed5..0855edf8 100644 --- a/opencompass/openicl/icl_inferencer/__init__.py +++ b/opencompass/openicl/icl_inferencer/__init__.py @@ -1,4 +1,4 @@ -from .icl_base_inferencer import BaseInferencer -from .icl_gen_inferencer import GenInferencer -from .icl_ppl_inferencer import PPLInferencer -from .icl_clp_inferencer import CLPInferencer +from .icl_base_inferencer import BaseInferencer # noqa +from .icl_clp_inferencer import CLPInferencer # noqa +from .icl_gen_inferencer import GenInferencer # noqa +from .icl_ppl_inferencer import PPLInferencer # noqa diff --git a/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py b/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py index 5369c9e3..21b63264 100644 --- a/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_clp_inferencer.py @@ -98,8 +98,8 @@ class CLPInferencer(BaseInferencer): # 3. Generate in-context examples for testing inputs for idx in range(len(ice_idx_list)): ice.append( - retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template)) + retriever.generate_ice(ice_idx_list[idx], + ice_template=ice_template)) output_handler.save_ice(ice) # 4. 
Collect prompts and calculate conditional log probs @@ -165,11 +165,10 @@ class CLPInferencer(BaseInferencer): choice_target_ids.append(prompt_token_num - 1) logger.info('Calculating conditional log probability for prompts.') - for idx in trange( - 0, - len(prompt_list), - self.batch_size, - disable=not self.is_main_process): + for idx in trange(0, + len(prompt_list), + self.batch_size, + disable=not self.is_main_process): sub_prompt_list = prompt_list[idx:idx + self.batch_size] sub_choice_target_ids = choice_target_ids[idx:idx + self.batch_size] diff --git a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py index d1787d3f..8e4734c3 100644 --- a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py @@ -25,7 +25,7 @@ class PPLInferencer(BaseInferencer): model (:obj:`BaseModel`, optional): The module to inference. max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by the LM. - batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`. + batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader` output_json_filepath (:obj:`str`, optional): File path for output `JSON` file. 
output_json_filename (:obj:`str`, optional): File name for output @@ -126,7 +126,7 @@ class PPLInferencer(BaseInferencer): label, ice_template=ice_template, prompt_template=prompt_template) - prompt_token_num = self.model.get_token_len_from_template( + prompt_token_num = self.model.get_token_len_from_template( # noqa prompt, mode='ppl') # noqa if normalizing_str is not None: diff --git a/opencompass/openicl/icl_retriever/__init__.py b/opencompass/openicl/icl_retriever/__init__.py index 739620b8..c1b48c2b 100644 --- a/opencompass/openicl/icl_retriever/__init__.py +++ b/opencompass/openicl/icl_retriever/__init__.py @@ -1,9 +1,9 @@ -from .icl_base_retriever import BaseRetriever -from .icl_bm25_retriever import BM25Retriever -from .icl_dpp_retriever import DPPRetriever -from .icl_fix_k_retriever import FixKRetriever -from .icl_mdl_retriever import MDLRetriever -from .icl_random_retriever import RandomRetriever -from .icl_topk_retriever import TopkRetriever -from .icl_votek_retriever import VotekRetriever -from .icl_zero_retriever import ZeroRetriever +from .icl_base_retriever import BaseRetriever # noqa +from .icl_bm25_retriever import BM25Retriever # noqa +from .icl_dpp_retriever import DPPRetriever # noqa +from .icl_fix_k_retriever import FixKRetriever # noqa +from .icl_mdl_retriever import MDLRetriever # noqa +from .icl_random_retriever import RandomRetriever # noqa +from .icl_topk_retriever import TopkRetriever # noqa +from .icl_votek_retriever import VotekRetriever # noqa +from .icl_zero_retriever import ZeroRetriever # noqa diff --git a/opencompass/openicl/utils/__init__.py b/opencompass/openicl/utils/__init__.py index e060b377..0d0c85f4 100644 --- a/opencompass/openicl/utils/__init__.py +++ b/opencompass/openicl/utils/__init__.py @@ -1 +1 @@ -from .logging import * +from .logging import * # noqa diff --git a/opencompass/openicl/utils/api_service.py b/opencompass/openicl/utils/api_service.py deleted file mode 100644 index f1ee4aac..00000000 --- 
a/opencompass/openicl/utils/api_service.py +++ /dev/null @@ -1,104 +0,0 @@ -import json -import os -import time - -import numpy as np -import openai -import requests - -OPENICL_API_NAME_LIST = ['opt-175b', 'gpt3'] -OPENICL_API_PARAMETER_DICT = { - 'opt-175b': ['URL', 'headers'], - 'gpt3': [ - 'engine', 'temperature', 'max_tokens', 'top_p', 'frequency_penalty', - 'presence_penalty', 'sleep_time' - ] -} -OPENICL_API_REQUEST_CONFIG = { - 'opt-175b': { - 'URL': '', # http://xxx/completions or http://xxx/generate - 'headers': { - 'Content-Type': 'application/json; charset=UTF-8' - } - }, - 'gpt3': { - 'engine': 'text-davinci-003', - 'temperature': 0, - 'max_tokens': 256, - 'top_p': 1.0, - 'frequency_penalty': 0.0, - 'presence_penalty': 0.0, - 'sleep_time': 3 - } -} -PROXIES = {'https': '', 'http': ''} - - -def is_api_available(api_name): - if api_name is None: - return False - return True if api_name in OPENICL_API_NAME_LIST else False - - -def update_openicl_api_request_config(api_name, **kwargs): - if api_name is None or not is_api_available(api_name): - return - - parameter_list = OPENICL_API_PARAMETER_DICT[api_name] - for parameter in parameter_list: - if parameter in kwargs.keys(): - OPENICL_API_REQUEST_CONFIG[api_name][parameter] = kwargs[parameter] - - -def api_get_ppl(api_name, input_texts): - if api_name == 'opt-175b': - pyload = {'prompt': input_texts, 'max_tokens': 0, 'echo': True} - response = json.loads( - requests.post( - OPENICL_API_REQUEST_CONFIG[api_name]['URL'], - data=json.dumps(pyload), - headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'], - proxies=PROXIES).text) - lens = np.array( - [len(r['logprobs']['tokens']) for r in response['choices']]) - ce_loss = np.array([ - -sum(r['logprobs']['token_logprobs']) for r in response['choices'] - ]) - return ce_loss / lens - - if api_name == 'gpt3': - raise NotImplementedError("GPT-3 API doesn't support PPL calculation") - - -def api_get_tokens(api_name, input_texts): - length_list = [len(text) for text 
in input_texts] - - if api_name == 'opt-175b': - pyload = {'prompt': input_texts, 'max_tokens': 100, 'echo': True} - response = json.loads( - requests.post( - OPENICL_API_REQUEST_CONFIG[api_name]['URL'], - data=json.dumps(pyload), - headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'], - proxies=PROXIES).text) - return [r['text'] for r in response['choices']], [ - r['text'][length:] - for r, length in zip(response['choices'], length_list) - ] - - if api_name == 'gpt3': - openai.api_key = os.getenv('OPENAI_API_KEY') - response = openai.Completion.create( - engine=OPENICL_API_REQUEST_CONFIG['gpt3']['engine'], - prompt=input_texts, - temperature=OPENICL_API_REQUEST_CONFIG['gpt3']['temperature'], - max_tokens=OPENICL_API_REQUEST_CONFIG['gpt3']['max_tokens'], - top_p=OPENICL_API_REQUEST_CONFIG['gpt3']['top_p'], - frequency_penalty=OPENICL_API_REQUEST_CONFIG['gpt3'] - ['frequency_penalty'], - presence_penalty=OPENICL_API_REQUEST_CONFIG['gpt3'] - ['presence_penalty']) - time.sleep(OPENICL_API_REQUEST_CONFIG['gpt3']['sleep_time']) - return [(input + r['text']) - for r, input in zip(response['choices'], input_texts) - ], [r['text'] for r in response['choices']] diff --git a/opencompass/utils/types.py b/opencompass/utils/types.py index 914213c9..868040f1 100644 --- a/opencompass/utils/types.py +++ b/opencompass/utils/types.py @@ -11,7 +11,7 @@ def _check_type_list(obj, typelist: List): elif isinstance(obj, _type): return obj raise TypeError( - f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}' + f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}' # noqa ) @@ -20,7 +20,7 @@ def _check_dataset(obj) -> Union[Dataset, DatasetDict]: return obj else: raise TypeError( - f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}' + f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}' # noqa ) diff --git a/requirements/runtime.txt 
b/requirements/runtime.txt index 77b80bbe..2830791e 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,26 +1,24 @@ accelerate>=0.19.0 +boto3 +colossalai +cpm_kernels datasets>=2.12.0 evaluate>=0.3.0 +fairscale faiss_gpu==1.7.2 +jieba +mmengine nltk==3.8 numpy==1.23.4 -openai==0.27.1 +openai +pandas<2.0.0 rank_bm25==0.2.2 requests==2.28.1 scikit_learn==1.2.1 sentence_transformers==2.2.2 +tabulate +tiktoken +tokenizers>=0.13.3 torch>=1.13.1 tqdm==4.64.1 transformers>=4.29.1 -openai -mmengine -jieba -pandas<2.0.0 -cpm_kernels -tokenizers>=0.13.3 -tabulate -fairscale -colossalai -tabulate -boto3 -tiktoken \ No newline at end of file diff --git a/setup.py b/setup.py index f9e05d84..7b9c578d 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ class DownloadNLTK(install): with open('README_zh-CN.md') as f: readme = f.read() + def parse_requirements(fname='requirements.txt', with_version=True): """Parse the package dependencies listed in a requirements file but strips specific versioning information. @@ -91,12 +92,14 @@ def parse_requirements(fname='requirements.txt', with_version=True): packages = list(gen_packages_items()) return packages + def get_version(): version_file = 'opencompass/__init__.py' with open(version_file, 'r', encoding='utf-8') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] + def do_setup(): setup( name='opencompass',