Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Enhancement] Test linting in CI and fix existing linting errors (#69)

* [Enhancement] Test linting in CI
* fix linting

parent 9a16448905
commit 1e44541730
@@ -2,4 +2,4 @@
 skip = *.ipynb
 count =
 quiet-level = 3
-ignore-words-list = nd, ans, ques
+ignore-words-list = nd, ans, ques, rouge
.github/workflows/lint.yml (vendored, new file, 23 lines added)
@@ -0,0 +1,23 @@
+name: lint
+
+on: [push, pull_request]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10
+      - name: Install pre-commit hook
+        run: |
+          pip install pre-commit
+          pre-commit install
+      - name: Linting
+        run: pre-commit run --all-files
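The workflow above only drives pre-commit; the same checks can be reproduced locally before pushing. A minimal sketch in Python (assuming `pip` and `pre-commit` resolve on PATH; the actual hook set comes from the repo's pre-commit config, which this diff does not show):

import subprocess

# Mirror the three CI steps above from Python.
subprocess.run(['pip', 'install', 'pre-commit'], check=True)
subprocess.run(['pre-commit', 'install'], check=True)  # register the git hook
subprocess.run(['pre-commit', 'run', '--all-files'], check=True)  # what CI runs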
@@ -29,14 +29,12 @@ author = 'OpenCompass Authors'
 # The full version, including alpha/beta/rc tags
 # version_file = '../../opencompass/version.py'
 
 
 # def get_version():
 #     with open(version_file, 'r') as f:
 #         exec(compile(f.read(), version_file, 'exec'))
 #     return locals()['__version__']
 
 
-release = "0.5.0"
+release = '0.5.0'
 
 # -- General configuration ---------------------------------------------------
 
@@ -141,14 +139,16 @@ latex_elements = {
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, 'manual'),
+    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author,
+     'manual'),
 ]
 
 # -- Options for manual page output ------------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)]
+man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author],
+              1)]
 
 # -- Options for Texinfo output ----------------------------------------------
 
@@ -156,8 +156,9 @@ man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (root_doc, 'opencompass', 'OpenCompass Documentation', author, 'OpenCompass Authors',
-     'AGI evaluation toolbox and benchmark.', 'Miscellaneous'),
+    (root_doc, 'opencompass', 'OpenCompass Documentation', author,
+     'OpenCompass Authors', 'AGI evaluation toolbox and benchmark.',
+     'Miscellaneous'),
 ]
 
 # -- Options for Epub output -------------------------------------------------
@@ -12,19 +12,19 @@ class CEvalDataset(BaseDataset):
 
     @staticmethod
     def load(path: str, name: str):
-        dev_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'dev', f'{name}_dev.csv'),
-            split='train')
-        val_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'val', f'{name}_val.csv'),
-            split='train')
+        dev_dataset = load_dataset('csv',
+                                   data_files=osp.join(path, 'dev',
+                                                       f'{name}_dev.csv'),
+                                   split='train')
+        val_dataset = load_dataset('csv',
+                                   data_files=osp.join(path, 'val',
+                                                       f'{name}_val.csv'),
+                                   split='train')
         val_dataset = val_dataset.add_column('explanation',
                                              [''] * len(val_dataset))
-        test_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'test', f'{name}_test.csv'),
-            split='train')
+        test_dataset = load_dataset('csv',
+                                    data_files=osp.join(
+                                        path, 'test', f'{name}_test.csv'),
+                                    split='train')
         test_dataset = test_dataset.add_column(
             'answer',
@@ -16,8 +16,9 @@ class FloresFirst100Dataset(BaseDataset):
             'dev':
             load_dataset(path='facebook/flores', name=name, split='dev'),
             'devtest':
-            load_dataset(
-                path='facebook/flores', name=name, split='devtest[:100]')
+            load_dataset(path='facebook/flores',
+                         name=name,
+                         split='devtest[:100]')
         })
 
 
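Worth noting in the reflowed call: `split='devtest[:100]'` relies on the `datasets` split-slicing syntax, so only the first 100 rows of the devtest split are loaded. A hedged standalone sketch (the language-pair config name is illustrative):

from datasets import load_dataset

# 'devtest[:100]' slices the split: only the first 100 rows are loaded.
subset = load_dataset(path='facebook/flores',
                      name='eng_Latn-fra_Latn',  # illustrative config name
                      split='devtest[:100]')
print(len(subset))  # 100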
@@ -3,6 +3,7 @@ import json
 from datasets import Dataset
 
 from opencompass.registry import LOAD_DATASET
+
 from .base import BaseDataset
 
 
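The single added blank line is isort's import grouping: standard-library, third-party, first-party, and local-relative imports form separate groups, one blank line apart (here `opencompass.registry` is first-party and `.base` is local). A runnable illustration with the first two groups:

import json  # standard-library group

import numpy as np  # third-party group; isort inserts the separating blank line

print(json.dumps({'pi': float(np.pi)}))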
@@ -142,7 +142,8 @@ class HuggingFace(BaseModel):
         tokens = self.tokenizer.batch_encode_plus(inputs,
                                                   padding=True,
                                                   truncation=True,
-                                                  max_length=self.max_seq_len - max_out_len)
+                                                  max_length=self.max_seq_len -
+                                                  max_out_len)
         tokens = {
             k: torch.tensor(np.array(tokens[k]), device=self.model.device)
             for k in tokens if k in ['input_ids', 'attention_mask']
@@ -180,10 +181,10 @@ class HuggingFace(BaseModel):
 
         input_ids = self.tokenizer(inputs,
                                    truncation=True,
-                                   max_length=self.max_seq_len - max_out_len)['input_ids']
+                                   max_length=self.max_seq_len -
+                                   max_out_len)['input_ids']
         input_ids = torch.tensor(input_ids, device=self.model.device)
-        outputs = self.model.generate(input_ids,
-                                      max_new_tokens=max_out_len)
+        outputs = self.model.generate(input_ids, max_new_tokens=max_out_len)
 
         if not self.extract_pred_after_decode:
             outputs = outputs[:, input_ids.shape[1]:]
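The rewrapped expression is the interesting part: the prompt is truncated to `max_seq_len - max_out_len` so that prompt plus generated tokens fit the model context, and the prompt ids are later sliced off the output. A hedged numeric sketch of that budget:

# Reserve max_out_len tokens of the context window for generation.
max_seq_len, max_out_len = 2048, 100        # illustrative values
max_prompt_len = max_seq_len - max_out_len  # prompt clipped to 1948 tokens
print(max_prompt_len)

# After model.generate(), outputs begin with the prompt ids; the slice
# outputs[:, input_ids.shape[1]:] above keeps only the newly generated tokens.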
@@ -1,5 +1,5 @@
-from .icl_dataset_reader import DatasetReader
-from .icl_evaluator import *
-from .icl_prompt_template import PromptTemplate
-from .icl_retriever import *
-from .icl_inferencer import *
+from .icl_dataset_reader import DatasetReader  # noqa
+from .icl_evaluator import *  # noqa
+from .icl_inferencer import *  # noqa
+from .icl_prompt_template import PromptTemplate  # noqa
+from .icl_retriever import *  # noqa
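These markers address flake8's F401 ("imported but unused"), which fires on names imported into a package `__init__` purely for re-export; `# noqa` suppresses warnings on the line (the same mechanism silences E501 on the over-long strings later in this commit). A tiny self-contained illustration:

# re_export_demo.py -- hypothetical module name.
# flake8 would report F401 here because `dumps` is never used locally;
# it is imported only so `from re_export_demo import dumps` works.
from json import dumps  # noqa: F401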
@@ -1,5 +1,5 @@
-from .icl_aucroc_evaluator import AUCROCEvaluator
-from .icl_base_evaluator import BaseEvaluator
-from .icl_em_evaluator import EMEvaluator
+from .icl_aucroc_evaluator import AUCROCEvaluator  # noqa
+from .icl_base_evaluator import BaseEvaluator  # noqa
+from .icl_em_evaluator import EMEvaluator  # noqa
 from .icl_hf_evaluator import *  # noqa
-from .icl_toxic_evaluator import ToxicEvaluator
+from .icl_toxic_evaluator import ToxicEvaluator  # noqa
@@ -1,4 +1,5 @@
 from typing import List
+
 import numpy as np
 from sklearn.metrics import roc_auc_score
 
@@ -1,8 +1,8 @@
 """Base Evaluator."""
-from typing import List
+
 
 class BaseEvaluator:
 
     def __init__(self) -> None:
         pass
 
@@ -1,4 +1,5 @@
 from typing import List
+
 import evaluate
 
 from opencompass.registry import ICL_EVALUATORS
@@ -54,9 +55,12 @@ class HuggingfaceEvaluator(BaseEvaluator):
             dict: calculated scores.
         """
         if len(predictions) != len(references):
-            return {'error': 'predictions and references have different '
-                    f'length. len(predictions): {len(predictions)}, '
-                    f'len(references): {len(references)}'}
+            return {
+                'error':
+                'predictions and references have different '
+                f'length. len(predictions): {len(predictions)}, '
+                f'len(references): {len(references)}'
+            }
         metric = evaluate.load(self.metric)
         scores = metric.compute(**self._preprocess(predictions, references))
         return self._postprocess(scores)
@@ -103,7 +107,7 @@ class AccEvaluator(HuggingfaceEvaluator):
         Returns:
             dict: postprocessed scores.
         """
-        scores["accuracy"] *= 100
+        scores['accuracy'] *= 100
         return scores
 
 
@@ -150,7 +154,7 @@ class MccEvaluator(AccEvaluator):
         Returns:
             dict: postprocessed scores.
         """
-        scores["matthews_correlation"] *= 100
+        scores['matthews_correlation'] *= 100
         return scores
 
 
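Beyond the quote style, these `_postprocess` hooks simply rescale metric outputs from the `evaluate` library to percentages. A hedged end-to-end sketch using the real `evaluate` API:

import evaluate

# Load a metric by name and rescale, mirroring AccEvaluator._postprocess.
metric = evaluate.load('accuracy')
scores = metric.compute(predictions=[0, 1, 1], references=[0, 1, 0])
scores['accuracy'] *= 100
print(scores)  # {'accuracy': 66.66...}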
@@ -1,4 +1,4 @@
-from .icl_base_inferencer import BaseInferencer
-from .icl_gen_inferencer import GenInferencer
-from .icl_ppl_inferencer import PPLInferencer
-from .icl_clp_inferencer import CLPInferencer
+from .icl_base_inferencer import BaseInferencer  # noqa
+from .icl_clp_inferencer import CLPInferencer  # noqa
+from .icl_gen_inferencer import GenInferencer  # noqa
+from .icl_ppl_inferencer import PPLInferencer  # noqa
@@ -98,8 +98,8 @@ class CLPInferencer(BaseInferencer):
         # 3. Generate in-context examples for testing inputs
         for idx in range(len(ice_idx_list)):
             ice.append(
-                retriever.generate_ice(
-                    ice_idx_list[idx], ice_template=ice_template))
+                retriever.generate_ice(ice_idx_list[idx],
+                                       ice_template=ice_template))
         output_handler.save_ice(ice)
 
         # 4. Collect prompts and calculate conditional log probs
@@ -165,8 +165,7 @@ class CLPInferencer(BaseInferencer):
             choice_target_ids.append(prompt_token_num - 1)
 
         logger.info('Calculating conditional log probability for prompts.')
-        for idx in trange(
-                0,
-                len(prompt_list),
-                self.batch_size,
-                disable=not self.is_main_process):
+        for idx in trange(0,
+                          len(prompt_list),
+                          self.batch_size,
+                          disable=not self.is_main_process):
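The reflowed loop is the usual tqdm batching idiom: `trange(0, len(xs), batch_size)` yields batch start offsets, and `disable=` keeps the progress bar off non-main processes. A hedged sketch:

from tqdm import trange

prompt_list, batch_size = list(range(10)), 4
for idx in trange(0, len(prompt_list), batch_size, disable=False):
    batch = prompt_list[idx:idx + batch_size]  # at most batch_size items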
@@ -25,7 +25,7 @@ class PPLInferencer(BaseInferencer):
         model (:obj:`BaseModel`, optional): The module to inference.
         max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
             the LM.
-        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`.
+        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
         output_json_filepath (:obj:`str`, optional): File path for output
             `JSON` file.
         output_json_filename (:obj:`str`, optional): File name for output
@@ -126,7 +126,7 @@ class PPLInferencer(BaseInferencer):
                     label,
                     ice_template=ice_template,
                     prompt_template=prompt_template)
-                prompt_token_num = self.model.get_token_len_from_template(
-                    prompt, mode='ppl')
+                prompt_token_num = self.model.get_token_len_from_template(  # noqa
+                    prompt, mode='ppl')  # noqa
 
                 if normalizing_str is not None:
@@ -1,9 +1,9 @@
-from .icl_base_retriever import BaseRetriever
-from .icl_bm25_retriever import BM25Retriever
-from .icl_dpp_retriever import DPPRetriever
-from .icl_fix_k_retriever import FixKRetriever
-from .icl_mdl_retriever import MDLRetriever
-from .icl_random_retriever import RandomRetriever
-from .icl_topk_retriever import TopkRetriever
-from .icl_votek_retriever import VotekRetriever
-from .icl_zero_retriever import ZeroRetriever
+from .icl_base_retriever import BaseRetriever  # noqa
+from .icl_bm25_retriever import BM25Retriever  # noqa
+from .icl_dpp_retriever import DPPRetriever  # noqa
+from .icl_fix_k_retriever import FixKRetriever  # noqa
+from .icl_mdl_retriever import MDLRetriever  # noqa
+from .icl_random_retriever import RandomRetriever  # noqa
+from .icl_topk_retriever import TopkRetriever  # noqa
+from .icl_votek_retriever import VotekRetriever  # noqa
+from .icl_zero_retriever import ZeroRetriever  # noqa
@@ -1 +1 @@
-from .logging import *
+from .logging import *  # noqa
@@ -1,104 +0,0 @@
-import json
-import os
-import time
-
-import numpy as np
-import openai
-import requests
-
-OPENICL_API_NAME_LIST = ['opt-175b', 'gpt3']
-OPENICL_API_PARAMETER_DICT = {
-    'opt-175b': ['URL', 'headers'],
-    'gpt3': [
-        'engine', 'temperature', 'max_tokens', 'top_p', 'frequency_penalty',
-        'presence_penalty', 'sleep_time'
-    ]
-}
-OPENICL_API_REQUEST_CONFIG = {
-    'opt-175b': {
-        'URL': '',  # http://xxx/completions or http://xxx/generate
-        'headers': {
-            'Content-Type': 'application/json; charset=UTF-8'
-        }
-    },
-    'gpt3': {
-        'engine': 'text-davinci-003',
-        'temperature': 0,
-        'max_tokens': 256,
-        'top_p': 1.0,
-        'frequency_penalty': 0.0,
-        'presence_penalty': 0.0,
-        'sleep_time': 3
-    }
-}
-PROXIES = {'https': '', 'http': ''}
-
-
-def is_api_available(api_name):
-    if api_name is None:
-        return False
-    return True if api_name in OPENICL_API_NAME_LIST else False
-
-
-def update_openicl_api_request_config(api_name, **kwargs):
-    if api_name is None or not is_api_available(api_name):
-        return
-
-    parameter_list = OPENICL_API_PARAMETER_DICT[api_name]
-    for parameter in parameter_list:
-        if parameter in kwargs.keys():
-            OPENICL_API_REQUEST_CONFIG[api_name][parameter] = kwargs[parameter]
-
-
-def api_get_ppl(api_name, input_texts):
-    if api_name == 'opt-175b':
-        pyload = {'prompt': input_texts, 'max_tokens': 0, 'echo': True}
-        response = json.loads(
-            requests.post(
-                OPENICL_API_REQUEST_CONFIG[api_name]['URL'],
-                data=json.dumps(pyload),
-                headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'],
-                proxies=PROXIES).text)
-        lens = np.array(
-            [len(r['logprobs']['tokens']) for r in response['choices']])
-        ce_loss = np.array([
-            -sum(r['logprobs']['token_logprobs']) for r in response['choices']
-        ])
-        return ce_loss / lens
-
-    if api_name == 'gpt3':
-        raise NotImplementedError("GPT-3 API doesn't support PPL calculation")
-
-
-def api_get_tokens(api_name, input_texts):
-    length_list = [len(text) for text in input_texts]
-
-    if api_name == 'opt-175b':
-        pyload = {'prompt': input_texts, 'max_tokens': 100, 'echo': True}
-        response = json.loads(
-            requests.post(
-                OPENICL_API_REQUEST_CONFIG[api_name]['URL'],
-                data=json.dumps(pyload),
-                headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'],
-                proxies=PROXIES).text)
-        return [r['text'] for r in response['choices']], [
-            r['text'][length:]
-            for r, length in zip(response['choices'], length_list)
-        ]
-
-    if api_name == 'gpt3':
-        openai.api_key = os.getenv('OPENAI_API_KEY')
-        response = openai.Completion.create(
-            engine=OPENICL_API_REQUEST_CONFIG['gpt3']['engine'],
-            prompt=input_texts,
-            temperature=OPENICL_API_REQUEST_CONFIG['gpt3']['temperature'],
-            max_tokens=OPENICL_API_REQUEST_CONFIG['gpt3']['max_tokens'],
-            top_p=OPENICL_API_REQUEST_CONFIG['gpt3']['top_p'],
-            frequency_penalty=OPENICL_API_REQUEST_CONFIG['gpt3']
-            ['frequency_penalty'],
-            presence_penalty=OPENICL_API_REQUEST_CONFIG['gpt3']
-            ['presence_penalty'])
-        time.sleep(OPENICL_API_REQUEST_CONFIG['gpt3']['sleep_time'])
-        return [(input + r['text'])
-                for r, input in zip(response['choices'], input_texts)
-                ], [r['text'] for r in response['choices']]
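For reference, the core of the deleted helper was a perplexity proxy computed from an echoed completion: per-sequence mean negative token log-probability. A self-contained sketch with a mocked response shaped like the one the code above parsed:

import numpy as np

# One fake 'choice' mimicking an echoed completions response.
choices = [{'logprobs': {'tokens': ['He', 'llo'],
                         'token_logprobs': [-0.5, -1.5]}}]
lens = np.array([len(r['logprobs']['tokens']) for r in choices])
ce_loss = np.array([-sum(r['logprobs']['token_logprobs']) for r in choices])
print(ce_loss / lens)  # [1.] -- average NLL per token; lower is better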
@@ -11,7 +11,7 @@ def _check_type_list(obj, typelist: List):
     elif isinstance(obj, _type):
         return obj
     raise TypeError(
-        f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'
+        f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'  # noqa
    )
 
 
@@ -20,7 +20,7 @@ def _check_dataset(obj) -> Union[Dataset, DatasetDict]:
         return obj
     else:
         raise TypeError(
-            f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'
+            f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'  # noqa
        )
 
 
@@ -1,26 +1,24 @@
 accelerate>=0.19.0
+boto3
+colossalai
+cpm_kernels
 datasets>=2.12.0
 evaluate>=0.3.0
+fairscale
 faiss_gpu==1.7.2
+jieba
+mmengine
 nltk==3.8
 numpy==1.23.4
-openai==0.27.1
+openai
+pandas<2.0.0
 rank_bm25==0.2.2
 requests==2.28.1
 scikit_learn==1.2.1
 sentence_transformers==2.2.2
+tabulate
+tiktoken
+tokenizers>=0.13.3
 torch>=1.13.1
 tqdm==4.64.1
 transformers>=4.29.1
-openai
-mmengine
-jieba
-pandas<2.0.0
-cpm_kernels
-tokenizers>=0.13.3
-tabulate
-fairscale
-colossalai
-tabulate
-boto3
-tiktoken
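The requirements change is a sort plus de-duplication: `openai` and `tabulate` each appeared twice, and the `==0.27.1` pin on openai was dropped in favour of the bare name. A rough sketch of the same cleanup done programmatically (it collapses exact duplicates only; conflicting pins like `openai` vs `openai==0.27.1` would still need a human decision):

# Sort requirement lines and drop exact duplicates.
with open('requirements.txt') as f:
    entries = {line.strip() for line in f if line.strip()}
print('\n'.join(sorted(entries)))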
setup.py (3 lines added)
@@ -13,6 +13,7 @@ class DownloadNLTK(install):
 with open('README_zh-CN.md') as f:
     readme = f.read()
 
+
 def parse_requirements(fname='requirements.txt', with_version=True):
     """Parse the package dependencies listed in a requirements file but strips
     specific versioning information.
@@ -91,12 +92,14 @@ def parse_requirements(fname='requirements.txt', with_version=True):
     packages = list(gen_packages_items())
     return packages
 
+
 def get_version():
     version_file = 'opencompass/__init__.py'
     with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
 
+
 def do_setup():
     setup(
         name='opencompass',
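The `get_version` visible in context above is a common setup.py trick: exec the package `__init__.py` so `__version__` can be read without importing the package (which would fail before its dependencies are installed). A variant using an explicit namespace instead of the `locals()` idiom:

# Read __version__ from a file without importing the package.
# Assumes opencompass/__init__.py assigns __version__ at top level.
version_file = 'opencompass/__init__.py'
namespace = {}
with open(version_file, encoding='utf-8') as f:
    exec(compile(f.read(), version_file, 'exec'), namespace)
print(namespace['__version__'])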