Merge branch 'open-compass:main' into main

This commit is contained in:
bittersweet1999 2024-09-14 14:44:08 +08:00 committed by GitHub
commit 5c5d5c119c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 565 additions and 7 deletions

View File

@ -0,0 +1,63 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset
# Reader settings for ARC-c: the question plus the four option texts are the
# input columns; 'answerKey' is the output (label) column.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}
# Few-shot inference settings for ARC-c.
#
# One template is built per answer key ('A'..'D'): the HUMAN turn carries the
# question and the BOT turn carries the text of the matching option
# ('{textA}' .. '{textD}').  Every template begins with the '</E>' ice token.
ARC_c_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            key: dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    # Doubled braces keep a literal '{textX}' placeholder.
                    dict(role='BOT', prompt=f'{{text{key}}}'),
                ],
            )
            for key in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed example ids handed to FixKRetriever.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer),
)
# Evaluation for ARC-c is delegated to AccEvaluator.
ARC_c_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# Single dataset entry wiring the reader, inference and evaluation settings
# to the 'ARC-Challenge' subset.
ARC_c_datasets = [
    {
        'type': ARCDataset,
        'abbr': 'ARC-c',
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]

View File

@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
# Reader settings for BoolQ: 'question' and 'passage' are the input columns;
# 'label' is the output column.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}
# Few-shot inference settings for BoolQ.
#
# Template key 'A' ends with the BOT answer "Yes" and key 'B' with "No"; both
# share the same HUMAN turn built from the passage and the question.
#
# Each template begins with the '</E>' ice token.  The placeholder is where
# the examples selected by FixKRetriever are substituted into the prompt;
# with `ice_token` declared but absent from the templates, the retrieved
# examples are dropped and the run is effectively zero-shot.
# NOTE(review): adding the placeholder changes the generated prompts, so
# scores will differ from runs made with the previous config.
BoolQ_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'B': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='No'),
                ],
            ),
            'A': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='Yes'),
                ],
            ),
        },
        ice_token='</E>',
    ),
    # Fixed example ids handed to FixKRetriever.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer, max_out_len=50),
)
# Evaluation for BoolQ is delegated to AccEvaluator.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# Single dataset entry wiring the reader, inference and evaluation settings.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]

View File

@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
# Reader settings for RACE: article, question and the four options are the
# input columns; 'answer' is the output column.  The 'validation' split is
# configured as the train split and 'test' as the test split.
race_reader_cfg = {
    'input_columns': ['article', 'question', 'A', 'B', 'C', 'D'],
    'output_column': 'answer',
    'train_split': 'validation',
    'test_split': 'test',
}
# Few-shot inference settings for RACE.
#
# One template is built per answer letter: the HUMAN turn lists the article,
# the question and all four options; the BOT turn is 'Answer: <letter>'.
# Every template begins with the '</E>' ice token.
race_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            letter: {
                'begin': '</E>',
                'round': [
                    {
                        'role': 'HUMAN',
                        # Adjacent literals concatenate into one prompt string.
                        'prompt': 'Article:\n{article}\n'
                                  'Question:\n{question}\n'
                                  'A. {A}\nB. {B}\nC. {C}\nD. {D}',
                    },
                    {'role': 'BOT', 'prompt': 'Answer: ' + letter},
                ],
            }
            for letter in ('A', 'B', 'C', 'D')
        },
        'ice_token': '</E>',
    },
    # Fixed example ids handed to FixKRetriever.
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4]},
    'inferencer': {'type': PPLInferencer},
}
# Evaluation for RACE is delegated to AccEvaluator.
race_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# One dataset entry per subset ('middle', then 'high'); both share the same
# reader, inference and evaluation settings.
race_datasets = [
    dict(
        abbr=f'race-{subset}',
        type=RaceDataset,
        path='opencompass/race',
        name=subset,
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    )
    for subset in ('middle', 'high')
]

View File

@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-70B-Instruct using
# HuggingFacewithChatTemplate; run_cfg requests 4 GPUs.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'llama-3_1-70b-instruct-hf',
        'path': 'meta-llama/Meta-Llama-3.1-70B-Instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 4},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-8B-Instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'llama-3_1-8b-instruct-hf',
        'path': 'meta-llama/Meta-Llama-3.1-8B-Instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -0,0 +1,16 @@
from opencompass.models import TurboMindModelwithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-70B-Instruct using
# TurboMindModelwithChatTemplate.  The engine is configured with tp=4 and
# run_cfg requests the same number of GPUs.
models = [
    {
        'type': TurboMindModelwithChatTemplate,
        'abbr': 'llama-3_1-70b-instruct-turbomind',
        'path': 'meta-llama/Meta-Llama-3.1-70B-Instruct',
        'engine_config': {'max_batch_size': 16, 'tp': 4},
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 4},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate
models = [ models = [
dict( dict(
type=TurboMindModelwithChatTemplate, type=TurboMindModelwithChatTemplate,
abbr='llama-3.1-8b-instruct-turbomind', abbr='llama-3_1-8b-instruct-turbomind',
path='meta-llama/Meta-Llama-3.1-8B-Instruct', path='meta-llama/Meta-Llama-3.1-8B-Instruct',
engine_config=dict(max_batch_size=16, tp=1), engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),

View File

@ -0,0 +1,15 @@
from opencompass.models import VLLMwithChatTemplate
# Model entry for mistralai/Mistral-Large-Instruct-2407 using
# VLLMwithChatTemplate with tensor_parallel_size=8; run_cfg requests 8 GPUs.
# NOTE(review): the abbr reads 'mixtral-...' while the path is Mistral-Large;
# confirm whether the abbr spelling is intentional before renaming it.
models = [
    {
        'type': VLLMwithChatTemplate,
        'abbr': 'mixtral-large-instruct-2407-vllm',
        'path': 'mistralai/Mistral-Large-Instruct-2407',
        'model_kwargs': {'tensor_parallel_size': 8},
        'max_out_len': 256,
        'batch_size': 16,
        'generation_kwargs': {'temperature': 0},
        'run_cfg': {'num_gpus': 8},
    },
]

View File

@ -0,0 +1,15 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for openbmb/MiniCPM3-4B using HuggingFacewithChatTemplate;
# the dtype is passed as the string 'torch.bfloat16' via model_kwargs and
# run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'MiniCPM3-4B-hf',
        'path': 'openbmb/MiniCPM3-4B',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
        'model_kwargs': {
            'torch_dtype': 'torch.bfloat16',
        },
    },
]

View File

@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for microsoft/Phi-3.5-MoE-instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'phi-3-5-MoE-instruct-hf',
        'path': 'microsoft/Phi-3.5-MoE-instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
    },
]

View File

@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for microsoft/Phi-3.5-mini-instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'phi-3-5-mini-instruct-hf',
        'path': 'microsoft/Phi-3.5-mini-instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
    },
]

View File

@ -0,0 +1,5 @@
# Subset names of the 'humanevalx' summary group: one 'humanevalx-<lang>'
# entry per language, in the listed order.
_humanevalx_all = [
    f'humanevalx-{lang}' for lang in ('python', 'cpp', 'go', 'java', 'js')
]
# A single group named 'humanevalx' covering all of the subsets above.
humanevalx_summary_groups = [dict(name='humanevalx', subsets=_humanevalx_all)]

View File

@ -0,0 +1,63 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset
# Reader settings for ARC-c: the question plus the four option texts are the
# input columns; 'answerKey' is the output (label) column.
ARC_c_reader_cfg = {
    'input_columns': ['question', 'textA', 'textB', 'textC', 'textD'],
    'output_column': 'answerKey',
}
# Few-shot inference settings for ARC-c.
#
# One template is built per answer key ('A'..'D'): the HUMAN turn carries the
# question and the BOT turn carries the text of the matching option
# ('{textA}' .. '{textD}').  Every template begins with the '</E>' ice token.
ARC_c_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            key: dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    # Doubled braces keep a literal '{textX}' placeholder.
                    dict(role='BOT', prompt=f'{{text{key}}}'),
                ],
            )
            for key in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed example ids handed to FixKRetriever.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer),
)
# Evaluation for ARC-c is delegated to AccEvaluator.
ARC_c_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# Single dataset entry wiring the reader, inference and evaluation settings
# to the 'ARC-Challenge' subset.
ARC_c_datasets = [
    {
        'type': ARCDataset,
        'abbr': 'ARC-c',
        'path': 'opencompass/ai2_arc-dev',
        'name': 'ARC-Challenge',
        'reader_cfg': ARC_c_reader_cfg,
        'infer_cfg': ARC_c_infer_cfg,
        'eval_cfg': ARC_c_eval_cfg,
    },
]

View File

@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
# Reader settings for BoolQ: 'question' and 'passage' are the input columns;
# 'label' is the output column.
BoolQ_reader_cfg = {
    'input_columns': ['question', 'passage'],
    'output_column': 'label',
}
# Few-shot inference settings for BoolQ.
#
# Template key 'A' ends with the BOT answer "Yes" and key 'B' with "No"; both
# share the same HUMAN turn built from the passage and the question.
#
# Each template begins with the '</E>' ice token.  The placeholder is where
# the examples selected by FixKRetriever are substituted into the prompt;
# with `ice_token` declared but absent from the templates, the retrieved
# examples are dropped and the run is effectively zero-shot.
# NOTE(review): adding the placeholder changes the generated prompts, so
# scores will differ from runs made with the previous config.
BoolQ_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'B': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='No'),
                ],
            ),
            'A': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='Yes'),
                ],
            ),
        },
        ice_token='</E>',
    ),
    # Fixed example ids handed to FixKRetriever.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer, max_out_len=50),
)
# Evaluation for BoolQ is delegated to AccEvaluator.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# Single dataset entry wiring the reader, inference and evaluation settings.
BoolQ_datasets = [
    {
        'abbr': 'BoolQ',
        'type': BoolQDatasetV2,
        'path': 'opencompass/boolq',
        'reader_cfg': BoolQ_reader_cfg,
        'infer_cfg': BoolQ_infer_cfg,
        'eval_cfg': BoolQ_eval_cfg,
    },
]

View File

@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
# Reader settings for RACE: article, question and the four options are the
# input columns; 'answer' is the output column.  The 'validation' split is
# configured as the train split and 'test' as the test split.
race_reader_cfg = {
    'input_columns': ['article', 'question', 'A', 'B', 'C', 'D'],
    'output_column': 'answer',
    'train_split': 'validation',
    'test_split': 'test',
}
# Few-shot inference settings for RACE.
#
# One template is built per answer letter: the HUMAN turn lists the article,
# the question and all four options; the BOT turn is 'Answer: <letter>'.
# Every template begins with the '</E>' ice token.
race_infer_cfg = {
    'ice_template': {
        'type': PromptTemplate,
        'template': {
            letter: {
                'begin': '</E>',
                'round': [
                    {
                        'role': 'HUMAN',
                        # Adjacent literals concatenate into one prompt string.
                        'prompt': 'Article:\n{article}\n'
                                  'Question:\n{question}\n'
                                  'A. {A}\nB. {B}\nC. {C}\nD. {D}',
                    },
                    {'role': 'BOT', 'prompt': 'Answer: ' + letter},
                ],
            }
            for letter in ('A', 'B', 'C', 'D')
        },
        'ice_token': '</E>',
    },
    # Fixed example ids handed to FixKRetriever.
    'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 2, 4]},
    'inferencer': {'type': PPLInferencer},
}
# Evaluation for RACE is delegated to AccEvaluator.
race_eval_cfg = {'evaluator': {'type': AccEvaluator}}

# One dataset entry per subset ('middle', then 'high'); both share the same
# reader, inference and evaluation settings.
race_datasets = [
    dict(
        abbr=f'race-{subset}',
        type=RaceDataset,
        path='opencompass/race',
        name=subset,
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    )
    for subset in ('middle', 'high')
]

View File

@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-70B-Instruct using
# HuggingFacewithChatTemplate; run_cfg requests 4 GPUs.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'llama-3_1-70b-instruct-hf',
        'path': 'meta-llama/Meta-Llama-3.1-70B-Instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 4},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -0,0 +1,13 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-8B-Instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'llama-3_1-8b-instruct-hf',
        'path': 'meta-llama/Meta-Llama-3.1-8B-Instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -0,0 +1,16 @@
from opencompass.models import TurboMindModelwithChatTemplate
# Model entry for meta-llama/Meta-Llama-3.1-70B-Instruct using
# TurboMindModelwithChatTemplate.  The engine is configured with tp=4 and
# run_cfg requests the same number of GPUs.
models = [
    {
        'type': TurboMindModelwithChatTemplate,
        'abbr': 'llama-3_1-70b-instruct-turbomind',
        'path': 'meta-llama/Meta-Llama-3.1-70B-Instruct',
        'engine_config': {'max_batch_size': 16, 'tp': 4},
        'gen_config': {
            'top_k': 1,
            'temperature': 1e-6,
            'top_p': 0.9,
            'max_new_tokens': 1024,
        },
        'max_seq_len': 7168,
        'max_out_len': 1024,
        'batch_size': 16,
        'run_cfg': {'num_gpus': 4},
        'stop_words': ['<|end_of_text|>', '<|eot_id|>'],
    },
]

View File

@ -3,7 +3,7 @@ from opencompass.models import TurboMindModelwithChatTemplate
models = [ models = [
dict( dict(
type=TurboMindModelwithChatTemplate, type=TurboMindModelwithChatTemplate,
abbr='llama-3.1-8b-instruct-turbomind', abbr='llama-3_1-8b-instruct-turbomind',
path='meta-llama/Meta-Llama-3.1-8B-Instruct', path='meta-llama/Meta-Llama-3.1-8B-Instruct',
engine_config=dict(max_batch_size=16, tp=1), engine_config=dict(max_batch_size=16, tp=1),
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),

View File

@ -0,0 +1,15 @@
from opencompass.models import VLLMwithChatTemplate
# Model entry for mistralai/Mistral-Large-Instruct-2407 using
# VLLMwithChatTemplate with tensor_parallel_size=8; run_cfg requests 8 GPUs.
# NOTE(review): the abbr reads 'mixtral-...' while the path is Mistral-Large;
# confirm whether the abbr spelling is intentional before renaming it.
models = [
    {
        'type': VLLMwithChatTemplate,
        'abbr': 'mixtral-large-instruct-2407-vllm',
        'path': 'mistralai/Mistral-Large-Instruct-2407',
        'model_kwargs': {'tensor_parallel_size': 8},
        'max_out_len': 256,
        'batch_size': 16,
        'generation_kwargs': {'temperature': 0},
        'run_cfg': {'num_gpus': 8},
    },
]

View File

@ -0,0 +1,15 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for openbmb/MiniCPM3-4B using HuggingFacewithChatTemplate;
# the dtype is passed as the string 'torch.bfloat16' via model_kwargs and
# run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'MiniCPM3-4B-hf',
        'path': 'openbmb/MiniCPM3-4B',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
        'model_kwargs': {
            'torch_dtype': 'torch.bfloat16',
        },
    },
]

View File

@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for microsoft/Phi-3.5-MoE-instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'phi-3-5-MoE-instruct-hf',
        'path': 'microsoft/Phi-3.5-MoE-instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
    },
]

View File

@ -0,0 +1,12 @@
from opencompass.models import HuggingFacewithChatTemplate
# Model entry for microsoft/Phi-3.5-mini-instruct using
# HuggingFacewithChatTemplate; run_cfg requests 1 GPU.
models = [
    {
        'type': HuggingFacewithChatTemplate,
        'abbr': 'phi-3-5-mini-instruct-hf',
        'path': 'microsoft/Phi-3.5-mini-instruct',
        'max_out_len': 1024,
        'batch_size': 8,
        'run_cfg': {'num_gpus': 1},
    },
]

View File

@ -0,0 +1,5 @@
# Subset names of the 'humanevalx' summary group: one 'humanevalx-<lang>'
# entry per language, in the listed order.
_humanevalx_all = [
    f'humanevalx-{lang}' for lang in ('python', 'cpp', 'go', 'java', 'js')
]
# A single group named 'humanevalx' covering all of the subsets above.
humanevalx_summary_groups = [dict(name='humanevalx', subsets=_humanevalx_all)]

View File

@ -366,7 +366,7 @@ class DS1000ServiceEvaluator(BaseEvaluator):
def __init__(self, def __init__(self,
lib: str, lib: str,
ip_address='localhost', ip_address='localhost',
port=5000, port='',
timeout=600) -> None: timeout=600) -> None:
assert lib in _LIBRARY_NAME_LIST, ( assert lib in _LIBRARY_NAME_LIST, (
f' lib must be in {_LIBRARY_NAME_LIST}') f' lib must be in {_LIBRARY_NAME_LIST}')
@ -421,9 +421,14 @@ class DS1000ServiceEvaluator(BaseEvaluator):
Returns: Returns:
tuple[bool, str]: Whether the access is successful and the output. tuple[bool, str]: Whether the access is successful and the output.
""" """
if self.port:
eval_server_url = f'{self.ip_address}:{self.port}/evaluate'
else:
eval_server_url = f'{self.ip_address}/evaluate'
exec_result = subprocess.run([ exec_result = subprocess.run([
'curl', '-X', 'POST', '-F', f'file=@{file_path}', 'curl', '-X', 'POST', '-F', f'file=@{file_path}',
f'{self.ip_address}:{self.port}/evaluate' f'{eval_server_url}'
], ],
timeout=self.timeout, timeout=self.timeout,
capture_output=True) capture_output=True)

View File

@ -76,7 +76,7 @@ class HumanevalXEvaluator(BaseEvaluator):
def __init__(self, def __init__(self,
language, language,
ip_address='localhost', ip_address='localhost',
port=5000, port='',
retry=2, retry=2,
timeout=600) -> None: timeout=600) -> None:
assert language in _LANGUAGE_NAME_DICT.keys(), ( assert language in _LANGUAGE_NAME_DICT.keys(), (
@ -141,10 +141,13 @@ class HumanevalXEvaluator(BaseEvaluator):
f'\nError Information: {output}') f'\nError Information: {output}')
def _code_eval_service(self, file_path): def _code_eval_service(self, file_path):
if self.port:
eval_server_url = f'{self.ip_address}:{self.port}/evaluate'
else:
eval_server_url = f'{self.ip_address}/evaluate'
exec_result = subprocess.run([ exec_result = subprocess.run([
'curl', '-X', 'POST', '-F', f'file=@{file_path}', '-F', 'curl', '-X', 'POST', '-F', f'file=@{file_path}', '-F',
f'dataset=humanevalx/{self.language}', f'dataset=humanevalx/{self.language}', f'{eval_server_url}'
f'{self.ip_address}:{self.port}/evaluate'
], ],
timeout=self.timeout, timeout=self.timeout,
capture_output=True) capture_output=True)

View File

@ -86,6 +86,8 @@ class LmdeployPytorchModel(BaseModel):
for token_id in generation_config.eos_token_id: for token_id in generation_config.eos_token_id:
stop_words.append(token_id) stop_words.append(token_id)
gen_config.stop_words = stop_words gen_config.stop_words = stop_words
if version_info >= (0, 6, 0):
gen_config.stop_token_ids = stop_words
self.gen_config = gen_config self.gen_config = gen_config
self.end_str = end_str self.end_str = end_str
self.major_version, self.minor_version = version_info[:2] self.major_version, self.minor_version = version_info[:2]

View File

@ -126,6 +126,7 @@ class TurboMindModelwithChatTemplate(BaseModel):
'top_k': 1, 'top_k': 1,
'stop_words': encode_stop_words, 'stop_words': encode_stop_words,
} }
gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG) gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG)
gen_config.update(self.gen_config) gen_config.update(self.gen_config)
if do_sample: if do_sample:
@ -134,6 +135,9 @@ class TurboMindModelwithChatTemplate(BaseModel):
from lmdeploy.messages import GenerationConfig from lmdeploy.messages import GenerationConfig
gen_config = GenerationConfig(**gen_config) gen_config = GenerationConfig(**gen_config)
if self.version_info >= (0, 6, 0):
gen_config.stop_words = stop_words
gen_config.convert_stop_bad_words_to_ids(self.tokenizer)
results = [] results = []
for batch_message in batch_messages: for batch_message in batch_messages:

View File

@ -340,6 +340,14 @@ DATASETS_URL = {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humaneval.zip", "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humaneval.zip",
"md5": "88b1b89dc47b7121c81da6bcd85a69c3", "md5": "88b1b89dc47b7121c81da6bcd85a69c3",
}, },
"/humanevalx": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/humanevalx.zip",
"md5": "22930355c03fb73fb5bae14b50f1deb9",
},
"/ds1000_data": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ds1000_data.zip",
"md5": "1a4990aec04a2fd73ccfad12e2d43b43",
},
"/drop_simple_eval/": { "/drop_simple_eval/": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/drop_simple_eval.zip", "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/drop_simple_eval.zip",
"md5": "c912afe5b4a63509851cf16e6b91830e", "md5": "c912afe5b4a63509851cf16e6b91830e",