[Feature] Add multi-prompt generation demo (#568)

* [Feature] Add multi-prompt generation demo

* [Fix] change form in winogrande_gen_XXX.py

* [Fix] make multi-prompt demo more direct

* [Fix] fix bug

* [Fix] minor fix

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>
This commit is contained in:
Jingming 2023-11-20 16:16:37 +08:00 committed by GitHub
parent 91fba2c2e9
commit 5e75e29711
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 115 additions and 10 deletions

View File

@@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V2
from opencompass.utils.text_postprocessors import first_option_postprocess
# Reader configuration: the two candidate completions fill the prompt slots
# ``{opt1}``/``{opt2}``; the gold label is taken from the ``answer`` column.
winogrande_reader_cfg = {
    'input_columns': ['opt1', 'opt2'],
    'output_column': 'answer',
}
# Accuracy evaluation on the BOT turn.  The prediction is first normalised by
# ``first_option_postprocess`` with ``options='AB'`` — presumably extracting
# the first A/B letter from the generation (confirm against the helper).
winogrande_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'AB',
    },
}
# Three alternative phrasings of the same A/B sentence-selection question.
# Each becomes its own dataset entry below, so accuracy can be compared
# across prompt wordings.
_winogrande_prompt = {
    'prompt_1': 'Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:',
    'prompt_2': 'Which is a good sentence out of the following:\nA. {opt1}\nB. {opt2}\nAnswer:',
    'prompt_3': 'Can you identify a good sentence from the following:\nA. {opt1}\nB. {opt2}\nAnswer:',
}
# Build one winogrande dataset config per prompt variant; the abbr encodes the
# variant name so downstream summary groups can reference each one.
winogrande_datasets = []
for _name, _prompt in _winogrande_prompt.items():
    winogrande_datasets.append(
        dict(
            abbr='winogrande_' + _name,
            type=winograndeDataset_V2,
            path='./data/winogrande',
            reader_cfg=winogrande_reader_cfg,
            infer_cfg=dict(
                prompt_template=dict(
                    type=PromptTemplate,
                    template=dict(round=[
                        dict(role='HUMAN', prompt=_prompt),
                    ]),
                ),
                retriever=dict(type=ZeroRetriever),
                inferencer=dict(type=GenInferencer),
            ),
            eval_cfg=winogrande_eval_cfg,
        ))

# Drop the loop temporaries so they do not leak into the config namespace.
del _name, _prompt

View File

@@ -0,0 +1,48 @@
from mmengine.config import read_base
from opencompass.models import HuggingFaceCausalLM
# ``read_base`` lets this eval config pull definitions from other config
# modules as plain Python imports (mmengine config mechanism).
with read_base():
    # Multi-prompt winogrande variants — three datasets, one per prompt
    # phrasing (the a027b6 suffix is presumably the prompt hash; confirm
    # against the dataset config's filename convention).
    from .datasets.winogrande.winogrande_gen_a027b6 import winogrande_datasets

# Evaluate every prompt variant.
datasets = [*winogrande_datasets]
# Chat-turn markup expected by the model: user turns are wrapped in
# <|User|>: ... <eoh>, bot turns in <|Bot|>: ... <eoa>; generation starts
# at the BOT role.
_meta_template = {
    'round': [
        {'role': 'HUMAN', 'begin': '<|User|>:', 'end': '<eoh>\n'},
        {'role': 'BOT', 'begin': '<|Bot|>:', 'end': '<eoa>\n', 'generate': True},
    ],
}
# Model under evaluation: InternLM-Chat-7B loaded through the HuggingFace
# causal-LM wrapper, one GPU, left-side padding/truncation for generation.
models = [{
    'type': HuggingFaceCausalLM,
    'abbr': 'internlm-chat-7b-hf',
    'path': 'internlm/internlm-chat-7b',
    'tokenizer_path': 'internlm/internlm-chat-7b',
    'tokenizer_kwargs': {
        'padding_side': 'left',
        'truncation_side': 'left',
        'use_fast': False,
        'trust_remote_code': True,
    },
    'max_out_len': 100,
    'max_seq_len': 2048,
    'batch_size': 8,
    'meta_template': _meta_template,
    'model_kwargs': {
        'trust_remote_code': True,
        'device_map': 'auto',
    },
    'run_cfg': {'num_gpus': 1, 'num_procs': 1},
}]
# Collect the abbr of every winogrande prompt variant so the summarizer can
# aggregate across them.
_winogrande_all = [_ds['abbr'] for _ds in winogrande_datasets]

summarizer = {
    'summary_groups': [
        # Mean accuracy over the three prompt phrasings.
        {'name': 'winogrande', 'subsets': _winogrande_all},
        # Spread across phrasings — the 'std' flag makes the summarizer
        # report the standard deviation instead of the average.
        {'name': 'winogrande_std', 'subsets': _winogrande_all, 'std': True},
    ],
}

View File

@@ -1,6 +1,7 @@
# flake8: noqa # flake8: noqa
# yapf: disable # yapf: disable
import getpass import getpass
import math
import os.path as osp import os.path as osp
from datetime import datetime from datetime import datetime
from typing import List, Optional from typing import List, Optional
@@ -127,6 +128,12 @@ class DefaultSummarizer:
results[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][0] results[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][0]
eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown')) eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
if len(results) == len(sg['subsets']): if len(results) == len(sg['subsets']):
if 'std' in sg and sg['std'] == True:
avg = sum(results[k] for k in results) / len(results)
variance = sum((results[k] - avg)**2 for k in results) / len(results)
metric = 'standard_deviation'
results[metric] = math.sqrt(variance)
else:
if 'weights' in sg: if 'weights' in sg:
numerator = sum(results[k] * sg['weights'][k] for k in sg['weights']) numerator = sum(results[k] * sg['weights'][k] for k in sg['weights'])
denominator = sum(sg['weights'].values()) denominator = sum(sg['weights'].values())
@@ -136,12 +143,13 @@
denominator = len(results) denominator = len(results)
metric = 'naive_average' metric = 'naive_average'
results[metric] = numerator / denominator results[metric] = numerator / denominator
eval_modes = list(set(eval_modes)) eval_modes = list(set(eval_modes))
eval_mode = eval_modes[0] if len(eval_modes) == 1 else 'mixed' eval_mode = eval_modes[0] if len(eval_modes) == 1 else 'mixed'
# add to global results # add to global results
raw_results[model_abbr][sg['name']] = results raw_results[model_abbr][sg['name']] = results
parsed_results[model_abbr][sg['name']] = [numerator / denominator] parsed_results[model_abbr][sg['name']] = [results[metric]]
dataset_metrics[sg['name']] = [metric] dataset_metrics[sg['name']] = [metric]
dataset_eval_mode[sg['name']] = eval_mode dataset_eval_mode[sg['name']] = eval_mode
elif len(results) == 0: elif len(results) == 0: