[Feature] Add one script for subjective evaluation (#993)

* add one script for subjective

* add one script for subjective

* add one script for subjective

* add one script for subjective

---------

Co-authored-by: thebestannie <1290646445@qq.com>
This commit is contained in:
bittersweet1999 2024-03-20 23:20:41 +08:00 committed by GitHub
parent 0221d30877
commit 054e9fa7e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 179 additions and 0 deletions

View File

@ -0,0 +1,21 @@
# AlignBench subjective-evaluation config.
#
# Pulls the AlignBench dataset definitions and the shared model/runner
# settings from sibling config modules, then wires them into an `eval`
# stage that uses mode='singlescore'.
from mmengine.config import read_base

with read_base():
    from ..datasets.subjective.alignbench.alignbench_judgeby_critiquellm import subjective_datasets
    from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner

from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.summarizers import AlignmentBenchSummarizer

# Datasets to run: a fresh list copied from the imported AlignBench config.
datasets = [*subjective_datasets]

# Judging stage: partition over the candidate `models` and execute with the
# runner imported from model_cfg.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        mode='singlescore',
        models=models,
    ),
    runner=runner,
)

# Result aggregation for AlignBench.
summarizer = dict(type=AlignmentBenchSummarizer, judge_type='general')

# Output directory for this evaluation.
work_dir = 'outputs/alignment_bench/'

View File

@ -0,0 +1,20 @@
# AlpacaEval subjective-evaluation config.
#
# Candidate models (`models`) are compared against the `gpt4` baseline using
# mode='m2n'; both come from the shared model_cfg module.
from mmengine.config import read_base

with read_base():
    from ..datasets.subjective.alpaca_eval.alpacav1_judgeby_gpt4 import subjective_datasets as alpacav1
    from ..datasets.subjective.alpaca_eval.alpacav2_judgeby_gpt4 import subjective_datasets as alpacav2
    from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner

from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.summarizers import AlpacaSummarizer

# Only the v2 dataset config is evaluated; `alpacav1` is imported but not
# used in this file.
datasets = [*alpacav2]

# Judging stage: size-based partitioning with `gpt4` as the single base model
# and `models` as the comparison set. `given_pred` is forwarded unchanged
# from model_cfg.
eval = dict(
    partitioner=dict(
        type=SubjectiveSizePartitioner,
        max_task_size=1000,
        mode='m2n',
        base_models=[gpt4],
        compare_models=models,
    ),
    runner=runner,
    given_pred=given_pred,
)

# Output directory for this evaluation.
work_dir = 'outputs/alpaca/'

# Result aggregation; judge_type matches the v2 dataset selected above.
summarizer = dict(type=AlpacaSummarizer, judge_type='v2')

View File

@ -0,0 +1,28 @@
# CompassArena subjective-evaluation config.
#
# Candidate models (`models`) are compared against the `gpt4` baseline using
# mode='m2n'; both come from the shared model_cfg module.
from os import getenv as gv

from opencompass.models import HuggingFaceCausalLM
from mmengine.config import read_base

with read_base():
    from ..datasets.subjective.compassarena.compassarena_compare import subjective_datasets
    from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner

from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.summarizers import CompassArenaSummarizer

# Datasets to run: a fresh list copied from the imported CompassArena config.
datasets = [*subjective_datasets]

# Judging stage: size-based partitioning (strategy='split') with `gpt4` as
# the single base model and `models` as the comparison set. `given_pred` is
# forwarded unchanged from model_cfg.
eval = dict(
    partitioner=dict(
        type=SubjectiveSizePartitioner,
        strategy='split',
        max_task_size=10000,
        mode='m2n',
        base_models=[gpt4],
        compare_models=models,
    ),
    runner=runner,
    given_pred=given_pred,
)

# Output directory for this evaluation.
work_dir = 'outputs/compass_arena/'

# Result aggregation for CompassArena.
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add')

View File

@ -0,0 +1,25 @@
# MT-Bench subjective-evaluation config (single-answer judging).
from mmengine.config import read_base

with read_base():
    from ..datasets.subjective.multiround.mtbench_single_judge_diff_temp import subjective_datasets
    # Alternative dataset config left by the original author (pairwise
    # judging): mtbench_pair_judge in the same multiround package.
    # TODO: confirm the module path before enabling it.
    from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner

from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.summarizers import MTBenchSummarizer

# Datasets to run: a fresh list copied from the imported MT-Bench config.
datasets = [*subjective_datasets]

# Remove any explicit `do_sample` entry from each candidate model's
# generation kwargs. Presumably the "diff_temp" dataset config controls
# sampling itself -- verify against that dataset config.
# The loop variable is intentionally still called `model`: top-level names in
# a config module are visible to the config loader.
for model in models:
    if 'generation_kwargs' in model and 'do_sample' in model['generation_kwargs']:
        del model['generation_kwargs']['do_sample']

# Judging stage: size-based partitioning (strategy='split') over the
# candidate `models`, using mode='singlescore'.
eval = dict(
    partitioner=dict(
        type=SubjectiveSizePartitioner,
        strategy='split',
        max_task_size=10000,
        mode='singlescore',
        models=models,
    ),
    runner=runner,
)

# Result aggregation for MT-Bench single-answer judging.
summarizer = dict(type=MTBenchSummarizer, judge_type='single')

# Output directory for this evaluation.
work_dir = 'outputs/mtbench/'

View File

@ -0,0 +1,85 @@
from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3
from opencompass.models.openai_api import OpenAIAllesAPIN
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.runners import LocalRunner
from opencompass.runners import SlurmSequentialRunner
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask
# Conversation template shared by every model entry in this file: one HUMAN
# turn followed by one BOT turn, with the BOT turn flagged as the one to
# generate.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)
# ------------- Inference stage -------------
# Candidate models whose answers will be judged. Sampling is switched on
# (do_sample=True), which the original author notes is the usual choice for
# subjective evaluation.
models = [
    dict(
        type=HuggingFaceChatGLM3,
        abbr='chatglm3-6b-hf',
        path='THUDM/chatglm3-6b',
        tokenizer_path='THUDM/chatglm3-6b',
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True,
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        generation_kwargs=dict(
            do_sample=True,
        ),
        meta_template=api_meta_template,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=1,
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
# Judge model used to score candidate answers; temperature is pinned to 0.
judge_model = dict(
    abbr='GPT4-Turbo',
    type=OpenAIAllesAPIN,
    path='gpt-4-1106-preview',
    # Per the original author's note, an empty key is meant to fall back to
    # $OPENAI_API_KEY; you can also write your key here directly.
    # NOTE(review): confirm this fallback applies to this API class.
    key='',
    # Endpoint placeholder -- fill in before running.
    url='',
    meta_template=api_meta_template,
    query_per_second=1,
    max_out_len=1024,
    max_seq_len=4096,
    batch_size=1,
    retry=30,
    temperature=0,
)
# Inference-stage settings: size-based partitioning (strategy='split') with
# tasks submitted through the sequential Slurm runner on the 'llmeval'
# partition.
infer = dict(
    partitioner=dict(
        type=SizePartitioner,
        strategy='split',
        max_task_size=10000,
    ),
    runner=dict(
        type=SlurmSequentialRunner,
        partition='llmeval',
        quotatype='auto',
        max_num_workers=256,
        task=dict(type=OpenICLInferTask),
    ),
)
# Evaluation-stage runner: runs subjective-eval tasks locally, each task
# configured with `judge_model` as its judge.
runner = dict(
    type=LocalRunner,
    max_num_workers=12,
    task=dict(
        type=SubjectiveEvalTask,
        judge_cfg=judge_model,
    ),
)
# Baseline model that candidates are compared against in the pairwise
# ('m2n') configs. It uses the same API wrapper as `judge_model`, but with
# its own abbr, output length, batch size, retry count and temperature.
gpt4 = dict(
    abbr='gpt4-turbo',
    type=OpenAIAllesAPIN,
    path='gpt-4-1106-preview',
    # Per the original author's note, an empty key is meant to fall back to
    # $OPENAI_API_KEY; you can also write your key here directly.
    # NOTE(review): confirm this fallback applies to this API class.
    key='',
    # Endpoint placeholder, added to mirror `judge_model`, which passes `url`
    # to the same model type. Without it this entry cannot be built the same
    # way as the judge -- fill in before running inference with it.
    url='',
    meta_template=api_meta_template,
    query_per_second=1,
    max_out_len=2048,
    max_seq_len=4096,
    batch_size=4,
    retry=20,
    temperature=1,
)
# Pre-computed predictions to reuse instead of re-running inference: each
# entry names a model by `abbr` and gives the `path` to its predictions.
# 'your path' is a placeholder to replace; the abbr matches `gpt4` above.
given_pred = [
    dict(abbr='gpt4-turbo', path='your path'),
]