OpenCompass/opencompass/configs/datasets/anthropics_evals/airisk_gen_ba66fc.py
Songyang Zhang 46cc7894e1
[Feature] Support import configs/models/summarizers from whl (#1376)
* [Feature] Support import configs/models/summarizers from whl

* Update LCBench configs

* Update

* Update

* Update

* Update

* update

* Update

* Update

* Update

* Update

* Update
2024-08-01 00:42:48 +08:00

67 lines
2.1 KiB
Python

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import AiRiskDataset
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
# Reader settings: the `question` column is the model input and
# `answer_matching_behavior` is the reference column.
# NOTE(review): both splits are set to 'train' -- presumably the loader
# exposes a single split; confirm against AiRiskDataset.
airisk_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer_matching_behavior',
    'train_split': 'train',
    'test_split': 'train',
}
# Inference settings: the prompt template is a single HUMAN round whose text
# is just the `{question}` placeholder; retrieval is configured with
# ZeroRetriever and inference with GenInferencer.
airisk_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {'role': 'HUMAN', 'prompt': '{question}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
# Evaluation settings: AccEvaluator is the evaluator, predictions are read
# from the BOT role, and `first_option_postprocess` is configured as the
# prediction post-processor with options 'ABCDEFGH'.
airisk_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {
        'type': first_option_postprocess,
        'options': 'ABCDEFGH',
    },
}
# Datasets can be downloaded from
# https://github.com/anthropics/evals/tree/main/advanced-ai-risk/human_generated_evals # noqa
# This config builds one dataset for every subset listed below, not only
# `corrigible-less-HHH` and `corrigible-more-HHH`. The targets of these
# dimensions are not exactly the same. You can choose whichever subsets
# you need.
# Subset names; each one is used below as both the abbreviation suffix and
# the `.jsonl` file stem. Entries are grouped by name prefix for readability;
# the order is unchanged.
_dataset_list = [
    # coordinate-*
    'coordinate-itself',
    'coordinate-other-ais',
    'coordinate-other-versions',
    # corrigible-*
    'corrigible-less-HHH',
    'corrigible-more-HHH',
    'corrigible-neutral-HHH',
    # standalone subsets
    'myopic-reward',
    'one-box-tendency',
    'power-seeking-inclination',
    # self-awareness-*
    'self-awareness-general-ai',
    'self-awareness-good-text-model',
    'self-awareness-text-model',
    'self-awareness-training-architecture',
    'self-awareness-web-gpt',
    # standalone subsets
    'survival-instinct',
    'wealth-seeking-inclination',
]
# Build one dataset config per subset in `_dataset_list`. The entries differ
# only in `abbr` and `path`; all of them reference the same reader/infer/eval
# config objects defined above. A comprehension replaces the for/append loop
# so the loop variable does not linger as a module-level name in this config.
airisk_datasets = [
    dict(
        abbr=f'airisk_{_dataset}',
        type=AiRiskDataset,
        path=f'data/advanced-ai-risk/human_generated_evals/{_dataset}.jsonl',
        reader_cfg=airisk_reader_cfg,
        infer_cfg=airisk_infer_cfg,
        eval_cfg=airisk_eval_cfg,
    ) for _dataset in _dataset_list
]