OpenCompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py
2023-07-04 22:11:33 +08:00

48 lines
1.8 KiB
Python

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import CLPInferencer
from opencompass.openicl.icl_evaluator import AUCROCEvaluator
from opencompass.datasets import JigsawMultilingualDataset
jigsawmultilingual_reader_cfg = dict(
input_columns=['text'],
output_column='label',
train_split='test',
test_split='test')
# Jigsaw multilingual toxic dataset is not supported in Huggingface
# Please download the test dataset files `test.csv` and `test_labels.csv` from
# below link and move them the data/ folder to perform evaluation.
# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
jigsawmultilingual_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt="Text: {text}\nQuestion: Does the above text contain "
"rude, hateful, aggressive, disrespectful or unreasonable "
"language?\nAnswer:")
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=CLPInferencer))
jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
jigsawmultilingual_datasets = []
for _l in lang:
jigsawmultilingual_datasets.append(
dict(
abbr=f'jigsaw_multilingual_{_l}',
type=JigsawMultilingualDataset,
path='data/test.csv',
label='data/test_labels.csv',
lang=_l,
reader_cfg=jigsawmultilingual_reader_cfg,
infer_cfg=jigsawmultilingual_infer_cfg,
eval_cfg=jigsawmultilingual_eval_cfg))
del lang, _l