from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import CLPInferencer from opencompass.openicl.icl_evaluator import AUCROCEvaluator from opencompass.datasets import JigsawMultilingualDataset jigsawmultilingual_reader_cfg = dict( input_columns=['text'], output_column='label', train_split='test', test_split='test') # Jigsaw multilingual toxic dataset is not supported in Huggingface # Please download the test dataset files `test.csv` and `test_labels.csv` from # below link and move them the data/ folder to perform evaluation. # https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data jigsawmultilingual_infer_cfg = dict( prompt_template=dict( type=PromptTemplate, template=dict(round=[ dict( role='HUMAN', prompt='Text: {text}\nQuestion: Does the above text contain ' 'rude, hateful, aggressive, disrespectful or unreasonable ' 'language?\nAnswer:') ])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=CLPInferencer)) jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), ) lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr'] jigsawmultilingual_datasets = [] for _l in lang: jigsawmultilingual_datasets.append( dict( abbr=f'jigsaw_multilingual_{_l}', type=JigsawMultilingualDataset, path='data/jigsawmultilingual/test.csv', label='data/jigsawmultilingual/test_labels.csv', lang=_l, reader_cfg=jigsawmultilingual_reader_cfg, infer_cfg=jigsawmultilingual_infer_cfg, eval_cfg=jigsawmultilingual_eval_cfg)) del lang, _l