OpenCompass/opencompass/datasets/mmlu_cf.py

from datasets import DatasetDict, load_dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class MMLUCFDataset(BaseDataset):
    """Loader for a single subject of the MMLU-CF benchmark.

    The HuggingFace dataset is expected to expose one split per subject and
    stage, named ``{name}_dev`` and ``{name}_val``.
    """

    @staticmethod
    def load(path: str, name: str):
        """Load the dev/val splits of one subject and adapt their fields.

        Args:
            path: Dataset identifier or local path passed to
                ``datasets.load_dataset``.
            name: Subject prefix used to select the ``{name}_dev`` and
                ``{name}_val`` splits.

        Returns:
            A ``DatasetDict`` with two entries: ``'dev'`` (built from
            ``{name}_dev``) and ``'test'`` (built from ``{name}_val``).
            Every example carries ``input``, ``A``, ``B``, ``C``, ``D`` and
            ``target`` in addition to the columns of the source split.

        Raises:
            KeyError: If one of the required splits is missing from the
                loaded dataset, or a required column is missing from an
                example.
        """
        hf_dataset = load_dataset(path)

        # Output split name -> suffix of the source split.
        # The benchmark's 'val' split is served as OpenCompass' 'test' split.
        split_suffixes = {'dev': 'dev', 'test': 'val'}

        def _rename_fields(example):
            # ``Dataset.map`` merges the returned dict into the example, so
            # this adds 'input'/'target' next to the existing columns.
            return {
                'input': example['Question'],
                'A': example['A'],
                'B': example['B'],
                'C': example['C'],
                'D': example['D'],
                'target': example['Answer'],
            }

        # Only the two requested splits are converted; mapping over every
        # split of every subject would be wasted work.
        converted = {}
        for out_split, suffix in split_suffixes.items():
            sub_set = f'{name}_{suffix}'
            if sub_set not in hf_dataset:
                raise KeyError(
                    f"Split '{sub_set}' not found in dataset '{path}'. "
                    f'Available splits: {sorted(hf_dataset.keys())}')
            converted[out_split] = hf_dataset[sub_set].map(_rename_fields)

        return DatasetDict(converted)
[Feature] Support MMLU-CF Benchmark (#1775) * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * Update mmlu-cf * Update mmlu-cf * Update mmlu-cf * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * [Feature] Support MMLU-CF Benchmark * Remove outside configs --------- Co-authored-by: liushz <qq1791167085@163.com> 2025-01-09 14:11:20 +08:00			`from datasets import DatasetDict, load_dataset`

			`from opencompass.registry import LOAD_DATASET`

			`from .base import BaseDataset`


			`@LOAD_DATASET.register_module()`
			`class MMLUCFDataset(BaseDataset):`

			`@staticmethod`
			`def load(path: str, name: str):`
			`"""Loading HuggingFace datasets."""`
			`# Use HuggingFace's load_dataset method to load the dataset`
			`hf_dataset = load_dataset(path)`
			`columns_to_keep = ['Question', 'A', 'B', 'C', 'D', 'Answer']`
			`hf_dataset = hf_dataset.map(`
			`lambda x: {key: x[key]`
			`for key in columns_to_keep})`
			`splits = ['dev', 'val']`

			`for split in splits:`
			`sub_set = f'{name}_{split}'`

			`# Rename fields here if they don't match the expected names`
			`hf_dataset[sub_set] = hf_dataset[sub_set].map(`
			`lambda example: {`
			`'input': example['Question'],`
			`'A': example['A'],`
			`'B': example['B'],`
			`'C': example['C'],`
			`'D': example['D'],`
			`'target': example['Answer']`
			`})`

			`# Create a DatasetDict and return it`
			`dataset = DatasetDict({`
			`'dev': hf_dataset[f'{name}_{splits[0]}'],`
			`'test': hf_dataset[f'{name}_{splits[1]}'] # Use 'val' as 'test'`
			`})`
			`return dataset`