# OpenCompass/opencompass/datasets/IFEval/ifeval.py

import json

from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import LOAD_DATASET

from ..base import BaseDataset
from .evaluation_main import (InputExample, test_instruction_following_loose,
                              test_instruction_following_strict)


@LOAD_DATASET.register_module()
class IFEvalDataset(BaseDataset):
    """Loader for IFEval data stored as a JSON Lines file.

    Every non-blank line of the file is parsed as one JSON object, which
    must contain a ``prompt`` key.
    """

    @staticmethod
    def load(path):
        """Read a JSON Lines file and wrap its records in a ``Dataset``.

        Args:
            path: Path of the UTF-8 encoded JSON Lines file to read.

        Returns:
            A ``Dataset`` built with ``Dataset.from_list``, one row per
            record. Each row has a ``prompt`` entry (the record's
            ``prompt`` value) and a ``reference`` entry holding the whole
            parsed record.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record has no ``prompt`` key.
        """
        rows = []
        with open(path, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                # Stray blank lines (for example extra newlines at the end
                # of the file) are not records; json.loads('') would raise.
                if not line:
                    continue
                record = json.loads(line)
                rows.append(dict(prompt=record['prompt'], reference=record))
        return Dataset.from_list(rows)


class IFEvaluator(BaseEvaluator):
    """Evaluator that reports strict and loose IFEval accuracy."""

    def score(self, predictions, references):
        """Score predictions with the strict and loose IFEval checkers.

        Args:
            predictions: Model responses, one per example.
            references: Reference records paired with ``predictions`` by
                position. Each record must provide ``key``,
                ``instruction_id_list``, ``prompt`` and ``kwargs``, where
                ``kwargs`` is a list of dicts.

        Returns:
            A dict with ``strict_acc`` and ``loose_acc``: the percentage of
            examples whose strict (respectively loose) result has a truthy
            ``follow_all_instructions``. Both are ``0.0`` when there are no
            prediction/reference pairs to score.
        """
        results = []
        for pred, refer in zip(predictions, references):
            # Drop None-valued arguments before they reach the checkers.
            # NOTE(review): presumably these Nones are padding introduced
            # when the records are stored in a Dataset -- confirm.
            # New dicts are built so the caller's references stay untouched.
            cleaned_kwargs = [{
                k: v
                for k, v in kwarg.items() if v is not None
            } for kwarg in refer['kwargs']]
            example = InputExample(
                key=refer['key'],
                instruction_id_list=refer['instruction_id_list'],
                prompt=refer['prompt'],
                kwargs=cleaned_kwargs)
            results.append(
                dict(
                    strict=test_instruction_following_strict(example, pred),
                    loose=test_instruction_following_loose(example, pred),
                ))

        # Nothing to average over; avoid dividing by zero below.
        if not results:
            return dict(strict_acc=0.0, loose_acc=0.0)

        total = len(results)
        strict = sum(result['strict'].follow_all_instructions
                     for result in results) / total
        loose = sum(result['loose'].follow_all_instructions
                    for result in results) / total
        return dict(strict_acc=strict * 100, loose_acc=loose * 100)
[Feature] Add IFEval (#813) * [Feature] Add IFEval * [Doc] add introduction of IFEval 2024-01-23 20:07:49 +08:00			`import json`

			`from datasets import Dataset`

			`from opencompass.openicl.icl_evaluator import BaseEvaluator`
			`from opencompass.registry import LOAD_DATASET`

			`from ..base import BaseDataset`
			`from .evaluation_main import (InputExample, test_instruction_following_loose,`
			`test_instruction_following_strict)`


			`@LOAD_DATASET.register_module()`
			`class IFEvalDataset(BaseDataset):`

			`@staticmethod`
			`def load(path):`
			`datasets = []`
			`with open(path, 'r', encoding='utf-8') as file:`
			`for line in file:`
			`tmp = json.loads(line.strip())`
			`dataset = dict(prompt=tmp['prompt'], reference=tmp)`
			`datasets.append(dataset)`
			`return Dataset.from_list(datasets)`


			`class IFEvaluator(BaseEvaluator):`

			`def score(self, predictions, references):`
			`results = []`
			`for pred, refer in zip(predictions, references):`
			`input = InputExample(`
			`key=refer['key'],`
			`instruction_id_list=refer['instruction_id_list'],`
			`prompt=refer['prompt'],`
			`kwargs=refer['kwargs'])`
			`for kwarg in input.kwargs:`
			`for k in list(kwarg.keys()):`
			`if kwarg[k] is None:`
			`kwarg.pop(k, None)`
			`result = dict(`
			`strict=test_instruction_following_strict(input, pred),`
			`loose=test_instruction_following_loose(input, pred),`
			`)`
			`results.append(result)`
			`strict = sum(`
			`[result['strict'].follow_all_instructions`
			`for result in results]) / len(results)`
			`loose = sum(`
			`[result['loose'].follow_all_instructions`
			`for result in results]) / len(results)`
[Fix] Fix acc of IFEval (#849) * [Feature] Add IFEval * [Fix] Changing the Score Rule. 2024-01-27 22:27:07 +08:00			`return dict(strict_acc=strict * 100, loose_acc=loose * 100)`