OpenCompass/opencompass/datasets/humaneval.py

import os.path as osp
import tempfile
from typing import List

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, TEXT_POSTPROCESSORS


@ICL_EVALUATORS.register_module()
class HumanEvaluator(BaseEvaluator):
    """Evaluator for human eval."""

    def __init__(self, k: List[int] = [1, 10, 100]) -> None:
        try:
            from human_eval.data import HUMAN_EVAL, write_jsonl
            from human_eval.evaluation import evaluate_functional_correctness
            self.write_jsonl = write_jsonl
            self.HUMAN_EVAL = HUMAN_EVAL
            self.eval = evaluate_functional_correctness
        except ImportError:
            raise ImportError('Please install human_eval following'
                              'https://github.com/openai/human-eval/tree/'
                              'master#installation first.')
        self.k = k
        super().__init__()

    def score(self, predictions, references):

        predictions = [{
            'task_id': f'HumanEval/{i}',
            'completion': predictions[i]
        } for i in range(len(predictions))]
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_dir = osp.join(tmp_dir, 'human_eval.json')
            self.write_jsonl(out_dir, predictions)
            score = self.eval(out_dir,
                              self.k,
                              n_workers=4,
                              timeout=3.0,
                              problem_file=self.HUMAN_EVAL)
            return {f'humaneval_{k}': score[k] * 100 for k in score}


@TEXT_POSTPROCESSORS.register_module('humaneval')
def humaneval_postprocess(text: str) -> str:
    text = text.split('\n\n')[0]
    if '```' in text:
        text = text.split('```')[1]
    if text.strip().startswith('def'):
        text = '\n'.join(text.split('\n')[1:])
    if not text.startswith('    '):
        if text.startswith(' '):
            text = '    ' + text.lstrip()
        else:
            text = '\n'.join(['    ' + line for line in text.split('\n')])
    return text
initial commit 2023-07-04 21:34:55 +08:00			`import os.path as osp`
			`import tempfile`
			`from typing import List`

			`from opencompass.openicl.icl_evaluator import BaseEvaluator`
			`from opencompass.registry import ICL_EVALUATORS, TEXT_POSTPROCESSORS`


			`@ICL_EVALUATORS.register_module()`
			`class HumanEvaluator(BaseEvaluator):`
			`"""Evaluator for human eval."""`

			`def __init__(self, k: List[int] = [1, 10, 100]) -> None:`
			`try:`
			`from human_eval.data import HUMAN_EVAL, write_jsonl`
			`from human_eval.evaluation import evaluate_functional_correctness`
			`self.write_jsonl = write_jsonl`
			`self.HUMAN_EVAL = HUMAN_EVAL`
			`self.eval = evaluate_functional_correctness`
			`except ImportError:`
			`raise ImportError('Please install human_eval following'`
			`'https://github.com/openai/human-eval/tree/'`
			`'master#installation first.')`
			`self.k = k`
			`super().__init__()`

			`def score(self, predictions, references):`

			`predictions = [{`
			`'task_id': f'HumanEval/{i}',`
			`'completion': predictions[i]`
			`} for i in range(len(predictions))]`
			`with tempfile.TemporaryDirectory() as tmp_dir:`
			`out_dir = osp.join(tmp_dir, 'human_eval.json')`
			`self.write_jsonl(out_dir, predictions)`
Update configs (#9) * Update implements * Update 2023-07-06 12:27:41 +08:00			`score = self.eval(out_dir,`
			`self.k,`
			`n_workers=4,`
			`timeout=3.0,`
			`problem_file=self.HUMAN_EVAL)`
initial commit 2023-07-04 21:34:55 +08:00			`return {f'humaneval_{k}': score[k] * 100 for k in score}`


			`@TEXT_POSTPROCESSORS.register_module('humaneval')`
			`def humaneval_postprocess(text: str) -> str:`
			`text = text.split('\n\n')[0]`
			if '```' in text:
			text = text.split('```')[1]
Update configs (#9) * Update implements * Update 2023-07-06 12:27:41 +08:00			`if text.strip().startswith('def'):`
initial commit 2023-07-04 21:34:55 +08:00			`text = '\n'.join(text.split('\n')[1:])`
			`if not text.startswith(' '):`
			`if text.startswith(' '):`
			`text = ' ' + text.lstrip()`
			`else:`
			`text = '\n'.join([' ' + line for line in text.split('\n')])`
			`return text`