2023-07-04 21:34:55 +08:00
|
|
|
import os.path as osp
|
|
|
|
import tempfile
|
|
|
|
from typing import List
|
|
|
|
|
|
|
|
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
|
|
|
from opencompass.registry import ICL_EVALUATORS, TEXT_POSTPROCESSORS
|
|
|
|
|
|
|
|
|
|
|
|
@ICL_EVALUATORS.register_module()
class HumanEvaluator(BaseEvaluator):
    """Evaluator for human eval.

    Thin wrapper around the ``human_eval`` package: predictions are written
    to a temporary JSONL file and handed to
    ``human_eval.evaluation.evaluate_functional_correctness``.

    Args:
        k (List[int]): Values forwarded as the ``k`` argument of
            ``evaluate_functional_correctness``. Defaults to [1, 10, 100].

    Raises:
        ImportError: If the ``human_eval`` package is not installed.
    """

    def __init__(self, k: List[int] = [1, 10, 100]) -> None:
        # Imported lazily so that the module can be loaded (and the evaluator
        # registered) even when the optional dependency is missing.
        try:
            from human_eval.data import HUMAN_EVAL, write_jsonl
            from human_eval.evaluation import evaluate_functional_correctness
        except ImportError as err:
            raise ImportError('Please install human_eval following '
                              'https://github.com/openai/human-eval/tree/'
                              'master#installation first.') from err

        self.write_jsonl = write_jsonl
        self.HUMAN_EVAL = HUMAN_EVAL
        self.eval = evaluate_functional_correctness
        # Store a copy: the default list object is shared by every call that
        # omits ``k``, so aliasing it would leak mutations across instances.
        self.k = list(k)
        super().__init__()

    def score(self, predictions, references):
        """Score completions with ``evaluate_functional_correctness``.

        Args:
            predictions: Sequence of completions. The item at position ``i``
                is submitted under task id ``HumanEval/{i}``, so the order
                presumably has to match the benchmark's problem order --
                verify against the caller.
            references: Not used by this evaluator.

        Returns:
            dict: One ``humaneval_<name>`` entry per entry returned by the
            evaluation function, with the value multiplied by 100.
        """
        samples = [{
            'task_id': f'HumanEval/{idx}',
            'completion': completion
        } for idx, completion in enumerate(predictions)]

        # The sample file only needs to live for the duration of the
        # evaluation call, so keep it in a self-cleaning temporary directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_path = osp.join(tmp_dir, 'human_eval.json')
            self.write_jsonl(out_path, samples)
            score = self.eval(out_path,
                              self.k,
                              n_workers=4,
                              timeout=3.0,
                              problem_file=self.HUMAN_EVAL)

        return {
            f'humaneval_{metric}': value * 100
            for metric, value in score.items()
        }
|
|
|
|
|
|
|
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('humaneval')
|
|
|
|
def humaneval_postprocess(text: str) -> str:
    """Reduce a raw model completion to an indented function body.

    The text is processed in four steps:

    1. Everything from the first blank line (``'\\n\\n'``) onwards is dropped.
    2. If a Markdown code fence is present, only the text between the first
       and the second fence marker is kept.
    3. If the remaining text (ignoring surrounding whitespace) starts with
       ``def``, its first line is removed so only the body remains.
    4. If the text does not already start with a four-space indent, it is
       re-indented: a partially indented text gets its leading whitespace
       replaced by one indent, an unindented text gets every line prefixed
       with one indent.

    Args:
        text (str): Raw completion text.

    Returns:
        str: The post-processed completion.
    """
    # One indentation level of a top-level function body.
    indent = '    '

    text = text.split('\n\n')[0]
    if '```' in text:
        # The membership test guarantees at least two parts, so [1] is safe.
        text = text.split('```')[1]
    if text.strip().startswith('def'):
        text = '\n'.join(text.split('\n')[1:])
    if not text.startswith(indent):
        if text.startswith(' '):
            # Under-indented (1-3 spaces): normalise the leading whitespace.
            text = indent + text.lstrip()
        else:
            # No indentation at all: shift every line by one level.
            text = '\n'.join([indent + line for line in text.split('\n')])
    return text
|