OpenCompass/opencompass/datasets/py150.py
jingmingzhuo b3cbef3226
[Feature] Add py150 and maxmin (#562)
* [feat] add clozeTest_maxmin dataset

* [feat] add py150 datasets

* [feat] change __init__.py in opencompass/datasets

* [fix] pre-commit check

* [fix] rename py150 and maxmin datasets in configs

* [feat] add gen.py of py150 and maxmin in configs/datasets
2023-11-09 22:05:25 +08:00

39 lines
1.0 KiB
Python

import json
import re
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
# Matches literal placeholders that carry a value, e.g. ``<STR_LIT:foo>``.
# Group 1 is the literal kind, group 2 the embedded value. ``re.S`` lets the
# value span newlines.
_VALUED_LITERAL = re.compile(r'<(STR|NUM|CHAR)_LIT:(.*?)>', re.S)


def py150_post_process(code):
    """Turn one raw JSON line of the py150 data into a cleaned sample dict.

    The placeholder substitutions run on the raw JSON text *before* it is
    parsed, so they apply to every field of the record, not only ``input``.

    Args:
        code (str): A single JSON object serialized as text. It must contain
            an ``input`` string field.

    Returns:
        dict: The parsed record, with literal placeholders resolved, the
        ``input`` field rewritten as newline-separated, whitespace-stripped
        lines, and the ``id`` key removed if it was present.

    Raises:
        json.JSONDecodeError: If ``code`` is not valid JSON after the
            placeholder substitutions.
        KeyError: If the parsed record has no ``input`` field.
    """
    # Bare placeholders carry no value: numbers become ``0``, string and
    # character literals become empty.
    code = (code.replace('<NUM_LIT>', '0')
            .replace('<STR_LIT>', '')
            .replace('<CHAR_LIT>', ''))

    # Valued placeholders collapse to the value they embed.
    for kind, value in _VALUED_LITERAL.findall(code):
        code = code.replace(f'<{kind}_LIT:{value}>', value)

    code = json.loads(code)

    # Drop the ``<s>`` marker and split on the ``<EOL>`` token. Each line is
    # stripped here; the previous loop computed the stripped value and threw
    # it away, so the padding around every line survived into the output.
    source_lines = code['input'].replace('<s>', '').split('<EOL>')
    code['input'] = '\n'.join(line.strip() for line in source_lines)

    code.pop('id', None)
    return code
@LOAD_DATASET.register_module()
class Py150Dataset(BaseDataset):
    """Dataset loader for py150 data stored as one JSON record per line.

    The class is registered with ``LOAD_DATASET`` through the decorator above.
    """

    @staticmethod
    def load(path):
        """Read the file at ``path`` and build a ``Dataset`` from its records.

        Args:
            path (str): Path to a text file holding one JSON object per line.

        Returns:
            Dataset: One row per non-blank line, each row being the dict
            returned by ``py150_post_process`` for that line.
        """
        rows = []
        # The context manager closes the file even if a record fails to
        # parse. The encoding is explicit so that reading does not depend on
        # the platform's default locale.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # A blank line is not a JSON record; skip it instead of
                # letting json.loads fail on empty input.
                if not line.strip():
                    continue
                rows.append(py150_post_process(line))
        return Dataset.from_list(rows)