mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] re-implement ceval load dataset (#446)
This commit is contained in:
parent
d9f3e88dfe
commit
9db5652638
@@ -22,3 +22,26 @@ ceval_summary_groups.append({'name': 'ceval-hard', 'subsets': _ceval_hard})
|
|||||||
|
|
||||||
_ceval_all = _ceval_stem + _ceval_social_science + _ceval_humanities + _ceval_other
|
_ceval_all = _ceval_stem + _ceval_social_science + _ceval_humanities + _ceval_other
|
||||||
ceval_summary_groups.append({'name': 'ceval', 'subsets': _ceval_all})
|
ceval_summary_groups.append({'name': 'ceval', 'subsets': _ceval_all})
|
||||||
|
|
||||||
|
_ceval_stem = ['computer_network', 'operating_system', 'computer_architecture', 'college_programming', 'college_physics', 'college_chemistry', 'advanced_mathematics', 'probability_and_statistics', 'discrete_mathematics', 'electrical_engineer', 'metrology_engineer', 'high_school_mathematics', 'high_school_physics', 'high_school_chemistry', 'high_school_biology', 'middle_school_mathematics', 'middle_school_biology', 'middle_school_physics', 'middle_school_chemistry', 'veterinary_medicine']
|
||||||
|
_ceval_stem = ['ceval-test-' + s for s in _ceval_stem]
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test-stem', 'subsets': _ceval_stem})
|
||||||
|
|
||||||
|
_ceval_social_science = ['college_economics', 'business_administration', 'marxism', 'mao_zedong_thought', 'education_science', 'teacher_qualification', 'high_school_politics', 'high_school_geography', 'middle_school_politics', 'middle_school_geography']
|
||||||
|
_ceval_social_science = ['ceval-test-' + s for s in _ceval_social_science]
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test-social-science', 'subsets': _ceval_social_science})
|
||||||
|
|
||||||
|
_ceval_humanities = ['modern_chinese_history', 'ideological_and_moral_cultivation', 'logic', 'law', 'chinese_language_and_literature', 'art_studies', 'professional_tour_guide', 'legal_professional', 'high_school_chinese', 'high_school_history', 'middle_school_history']
|
||||||
|
_ceval_humanities = ['ceval-test-' + s for s in _ceval_humanities]
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test-humanities', 'subsets': _ceval_humanities})
|
||||||
|
|
||||||
|
_ceval_other = ['civil_servant', 'sports_science', 'plant_protection', 'basic_medicine', 'clinical_medicine', 'urban_and_rural_planner', 'accountant', 'fire_engineer', 'environmental_impact_assessment_engineer', 'tax_accountant', 'physician']
|
||||||
|
_ceval_other = ['ceval-test-' + s for s in _ceval_other]
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test-other', 'subsets': _ceval_other})
|
||||||
|
|
||||||
|
_ceval_hard = ['advanced_mathematics', 'discrete_mathematics', 'probability_and_statistics', 'college_chemistry', 'college_physics', 'high_school_mathematics', 'high_school_chemistry', 'high_school_physics']
|
||||||
|
_ceval_hard = ['ceval-test-' + s for s in _ceval_hard]
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test-hard', 'subsets': _ceval_hard})
|
||||||
|
|
||||||
|
_ceval_all = _ceval_stem + _ceval_social_science + _ceval_humanities + _ceval_other
|
||||||
|
ceval_summary_groups.append({'name': 'ceval-test', 'subsets': _ceval_all})
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
|
import csv
|
||||||
import os.path as osp
|
import os.path as osp
|
||||||
|
|
||||||
from datasets import DatasetDict, load_dataset
|
from datasets import Dataset, DatasetDict
|
||||||
|
|
||||||
from opencompass.registry import LOAD_DATASET
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
|
||||||
@@ -12,26 +13,15 @@ class CEvalDataset(BaseDataset):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(path: str, name: str):
|
def load(path: str, name: str):
|
||||||
dev_dataset = load_dataset('csv',
|
dataset = {}
|
||||||
data_files=osp.join(path, 'dev',
|
for split in ['dev', 'val', 'test']:
|
||||||
f'{name}_dev.csv'),
|
with open(osp.join(path, split, f'{name}_{split}.csv')) as f:
|
||||||
split='train')
|
reader = csv.reader(f)
|
||||||
val_dataset = load_dataset('csv',
|
header = next(reader)
|
||||||
data_files=osp.join(path, 'val',
|
for row in reader:
|
||||||
f'{name}_val.csv'),
|
item = dict(zip(header, row))
|
||||||
split='train')
|
item.setdefault('explanation', '')
|
||||||
val_dataset = val_dataset.add_column('explanation',
|
item.setdefault('answer', '')
|
||||||
[''] * len(val_dataset))
|
dataset.setdefault(split, []).append(item)
|
||||||
test_dataset = load_dataset('csv',
|
dataset = {i: Dataset.from_list(dataset[i]) for i in dataset}
|
||||||
data_files=osp.join(
|
return DatasetDict(dataset)
|
||||||
path, 'test', f'{name}_test.csv'),
|
|
||||||
split='train')
|
|
||||||
test_dataset = test_dataset.add_column(
|
|
||||||
'answer',
|
|
||||||
[''] * len(test_dataset)).add_column('explanation',
|
|
||||||
[''] * len(test_dataset))
|
|
||||||
return DatasetDict({
|
|
||||||
'val': val_dataset,
|
|
||||||
'dev': dev_dataset,
|
|
||||||
'test': test_dataset
|
|
||||||
})
|
|
||||||
|
Loading…
Reference in New Issue
Block a user