mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* [Fix] Fix vllm max_seq_len parameter transfer * [Fix] Fix vllm max_seq_len parameter transfer * Add livestembench dataset * Add livestembench dataset * Add livestembench dataset * Update livestembench_gen_3e3c50.py * Update eval_livestembench.py * Update eval_livestembench.py
47 lines
1.6 KiB
Python
47 lines
1.6 KiB
Python
# Edited from the official SimpleQA config: https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py # noqa: E501
|
|
import json
|
|
import os
|
|
import random
|
|
|
|
from datasets import Dataset, DatasetDict
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
from opencompass.utils import get_data_path
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class LiveStemBenchDataset(BaseDataset):
    """Loader that turns a LiveStemBench JSON file into a ``DatasetDict``."""

    @staticmethod
    def load(path: str,
             num_examples: int | None = None,
             n_repeats: int = 1,
             version: str = 'livestembench-20241227',
             **kwargs):
        """Read ``<path>/<version>.json`` and build the dataset splits.

        Args:
            path: Dataset location; it is passed through ``get_data_path``
                before the version file name is appended.
            num_examples: When truthy, that many records are drawn with a
                ``random.Random(0)`` sampler, so the same subset is picked
                on every call. ``None`` (or ``0``) keeps every record.
            n_repeats: Number of times the record list is repeated. Must
                be 1 whenever ``num_examples`` is set.
            version: Base name (without ``.json``) of the file to read.
            **kwargs: Accepted but not used by this loader.

        Returns:
            A ``DatasetDict`` whose ``train`` and ``test`` splits are both
            built from the same list of ``question`` / ``answer`` records.

        Raises:
            AssertionError: If ``num_examples`` is set and ``n_repeats``
                is not 1.
        """
        data_file = os.path.join(get_data_path(path), f'{version}.json')
        with open(data_file, 'r', encoding='utf-8') as f:
            examples = json.load(f)

        records = []
        for example in examples:
            question = example['question']
            options = example['options']
            if options:
                # Append each option on its own line below the question.
                question = question + '\n' + '\n'.join(options)
            records.append({
                'question': question,
                'answer': example['answer'],
            })

        if num_examples:
            # Sub-sampling and repetition cannot be combined.
            assert n_repeats == 1, \
                'n_repeats only supported when num_examples = None'
            # Fixed seed keeps the sampled subset reproducible.
            records = random.Random(0).sample(records, num_examples)
        records = records * n_repeats

        dataset = DatasetDict()
        dataset['train'] = Dataset.from_list(records)
        dataset['test'] = Dataset.from_list(records)
        return dataset
|