OpenCompass/opencompass/datasets/llm_compression.py
Alexander Lam 35c94d0cde
[Feature] Adding support for LLM Compression Evaluation (#1108)
* fixed formatting based on pre-commit tests

* fixed typo in comments; reduced the number of models in the eval config

* fixed a bug in LLMCompressionDataset, where setting samples=None would result in passing test[:None] to load_dataset

* removed unnecessary variable in _format_table_pivot; changed lark_reporter message to English
2024-04-30 10:51:01 +08:00

37 lines
1.0 KiB
Python

import os.path as osp
from typing import List
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class LLMCompressionDataset(BaseDataset):
    """Dataset loader that reads one JSON/JSONL data subset from disk."""

    @staticmethod
    def load(path: str, name: List[str] = None, samples: int = None):
        """Load the data subset ``name`` found under ``path``.

        Args:
            path: Directory that is searched for the subset file.
            name: Data subset name. The file looked up is ``<name>.json``
                or, failing that, ``<name>.jsonl``.
                NOTE(review): annotated as ``List[str]`` but interpolated
                as a single name into the file name -- confirm that
                callers pass a plain string.
            samples: When not None, the split expression
                ``test[:<samples>]`` is passed to ``load_dataset`` instead
                of the plain ``test`` split.

        Returns:
            The dataset produced by ``load_dataset`` with every record
            whose ``content`` field has length zero filtered out.

        Raises:
            FileNotFoundError: If neither candidate file exists. The
                message lists every path that was tried.
        """
        # Candidates in lookup order: '.json' takes precedence over '.jsonl'.
        candidates = [
            osp.join(path, f'{name}.{ext}') for ext in ('json', 'jsonl')
        ]
        filename = next((c for c in candidates if osp.exists(c)), None)
        if filename is None:
            # Report all attempted paths, not just the last one, so the
            # caller can see that both extensions were searched.
            raise FileNotFoundError(
                f"Data file not found; tried: {', '.join(candidates)}")

        # Avoid building 'test[:None]' when no sample limit was requested.
        split = 'test' if samples is None else f'test[:{samples}]'
        dataset = load_dataset('json',
                               data_files={'test': filename},
                               split=split)

        # Filter out empty samples
        dataset = dataset.filter(lambda example: len(example['content']) > 0)
        return dataset