mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Merge f1182e82f7
into d572761cef
This commit is contained in:
commit
f01bad17ee
@ -539,6 +539,12 @@
|
|||||||
paper: https://aclanthology.org/D19-1632.pdf
|
paper: https://aclanthology.org/D19-1632.pdf
|
||||||
configpath: opencompass/configs/datasets/flores/flores_gen.py
|
configpath: opencompass/configs/datasets/flores/flores_gen.py
|
||||||
configpath_llmjudge: ''
|
configpath_llmjudge: ''
|
||||||
|
- gaia:
|
||||||
|
name: GAIA
|
||||||
|
category: Tool Utilization
|
||||||
|
paper: https://arxiv.org/abs/2311.12983
|
||||||
|
configpath: opencompass/configs/datasets/GAIA/gaia_gen.py
|
||||||
|
configpath_llmjudge: ''
|
||||||
- game24:
|
- game24:
|
||||||
name: Game24
|
name: Game24
|
||||||
category: Math
|
category: Math
|
||||||
|
52
opencompass/configs/datasets/GAIA/gaia_gen.py
Normal file
52
opencompass/configs/datasets/GAIA/gaia_gen.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import GAIADataset
|
||||||
|
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||||
|
|
||||||
|
gaia_reader_cfg = dict(
|
||||||
|
input_columns=['question', 'file_path'],
|
||||||
|
output_column='answerKey',
|
||||||
|
test_split='test')
|
||||||
|
|
||||||
|
gaia_infer_cfg = dict(
|
||||||
|
prompt_template=dict(
|
||||||
|
type=PromptTemplate,
|
||||||
|
template=dict(round=[
|
||||||
|
dict(
|
||||||
|
role='HUMAN',
|
||||||
|
prompt=
|
||||||
|
'''You are a general AI assistant. I will ask you a question. Report your thoughts, and
|
||||||
|
finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
|
||||||
|
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
|
||||||
|
list of numbers and/or strings.
|
||||||
|
If you are asked for a number, don’t use comma to write your number neither use units such as $ or
|
||||||
|
percent sign unless specified otherwise.
|
||||||
|
If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the
|
||||||
|
digits in plain text unless specified otherwise.
|
||||||
|
If you are asked for a comma separated list, apply the above rules depending of whether the element
|
||||||
|
to be put in the list is a number or a string.\nGAIA Question: {question}\nFile Path: {file_path}\n'''
|
||||||
|
),
|
||||||
|
]),
|
||||||
|
),
|
||||||
|
retriever=dict(type=ZeroRetriever),
|
||||||
|
inferencer=dict(type=GenInferencer),
|
||||||
|
)
|
||||||
|
gaia_eval_cfg = dict(
|
||||||
|
evaluator=dict(type=AccEvaluator),
|
||||||
|
pred_role='BOT',
|
||||||
|
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||||
|
)
|
||||||
|
|
||||||
|
gaia_datasets = [
|
||||||
|
dict(
|
||||||
|
abbr='gaia-validation',
|
||||||
|
type=GAIADataset,
|
||||||
|
path='opencompass/gaia',
|
||||||
|
local_mode=False,
|
||||||
|
reader_cfg=gaia_reader_cfg,
|
||||||
|
infer_cfg=gaia_infer_cfg,
|
||||||
|
eval_cfg=gaia_eval_cfg,
|
||||||
|
)
|
||||||
|
]
|
@ -51,6 +51,7 @@ from .ds1000_interpreter import * # noqa: F401, F403
|
|||||||
from .eprstmt import * # noqa: F401, F403
|
from .eprstmt import * # noqa: F401, F403
|
||||||
from .FinanceIQ import * # noqa: F401, F403
|
from .FinanceIQ import * # noqa: F401, F403
|
||||||
from .flores import * # noqa: F401, F403
|
from .flores import * # noqa: F401, F403
|
||||||
|
from .gaia import * # noqa: F401, F403
|
||||||
from .game24 import * # noqa: F401, F403
|
from .game24 import * # noqa: F401, F403
|
||||||
from .gaokao_math import * # noqa: F401, F403
|
from .gaokao_math import * # noqa: F401, F403
|
||||||
from .GaokaoBench import * # noqa: F401, F403
|
from .GaokaoBench import * # noqa: F401, F403
|
||||||
|
60
opencompass/datasets/gaia.py
Normal file
60
opencompass/datasets/gaia.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from os import environ
|
||||||
|
|
||||||
|
from datasets import Dataset
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
from opencompass.utils.datasets_info import DATASETS_MAPPING
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
|
||||||
|
class GAIADataset(BaseDataset):
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def load(path, local_mode: bool = False):
|
||||||
|
rows = []
|
||||||
|
if environ.get('DATASET_SOURCE') == 'HF':
|
||||||
|
from datasets import load_dataset
|
||||||
|
try:
|
||||||
|
hf_id = DATASETS_MAPPING[path]['hf_id']
|
||||||
|
# 因为ModelScope的GAIA数据集读取存在问题,所以从huggingface读取
|
||||||
|
ds = load_dataset(hf_id, '2023_all', split='validation')
|
||||||
|
rows = []
|
||||||
|
for item in ds:
|
||||||
|
rows.append({
|
||||||
|
'question': item['Question'],
|
||||||
|
'answerKey': item['Final answer'],
|
||||||
|
'file_path': item['file_path'],
|
||||||
|
'file_name': item['file_name'],
|
||||||
|
'level': item['Level']
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
print(f'Error loading local file: {e}')
|
||||||
|
else:
|
||||||
|
# 从本地读取
|
||||||
|
compass_data_cache = os.environ.get('COMPASS_DATA_CACHE')
|
||||||
|
local_path = DATASETS_MAPPING[path]['local']
|
||||||
|
local_path = os.path.join(compass_data_cache, local_path)
|
||||||
|
with open(local_path, 'r', encoding='utf-8') as f:
|
||||||
|
for line in f:
|
||||||
|
line = json.loads(line.strip())
|
||||||
|
# 构建数据行
|
||||||
|
row_data = {
|
||||||
|
'question': line['Question'],
|
||||||
|
'answerKey': line['Final answer'],
|
||||||
|
'file_name': line['file_name'],
|
||||||
|
'level': line['Level']
|
||||||
|
}
|
||||||
|
|
||||||
|
# 只有在file_name不为空时设置file_path
|
||||||
|
if line['file_name']:
|
||||||
|
file_name = line['file_name']
|
||||||
|
row_data['file_path'] = f'{local_path}/{file_name}'
|
||||||
|
else:
|
||||||
|
row_data['file_path'] = ''
|
||||||
|
|
||||||
|
rows.append(row_data)
|
||||||
|
return Dataset.from_list(rows)
|
@ -1,4 +1,10 @@
|
|||||||
DATASETS_MAPPING = {
|
DATASETS_MAPPING = {
|
||||||
|
# GAIA Datasets
|
||||||
|
"opencompass/gaia": {
|
||||||
|
"ms_id": None,
|
||||||
|
"hf_id": "gaia-benchmark/GAIA",
|
||||||
|
"local": "./data/gaia/2023/validation/metadata.jsonl",
|
||||||
|
},
|
||||||
# ADVGLUE Datasets
|
# ADVGLUE Datasets
|
||||||
"opencompass/advglue-dev": {
|
"opencompass/advglue-dev": {
|
||||||
"ms_id": None,
|
"ms_id": None,
|
||||||
|
Loading…
Reference in New Issue
Block a user