From 7b626eff9e6a46545bb77d7f7c0d4dff55f2e384 Mon Sep 17 00:00:00 2001 From: Deadwalk Date: Thu, 10 Apr 2025 11:04:38 +0800 Subject: [PATCH 1/4] [Dataset] Add GAIA --- dataset-index.yml | 6 +++ opencompass/configs/datasets/GAIA/gaia_gen.py | 43 +++++++++++++++++++ opencompass/datasets/__init__.py | 1 + opencompass/datasets/gaia.py | 35 +++++++++++++++ opencompass/utils/datasets_info.py | 6 +++ 5 files changed, 91 insertions(+) create mode 100644 opencompass/configs/datasets/GAIA/gaia_gen.py create mode 100644 opencompass/datasets/gaia.py diff --git a/dataset-index.yml b/dataset-index.yml index de5e316e..dd545681 100644 --- a/dataset-index.yml +++ b/dataset-index.yml @@ -491,6 +491,12 @@ paper: https://aclanthology.org/D19-1632.pdf configpath: opencompass/configs/datasets/flores/flores_gen.py configpath_llmjudge: '' +- gaia: + name: GAIA + category: Reasoning + paper: https://arxiv.org/abs/2311.12983 + configpath: opencompass/configs/datasets/GAIA/gaia_gen.py + configpath_llmjudge: '' - game24: name: Game24 category: Math diff --git a/opencompass/configs/datasets/GAIA/gaia_gen.py b/opencompass/configs/datasets/GAIA/gaia_gen.py new file mode 100644 index 00000000..b2234af5 --- /dev/null +++ b/opencompass/configs/datasets/GAIA/gaia_gen.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import GAIADataset +from opencompass.utils.text_postprocessors import first_capital_postprocess + +gaia_reader_cfg = dict( + input_columns='question', + output_column='answerKey', + test_split='test') + +gaia_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role='HUMAN', + prompt= + '请根据问题:{question}\n给出答案。答:' + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) +gaia_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type=first_capital_postprocess), +) + +gaia_datasets = [ + dict( + abbr='gaia-validation', + type=GAIADataset, + path='opencompass/gaia', + local_mode=False, + reader_cfg=gaia_reader_cfg, + infer_cfg=gaia_infer_cfg, + eval_cfg=gaia_eval_cfg, + ) +] diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py index 45209054..c888de02 100644 --- a/opencompass/datasets/__init__.py +++ b/opencompass/datasets/__init__.py @@ -51,6 +51,7 @@ from .FinanceIQ import * # noqa: F401, F403 from .flores import * # noqa: F401, F403 from .game24 import * # noqa: F401, F403 from .gaokao_math import * # noqa: F401, F403 +from .gaia import * # noqa: F401, F403 from .GaokaoBench import * # noqa: F401, F403 from .generic import * # noqa: F401, F403 from .govrepcrs import * # noqa: F401, F403 diff --git a/opencompass/datasets/gaia.py b/opencompass/datasets/gaia.py new file mode 100644 index 00000000..fdd15a4c --- /dev/null +++ b/opencompass/datasets/gaia.py @@ -0,0 +1,35 @@ +import json +from os import environ +import os + +from datasets import Dataset + +from opencompass.registry import LOAD_DATASET +from opencompass.utils import get_data_path + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class GAIADataset(BaseDataset): + + @staticmethod + def load(path, local_mode: bool = False): + + from datasets import load_dataset + try: + # 因为ModelScope的GAIA数据集读取存在问题,所以从huggingface读取 + ds = load_dataset("gaia-benchmark/GAIA", '2023_all', split='validation') + rows = [] + for item in ds: + rows.append({ + 'question': item['Question'], + 'answerKey': item['Final answer'], + 'file_path': item['file_path'], + 'file_name': item['file_name'], + 'level': item['Level'] + }) + except Exception as e: + print(f"Error loading local file: {e}") + + return Dataset.from_list(rows) diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 00db25e8..28c18e91 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -1,4 +1,10 @@ DATASETS_MAPPING = { + # GAIA Datasets + "opencompass/gaia": { + "ms_id": None, + "hf_id": "gaia-benchmark/GAIA", + "local": "./data/gaia/", + }, # ADVGLUE Datasets "opencompass/advglue-dev": { "ms_id": None, From 6d809bf36d7a995a2c22e1c214674eed20b05a9a Mon Sep 17 00:00:00 2001 From: Deadwalk Date: Sat, 12 Apr 2025 13:45:54 +0800 Subject: [PATCH 2/4] [Dataset]Added local configuration to GAIA dataset --- dataset-index.yml | 2 +- opencompass/datasets/gaia.py | 59 +++++++++++++++++++++--------- opencompass/utils/datasets_info.py | 2 +- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/dataset-index.yml b/dataset-index.yml index 06e48d27..3ae50be0 100644 --- a/dataset-index.yml +++ b/dataset-index.yml @@ -499,7 +499,7 @@ configpath_llmjudge: '' - gaia: name: GAIA - category: Reasoning + category: Tool Utilization paper: https://arxiv.org/abs/2311.12983 configpath: opencompass/configs/datasets/GAIA/gaia_gen.py configpath_llmjudge: '' diff --git a/opencompass/datasets/gaia.py b/opencompass/datasets/gaia.py index fdd15a4c..9db1e5fd 100644 --- a/opencompass/datasets/gaia.py +++ b/opencompass/datasets/gaia.py @@ -6,6 +6,7 @@ from datasets import Dataset from opencompass.registry import LOAD_DATASET from opencompass.utils import get_data_path +from opencompass.utils.datasets_info import DATASETS_MAPPING from .base import BaseDataset @@ -15,21 +16,45 @@ class GAIADataset(BaseDataset): @staticmethod def load(path, local_mode: bool = False): - - from datasets import load_dataset - try: - # 因为ModelScope的GAIA数据集读取存在问题,所以从huggingface读取 - ds = load_dataset("gaia-benchmark/GAIA", '2023_all', split='validation') - rows = [] - for item in ds: - rows.append({ - 'question': item['Question'], - 'answerKey': item['Final answer'], - 'file_path': item['file_path'], - 'file_name': item['file_name'], - 'level': item['Level'] - }) - except Exception as e: - print(f"Error loading local file: {e}") - + rows = [] + if environ.get('DATASET_SOURCE') == 'HF': + from datasets import load_dataset + try: + hf_id = DATASETS_MAPPING[path]['hf_id'] + # 因为ModelScope的GAIA数据集读取存在问题,所以从huggingface读取 + ds = load_dataset(hf_id, '2023_all', split='validation') + rows = [] + for item in ds: + rows.append({ + 'question': item['Question'], + 'answerKey': item['Final answer'], + 'file_path': item['file_path'], + 'file_name': item['file_name'], + 'level': item['Level'] + }) + except Exception as e: + print(f"Error loading local file: {e}") + else: + # 从本地读取 + compass_data_cache = os.environ.get('COMPASS_DATA_CACHE') + local_path = DATASETS_MAPPING[path]['local'] + local_path = os.path.join(compass_data_cache, local_path) + with open(local_path, 'r', encoding='utf-8') as f: + for line in f: + line = json.loads(line.strip()) + # 构建数据行 + row_data = { + 'question': line['Question'], + 'answerKey': line['Final answer'], + 'file_name': line['file_name'], + 'level': line['Level'] + } + + # 只有在file_name不为空时设置file_path + if line['file_name']: + row_data['file_path'] = f'{local_path}/{line["file_name"]}' + else: + row_data['file_path'] = '' + + rows.append(row_data) return Dataset.from_list(rows) diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index 28c18e91..81718cd0 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -3,7 +3,7 @@ DATASETS_MAPPING = { "opencompass/gaia": { "ms_id": None, "hf_id": "gaia-benchmark/GAIA", - "local": "./data/gaia/", + "local": "./data/gaia/2023/validation/metadata.jsonl", }, # ADVGLUE Datasets "opencompass/advglue-dev": { From 3a805928174afad794a8330d3f06d8bdc35ab28f Mon Sep 17 00:00:00 2001 From: Deadwalk Date: Mon, 14 Apr 2025 10:09:27 +0800 Subject: [PATCH 3/4] [Dataset]update GAIA prompt --- opencompass/configs/datasets/GAIA/gaia_gen.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/opencompass/configs/datasets/GAIA/gaia_gen.py b/opencompass/configs/datasets/GAIA/gaia_gen.py index b2234af5..839ba2f9 100644 --- a/opencompass/configs/datasets/GAIA/gaia_gen.py +++ b/opencompass/configs/datasets/GAIA/gaia_gen.py @@ -6,7 +6,7 @@ from opencompass.datasets import GAIADataset from opencompass.utils.text_postprocessors import first_capital_postprocess gaia_reader_cfg = dict( - input_columns='question', + input_columns=['question', 'file_path'], output_column='answerKey', test_split='test') @@ -17,7 +17,16 @@ gaia_infer_cfg = dict( dict( role='HUMAN', prompt= - '请根据问题:{question}\n给出答案。答:' + '''You are a general AI assistant. I will ask you a question. Report your thoughts, and +finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. +YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated +list of numbers and/or strings. +If you are asked for a number, don’t use comma to write your number neither use units such as $ or +percent sign unless specified otherwise. +If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the +digits in plain text unless specified otherwise. +If you are asked for a comma separated list, apply the above rules depending of whether the element +to be put in the list is a number or a string.\nGAIA Question: {question}\nFile Path: {file_path}\n''' ), ]), ), From e373a2bbcf0393d0249d19db0ae8058ddac90f72 Mon Sep 17 00:00:00 2001 From: Deadwalk Date: Sat, 26 Apr 2025 11:30:24 +0800 Subject: [PATCH 4/4] =?UTF-8?q?Fix=20Bug=EF=BC=9AFix=20GAIA=20datasets=20l?= =?UTF-8?q?int=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- opencompass/datasets/__init__.py | 2 +- opencompass/datasets/gaia.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py index 51c5228d..dd75241e 100644 --- a/opencompass/datasets/__init__.py +++ b/opencompass/datasets/__init__.py @@ -49,9 +49,9 @@ from .ds1000_interpreter import * # noqa: F401, F403 from .eprstmt import * # noqa: F401, F403 from .FinanceIQ import * # noqa: F401, F403 from .flores import * # noqa: F401, F403 +from .gaia import * # noqa: F401, F403 from .game24 import * # noqa: F401, F403 from .gaokao_math import * # noqa: F401, F403 -from .gaia import * # noqa: F401, F403 from .GaokaoBench import * # noqa: F401, F403 from .generic import * # noqa: F401, F403 from .govrepcrs import * # noqa: F401, F403 diff --git a/opencompass/datasets/gaia.py b/opencompass/datasets/gaia.py index 9db1e5fd..841dffc9 100644 --- a/opencompass/datasets/gaia.py +++ b/opencompass/datasets/gaia.py @@ -1,11 +1,10 @@ import json -from os import environ import os +from os import environ from datasets import Dataset from opencompass.registry import LOAD_DATASET -from opencompass.utils import get_data_path from opencompass.utils.datasets_info import DATASETS_MAPPING from .base import BaseDataset @@ -33,7 +32,7 @@ class GAIADataset(BaseDataset): 'level': item['Level'] }) except Exception as e: - print(f"Error loading local file: {e}") + print(f'Error loading local file: {e}') else: # 从本地读取 compass_data_cache = os.environ.get('COMPASS_DATA_CACHE') @@ -49,12 +48,13 @@ class GAIADataset(BaseDataset): 'file_name': line['file_name'], 'level': line['Level'] } - + # 只有在file_name不为空时设置file_path if line['file_name']: - row_data['file_path'] = f'{local_path}/{line["file_name"]}' + file_name = line['file_name'] + row_data['file_path'] = f'{local_path}/{file_name}' else: row_data['file_path'] = '' - + rows.append(row_data) return Dataset.from_list(rows)