mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Datasets] Add datasets CMO&AIME (#1610)
* add datasets cmo&aime * delete unused modules * modify prompt * update __init__ * update data load and add README * update data load * update performance * update md5 * remove indents * add indent * fix log for debug mode
This commit is contained in:
parent
9c39cb68d4
commit
645c5f3b2c
13
opencompass/configs/datasets/aime2024/README.md
Normal file
13
opencompass/configs/datasets/aime2024/README.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
### Description
|
||||||
|
|
||||||
|
Math dataset composed of problems from AIME2024 (American Invitational Mathematics Examination 2024).
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
| Qwen2.5-Math-72B-Instruct | Qwen2.5-Math-7B-Instruct | Qwen2-Math-7B-Instruct | Qwen2-Math-1.5B-Instruct | internlm2-math-7b |
|
||||||
|
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
||||||
|
| 20.00 | 16.67 | 16.67 | 13.33 | 3.33 |
|
||||||
|
|
||||||
|
| Qwen2.5-72B-Instruct | Qwen2.5-7B-Instruct | internlm2_5-7b-chat |
|
||||||
|
| ----------- | ----------- | ----------- |
|
||||||
|
| 20.00 | 16.67 | 6.67 |
|
4
opencompass/configs/datasets/aime2024/aime2024_gen.py
Normal file
4
opencompass/configs/datasets/aime2024/aime2024_gen.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .aime2024_gen_6e39a4 import aime2024_datasets # noqa: F401, F403
|
39
opencompass/configs/datasets/aime2024/aime2024_gen_6e39a4.py
Normal file
39
opencompass/configs/datasets/aime2024/aime2024_gen_6e39a4.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets import Aime2024Dataset, MATHEvaluator, math_postprocess_v2
|
||||||
|
|
||||||
|
|
||||||
|
# Reader config: `question` is the input column used to fill the prompt and
# `answer` is the output (reference) column.
aime2024_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
)
|
||||||
|
|
||||||
|
|
||||||
|
# Inference config: a single HUMAN turn made of the question followed by an
# instruction to reason step by step and put the final answer in \boxed{}.
# Uses ZeroRetriever and GenInferencer with max_out_len=2048.
aime2024_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='{question}\nPlease reason step by step, and put your final answer within \\boxed{}.',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=2048),
)
|
||||||
|
|
||||||
|
# Evaluation config: predictions pass through math_postprocess_v2 and are
# scored by MATHEvaluator (version 'v2').
aime2024_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2),
)
|
||||||
|
|
||||||
|
# Dataset list exported by this config; a single entry that ties together the
# loader class, the data path key and the reader/infer/eval configs above.
aime2024_datasets = [
    dict(
        abbr='aime2024',
        type=Aime2024Dataset,
        path='opencompass/aime2024',
        reader_cfg=aime2024_reader_cfg,
        infer_cfg=aime2024_infer_cfg,
        eval_cfg=aime2024_eval_cfg,
    ),
]
|
13
opencompass/configs/datasets/cmo_fib/README.md
Normal file
13
opencompass/configs/datasets/cmo_fib/README.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
### Description
|
||||||
|
|
||||||
|
Math dataset composed of problems from CMO (Chinese Mathematical Olympiad), 2009–2022.
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
| Qwen2.5-Math-72B-Instruct | Qwen2.5-Math-7B-Instruct | Qwen2-Math-7B-Instruct | Qwen2-Math-1.5B-Instruct | internlm2-math-7b |
|
||||||
|
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
||||||
|
| 46.15 | 42.79 | 31.73 | 23.56 | 3.37 |
|
||||||
|
|
||||||
|
| Qwen2.5-72B-Instruct | Qwen2.5-7B-Instruct | internlm2_5-7b-chat |
|
||||||
|
| ----------- | ----------- | ----------- |
|
||||||
|
| 31.25 | 26.44 | 9.13 |
|
4
opencompass/configs/datasets/cmo_fib/cmo_fib_gen.py
Normal file
4
opencompass/configs/datasets/cmo_fib/cmo_fib_gen.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .cmo_fib_gen_ace24b import cmo_fib_datasets # noqa: F401, F403
|
39
opencompass/configs/datasets/cmo_fib/cmo_fib_gen_ace24b.py
Normal file
39
opencompass/configs/datasets/cmo_fib/cmo_fib_gen_ace24b.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets import CMOFibDataset, MATHEvaluator, math_postprocess_v2
|
||||||
|
|
||||||
|
|
||||||
|
# Reader config: `question` is the input column used to fill the prompt and
# `answer` is the output (reference) column.
cmo_fib_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
)
|
||||||
|
|
||||||
|
|
||||||
|
# Inference config: a single HUMAN turn made of the question followed by a
# Chinese instruction to reason step by step and write the final answer in
# \boxed{}. Uses ZeroRetriever and GenInferencer with max_out_len=2048.
cmo_fib_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='{question}\n请一步一步地推理,并将最终答案写入\\boxed{}.',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=2048),
)
|
||||||
|
|
||||||
|
# Evaluation config: predictions pass through math_postprocess_v2 and are
# scored by MATHEvaluator (version 'v2').
cmo_fib_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2),
)
|
||||||
|
|
||||||
|
# Dataset list exported by this config; a single entry that ties together the
# loader class, the data path key and the reader/infer/eval configs above.
cmo_fib_datasets = [
    dict(
        abbr='cmo_fib',
        type=CMOFibDataset,
        path='opencompass/cmo_fib',
        reader_cfg=cmo_fib_reader_cfg,
        infer_cfg=cmo_fib_infer_cfg,
        eval_cfg=cmo_fib_eval_cfg,
    ),
]
|
@ -1,6 +1,7 @@
|
|||||||
from .advglue import * # noqa: F401, F403
|
from .advglue import * # noqa: F401, F403
|
||||||
from .afqmcd import * # noqa: F401, F403
|
from .afqmcd import * # noqa: F401, F403
|
||||||
from .agieval import * # noqa: F401, F403
|
from .agieval import * # noqa: F401, F403
|
||||||
|
from .aime2024 import * # noqa: F401, F403
|
||||||
from .anli import AnliDataset # noqa: F401, F403
|
from .anli import AnliDataset # noqa: F401, F403
|
||||||
from .anthropics_evals import * # noqa: F401, F403
|
from .anthropics_evals import * # noqa: F401, F403
|
||||||
from .apps import * # noqa: F401, F403
|
from .apps import * # noqa: F401, F403
|
||||||
@ -24,6 +25,7 @@ from .cluewsc import * # noqa: F401, F403
|
|||||||
from .cmb import * # noqa: F401, F403
|
from .cmb import * # noqa: F401, F403
|
||||||
from .cmmlu import * # noqa: F401, F403
|
from .cmmlu import * # noqa: F401, F403
|
||||||
from .cmnli import * # noqa: F401, F403
|
from .cmnli import * # noqa: F401, F403
|
||||||
|
from .cmo_fib import * # noqa: F401, F403
|
||||||
from .cmrc import * # noqa: F401, F403
|
from .cmrc import * # noqa: F401, F403
|
||||||
from .commonsenseqa import * # noqa: F401, F403
|
from .commonsenseqa import * # noqa: F401, F403
|
||||||
from .commonsenseqa_cn import * # noqa: F401, F403
|
from .commonsenseqa_cn import * # noqa: F401, F403
|
||||||
|
25
opencompass/datasets/aime2024.py
Normal file
25
opencompass/datasets/aime2024.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from datasets import Dataset
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
from opencompass.utils import get_data_path
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
class Aime2024Dataset(BaseDataset):
    """Loader for the AIME 2024 problems stored as a JSON Lines file."""

    @staticmethod
    def load(path):
        """Read the JSONL file and expose ``question``/``answer`` columns.

        Args:
            path: Dataset key or path; it is resolved with ``get_data_path``
                before the file is opened.

        Returns:
            A ``Dataset`` built with ``Dataset.from_list``, one row per
            non-blank line. Each row keeps every field of the source record
            and adds ``question`` (taken from ``origin_prompt``) and
            ``answer`` (taken from ``gold_answer``).

        Raises:
            KeyError: If a record has no ``origin_prompt`` or ``gold_answer``.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = get_data_path(path)
        records = []
        # Decode explicitly as UTF-8 so loading does not depend on the
        # platform's default locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                record['question'] = record['origin_prompt']
                record['answer'] = record['gold_answer']
                records.append(record)
        return Dataset.from_list(records)
|
25
opencompass/datasets/cmo_fib.py
Normal file
25
opencompass/datasets/cmo_fib.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from datasets import Dataset
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
from opencompass.utils import get_data_path
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
class CMOFibDataset(BaseDataset):
    """Loader for the CMO fill-in-the-blank problems stored as JSON Lines."""

    @staticmethod
    def load(path):
        """Read the JSONL file and expose ``question``/``answer`` columns.

        Args:
            path: Dataset key or path; it is resolved with ``get_data_path``
                before the file is opened.

        Returns:
            A ``Dataset`` built with ``Dataset.from_list``, one row per
            non-blank line. Each row keeps every field of the source record
            and adds ``question`` (taken from ``origin_prompt``) and
            ``answer`` (taken from ``gold_answer``).

        Raises:
            KeyError: If a record has no ``origin_prompt`` or ``gold_answer``.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = get_data_path(path)
        records = []
        # Decode explicitly as UTF-8: the prompts for this dataset are
        # Chinese, so relying on the platform's default locale encoding
        # (e.g. cp1252/GBK on Windows) can fail or garble the text.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                record['question'] = record['origin_prompt']
                record['answer'] = record['gold_answer']
                records.append(record)
        return Dataset.from_list(records)
|
@ -136,7 +136,7 @@ class LocalRunner(BaseRunner):
|
|||||||
task.run()
|
task.run()
|
||||||
else:
|
else:
|
||||||
tmp_logs = f'tmp/{os.getpid()}_debug.log'
|
tmp_logs = f'tmp/{os.getpid()}_debug.log'
|
||||||
get_logger().debug(
|
get_logger().warning(
|
||||||
f'Debug mode, log will be saved to {tmp_logs}')
|
f'Debug mode, log will be saved to {tmp_logs}')
|
||||||
with open(tmp_logs, 'a') as log_file:
|
with open(tmp_logs, 'a') as log_file:
|
||||||
subprocess.run(cmd,
|
subprocess.run(cmd,
|
||||||
|
@ -291,6 +291,16 @@ DATASETS_MAPPING = {
|
|||||||
"ms_id": "",
|
"ms_id": "",
|
||||||
"hf_id": "",
|
"hf_id": "",
|
||||||
"local": "./data/test_generation",
|
"local": "./data/test_generation",
|
||||||
|
},
|
||||||
|
"opencompass/aime2024": {
|
||||||
|
"ms_id": "",
|
||||||
|
"hf_id": "",
|
||||||
|
"local": "./data/aime.jsonl",
|
||||||
|
},
|
||||||
|
"opencompass/cmo_fib": {
|
||||||
|
"ms_id": "",
|
||||||
|
"hf_id": "",
|
||||||
|
"local": "./data/cmo.jsonl",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -455,4 +465,12 @@ DATASETS_URL = {
|
|||||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/test_generation.zip",
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/test_generation.zip",
|
||||||
"md5": "918a6ea2b1eee6f2b1314db3c21cb4c7",
|
"md5": "918a6ea2b1eee6f2b1314db3c21cb4c7",
|
||||||
},
|
},
|
||||||
|
"/aime": {
|
||||||
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/aime.zip",
|
||||||
|
"md5": "fbe2d0577fc210962a549f8cea1a00c8"
|
||||||
|
},
|
||||||
|
"/cmo": {
|
||||||
|
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/cmo.zip",
|
||||||
|
"md5": "fad52c81290506a8ca74f46b5400d8fc"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user