mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Add ATC Choice Version (#1019)
* Squashed commit of the following: commit c48ad194c3976dc63d1b60d8c8ab2d5ff9e1cbfe Author: DseidLi <2568818204@qq.com> Date: Tue Apr 2 16:57:43 2024 +0800 add atc_choice commit 3ac6efea29619573e6fac8fa3cce464853dcead0 Merge:2d4e559
8e3a9c3 Author: DseidLi <2568818204@qq.com> Date: Tue Apr 2 16:41:38 2024 +0800 Merge branch 'atc_choice' into atc_add_choice commit 8e3a9c396a3e5546d3faf584183f6fd60b974d5e Merge: 150a0360a6a03f
Author: DseidLi <2568818204@qq.com> Date: Tue Mar 26 04:47:07 2024 +0800 Merge branch 'main' into atc_choice Conflicts: configs/summarizers/needlebench.py opencompass/datasets/needlebench/multi.py opencompass/datasets/needlebench/origin.py opencompass/datasets/needlebench/parallel.py commit 150a036d6d990f26a57c974d1af83d88c31a0f9d Merge: 8d6ac9a 940dd18 Author: DseidLi <2568818204@qq.com> Date: Wed Mar 20 03:49:08 2024 +0800 Merge branch 'needlebench_fix' into atc_choice commit 8d6ac9a1a43b1c9d0f0ea27e7d58968a203ea898 Author: DseidLi <2568818204@qq.com> Date: Wed Mar 20 03:41:49 2024 +0800 optimize needlebench code commit 940dd18a4270f24bc69edd2a780182c68918e1a9 Author: DseidLi <2568818204@qq.com> Date: Wed Mar 20 03:39:46 2024 +0800 fix vllm commit d8be6877bc41051f3edcc0421c462c834c0f1c9a Merge: ecad78a2527fda
Author: DseidLi <2568818204@qq.com> Date: Tue Mar 19 21:07:08 2024 +0800 Merge remote-tracking branch 'origin/add_1M_dataset' into atc_choice commit2527fda8a5
Author: DseidLi <2568818204@qq.com> Date: Tue Mar 19 16:03:40 2024 +0800 add model configs commit75425acdf8
Author: DseidLi <2568818204@qq.com> Date: Tue Mar 19 16:02:15 2024 +0800 add prompt position args commit367ba1ba61
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 21:40:00 2024 +0800 add Needlebench-1000K configs commit ecad78af14c4bb00fe325779114b384c57ab30bf Author: DseidLi <2568818204@qq.com> Date: Thu Mar 14 22:08:32 2024 +0800 fix atc commit 08772c0787b18872abadc9ffec3223941a5ee0c2 Merge: 9f3f8cfcaf1cf8
Author: DseidLi <2568818204@qq.com> Date: Thu Mar 14 22:07:28 2024 +0800 Merge branch 'main' into atc_choice Conflicts: configs/datasets/needlebench/readme.md configs/datasets/needlebench/readme_zh-CN.md configs/summarizers/needlebench.py opencompass/datasets/needlebench/atc.py opencompass/summarizers/needlebench.py commit 9f3f8cfb4452722734d334114ac1d14110e57406 Author: DseidLi <2568818204@qq.com> Date: Thu Mar 14 21:35:53 2024 +0800 add atc-choice test commit 52be7c1202376b4e09821188b826f1a805328129 Author: DseidLi <2568818204@qq.com> Date: Wed Mar 6 02:54:15 2024 +0800 update needlebench randomseed and add vllm qwen14b commit fc1effce596ae2e5ece4933e8cd34aef8e64a6f9 Merge: 4e747edcaf1cf8
Author: DseidLi <2568818204@qq.com> Date: Wed Mar 6 02:51:14 2024 +0800 Merge branch 'main' into add_model_configs commit 31834f9b23af3354ac3581ec86d693d0f05cdd1c Merge: 7dabc82120bf8b
Author: DseidLi <2568818204@qq.com> Date: Sun Mar 3 23:29:42 2024 +0800 Merge branch 'main' of https://github.com/open-compass/opencompass into atc_choice commit 4e747ed1988ddbcfcc7fff334601259ade72d363 Author: DseidLi <2568818204@qq.com> Date: Sun Mar 3 22:15:25 2024 +0800 add internlm2-lmdeploy model and gemma configs commit 7dabc828123d711c8cf834d6aab4137bb55e85ed Author: DseidLi <2568818204@qq.com> Date: Sat Mar 2 17:26:15 2024 +0800 add atc choice version -ZH commit996f8ae43d
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 16:58:56 2024 +0800 update readme for needlebench commitf7266e873c
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 16:44:53 2024 +0800 move readme.md commit1c7375681d
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 16:38:31 2024 +0800 fix linting error commitb6524f3ebf
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 16:33:51 2024 +0800 lint summarizer commitc0d1190e39
Author: DseidLi <2568818204@qq.com> Date: Wed Feb 28 16:29:03 2024 +0800 add needlebench intro, fix summarizer commit0965baf785
Author: DseidLi <2568818204@qq.com> Date: Mon Feb 26 13:31:26 2024 +0800 fix bug in needlebench summarizer commit5d32b31eb8
Author: DseidLi <2568818204@qq.com> Date: Sat Feb 24 03:19:08 2024 +0800 update act prompt commitaf82a7f085
Merge:32bf9fe
53fe788
Author: DseidLi <2568818204@qq.com> Date: Fri Feb 23 17:50:32 2024 +0800 Merge remote-tracking branch 'upstream/main' into needlebench commit32bf9fe802
Author: DseidLi <2568818204@qq.com> Date: Fri Feb 23 17:31:32 2024 +0800 simplify needlebench 32k, 128k, 200k for eval commita7cb025e05
Author: DseidLi <2568818204@qq.com> Date: Fri Feb 23 14:48:58 2024 +0800 add needlebench * fix summarizer * remove repeated code * remove chinese comments
This commit is contained in:
parent
b50d163265
commit
f2af49337d
134
configs/datasets/needlebench/atc/atc_choice.py
Normal file
134
configs/datasets/needlebench/atc/atc_choice.py
Normal file
@ -0,0 +1,134 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import CircularEvaluator
|
||||
from opencompass.datasets.needlebench.atc_choice import NeedleBenchATCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# ----------------------- Prompt Content----------------------- #
|
||||
|
||||
# Few-shot demonstrations for the single-choice ATC task.
#
# Each entry under 'single_choice_prompts' is a list of chat turns made of
# four HUMAN/BOT demonstration pairs followed by one final HUMAN turn that
# carries the `{question}` placeholder.  The `*_reasoning` variants use BOT
# turns that spell out the relationship chain before giving the option letter.
few_shot_prompts = {
    'single_choice_prompts': {
        # Chinese, answer letter only.
        'single_choice_cn': [
            {'role': 'HUMAN', 'prompt': '问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'},
            {'role': 'BOT', 'prompt': '回答: A'},
            {'role': 'HUMAN', 'prompt': '问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'},
            {'role': 'BOT', 'prompt': '回答: D'},
            {'role': 'HUMAN', 'prompt': '问题: 毕东把柳金凤视为其姥姥。奉兵作为柳金凤的妈妈,对柳金凤的成长有重要影响。余萍把杨颖视为其爸爸。毕东在郭建华的生命中扮演着父亲的角色。常宁的外公是余萍。刘慧是郭建华所生的孩子。刘慧在杨颖的生命中扮演着外公的角色。 \n在上面提供的打乱的家族关系文本中,"常宁"的能够向上追溯到的最年长的亲人是谁?\nA. 柳金凤\nB. 毕东\nC. 奉兵\nD. 余萍\n'},
            {'role': 'BOT', 'prompt': '回答: C'},
            {'role': 'HUMAN', 'prompt': '问题: 魏丽丽在谢平的生命中扮演着奶奶的角色。郭兵是魏阳的姥姥。谢平是郑玉珍的外婆。丁颖把武波视为其外公。丁颖在李建国的生命中扮演着外婆的角色。武波的父亲是刘瑜。许玲把余桂芳视为其父亲。刘瑜把许玲视为其爷爷。李建国对郭兵来说,不只是一个爷爷,还是一个朋友。魏丽丽的外公是魏阳。 \n在上面提供的打乱的家族关系文本中,"郑玉珍"的能够向上追溯到的最年长的亲人是谁?\nA. 魏丽丽\nB. 刘瑜\nC. 李建国\nD. 余桂芳\n'},
            {'role': 'BOT', 'prompt': '回答: D'},
            {'role': 'HUMAN', 'prompt': '问题: {question}'},
        ],
        # English, answer letter only.
        'single_choice_en': [
            {'role': 'HUMAN', 'prompt': "Question: Jasmine Lane plays the role of James Hill's father in James Hill's life. \nGiven the scrambled family relationships described above, who is the eldest relative that 'James Hill' can trace back to in the context?\nA. Jasmine Lane\nB. James Hill\nC. Christopher Flores\nD. Paula Reeves\n"},
            {'role': 'BOT', 'prompt': 'Answer: A'},
            {'role': 'HUMAN', 'prompt': "Question: For Michael Goodwin, Annette Valdez is not just a dad, but also a friend.For Annette Valdez, Mary Sanders is not just a father, but also a friend. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Michael Goodwin' can trace back to in the context?\nA. Michael Goodwin\nB. Annette Valdez\nC. Mary Sanders\nD. James Mitchell\n"},
            {'role': 'BOT', 'prompt': 'Answer: C'},
            {'role': 'HUMAN', 'prompt': "Question: Laura Garcia considers Mr. Stephen Valdez DDS as their mom.Mr. Stephen Valdez DDS's maternal grandfather is Elizabeth Smith.Laura Garcia is Ashley Johnson's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Ashley Johnson' can trace back to in the context?\nA. Ashley Johnson\nB. Mr. Stephen Valdez DDS\nC. Laura Garcia\nD. Elizabeth Smith\n"},
            {'role': 'BOT', 'prompt': 'Answer: D'},
            {'role': 'HUMAN', 'prompt': "Question: Justin Sanchez's paternal grandfather is Laurie Bean.For Laurie Bean, Christina Kim is not just a mother, but also a friend.Jessica Bass, as Christina Kim's maternal grandfather, has a significant impact on Christina Kim's upbringing.Sarah Hampton considers Justin Sanchez as their father. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Sarah Hampton' can trace back to in the context?\nA. Christina Kim\nB. Jessica Bass\nC. Laurie Bean\nD. Justin Sanchez\n"},
            {'role': 'BOT', 'prompt': 'Answer: B'},
            {'role': 'HUMAN', 'prompt': 'Question: {question}'},
        ],
        # Chinese, with a worked reasoning chain in every BOT turn.
        'single_choice_cn_reasoning': [
            {'role': 'HUMAN', 'prompt': '问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'},
            {'role': 'BOT', 'prompt': '回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 刘红的爸爸是毕东。\n\n因此,"刘红"能够向上追溯到的最年长的亲人是毕东。所以答案是:A'},
            {'role': 'HUMAN', 'prompt': '问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'},
            {'role': 'BOT', 'prompt': '回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 黄玉兰的姥姥是梅建平。\n- 梅建平的妈妈是朱丽丽。\n- 朱丽丽的妈妈是焦慧。\n- 蓝鑫把黄玉兰视为其母亲。\n\n因此,"蓝鑫"能够向上追溯到的最年长的亲人是焦慧。所以答案是:D'},
            {'role': 'HUMAN', 'prompt': '问题: 毕东把柳金凤视为其姥姥。奉兵作为柳金凤的妈妈,对柳金凤的成长有重要影响。余萍把杨颖视为其爸爸。毕东在郭建华的生命中扮演着父亲的角色。常宁的外公是余萍。刘慧是郭建华所生的孩子。刘慧在杨颖的生命中扮演着外公的角色。 \n在上面提供的打乱的家族关系文本中,"常宁"的能够向上追溯到的最年长的亲人是谁?\nA. 柳金凤\nB. 毕东\nC. 奉兵\nD. 余萍\n'},
            {'role': 'BOT', 'prompt': '回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 常宁的外公是余萍。\n- 余萍把杨颖视为其爸爸。\n- 杨颖的外公是刘慧。\n- 刘慧是郭建华所生的孩子。\n- 郭建华的父亲是毕东。\n- 毕东的姥姥是柳金凤。\n- 柳金凤的妈妈是奉兵。\n\n因此,"常宁"能够向上追溯到的最年长的亲人是奉兵。所以答案是:C'},
            {'role': 'HUMAN', 'prompt': '问题: 魏丽丽在谢平的生命中扮演着奶奶的角色。郭兵是魏阳的姥姥。谢平是郑玉珍的外婆。丁颖把武波视为其外公。丁颖在李建国的生命中扮演着外婆的角色。武波的父亲是刘瑜。许玲把余桂芳视为其父亲。刘瑜把许玲视为其爷爷。李建国对郭兵来说,不只是一个爷爷,还是一个朋友。魏丽丽的外公是魏阳。 \n在上面提供的打乱的家族关系文本中,"郑玉珍"的能够向上追溯到的最年长的亲人是谁?\nA. 魏丽丽\nB. 刘瑜\nC. 李建国\nD. 余桂芳\n'},
            {'role': 'BOT', 'prompt': '回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 郑玉珍的外婆是谢平。\n- 谢平的奶奶是魏丽丽。\n- 魏丽丽的外公是魏阳。 \n- 魏阳的姥姥是郭兵。\n- 郭兵的爷爷是李建国。 \n- 李建国的外婆是丁颖。 \n- 丁颖的外公是武波。 \n- 武波的父亲是刘瑜。\n- 刘瑜的爷爷是许玲。\n- 许玲的父亲是余桂芳。 \n\n因此,"郑玉珍"能够向上追溯到的最年长的亲人是余桂芳。所以答案是:D'},
            {'role': 'HUMAN', 'prompt': '问题: {question}'},
        ],
        # English, with a worked reasoning chain in every BOT turn.
        'single_choice_en_reasoning': [
            {'role': 'HUMAN', 'prompt': "Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"},
            {'role': 'BOT', 'prompt': "Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"},
            {'role': 'HUMAN', 'prompt': "Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"},
            {'role': 'BOT', 'prompt': "Answer: Based on the provided information, we can construct the following family relationship chain:\n\n-Jacob Oconnor's paternal grandmother is Robert Hill. \n- For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.\n\nTherefore, the eldest relative that 'Jacob Oconnor' can trace back to in the context is Mikayla Scott. The answer is: D"},
            {'role': 'HUMAN', 'prompt': "Question: Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life.Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.Michael Martinez is not only Misty Moore's father but also Misty Moore's role model. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Barbara Fuentes' can trace back to in the context?\nA. Michael Martinez\nB. Jennifer Garcia\nC. Misty Moore\nD. Barbara Fuentes\n"},
            {'role': 'BOT', 'prompt': "Answer: Based on the provided information, we can construct the following family relationship chain:\n- Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life. \n- Michael Martinez is not only Misty Moore's father but also Misty Moore's role model.\n- Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.\n\nTherefore, the eldest relative that 'Barbara Fuentes' can trace back to in the context is Jennifer Garcia. The answer is: B"},
            {'role': 'HUMAN', 'prompt': "Question: Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.Victor Dean considers Mary Gay as their grandfather.Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.David Hernandez is Jennifer Williams's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jennifer Williams' can trace back to in the context?\nA. Marcus Miller\nB. Carlos Smith\nC. Mary Gay\nD. Victor Dean\n"},
            {'role': 'BOT', 'prompt': "Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- David Hernandez is Jennifer Williams's mom.\n- Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.\n- Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.\n- Victor Dean considers Mary Gay as their grandfather. \n- Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.\n- Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.\n\nTherefore, the eldest relative that 'Jennifer Williams' can trace back to in the context is Marcus Miller. The answer is: A"},
            {'role': 'HUMAN', 'prompt': 'Question: {question}'},
        ],
    },
}
|
||||
|
||||
# ----------------------- Prompt Settings ----------------------- #
|
||||
# Needle counts to generate one dataset entry for (2, 3, ..., 19).
needle_num_list = list(range(2, 20, 1))
names_path = './data/needlebench/names.json'

repeats = 10

# Use few-shot demonstrations (True) or zero-shot prompting (False)
with_few_shot = True

# Max for this dataset is 4, only consulted when `with_few_shot` is True
few_shot_samples = 4

# Generate reasoning path or not, only for single choice
# NOTE(review): nothing in this config reads this flag; the reasoning
# variants are always generated. Confirm whether it should filter them.
with_reasoning = True

# Use circular evaluation or not
with_circular_eval = True

needlebench_prompts = few_shot_prompts
single_choice_prompts = needlebench_prompts['single_choice_prompts']

# Set few shot prompt number.
# Every prompt list is laid out as N (HUMAN, BOT) demonstration pairs
# followed by one final HUMAN turn holding the real question, so keeping the
# last `2 * k + 1` turns keeps the last `k` demonstrations.
for _name in list(single_choice_prompts.keys()):
    if with_few_shot:
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if not 0 < few_shot_samples <= 4:
            raise ValueError(
                'few_shot_samples must be between 1 and 4, '
                f'got {few_shot_samples!r}')
        single_choice_prompts[_name] = \
            single_choice_prompts[_name][-(few_shot_samples * 2 + 1):]
    else:
        # Zero-shot: drop every demonstration and keep only the question
        # turn. Previously all demonstrations were silently kept.
        single_choice_prompts[_name] = single_choice_prompts[_name][-1:]
|
||||
|
||||
# ----------------------- Dataset Settings ----------------------- #
|
||||
|
||||
# Final list of dataset configs exported by this file: one entry per
# (prompt variant, needle count) combination.
needlebench_datasets = []

needlebench_atc_reader_cfg = dict(input_columns=["question"],
                                  output_column="answer")

for _name in list(single_choice_prompts.keys()):
    # Classify the prompt variant on whole '_'-separated tokens. A plain
    # substring test such as `"en" in _name` would also fire for any key
    # that merely contains those letters (e.g. '..._cn_open').
    _name_tokens = _name.split('_')
    language = "English" if "en" in _name_tokens else "Chinese"
    _abbr_suffix = "EN" if language == "English" else "ZH"
    if 'reasoning' in _name_tokens:
        _abbr_suffix += '-Reasoning'

    needlebench_atc_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=single_choice_prompts[_name]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer,),
    )

    # NOTE(review): CircularEvaluator is used regardless of
    # `with_circular_eval`; confirm that is intended when the flag is False.
    needlebench_atc_eval_cfg = dict(
        evaluator=dict(type=CircularEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))

    current_needle_num_list = needle_num_list

    for num_needles in current_needle_num_list:
        abbr = f'NeedleBenchATCDataset-{num_needles}Needle-{_abbr_suffix}'
        dataset_dict = {
            'abbr': abbr,
            'type': NeedleBenchATCDataset,
            'path': names_path,
            'num_needles': num_needles,
            'language': language,
            'repeats': repeats,
            'with_circular': with_circular_eval,
            'reader_cfg': needlebench_atc_reader_cfg,
            'infer_cfg': needlebench_atc_infer_cfg,
            'eval_cfg': needlebench_atc_eval_cfg
        }
        needlebench_datasets.append(dataset_dict)
|
133
configs/datasets/needlebench/atc/atc_choice_20.py
Normal file
133
configs/datasets/needlebench/atc/atc_choice_20.py
Normal file
@ -0,0 +1,133 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import CircularEvaluator
|
||||
from opencompass.datasets.needlebench.atc_choice import NeedleBenchATCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# ----------------------- Prompt Content----------------------- #
|
||||
|
||||
# Few-shot demonstrations for the single-choice ATC task.
#
# Each entry under 'single_choice_prompts' is a list of chat turns made of
# four HUMAN/BOT demonstration pairs followed by one final HUMAN turn that
# carries the `{question}` placeholder.  The `*_reasoning` variants use BOT
# turns that spell out the relationship chain before giving the option letter.
few_shot_prompts = {
    'single_choice_prompts': {
        "single_choice_cn": [
            dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'),
            dict(role='BOT', prompt='回答: A'),
            dict(role='HUMAN', prompt='问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'),
            dict(role='BOT', prompt='回答: D'),
            dict(role='HUMAN', prompt='问题: 毕东把柳金凤视为其姥姥。奉兵作为柳金凤的妈妈,对柳金凤的成长有重要影响。余萍把杨颖视为其爸爸。毕东在郭建华的生命中扮演着父亲的角色。常宁的外公是余萍。刘慧是郭建华所生的孩子。刘慧在杨颖的生命中扮演着外公的角色。 \n在上面提供的打乱的家族关系文本中,"常宁"的能够向上追溯到的最年长的亲人是谁?\nA. 柳金凤\nB. 毕东\nC. 奉兵\nD. 余萍\n'),
            dict(role='BOT', prompt='回答: C'),
            dict(role='HUMAN', prompt='问题: 魏丽丽在谢平的生命中扮演着奶奶的角色。郭兵是魏阳的姥姥。谢平是郑玉珍的外婆。丁颖把武波视为其外公。丁颖在李建国的生命中扮演着外婆的角色。武波的父亲是刘瑜。许玲把余桂芳视为其父亲。刘瑜把许玲视为其爷爷。李建国对郭兵来说,不只是一个爷爷,还是一个朋友。魏丽丽的外公是魏阳。 \n在上面提供的打乱的家族关系文本中,"郑玉珍"的能够向上追溯到的最年长的亲人是谁?\nA. 魏丽丽\nB. 刘瑜\nC. 李建国\nD. 余桂芳\n'),
            dict(role='BOT', prompt='回答: D'),
            dict(role='HUMAN', prompt='问题: {question}'),
        ],
        "single_choice_en": [
            dict(role='HUMAN', prompt="Question: Jasmine Lane plays the role of James Hill's father in James Hill's life. \nGiven the scrambled family relationships described above, who is the eldest relative that 'James Hill' can trace back to in the context?\nA. Jasmine Lane\nB. James Hill\nC. Christopher Flores\nD. Paula Reeves\n"),
            dict(role='BOT', prompt='Answer: A'),
            dict(role='HUMAN', prompt="Question: For Michael Goodwin, Annette Valdez is not just a dad, but also a friend.For Annette Valdez, Mary Sanders is not just a father, but also a friend. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Michael Goodwin' can trace back to in the context?\nA. Michael Goodwin\nB. Annette Valdez\nC. Mary Sanders\nD. James Mitchell\n"),
            dict(role='BOT', prompt='Answer: C'),
            dict(role='HUMAN', prompt="Question: Laura Garcia considers Mr. Stephen Valdez DDS as their mom.Mr. Stephen Valdez DDS's maternal grandfather is Elizabeth Smith.Laura Garcia is Ashley Johnson's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Ashley Johnson' can trace back to in the context?\nA. Ashley Johnson\nB. Mr. Stephen Valdez DDS\nC. Laura Garcia\nD. Elizabeth Smith\n"),
            dict(role='BOT', prompt='Answer: D'),
            dict(role='HUMAN', prompt="Question: Justin Sanchez's paternal grandfather is Laurie Bean.For Laurie Bean, Christina Kim is not just a mother, but also a friend.Jessica Bass, as Christina Kim's maternal grandfather, has a significant impact on Christina Kim's upbringing.Sarah Hampton considers Justin Sanchez as their father. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Sarah Hampton' can trace back to in the context?\nA. Christina Kim\nB. Jessica Bass\nC. Laurie Bean\nD. Justin Sanchez\n"),
            dict(role='BOT', prompt='Answer: B'),
            dict(role='HUMAN', prompt='Question: {question}'),
        ],
        "single_choice_cn_reasoning": [
            dict(role='HUMAN', prompt='问题: 毕东作为刘红的爸爸,对刘红的成长有重要影响。 \n在上面提供的打乱的家族关系文本中,"刘红"的能够向上追溯到的最年长的亲人是谁?\nA. 毕东\nB. 刘红\nC. 李清亮\nD. 王展\n'),
            dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 刘红的爸爸是毕东。\n\n因此,"刘红"能够向上追溯到的最年长的亲人是毕东。所以答案是:A'),

            dict(role='HUMAN', prompt='问题: 梅建平作为黄玉兰的姥姥,对黄玉兰的成长有重要影响。梅建平的妈妈是朱丽丽。蓝鑫把黄玉兰视为其母亲。焦慧不仅是朱丽丽的妈妈,还是朱丽丽的榜样。 \n在上面提供的打乱的家族关系文本中,"蓝鑫"的能够向上追溯到的最年长的亲人是谁?\nA. 梅建平\nB. 朱丽丽\nC. 蓝鑫\nD. 焦慧\n'),
            dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 黄玉兰的姥姥是梅建平。\n- 梅建平的妈妈是朱丽丽。\n- 朱丽丽的妈妈是焦慧。\n- 蓝鑫把黄玉兰视为其母亲。\n\n因此,"蓝鑫"能够向上追溯到的最年长的亲人是焦慧。所以答案是:D'),

            dict(role='HUMAN', prompt='问题: 毕东把柳金凤视为其姥姥。奉兵作为柳金凤的妈妈,对柳金凤的成长有重要影响。余萍把杨颖视为其爸爸。毕东在郭建华的生命中扮演着父亲的角色。常宁的外公是余萍。刘慧是郭建华所生的孩子。刘慧在杨颖的生命中扮演着外公的角色。 \n在上面提供的打乱的家族关系文本中,"常宁"的能够向上追溯到的最年长的亲人是谁?\nA. 柳金凤\nB. 毕东\nC. 奉兵\nD. 余萍\n'),
            dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 常宁的外公是余萍。\n- 余萍把杨颖视为其爸爸。\n- 杨颖的外公是刘慧。\n- 刘慧是郭建华所生的孩子。\n- 郭建华的父亲是毕东。\n- 毕东的姥姥是柳金凤。\n- 柳金凤的妈妈是奉兵。\n\n因此,"常宁"能够向上追溯到的最年长的亲人是奉兵。所以答案是:C'),

            # Fixed: a stray Latin "w" had crept in before "奶奶"; the question
            # now matches the same demonstration in "single_choice_cn".
            dict(role='HUMAN', prompt='问题: 魏丽丽在谢平的生命中扮演着奶奶的角色。郭兵是魏阳的姥姥。谢平是郑玉珍的外婆。丁颖把武波视为其外公。丁颖在李建国的生命中扮演着外婆的角色。武波的父亲是刘瑜。许玲把余桂芳视为其父亲。刘瑜把许玲视为其爷爷。李建国对郭兵来说,不只是一个爷爷,还是一个朋友。魏丽丽的外公是魏阳。 \n在上面提供的打乱的家族关系文本中,"郑玉珍"的能够向上追溯到的最年长的亲人是谁?\nA. 魏丽丽\nB. 刘瑜\nC. 李建国\nD. 余桂芳\n'),
            dict(role='BOT', prompt='回答: 根据提供的信息,我们可以构建以下家族关系链:\n\n- 郑玉珍的外婆是谢平。\n- 谢平的奶奶是魏丽丽。\n- 魏丽丽的外公是魏阳。 \n- 魏阳的姥姥是郭兵。\n- 郭兵的爷爷是李建国。 \n- 李建国的外婆是丁颖。 \n- 丁颖的外公是武波。 \n- 武波的父亲是刘瑜。\n- 刘瑜的爷爷是许玲。\n- 许玲的父亲是余桂芳。 \n\n因此,"郑玉珍"能够向上追溯到的最年长的亲人是余桂芳。所以答案是:D'),
            dict(role='HUMAN', prompt='问题: {question}'),
        ],
        "single_choice_en_reasoning": [
            dict(role='HUMAN', prompt="Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"),
            dict(role='HUMAN', prompt="Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n-Jacob Oconnor's paternal grandmother is Robert Hill. \n- For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.\n\nTherefore, the eldest relative that 'Jacob Oconnor' can trace back to in the context is Mikayla Scott. The answer is: D"),
            dict(role='HUMAN', prompt="Question: Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life.Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.Michael Martinez is not only Misty Moore's father but also Misty Moore's role model. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Barbara Fuentes' can trace back to in the context?\nA. Michael Martinez\nB. Jennifer Garcia\nC. Misty Moore\nD. Barbara Fuentes\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n- Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life. \n- Michael Martinez is not only Misty Moore's father but also Misty Moore's role model.\n- Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.\n\nTherefore, the eldest relative that 'Barbara Fuentes' can trace back to in the context is Jennifer Garcia. The answer is: B"),
            dict(role='HUMAN', prompt="Question: Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.Victor Dean considers Mary Gay as their grandfather.Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.David Hernandez is Jennifer Williams's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jennifer Williams' can trace back to in the context?\nA. Marcus Miller\nB. Carlos Smith\nC. Mary Gay\nD. Victor Dean\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- David Hernandez is Jennifer Williams's mom.\n- Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.\n- Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.\n- Victor Dean considers Mary Gay as their grandfather. \n- Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.\n- Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.\n\nTherefore, the eldest relative that 'Jennifer Williams' can trace back to in the context is Marcus Miller. The answer is: A"),
            dict(role='HUMAN', prompt='Question: {question}'),
        ],
    },
}
|
||||
|
||||
# ----------------------- Prompt Settings ----------------------- #
|
||||
# Needle counts to generate one dataset entry for (2, 3, ..., 19).
needle_num_list = list(range(2, 20, 1))
names_path = './data/needlebench/names.json'

repeats = 10

# Use few-shot demonstrations (True) or zero-shot prompting (False)
with_few_shot = True

# Max for this dataset is 4, only consulted when `with_few_shot` is True
few_shot_samples = 4

# Generate reasoning path or not, only for single choice
# NOTE(review): nothing in this config reads this flag; the reasoning
# variants are always generated. Confirm whether it should filter them.
with_reasoning = True

# Use circular evaluation or not
with_circular_eval = True

needlebench_prompts = few_shot_prompts
single_choice_prompts = needlebench_prompts['single_choice_prompts']

# Set few shot prompt number.
# Every prompt list is laid out as N (HUMAN, BOT) demonstration pairs
# followed by one final HUMAN turn holding the real question, so keeping the
# last `2 * k + 1` turns keeps the last `k` demonstrations.
for _name in list(single_choice_prompts.keys()):
    if with_few_shot:
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if not 0 < few_shot_samples <= 4:
            raise ValueError(
                'few_shot_samples must be between 1 and 4, '
                f'got {few_shot_samples!r}')
        single_choice_prompts[_name] = \
            single_choice_prompts[_name][-(few_shot_samples * 2 + 1):]
    else:
        # Zero-shot: drop every demonstration and keep only the question
        # turn. Previously all demonstrations were silently kept.
        single_choice_prompts[_name] = single_choice_prompts[_name][-1:]
|
||||
|
||||
# ----------------------- Dataset Settings ----------------------- #
|
||||
|
||||
# Final list of dataset configs exported by this file: one entry per
# (prompt variant, needle count) combination.
needlebench_datasets = []

needlebench_atc_reader_cfg = dict(input_columns=["question"],
                                  output_column="answer")

for _name in list(single_choice_prompts.keys()):
    # Classify the prompt variant on whole '_'-separated tokens. A plain
    # substring test such as `"en" in _name` would also fire for any key
    # that merely contains those letters (e.g. '..._cn_open').
    _name_tokens = _name.split('_')
    language = "English" if "en" in _name_tokens else "Chinese"
    _abbr_suffix = "EN" if language == "English" else "ZH"
    if 'reasoning' in _name_tokens:
        _abbr_suffix += '-Reasoning'

    needlebench_atc_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=single_choice_prompts[_name]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer,),
    )

    # NOTE(review): CircularEvaluator is used regardless of
    # `with_circular_eval`; confirm that is intended when the flag is False.
    needlebench_atc_eval_cfg = dict(
        evaluator=dict(type=CircularEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))

    for num_needles in needle_num_list:
        abbr = f'NeedleBenchATCDataset-{num_needles}Needle-{_abbr_suffix}'
        dataset_dict = {
            'abbr': abbr,
            'type': NeedleBenchATCDataset,
            'path': names_path,
            'num_needles': num_needles,
            'language': language,
            'repeats': repeats,
            'with_circular': with_circular_eval,
            'reader_cfg': needlebench_atc_reader_cfg,
            'infer_cfg': needlebench_atc_infer_cfg,
            'eval_cfg': needlebench_atc_eval_cfg
        }
        needlebench_datasets.append(dataset_dict)
|
||||
|
@ -0,0 +1,96 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import CircularEvaluator
|
||||
from opencompass.datasets.needlebench.atc_choice import NeedleBenchATCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# ----------------------- Prompt Content----------------------- #
|
||||
|
||||
# Few-shot dialogues for the multiple-choice ATC task, grouped by question
# type and then keyed by prompt-set name.
few_shot_prompts = {
    'single_choice_prompts': {
        # Four worked HUMAN/BOT examples (family chains of growing length,
        # each BOT turn ending in "The answer is: <letter>"), followed by the
        # HUMAN turn that carries the `{question}` placeholder.
        "single_choice_en_reasoning": [
            dict(role='HUMAN', prompt="Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"),
            dict(role='HUMAN', prompt="Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n-Jacob Oconnor's paternal grandmother is Robert Hill. \n- For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.\n\nTherefore, the eldest relative that 'Jacob Oconnor' can trace back to in the context is Mikayla Scott. The answer is: D"),
            dict(role='HUMAN', prompt="Question: Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life.Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.Michael Martinez is not only Misty Moore's father but also Misty Moore's role model. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Barbara Fuentes' can trace back to in the context?\nA. Michael Martinez\nB. Jennifer Garcia\nC. Misty Moore\nD. Barbara Fuentes\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n- Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life. \n- Michael Martinez is not only Misty Moore's father but also Misty Moore's role model.\n- Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.\n\nTherefore, the eldest relative that 'Barbara Fuentes' can trace back to in the context is Jennifer Garcia. The answer is: B"),
            dict(role='HUMAN', prompt="Question: Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.Victor Dean considers Mary Gay as their grandfather.Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.David Hernandez is Jennifer Williams's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jennifer Williams' can trace back to in the context?\nA. Marcus Miller\nB. Carlos Smith\nC. Mary Gay\nD. Victor Dean\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- David Hernandez is Jennifer Williams's mom.\n- Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.\n- Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.\n- Victor Dean considers Mary Gay as their grandfather. \n- Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.\n- Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.\n\nTherefore, the eldest relative that 'Jennifer Williams' can trace back to in the context is Marcus Miller. The answer is: A"),
            # Actual test item; must stay last so few-shot trimming keeps it.
            dict(role='HUMAN', prompt='Question: {question}'),
        ],
    },
}
|
||||
|
||||
# ----------------------- Prompt Settings ----------------------- #
|
||||
# One dataset is generated per needle count in [2, 50).
needle_num_list = list(range(2, 50))
# JSON file holding the candidate person names per language.
names_path = './data/needlebench/names.json'

# Number of seeded questions generated for every needle count.
repeats = 10

# When True the few-shot dialogue is trimmed to `few_shot_samples` examples.
# When False the prompt lists below are left exactly as defined.
with_few_shot = True

# Number of worked examples to keep (1-4); only read when `with_few_shot`.
few_shot_samples = 4

# Generate reasoning path or not, only for single choice.
# NOTE(review): not read anywhere in the visible part of this config.
with_reasoning = True

# Forwarded to every dataset as its `with_circular` argument.
with_circular_eval = True

needlebench_prompts = few_shot_prompts
single_choice_prompts = needlebench_prompts['single_choice_prompts']

# Keep only the requested number of few-shot examples.
for _name in list(single_choice_prompts):
    if with_few_shot:
        assert 0 < few_shot_samples <= 4
        # Each example is a HUMAN/BOT pair and the list ends with the real
        # question turn, so the last 2 * k + 1 entries are retained.
        single_choice_prompts[_name] = \
            single_choice_prompts[_name][-(few_shot_samples * 2 + 1):]
|
||||
|
||||
# ----------------------- Dataset Settings ----------------------- #
|
||||
|
||||
# Collects one dataset config per (prompt set, needle count) combination.
needlebench_datasets = []

# Every dataset feeds the `question` column to the prompt and treats the
# `answer` column as the reference.
needlebench_atc_reader_cfg = dict(
    input_columns=["question"],
    output_column="answer",
)

for _name in list(single_choice_prompts):

    # The dialogue stored for this prompt set is sent as a single round to a
    # generation-style inferencer; no in-context retrieval is configured.
    needlebench_atc_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=single_choice_prompts[_name]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    # The first option letter among A-D found in the prediction is scored.
    needlebench_atc_eval_cfg = dict(
        evaluator=dict(type=CircularEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess,
                                options='ABCD'),
    )

    for num_needles in needle_num_list:
        # Prompt-set names containing "en" are English, all others Chinese.
        language = "English" if "en" in _name else "Chinese"
        abbr = 'NeedleBenchATCDataset-{}Needle-{}'.format(
            num_needles, 'EN' if language == "English" else 'ZH')
        if 'reasoning' in _name:
            abbr += '-Reasoning'
        dataset_dict = dict(
            abbr=abbr,
            type=NeedleBenchATCDataset,
            path=names_path,
            num_needles=num_needles,
            language=language,
            repeats=repeats,
            with_circular=with_circular_eval,
            reader_cfg=needlebench_atc_reader_cfg,
            infer_cfg=needlebench_atc_infer_cfg,
            eval_cfg=needlebench_atc_eval_cfg,
        )
        needlebench_datasets.append(dataset_dict)
|
@ -0,0 +1,96 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import CircularEvaluator
|
||||
from opencompass.datasets.needlebench.atc_choice import NeedleBenchATCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
|
||||
# ----------------------- Prompt Content----------------------- #
|
||||
|
||||
# Few-shot dialogues for the multiple-choice ATC task, grouped by question
# type and then keyed by prompt-set name.
few_shot_prompts = {
    'single_choice_prompts': {
        # Four worked HUMAN/BOT examples (family chains of growing length,
        # each BOT turn ending in "The answer is: <letter>"), followed by the
        # HUMAN turn that carries the `{question}` placeholder.
        "single_choice_en_reasoning": [
            dict(role='HUMAN', prompt="Question: Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jessica Stewart' can trace back to in the context?\nA. Jack Burch\nB. Jessica Stewart\nC. Sharon House\nD. Carolyn Jackson\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- Sharon House, as Jessica Stewart's father, has a significant impact on Jessica Stewart's upbringing.\n\nTherefore, the eldest relative that 'Jessica Stewart' can trace back to in the context is Sharon House. The answer is: C"),
            dict(role='HUMAN', prompt="Question: For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.Jacob Oconnor's paternal grandmother is Robert Hill. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jacob Oconnor' can trace back to in the context?\nA. Laura Holland\nB. Robert Hill\nC. Jacob Oconnor\nD. Mikayla Scott\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n-Jacob Oconnor's paternal grandmother is Robert Hill. \n- For Robert Hill, Mikayla Scott is not just a paternal grandfather, but also a friend.\n\nTherefore, the eldest relative that 'Jacob Oconnor' can trace back to in the context is Mikayla Scott. The answer is: D"),
            dict(role='HUMAN', prompt="Question: Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life.Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.Michael Martinez is not only Misty Moore's father but also Misty Moore's role model. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Barbara Fuentes' can trace back to in the context?\nA. Michael Martinez\nB. Jennifer Garcia\nC. Misty Moore\nD. Barbara Fuentes\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n- Misty Moore plays the role of Barbara Fuentes's maternal grandfather in Barbara Fuentes's life. \n- Michael Martinez is not only Misty Moore's father but also Misty Moore's role model.\n- Jennifer Garcia, as Michael Martinez's grandmother, has a significant impact on Michael Martinez's upbringing.\n\nTherefore, the eldest relative that 'Barbara Fuentes' can trace back to in the context is Jennifer Garcia. The answer is: B"),
            dict(role='HUMAN', prompt="Question: Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.Victor Dean considers Mary Gay as their grandfather.Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.David Hernandez is Jennifer Williams's mom. \nGiven the scrambled family relationships described above, who is the eldest relative that 'Jennifer Williams' can trace back to in the context?\nA. Marcus Miller\nB. Carlos Smith\nC. Mary Gay\nD. Victor Dean\n"),
            dict(role='BOT', prompt="Answer: Based on the provided information, we can construct the following family relationship chain:\n\n- David Hernandez is Jennifer Williams's mom.\n- Danielle Yates is not only David Hernandez's paternal grandmother but also David Hernandez's role model.\n- Victor Dean is not only Danielle Yates's maternal grandmother but also Danielle Yates's role model.\n- Victor Dean considers Mary Gay as their grandfather. \n- Carlos Smith, as Mary Gay's grandfather, has a significant impact on Mary Gay's upbringing.\n- Marcus Miller, as Carlos Smith's paternal grandfather, has a significant impact on Carlos Smith's upbringing.\n\nTherefore, the eldest relative that 'Jennifer Williams' can trace back to in the context is Marcus Miller. The answer is: A"),
            # Actual test item; must stay last so few-shot trimming keeps it.
            dict(role='HUMAN', prompt='Question: {question}'),
        ],
    },
}
|
||||
|
||||
# ----------------------- Prompt Settings ----------------------- #
|
||||
# One dataset is generated per needle count in [2, 80).
needle_num_list = list(range(2, 80))
# JSON file holding the candidate person names per language.
names_path = './data/needlebench/names.json'

# Number of seeded questions generated for every needle count.
repeats = 10

# When True the few-shot dialogue is trimmed to `few_shot_samples` examples.
# When False the prompt lists below are left exactly as defined.
with_few_shot = True

# Number of worked examples to keep (1-4); only read when `with_few_shot`.
few_shot_samples = 4

# Generate reasoning path or not, only for single choice.
# NOTE(review): not read anywhere in the visible part of this config.
with_reasoning = True

# Forwarded to every dataset as its `with_circular` argument.
with_circular_eval = True

needlebench_prompts = few_shot_prompts
single_choice_prompts = needlebench_prompts['single_choice_prompts']

# Keep only the requested number of few-shot examples.
for _name in list(single_choice_prompts):
    if with_few_shot:
        assert 0 < few_shot_samples <= 4
        # Each example is a HUMAN/BOT pair and the list ends with the real
        # question turn, so the last 2 * k + 1 entries are retained.
        single_choice_prompts[_name] = \
            single_choice_prompts[_name][-(few_shot_samples * 2 + 1):]
|
||||
|
||||
# ----------------------- Dataset Settings ----------------------- #
|
||||
|
||||
# Collects one dataset config per (prompt set, needle count) combination.
needlebench_datasets = []

# Every dataset feeds the `question` column to the prompt and treats the
# `answer` column as the reference.
needlebench_atc_reader_cfg = dict(
    input_columns=["question"],
    output_column="answer",
)

for _name in list(single_choice_prompts):

    # The dialogue stored for this prompt set is sent as a single round to a
    # generation-style inferencer; no in-context retrieval is configured.
    needlebench_atc_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=single_choice_prompts[_name]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )

    # The first option letter among A-D found in the prediction is scored.
    needlebench_atc_eval_cfg = dict(
        evaluator=dict(type=CircularEvaluator),
        pred_postprocessor=dict(type=first_option_postprocess,
                                options='ABCD'),
    )

    for num_needles in needle_num_list:
        # Prompt-set names containing "en" are English, all others Chinese.
        language = "English" if "en" in _name else "Chinese"
        abbr = 'NeedleBenchATCDataset-{}Needle-{}'.format(
            num_needles, 'EN' if language == "English" else 'ZH')
        if 'reasoning' in _name:
            abbr += '-Reasoning'
        dataset_dict = dict(
            abbr=abbr,
            type=NeedleBenchATCDataset,
            path=names_path,
            num_needles=num_needles,
            language=language,
            repeats=repeats,
            with_circular=with_circular_eval,
            reader_cfg=needlebench_atc_reader_cfg,
            infer_cfg=needlebench_atc_infer_cfg,
            eval_cfg=needlebench_atc_eval_cfg,
        )
        needlebench_datasets.append(dataset_dict)
|
26
configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py
Normal file
26
configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py
Normal file
@ -0,0 +1,26 @@
|
||||
from opencompass.models import VLLM
|
||||
|
||||
|
||||
# Chat layout applied around each turn: the prompt opens with "<s>", user
# turns are wrapped in [INST] ... [/INST], and the model turn (the one that
# is generated) is closed with "</s>".
_meta_template = {
    'begin': "<s>",
    'round': [
        {'role': "HUMAN", 'begin': '[INST]', 'end': '[/INST]'},
        {'role': "BOT", 'begin': "", 'end': '</s>', 'generate': True},
    ],
    'eos_token_id': 2,
}
|
||||
|
||||
# Single vLLM-served model entry for Mistral-7B-Instruct-v0.1.
models = [
    {
        'type': VLLM,
        'abbr': 'mistral-7b-instruct-v0.1-vllm',
        'path': 'mistralai/Mistral-7B-Instruct-v0.1',
        'meta_template': _meta_template,
        'max_out_len': 100,
        'max_seq_len': 2048,
        'batch_size': 32,
        # temperature=0 is passed through as a generation keyword argument.
        'generation_kwargs': {'temperature': 0},
        'end_str': '</s>',
        'run_cfg': {'num_gpus': 1, 'num_procs': 1},
    },
]
|
@ -758,6 +758,60 @@ needlebench_8k_batch_depth0_summarizer = dict(
|
||||
],
|
||||
summary_groups=needlebench_summary_groups,
|
||||
)
|
||||
|
||||
# Needle counts aggregated by the ATC summarizer: 2 .. 19 inclusive.
needle_num_list = list(range(2, 20, 1))

# Summary categories. A "-CircularEval" / "-Circular" suffix selects the
# circular-evaluation metric over the same underlying datasets.
categories = ['ZH', 'EN', 'ZH-Reasoning', 'EN-Reasoning', 'ZH-CircularEval', 'EN-CircularEval', 'ZH-Reasoning-Circular', 'EN-Reasoning-Circular']
needlebench_atc_summary_groups = []

for category in categories:
    # Match on 'Circular' rather than 'CircularEval': the reasoning variants
    # are suffixed '-Circular' only, and previously fell through to 'acc_1',
    # which made them duplicates of the plain reasoning groups.
    metric = 'perf_4' if 'Circular' in category else 'acc_1'
    # Strip the suffix to recover the dataset abbreviation the scores are
    # stored under.
    cleaned_category = category.replace('-CircularEval', '').replace('-Circular', '')
    subsets = [f'NeedleBenchATCDataset-{num_needles}Needle-{cleaned_category}'
               for num_needles in needle_num_list]

    # Each subset entry is a [dataset abbr, metric name] pair.
    needlebench_atc_summary_groups.append({
        'name': category,
        'subsets': [[subset, metric] for subset in subsets]
    })
|
||||
|
||||
# Header rows of the ATC summary: for every category a banner line followed
# by its [category, 'weighted_average'] score entry.
atc_dataset_abbrs = []

for category in categories:
    title = '######## Needlebench-ATC-{}-Score ########'.format(category)
    weighted_average_score_entry = [str(category), 'weighted_average']
    atc_dataset_abbrs.extend([title, weighted_average_score_entry])

# Drops a trailing separator row if there is one. With a non-empty
# `categories` the last element appended above is always a list, so this
# comparison against a string does not match.
if atc_dataset_abbrs[-1] == '------------------------------------------':
    atc_dataset_abbrs.pop()
|
||||
|
||||
# Summarizer config for the ATC benchmark: category headers first, then the
# per-needle-count rows for plain accuracy ('acc_1') and for circular
# evaluation ('perf_4'). Each language/variant section ends with a
# separator row.
needlebench_atc_summarizer = dict(
    type=NeedleBenchATCSummarizer,
    dataset_abbrs=[
        *atc_dataset_abbrs,
        '######## Needlebench-ATC Accuracy ########',  # category
        *[
            row
            for suffix in ('ZH', 'EN', 'ZH-Reasoning', 'EN-Reasoning')
            for row in (
                [[f'NeedleBenchATCDataset-{count}Needle-{suffix}', 'acc_1']
                 for count in needle_num_list]
                + ['------------------------------------------']
            )
        ],
        '######## Needlebench-ATC CircularEval ########',  # category
        *[
            row
            for suffix in ('ZH', 'EN', 'ZH-Reasoning', 'EN-Reasoning')
            for row in (
                [[f'NeedleBenchATCDataset-{count}Needle-{suffix}', 'perf_4']
                 for count in needle_num_list]
                + ['------------------------------------------']
            )
        ],
    ],
    summary_groups=needlebench_atc_summary_groups,
)
|
||||
|
169
opencompass/datasets/needlebench/atc_choice.py
Normal file
169
opencompass/datasets/needlebench/atc_choice.py
Normal file
@ -0,0 +1,169 @@
|
||||
# flake8: noqa
|
||||
import copy
|
||||
import json
|
||||
import random
|
||||
|
||||
from datasets import Dataset
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
from ..base import BaseDataset
|
||||
|
||||
|
||||
def get_number(options):
    """Render ``options`` as lettered answer lines.

    Options are labelled with consecutive characters starting at ``A`` and
    each one is written as ``"<letter>. <option>"`` followed by a newline.
    An empty ``options`` yields an empty string.
    """
    first_label = ord('A')
    return ''.join(f'{chr(first_label + offset)}. {choice}\n'
                   for offset, choice in enumerate(options))
|
||||
|
||||
|
||||
def get_circular_example(entry, id):
    """Expand one four-option question into its four rotated variants.

    ``entry`` must provide ``question``, ``options`` (at least four items)
    and ``answer`` (one of ``A``-``D``). It is deep-copied for every
    rotation and is not modified. In each variant the options are
    reordered by the rotation pattern, the lettered option list is
    appended to the stripped question, and the answer becomes
    ``"<id>--<new letter>--<pattern>"``.

    Returns the list of the four variants in pattern order.
    """
    # Only four options are supported by the current circular evaluation.
    rotations = ('ABCD', 'BCDA', 'CDAB', 'DABC')
    base = ord('A')
    variants = []
    for pattern in rotations:
        variant = copy.deepcopy(entry)
        source_options = variant['options']
        variant['options'] = [
            source_options[ord(letter) - base] for letter in pattern
        ]
        # The letter at position k of the pattern is shown as the k-th
        # option, so the original answer letter maps to 'ABCD'[k].
        relabel = dict(zip(pattern, 'ABCD'))
        new_letter = relabel[variant['answer']]
        variant['answer'] = '--'.join((str(id), new_letter, pattern))
        variant['question'] = (variant['question'].strip() + '\n' +
                               get_number(variant['options']))
        variants.append(variant)

    return variants
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class NeedleBenchATCDataset(BaseDataset):
    """Multiple-choice family-chain ("eldest relative") dataset."""

    @staticmethod
    def load(path: str,
             num_needles: int,
             language: str,
             repeats: int,
             with_circular: bool = True):
        """Build the NeedleBench ATC single-choice dataset.

        For every repeat a chain of ``num_needles`` names is sampled, one
        relationship sentence is generated per adjacent pair, the sentences
        are shuffled, and a question asks for the eldest relative of the
        last person in the chain. The first name of the chain is the
        correct option. Each question is expanded into its four circular
        variants.

        The module-level ``random`` generator is reseeded with the repeat
        index on every iteration, so the output is deterministic for a
        given names file and argument set.

        Args:
            path (str): Path of the JSON names file. It must map each
                language to a comma-separated string of names.
            num_needles (int): Number of names in each relationship chain.
            language (str): ``'Chinese'`` or ``'English'``.
            repeats (int): Number of questions to generate (seeds
                ``0 .. repeats - 1``).
            with_circular (bool): Accepted for configuration compatibility
                but not consulted here: the four circular variants are
                always generated. Defaults to True.

        Returns:
            Dataset: Rows with ``question``, ``options`` and ``answer``
            fields, four per repeat.

        Raises:
            ValueError: If ``language`` is neither ``'Chinese'`` nor
                ``'English'``.
        """
        # Validate the language before touching the file: otherwise the
        # failure surfaces as an unrelated KeyError/NameError further down.
        if language == 'Chinese':
            relationship_terms = [
                '父亲', '母亲', '爸爸', '妈妈', '爷爷', '奶奶', '姥姥', '姥爷', '外公', '外婆'
            ]
            relationship_templates = [
                '{A}是{B}的{relationship}。',
                '{B}的{relationship}是{A}。',
                '{A}作为{B}的{relationship},对{B}的成长有重要影响。',
                '{A}不仅是{B}的{relationship},还是{B}的榜样。',
                '{B}是{A}所生的孩子。',
                '{A}对{B}来说,不只是一个{relationship},还是一个朋友。',
                '{A}在{B}的生命中扮演着{relationship}的角色。',
                '{B}把{A}视为其{relationship}。',
            ]
        elif language == 'English':
            relationship_terms = [
                'father', 'mother', 'dad', 'mom', 'grandfather',
                'grandmother', 'maternal grandmother',
                'maternal grandfather', 'paternal grandfather',
                'paternal grandmother'
            ]
            relationship_templates = [
                "{A} is {B}'s {relationship}.",
                "{B}'s {relationship} is {A}.",
                ("{A}, as {B}'s {relationship}, "
                 "has a significant impact on {B}'s upbringing."),
                ("{A} is not only {B}'s {relationship} "
                 "but also {B}'s role model."),
                '{B} is the child of {A}.',
                ('For {B}, {A} is not just a {relationship}, '
                 'but also a friend.'),
                ("{A} plays the role of {B}'s {relationship} "
                 "in {B}'s life."),
                '{B} considers {A} as their {relationship}.',
            ]
        else:
            raise ValueError('Unsupported language specified. '
                             "Please choose either 'Chinese' or 'English'.")

        with open(path, 'r', encoding='utf-8') as file:
            names_data = json.load(file)

        all_names = names_data[language].split(',')

        data = []
        for repeat_id in range(repeats):
            random.seed(repeat_id)
            names = random.sample(all_names, num_needles)

            # names[i] is always the elder ({A}) of names[i + 1] ({B}), so
            # names[0] ends up as the eldest person of the whole chain.
            chain_story = ''
            for elder, younger in zip(names, names[1:]):
                template = random.choice(relationship_templates)
                relation_term = random.choice(relationship_terms)
                relation = template.format(A=elder,
                                           B=younger,
                                           relationship=relation_term)
                chain_story += f'{relation}*'

            # The trailing '*' yields one empty fragment. It is kept on
            # purpose: it contributes nothing to the text, but dropping it
            # would change how the shuffle consumes the RNG and therefore
            # alter the generated benchmark.
            family_story_fragments = chain_story.split('*')
            random.shuffle(family_story_fragments)
            shuffled_story = ''.join(family_story_fragments)

            last_person = names[-1]

            if language == 'Chinese':
                prompt = (f"\n在上面提供的打乱的家族关系文本中,'{last_person}'"
                          f"的能够向上追溯到的最年长的亲人是谁?")
            else:
                prompt = (f"\nGiven the scrambled family relationships "
                          f"described above, who is the eldest relative "
                          f"that '{last_person}' can trace back to in the "
                          f"context?")

            # A fresh dict per repeat, so no state leaks between questions.
            entry = {'question': shuffled_story + ' ' + prompt}

            # Circular evaluation needs exactly four options: pad short
            # chains with distractor names that are not part of the chain.
            if len(names) < 4:
                unused_names = [
                    name for name in all_names if name not in names
                ]
                names.extend(random.sample(unused_names, 4 - len(names)))

            entry['options'] = names[0:4]
            # names[0] (the eldest) is the first option before rotation.
            entry['answer'] = 'A'
            data.extend(get_circular_example(entry, repeat_id))

        return Dataset.from_list(data)
|
@ -11,7 +11,7 @@ from opencompass.openicl import BaseEvaluator
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
|
||||
def get_random_needles(file_path, needle_count):
|
||||
def get_random_needles(counter, file_path, needle_count):
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
data = json.load(file)
|
||||
|
||||
@ -21,6 +21,7 @@ def get_random_needles(file_path, needle_count):
|
||||
]
|
||||
|
||||
if matching_records:
|
||||
random.seed(counter)
|
||||
random_record = random.choice(matching_records)
|
||||
return {
|
||||
'needles': random_record['derivations'],
|
||||
@ -144,7 +145,7 @@ class NeedleBenchMultiDataset(BaseDataset):
|
||||
'The document given to you by the user'
|
||||
f' is {context}\n\n')
|
||||
else:
|
||||
raise ValueError('Unsupported position. '
|
||||
raise ValueError(f'Unsupported position {position}. '
|
||||
'Position must be "End" or "Start".')
|
||||
else:
|
||||
raise ValueError(f"Language '{language}' is not supported.")
|
||||
@ -164,7 +165,7 @@ class NeedleBenchMultiDataset(BaseDataset):
|
||||
random.seed(counter)
|
||||
random.shuffle(lines)
|
||||
random_needle_data = get_random_needles(
|
||||
needle_file_path, num_needles)
|
||||
counter, needle_file_path, num_needles)
|
||||
needles = [
|
||||
'\n' + needle + '\n'
|
||||
for needle in random_needle_data['needles']
|
||||
|
@ -12,7 +12,7 @@ from opencompass.openicl import BaseEvaluator
|
||||
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
|
||||
|
||||
|
||||
def get_random_line_by_language(file_path, language):
|
||||
def get_random_line_by_language(counter, file_path, language):
|
||||
with open(file_path, 'r', encoding='utf-8') as file:
|
||||
lines = [
|
||||
json.loads(line.strip()) for line in file
|
||||
@ -20,6 +20,7 @@ def get_random_line_by_language(file_path, language):
|
||||
]
|
||||
|
||||
if lines:
|
||||
random.seed(counter)
|
||||
random_line = random.choice(lines)
|
||||
return {
|
||||
'needle': random_line['needle'],
|
||||
@ -120,7 +121,7 @@ class NeedleBenchOriginDataset(BaseDataset):
|
||||
'The document given to you by the user'
|
||||
f' is {context}\n\n')
|
||||
else:
|
||||
raise ValueError('Unsupported position. '
|
||||
raise ValueError(f'Unsupported position {position}. '
|
||||
'Position must be "End" or "Start".')
|
||||
else:
|
||||
raise ValueError(f"Language '{language}' is not supported.")
|
||||
@ -140,7 +141,7 @@ class NeedleBenchOriginDataset(BaseDataset):
|
||||
random.shuffle(lines)
|
||||
needle_file_path = os.path.join(path, needle_file_name)
|
||||
random_needle = get_random_line_by_language(
|
||||
needle_file_path, language)
|
||||
counter, needle_file_path, language)
|
||||
needle = '\n' + random_needle['needle'] + '\n'
|
||||
retrieval_question = random_needle['retrieval_question']
|
||||
keyword = random_needle['keyword']
|
||||
|
@ -148,7 +148,7 @@ class NeedleBenchParallelDataset(BaseDataset):
|
||||
f'最后提出的问题\n现在请问:{retrieval_question}\n\n'
|
||||
f'用户现在给你的文档是{context}\n')
|
||||
else:
|
||||
raise ValueError('Unsupported position. '
|
||||
raise ValueError(f'Unsupported position {position}. '
|
||||
'Position must be "End" or "Start".')
|
||||
|
||||
elif language == 'English':
|
||||
@ -171,7 +171,7 @@ class NeedleBenchParallelDataset(BaseDataset):
|
||||
f'\nNow, the questions are: {retrieval_question}\n\n'
|
||||
f'The document given to you by the user is {context}')
|
||||
else:
|
||||
raise ValueError('Unsupported position. '
|
||||
raise ValueError(f'Unsupported position {position}. '
|
||||
'Position must be "End" or "Start".')
|
||||
else:
|
||||
raise ValueError(f"Language '{language}' is not supported.")
|
||||
@ -190,6 +190,7 @@ class NeedleBenchParallelDataset(BaseDataset):
|
||||
random.seed(counter)
|
||||
random.shuffle(lines)
|
||||
predefined_needles = predefined_needles_bak.copy()
|
||||
random.seed(counter)
|
||||
random.shuffle(predefined_needles)
|
||||
|
||||
needles = [
|
||||
|
Loading…
Reference in New Issue
Block a user