This commit is contained in:
leao1995 2025-05-29 14:37:25 +08:00 committed by GitHub
commit a08be10602
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 713 additions and 0 deletions

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CoinFlipDataset, coinflip_pred_postprocess
# CoinFlip ships a single JSON dump, so both splits are mapped onto 'test'.
coinflip_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot chain-of-thought prompting: the model is asked to reason step by
# step and to emit a parseable `The answer is [ANSWER]` line.
coinflip_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nPlease reason step by step, and format your final answer as `The answer is [ANSWER]`, where [ANSWER] should be `yes` or `no`.\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

# Accuracy over the yes/no label extracted by coinflip_pred_postprocess.
coinflip_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': coinflip_pred_postprocess},
}

coinflip_datasets = [
    {
        'abbr': 'coinflip',
        'type': CoinFlipDataset,
        'path': 'coin_flip',
        'reader_cfg': coinflip_reader_cfg,
        'infer_cfg': coinflip_infer_cfg,
        'eval_cfg': coinflip_eval_cfg,
    },
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CoinFlipDataset, coinflip_pred_postprocess
# CoinFlip ships a single JSON dump, so both splits are mapped onto 'test'.
coinflip_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot direct-answer prompting: no chain of thought, just yes/no.
coinflip_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nPlease respond `yes` or `no` directly without any additional explanations.\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

# Accuracy over the yes/no label extracted by coinflip_pred_postprocess.
coinflip_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': coinflip_pred_postprocess},
}

coinflip_datasets = [
    {
        'abbr': 'coinflip',
        'type': CoinFlipDataset,
        'path': 'coin_flip',
        'reader_cfg': coinflip_reader_cfg,
        'infer_cfg': coinflip_infer_cfg,
        'eval_cfg': coinflip_eval_cfg,
    },
]

View File

@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CoinFlipDataset, coinflip_pred_postprocess
# CoinFlip ships a single JSON dump, so both splits are mapped onto 'test'.
coinflip_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)
# 8-shot chain-of-thought prompting: the fixed exemplars demonstrate counting
# who flipped the coin, reasoning about flip-count parity, and ending with the
# parseable sentence 'So the answer is yes/no.'.
coinflip_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='Question: A coin is heads up. Ka flips the coin. Sherrie flips the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Ka and Sherrie. So the coin was flipped 2 times, which is an even number. The coin started heads up, so after an even number of flips, it will still be heads up.\nSo the answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Jamey flips the coin. Teressa flips the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Jamey and Teressa. So the coin was flipped 2 times, which is an even number. The coin started heads up, so after an even number of flips, it will still be heads up.\nSo the answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Maybelle flips the coin. Shalonda does not flip the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Maybelle. So the coin was flipped 1 time, which is an odd number. The coin started heads up, so after an odd number of flips, it will be tails up.\nSo the answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Millicent does not flip the coin. Conception flips the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Conception. So the coin was flipped 1 time, which is an odd number. The coin started heads up, so after an odd number of flips, it will be tails up.\nSo the answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Sal flips the coin. Raymond does not flip the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Sal. So the coin was flipped 1 time, which is an odd number. The coin started heads up, so after an odd number of flips, it will be tails up.\nSo the answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Conception flips the coin. Kristian does not flip the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Conception. So the coin was flipped 1 time, which is an odd number. The coin started heads up, so after an odd number of flips, it will be tails up.\nSo the answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Inga does not flip the coin. Elanor does not flip the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by no one. So the coin was flipped 0 times. The coin started heads up, and it was not flipped, so it is still heads up.\nSo the answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Ryan flips the coin. Shaunda flips the coin. Is the coin still heads up?\nAnswer:'),
                dict(role='BOT', prompt='The coin was flipped by Ryan and Shaunda. So the coin was flipped 2 times, which is an even number. The coin started heads up, so after an even number of flips, it will still be heads up.\nSo the answer is yes.\n'),
                # The actual test question; {question} is filled by the reader.
                dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)
# Accuracy over the yes/no label extracted by coinflip_pred_postprocess.
coinflip_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=coinflip_pred_postprocess),
)
coinflip_datasets = [
    dict(
        abbr='coinflip',
        type=CoinFlipDataset,
        path='coin_flip',
        reader_cfg=coinflip_reader_cfg,
        infer_cfg=coinflip_infer_cfg,
        eval_cfg=coinflip_eval_cfg
    )
]

View File

@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CoinFlipDataset, coinflip_pred_postprocess
# CoinFlip ships a single JSON dump, so both splits are mapped onto 'test'.
coinflip_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)
# 8-shot direct-answer prompting: the same exemplar questions as the CoT
# variant, but each BOT turn shows only 'The answer is yes/no.' and every
# HUMAN turn instructs the model to skip reasoning steps.
coinflip_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='Question: A coin is heads up. Ka flips the coin. Sherrie flips the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Jamey flips the coin. Teressa flips the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Maybelle flips the coin. Shalonda does not flip the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Millicent does not flip the coin. Conception flips the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Sal flips the coin. Raymond does not flip the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Conception flips the coin. Kristian does not flip the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is no.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Inga does not flip the coin. Elanor does not flip the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is yes.\n'),
                dict(role='HUMAN', prompt='Question: A coin is heads up. Ryan flips the coin. Shaunda flips the coin. Is the coin still heads up?\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is yes.\n'),
                # The actual test question; {question} is filled by the reader.
                dict(role='HUMAN', prompt='Question: {question}\nPlease answer directly without additional reasoning steps.\nAnswer:'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)
# Accuracy over the yes/no label extracted by coinflip_pred_postprocess.
coinflip_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=coinflip_pred_postprocess),
)
coinflip_datasets = [
    dict(
        abbr='coinflip',
        type=CoinFlipDataset,
        path='coin_flip',
        reader_cfg=coinflip_reader_cfg,
        infer_cfg=coinflip_infer_cfg,
        eval_cfg=coinflip_eval_cfg
    )
]

View File

@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GPQADataset, GPQAEvaluator
from opencompass.utils import first_option_postprocess
# GPQA rows carry the question plus four shuffled options A-D.
gpqa_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer')

# Direct-answer prompting: the model is told to skip reasoning and reply in
# the fixed 'The correct answer is (...)' shape that the option
# postprocessor can parse.
gpqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN',
                     prompt='What is the correct answer to this question: {question}\nChoices:\n'
                     '(A){A}\n'
                     '(B){B}\n'
                     '(C){C}\n'
                     '(D){D}\n'
                     'Please provide your answer directly without any additional reasoning steps or explanations. '
                     'Format your response as follows: "The correct answer is (insert answer here)"'),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer))

# Scored as multiple choice; first_option_postprocess pulls the A-D letter.
gpqa_eval_cfg = dict(evaluator=dict(type=GPQAEvaluator),
                     pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'))

gpqa_datasets = []
gpqa_subsets = {
    'extended': 'gpqa_extended.csv',
    'main': 'gpqa_main.csv',
    'diamond': 'gpqa_diamond.csv'
}
# Iterate the dict items directly instead of materialising list(keys()) and
# re-indexing the dict inside the loop.
for split, filename in gpqa_subsets.items():
    gpqa_datasets.append(
        dict(
            abbr='GPQA_' + split,
            type=GPQADataset,
            path='./data/gpqa/',
            name=filename,
            reader_cfg=gpqa_reader_cfg,
            infer_cfg=gpqa_infer_cfg,
            eval_cfg=gpqa_eval_cfg)
    )

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import MATHEvaluator, math_postprocess_v2
# GSM8K exposes a single 'question' column; gold labels live in 'answer'.
gsm8k_reader_cfg = {'input_columns': ['question'], 'output_column': 'answer'}

# Zero-shot, answer-only prompting: the model is told to skip intermediate
# reasoning and wrap the result in \boxed{} for the MATH-style extractor.
gsm8k_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '{question}\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

# Scored with the MATH v2 evaluator so \boxed{} answers are parsed.
gsm8k_eval_cfg = {
    'evaluator': {'type': MATHEvaluator, 'version': 'v2'},
    'pred_postprocessor': {'type': math_postprocess_v2},
    'dataset_postprocessor': {'type': gsm8k_dataset_postprocess},
}

gsm8k_datasets = [
    {
        'abbr': 'gsm8k',
        'type': GSM8KDataset,
        'path': 'opencompass/gsm8k',
        'reader_cfg': gsm8k_reader_cfg,
        'infer_cfg': gsm8k_infer_cfg,
        'eval_cfg': gsm8k_eval_cfg,
    },
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
# GSM8K exposes a single 'question' column; gold labels live in 'answer'.
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
# 4-shot direct-answer prompting: each exemplar shows only the final numeric
# answer (no chain of thought), and every turn repeats the "answer directly"
# instruction plus the parseable 'The answer is (...)' format.
gsm8k_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nPlease provide your answer directly without additional reasoning steps. Format your answer as 'The answer is (insert answer here)'\nAnswer:"),
                dict(role='BOT', prompt='The answer is 4\n'),
                dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nPlease provide your answer directly without additional reasoning steps. Format your answer as 'The answer is (insert answer here)'\nAnswer:"),
                dict(role='BOT', prompt="The answer is 201\n"),
                dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nPlease provide your answer directly without additional reasoning steps. Format your answer as 'The answer is (insert answer here)'\nAnswer:"),
                dict(role='BOT', prompt="The answer is 140\n"),
                dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nPlease provide your answer directly without additional reasoning steps. Format your answer as 'The answer is (insert answer here)'\nAnswer:"),
                dict(role='BOT', prompt='The answer is 146\n'),
                # The actual test question; {question} is filled by the reader.
                dict(role='HUMAN', prompt="Question: {question}\nPlease provide your answer directly without additional reasoning steps. Format your answer as 'The answer is (insert answer here)'\nAnswer:"),
            ],
        )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
# Standard GSM8K scoring: extract the last number and compare to the gold.
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
                      pred_postprocessor=dict(type=gsm8k_postprocess),
                      dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
gsm8k_datasets = [
    dict(
        abbr='gsm8k',
        type=GSM8KDataset,
        path='opencompass/gsm8k',
        reader_cfg=gsm8k_reader_cfg,
        infer_cfg=gsm8k_infer_cfg,
        eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import LastLettersDataset, last_letters_pred_postprocess
# The last-letters dump has only one split, so both splits point at 'test'.
last_letters_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot chain-of-thought prompting with a parseable final-answer line.
last_letters_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nPlease reason step by step, and format your final answer as `The answer is [ANSWER]`.\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

# Accuracy over the string extracted by last_letters_pred_postprocess.
last_letters_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': last_letters_pred_postprocess},
}

last_letters_datasets = [
    {
        'abbr': 'last_letters',
        'type': LastLettersDataset,
        'path': 'last_letters',
        'reader_cfg': last_letters_reader_cfg,
        'infer_cfg': last_letters_infer_cfg,
        'eval_cfg': last_letters_eval_cfg,
    },
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import LastLettersDataset, last_letters_pred_postprocess
# The last-letters dump has only one split, so both splits point at 'test'.
last_letters_reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
    'train_split': 'test',
    'test_split': 'test',
}

# Zero-shot direct-answer prompting: no explanations, just the formatted
# final answer line.
last_letters_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Question: {question}\nPlease respond directly without any additional explanations, and format your final answer as `The answer is [ANSWER]`.\nAnswer:',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 512},
}

# Accuracy over the string extracted by last_letters_pred_postprocess.
last_letters_eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_postprocessor': {'type': last_letters_pred_postprocess},
}

last_letters_datasets = [
    {
        'abbr': 'last_letters',
        'type': LastLettersDataset,
        'path': 'last_letters',
        'reader_cfg': last_letters_reader_cfg,
        'infer_cfg': last_letters_infer_cfg,
        'eval_cfg': last_letters_eval_cfg,
    },
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import LastLettersDataset, last_letters_pred_postprocess
# The last-letters dump has only one split, so both splits point at 'test'.
last_letters_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)
# 4-shot chain-of-thought prompting: exemplars walk through taking the last
# letter of each word before concatenating, ending with 'So the answer is X.'.
last_letters_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Elon Musk" and concatenate them.\nAnswer:'),
                dict(role='BOT', prompt='The last letter of "Elon" is "n". The last letter of "Musk" is "k". Concatenating them is "nk".\nSo the answer is nk.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Larry Page" and concatenate them.\nAnswer:'),
                dict(role='BOT', prompt='The last letter of "Larry" is "y". The last letter of "Page" is "e". Concatenating them is "ye".\nSo the answer is ye.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Sergey Brin" and concatenate them.\nAnswer:'),
                dict(role='BOT', prompt='The last letter of "Sergey" is "y". The last letter of "Brin" is "n". Concatenating them is "yn".\nSo the answer is yn.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Bill Gates" and concatenate them.\nAnswer:'),
                dict(role='BOT', prompt='The last letter of "Bill" is "l". The last letter of "Gates" is "s". Concatenating them is "ls".\nSo the answer is ls.\n'),
                # The actual test question; {question} is filled by the reader.
                dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)
# Accuracy over the string extracted by last_letters_pred_postprocess.
last_letters_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=last_letters_pred_postprocess),
)
last_letters_datasets = [
    dict(
        abbr='last_letters',
        type=LastLettersDataset,
        path='last_letters',
        reader_cfg=last_letters_reader_cfg,
        infer_cfg=last_letters_infer_cfg,
        eval_cfg=last_letters_eval_cfg
    )
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import LastLettersDataset, last_letters_pred_postprocess
# The last-letters dump has only one split, so both splits point at 'test'.
last_letters_reader_cfg = dict(
    input_columns=['question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)
# 4-shot direct-answer prompting: same exemplar questions as the CoT variant,
# but each BOT turn shows only 'The answer is X.' and every HUMAN turn asks
# the model to skip reasoning steps.
last_letters_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Elon Musk" and concatenate them.\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is nk.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Larry Page" and concatenate them.\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is ye.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Sergey Brin" and concatenate them.\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is yn.\n'),
                dict(role='HUMAN', prompt='Question: Take the last letters of the words in "Bill Gates" and concatenate them.\nPlease answer directly without additional reasoning steps.\nAnswer:'),
                dict(role='BOT', prompt='The answer is ls.\n'),
                # The actual test question; {question} is filled by the reader.
                dict(role='HUMAN', prompt='Question: {question}\nPlease answer directly without additional reasoning steps.\nAnswer:'),
            ]
        )
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)
# Accuracy over the string extracted by last_letters_pred_postprocess.
last_letters_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=last_letters_pred_postprocess),
)
last_letters_datasets = [
    dict(
        abbr='last_letters',
        type=LastLettersDataset,
        path='last_letters',
        reader_cfg=last_letters_reader_cfg,
        infer_cfg=last_letters_infer_cfg,
        eval_cfg=last_letters_eval_cfg
    )
]

View File

@ -0,0 +1,35 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2, normalize_final_answer
# MATH rows provide the 'problem' statement; gold text lives in 'solution'.
math_reader_cfg = {'input_columns': ['problem'], 'output_column': 'solution'}

# Zero-shot, answer-only prompting: no intermediate reasoning, final answer
# wrapped in \boxed{} so the v2 postprocessor can extract it.
math_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': '{problem}\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 1024},
}

# v2 evaluator + v2 postprocessor pair for \boxed{} extraction.
math_eval_cfg = {
    'evaluator': {'type': MATHEvaluator, 'version': 'v2'},
    'pred_postprocessor': {'type': math_postprocess_v2},
}

math_datasets = [
    {
        'type': MATHDataset,
        'abbr': 'math',
        'path': 'opencompass/math',
        'reader_cfg': math_reader_cfg,
        'infer_cfg': math_infer_cfg,
        'eval_cfg': math_eval_cfg,
    },
]

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2
# MATH rows provide the 'problem' statement; gold text lives in 'solution'.
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
# 4-shot answer-only prompting: each exemplar shows just the boxed final
# answer, and every turn instructs the model to skip intermediate reasoning.
# Backslashes in the LaTeX snippets are doubled: sequences such as '\sqrt'
# and '\det' in non-raw strings are invalid escapes (DeprecationWarning now,
# SyntaxError in future Python); '\\sqrt' yields the same runtime bytes.
# NOTE(review): the prompts mix doubled braces ('{{...}}') with the single
# braces in '\\boxed{}', and the first exemplar ends with a stray '}}' —
# confirm against the template-substitution rules before changing either.
math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(round=[
            dict(role='HUMAN', prompt='Problem:\nFind the domain of the expression $\\frac{{\\sqrt{{x-2}}}}{{\\sqrt{{5-x}}}}$.}}\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.\nSolution:'),
            dict(role='BOT', prompt='Final Answer: \\boxed{{[2,5)}}\n'),
            dict(role='HUMAN', prompt='Problem:\nIf $\\det \\mathbf{{A}} = 2$ and $\\det \\mathbf{{B}} = 12,$ then find $\\det (\\mathbf{{A}} \\mathbf{{B}}).$\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.\nSolution:'),
            dict(role='BOT', prompt='Final Answer: \\boxed{{24}}\n'),
            dict(role='HUMAN', prompt='Problem:\nTerrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.\nSolution:'),
            dict(role='BOT', prompt='Final Answer: \\boxed{{16}}\n'),
            dict(role='HUMAN', prompt='Problem:\nIf the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \\end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.\nSolution:'),
            dict(role='BOT', prompt='Final Answer: \\boxed{{-\\frac{{2}}{{3}}}}\n'),
            # The actual test problem; {problem} is filled by the reader.
            dict(role='HUMAN', prompt='Problem:\n{problem}\nPlease provide only the final answer, without including any intermediate reasoning steps, and put your final answer within \\boxed{}.\nSolution:\n'),
        ])),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512))
# v2 evaluator + v2 postprocessor pair for \boxed{} extraction.
math_eval_cfg = dict(
    evaluator=dict(type=MATHEvaluator, version='v2'),
    pred_postprocessor=dict(type=math_postprocess_v2))
math_datasets = [
    dict(
        type=MATHDataset,
        abbr='math',
        path='opencompass/math',
        reader_cfg=math_reader_cfg,
        infer_cfg=math_infer_cfg,
        eval_cfg=math_eval_cfg)
]

View File

@ -0,0 +1,64 @@
from mmengine.config import read_base
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUProDataset
from opencompass.utils.text_postprocessors import match_answer_pattern
# read_base() lets this mmengine config import a sibling config module;
# `categories` enumerates the MMLU-Pro subject splits.
with read_base():
    from .mmlu_pro_categories import categories
# Direct-answer prompt: options may run A-P (up to 16 choices); the model
# must reply 'ANSWER: $LETTER' and skip explanations.
QUERY_TEMPLATE = """
Answer the following multiple choice question. Your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of Options(e.g. one of ABCDEFGHIJKLMNOP). Please answer directly without additional explanations.
Question:\n
{question}
Options:\n
{options_str}
""".strip()
# One dataset entry is appended per subject category.
mmlu_pro_datasets = []
for category in categories:
    mmlu_pro_reader_cfg = dict(
        input_columns=['question', 'cot_content', 'options_str'],
        output_column='answer',
        train_split='validation',
        test_split='test',
    )
    mmlu_pro_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='HUMAN',
                         prompt=QUERY_TEMPLATE),
                ],
            ),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )
    # Accuracy on the letter captured from 'ANSWER: X' (case-insensitive).
    mmlu_pro_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
        pred_postprocessor=dict(
            type=match_answer_pattern,
            answer_pattern=r'(?i)ANSWER\s*:\s*([A-P])')
    )
    mmlu_pro_datasets.append(
        dict(
            abbr=f'mmlu_pro_{category.replace(" ", "_")}',
            type=MMLUProDataset,
            path='opencompass/mmlu_pro',
            category=category,
            reader_cfg=mmlu_pro_reader_cfg,
            infer_cfg=mmlu_pro_infer_cfg,
            eval_cfg=mmlu_pro_eval_cfg,
        ))

View File

@ -34,6 +34,7 @@ from .cmmlu import * # noqa: F401, F403
from .cmnli import * # noqa: F401, F403
from .cmo_fib import * # noqa: F401, F403
from .cmrc import * # noqa: F401, F403
from .coinflip import * # noqa: F401, F403
from .commonsenseqa import * # noqa: F401, F403
from .commonsenseqa_cn import * # noqa: F401, F403
from .copa import * # noqa: F401, F403
@ -78,6 +79,7 @@ from .judge import * # noqa: F401, F403
from .kaoshi import KaoshiDataset, KaoshiEvaluator # noqa: F401, F403
from .korbench import * # noqa: F401, F403
from .lambada import * # noqa: F401, F403
from .last_letters import * # noqa: F401, F403
from .lawbench import * # noqa: F401, F403
from .LCBench import * # noqa: F401, F403
from .lcsts import * # noqa: F401, F403

View File

@ -0,0 +1,39 @@
import os
import re
import json
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
from opencompass.utils.datasets import DEFAULT_DATA_FOLDER
from opencompass.utils.fileio import download_url
@LOAD_DATASET.register_module()
class CoinFlipDataset(BaseDataset):
    """Coin-flip state-tracking QA set loaded from a cached JSON dump.

    The raw file is fetched from the Chain-of-Knowledge repository on
    first use and cached under the OpenCompass data folder.
    """

    @staticmethod
    def load(path: str):
        # NOTE(review): `path` is unused here; the on-disk location below
        # is hard-coded — confirm this matches the config's `path` intent.
        cache_dir = os.environ.get('COMPASS_DATA_CACHE', '')
        local_path = './data/coin_flip/coin_flip.json'
        data_path = os.path.join(DEFAULT_DATA_FOLDER, cache_dir, local_path)

        # Download once; subsequent runs hit the cached copy.
        if not os.path.exists(data_path):
            dataset_url = ('https://raw.githubusercontent.com/wjn1996/'
                           'Chain-of-Knowledge/refs/heads/main/tasks/Coin/'
                           'dataset/coin_flip.json')
            download_url(dataset_url, os.path.dirname(data_path))

        with open(data_path, 'r', encoding='utf-8') as f:
            examples = list(json.load(f)["examples"])

        # Only a 'test' split exists for this benchmark.
        return DatasetDict({'test': Dataset.from_list(examples)})
@TEXT_POSTPROCESSORS.register_module('coinflip')
def coinflip_pred_postprocess(text: str) -> str:
    """Extract the final yes/no verdict from a model completion.

    Keeps only the tail after the last 'answer is ' marker, then returns
    the first 'yes'/'no' found in it (lower-cased), or '' when absent.
    """
    tail = text.split('answer is ')[-1].lower()
    found = re.search(r'(yes|no)', tail)
    return found.group(1) if found else ''

View File

@ -0,0 +1,37 @@
import os
import re
import json
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from .base import BaseDataset
from opencompass.utils.datasets import DEFAULT_DATA_FOLDER
from opencompass.utils.fileio import download_url
@LOAD_DATASET.register_module()
class LastLettersDataset(BaseDataset):
    """Last-letter-concatenation QA set loaded from a cached JSON dump.

    The raw file is fetched from the Chain-of-Knowledge repository on
    first use and cached under the OpenCompass data folder.
    """

    @staticmethod
    def load(path: str):
        # NOTE(review): `path` is unused here; the on-disk location below
        # is hard-coded — confirm this matches the config's `path` intent.
        cache_dir = os.environ.get('COMPASS_DATA_CACHE', '')
        local_path = './data/last_letters/last_letters.json'
        data_path = os.path.join(DEFAULT_DATA_FOLDER, cache_dir, local_path)

        # Download once; subsequent runs hit the cached copy.
        if not os.path.exists(data_path):
            dataset_url = ('https://raw.githubusercontent.com/wjn1996/'
                           'Chain-of-Knowledge/refs/heads/main/tasks/Letter/'
                           'dataset/last_letters.json')
            download_url(dataset_url, os.path.dirname(data_path))

        with open(data_path, 'r', encoding='utf-8') as f:
            examples = list(json.load(f)["examples"])

        # Only a 'test' split exists for this benchmark.
        return DatasetDict({'test': Dataset.from_list(examples)})
@TEXT_POSTPROCESSORS.register_module('last_letters')
def last_letters_pred_postprocess(text: str) -> str:
    """Normalise a model answer for the last-letter-concatenation task.

    Takes the tail after the last 'answer is ' marker, strips quotes,
    newlines, dots and whitespace, then trims remaining non-letter
    characters from both ends.
    """
    text = text.split('answer is ')[-1]
    # Raw strings: the original non-raw pattern relied on '\.' and '\s'
    # being invalid string escapes that pass through unchanged
    # (DeprecationWarning today, SyntaxError in future Python). The regex
    # the engine sees — and therefore the behavior — is identical.
    text = re.sub(r"\"|\'|\n|\.|\s", "", text)
    text = re.sub(r"^[^a-zA-Z]+|[^a-zA-Z]+$", "", text)
    return text