mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* Add model postprocess function * Add model postprocess function * Add model postprocess function * Add model postprocess function * Add model postprocess function * Add model postprocess function * Add model postprocess function * Add model postprocess function --------- Co-authored-by: liushz <liuhongwei@pjlab.rog.cn>
124 lines
4.1 KiB
Python
124 lines
4.1 KiB
Python
# Convert OpenCompass prediction data to XFinder format
|
|
import copy
|
|
import json
|
|
import re
|
|
|
|
# Blank xFinder request records, keyed by answer type.  Every record carries
# the same seven fields; convert_to_xfinder_format() deep-copies one of them
# and fills in the per-sample values.
xfinder_template = {
    # Free-form mathematical answers: the answer range is a fixed description.
    'math': {
        'model_name': '',
        'dataset': '',
        'key_answer_type': 'math',
        'question': '',
        'llm_output': '',
        'correct_answer': '',
        'standard_answer_range': 'a(n) number / set / vector / matrix / interval / expression / function / equation / inequality',  # noqa
    },
    # Multiple choice answered with a letter; the range is a list of options.
    'alphabet_option': {
        'model_name': '',
        'dataset': '',
        'key_answer_type': 'alphabet_option',
        'question': '',
        # NOTE(review): default is '.', unlike the other entries ('') --
        # confirm whether this is intentional.
        'llm_output': '.',
        'correct_answer': '',
        'standard_answer_range': [],
    },
    # Answers drawn from a set of category labels.
    'categorical_label': {
        'model_name': '',
        'dataset': '',
        # NOTE(review): left empty here while the other entries name their
        # own type -- confirm whether 'categorical_label' was intended.
        'key_answer_type': '',
        'question': '',
        'llm_output': '',
        'correct_answer': '',
        'standard_answer_range': [],
    },
    # Short free-text answers.
    'short_text': {
        'model_name': '',
        'dataset': '',
        'key_answer_type': 'short_text',
        'question': '',
        'llm_output': '',
        'correct_answer': '',
        'standard_answer_range': [],
    },
}
|
|
|
|
|
|
# Option markers recognised at the start of a line: "(A)", "A)", "A." or "A:".
# Exactly one of the two groups captures the option letter.
_OPTION_MARKER = re.compile(r'\(([A-Z])\)|([A-Z])[).:]')


def parse_options(text: str):
    """Extract lettered answer options from a multi-line question text.

    Each line is stripped and checked for a leading option marker of the
    form ``(A)``, ``A)``, ``A.`` or ``A:`` (one upper-case ASCII letter).
    Lines without such a marker are ignored.

    Args:
        text: Question text with one candidate option per line.

    Returns:
        A list of ``[letter, content]`` pairs in order of appearance; the
        list is empty when no line starts with an option marker.
    """
    parsed_options = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        marker = _OPTION_MARKER.match(line)
        if marker is None:
            continue
        letter = marker.group(1) or marker.group(2)
        # Slice directly after the marker and let strip() drop any separator
        # whitespace.  Skipping a fixed extra character would swallow the
        # first letter of the content in inputs such as "(A)Ophelia".
        content = line[marker.end():].strip()
        parsed_options.append([letter, content])
    return parsed_options
|
|
|
|
|
|
def _extract_question(origin_prompt):
    """Return the question text: the last turn's prompt, or a bare string."""
    if isinstance(origin_prompt, str):
        # Plain-string prompts have no turn structure to index into.
        return origin_prompt
    return origin_prompt[-1]['prompt']


def _pick_correct_answer(item):
    """Return ``item['reference']``, falling back to ``item['gold']``."""
    reference = item.get('reference')
    if reference:
        return reference
    if 'gold' in item:
        return item['gold']
    if 'reference' in item:
        # Present but falsy (e.g. 0 or '') and there is no gold answer to
        # fall back to: keep the reference rather than failing.
        return reference
    raise KeyError("item has neither 'reference' nor 'gold'")


def convert_to_xfinder_format(typ, data, model_name='', dataset_name=''):
    """Convert OpenCompass prediction records into xFinder input records.

    Args:
        typ: Answer type; must be a key of ``xfinder_template``.
        data: Iterable of prediction dicts.  Each must provide
            ``'origin_prompt'`` (a list of turns whose last element has a
            ``'prompt'`` entry, or a plain string), ``'prediction'`` and at
            least one of ``'reference'`` / ``'gold'``.
        model_name: Value stored in each record's ``'model_name'`` field.
        dataset_name: Value stored in each record's ``'dataset'`` field.

    Returns:
        A list of dicts shaped like ``xfinder_template[typ]``.  Items whose
        answer range cannot be built are reported on stdout and skipped, so
        the result may be shorter than ``data``.

    Raises:
        AssertionError: If ``typ`` is not a known template key.
        KeyError: If an item lacks a required field.
    """
    assert typ in xfinder_template, f'Invalid type {typ}'
    format_data = []
    for item in data:
        template = copy.deepcopy(xfinder_template[typ])
        question = _extract_question(item['origin_prompt'])
        template['correct_answer'] = _pick_correct_answer(item)
        template['model_name'] = model_name
        template['dataset'] = dataset_name
        template['question'] = question
        template['llm_output'] = item['prediction']
        try:
            if typ == 'alphabet_option':
                template['standard_answer_range'] = parse_options(question)
            elif typ == 'short_text':
                template['standard_answer_range'] = item['gold']
            # 'math' and 'categorical_label' keep the template's default
            # answer range.
        except Exception as e:
            # Best effort by design: one malformed sample must not abort the
            # conversion of the remaining ones.
            print(f'Error when parsing question options: {e}, skipping...')
            continue

        format_data.append(template)
    return format_data
|
|
|
|
|
|
if __name__ == '__main__':
    # Smoke test: convert a single multiple-choice sample and print the
    # resulting xFinder record as pretty-printed JSON.
    sample_prompt = 'Alice, Bob, Claire, Dave, and Eve are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Ophelia, Bob is dancing with Jamie, Claire is dancing with Melissa, Dave is dancing with Rodrigo, and Eve is dancing with Patrick.\nThroughout the song, the dancers often trade partners. First, Claire and Bob switch partners. Then, Claire and Eve switch partners. Then, Claire and Bob switch partners. Then, Eve and Dave switch partners. Finally, Claire and Alice switch partners. At the end of the dance, Alice is dancing with\nOptions:\n(A) Ophelia\n(B) Jamie\n(C) Melissa\n(D) Rodrigo\n(E) Patrick'  # noqa
    sample_item = {
        'origin_prompt': [{
            'role': 'HUMAN',
            'prompt': sample_prompt,
        }],
        'origin_prediction': '\n 答案: B) 前者小于后者',
        'prediction': 'B',
        'reference': 'A',
    }
    converted = convert_to_xfinder_format(
        'alphabet_option', [sample_item], 'GPT-3', 'OpenAI')
    print(json.dumps(converted, indent=4, ensure_ascii=False))
|