mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Change] Change Compassarena metric (#1749)
* fix pip version
* fix pip version
* fix summarizer bug
* fix compassarena
* fix compassarena
* fix compassarena
This commit is contained in:
parent
0d8df541bc
commit
54c0fb7a93
@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
|
||||
mode='m2n',
|
||||
infer_order='double',
|
||||
base_models=gpt4,
|
||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
|
||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
|
||||
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
||||
))
|
||||
|
@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
|
||||
),
|
||||
]),
|
||||
),
|
||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
|
||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
|
||||
),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
|
||||
mode='m2n',
|
||||
infer_order='double',
|
||||
base_models=gpt4,
|
||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
|
||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
|
||||
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
||||
))
|
||||
|
@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
|
||||
),
|
||||
]),
|
||||
),
|
||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
|
||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
|
||||
),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
@ -65,7 +65,7 @@ def post_process_compassarena(item):
|
||||
@DICT_POSTPROCESSORS.register_module('compassarena')
|
||||
def compassarena_postprocess(output: dict,
|
||||
output_path: str,
|
||||
summary_type='half_add',
|
||||
summary_type='single',
|
||||
check_pos_bias=True) -> dict:
|
||||
judged_answers, references = get_judgeanswer_and_reference(
|
||||
output, output_path, post_process_compassarena)
|
||||
@ -81,6 +81,7 @@ def compassarena_postprocess(output: dict,
|
||||
model1 = references[0]['answer1']
|
||||
|
||||
for prediction, reference in zip(judged_answers, references):
|
||||
|
||||
categories[reference['capability']] += 1
|
||||
|
||||
if prediction == 'A':
|
||||
|
Loading…
Reference in New Issue
Block a user