Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Change] Change Compassarena metric (#1749)
* fix pip version
* fix pip version
* fix summarizer bug
* fix compassarena
* fix compassarena
* fix compassarena
This commit is contained in:
parent 0d8df541bc
commit 54c0fb7a93
@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
|
|||||||
mode='m2n',
|
mode='m2n',
|
||||||
infer_order='double',
|
infer_order='double',
|
||||||
base_models=gpt4,
|
base_models=gpt4,
|
||||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
|
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
|
||||||
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
||||||
))
|
))
|
||||||
|
@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
|
|||||||
),
|
),
|
||||||
]),
|
]),
|
||||||
),
|
),
|
||||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
|
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
|
||||||
),
|
),
|
||||||
pred_role='BOT',
|
pred_role='BOT',
|
||||||
)
|
)
|
||||||
|
@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
|
|||||||
mode='m2n',
|
mode='m2n',
|
||||||
infer_order='double',
|
infer_order='double',
|
||||||
base_models=gpt4,
|
base_models=gpt4,
|
||||||
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
|
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
|
||||||
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
|
||||||
))
|
))
|
||||||
|
@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
|
|||||||
),
|
),
|
||||||
]),
|
]),
|
||||||
),
|
),
|
||||||
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
|
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
|
||||||
),
|
),
|
||||||
pred_role='BOT',
|
pred_role='BOT',
|
||||||
)
|
)
|
||||||
|
@ -65,7 +65,7 @@ def post_process_compassarena(item):
|
|||||||
@DICT_POSTPROCESSORS.register_module('compassarena')
|
@DICT_POSTPROCESSORS.register_module('compassarena')
|
||||||
def compassarena_postprocess(output: dict,
|
def compassarena_postprocess(output: dict,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
summary_type='half_add',
|
summary_type='single',
|
||||||
check_pos_bias=True) -> dict:
|
check_pos_bias=True) -> dict:
|
||||||
judged_answers, references = get_judgeanswer_and_reference(
|
judged_answers, references = get_judgeanswer_and_reference(
|
||||||
output, output_path, post_process_compassarena)
|
output, output_path, post_process_compassarena)
|
||||||
@ -81,6 +81,7 @@ def compassarena_postprocess(output: dict,
|
|||||||
model1 = references[0]['answer1']
|
model1 = references[0]['answer1']
|
||||||
|
|
||||||
for prediction, reference in zip(judged_answers, references):
|
for prediction, reference in zip(judged_answers, references):
|
||||||
|
|
||||||
categories[reference['capability']] += 1
|
categories[reference['capability']] += 1
|
||||||
|
|
||||||
if prediction == 'A':
|
if prediction == 'A':
|
||||||
|
Loading…
Reference in New Issue
Block a user