[Change] Change CompassArena metric: default summary_type 'half_add' -> 'single' (#1749)

* fix pip version

* fix pip version

* fix summarizer bug

* fix compassarena

* fix compassarena

* fix compassarena
This commit is contained in:
bittersweet1999 2024-12-10 14:45:32 +08:00 committed by GitHub
parent 0d8df541bc
commit 54c0fb7a93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 6 additions and 5 deletions

View File

@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
mode='m2n',
infer_order='double',
base_models=gpt4,
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
))

View File

@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
),
]),
),
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
),
pred_role='BOT',
)

View File

@ -149,6 +149,6 @@ for _name, _prompt in sub_map.items():
mode='m2n',
infer_order='double',
base_models=gpt4,
summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
summarizer = dict(type=CompassArenaSummarizer, summary_type='single'),
given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
))

View File

@ -120,7 +120,7 @@ for _name, _prompt in sub_map.items():
),
]),
),
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='half_add', check_pos_bias=True),
dict_postprocessor=dict(type=compassarena_postprocess, summary_type='single', check_pos_bias=True),
),
pred_role='BOT',
)

View File

@ -65,7 +65,7 @@ def post_process_compassarena(item):
@DICT_POSTPROCESSORS.register_module('compassarena')
def compassarena_postprocess(output: dict,
output_path: str,
summary_type='half_add',
summary_type='single',
check_pos_bias=True) -> dict:
judged_answers, references = get_judgeanswer_and_reference(
output, output_path, post_process_compassarena)
@ -81,6 +81,7 @@ def compassarena_postprocess(output: dict,
model1 = references[0]['answer1']
for prediction, reference in zip(judged_answers, references):
categories[reference['capability']] += 1
if prediction == 'A':