From 171b28b38b7bedc97beeeafb87092f981d46bd7f Mon Sep 17 00:00:00 2001 From: yufeng zhao Date: Mon, 17 Mar 2025 08:46:44 +0000 Subject: [PATCH] harmonic-tested --- opencompass/configs/summarizers/bbeh.py | 39 +++++++++++++++++++++---- opencompass/summarizers/default.py | 6 ++-- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/opencompass/configs/summarizers/bbeh.py b/opencompass/configs/summarizers/bbeh.py index 66111bd3..8ed4aad3 100644 --- a/opencompass/configs/summarizers/bbeh.py +++ b/opencompass/configs/summarizers/bbeh.py @@ -3,11 +3,38 @@ from mmengine.config import read_base with read_base(): from .groups.bbeh import bbeh_summary_groups +# Get all the BBEH subset names from the imported bbeh_summary_groups +bbeh_subsets = [] +for group in bbeh_summary_groups: + if group['name'] == 'bbeh': + bbeh_subsets = group['subsets'] + break + summarizer = dict( - dataset_abbrs=[ - ['bbeh', 'naive_average'], - ['bbeh', 'harmonic_mean'] - ], - summary_groups=sum( - [v for k, v in locals().items() if k.endswith('_summary_groups')], []), + # Include both individual datasets and the summary metrics we want to see + dataset_abbrs=bbeh_subsets + ['bbeh'] + ['bbeh_harmonic_mean', 'bbeh_standard_deviation', 'bbeh_sum'], + + # Define the summary group for bbeh + summary_groups=[ + { + 'name': 'bbeh', + 'subsets': bbeh_subsets, + 'metric': 'score' # Explicitly specify the metric to use + }, + { + 'name': 'bbeh_harmonic_mean', + 'subsets': bbeh_subsets, + 'metric': 'harmonic_mean' + }, + { + 'name': 'bbeh_standard_deviation', + 'subsets': bbeh_subsets, + 'metric': 'standard_deviation' + }, + { + 'name': 'bbeh_sum', + 'subsets': bbeh_subsets, + 'metric': 'sum' + } + ] ) \ No newline at end of file diff --git a/opencompass/summarizers/default.py b/opencompass/summarizers/default.py index f4163ecd..5e9b2393 100644 --- a/opencompass/summarizers/default.py +++ b/opencompass/summarizers/default.py @@ -171,11 +171,11 @@ class DefaultSummarizer: default_metric = 'sum' elif sg.get('weights', []): default_metric = 'weighted_average' - elif 'harmonic_mean' in sg: + elif sg.get('harmonic_mean', False): default_metric = 'harmonic_mean' else: default_metric = 'naive_average' - + scores, eval_modes, group_metrics = {}, [], None if any(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']) and \ any(isinstance(dataset_abbr, str) for dataset_abbr in sg['subsets']): @@ -212,7 +212,7 @@ class DefaultSummarizer: self.logger.warning(f'Non-positive values found when calculating harmonic mean for {sg["name"]}') # Handle non-positive values (either skip or use a small positive value) numerator = len(scores[metric]) - denominator = sum(1 / max(scores[metric][k], 1e-10) for k in scores[metric]) + denominator = sum(1 / max(scores[metric][k], 1) for k in scores[metric]) else: numerator = len(scores[metric]) denominator = sum(1 / scores[metric][k] for k in scores[metric])