harmonic-tested

This commit is contained in:
yufeng zhao 2025-03-17 08:46:44 +00:00
parent f9599c1f32
commit 171b28b38b
2 changed files with 36 additions and 9 deletions

View File

@@ -3,11 +3,38 @@ from mmengine.config import read_base
with read_base():
from .groups.bbeh import bbeh_summary_groups
# Get all the BBEH subset names from the imported bbeh_summary_groups
bbeh_subsets = []
for group in bbeh_summary_groups:
if group['name'] == 'bbeh':
bbeh_subsets = group['subsets']
break
summarizer = dict(
dataset_abbrs=[
['bbeh', 'naive_average'],
['bbeh', 'harmonic_mean']
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith('_summary_groups')], []),
# Include both individual datasets and the summary metrics we want to see
dataset_abbrs=bbeh_subsets + ['bbeh'] + ['bbeh_harmonic_mean', 'bbeh_standard_deviation', 'bbeh_sum'],
# Define the summary group for bbeh
summary_groups=[
{
'name': 'bbeh',
'subsets': bbeh_subsets,
'metric': 'score' # Explicitly specify the metric to use
},
{
'name': 'bbeh_harmonic_mean',
'subsets': bbeh_subsets,
'metric': 'harmonic_mean'
},
{
'name': 'bbeh_standard_deviation',
'subsets': bbeh_subsets,
'metric': 'standard_deviation'
},
{
'name': 'bbeh_sum',
'subsets': bbeh_subsets,
'metric': 'sum'
}
]
)

View File

@@ -171,11 +171,11 @@ class DefaultSummarizer:
default_metric = 'sum'
elif sg.get('weights', []):
default_metric = 'weighted_average'
elif 'harmonic_mean' in sg:
elif sg.get('harmonic_mean', False):
default_metric = 'harmonic_mean'
else:
default_metric = 'naive_average'
scores, eval_modes, group_metrics = {}, [], None
if any(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']) and \
any(isinstance(dataset_abbr, str) for dataset_abbr in sg['subsets']):
@@ -212,7 +212,7 @@ class DefaultSummarizer:
self.logger.warning(f'Non-positive values found when calculating harmonic mean for {sg["name"]}')
# Handle non-positive values (either skip or use a small positive value)
numerator = len(scores[metric])
denominator = sum(1 / max(scores[metric][k], 1e-10) for k in scores[metric])
denominator = sum(1 / max(scores[metric][k], 1) for k in scores[metric])
else:
numerator = len(scores[metric])
denominator = sum(1 / scores[metric][k] for k in scores[metric])