mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
harmonic-tested
This commit is contained in:
parent
f9599c1f32
commit
171b28b38b
@ -3,11 +3,38 @@ from mmengine.config import read_base
|
||||
with read_base():
|
||||
from .groups.bbeh import bbeh_summary_groups
|
||||
|
||||
# Get all the BBEH subset names from the imported bbeh_summary_groups
|
||||
bbeh_subsets = []
|
||||
for group in bbeh_summary_groups:
|
||||
if group['name'] == 'bbeh':
|
||||
bbeh_subsets = group['subsets']
|
||||
break
|
||||
|
||||
summarizer = dict(
|
||||
dataset_abbrs=[
|
||||
['bbeh', 'naive_average'],
|
||||
['bbeh', 'harmonic_mean']
|
||||
],
|
||||
summary_groups=sum(
|
||||
[v for k, v in locals().items() if k.endswith('_summary_groups')], []),
|
||||
# Include both individual datasets and the summary metrics we want to see
|
||||
dataset_abbrs=bbeh_subsets + ['bbeh'] + ['bbeh_harmonic_mean', 'bbeh_standard_deviation', 'bbeh_sum'],
|
||||
|
||||
# Define the summary group for bbeh
|
||||
summary_groups=[
|
||||
{
|
||||
'name': 'bbeh',
|
||||
'subsets': bbeh_subsets,
|
||||
'metric': 'score' # Explicitly specify the metric to use
|
||||
},
|
||||
{
|
||||
'name': 'bbeh_harmonic_mean',
|
||||
'subsets': bbeh_subsets,
|
||||
'metric': 'harmonic_mean'
|
||||
},
|
||||
{
|
||||
'name': 'bbeh_standard_deviation',
|
||||
'subsets': bbeh_subsets,
|
||||
'metric': 'standard_deviation'
|
||||
},
|
||||
{
|
||||
'name': 'bbeh_sum',
|
||||
'subsets': bbeh_subsets,
|
||||
'metric': 'sum'
|
||||
}
|
||||
]
|
||||
)
|
@ -171,11 +171,11 @@ class DefaultSummarizer:
|
||||
default_metric = 'sum'
|
||||
elif sg.get('weights', []):
|
||||
default_metric = 'weighted_average'
|
||||
elif 'harmonic_mean' in sg:
|
||||
elif sg.get('harmonic_mean', False):
|
||||
default_metric = 'harmonic_mean'
|
||||
else:
|
||||
default_metric = 'naive_average'
|
||||
|
||||
|
||||
scores, eval_modes, group_metrics = {}, [], None
|
||||
if any(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']) and \
|
||||
any(isinstance(dataset_abbr, str) for dataset_abbr in sg['subsets']):
|
||||
@ -212,7 +212,7 @@ class DefaultSummarizer:
|
||||
self.logger.warning(f'Non-positive values found when calculating harmonic mean for {sg["name"]}')
|
||||
# Handle non-positive values by clamping each score to a positive floor before taking its reciprocal (nothing is skipped)
|
||||
numerator = len(scores[metric])
|
||||
denominator = sum(1 / max(scores[metric][k], 1e-10) for k in scores[metric])
|
||||
denominator = sum(1 / max(scores[metric][k], 1) for k in scores[metric])
|
||||
else:
|
||||
numerator = len(scores[metric])
|
||||
denominator = sum(1 / scores[metric][k] for k in scores[metric])
|
||||
|
Loading…
Reference in New Issue
Block a user