mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
harmonic-tested
This commit is contained in:
parent
f9599c1f32
commit
171b28b38b
@ -3,11 +3,38 @@ from mmengine.config import read_base
|
|||||||
with read_base():
|
with read_base():
|
||||||
from .groups.bbeh import bbeh_summary_groups
|
from .groups.bbeh import bbeh_summary_groups
|
||||||
|
|
||||||
|
# Get all the BBEH subset names from the imported bbeh_summary_groups
|
||||||
|
bbeh_subsets = []
|
||||||
|
for group in bbeh_summary_groups:
|
||||||
|
if group['name'] == 'bbeh':
|
||||||
|
bbeh_subsets = group['subsets']
|
||||||
|
break
|
||||||
|
|
||||||
summarizer = dict(
|
summarizer = dict(
|
||||||
dataset_abbrs=[
|
# Include both individual datasets and the summary metrics we want to see
|
||||||
['bbeh', 'naive_average'],
|
dataset_abbrs=bbeh_subsets + ['bbeh'] + ['bbeh_harmonic_mean', 'bbeh_standard_deviation', 'bbeh_sum'],
|
||||||
['bbeh', 'harmonic_mean']
|
|
||||||
],
|
# Define the summary group for bbeh
|
||||||
summary_groups=sum(
|
summary_groups=[
|
||||||
[v for k, v in locals().items() if k.endswith('_summary_groups')], []),
|
{
|
||||||
|
'name': 'bbeh',
|
||||||
|
'subsets': bbeh_subsets,
|
||||||
|
'metric': 'score' # Explicitly specify the metric to use
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'bbeh_harmonic_mean',
|
||||||
|
'subsets': bbeh_subsets,
|
||||||
|
'metric': 'harmonic_mean'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'bbeh_standard_deviation',
|
||||||
|
'subsets': bbeh_subsets,
|
||||||
|
'metric': 'standard_deviation'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'name': 'bbeh_sum',
|
||||||
|
'subsets': bbeh_subsets,
|
||||||
|
'metric': 'sum'
|
||||||
|
}
|
||||||
|
]
|
||||||
)
|
)
|
@ -171,7 +171,7 @@ class DefaultSummarizer:
|
|||||||
default_metric = 'sum'
|
default_metric = 'sum'
|
||||||
elif sg.get('weights', []):
|
elif sg.get('weights', []):
|
||||||
default_metric = 'weighted_average'
|
default_metric = 'weighted_average'
|
||||||
elif 'harmonic_mean' in sg:
|
elif sg.get('harmonic_mean', False):
|
||||||
default_metric = 'harmonic_mean'
|
default_metric = 'harmonic_mean'
|
||||||
else:
|
else:
|
||||||
default_metric = 'naive_average'
|
default_metric = 'naive_average'
|
||||||
@ -212,7 +212,7 @@ class DefaultSummarizer:
|
|||||||
self.logger.warning(f'Non-positive values found when calculating harmonic mean for {sg["name"]}')
|
self.logger.warning(f'Non-positive values found when calculating harmonic mean for {sg["name"]}')
|
||||||
# Handle non-positive values (either skip or use a small positive value)
|
# Handle non-positive values (either skip or use a small positive value)
|
||||||
numerator = len(scores[metric])
|
numerator = len(scores[metric])
|
||||||
denominator = sum(1 / max(scores[metric][k], 1e-10) for k in scores[metric])
|
denominator = sum(1 / max(scores[metric][k], 1) for k in scores[metric])
|
||||||
else:
|
else:
|
||||||
numerator = len(scores[metric])
|
numerator = len(scores[metric])
|
||||||
denominator = sum(1 / scores[metric][k] for k in scores[metric])
|
denominator = sum(1 / scores[metric][k] for k in scores[metric])
|
||||||
|
Loading…
Reference in New Issue
Block a user