diff --git a/opencompass/summarizers/subjective/alpacaeval.py b/opencompass/summarizers/subjective/alpacaeval.py index a5aef8e6..528aa03e 100644 --- a/opencompass/summarizers/subjective/alpacaeval.py +++ b/opencompass/summarizers/subjective/alpacaeval.py @@ -133,8 +133,7 @@ class AlpacaSummarizer: dataset, subdir_path, self.judge_function) win_model1, win_model2, categories = defaultdict( float), defaultdict(float), defaultdict(float) - model1, model2 = references[0]['answer1'], references[0][ - 'answer2'] + for prediction, reference in zip(judged_answers, references): categories['total'] += 1 diff --git a/opencompass/summarizers/subjective/compass_arena.py b/opencompass/summarizers/subjective/compass_arena.py index 7232ceca..13662110 100644 --- a/opencompass/summarizers/subjective/compass_arena.py +++ b/opencompass/summarizers/subjective/compass_arena.py @@ -117,8 +117,6 @@ class CompassArenaSummarizer: win_model1 = defaultdict(float) win_model2 = defaultdict(float) categories = defaultdict(float) - model1 = references[0]['answer1'] - model2 = references[0]['answer2'] for prediction, reference in zip(judged_answers, references): categories[dataset_abbr] += 1 categories[reference['capability']] += 1