[ci] add common_summarizer return (#1724)

* Update common_summarizer.py * Update common_summarizer.py
2025-05-30 16:03:24 +08:00 · 2024-12-11 20:38:32 +08:00 · 2024-12-11 20:38:32 +08:00 · a1c00cc8b7
commit a1c00cc8b7
parent c4ce0174fe
1 changed file with 7 additions and 1 deletion
--- a/opencompass/summarizers/subjective/common_summarizer.py
+++ b/opencompass/summarizers/subjective/common_summarizer.py
@@ -73,6 +73,7 @@ def get_capability_results(
        with open(fout, 'a+', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([model_abbr] + [judge_model_abbr] + [dataset_abbr] + [capability_avg_ratings[column] for column in columns])
+    return {column:capability_avg_ratings[column] for column in columns if column != ''}


 class CommonSummarizer(CompassArenaSummarizer):
@@ -113,6 +114,7 @@ class CommonSummarizer(CompassArenaSummarizer):
        fout_flag = 0
        output_tmp_file = osp.join(output_dir, 'result.csv')
        output_file = osp.join(output_dir, 'total_result.csv')
+        json_result={}
        for eval_model_cfg in self.eval_model_cfgs:
            for judge_model_cfg in self.judge_model_cfgs:
                eval_model_abbr = model_abbr_from_cfg(eval_model_cfg)
@@ -125,7 +127,10 @@ class CommonSummarizer(CompassArenaSummarizer):
                        judged_answers, references = get_judgeanswer_and_reference(dataset, subdir_path, self.judge_function)
                        show_dataset_abbr = dataset_abbr_from_cfg(dataset)

-                        get_capability_results(judged_answers, references, output_tmp_file, fout_flag, show_model_abbr, show_judge_model_abbr, show_dataset_abbr)
+                        tmp_result = get_capability_results(judged_answers, references, output_tmp_file, fout_flag, show_model_abbr, show_judge_model_abbr, show_dataset_abbr)
+                        if show_judge_model_abbr not in json_result:
+                            json_result[show_judge_model_abbr] = {}
+                        json_result[show_judge_model_abbr][show_model_abbr] = tmp_result
                        fout_flag += 1
                else:
                    print(subdir_path + ' is not exist! please check!')
@@ -144,3 +149,4 @@ class CommonSummarizer(CompassArenaSummarizer):
                f.write(','.join(map(str, line)) + '\n')
            print(t)
            print(output_file)
+        return {'qa_bench_' + show_dataset_abbr:json_result}