mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[ci] add common_summarizer return (#1724)
* Update common_summarizer.py * Update common_summarizer.py
This commit is contained in:
parent
c4ce0174fe
commit
a1c00cc8b7
@ -73,6 +73,7 @@ def get_capability_results(
|
||||
with open(fout, 'a+', newline='') as csvfile:
|
||||
writer = csv.writer(csvfile)
|
||||
writer.writerow([model_abbr] + [judge_model_abbr] + [dataset_abbr] + [capability_avg_ratings[column] for column in columns])
|
||||
return {column:capability_avg_ratings[column] for column in columns if column != ''}
|
||||
|
||||
|
||||
class CommonSummarizer(CompassArenaSummarizer):
|
||||
@ -113,6 +114,7 @@ class CommonSummarizer(CompassArenaSummarizer):
|
||||
fout_flag = 0
|
||||
output_tmp_file = osp.join(output_dir, 'result.csv')
|
||||
output_file = osp.join(output_dir, 'total_result.csv')
|
||||
json_result={}
|
||||
for eval_model_cfg in self.eval_model_cfgs:
|
||||
for judge_model_cfg in self.judge_model_cfgs:
|
||||
eval_model_abbr = model_abbr_from_cfg(eval_model_cfg)
|
||||
@ -125,7 +127,10 @@ class CommonSummarizer(CompassArenaSummarizer):
|
||||
judged_answers, references = get_judgeanswer_and_reference(dataset, subdir_path, self.judge_function)
|
||||
show_dataset_abbr = dataset_abbr_from_cfg(dataset)
|
||||
|
||||
get_capability_results(judged_answers, references, output_tmp_file, fout_flag, show_model_abbr, show_judge_model_abbr, show_dataset_abbr)
|
||||
tmp_result = get_capability_results(judged_answers, references, output_tmp_file, fout_flag, show_model_abbr, show_judge_model_abbr, show_dataset_abbr)
|
||||
if show_judge_model_abbr not in json_result:
|
||||
json_result[show_judge_model_abbr] = {}
|
||||
json_result[show_judge_model_abbr][show_model_abbr] = tmp_result
|
||||
fout_flag += 1
|
||||
else:
|
||||
print(subdir_path + ' is not exist! please check!')
|
||||
@ -144,3 +149,4 @@ class CommonSummarizer(CompassArenaSummarizer):
|
||||
f.write(','.join(map(str, line)) + '\n')
|
||||
print(t)
|
||||
print(output_file)
|
||||
return {'qa_bench_' + show_dataset_abbr:json_result}
|
||||
|
Loading…
Reference in New Issue
Block a user