OpenCompass/opencompass/metrics/dump_results.py

import os
from typing import Optional

import pandas as pd
from mmengine.evaluator import BaseMetric

from opencompass.registry import METRICS


@METRICS.register_module()
class DumpResults(BaseMetric):
    """Dump model's prediction to a file.

    Args:
        save_path (str): The path to save model's prediction.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        prefix (str, optional): The prefix that will be added to the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Default: None.
    """

    def __init__(self,
                 save_path: str,
                 collect_device: str = 'cpu',
                 prefix: Optional[str] = None) -> None:
        super().__init__(collect_device, prefix)
        self.save_path = save_path
        # Create the output directory if it does not exist yet.
        if not os.path.exists(os.path.dirname(self.save_path)):
            os.makedirs(os.path.dirname(self.save_path), exist_ok=True)

    def process(self, data_batch, data_samples) -> None:
        # Flatten each data sample into one row of the result table.
        for data_sample in data_samples:
            result = dict()
            result['question'] = data_sample.get('question')
            result.update(data_sample.get('options_dict'))
            result['prediction'] = data_sample.get('pred_answer')
            if data_sample.get('category') is not None:
                result['category'] = data_sample.get('category')
            if data_sample.get('l2-category') is not None:
                result['l2-category'] = data_sample.get('l2-category')
            result['index'] = data_sample.get('index')
            result['split'] = data_sample.get('split')
            self.results.append(result)

    def compute_metrics(self, results: list) -> dict:
        # No metric is computed; the collected rows are written to an Excel
        # sheet at ``save_path`` and an empty metric dict is returned.
        df = pd.DataFrame(results)
        with pd.ExcelWriter(self.save_path, engine='openpyxl') as writer:
            df.to_excel(writer, index=False)
        return {}
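
Standalone usage sketch (not part of the file above): driving DumpResults by hand with a synthetic sample and calling its hooks directly. The import path mirrors the file location shown above; the save path and sample values are illustrative assumptions. In a real run the metric is built from a config via the METRICS registry and fed by the mmengine evaluation loop, and openpyxl must be installed for the Excel export.

from opencompass.metrics.dump_results import DumpResults

# Hypothetical output path; the constructor creates 'work_dirs/demo' if needed.
metric = DumpResults(save_path='work_dirs/demo/predictions.xlsx')

# A synthetic sample carrying the keys that process() reads.
samples = [
    dict(question='What color is the sky?',
         options_dict={'A': 'Blue', 'B': 'Green', 'C': 'Red', 'D': 'Yellow'},
         pred_answer='A',
         category='perception',
         index=0,
         split='dev'),
]

metric.process(data_batch=None, data_samples=samples)
print(metric.compute_metrics(metric.results))  # writes the .xlsx and returns {}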