diff --git a/opencompass/datasets/needlebench_v2/multi.py b/opencompass/datasets/needlebench_v2/multi.py index 8c1b3526..90a968ee 100644 --- a/opencompass/datasets/needlebench_v2/multi.py +++ b/opencompass/datasets/needlebench_v2/multi.py @@ -8,10 +8,9 @@ from datasets import Dataset from huggingface_hub import hf_hub_download from opencompass.datasets.base import BaseDataset -from opencompass.datasets.needlebench_v2.atc import (relationship_templates_en, - relationship_templates_zh_CN, - relationship_terms_en, - relationship_terms_zh_CN) +from opencompass.datasets.needlebench_v2.atc import ( + relationship_templates_en, relationship_templates_zh_CN, + relationship_terms_en, relationship_terms_zh_CN) from opencompass.registry import LOAD_DATASET diff --git a/opencompass/summarizers/needlebench.py b/opencompass/summarizers/needlebench.py index c8f9bfab..d1c07dfb 100644 --- a/opencompass/summarizers/needlebench.py +++ b/opencompass/summarizers/needlebench.py @@ -561,9 +561,9 @@ class NeedleBenchSummarizer(DefaultSummarizer): class NeedleBenchSummarizerV2(NeedleBenchSummarizer): """NeedleBench summarizer V2 in OpenCompass. - + This version calls save_results_to_plots with mean=True. - + Args: config (ConfigDict): The configuration object of the evaluation task. It's expected to be filled out at runtime. dataset_abbrs (list[str], optional): Dataset abbreviations to be listed in the summary. @@ -572,7 +572,7 @@ class NeedleBenchSummarizerV2(NeedleBenchSummarizer): 'weights' if weighted average is needed. prompt_db: A deprecated field. """ - + def summarize( self, output_path: str = None,