mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] fix Flames (#1599)
* fix pip version * fix pip version * fix flames * fix flames
This commit is contained in:
parent
b52ba65c26
commit
3f7a3730d7
@ -58,5 +58,6 @@ for _name in subjective_all_sets:
|
|||||||
name=_name,
|
name=_name,
|
||||||
reader_cfg=subjective_reader_cfg,
|
reader_cfg=subjective_reader_cfg,
|
||||||
infer_cfg=subjective_infer_cfg,
|
infer_cfg=subjective_infer_cfg,
|
||||||
eval_cfg=subjective_eval_cfg
|
eval_cfg=subjective_eval_cfg,
|
||||||
|
mode='singlescore',
|
||||||
))
|
))
|
@ -58,5 +58,6 @@ for _name in subjective_all_sets:
|
|||||||
name=_name,
|
name=_name,
|
||||||
reader_cfg=subjective_reader_cfg,
|
reader_cfg=subjective_reader_cfg,
|
||||||
infer_cfg=subjective_infer_cfg,
|
infer_cfg=subjective_infer_cfg,
|
||||||
eval_cfg=subjective_eval_cfg
|
eval_cfg=subjective_eval_cfg,
|
||||||
|
mode='singlescore',
|
||||||
))
|
))
|
@ -41,7 +41,6 @@ from .ds1000 import * # noqa: F401, F403
|
|||||||
from .ds1000_interpreter import * # noqa: F401, F403
|
from .ds1000_interpreter import * # noqa: F401, F403
|
||||||
from .eprstmt import * # noqa: F401, F403
|
from .eprstmt import * # noqa: F401, F403
|
||||||
from .FinanceIQ import * # noqa: F401, F403
|
from .FinanceIQ import * # noqa: F401, F403
|
||||||
from .flames import * # noqa: F401, F403
|
|
||||||
from .flores import * # noqa: F401, F403
|
from .flores import * # noqa: F401, F403
|
||||||
from .game24 import * # noqa: F401, F403
|
from .game24 import * # noqa: F401, F403
|
||||||
from .GaokaoBench import * # noqa: F401, F403
|
from .GaokaoBench import * # noqa: F401, F403
|
||||||
|
@ -8,6 +8,7 @@ from .compassbench_control_length_bias import \
|
|||||||
CompassBenchControlLengthBiasDataset # noqa: F401, F403
|
CompassBenchControlLengthBiasDataset # noqa: F401, F403
|
||||||
from .corev2 import Corev2Dataset # noqa: F401, F403
|
from .corev2 import Corev2Dataset # noqa: F401, F403
|
||||||
from .creationbench import CreationBenchDataset # noqa: F401, F403
|
from .creationbench import CreationBenchDataset # noqa: F401, F403
|
||||||
|
from .flames import FlamesDataset # noqa: F401, F403
|
||||||
from .fofo import FofoDataset # noqa: F401, F403
|
from .fofo import FofoDataset # noqa: F401, F403
|
||||||
from .followbench import FollowBenchDataset # noqa: F401, F403
|
from .followbench import FollowBenchDataset # noqa: F401, F403
|
||||||
from .information_retrival import IRDataset # noqa: F401, F403
|
from .information_retrival import IRDataset # noqa: F401, F403
|
||||||
|
@ -9,7 +9,7 @@ from datasets import Dataset, DatasetDict
|
|||||||
from opencompass.registry import LOAD_DATASET
|
from opencompass.registry import LOAD_DATASET
|
||||||
from opencompass.utils import get_data_path
|
from opencompass.utils import get_data_path
|
||||||
|
|
||||||
from .subjective.subjective_cmp import SubjectiveCmpDataset
|
from .subjective_cmp import SubjectiveCmpDataset
|
||||||
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
@ -36,11 +36,7 @@ def prompt_construct(sample, config: Config):
|
|||||||
@LOAD_DATASET.register_module()
|
@LOAD_DATASET.register_module()
|
||||||
class FlamesDataset(SubjectiveCmpDataset):
|
class FlamesDataset(SubjectiveCmpDataset):
|
||||||
|
|
||||||
def load(
|
def load(self, path: str, name: str, *args, **kwargs):
|
||||||
self,
|
|
||||||
path: str,
|
|
||||||
name: str,
|
|
||||||
):
|
|
||||||
path = get_data_path(path, local_mode=True)
|
path = get_data_path(path, local_mode=True)
|
||||||
config = Config(path, f'{name}_config.txt')
|
config = Config(path, f'{name}_config.txt')
|
||||||
|
|
@ -21,7 +21,7 @@ def post_process_flames(judgement: str):
|
|||||||
|
|
||||||
分数=3 and extract the score
|
分数=3 and extract the score
|
||||||
"""
|
"""
|
||||||
matches = re.findall(r'分数=(\d+)', text)
|
matches = re.findall(r'分数=(\d+)', judgement)
|
||||||
if matches:
|
if matches:
|
||||||
matches = matches[0]
|
matches = matches[0]
|
||||||
return int(matches)
|
return int(matches)
|
||||||
|
Loading…
Reference in New Issue
Block a user