Merge branch 'open-compass:main' into main

This commit is contained in:
bittersweet1999 2024-07-18 09:51:36 +08:00 committed by GitHub
commit c4aa7825ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 21 additions and 3 deletions

View File

@ -1,7 +1,7 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator from opencompass.openicl.icl_evaluator import JiebaRougeEvaluator
from opencompass.datasets.rolebench import InstructionGeneralizationChineseDataset from opencompass.datasets.rolebench import InstructionGeneralizationChineseDataset
instruction_generalization_zh_reader_cfg = dict( instruction_generalization_zh_reader_cfg = dict(
@ -27,7 +27,7 @@ instruction_generalization_zh_infer_cfg = dict(
) )
instruction_generalization_zh_eval_cfg = dict( instruction_generalization_zh_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator), evaluator=dict(type=JiebaRougeEvaluator),
pred_role='BOT' pred_role='BOT'
) )

View File

@ -26,7 +26,7 @@ for _name in subjective_all_sets:
template="""{dialogue}""", template="""{dialogue}""",
), ),
retriever=dict(type=ZeroRetriever), retriever=dict(type=ZeroRetriever),
inferencer=dict(type=ChatInferencer, max_seq_len=4096, max_out_len=512, temperature=temperature, do_sample=do_sample,infer_mode='every'), inferencer=dict(type=ChatInferencer, max_seq_len=4096, max_out_len=1024, temperature=temperature, do_sample=do_sample,infer_mode='every'),
) )
subjective_eval_cfg = dict( subjective_eval_cfg = dict(

View File

@ -119,3 +119,10 @@ If you have already download the checkpoints of the model, you can specify the l
```bash ```bash
python run.py --datasets siqa_gen winograd_ppl --hf-type base --hf-path /path/to/model python run.py --datasets siqa_gen winograd_ppl --hf-type base --hf-path /path/to/model
``` ```
## Dataset
### How to build a new dataset?
- To build a new objective dataset, see [new_dataset](../advanced_guides/new_dataset.md)
- To build a new subjective dataset, see [subjective_evaluation](../advanced_guides/subjective_evaluation.md)

View File

@ -119,3 +119,10 @@ OpenCompass 中的每个任务代表等待评估的特定模型和数据集部
```bash ```bash
python run.py --datasets siqa_gen winograd_ppl --hf-type base --hf-path /path/to/model python run.py --datasets siqa_gen winograd_ppl --hf-type base --hf-path /path/to/model
``` ```
## 数据集
### 如何构建自己的评测数据集
- 客观数据集构建参见:[支持新数据集](../advanced_guides/new_dataset.md)
- 主观数据集构建参见:[主观评测指引](../advanced_guides/subjective_evaluation.md)

View File

@ -1,6 +1,7 @@
# flake8: noqa # flake8: noqa
# yapf: disable # yapf: disable
import argparse import argparse
import copy
import getpass import getpass
import os import os
import os.path as osp import os.path as osp

View File

@ -369,6 +369,9 @@ class AlignmentBenchSummarizer:
if os.path.isdir(subdir_path): if os.path.isdir(subdir_path):
judged_answers, references = get_judgeanswer_and_reference( judged_answers, references = get_judgeanswer_and_reference(
dataset, subdir_path, self.judge_function) dataset, subdir_path, self.judge_function)
if len(judged_answers) == 0:
score_by_judgemodel[model] = None
continue
if self.judge_type == 'general': if self.judge_type == 'general':
get_dimension_results(judged_answers, references, fout, get_dimension_results(judged_answers, references, fout,
fout_flag, model) fout_flag, model)