diff --git a/docs/en/user_guides/experimentation.md b/docs/en/user_guides/experimentation.md index 0f5575a0..a5e2df43 100644 --- a/docs/en/user_guides/experimentation.md +++ b/docs/en/user_guides/experimentation.md @@ -57,7 +57,7 @@ The parameter explanation is as follows: - `-w`: Specify the working path, default is `./outputs/default`. - `-l`: Enable status reporting via Lark bot. - `--dry-run`: When enabled, inference and evaluation tasks will be dispatched but won't actually run for debugging. -- `--dump-eval-details`: When enabled,evaluation under the `results` folder will include more details, such as the correctness of each sample. +- `--dump-eval-details`: Enabled by default; evaluation results under the `results` folder will include more details, such as the correctness of each sample. Set `--dump-eval-details False` to disable it. Using run mode `-m all` as an example, the overall execution flow is as follows: diff --git a/docs/zh_cn/user_guides/experimentation.md b/docs/zh_cn/user_guides/experimentation.md index c960e0f8..53e5d6de 100644 --- a/docs/zh_cn/user_guides/experimentation.md +++ b/docs/zh_cn/user_guides/experimentation.md @@ -57,7 +57,7 @@ python run.py $EXP {--slurm | --dlc | None} [-p PARTITION] [-q QUOTATYPE] [--deb - `-w`: 指定工作路径,默认为 `./outputs/default` - `-l`: 打开飞书机器人状态上报。 - `--dry-run`: 开启时,推理和评测任务仅会分发但不会真正运行,便于调试; -- `--dump-eval-details`: 开启时,`results` 下的评测结果中将会包含更加详细的评测结果信息,例如每条样本是否正确等。 +- `--dump-eval-details`: 默认开启,`results` 下的评测结果中将会包含更加详细的评测结果信息,例如每条样本是否正确等。如不需要开启,需设置`--dump-eval-details False`。 以运行模式 `-m all` 为例,整体运行流如下: diff --git a/opencompass/cli/main.py b/opencompass/cli/main.py index a5937033..d1f4b1dd 100644 --- a/opencompass/cli/main.py +++ b/opencompass/cli/main.py @@ -119,8 +119,11 @@ def parse_args(): parser.add_argument( '--dump-eval-details', help='Whether to dump the evaluation details, including the ' - 'correctness of each sample, bpb, etc.', - action='store_true', + 'correctness of each sample, bpb, etc. 
Defaults to True.', + nargs='?', + const=True, + default=True, + type=lambda x: False if x and x.lower() == 'false' else True ) parser.add_argument( '--dump-extract-rate', @@ -233,7 +236,6 @@ def parse_custom_dataset_args(custom_dataset_parser): def main(): args = parse_args() - if args.num_gpus is not None: raise ValueError('The `--num-gpus` argument is deprecated, please use ' '`--hf-num-gpus` to describe number of gpus used for ' @@ -350,6 +352,9 @@ def main(): if args.dlc or args.slurm or cfg.get('eval', None) is None: fill_eval_cfg(cfg, args) if args.dump_eval_details: + logger.warning('Default to dump eval details, it might take extra' + 'space to save all the evaluation details. ' + 'Set --dump-eval-details False to skip the details dump') cfg.eval.runner.task.dump_details = True if args.dump_extract_rate: cfg.eval.runner.task.cal_extract_rate = True