Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feat] update code config (#749)
* [Feat] update code dataset
This commit is contained in:
parent fe0b717033
commit 327951087f
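For context, these dataset configs are consumed by an OpenCompass entry config. A minimal sketch of wiring the new pass@k variant into a run, assuming the usual `read_base()` layout (the model import path is a placeholder, not part of this commit):

    from mmengine.config import read_base

    with read_base():
        # the new pass@k HumanEval config added by this commit
        from .datasets.humaneval.humaneval_passk_gen_8e312c import \
            humaneval_datasets
        # any model config works here; this path is illustrative
        from .models.hf_internlm.hf_internlm_7b import models

    datasets = [*humaneval_datasets]

Running `python run.py <entry-config>.py` would then evaluate the imported models on these datasets.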
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2

 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )

 humaneval_datasets = [
@@ -1 +0,0 @@
-./humaneval_gen_8e312c.py
configs/datasets/humaneval/humaneval_passk_gen_8e312c.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
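The `k=[1, 10, 100]` setting feeds the unbiased pass@k estimator from the HumanEval paper (Chen et al., 2021): with n samples per task of which c pass, pass@k = 1 - C(n-c, k) / C(n, k). A sketch of that estimator (not necessarily the evaluator's exact code):

    import numpy as np

    def pass_at_k(n: int, c: int, k: int) -> float:
        """Unbiased pass@k: n samples drawn per task, c of them correct."""
        if n - c < k:
            return 1.0  # every size-k subset contains a correct sample
        # numerically stable product form of 1 - C(n-c, k) / C(n, k)
        return 1.0 - float(np.prod(1.0 - k / np.arange(n - c + 1, n + 1)))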
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2

 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,12 +22,12 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )

 humaneval_datasets = [
     dict(
-        abbr='openai_humaneval_pass10',
+        abbr='openai_humaneval_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval/human-eval-v2-20210705.jsonl',
         num_repeats=10,
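The repeat10 variants rely on `num_repeats=10`: pass@10 needs at least ten candidate completions per problem, so the dataset duplicates each problem ten times and one inference pass produces the required candidates, which the evaluator groups back by task_id. Conceptually (a hypothetical illustration, not the dataset's actual code):

    def repeat_dataset(examples: list, num_repeats: int = 10) -> list:
        # duplicate every problem so a single inference pass yields
        # num_repeats candidate completions per task
        return [ex for ex in examples for _ in range(num_repeats)]

Sampling (rather than greedy decoding) must be enabled on the model side, otherwise the ten candidates are identical.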
@@ -1 +0,0 @@
-./humaneval_cn_gen_6313aa.py
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='完成以下Python代码任务:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_cn_datasets = [
+    dict(
+        abbr='openai_humaneval_cn_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+
+]
@@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(

 humaneval_cn_datasets = [
     dict(
-        abbr='openai_humaneval_cn_pass10',
+        abbr='openai_humaneval_cn_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
         num_repeats=10,
@@ -19,7 +19,7 @@ humaneval_plus_infer_cfg = dict(
     inferencer=dict(type=GenInferencer, max_out_len=512))

 humaneval_plus_eval_cfg = dict(
-    evaluator=dict(type=HumanEvaluator, k=1, metric='EvalPlus'),
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
     pred_postprocessor=dict(type=humaneval_postprocess_v2),
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_plus_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_plus_datasets = [
+    dict(
+        abbr='humaneval_plus_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_plus_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_plus_datasets = [
+    dict(
+        abbr='humaneval_plus_repeat10',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        num_repeats=10,
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp',
+        abbr='mbpp_passk',
         path='./data/mbpp/mbpp.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
@@ -58,7 +58,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_pass10',
+        abbr='mbpp_repeat10',
         path='./data/mbpp/mbpp.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
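Renaming the abbrs ('mbpp' to 'mbpp_passk', 'mbpp_pass10' to 'mbpp_repeat10') keeps result keys distinct, so both variants can coexist in a single run. A sketch, with hypothetical import paths (the actual config file names may differ):

    from mmengine.config import read_base

    with read_base():
        # hypothetical paths for the two MBPP variants
        from .datasets.mbpp.mbpp_passk_gen import mbpp_datasets as mbpp_passk
        from .datasets.mbpp.mbpp_repeat10_gen import mbpp_datasets as mbpp_repeat10

    datasets = [*mbpp_passk, *mbpp_repeat10]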
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp',
+        abbr='sanitized_mbpp_passk',
         path='./sanitized-mbpp.jsonl',
         reader_cfg=sanitized_mbpp_reader_cfg,
         infer_cfg=sanitized_mbpp_infer_cfg,
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp_pass10',
+        abbr='sanitized_mbpp_repeat10',
         path='./sanitized-mbpp.jsonl',
         num_repeats=10,
         reader_cfg=sanitized_mbpp_reader_cfg,
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn',
+        abbr='mbpp_cn_passk',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn_pass10',
+        abbr='mbpp_cn_repeat10',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
@@ -621,6 +621,7 @@ class HuggingFaceChatGLM3(HuggingFace):
                  peft_path: Optional[str] = None,
                  tokenizer_only: bool = False,
                  model_kwargs: dict = dict(device_map='auto'),
+                 generation_kwargs: dict = dict(),
                  meta_template: Optional[Dict] = None,
                  extract_pred_after_decode: bool = False,
                  batch_padding: bool = False,
@@ -634,6 +635,7 @@ class HuggingFaceChatGLM3(HuggingFace):
             tokenizer_kwargs=tokenizer_kwargs,
             peft_path=peft_path,
             tokenizer_only=tokenizer_only,
+            generation_kwargs=generation_kwargs,
             model_kwargs=model_kwargs,
             meta_template=meta_template,
             extract_pred_after_decode=extract_pred_after_decode,
@@ -647,15 +649,17 @@ class HuggingFaceChatGLM3(HuggingFace):
     def generate(self,
                  inputs: List[str or PromptList],
                  max_out_len: int = 512,
-                 temperature: float = 0.6,
-                 skip_overlength=False) -> str:
+                 skip_overlength=False,
+                 **kwargs) -> str:
         """Generate response from input prompt.

         Args:
             inputs (list): input prompt
             max_out_len (int): max output length
-            temperature (float): temperature for sampling
         """
+        generation_kwargs = kwargs.copy()
+        generation_kwargs.update(self.generation_kwargs)

         responses = []
         for _input in inputs:
             assert isinstance(_input, (str, PromptList))
@@ -692,7 +696,8 @@ class HuggingFaceChatGLM3(HuggingFace):
             try:
                 response, history = self.model.chat(self.tokenizer,
                                                     user_content,
-                                                    history=history)
+                                                    history=history,
+                                                    **generation_kwargs)
                 # response will be dict sometime
                 if isinstance(response, dict):
                     response = response.get('content', '')
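With the new `generation_kwargs` hook, sampling parameters that used to be hard-coded (the removed `temperature: float = 0.6`) are supplied per model config and forwarded into `self.model.chat(...)`. A sketch of a model entry using it (the model path and sampling values are illustrative, not from this commit):

    from opencompass.models import HuggingFaceChatGLM3

    models = [
        dict(
            type=HuggingFaceChatGLM3,
            path='THUDM/chatglm3-6b',           # placeholder model path
            tokenizer_path='THUDM/chatglm3-6b',  # placeholder tokenizer path
            # enables sampling so the repeat10 configs get distinct candidates
            generation_kwargs=dict(do_sample=True, top_p=0.8, temperature=0.6),
            max_out_len=512,
            batch_size=8,
        )
    ]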