diff --git a/configs/datasets/humaneval/deprecated_humaneval_gen_8e312c.py b/configs/datasets/humaneval/deprecated_humaneval_gen_8e312c.py
new file mode 100644
index 00000000..2b9e20ff
--- /dev/null
+++ b/configs/datasets/humaneval/deprecated_humaneval_gen_8e312c.py
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+
+humaneval_reader_cfg = dict(  # reader: one input column ('prompt'); output column is 'task_id'
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow an empty output column in the reader config (currently 'task_id')
+humaneval_infer_cfg = dict(  # inference: single HUMAN-round template, ZeroRetriever, GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),  # {prompt} shares its name with the 'prompt' input column
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(  # evaluation: HumanEvaluator plus a prediction postprocessor
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # k values, presumably for pass@k (confirm); this parameter is only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess),  # this config uses humaneval_postprocess, not the _v2 variant
+)
+
+humaneval_datasets = [  # one dataset entry tying together the reader/infer/eval configs above
+    dict(
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
diff --git a/configs/datasets/humaneval/humaneval_gen_8e312c.py b/configs/datasets/humaneval/humaneval_gen_8e312c.py
index 2b9e20ff..8a0c6c7c 100644
--- a/configs/datasets/humaneval/humaneval_gen_8e312c.py
+++ b/configs/datasets/humaneval/humaneval_gen_8e312c.py
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )
 
 humaneval_datasets = [
diff --git a/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py b/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
deleted file mode 120000
index ca9488ed..00000000
--- a/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
+++ /dev/null
@@ -1 +0,0 @@
-./humaneval_gen_8e312c.py
\ No newline at end of file
diff --git a/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py b/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
new file mode 100644
index 00000000..d1696511
--- /dev/null
+++ b/configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(  # reader: one input column ('prompt'); output column is 'task_id'
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow an empty output column in the reader config (currently 'task_id')
+humaneval_infer_cfg = dict(  # inference: single HUMAN-round template, ZeroRetriever, GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),  # {prompt} shares its name with the 'prompt' input column
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(  # evaluation: HumanEvaluator plus a prediction postprocessor
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # k values, presumably for pass@k (confirm); this parameter is only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),  # predictions go through humaneval_postprocess_v2
+)
+
+humaneval_datasets = [  # one dataset entry tying together the reader/infer/eval configs above
+    dict(
+        abbr='openai_humaneval_passk',  # distinct abbr with a '_passk' suffix; no num_repeats is set in this entry
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
diff --git a/configs/datasets/humaneval/humaneval_repeat10_gen_8e312c.py b/configs/datasets/humaneval/humaneval_repeat10_gen_8e312c.py
index a1940b3f..5eff32c2 100644
--- a/configs/datasets/humaneval/humaneval_repeat10_gen_8e312c.py
+++ b/configs/datasets/humaneval/humaneval_repeat10_gen_8e312c.py
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
 
 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,12 +22,12 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )
 
 humaneval_datasets = [
     dict(
-        abbr='openai_humaneval_pass10',
+        abbr='openai_humaneval_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval/human-eval-v2-20210705.jsonl',
         num_repeats=10,
diff --git a/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py b/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py
deleted file mode 120000
index 4a0cf6f3..00000000
--- a/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py
+++ /dev/null
@@ -1 +0,0 @@
-./humaneval_cn_gen_6313aa.py
\ No newline at end of file
diff --git a/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py b/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py
new file mode 100644
index 00000000..4073824f
--- /dev/null
+++ b/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(  # reader: one input column ('prompt'); output column is 'task_id'
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow an empty output column in the reader config (currently 'task_id')
+humaneval_infer_cfg = dict(  # inference: single HUMAN-round template, ZeroRetriever, GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='完成以下Python代码任务:\n{prompt}'),  # Chinese instruction: "Complete the following Python code task:" followed by {prompt}
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(  # evaluation: HumanEvaluator plus a prediction postprocessor
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # k values, presumably for pass@k (confirm); this parameter is only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),  # predictions go through humaneval_postprocess_v2
+)
+
+humaneval_cn_datasets = [  # one dataset entry tying together the reader/infer/eval configs above
+    dict(
+        abbr='openai_humaneval_cn_passk',  # distinct abbr with a '_passk' suffix; no num_repeats is set in this entry
+        type=HumanevalDataset,
+        path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',  # data file under ./data/humaneval_cn/
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+
+]
diff --git a/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py b/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py
index 5665695b..3cafadc3 100644
--- a/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py
+++ b/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py
@@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
 
 humaneval_cn_datasets = [
     dict(
-        abbr='openai_humaneval_cn_pass10',
+        abbr='openai_humaneval_cn_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
         num_repeats=10,
diff --git a/configs/datasets/humaneval_plus/humaneval_plus_gen_8e312c.py b/configs/datasets/humaneval_plus/humaneval_plus_gen_8e312c.py
index 63698cba..f1f61387 100644
--- a/configs/datasets/humaneval_plus/humaneval_plus_gen_8e312c.py
+++ b/configs/datasets/humaneval_plus/humaneval_plus_gen_8e312c.py
@@ -19,7 +19,7 @@ humaneval_plus_infer_cfg = dict(
     inferencer=dict(type=GenInferencer, max_out_len=512))
 
 humaneval_plus_eval_cfg = dict(
-    evaluator=dict(type=HumanEvaluator,k=1, metric='EvalPlus'),
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
     pred_postprocessor=dict(type=humaneval_postprocess_v2),
diff --git a/configs/datasets/humaneval_plus/humaneval_plus_passk_gen_8e312c.py b/configs/datasets/humaneval_plus/humaneval_plus_passk_gen_8e312c.py
new file mode 100644
index 00000000..1ff4c4dd
--- /dev/null
+++ b/configs/datasets/humaneval_plus/humaneval_plus_passk_gen_8e312c.py
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(  # reader: one input column ('prompt'); output column is 'task_id'
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow an empty output column in the reader config (currently 'task_id')
+humaneval_plus_infer_cfg = dict(  # inference: single HUMAN-round template, ZeroRetriever, GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),  # {prompt} shares its name with the 'prompt' input column
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(  # evaluation: HumanEvaluator plus a prediction postprocessor
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),  # metric='EvalPlus' is passed to the evaluator; no k is set on the evaluator itself
+    pred_role='BOT',
+    k=[1, 10, 100],  # k values, presumably for pass@k (confirm); this parameter is only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),  # predictions go through humaneval_postprocess_v2
+)
+
+humaneval_plus_datasets = [  # one dataset entry tying together the reader/infer/eval configs above
+    dict(
+        abbr='humaneval_plus_passk',  # distinct abbr with a '_passk' suffix; no num_repeats is set in this entry
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',  # NOTE(review): path is under ./data/humaneval/ -- confirm this is the intended file for the EvalPlus metric
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
diff --git a/configs/datasets/humaneval_plus/humaneval_plus_repeat10_gen_8e312c.py b/configs/datasets/humaneval_plus/humaneval_plus_repeat10_gen_8e312c.py
new file mode 100644
index 00000000..bb859a7a
--- /dev/null
+++ b/configs/datasets/humaneval_plus/humaneval_plus_repeat10_gen_8e312c.py
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(  # reader: one input column ('prompt'); output column is 'task_id'
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow an empty output column in the reader config (currently 'task_id')
+humaneval_plus_infer_cfg = dict(  # inference: single HUMAN-round template, ZeroRetriever, GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),  # {prompt} shares its name with the 'prompt' input column
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(  # evaluation: HumanEvaluator plus a prediction postprocessor
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),  # metric='EvalPlus' is passed to the evaluator; no k is set on the evaluator itself
+    pred_role='BOT',
+    k=[1, 10, 100],  # k values, presumably for pass@k (confirm); this parameter is only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),  # predictions go through humaneval_postprocess_v2
+)
+
+humaneval_plus_datasets = [  # one dataset entry tying together the reader/infer/eval configs above
+    dict(
+        abbr='humaneval_plus_repeat10',  # abbr suffix mirrors num_repeats=10 below
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',  # NOTE(review): path is under ./data/humaneval/ -- confirm this is the intended file for the EvalPlus metric
+        num_repeats=10,  # presumably each sample is repeated 10 times by the dataset loader -- confirm against HumanevalDataset
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
diff --git a/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py b/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py
index edc4b9ae..1d3f6611 100644
--- a/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_passk_gen_1e1056.py
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp',
+        abbr='mbpp_passk',
         path='./data/mbpp/mbpp.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
diff --git a/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py b/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py
index 1b8a6a86..53fad641 100644
--- a/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_repeat10_gen_1e1056.py
@@ -58,7 +58,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_pass10',
+        abbr='mbpp_repeat10',
         path='./data/mbpp/mbpp.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
diff --git a/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py b/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py
index 7514c531..fc3a430a 100644
--- a/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_passk_gen_1e1056.py
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_rol
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp',
+        abbr='sanitized_mbpp_passk',
         path='./sanitized-mbpp.jsonl',
         reader_cfg=sanitized_mbpp_reader_cfg,
         infer_cfg=sanitized_mbpp_infer_cfg,
diff --git a/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py b/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py
index 6af2d3f6..90e64c15 100644
--- a/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py
+++ b/configs/datasets/mbpp/sanitized_mbpp_repeat10_gen_1e1056.py
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_rol
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp_pass10',
+        abbr='sanitized_mbpp_repeat10',
         path='./sanitized-mbpp.jsonl',
         num_repeats=10,
         reader_cfg=sanitized_mbpp_reader_cfg,
diff --git a/configs/datasets/mbpp_cn/mbpp_cn_passk_gen_1d1481.py b/configs/datasets/mbpp_cn/mbpp_cn_passk_gen_1d1481.py
index e80954b1..8ce5b7c1 100644
--- a/configs/datasets/mbpp_cn/mbpp_cn_passk_gen_1d1481.py
+++ b/configs/datasets/mbpp_cn/mbpp_cn_passk_gen_1d1481.py
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn',
+        abbr='mbpp_cn_passk',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
diff --git a/configs/datasets/mbpp_cn/mbpp_cn_repeat10_gen_1d1481.py b/configs/datasets/mbpp_cn/mbpp_cn_repeat10_gen_1d1481.py
index 3f0a6258..9ac1fc59 100644
--- a/configs/datasets/mbpp_cn/mbpp_cn_repeat10_gen_1d1481.py
+++ b/configs/datasets/mbpp_cn/mbpp_cn_repeat10_gen_1d1481.py
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn_pass10',
+        abbr='mbpp_cn_repeat10',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
diff --git a/opencompass/models/huggingface.py b/opencompass/models/huggingface.py
index d1f98864..0f6864ee 100644
--- a/opencompass/models/huggingface.py
+++ b/opencompass/models/huggingface.py
@@ -621,6 +621,7 @@ class HuggingFaceChatGLM3(HuggingFace):
                  peft_path: Optional[str] = None,
                  tokenizer_only: bool = False,
                  model_kwargs: dict = dict(device_map='auto'),
+                 generation_kwargs: dict = dict(),
                  meta_template: Optional[Dict] = None,
                  extract_pred_after_decode: bool = False,
                  batch_padding: bool = False,
@@ -634,6 +635,7 @@ class HuggingFaceChatGLM3(HuggingFace):
                          tokenizer_kwargs=tokenizer_kwargs,
                          peft_path=peft_path,
                          tokenizer_only=tokenizer_only,
+                         generation_kwargs=generation_kwargs,
                          model_kwargs=model_kwargs,
                          meta_template=meta_template,
                          extract_pred_after_decode=extract_pred_after_decode,
@@ -647,15 +649,17 @@ class HuggingFaceChatGLM3(HuggingFace):
     def generate(self,
                  inputs: List[str or PromptList],
                  max_out_len: int = 512,
-                 temperature: float = 0.6,
-                 skip_overlength=False) -> str:
+                 skip_overlength=False,
+                 **kwargs) -> str:
         """Generate response from input prompt.
 
         Args:
             inputs (list): input prompt
             max_out_len (int): max output length
-            temperature (float): temperature for sampling
         """
+        generation_kwargs = kwargs.copy()
+        generation_kwargs.update(self.generation_kwargs)
+
         responses = []
         for _input in inputs:
             assert isinstance(_input, (str, PromptList))
@@ -692,7 +696,8 @@ class HuggingFaceChatGLM3(HuggingFace):
             try:
                 response, history = self.model.chat(self.tokenizer,
                                                     user_content,
-                                                    history=history)
+                                                    history=history,
+                                                    **generation_kwargs)
                 # response will be dict sometime
                 if isinstance(response, dict):
                     response = response.get('content', '')