mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feat] update code config (#749)
* [Feat] update code dataset * [Feat] update code dataset * [Feat] update code dataset
This commit is contained in:
parent
fe0b717033
commit
327951087f
@ -0,0 +1,36 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
|
||||
# Reader settings: 'prompt' is the only input column, 'task_id' is used as
# the output column, and the train split is set to 'test'.
humaneval_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='task_id',
    train_split='test',
)
|
||||
|
||||
# TODO: allow empty output-column
|
||||
# Inference settings: a PromptTemplate with a single HUMAN round that embeds
# the record's {prompt} field, a ZeroRetriever, and a GenInferencer with
# max_out_len=512.
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Complete the following python code:\n{prompt}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        max_out_len=512,
    ),
)
|
||||
|
||||
# Evaluation settings: HumanEvaluator as the evaluator, 'BOT' as pred_role,
# and humaneval_postprocess as the prediction post-processor.
humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    # Per the original note, `k` is a humaneval-only parameter
    # (presumably the pass@k cut-offs -- TODO confirm against HumanEvaluator).
    k=[1, 10, 100],
    pred_postprocessor=dict(type=humaneval_postprocess),
)
|
||||
|
||||
# Dataset entries defined by this config: one HumanevalDataset entry wired to
# the reader / infer / eval settings declared in this file.
humaneval_datasets = [
    dict(
        abbr='openai_humaneval',
        type=HumanevalDataset,
        path='./data/humaneval/human-eval-v2-20210705.jsonl',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg,
    ),
]
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess_v2),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
|
@ -1 +0,0 @@
|
||||
./humaneval_gen_8e312c.py
|
36
configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
Normal file
36
configs/datasets/humaneval/humaneval_passk_gen_8e312c.py
Normal file
@ -0,0 +1,36 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
# Reader settings: 'prompt' is the only input column, 'task_id' is used as
# the output column, and the train split is set to 'test'.
humaneval_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='task_id',
    train_split='test',
)
|
||||
|
||||
# TODO: allow empty output-column
|
||||
# Inference settings: a PromptTemplate with a single HUMAN round that embeds
# the record's {prompt} field, a ZeroRetriever, and a GenInferencer with
# max_out_len=512.
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Complete the following python code:\n{prompt}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        max_out_len=512,
    ),
)
|
||||
|
||||
# Evaluation settings: HumanEvaluator as the evaluator, 'BOT' as pred_role,
# and humaneval_postprocess_v2 as the prediction post-processor.
humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    # Per the original note, `k` is a humaneval-only parameter
    # (presumably the pass@k cut-offs -- TODO confirm against HumanEvaluator).
    k=[1, 10, 100],
    pred_postprocessor=dict(type=humaneval_postprocess_v2),
)
|
||||
|
||||
# Dataset entries defined by this config: one HumanevalDataset entry
# (abbr 'openai_humaneval_passk') wired to the reader / infer / eval settings
# declared in this file.
humaneval_datasets = [
    dict(
        abbr='openai_humaneval_passk',
        type=HumanevalDataset,
        path='./data/humaneval/human-eval-v2-20210705.jsonl',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg,
    ),
]
|
@ -1,7 +1,7 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
humaneval_reader_cfg = dict(
|
||||
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||
@ -22,12 +22,12 @@ humaneval_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type=humaneval_postprocess),
|
||||
pred_postprocessor=dict(type=humaneval_postprocess_v2),
|
||||
)
|
||||
|
||||
humaneval_datasets = [
|
||||
dict(
|
||||
abbr='openai_humaneval_pass10',
|
||||
abbr='openai_humaneval_repeat10',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval/human-eval-v2-20210705.jsonl',
|
||||
num_repeats=10,
|
||||
|
@ -1 +0,0 @@
|
||||
./humaneval_cn_gen_6313aa.py
|
@ -0,0 +1,37 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
# Reader settings: 'prompt' is the only input column, 'task_id' is used as
# the output column, and the train split is set to 'test'.
humaneval_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='task_id',
    train_split='test',
)
|
||||
|
||||
# TODO: allow empty output-column
|
||||
# Inference settings: a PromptTemplate with a single HUMAN round carrying a
# Chinese-language instruction followed by the record's {prompt} field, a
# ZeroRetriever, and a GenInferencer with max_out_len=512.
humaneval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='完成以下Python代码任务:\n{prompt}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        max_out_len=512,
    ),
)
|
||||
|
||||
# Evaluation settings: HumanEvaluator as the evaluator, 'BOT' as pred_role,
# and humaneval_postprocess_v2 as the prediction post-processor.
humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    # Per the original note, `k` is a humaneval-only parameter
    # (presumably the pass@k cut-offs -- TODO confirm against HumanEvaluator).
    k=[1, 10, 100],
    pred_postprocessor=dict(type=humaneval_postprocess_v2),
)
|
||||
|
||||
# Dataset entries defined by this config: one HumanevalDataset entry
# (abbr 'openai_humaneval_cn_passk') reading the humaneval_cn JSONL file and
# wired to the reader / infer / eval settings declared in this file.
humaneval_cn_datasets = [
    dict(
        abbr='openai_humaneval_cn_passk',
        type=HumanevalDataset,
        path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
        reader_cfg=humaneval_reader_cfg,
        infer_cfg=humaneval_infer_cfg,
        eval_cfg=humaneval_eval_cfg,
    ),
]
|
@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
|
||||
|
||||
humaneval_cn_datasets = [
|
||||
dict(
|
||||
abbr='openai_humaneval_cn_pass10',
|
||||
abbr='openai_humaneval_cn_repeat10',
|
||||
type=HumanevalDataset,
|
||||
path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
|
||||
num_repeats=10,
|
||||
|
@ -19,7 +19,7 @@ humaneval_plus_infer_cfg = dict(
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
|
||||
humaneval_plus_eval_cfg = dict(
|
||||
evaluator=dict(type=HumanEvaluator,k=1, metric='EvalPlus'),
|
||||
evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
|
||||
pred_role='BOT',
|
||||
k=[1, 10, 100], # the parameter only for humaneval
|
||||
pred_postprocessor=dict(type=humaneval_postprocess_v2),
|
||||
|
@ -0,0 +1,36 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
# Reader settings: 'prompt' is the only input column, 'task_id' is used as
# the output column, and the train split is set to 'test'.
humaneval_plus_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='task_id',
    train_split='test',
)
|
||||
|
||||
# TODO: allow empty output-column
|
||||
# Inference settings: a PromptTemplate with a single HUMAN round that embeds
# the record's {prompt} field, a ZeroRetriever, and a GenInferencer with
# max_out_len=512.
humaneval_plus_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Complete the following python code:\n{prompt}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        max_out_len=512,
    ),
)
|
||||
|
||||
# Evaluation settings: HumanEvaluator configured with metric='EvalPlus',
# 'BOT' as pred_role, and humaneval_postprocess_v2 as the prediction
# post-processor.
humaneval_plus_eval_cfg = dict(
    evaluator=dict(
        type=HumanEvaluator,
        metric='EvalPlus',
    ),
    pred_role='BOT',
    # Per the original note, `k` is a humaneval-only parameter
    # (presumably the pass@k cut-offs -- TODO confirm against HumanEvaluator).
    k=[1, 10, 100],
    pred_postprocessor=dict(type=humaneval_postprocess_v2),
)
|
||||
|
||||
# Dataset entries defined by this config: one HumanevalDataset entry
# (abbr 'humaneval_plus_passk') wired to the reader / infer / eval settings
# declared in this file.
humaneval_plus_datasets = [
    dict(
        abbr='humaneval_plus_passk',
        type=HumanevalDataset,
        path='./data/humaneval/human-eval-v2-20210705.jsonl',
        reader_cfg=humaneval_plus_reader_cfg,
        infer_cfg=humaneval_plus_infer_cfg,
        eval_cfg=humaneval_plus_eval_cfg,
    ),
]
|
@ -0,0 +1,37 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
|
||||
|
||||
# Reader settings: 'prompt' is the only input column, 'task_id' is used as
# the output column, and the train split is set to 'test'.
humaneval_plus_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='task_id',
    train_split='test',
)
|
||||
|
||||
# TODO: allow empty output-column
|
||||
# Inference settings: a PromptTemplate with a single HUMAN round that embeds
# the record's {prompt} field, a ZeroRetriever, and a GenInferencer with
# max_out_len=512.
humaneval_plus_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='Complete the following python code:\n{prompt}',
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(
        type=GenInferencer,
        max_out_len=512,
    ),
)
|
||||
|
||||
# Evaluation settings: HumanEvaluator configured with metric='EvalPlus',
# 'BOT' as pred_role, and humaneval_postprocess_v2 as the prediction
# post-processor.
humaneval_plus_eval_cfg = dict(
    evaluator=dict(
        type=HumanEvaluator,
        metric='EvalPlus',
    ),
    pred_role='BOT',
    # Per the original note, `k` is a humaneval-only parameter
    # (presumably the pass@k cut-offs -- TODO confirm against HumanEvaluator).
    k=[1, 10, 100],
    pred_postprocessor=dict(type=humaneval_postprocess_v2),
)
|
||||
|
||||
# Dataset entries defined by this config: one HumanevalDataset entry
# (abbr 'humaneval_plus_repeat10') with num_repeats=10, wired to the
# reader / infer / eval settings declared in this file.
humaneval_plus_datasets = [
    dict(
        abbr='humaneval_plus_repeat10',
        type=HumanevalDataset,
        path='./data/humaneval/human-eval-v2-20210705.jsonl',
        num_repeats=10,
        reader_cfg=humaneval_plus_reader_cfg,
        infer_cfg=humaneval_plus_infer_cfg,
        eval_cfg=humaneval_plus_eval_cfg,
    ),
]
|
@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
|
||||
mbpp_datasets = [
|
||||
dict(
|
||||
type=MBPPDataset_V2,
|
||||
abbr='mbpp',
|
||||
abbr='mbpp_passk',
|
||||
path='./data/mbpp/mbpp.jsonl',
|
||||
reader_cfg=mbpp_reader_cfg,
|
||||
infer_cfg=mbpp_infer_cfg,
|
||||
|
@ -58,7 +58,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
|
||||
mbpp_datasets = [
|
||||
dict(
|
||||
type=MBPPDataset_V2,
|
||||
abbr='mbpp_pass10',
|
||||
abbr='mbpp_repeat10',
|
||||
path='./data/mbpp/mbpp.jsonl',
|
||||
num_repeats=10,
|
||||
reader_cfg=mbpp_reader_cfg,
|
||||
|
@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_rol
|
||||
sanitized_mbpp_datasets = [
|
||||
dict(
|
||||
type=SanitizedMBPPDataset,
|
||||
abbr='sanitized_mbpp',
|
||||
abbr='sanitized_mbpp_passk',
|
||||
path='./sanitized-mbpp.jsonl',
|
||||
reader_cfg=sanitized_mbpp_reader_cfg,
|
||||
infer_cfg=sanitized_mbpp_infer_cfg,
|
||||
|
@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_rol
|
||||
sanitized_mbpp_datasets = [
|
||||
dict(
|
||||
type=SanitizedMBPPDataset,
|
||||
abbr='sanitized_mbpp_pass10',
|
||||
abbr='sanitized_mbpp_repeat10',
|
||||
path='./sanitized-mbpp.jsonl',
|
||||
num_repeats=10,
|
||||
reader_cfg=sanitized_mbpp_reader_cfg,
|
||||
|
@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
|
||||
mbpp_cn_datasets = [
|
||||
dict(
|
||||
type=MBPPDataset_V2,
|
||||
abbr='mbpp_cn',
|
||||
abbr='mbpp_cn_passk',
|
||||
path='./data/mbpp_cn/mbpp_cn.jsonl',
|
||||
reader_cfg=mbpp_reader_cfg,
|
||||
infer_cfg=mbpp_infer_cfg,
|
||||
|
@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
|
||||
mbpp_cn_datasets = [
|
||||
dict(
|
||||
type=MBPPDataset_V2,
|
||||
abbr='mbpp_cn_pass10',
|
||||
abbr='mbpp_cn_repeat10',
|
||||
path='./data/mbpp_cn/mbpp_cn.jsonl',
|
||||
num_repeats=10,
|
||||
reader_cfg=mbpp_reader_cfg,
|
||||
|
@ -621,6 +621,7 @@ class HuggingFaceChatGLM3(HuggingFace):
|
||||
peft_path: Optional[str] = None,
|
||||
tokenizer_only: bool = False,
|
||||
model_kwargs: dict = dict(device_map='auto'),
|
||||
generation_kwargs: dict = dict(),
|
||||
meta_template: Optional[Dict] = None,
|
||||
extract_pred_after_decode: bool = False,
|
||||
batch_padding: bool = False,
|
||||
@ -634,6 +635,7 @@ class HuggingFaceChatGLM3(HuggingFace):
|
||||
tokenizer_kwargs=tokenizer_kwargs,
|
||||
peft_path=peft_path,
|
||||
tokenizer_only=tokenizer_only,
|
||||
generation_kwargs=generation_kwargs,
|
||||
model_kwargs=model_kwargs,
|
||||
meta_template=meta_template,
|
||||
extract_pred_after_decode=extract_pred_after_decode,
|
||||
@ -647,15 +649,17 @@ class HuggingFaceChatGLM3(HuggingFace):
|
||||
def generate(self,
|
||||
inputs: List[str or PromptList],
|
||||
max_out_len: int = 512,
|
||||
temperature: float = 0.6,
|
||||
skip_overlength=False) -> str:
|
||||
skip_overlength=False,
|
||||
**kwargs) -> str:
|
||||
"""Generate response from input prompt.
|
||||
|
||||
Args:
|
||||
inputs (list): input prompt
|
||||
max_out_len (int): max output length
|
||||
temperature (float): temperature for sampling
|
||||
"""
|
||||
generation_kwargs = kwargs.copy()
|
||||
generation_kwargs.update(self.generation_kwargs)
|
||||
|
||||
responses = []
|
||||
for _input in inputs:
|
||||
assert isinstance(_input, (str, PromptList))
|
||||
@ -692,7 +696,8 @@ class HuggingFaceChatGLM3(HuggingFace):
|
||||
try:
|
||||
response, history = self.model.chat(self.tokenizer,
|
||||
user_content,
|
||||
history=history)
|
||||
history=history,
|
||||
**generation_kwargs)
|
||||
# response will be dict sometime
|
||||
if isinstance(response, dict):
|
||||
response = response.get('content', '')
|
||||
|
Loading…
Reference in New Issue
Block a user