Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feat] update code config (#749)
* [Feat] update code dataset
This commit is contained in:
parent fe0b717033
commit 327951087f
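For context, these dataset configs are consumed by an OpenCompass entry config. A minimal sketch of wiring the new pass@k variant into a run, assuming the usual `read_base()` layout (the model import path is a placeholder, not part of this commit):

    from mmengine.config import read_base

    with read_base():
        # the new pass@k HumanEval config added by this commit
        from .datasets.humaneval.humaneval_passk_gen_8e312c import \
            humaneval_datasets
        # any model config works here; this path is illustrative
        from .models.hf_internlm.hf_internlm_7b import models

    datasets = [*humaneval_datasets]

Running `python run.py <entry-config>.py` would then evaluate the imported models on these datasets.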
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2

 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )

 humaneval_datasets = [
@@ -1 +0,0 @@
-./humaneval_gen_8e312c.py
configs/datasets/humaneval/humaneval_passk_gen_8e312c.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_datasets = [
+    dict(
+        abbr='openai_humaneval_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+]
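The `k=[1, 10, 100]` setting feeds the unbiased pass@k estimator from the HumanEval paper (Chen et al., 2021): with n samples per task of which c pass, pass@k = 1 - C(n-c, k) / C(n, k). A sketch of that estimator (not necessarily the evaluator's exact code):

    import numpy as np

    def pass_at_k(n: int, c: int, k: int) -> float:
        """Unbiased pass@k: n samples drawn per task, c of them correct."""
        if n - c < k:
            return 1.0  # every size-k subset contains a correct sample
        # numerically stable product form of 1 - C(n-c, k) / C(n, k)
        return 1.0 - float(np.prod(1.0 - k / np.arange(n - c + 1, n + 1)))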
@@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2

 humaneval_reader_cfg = dict(
     input_columns=['prompt'], output_column='task_id', train_split='test')
@@ -22,12 +22,12 @@ humaneval_eval_cfg = dict(
     evaluator=dict(type=HumanEvaluator),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type=humaneval_postprocess),
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
 )

 humaneval_datasets = [
     dict(
-        abbr='openai_humaneval_pass10',
+        abbr='openai_humaneval_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval/human-eval-v2-20210705.jsonl',
         num_repeats=10,
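The repeat10 variants rely on `num_repeats=10`: pass@10 needs at least ten candidate completions per problem, so the dataset duplicates each problem ten times and one inference pass produces the required candidates, which the evaluator groups back by task_id. Conceptually (a hypothetical illustration, not the dataset's actual code):

    def repeat_dataset(examples: list, num_repeats: int = 10) -> list:
        # duplicate every problem so a single inference pass yields
        # num_repeats candidate completions per task
        return [ex for ex in examples for _ in range(num_repeats)]

Sampling (rather than greedy decoding) must be enabled on the model side, otherwise the ten candidates are identical.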
@@ -1 +0,0 @@
-./humaneval_cn_gen_6313aa.py
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='完成以下Python代码任务:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_cn_datasets = [
+    dict(
+        abbr='openai_humaneval_cn_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
+        reader_cfg=humaneval_reader_cfg,
+        infer_cfg=humaneval_infer_cfg,
+        eval_cfg=humaneval_eval_cfg)
+
+]
@@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(

 humaneval_cn_datasets = [
     dict(
-        abbr='openai_humaneval_cn_pass10',
+        abbr='openai_humaneval_cn_repeat10',
         type=HumanevalDataset,
         path='./data/humaneval_cn/human-eval-cn-v2-20210705.jsonl',
         num_repeats=10,
@@ -19,7 +19,7 @@ humaneval_plus_infer_cfg = dict(
     inferencer=dict(type=GenInferencer, max_out_len=512))

 humaneval_plus_eval_cfg = dict(
-    evaluator=dict(type=HumanEvaluator, k=1, metric='EvalPlus'),
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
     pred_role='BOT',
     k=[1, 10, 100],  # the parameter only for humaneval
     pred_postprocessor=dict(type=humaneval_postprocess_v2),
@@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_plus_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_plus_datasets = [
+    dict(
+        abbr='humaneval_plus_passk',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess_v2
+
+humaneval_plus_reader_cfg = dict(
+    input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_plus_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role='HUMAN',
+                prompt='Complete the following python code:\n{prompt}'),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer, max_out_len=512))
+
+humaneval_plus_eval_cfg = dict(
+    evaluator=dict(type=HumanEvaluator, metric='EvalPlus'),
+    pred_role='BOT',
+    k=[1, 10, 100],  # the parameter only for humaneval
+    pred_postprocessor=dict(type=humaneval_postprocess_v2),
+)
+
+humaneval_plus_datasets = [
+    dict(
+        abbr='humaneval_plus_repeat10',
+        type=HumanevalDataset,
+        path='./data/humaneval/human-eval-v2-20210705.jsonl',
+        num_repeats=10,
+        reader_cfg=humaneval_plus_reader_cfg,
+        infer_cfg=humaneval_plus_infer_cfg,
+        eval_cfg=humaneval_plus_eval_cfg)
+]
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp',
+        abbr='mbpp_passk',
         path='./data/mbpp/mbpp.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
@@ -58,7 +58,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_pass10',
+        abbr='mbpp_repeat10',
         path='./data/mbpp/mbpp.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
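Renaming the abbrs ('mbpp' to 'mbpp_passk', 'mbpp_pass10' to 'mbpp_repeat10') keeps result keys distinct, so both variants can coexist in a single run. A sketch, with hypothetical import paths (the actual config file names may differ):

    from mmengine.config import read_base

    with read_base():
        # hypothetical paths for the two MBPP variants
        from .datasets.mbpp.mbpp_passk_gen import mbpp_datasets as mbpp_passk
        from .datasets.mbpp.mbpp_repeat10_gen import mbpp_datasets as mbpp_repeat10

    datasets = [*mbpp_passk, *mbpp_repeat10]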
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp',
+        abbr='sanitized_mbpp_passk',
         path='./sanitized-mbpp.jsonl',
         reader_cfg=sanitized_mbpp_reader_cfg,
         infer_cfg=sanitized_mbpp_infer_cfg,
@@ -56,7 +56,7 @@ sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 sanitized_mbpp_datasets = [
     dict(
         type=SanitizedMBPPDataset,
-        abbr='sanitized_mbpp_pass10',
+        abbr='sanitized_mbpp_repeat10',
         path='./sanitized-mbpp.jsonl',
         num_repeats=10,
         reader_cfg=sanitized_mbpp_reader_cfg,
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn',
+        abbr='mbpp_cn_passk',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         reader_cfg=mbpp_reader_cfg,
         infer_cfg=mbpp_infer_cfg,
@@ -56,7 +56,7 @@ mbpp_eval_cfg = dict(evaluator=dict(type=MBPPPassKEvaluator), pred_role="BOT")
 mbpp_cn_datasets = [
     dict(
         type=MBPPDataset_V2,
-        abbr='mbpp_cn_pass10',
+        abbr='mbpp_cn_repeat10',
         path='./data/mbpp_cn/mbpp_cn.jsonl',
         num_repeats=10,
         reader_cfg=mbpp_reader_cfg,
@@ -621,6 +621,7 @@ class HuggingFaceChatGLM3(HuggingFace):
                  peft_path: Optional[str] = None,
                  tokenizer_only: bool = False,
                  model_kwargs: dict = dict(device_map='auto'),
+                 generation_kwargs: dict = dict(),
                  meta_template: Optional[Dict] = None,
                  extract_pred_after_decode: bool = False,
                  batch_padding: bool = False,
@@ -634,6 +635,7 @@ class HuggingFaceChatGLM3(HuggingFace):
             tokenizer_kwargs=tokenizer_kwargs,
             peft_path=peft_path,
             tokenizer_only=tokenizer_only,
+            generation_kwargs=generation_kwargs,
             model_kwargs=model_kwargs,
             meta_template=meta_template,
             extract_pred_after_decode=extract_pred_after_decode,
@@ -647,15 +649,17 @@ class HuggingFaceChatGLM3(HuggingFace):
     def generate(self,
                  inputs: List[str or PromptList],
                  max_out_len: int = 512,
-                 temperature: float = 0.6,
-                 skip_overlength=False) -> str:
+                 skip_overlength=False,
+                 **kwargs) -> str:
         """Generate response from input prompt.

         Args:
             inputs (list): input prompt
             max_out_len (int): max output length
-            temperature (float): temperature for sampling
         """
+        generation_kwargs = kwargs.copy()
+        generation_kwargs.update(self.generation_kwargs)

         responses = []
         for _input in inputs:
             assert isinstance(_input, (str, PromptList))
@@ -692,7 +696,8 @@ class HuggingFaceChatGLM3(HuggingFace):
             try:
                 response, history = self.model.chat(self.tokenizer,
                                                     user_content,
-                                                    history=history)
+                                                    history=history,
+                                                    **generation_kwargs)
                 # response will be dict sometime
                 if isinstance(response, dict):
                     response = response.get('content', '')
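With the new `generation_kwargs` hook, sampling parameters that used to be hard-coded (the removed `temperature: float = 0.6`) are supplied per model config and forwarded into `self.model.chat(...)`. A sketch of a model entry using it (the model path and sampling values are illustrative, not from this commit):

    from opencompass.models import HuggingFaceChatGLM3

    models = [
        dict(
            type=HuggingFaceChatGLM3,
            path='THUDM/chatglm3-6b',           # placeholder model path
            tokenizer_path='THUDM/chatglm3-6b',  # placeholder tokenizer path
            # enables sampling so the repeat10 configs get distinct candidates
            generation_kwargs=dict(do_sample=True, top_p=0.8, temperature=0.6),
            max_out_len=512,
            batch_size=8,
        )
    ]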