mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
60 lines
2.1 KiB
Python
60 lines
2.1 KiB
Python
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator
|
|
|
|
humanevalx_reader_cfg = dict(
|
|
input_columns=['prompt'], output_column='task_id', train_split='test')
|
|
|
|
# This prompt is used for WizardLMCode series
|
|
# You can use 620cfa for basic generation
|
|
humanevalx_infer_cfg = {
|
|
lang: dict(
|
|
prompt_template=dict(
|
|
type=PromptTemplate,
|
|
template=dict(round=[
|
|
dict(
|
|
role='HUMAN',
|
|
prompt=
|
|
f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
|
|
|
|
### Instruction:
|
|
Create a {lang} script for this problem:
|
|
{{prompt}}
|
|
|
|
### Response:"""),
|
|
])),
|
|
retriever=dict(type=ZeroRetriever),
|
|
inferencer=dict(type=GenInferencer, max_out_len=1024))
|
|
for lang in ['python', 'cpp', 'go', 'java', 'js']
|
|
}
|
|
|
|
humanevalx_eval_cfg_dict = {
|
|
lang: dict(
|
|
evaluator=dict(
|
|
type=HumanevalXEvaluator,
|
|
language=lang,
|
|
ip_address=
|
|
"localhost", # replace to your code_eval_server ip_address, port
|
|
port=5001
|
|
), # refer to https://opencompass.readthedocs.io/en/latest/advanced_guides/code_eval_service.html to launch a server
|
|
pred_role='BOT')
|
|
for lang in ['python', 'cpp', 'go', 'java', 'js'
|
|
] # do not support rust now
|
|
}
|
|
|
|
# Please download the needed `xx.jsonl.gz` from
|
|
# https://github.com/THUDM/CodeGeeX2/tree/main/benchmark/humanevalx
|
|
# and move them into `data/humanevalx/` folder
|
|
humanevalx_datasets = [
|
|
dict(
|
|
type=HumanevalXDataset,
|
|
abbr=f'humanevalx-{lang}',
|
|
language=lang,
|
|
path='./data/humanevalx',
|
|
reader_cfg=humanevalx_reader_cfg,
|
|
infer_cfg=humanevalx_infer_cfg[lang],
|
|
eval_cfg=humanevalx_eval_cfg_dict[lang])
|
|
for lang in ['python', 'cpp', 'go', 'java', 'js']
|
|
] |