mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
252 lines
7.8 KiB
Python
252 lines
7.8 KiB
Python
# Copyright (c) 2024, LiveCodeBench and its contributors.
|
|
# Copyright (c) 2023, OpenCompass and its contributors.
|
|
|
|
import base64
|
|
import json
|
|
import pickle
|
|
import zlib
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
|
|
from datasets import DatasetDict, load_dataset, load_from_disk
|
|
|
|
from opencompass.utils import get_data_path # noqa: F401, F403
|
|
|
|
from ..base import BaseDataset
|
|
from .prompts import SelfRepairPromptConstants # noqa: F401, F403
|
|
from .prompts import TestOutputPromptConstants # noqa: F401, F403
|
|
from .prompts import (CodeGenerationPromptConstants,
|
|
get_generic_question_template_answer_self_repair,
|
|
get_generic_question_template_test_completion,
|
|
make_code_execution_prompt)
|
|
|
|
|
|
class Platform(Enum):
    """Enumeration of platform identifiers.

    Each member's value is the lowercase string form of its name, so a raw
    string such as ``'leetcode'`` can be turned into a member with
    ``Platform('leetcode')``.
    """

    LEETCODE = 'leetcode'
    CODEFORCES = 'codeforces'
    ATCODER = 'atcoder'
|
|
|
|
|
|
class Difficulty(Enum):
    """Enumeration of difficulty labels: ``easy``, ``medium`` and ``hard``.

    Members are declared in that order; each value is the lowercase string
    form of the member name.
    """

    EASY = 'easy'
    MEDIUM = 'medium'
    HARD = 'hard'
|
|
|
|
|
|
class TestType(Enum):
    """Enumeration of test-case kinds: ``stdin`` and ``functional``.

    Each value is the lowercase string form of the member name, so
    ``TestType('stdin')`` yields ``TestType.STDIN``.
    """

    STDIN = 'stdin'
    FUNCTIONAL = 'functional'
|
|
|
|
|
|
@dataclass
class Test:
    """One test case: an input string, an output string and a test type.

    ``testtype`` may be passed either as a ``TestType`` member or as its raw
    value (for example ``'stdin'``); after construction it is always a
    ``TestType`` member.
    """

    input: str
    output: str
    testtype: TestType

    def __post_init__(self):
        # Coerce whatever was supplied into the matching enum member; an
        # unknown value makes the TestType lookup raise ValueError.
        self.testtype = TestType(self.testtype)
|
|
|
|
|
|
class LCBCodeGenerationDataset(BaseDataset):
    """Code-generation dataset loader that attaches prompts and test data."""

    @staticmethod
    def load(path: str = 'opencompass/code_generation_lite',
             local_mode: bool = False,
             release_version: str = 'release_v1'):
        """Load the ``test`` split and add the derived columns.

        Args:
            path: Dataset identifier; resolved through ``get_data_path``
                before being handed to ``load_dataset``.
            local_mode: Forwarded to ``get_data_path``.
            release_version: Passed to ``load_dataset`` as ``version_tag``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` and ``'train'`` entries are
            the same mapped dataset.
        """

        def transform(item):
            """Add ``format_prompt`` and ``evaluation_sample`` to one row."""
            # The formatting instructions depend on whether the row carries
            # starter code.
            starter_code = item['starter_code']
            if starter_code:
                format_prompt = f'### Format: {CodeGenerationPromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n'  # noqa: E501
                format_prompt += f'```python\n{starter_code}\n```\n\n'
            else:
                format_prompt = f'### Format: {CodeGenerationPromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n'  # noqa: E501
                format_prompt += '```python\n# YOUR CODE HERE\n```\n\n'
            item['format_prompt'] = format_prompt

            # Public test cases are stored as a JSON string.
            public_test_cases = json.loads(item['public_test_cases'])

            # Private test cases are either plain JSON or, when that parse
            # fails, base64 -> zlib -> pickle -> JSON text.
            raw_private = item['private_test_cases']
            try:
                private_test_cases = json.loads(raw_private)
            except json.JSONDecodeError:
                # SECURITY: pickle.loads can execute arbitrary code while
                # deserialising. Only load this dataset from a trusted
                # source.
                private_test_cases = json.loads(
                    pickle.loads(
                        zlib.decompress(
                            base64.b64decode(raw_private.encode('utf-8')))))

            metadata = json.loads(item['metadata'])

            # Build the combined list once; public cases come first.
            all_test_cases = public_test_cases + private_test_cases
            item['evaluation_sample'] = json.dumps({
                'inputs': [t['input'] for t in all_test_cases],
                'outputs': [t['output'] for t in all_test_cases],
                'fn_name': metadata.get('func_name', None),
            })

            return item

        path = get_data_path(path, local_mode=local_mode)

        dataset = load_dataset(
            path,  # 'livecodebench/code_generation_lite'
            split='test',
            version_tag=release_version,
            trust_remote_code=True)

        dataset = dataset.map(transform)

        # The same data is exposed under both split names.
        return DatasetDict({'test': dataset, 'train': dataset})
|
|
|
|
|
|
class LCBCodeExecutionDataset(BaseDataset):
    """Code-execution dataset loader that attaches a prompt to every row."""

    @staticmethod
    def load(
        path: str = 'opencompass/execution-v2',
        local_mode: bool = False,
        cot: bool = False,
    ):
        """Load the ``test`` split and add ``prompt`` / ``evaluation_sample``.

        Args:
            path: Dataset identifier; resolved through ``get_data_path``.
            local_mode: Forwarded to ``get_data_path``.
            cot: Forwarded to ``make_code_execution_prompt``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` and ``'train'`` entries are
            the same mapped dataset.
        """

        def transform(item):
            """Add the prompt and a JSON evaluation record to one row."""
            program = item['code']
            program_input = item['input']
            item['prompt'] = make_code_execution_prompt(program,
                                                        program_input,
                                                        cot=cot)

            # Serialised record with the row's code, input and output.
            record = {
                'code': item['code'],
                'input': item['input'],
                'output': item['output'],
            }
            item['evaluation_sample'] = json.dumps(record)
            return item

        resolved_path = get_data_path(path, local_mode=local_mode)
        # 'livecodebench/execution-v2'
        mapped = load_dataset(resolved_path, split='test').map(transform)

        # The same data is exposed under both split names.
        return DatasetDict({'test': mapped, 'train': mapped})
|
|
|
|
|
|
class LCBTestOutputPredictionDataset(BaseDataset):
    """Test-output-prediction dataset loader built on the first test case."""

    @staticmethod
    def load(
        path: str = 'opencompass/test_generation',
        local_mode: bool = False,
    ):
        """Load the ``test`` split and add the derived columns.

        Args:
            path: Dataset identifier; resolved through ``get_data_path``.
            local_mode: Forwarded to ``get_data_path``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` and ``'train'`` entries are
            the same mapped dataset.
        """

        def transform(item):
            """Add test-case fields, the prompt and the evaluation record."""
            # ``item['test']`` is a JSON list; only its first entry is used.
            first_case = json.loads(item['test'])[0]
            case_input = first_case['input']
            case_output = first_case['output']

            item['testcase_input'] = case_input
            item['testcase_output'] = case_output

            item['prompt'] = get_generic_question_template_test_completion(
                question_content=item['question_content'],
                starter_code=item['starter_code'],
                testcase_input=case_input)

            # The record pairs the question text with the expected output.
            item['evaluation_sample'] = json.dumps({
                'input': item['question_content'],
                'output': case_output,
            })
            return item

        resolved_path = get_data_path(path, local_mode=local_mode)
        # 'livecodebench/test_generation'
        mapped = load_dataset(resolved_path,
                              split='test',
                              trust_remote_code=True).map(transform)

        # The same data is exposed under both split names.
        return DatasetDict({'test': mapped, 'train': mapped})
|
|
|
|
|
|
class LCBSelfRepairDataset(BaseDataset):
    """Self-repair dataset loader that attaches a repair prompt to each row."""

    @staticmethod
    def load(path: str = 'livecodebench/code_generation_lite',
             local_mode: bool = False,
             release_version: str = 'release_v1'):
        """Load the ``test`` split and add a ``prompt`` column.

        Args:
            path: Dataset identifier passed directly to ``load_dataset``.
            local_mode: Accepted for signature parity with the other
                loaders; this method does not use it.
            release_version: Passed to ``load_dataset`` as ``version_tag``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` and ``'train'`` entries are
            the same mapped dataset.
        """

        def transform(item):
            """Build the self-repair prompt for one row."""
            question = item['question_content']
            # Only the first entry of ``code_list`` is used.
            code = item['code_list'][0]
            metadata = item['metadata']

            prompt = get_generic_question_template_answer_self_repair(
                question=question, code=code, metadata=metadata)
            item['prompt'] = prompt

            # The row must be returned: when the mapped function returns
            # None, ``Dataset.map`` leaves the dataset unchanged and the
            # ``prompt`` column would never be added.
            return item

        dataset = load_dataset(path,
                               split='test',
                               version_tag=release_version,
                               trust_remote_code=True)
        dataset = dataset.map(transform)

        # The same data is exposed under both split names.
        return DatasetDict({'test': dataset, 'train': dataset})
|
|
|
|
|
|
class CompassBenchCodeExecutionDataset(BaseDataset):
    """Code-execution dataset loader that reads a dataset saved on disk."""

    @staticmethod
    def load(
        path: str = 'opencompass/execution-v2',
        local_mode: bool = False,
        cot: bool = False,
    ):
        """Load the on-disk ``test`` split and add the derived columns.

        Args:
            path: Dataset identifier; resolved through ``get_data_path`` and
                then opened with ``load_from_disk``.
            local_mode: Forwarded to ``get_data_path``.
            cot: Forwarded to ``make_code_execution_prompt``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` and ``'train'`` entries are
            the same mapped dataset.
        """

        def transform(item):
            """Add the prompt and a JSON evaluation record to one row."""
            program = item['code']
            program_input = item['input']
            item['prompt'] = make_code_execution_prompt(program,
                                                        program_input,
                                                        cot=cot)

            # Serialised record with the row's code, input and output.
            record = {
                'code': item['code'],
                'input': item['input'],
                'output': item['output'],
            }
            item['evaluation_sample'] = json.dumps(record)
            return item

        resolved_path = get_data_path(path, local_mode=local_mode)
        # 'livecodebench/execution-v2'
        saved = load_from_disk(resolved_path)
        mapped = saved['test'].map(transform)

        # The same data is exposed under both split names.
        return DatasetDict({'test': mapped, 'train': mapped})
|