This commit is contained in:
Dongsheng Zhu 2025-03-04 09:16:02 +00:00
parent 63c7970937
commit a7070ba2d2

View File

@@ -6,8 +6,8 @@ import json
import pickle import pickle
import zlib import zlib
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum
from datetime import datetime from datetime import datetime
from enum import Enum
from datasets import DatasetDict, load_dataset, load_from_disk from datasets import DatasetDict, load_dataset, load_from_disk
@@ -111,13 +111,14 @@ class LCBCodeGenerationDataset(BaseDataset):
dataset = dataset.map(transform) dataset = dataset.map(transform)
if start_date is not None: if start_date is not None:
p_start_date = datetime.strptime(start_date, "%Y-%m-%d") p_start_date = datetime.strptime(start_date, '%Y-%m-%d')
dataset = dataset.filter( dataset = dataset.filter(
lambda e: p_start_date <= datetime.fromisoformat(e['contest_date'])) # noqa: E501 lambda e: p_start_date <= datetime.fromisoformat(e[
'contest_date'])) # noqa: E501
if end_date is not None: if end_date is not None:
p_end_date = datetime.strptime(end_date, "%Y-%m-%d") p_end_date = datetime.strptime(end_date, '%Y-%m-%d')
dataset = dataset.filter( dataset = dataset.filter(lambda e: datetime.fromisoformat(e[
lambda e: datetime.fromisoformat(e['contest_date']) <= p_end_date) # noqa: E501 'contest_date']) <= p_end_date) # noqa: E501
return DatasetDict({'test': dataset, 'train': dataset}) return DatasetDict({'test': dataset, 'train': dataset})