import concurrent.futures
import json
import os
import os.path as osp
import re
import subprocess

import h5py
import numpy as np
import scipy
import scipy.sparse
import sympy
from datasets import Dataset

from opencompass.openicl.icl_evaluator import BaseEvaluator
from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
from opencompass.utils import get_data_path

from .base import BaseDataset

@LOAD_DATASET.register_module()
class SciCodeDataset(BaseDataset):

    @staticmethod
    def load(path, with_bg, **kwargs):
        test_data = []
        path = get_data_path(path, local_mode=True)
        if with_bg:  # test with background
            file_path = osp.join(path,
                                 'SciCode_datasets_with_background.json')
        else:  # test w/o background
            file_path = osp.join(path, 'SciCode_datasets.json')
        with open(file_path, 'r', encoding='utf-8') as file:
            test_data = json.load(file)
        dataset = Dataset.from_list(test_data)
        return dataset

    def return_dataset(self):
        return self.dataset
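
# Note: each record loaded from the JSON files above is expected to provide
# at least the fields consumed by SciCodeEvaluator.score below -- 'id' (the
# problem id), 'import' (shared import statements prepended to the generated
# script) and 'test' (the per-sub-problem test snippets).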

def process_hdf5_list(group):
    lst = []
    for key in group.keys():
        lst.append(group[key][()])
    return lst


def process_hdf5_dict(group):
    dict = {}
    for key, obj in group.items():
        if isinstance(obj, h5py.Group):
            dict[key] = process_hdf5_sparse_matrix(obj['sparse_matrix'])
        elif isinstance(obj[()], bytes):
            dict[key] = obj[()].decode('utf-8', errors='strict')
        else:
            try:
                tmp = float(key)
                dict[tmp] = obj[()]
            except ValueError:
                dict[key] = obj[()]
    return dict


def process_hdf5_sparse_matrix(group):
    data = group['data'][()]
    shape = tuple(group['shape'][()])
    if 'row' in group and 'col' in group:
        row = group['row'][()]
        col = group['col'][()]
        return scipy.sparse.coo_matrix((data, (row, col)), shape=shape)
    elif 'blocksize' in group:
        indices = group['indices'][()]
        indptr = group['indptr'][()]
        blocksize = tuple(group['blocksize'][()])
        return scipy.sparse.bsr_matrix((data, indices, indptr),
                                       shape=shape,
                                       blocksize=blocksize)
    else:
        indices = group['indices'][()]
        indptr = group['indptr'][()]
        return scipy.sparse.csr_matrix((data, indices, indptr), shape=shape)


def process_hdf5_datagroup(group):
    for key in group.keys():
        if key == 'list':
            return process_hdf5_list(group[key])
        if key == 'sparse_matrix':
            return process_hdf5_sparse_matrix(group[key])
        else:
            return process_hdf5_dict(group)

def process_hdf5_to_tuple(step_id, test_num):
    H5PY_FILE_FOLDER = './data/scicode/test_data'
    H5PY_FILE_FOLDER = get_data_path(H5PY_FILE_FOLDER, local_mode=True)
    data_lst = []
    H5PY_FILE = os.path.join(H5PY_FILE_FOLDER, f'{step_id}.h5')
    assert os.path.exists(
        H5PY_FILE
    ), f"Please manually download 'test_data.h5' from https://github.com/open-compass/storage/releases/download/v0.1.0/scicode_test_data.zip and put the file in {H5PY_FILE}"  # noqa: E501
    with h5py.File(H5PY_FILE, 'r') as f:
        for test_id in range(test_num):
            group_path = f'{step_id}/test{test_id + 1}'
            if isinstance(f[group_path], h5py.Group):
                group = f[group_path]  # test1, test2, test3
                num_keys = [key for key in group.keys()]
                if len(num_keys) == 1:  # only 1 var in the test
                    subgroup = group[num_keys[0]]
                    if isinstance(subgroup, h5py.Dataset):
                        if isinstance(subgroup[()], bytes):
                            data_lst.append(subgroup[()].decode(
                                'utf-8', errors='strict'))
                        else:
                            data_lst.append(subgroup[()])
                    elif isinstance(subgroup, h5py.Group):
                        data_lst.append(process_hdf5_datagroup(subgroup))
                else:
                    var_lst = []
                    for key in group.keys():  # var1, var2, var3
                        subgroup = group[key]
                        if isinstance(subgroup, h5py.Dataset):
                            if isinstance(subgroup[()], bytes):
                                var_lst.append(subgroup[()].decode(
                                    'utf-8', errors='strict'))
                            else:
                                var_lst.append(subgroup[()])
                        elif isinstance(subgroup, h5py.Group):
                            var_lst.append(process_hdf5_datagroup(subgroup))
                    data_lst.append(tuple(var_lst))
            else:
                raise FileNotFoundError(
                    f'Path {group_path} not found in the file.')
    return data_lst
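
# A minimal sketch (not executed as part of this module) of how the helper
# above is consumed: SciCodeEvaluator.score writes one test file per
# sub-problem, and each generated file ends with lines roughly like the
# following -- the step id '13.1' and the test count 3 are hypothetical:
#
#   from opencompass.datasets.scicode import process_hdf5_to_tuple
#   targets = process_hdf5_to_tuple('13.1', 3)
#   target = targets[0]
#   # ... assertion lines copied verbatim from the dataset's 'test' field,
#   # comparing the generated function's output against `target`.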

def are_dicts_close(dict1, dict2, atol=1e-8, rtol=1e-5):
    dict1 = process_symbol_in_dict(dict1)
    dict2 = process_symbol_in_dict(dict2)

    # Check if both dictionaries have the same keys
    if dict1.keys() != dict2.keys():
        return False

    # Check if the corresponding values are close
    for key in dict1:
        value1 = dict1[key]
        value2 = dict2[key]
        if isinstance(value1, (sympy.Symbol, str)):
            if not value1 == value2:
                return False
        elif isinstance(value1,
                        (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix,
                         scipy.sparse.bsr_matrix, scipy.sparse.coo_matrix)):
            value1 = value1.toarray()
            value2 = value2.toarray()
            if not np.allclose(value1, value2, atol=atol, rtol=rtol):
                return False
        # Use np.allclose to compare values
        else:
            try:
                if not np.allclose(value1, value2, atol=atol, rtol=rtol):
                    return False
            except ValueError:
                if not value1 == value2:
                    return False
    return True

def process_symbol_in_dict(dict):
    new_dict = {}
    for key, value in dict.items():
        new_dict[key] = value
        if isinstance(value, sympy.Symbol):
            new_dict[key] = str(value)
        if isinstance(key, sympy.Symbol):
            new_dict[str(key)] = dict[key]
            new_dict.pop(key)
    return new_dict


def are_csc_matrix_close(matrix1, matrix2):
    dense1 = matrix1.toarray()
    dense2 = matrix2.toarray()
    return np.allclose(dense1, dense2)


def cmp_tuple_or_list(var1, var2):
    if len(var1) != len(var2):
        return False
    for v1, v2 in zip(var1, var2):
        if isinstance(v1, dict):
            if not are_dicts_close(v1, v2):
                return False
        elif isinstance(v1,
                        (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)):
            if not are_csc_matrix_close(v1, v2):
                return False
        elif isinstance(v1, bool):
            if not v1 == v2:
                return False
        else:
            try:
                if not np.allclose(v1, v2):
                    return False
            except ValueError as e:
                print(e)
                if not v1 == v2:
                    return False
    return True
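
# A minimal illustration (hypothetical values, not executed here) of the
# comparison helpers above: dense arrays go through np.allclose, dicts
# through are_dicts_close, and sparse matrices are densified first, e.g.
#
#   cmp_tuple_or_list(
#       (np.array([1.0, 2.0]), {'x': 3.0}),
#       (np.array([1.0, 2.0 + 1e-9]), {'x': 3.0}))   # -> True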

@ICL_EVALUATORS.register_module()
class SciCodeEvaluator(BaseEvaluator):

    def __init__(self, dataset_path, with_bg):
        super().__init__()
        test_data = []
        dataset_path = get_data_path(dataset_path, local_mode=True)
        if with_bg:  # test with background
            file_path = osp.join(dataset_path,
                                 'SciCode_datasets_with_background.json')
        else:  # test w/o background
            file_path = osp.join(dataset_path, 'SciCode_datasets.json')
        with open(file_path, 'r', encoding='utf-8') as file:
            test_data = json.load(file)
        self.dataset = Dataset.from_list(test_data)

    def extract_python_script(self, response: str):
        start_marker = '```python'
        end_marker = '```'

        if start_marker not in response or end_marker not in response:
            # If the markers are not present, return an empty string
            # print("fail to follow the instruct")
            return ''

        # Split the response at the start marker and take the second part
        after_start = response.split(start_marker)
        if len(after_start) < 2:
            return ''  # No valid split was made

        # Split the part after the start marker at the end marker
        python_script = after_start[1].split(end_marker)[0]

        # Remove leading import statements using regex
        python_script = re.sub(r'^\s*(import .*|from .*\s+import\s+.*)',
                               '',
                               python_script,
                               flags=re.MULTILINE)

        return python_script
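
    # Illustrative note (hypothetical response text): given a completion like
    #   "Here is the code:\n```python\nimport numpy as np\ndef f(x):\n    return x\n```"
    # extract_python_script returns the fenced body with the import statement
    # blanked out (the regex leaves an empty line in its place); imports are
    # supplied once per problem via the dataset's 'import' field in score().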

    def run_script(self, script_path):
        # Return codes: 0 = script ran and all assertions passed,
        # 1 = script exited with an error, 2 = timed out after 60 s.
        try:
            subprocess.run(['python', script_path],
                           check=True,
                           capture_output=True,
                           text=True,
                           timeout=60)
            return 0
        except subprocess.CalledProcessError:
            return 1
        except subprocess.TimeoutExpired:
            return 2

    def score(self, predictions, references):
        # generate all python test codes
        for idx, prediction_list in enumerate(predictions):
            # traverse each test sample
            problem_id = self.dataset[idx]['id']
            num_of_subproblems = len(prediction_list)

            # create dir for each test sample
            testdir_path = os.path.join(self._out_dir, str(problem_id))
            os.makedirs(testdir_path, exist_ok=True)

            python_code = ''
            # add import statement
            python_code += self.dataset[idx]['import']

            for sub_idx in range(num_of_subproblems):
                # extract code
                response = prediction_list[sub_idx]
                python_code += self.extract_python_script(response)

                # process special examples
                if problem_id == '13' and sub_idx >= 5 or \
                        problem_id == '62' and sub_idx >= 0 or \
                        problem_id == '76' and sub_idx >= 2:
                    sub_idx += 1

                # test cases
                test_lst = self.dataset[idx]['test'][sub_idx]

                testfile_path = os.path.join(testdir_path,
                                             f'{problem_id}-{sub_idx + 1}.py')

                # write python code and test cases to a real python file
                with open(testfile_path, 'w', encoding='utf-8') as f:
                    f.write(python_code)
                    f.write("""
from opencompass.datasets.scicode import process_hdf5_to_tuple
""")
                    f.write('targets = process_hdf5_to_tuple(' +
                            f"'{problem_id}.{sub_idx + 1}', {len(test_lst)})" +
                            '\n')

                    for idx2 in range(len(test_lst)):
                        f.write(f'target = targets[{idx2}]\n\n')
                        for line in test_lst[idx2].split('\n'):
                            f.write(line + '\n')

        # find all scripts
        python_scripts = []
        for root, dirs, files in os.walk(self._out_dir):
            for file in files:
                if file.endswith('.py'):
                    python_scripts.append(os.path.join(root, file))

        # Use ThreadPoolExecutor to concurrently execute scripts
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Submit task and obtain Future object
            futures = [
                executor.submit(self.run_script, script)
                for script in python_scripts
            ]

            # Collect results in submission order so they stay aligned with
            # python_scripts for the zip() below; iterating as_completed()
            # would yield them in completion order and mis-pair scripts
            # and results.
            results = [future.result() for future in futures]

        all_results = {}
        for script_path, result in zip(python_scripts, results):
            basename = os.path.basename(script_path)
            main_id = basename.split('-')[0]
            if all_results.get(main_id):
                all_results[main_id].append(result)
            else:
                all_results[main_id] = [result]

        correct, sub_correct = 0, 0
        count, sub_count = 0, 0
        for main_id in all_results:
            correct += sum(all_results[main_id]) == 0
            count += 1
            for sub in all_results[main_id]:
                sub_correct += sub == 0
                sub_count += 1

        result = {
            'accuracy': 100 * correct / count,
            'sub_accuracy': 100 * sub_correct / sub_count,
        }
        return result
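
# Rough usage sketch (assumed, illustrative only -- the authoritative wiring
# lives in the SciCode dataset configs shipped with OpenCompass): the
# evaluator above is typically referenced from a config as something like
#
#   scicode_eval_cfg = dict(
#       evaluator=dict(type=SciCodeEvaluator,
#                      dataset_path='./data/scicode',
#                      with_bg=False))
#
# 'accuracy' is then the fraction of problems whose sub-problems all pass,
# and 'sub_accuracy' the fraction of individual sub-problem scripts that pass.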