2023-12-27 22:17:23 +08:00
# This config is used for pass@k evaluation with dataset repetition,
# for models that cannot generate multiple responses for a single input.
from opencompass . openicl . icl_prompt_template import PromptTemplate
from opencompass . openicl . icl_retriever import ZeroRetriever
from opencompass . openicl . icl_inferencer import GenInferencer
[Feature] Support ModelScope datasets (#1289)
* add ceval, gsm8k modelscope support
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* update dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* update dataset for modelscope support
* update readme
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* update readme
* remove tydiqa japanese subset
* add ceval, gsm8k modelscope support
* update race, mmlu, arc, cmmlu, commonsenseqa, humaneval and unittest
* update bbh, flores, obqa, siqa, storycloze, summedits, winogrande, xsum datasets
* format file
* format file
* update dataset format
* support ms_dataset
* update dataset for modelscope support
* merge myl_dev and update test_ms_dataset
* update readme
* update dataset for modelscope support
* update eval_api_zhipu_v2
* remove unused code
* add get_data_path function
* remove tydiqa japanese subset
* update util
* remove .DS_Store
* fix md format
* move util into package
* update docs/get_started.md
* restore eval_api_zhipu_v2.py, add environment setting
* Update dataset
* Update
* Update
* Update
* Update
---------
Co-authored-by: Yun lin <yunlin@U-Q9X2K4QV-1904.local>
Co-authored-by: Yunnglin <mao.looper@qq.com>
Co-authored-by: Yun lin <yunlin@laptop.local>
Co-authored-by: Yunnglin <maoyl@smail.nju.edu.cn>
Co-authored-by: zhangsongyang <zhangsongyang@pjlab.org.cn>
2024-07-29 13:48:32 +08:00
from opencompass . datasets import MBPPDatasetV2 , MBPPPassKEvaluator
2023-12-27 22:17:23 +08:00
2024-05-14 15:35:58 +08:00
# Reader configuration: the dataset columns used as inputs and the column
# used as output.
# The input column names must match the `{text}` / `{test_list}` placeholders
# used in the prompt template below, so they carry no surrounding whitespace.
mbpp_reader_cfg = dict(
    input_columns=['text', 'test_list'],
    output_column='test_column',
)
2023-12-27 22:17:23 +08:00
# Inference configuration.
#
# The prompt is a multi-round dialogue: three worked HUMAN/BOT example pairs
# (similar_elements, is_not_prime, heap_queue_largest) followed by the actual
# query, whose `{text}` and `{test_list}` placeholders are filled from the
# dataset columns. The final BOT turn is left open after `[BEGIN]` so the
# model continues with its answer.
#
# NOTE(review): every string literal in this block is padded with whitespace
# around its content (e.g. ' HUMAN ', ' [BEGIN] \n '). This looks like an
# extraction artifact, but the exact original spacing inside the prompts
# cannot be recovered from this file, so the literals are kept exactly as
# found -- confirm against the upstream config before normalizing them.
mbpp_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                # Few-shot example 1: similar_elements
                dict(role=' HUMAN ', prompt=' You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests: \n \n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5) \n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n '),
                dict(role=' BOT ', prompt=" [BEGIN] \n ' def similar_elements(test_tup1, test_tup2): \r \n res = tuple(set(test_tup1) & set(test_tup2)) \r \n return (res) ' \n [DONE] \n \n "),
                # Few-shot example 2: is_not_prime
                dict(role=' HUMAN ', prompt=' You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests: \n \n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n '),
                dict(role=' BOT ', prompt=" [BEGIN] \n ' import math \r \n def is_not_prime(n): \r \n result = False \r \n for i in range(2,int(math.sqrt(n)) + 1): \r \n if n % i == 0: \r \n result = True \r \n return result ' \n [DONE] \n \n "),
                # Few-shot example 3: heap_queue_largest
                dict(role=' HUMAN ', prompt=' You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests: \n \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n '),
                dict(role=' BOT ', prompt=" [BEGIN] \n ' import heapq as hq \r \n def heap_queue_largest(nums,n): \r \n largest_nums = hq.nlargest(n, nums) \r \n return largest_nums ' \n [DONE] \n \n "),
                # Actual query: task text and tests come from the dataset row.
                dict(role=' HUMAN ', prompt=' You are an expert Python programmer, and here is your task: {text} Your code should pass these tests: \n \n {test_list} \n '),
                dict(role=' BOT ', prompt=' [BEGIN] \n '),
            ],
        ),
    ),
    # No in-context retrieval: the few-shot examples are hard-coded above.
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512),
)
2023-12-27 22:17:23 +08:00
2024-05-14 15:35:58 +08:00
# Evaluation configuration: selects MBPPPassKEvaluator as the evaluator and
# sets the role whose output is taken as the prediction.
# NOTE(review): the pred_role value is padded with spaces exactly as found in
# this file (it matches the padded BOT role strings in the prompt template);
# this is presumably an extraction artifact -- confirm before normalizing.
mbpp_eval_cfg = dict(
    evaluator=dict(type=MBPPPassKEvaluator),
    pred_role=' BOT ',
)
2023-12-27 22:17:23 +08:00
# Dataset list exported by this config: a single MBPP entry that repeats the
# dataset 10 times (`num_repeats=10`), in line with the pass@k-by-repetition
# purpose stated in the header comment of this file.
# The `abbr` and `path` values carry no surrounding whitespace; a padded path
# would not resolve to the data file.
mbpp_datasets = [
    dict(
        type=MBPPDatasetV2,
        abbr='mbpp_repeat10',
        path='./data/mbpp/mbpp.jsonl',
        num_repeats=10,
        reader_cfg=mbpp_reader_cfg,
        infer_cfg=mbpp_infer_cfg,
        eval_cfg=mbpp_eval_cfg,
    ),
]