diff --git a/examples/eval_supergpqa.py b/examples/eval_supergpqa.py deleted file mode 100644 index a79665fb..00000000 --- a/examples/eval_supergpqa.py +++ /dev/null @@ -1,10 +0,0 @@ -from mmengine import read_base - -with read_base(): - from opencompass.configs.datasets.supergpqa.supergpqa_gen import \ - supergpqa_datasets - from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import \ - models - -datasets = supergpqa_datasets -models = models diff --git a/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen.py b/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen.py deleted file mode 100644 index 7afa6f7c..00000000 --- a/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen.py +++ /dev/null @@ -1,55 +0,0 @@ -from opencompass.datasets.supergpqa.supergpqa import SuperGPQADataset, SuperGPQAEvaluator -from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.openicl.icl_prompt_template import PromptTemplate -from opencompass.openicl.icl_retriever import ZeroRetriever - - -supergpqa_0shot_single_datasets = [] -prompt_template = dict( - type=PromptTemplate, - template=dict( - begin=[ - dict( - role='HUMAN', - prompt='' - ) - ], - round=[ - dict( - role='HUMAN', - prompt='{infer_prompt}' # f-string - ) - ] - ) -) - -# Reader configuration -reader_cfg = dict( - input_columns=['infer_prompt'], - output_column='answer_letter', -) - -# Inference configuration -infer_cfg = dict( - prompt_template=prompt_template, - retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=1024), -) - -# Evaluation configuration -eval_cfg = dict( - evaluator=dict(type=SuperGPQAEvaluator), - pred_role='BOT', -) -supergpqa_dataset = dict( - type=SuperGPQADataset, - abbr='supergpqa', - path='opencompass/supergpqa', - prompt_mode='zero-shot', - reader_cfg=reader_cfg, - infer_cfg=infer_cfg, - eval_cfg=eval_cfg, -) -# print(type(supergpqa_0shot_single_datasets)) - -supergpqa_0shot_single_datasets.append(supergpqa_dataset) diff --git a/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen_category.py b/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen_category.py deleted file mode 100644 index f0686c8e..00000000 --- a/opencompass/configs/datasets/supergpqa/supergpqa_single_0_shot_gen_category.py +++ /dev/null @@ -1,347 +0,0 @@ -from opencompass.datasets.supergpqa.supergpqa import SuperGPQADataset, SuperGPQAEvaluator -from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.openicl.icl_prompt_template import PromptTemplate -from opencompass.openicl.icl_retriever import ZeroRetriever - -categories =[ - "Power Systems and Automation", - "Anesthesiology", - "Oncology", - "Group Theory", - "Thermal Energy Engineering", - "Emergency Medicine", - "Systems Science", - "Geometry and Topology", - "Advanced Algebra", - "Electrical Theory and New Technologies", - "Engineering Thermophysics", - "Operating Systems", - "Guidance, Navigation and Control", - "Harmony", - "Marine Biology", - "Pediatrics", - "Road and Railway Engineering", - "Information Management and Communication", - "Water conservancy and Hydropower Engineering", - "Veterinary Medicine", - "Astronomical Observation and Technology", - "Special Number Theory", - "Philology and Bibliography", - "Textile Materials Science", - "Legal Theory and Legal History", - "Education Economics, Management and Social Security", - "Traditional Chinese Health Preservation", - "Epidemiology and Health Statistics", - "Pitch and Scales", - "Economic History", - "Marine Engineering", - "Labor Economics", - "Materials Processing Engineering", - "Demography and Anthropology", - "Preschool Education", - "Music History, Education, and Technology", - "Instrumentation and Performance", - "Cryptography", - "Mineralogy, Petrology, and Economic Geology", - "Microbiology and Biochemical Pharmacy", - "Poromechanics and Reservoir Physics", - "Imaging and Nuclear Medicine", - "Solid State Physics", - "Microelectronics and Solid-State Electronics", - "Zoology", - "Food Biochemistry", - "Traditional Chinese Pharmacy", - "Neurology", - "Hydrogeology", - "Criminal Law", - "Radiation Medicine", - "Relativity", - "Analytical Chemistry", - "Signal and Information Processing", - "Military Command and Information Systems", - "Literary Theory", - "Textile Chemistry and Dyeing Engineering", - "Urban Infrastructure Engineering", - "Stellar and Interstellar Evolution", - "Geological Resources and Geological Engineering", - "Pattern Recognition", - "Engineering Fluid Mechanics", - "Communication and Information Systems", - "Architectural History", - "Stochastic Processes", - "Microbiology", - "French Language and Literature", - "Principles of Computer Organization", - "Architectural Design and Theory", - "Animal Rearing and Breeding", - "Physical Oceanography", - "Acoustics", - "Organic Chemistry", - "Refrigeration and Cryogenic Engineering", - "Public Finance", - "Dermatology and Venereology", - "Religious Studies", - "Discrete Mathematics", - "Forest Cultivation and Genetic Breeding", - "Vehicle Operation Engineering", - "Physical Chemistry", - "Nutrition and Food Hygiene", - "Ship Mechanics and Design Principles", - "Power Electronics and Electrical Drives", - "Finance", - "Pharmacology", - "Environmental Engineering", - "Ecology", - "Aeronautical and Astronautical Science and Technology", - "Agricultural Mechanization Engineering", - "Computer Architecture", - "Political Economy", - "Principles of Seismic Exploration", - "Elements of Chemical Reaction Engineering", - "Digital Surveying and Remote Sensing Applications", - "History and Theory of Journalism and Media Management", - "Instrument Science and Technology", - "Structural Engineering", - "Computer Networks", - "Power Machinery and Engineering", - "Constitutional and Administrative Law", - "Law and Social Governance", - "Psychology", - "Urban Planning and Design", - "Thermodynamics and Statistical Physics", - "Chemical Transport Engineering", - "Environmental and Resource Protection", - "Fluid Machinery and Engineering", - "Cartography and Geographic Information Engineering", - "Computational Mathematics", - "Pathogen Biology", - "Human Geography", - "Theoretical Optics", - "Solid Mechanics", - "Electrochemistry", - "Aquaculture", - "Logic", - "Mechatronic Engineering", - "Modern and Contemporary Chinese Literature", - "Operations Research and Cybernetics", - "Circuits and Systems", - "Internal Combustion Engineering", - "Atomic and Molecular Physics", - "Marine Chemistry", - "Electromagnetic Field and Microwave Technology", - "Rigid Body Mechanics", - "Physiology", - "Military Chemistry and Pyrotechnics", - "Fundamentals of Dynamics and Control", - "Control Theory and Control Engineering", - "Historical Geography", - "Physical Geography", - "National and Defense Economics", - "Polymer Physics", - "Landscape Plants and Ornamental Horticulture", - "Solar System Science", - "Library and Archival Science", - "Internal Medicine", - "Physical Chemistry of Metallurgical Process", - "Antenna and Radio Communication", - "Genetics", - "Graph Theory", - "Principles of Metallurgy", - "Bridge and Tunnel Engineering", - "Combinatorial Mathematics", - "Otorhinolaryngology", - "Political Science", - "Medicinal Chemistry", - "Health Toxicology and Environmental Health", - "Archaeology and Museology", - "Geotechnical Engineering", - "Land Resource Management and Administrative Management", - "Thermodynamics", - "Atmospheric Physics and Atmospheric Environment", - "Broadcasting and Television Art", - "Numerical Analysis", - "Statistical Mechanics", - "Mineral Processing Engineering", - "Mathematical Analysis", - "Philosophy of Science and Technology", - "Western Economics", - "Data Structures", - "Fine Arts", - "Economic Statistics", - "Environmental Science", - "Military Thought and History", - "Drama and Opera Studies", - "Film Studies", - "High Voltage and Insulation Technology", - "Military Law", - "Wood Science and Technology", - "Obstetrics and Gynecology", - "Hydraulics and Hydrology", - "Cell Biology", - "Biochemistry and Molecular Biology", - "Fluid Flow and Heat Transfer in Chemical Engineering", - "Formal Languages", - "Optoelectronic Technology", - "Crop Science", - "Fundamental Mathematics", - "Immunology", - "Surgery", - "Ophthalmology", - "Social Medicine and Health Management", - "Industrial Economics", - "Traffic Information Engineering and Control", - "Traditional Chinese Medicine Theory", - "Polymer Chemistry and Physics", - "Maternal, Child and Adolescent Health", - "Radiation Protection and Nuclear Technology Applications", - "Food Processing and Storage Engineering", - "Fluid Physics", - "Materials Physics and Chemistry", - "Pharmaceutical Analysis", - "Semiconductor Physics", - "Optical Fiber Communication", - "Ethics", - "Psychiatry and Mental Health", - "Management Science and Engineering", - "Number Theory", - "Contract Law", - "Inorganic Chemistry", - "Design Arts", - "Human Anatomy and Histology-Embryology", - "Iron and Steel Metallurgy", - "Dance Studies", - "Structural Geology", - "Special Education", - "Musical Forms and Analysis", - "Philosophical Aesthetics", - "Astrophysics", - "Manufacturing Automation", - "Quantum Mechanics", - "Probability and Statistics", - "Military Logistics and Equipment", - "Heat Transfer", - "Classical Chinese Literature", - "Information Management Science", - "Cosmology", - "Educational Technology and Principles", - "Ordinary Differential Equations", - "Underwater Acoustics", - "Business and Accounting Management", - "Dynamic Meteorology", - "Military Management", - "Journalism and News Practice", - "Animal Nutrition and Feed Science", - "Applied Optics", - "Theoretical Fluid Mechanics", - "Communication Principles", - "Physical Education and Training", - "Geodesy and Surveying Engineering", - "Meteorology", - "Sports Science and Medicine", - "Solid Earth Geophysics", - "Particle and Nuclear Physics", - "International Law", - "Oil and Gas Field Development and Storage & Transportation Engineering", - "Basic Stomatology", - "Agricultural Environment and Soil-Water Engineering", - "Geochemistry", - "Procedural Law", - "Botany", - "Fuzzy Mathematics", - "Paleontology and Stratigraphy", - "Sports Humanities and Sociology", - "Civil and Commercial Law", - "Electrodynamics", - "Mining and Safety Engineering", - "Mass Transport and Separation Process in Chemical Engineering", - "Advanced Programming Languages", - "Laser Technology", - "Weapon Systems Science and Engineering", - "Quantitative Economics", - "Theoretical Mechanics", - "Nursing and Rehabilitation Medicine", - "Databases", - "Pharmaceutics", - "Space physics", - "Functions of Real Variables", - "Non-ferrous Metallurgy", - "Theory of Curriculum and Instruction", - "Clinical Laboratory Diagnostics", - "Clinical Stomatology", - "Literary History", - "Tourism Management and Technological Economics Management", - "Communication and Broadcasting", - "Pathology and Pathophysiology", - "Functions of Complex Variables", - "World History", - "Forest Engineering", - "Forensic Medicine", - "Linguistics and Applied Linguistics", - "Social and Folklore Studies", - "Computer Software and Theory", - "Subatomic and Atomic Physics", - "Biophysics", - "Radiochemistry", - "Russian Language and Literature", - "International Trade", - "Geriatric Medicine", - "Composition", - "Transportation Planning and Management", - "Polynomials and Series Expansions", - "Nuclear Energy and Reactor Technology" -] - -supergpqa_0shot_single_datasets = [] - -for category in categories: - - - prompt_template = dict( - type=PromptTemplate, - template=dict( - begin=[ - dict( - role='HUMAN', - prompt='' - ) - ], - round=[ - dict( - role='HUMAN', - prompt='{infer_prompt}' # f-string - ) - ] - ) - ) - - # Reader configuration - reader_cfg = dict( - input_columns=['infer_prompt'], - output_column='answer_letter', - ) - - # Inference configuration - infer_cfg = dict( - prompt_template=prompt_template, - retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=1024), - ) - - # Evaluation configuration - eval_cfg = dict( - evaluator=dict(type=SuperGPQAEvaluator), - pred_role='BOT', - ) - supergpqa_dataset = dict( - type=SuperGPQADataset, - abbr=f'supergpqa_{category.replace(" ", "_")}', - # abbr='supergpqa', - path='opencompass/supergpqa', - prompt_mode='zero-shot', - category=category, - reader_cfg=reader_cfg, - infer_cfg=infer_cfg, - eval_cfg=eval_cfg, - ) - # print(type(supergpqa_0shot_single_datasets)) - supergpqa_0shot_single_datasets.append(supergpqa_dataset) diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index c4a18fb8..79be5736 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -403,11 +403,6 @@ DATASETS_MAPPING = { "hf_id": "", "local": "./data/OlympiadBench", }, - "opencompass/supergpqa": { - "ms_id": "", - "hf_id": "m-a-p/SuperGPQA", - "local": "./data/supergpqa", - }, } DATASETS_URL = {