add 2

2025-05-30 16:03:24 +08:00 · 2025-05-27 03:26:40 +00:00 · 2025-05-27 03:26:40 +00:00 · d4a69ba65f
commit d4a69ba65f
parent e59f0b59cb
11 changed files with 118 additions and 516 deletions
--- a/opencompass/configs/datasets/srbench/srbench_gen.py
+++ b/opencompass/configs/datasets/srbench/srbench_gen.py
@ -0,0 +1,58 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import (
+    SRbenchDataset,SRbenchDatasetEvaluator
+)
+
+from opencompass.evaluator import GenericLLMEvaluator
+
+
+
+INFER_TEMPLATE = f'''
+            You will be provided with a set of input-output pairs. Based on these data, infer the mathematical relationship between y and multiple input variables. Please note that the possible mathematical operations include: +, -, *, /, exp, sqrt, sin, arcsin, and constant terms.
+            The input sample data are as follows:
+            {{prompt1}}
+            Based on the above data, please infer the possible formula. Ensure that your inference applies to all the provided data points, and consider both linear and nonlinear combinations.
+            Verify whether your formula applies to the following new data point and adjust it to ensure accuracy:
+            {{prompt2}}
+            Finally, please output only the formula string you inferred (e.g. y=x_0 * x_1), without any additional information.
+        '''
+
+srbench_reader_cfg = dict(input_columns=["prompt1","prompt2"], output_column='Formula')
+
+srbench_datasets = []
+
+srbench_infer_cfg = dict(
+    prompt_template=dict(
+            type=PromptTemplate,
+            template=dict(
+                    round=[
+                        dict(
+                            role='HUMAN',
+                            prompt=INFER_TEMPLATE)
+                    ]
+                ),
+            ),
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=GenInferencer),
+        )
+
+
+srbench_eval_cfg = dict(
+        evaluator=dict(type=SRbenchDatasetEvaluator),
+        path="opencompass/srbench",
+        pred_role='BOT',
+        )
+
+srbench_datasets.append(
+        dict(
+            abbr='srbench',
+            type=SRbenchDataset,
+            path='opencompass/srbench',
+            reader_cfg=srbench_reader_cfg,
+            infer_cfg=srbench_infer_cfg,
+            eval_cfg=srbench_eval_cfg,
+        )
+    )
+
--- a/opencompass/datasets/init.py
+++ b/opencompass/datasets/init.py
@ -92,7 +92,7 @@ from .longbenchv2 import *  # noqa: F401, F403
 from .lveval import *  # noqa: F401, F403
 from .mastermath2024v1 import *  # noqa: F401, F403
 from .matbench import *  # noqa: F401, F403
-from .math import *  # noqa: F401, F403
+from .math2 import *  # noqa: F401, F403
 from .math401 import *  # noqa: F401, F403
 from .math_intern import *  # noqa: F401, F403
 from .mathbench import *  # noqa: F401, F403
@ -144,6 +144,7 @@ from .simpleqa import *  # noqa: F401, F403
 from .siqa import *  # noqa: F401, F403
 from .smolinstruct import *  # noqa: F401, F403
 from .squad20 import SQuAD20Dataset, SQuAD20Evaluator  # noqa: F401, F403
+from .srbench import *  # noqa: F401, F403
 from .storycloze import *  # noqa: F401, F403
 from .strategyqa import *  # noqa: F401, F403
 from .subjective import *  # noqa: F401, F403
@ -170,3 +171,4 @@ from .xcopa import *  # noqa: F401, F403
 from .xiezhi import XiezhiDataset, XiezhiRetriever  # noqa: F401, F403
 from .xlsum import *  # noqa: F401, F403
 from .xsum import *  # noqa: F401, F403
+
--- a/opencompass/datasets/bigcodebench/bigcodebench.py
+++ b/opencompass/datasets/bigcodebench/bigcodebench.py
@ -71,7 +71,7 @@ class BigCodeBenchEvaluator(BaseEvaluator):
            self,
            release_version='v0.1.2',
            eval_type='instruct',
-            remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/',  # noqa
+            remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/',  # noqa
            dataset_version: str = 'full',
            local_mode: bool = False,
            path: str = 'opencompass/bigcodebench',
--- a/opencompass/datasets/livemathbench/livemathbench.py
+++ b/opencompass/datasets/livemathbench/livemathbench.py
@ -10,7 +10,7 @@ import mmengine
 import numpy as np
 from datasets import Dataset, load_dataset

-from opencompass.datasets.math import MATHAgentEvaluator, math_postprocess_v2
+from opencompass.datasets.math2 import MATHAgentEvaluator, math_postprocess_v2
 from opencompass.models import OpenAISDK
 from opencompass.openicl.icl_evaluator import BaseEvaluator
 from opencompass.openicl.icl_inferencer.icl_base_inferencer import \
--- a/opencompass/datasets/srbench/2d/Feynman_2d.csv
+++ b/opencompass/datasets/srbench/2d/Feynman_2d.csv
@ -1,16 +0,0 @@
-Formula,Filename,n_variables
-y = exp(-(x1/x0)**2/2) / (sqrt(2*pi)*x0),I.6.2,2
-y = x0 * x1,I.12.1,2
-y = x0 * x1,I.12.5,2
-y = 1/2 * x0 * x1**2,I.14.4,2
-y = x0 / x1,I.25.13,2
-y = arcsin(x0 * sin(x1)),I.26.2,2
-y = x0 / x1,I.29.4,2
-y = (x1 / (2 * pi)) * x0,I.34.27,2
-y = (3/2) * x0 * x1,I.39.1,2
-y = x0 / (4 * pi * x1**2),II.3.24,2
-y = x0 * x1**2 / 2,II.8.31,2
-y = 1 + x0 * x1 / (1 - (x0 * x1 / 3)),II.11.28,2
-y = x0 * x1**2,II.27.18,2
-y = x0 / (2 * (1 + x1)),II.38.14,2
-y = x0 * (x1 / (2 * pi)),III.12.43,2
--- a/opencompass/datasets/srbench/Feynman/FeynmanEquation_23.csv
+++ b/opencompass/datasets/srbench/Feynman/FeynmanEquation_23.csv
@ -1,4 +1,19 @@
 Formula,Filename,n_variables
+y = exp(-(x1/x0)**2/2) / (sqrt(2*pi)*x0),I.6.2,2
+y = x0 * x1,I.12.1,2
+y = x0 * x1,I.12.5,2
+y = 1/2 * x0 * x1**2,I.14.4,2
+y = x0 / x1,I.25.13,2
+y = arcsin(x0 * sin(x1)),I.26.2,2
+y = x0 / x1,I.29.4,2
+y = (x1 / (2 * pi)) * x0,I.34.27,2
+y = (3/2) * x0 * x1,I.39.1,2
+y = x0 / (4 * pi * x1**2),II.3.24,2
+y = x0 * x1**2 / 2,II.8.31,2
+y = 1 + x0 * x1 / (1 - (x0 * x1 / 3)),II.11.28,2
+y = x0 * x1**2,II.27.18,2
+y = x0 / (2 * (1 + x1)),II.38.14,2
+y = x0 * (x1 / (2 * pi)),III.12.43,2
 y = exp(-((x1 - x2) / x0)**2 / 2) / (sqrt(2 * pi) * x0),I.6.2b,3
 y = x0 / sqrt(1 - x1**2 / x2**2),I.10.7,3
 y = x0*x2/(4*pi*x1*x2**3),I.12.4,3
--- a/opencompass/datasets/srbench/Feynman/FeynmanEquations.csv
+++ b/opencompass/datasets/srbench/Feynman/FeynmanEquations.csv
@ -1,131 +0,0 @@
-Filename,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,v2_high,v3_name,v3_low,v3_high,v4_name,v4_low,v4_high,v5_name,v5_low,v5_high,v6_name,v6_low,v6_high,v7_name,v7_low,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high
-I.6.2a,1,f,exp(-theta**2/2)/sqrt(2*pi),1,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,
-I.6.2,2,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2,sigma,1,3,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,
-I.6.2b,3,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma),3,sigma,1,3,theta,1,3,theta1,1,3,,,,,,,,,,,,,,,,,,,,,
-I.8.14,4,d,sqrt((x2-x1)**2+(y2-y1)**2),4,x1,1,5,x2,1,5,y1,1,5,y2,1,5,,,,,,,,,,,,,,,,,,
-I.9.18,5,F,G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2),9,m1,1,2,m2,1,2,G,1,2,x1,3,4,x2,1,2,y1,3,4,y2,1,2,z1,3,4,z2,1,2,,,
-I.10.7,6,m,m_0/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
-I.11.19,7,A,x1*y1+x2*y2+x3*y3,6,x1,1,5,x2,1,5,x3,1,5,y1,1,5,y2,1,5,y3,1,5,,,,,,,,,,,,
-I.12.1,8,F,mu*Nn,2,mu,1,5,Nn,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.12.2,10,F,q1*q2*r/(4*pi*epsilon*r**3),4,q1,1,5,q2,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,
-I.12.4,11,Ef,q1*r/(4*pi*epsilon*r**3),3,q1,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
-I.12.5,12,F,q2*Ef,2,q2,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.12.11,13,F,q*(Ef+B*v*sin(theta)),5,q,1,5,Ef,1,5,B,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,
-I.13.4,9,K,1/2*m*(v**2+u**2+w**2),4,m,1,5,v,1,5,u,1,5,w,1,5,,,,,,,,,,,,,,,,,,
-I.13.12,14,U,G*m1*m2*(1/r2-1/r1),5,m1,1,5,m2,1,5,r1,1,5,r2,1,5,G,1,5,,,,,,,,,,,,,,,
-I.14.3,15,U,m*g*z,3,m,1,5,g,1,5,z,1,5,,,,,,,,,,,,,,,,,,,,,
-I.14.4,16,U,1/2*k_spring*x**2,2,k_spring,1,5,x,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.15.3x,17,x1,(x-u*t)/sqrt(1-u**2/c**2),4,x,5,10,u,1,2,c,3,20,t,1,2,,,,,,,,,,,,,,,,,,
-I.15.3t,18,t1,(t-u*x/c**2)/sqrt(1-u**2/c**2),4,x,1,5,c,3,10,u,1,2,t,1,5,,,,,,,,,,,,,,,,,,
-I.15.1,19,p,m_0*v/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
-I.16.6,20,v1,(u+v)/(1+u*v/c**2),3,c,1,5,v,1,5,u,1,5,,,,,,,,,,,,,,,,,,,,,
-I.18.4,21,r,(m1*r1+m2*r2)/(m1+m2),4,m1,1,5,m2,1,5,r1,1,5,r2,1,5,,,,,,,,,,,,,,,,,,
-I.18.12,22,tau,r*F*sin(theta),2,r,1,5,F,1,5,theta,0,5,,,,,,,,,,,,,,,,,,,,,
-I.18.14,23,L,m*r*v*sin(theta),3,m,1,5,r,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,,,,
-I.24.6,24,E_n,1/2*m*(omega**2+omega_0**2)*1/2*x**2,4,m,1,3,omega,1,3,omega_0,1,3,x,1,3,,,,,,,,,,,,,,,,,,
-I.25.13,25,Volt,q/C,2,q,1,5,C,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.26.2,26,theta1,arcsin(n*sin(theta2)),2,n,0,1,theta2,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.27.6,27,foc,1/(1/d1+n/d2),3,d1,1,5,d2,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
-I.29.4,28,k,omega/c,2,omega,1,10,c,1,10,,,,,,,,,,,,,,,,,,,,,,,,
-I.29.16,29,x,sqrt(x1**2+x2**2-2*x1*x2*cos(theta1-theta2)),4,x1,1,5,x2,1,5,theta1,1,5,theta2,1,5,,,,,,,,,,,,,,,,,,
-I.30.3,30,Int,Int_0*sin(n*theta/2)**2/sin(theta/2)**2,3,Int_0,1,5,theta,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
-I.30.5,31,theta,arcsin(lambd/(n*d)),3,lambd,1,2,d,2,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
-I.32.5,32,Pwr,q**2*a**2/(6*pi*epsilon*c**3),4,q,1,5,a,1,5,epsilon,1,5,c,1,5,,,,,,,,,,,,,,,,,,
-I.32.17,33,Pwr,(1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2),6,epsilon,1,2,c,1,2,Ef,1,2,r,1,2,omega,1,2,omega_0,3,5,,,,,,,,,,,,
-I.34.8,34,omega,q*v*B/p,4,q,1,5,v,1,5,B,1,5,p,1,5,,,,,,,,,,,,,,,,,,
-I.34.1,35,omega,omega_0/(1-v/c),3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
-I.34.14,36,omega,(1+v/c)/sqrt(1-v**2/c**2)*omega_0,3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
-I.34.27,37,E_n,(h/(2*pi))*omega,2,omega,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.37.4,38,Int,I1+I2+2*sqrt(I1*I2)*cos(delta),3,I1,1,5,I2,1,5,delta,1,5,,,,,,,,,,,,,,,,,,,,,
-I.38.12,39,r,4*pi*epsilon*(h/(2*pi))**2/(m*q**2),3,m,1,5,q,1,5,h,1,5,epsilon,1,5,,,,,,,,,,,,,,,,,,
-I.39.1,40,E_n,3/2*pr*V,2,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-I.39.11,41,E_n,1/(gamma-1)*pr*V,3,gamma,2,5,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,
-I.39.22,42,pr,n*kb*T/V,4,n,1,5,T,1,5,V,1,5,kb,1,5,,,,,,,,,,,,,,,,,,
-I.40.1,43,n,n_0*exp(-m*g*x/(kb*T)),6,n_0,1,5,m,1,5,x,1,5,T,1,5,g,1,5,kb,1,5,,,,,,,,,,,,
-I.41.16,44,L_rad,h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)),5,omega,1,5,T,1,5,h,1,5,kb,1,5,c,1,5,,,,,,,,,,,,,,,
-I.43.16,45,v,mu_drift*q*Volt/d,4,mu_drift,1,5,q,1,5,Volt,1,5,d,1,5,,,,,,,,,,,,,,,,,,
-I.43.31,46,D,mob*kb*T,3,mob,1,5,T,1,5,kb,1,5,,,,,,,,,,,,,,,,,,,,,
-I.43.43,47,kappa,1/(gamma-1)*kb*v/A,4,gamma,2,5,kb,1,5,A,1,5,v,1,5,,,,,,,,,,,,,,,,,,
-I.44.4,48,E_n,n*kb*T*ln(V2/V1),5,n,1,5,kb,1,5,T,1,5,V1,1,5,V2,1,5,,,,,,,,,,,,,,,
-I.47.23,49,c,sqrt(gamma*pr/rho),3,gamma,1,5,pr,1,5,rho,1,5,,,,,,,,,,,,,,,,,,,,,
-I.48.2,50,E_n,m*c**2/sqrt(1-v**2/c**2),3,m,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
-I.50.26,51,x,x1*(cos(omega*t)+alpha*cos(omega*t)**2),4,x1,1,3,omega,1,3,t,1,3,alpha,1,3,,,,,,,,,,,,,,,,,,
-II.2.42,52,Pwr,kappa*(T2-T1)*A/d,5,kappa,1,5,T1,1,5,T2,1,5,A,1,5,d,1,5,,,,,,,,,,,,,,,
-II.3.24,53,flux,Pwr/(4*pi*r**2),2,Pwr,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-II.4.23,54,Volt,q/(4*pi*epsilon*r),3,q,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
-II.6.11,55,Volt,1/(4*pi*epsilon)*p_d*cos(theta)/r**2,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
-II.6.15a,56,Ef,p_d/(4*pi*epsilon)*3*z/r**5*sqrt(x**2+y**2),6,epsilon,1,3,p_d,1,3,r,1,3,x,1,3,y,1,3,z,1,3,,,,,,,,,,,,
-II.6.15b,57,Ef,p_d/(4*pi*epsilon)*3*cos(theta)*sin(theta)/r**3,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
-II.8.7,58,E_n,3/5*q**2/(4*pi*epsilon*d),3,q,1,5,epsilon,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
-II.8.31,59,E_den,epsilon*Ef**2/2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-II.10.9,60,Ef,sigma_den/epsilon*1/(1+chi),3,sigma_den,1,5,epsilon,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
-II.11.3,61,x,q*Ef/(m*(omega_0**2-omega**2)),5,q,1,3,Ef,1,3,m,1,3,omega_0,3,5,omega,1,2,,,,,,,,,,,,,,,
-II.11.17,62,n,n_0*(1+p_d*Ef*cos(theta)/(kb*T)),6,n_0,1,3,kb,1,3,T,1,3,theta,1,3,p_d,1,3,Ef,1,3,,,,,,,,,,,,
-II.11.20,63,Pol,n_rho*p_d**2*Ef/(3*kb*T),5,n_rho,1,5,p_d,1,5,Ef,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
-II.11.27,64,Pol,n*alpha/(1-(n*alpha/3))*epsilon*Ef,4,n,0,1,alpha,0,1,epsilon,1,2,Ef,1,2,,,,,,,,,,,,,,,,,,
-II.11.28,65,theta,1+n*alpha/(1-(n*alpha/3)),2,n,0,1,alpha,0,1,,,,,,,,,,,,,,,,,,,,,,,,
-II.13.17,66,B,1/(4*pi*epsilon*c**2)*2*I/r,4,epsilon,1,5,c,1,5,I,1,5,r,1,5,,,,,,,,,,,,,,,,,,
-II.13.23,67,rho_c,rho_c_0/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
-II.13.34,68,j,rho_c_0*v/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
-II.15.4,69,E_n,-mom*B*cos(theta),3,mom,1,5,B,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
-II.15.5,70,E_n,-p_d*Ef*cos(theta),3,p_d,1,5,Ef,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
-II.21.32,71,Volt,q/(4*pi*epsilon*r*(1-v/c)),5,q,1,5,epsilon,1,5,r,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,
-II.24.17,72,k,sqrt(omega**2/c**2-pi**2/d**2),3,omega,4,6,c,1,2,d,2,4,,,,,,,,,,,,,,,,,,,,,
-II.27.16,73,flux,epsilon*c*Ef**2,3,epsilon,1,5,c,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,
-II.27.18,74,E_den,epsilon*Ef**2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-II.34.2a,75,I,q*v/(2*pi*r),3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
-II.34.2,76,mom,q*v*r/2,3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
-II.34.11,77,omega,g_*q*B/(2*m),4,g_,1,5,q,1,5,B,1,5,m,1,5,,,,,,,,,,,,,,,,,,
-II.34.29a,78,mom,q*h/(4*pi*m),3,q,1,5,h,1,5,m,1,5,,,,,,,,,,,,,,,,,,,,,
-II.34.29b,79,E_n,g_*mom*B*Jz/(h/(2*pi)),5,g_,1,5,h,1,5,Jz,1,5,mom,1,5,B,1,5,,,,,,,,,,,,,,,
-II.35.18,80,n,n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T))),5,n_0,1,3,kb,1,3,T,1,3,mom,1,3,B,1,3,,,,,,,,,,,,,,,
-II.35.21,81,M,n_rho*mom*tanh(mom*B/(kb*T)),5,n_rho,1,5,mom,1,5,B,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
-II.36.38,82,f,mom*H/(kb*T)+(mom*alpha)/(epsilon*c**2*kb*T)*M,8,mom,1,3,H,1,3,kb,1,3,T,1,3,alpha,1,3,epsilon,1,3,c,1,3,M,1,3,,,,,,
-II.37.1,83,E_n,mom*(1+chi)*B,6,mom,1,5,B,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
-II.38.3,84,F,Y*A*x/d,4,Y,1,5,A,1,5,d,1,5,x,1,5,,,,,,,,,,,,,,,,,,
-II.38.14,85,mu_S,Y/(2*(1+sigma)),2,Y,1,5,sigma,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-III.4.32,86,n,1/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
-III.4.33,87,E_n,(h/(2*pi))*omega/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
-III.7.38,88,omega,2*mom*B/(h/(2*pi)),3,mom,1,5,B,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,
-III.8.54,89,prob,sin(E_n*t/(h/(2*pi)))**2,3,E_n,1,2,t,1,2,h,1,4,,,,,,,,,,,,,,,,,,,,,
-III.9.52,90,prob,(p_d*Ef*t/(h/(2*pi)))*sin((omega-omega_0)*t/2)**2/((omega-omega_0)*t/2)**2,6,p_d,1,3,Ef,1,3,t,1,3,h,1,3,omega,1,5,omega_0,1,5,,,,,,,,,,,,
-III.10.19,91,E_n,mom*sqrt(Bx**2+By**2+Bz**2),3,mom,1,5,Bx,1,5,By,1,5,Bz,1,5,,,,,,,,,,,,,,,,,,
-III.12.43,92,L,n*(h/(2*pi)),2,n,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
-III.13.18,93,v,2*E_n*d**2*k/(h/(2*pi)),4,E_n,1,5,d,1,5,k,1,5,h,1,5,,,,,,,,,,,,,,,,,,
-III.14.14,94,I,I_0*(exp(q*Volt/(kb*T))-1),5,I_0,1,5,q,1,2,Volt,1,2,kb,1,2,T,1,2,,,,,,,,,,,,,,,
-III.15.12,95,E_n,2*U*(1-cos(k*d)),3,U,1,5,k,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
-III.15.14,96,m,(h/(2*pi))**2/(2*E_n*d**2),3,h,1,5,E_n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
-III.15.27,97,k,2*pi*alpha/(n*d),3,alpha,1,5,n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
-III.17.37,98,f,beta*(1+alpha*cos(theta)),3,beta,1,5,alpha,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
-III.19.51,99,E_n,-m*q**4/(2*(4*pi*epsilon)**2*(h/(2*pi))**2)*(1/n**2),4,m,1,5,q,1,5,h,1,5,n,1,5,epsilon,1,5,,,,,,,,,,,,,,,
-III.21.20,100,j,-rho_c_0*q*A_vec/m,4,rho_c_0,1,5,q,1,5,A_vec,1,5,m,1,5,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
-,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
--- a/opencompass/datasets/srbench/evaluate.py
+++ b/opencompass/datasets/srbench/evaluate.py
@ -1,330 +0,0 @@
-import os
-import re
-import pandas as pd
-import numpy as np
-import requests
-import json
-import sys
-import sympy as sp
-from sklearn.metrics import r2_score,root_mean_squared_error
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-MLLM_claudeshop = {
-    'gpt-3.5': 'gpt-3.5-turbo',
-    'gpt-4o': 'chatgpt-4o-latest',
-    'gpt-4': 'gpt-4',
-    'gpt-o3': 'o3-mini',
-    'claude-3-7': 'claude-3-7-sonnet-20250219-thinking',
-    'Qwen-72b': 'qwen-72b',
-    'Qwen2.5':'qwen2.5-32b-instruct',
-    'Qwen-vl': 'qwen-vl-max',
-    'Gemini-1.5p': 'gemini-1.5-pro-latest',
-    'Gemini-2.0p': 'gemini-2.0-pro-exp-02-05',
-    'Gemini-2.5p': 'gemini-2.5-pro-exp-03-25',
-    'grok-2': 'grok-2',
-    'grok-3': 'grok-3',
-}
-
-MLLM_siliconflow = {
-    'deepseek-v3': 'deepseek-ai/DeepSeek-V3',
-    'deepseek-r1': 'Pro/deepseek-ai/DeepSeek-R1',
-    'QwQ-32b': 'Qwen/QwQ-32B',
-    'Qwen2.5-vl-72b': 'Qwen/Qwen2.5-VL-72B-Instruct',
-}
-
-MLLM_intern = {
-    'InternLM3-8B': 'internlm3-8b-instruct',
-    'InternVL3-78B': 'internvl2.5-78b',
-}
-
-MLLM_other = {
-    'MOE': 'MOE',
-}
-
-def _send_request(messages, mllm='4o'):
-    
-    if mllm in MLLM_claudeshop:
-        URL = f"your_url_here"  # Replace with the actual URL
-        API_KEY = "your_api_key_here"  # Replace with the actual API key
-        HEADERS = {
-            'Accept': 'application/json',
-            'Authorization': f'Bearer {API_KEY}',
-            'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
-            'Content-Type': 'application/json'
-        }
-        model = MLLM_claudeshop[mllm]
-    elif mllm in MLLM_siliconflow:
-        URL = f"your_url_here"  # Replace with the actual URL
-        API_KEY = "your_api_key_here"  # Replace with the actual API key
-        HEADERS = {
-            'Authorization': f'Bearer {API_KEY}',
-            'Content-Type': 'application/json'
-        }
-        model = MLLM_siliconflow[mllm]
-    elif mllm in MLLM_intern:
-        URL = f"your_url_here"  # Replace with the actual URL
-        API_KEY = "your_api_key_here"  # Replace with the actual API key
-        HEADERS = {
-            'Authorization': f'Bearer {API_KEY}',
-            'Content-Type': 'application/json'
-        }
-        model = MLLM_intern[mllm]
-    elif mllm in MLLM_other:
-        URL = f"your_url_here"  # Replace with the actual URL
-        API_KEY = "your_api_key_here"  # Replace with the actual API key
-        HEADERS = {
-            'Authorization': f'Bearer {API_KEY}',
-            'Content-Type': 'application/json'
-        }
-        model = MLLM_other[mllm]
-        
-        
-    count = 0
-    while True and count < 20:
-        count += 1
-        payload = json.dumps({
-            "model": model,
-            "messages": messages,
-            "temperature": 0.6,
-            "max_tokens": 50
-        })
-        session = requests.Session()
-        session.keep_alive = False
-        response = session.post(URL, headers=HEADERS, data=payload, verify=True)
-        try:
-            content = response.json()['choices'][0]['message']['content']
-            break
-        except:
-            content=None
-            pass
-    
-    return content
-    
-        
-
-
-def llm_formula(formula, var_list, mllm='gpt-4o'):
-    content = f'''
-        You are provided with a mathematical formula involving multiple variables. Your task is to rewrite this formula in the form of y=f(x0,x1,...).
-        The formula is as follows:
-        {formula}
-        The variables in the formula are denoted as: {', '.join(var_list)}.
-        Replace them in the order they appear with x0, x1, x2, ..., and replace the dependent variable with y.
-        Please output only the reformulated equation, in the form y=x0,x1,..., without any additional information.
-    '''
-    messages = [{"role": "user", "content": content}]
-    content = _send_request(messages, mllm=mllm)
-    return content
-
-
-def clean_formula_string(formula_str):
-    # 1. 删除 Markdown 残留符号
-    formula_str = formula_str.replace('×', '*').replace('·', '*').replace('÷', '/')
-    formula_str = formula_str.replace('−', '-').replace('^', '**')
-    formula_str = formula_str.replace('“', '"').replace('”', '"').replace('’', "'")
-
-    # 2. 去除 markdown 反引号 ``` 和 $ 符号
-    formula_str = formula_str.replace('`', '').replace('$', '').strip()
-
-    # 3. 提取第一行公式（防止有多行解释性输出）
-    formula_str = formula_str.split('\n')[0].strip()
-
-    # 4. 用正则去除非合法字符（保留基本数学表达式）
-    formula_str = re.sub(r'[^\w\s\+\-\*/\^\=\.\(\)]', '', formula_str)
-
-    # 5. 确保左右去空格
-    return formula_str.strip()
-
-def llm_evaluate(inferred_formula, true_formula, mllm='gpt-4o'):
-    content = f'''
-        You are given two mathematical formulas. Your task is to evaluate how structurally similar they are, and return a similarity score between 0 and 1.
-
-        The score should reflect how closely the formulas match in terms of:
-        - Mathematical operations and structure (e.g., same use of +, *, sin, etc.)
-        - Term arrangement and complexity
-        - Overall symbolic expression and intent
-
-        A score of:
-        - 1 means the formulas are structurally identical or mathematically equivalent
-        - Around 0.8-0.9 means they are very similar but not identical
-        - Around 0.5 means moderately similar (e.g., same overall shape but different terms)
-        - Near 0 means structurally unrelated formulas
-
-        Do not consider numerical evaluation or specific input values — only the symbolic structure and mathematical form.
-
-        Formulas:
-        Inferred Formula: {inferred_formula}
-        True Formula: {true_formula}
-
-        ONLY RETURN [THE SIMILARITY SCORE]
-    '''
-    messages = [{"role": "user", "content": content}]
-    similarity_score = _send_request(messages, mllm=mllm)
-    return similarity_score[-4:]
-
-def llm_translate(dirty_formula, mllm='gpt-4o'):
-    content = f'''
-        This is a language model's judgment on a mathematical formula. Please help me extract the mathematical formula from this judgment and return it:
-        {dirty_formula}
-        Please serve pi as pi and use x0, x1, x2,... to represent the variable names.
-        ONLY RETURN THE FORMULA STRING (Not LATEX).
-    '''
-    messages = [{"role": "user", "content": content}]
-    clean_formula = _send_request(messages, mllm=mllm)
-    return clean_formula
-
-def is_symbolically_equivalent(formula1, formula2, n_var=2):
-    try:
-        x = [sp.Symbol(f'x{i}') for i in range(n_var)]
-
-        expr1 = sp.sympify(formula1.split('=')[1] if '=' in formula1 else formula1)
-        expr2 = sp.sympify(formula2.split('=')[1] if '=' in formula2 else formula2)
-
-        return sp.simplify(expr1 - expr2) == 0
-    except Exception:
-        return False
-
-def parse_formula(formula_str, n_var=2):
-    try:
-        if '=' in formula_str:
-            _, expr_str = formula_str.split('=', 1)
-        else:
-            expr_str = formula_str
-        variables = [sp.Symbol(f'x{i}') for i in range(n_var)]
-        expr = sp.sympify(expr_str)
-        func = sp.lambdify(variables, expr, modules='numpy')
-        return func
-    except Exception as e:
-        print(f'[Parse Error] {formula_str}\n{e}')
-        return None
-
-def evaluate_formula_metrics(formula_str, true_formula, x, y_true, n_var=2, mllm='gpt-4o'):
-    metrics = {
-        'LLM_Score': None,
-        'RMSE': None,
-        'SymbolicMatch': False,
-        'R2': -100000.0
-    }
-
-    # 结构评分（用 LLM）
-    metrics['LLM_Score'] = llm_evaluate(formula_str, true_formula, mllm=mllm)
-
-    # 数值拟合
-    func = parse_formula(formula_str, n_var)
-    if func is not None:
-        try:
-            x_vars = [x[:, i] for i in range(n_var)]
-            y_pred = func(*x_vars)
-            if np.isscalar(y_pred):
-                y_pred = np.full_like(y_true, y_pred)
-            metrics['RMSE'] = root_mean_squared_error(y_true, y_pred)
-            metrics['R2'] = r2_score(y_true, y_pred)
-        except Exception:
-            pass
-
-    # 判断方程等价性
-    metrics['SymbolicMatch'] = is_symbolically_equivalent(formula_str, true_formula, n_var)
-
-    return metrics
-
-
-
-mllm = 'gpt-4o'
-sample_num = 100
-n_var = 2
-
-os.makedirs(f'{n_var}d/', exist_ok=True)
-for seed_idx in [1]:
-    try:
-        formula_2d = pd.read_csv(f'{n_var}d/Feynman_{n_var}d.csv')
-    except:
-        formula_2d = pd.DataFrame(columns=['Formula', 'Filename', 'n_variables'])
-
-        collect = pd.read_csv('Feynman/FeynmanEquations.csv')
-        try:
-            for index, row in collect.iterrows():
-                file_path = f'Feynman/Feynman_with_units/' + str(row['Filename'])
-                formula = row['Formula']
-                n_variables = int(row['# variables'])
-                
-                if n_variables == n_var:
-                    try:
-                        dataset = np.loadtxt(file_path)
-                    except:
-                        continue
-                    if dataset.shape[1] == n_variables + 1:
-                        var_list = [row[f'v{var_idx+1}_name'] for var_idx in range(n_variables)]
-                        new_formula = llm_formula(formula, var_list)
-                        print(index, formula, '——>', new_formula)
-                    else:
-                        continue
-                    formula_2d = formula_2d._append({'Formula': new_formula, 'Filename': row['Filename'], 'n_variables': n_variables}, ignore_index=True)
-        except Exception as e:
-            print(e)
-
-        formula_2d.to_csv(f'{n_var}d/Feynman_{n_var}d.csv', index=False)
-    
-    try:
-        result = pd.read_csv(f'{n_var}d/Feynman_{n_var}d_s{sample_num}_{mllm}.csv')
-    except:
-        result = pd.DataFrame({
-            'Index': pd.Series(dtype=int),
-            'GT': pd.Series(dtype=str),
-            'Pred': pd.Series(dtype=str),
-            'Score': pd.Series(dtype=float),
-            'RMSE': pd.Series(dtype=float),
-            'R2': pd.Series(dtype=float),
-            'SymbolicMatch': pd.Series(dtype=bool)
-        })
-
-    for index, row in formula_2d.iterrows():
-        true_formula = row['Formula']
-        file_path = f'Feynman/Feynman_with_units/' + str(row['Filename'])
-        dataset = np.loadtxt(file_path)
-        rand_idx = np.random.choice(dataset.shape[0], sample_num, replace=False)
-        dataset = dataset[rand_idx]
-        x = dataset[:, :n_var]
-        y_true = dataset[:, -1]
-        
-        data_samples = '\n'.join([f'x0={x1:.4f}, x1={x2:.4f}, y={y:.4f}' for x1, x2, y in dataset[:-1]])
-        content = f'''
-            You will be provided with a set of input-output pairs. Based on these data, infer the mathematical relationship between y and multiple input variables. Please note that the possible mathematical operations include: +, -, *, /, exp, sqrt, sin, arcsin, and constant terms.
-            The input sample data are as follows:
-            {data_samples}
-            Based on the above data, please infer the possible formula. Ensure that your inference applies to all the provided data points, and consider both linear and nonlinear combinations.
-            Verify whether your formula applies to the following new data point and adjust it to ensure accuracy:
-            {f'x0={dataset[-1, 0]:.4f}, x1={dataset[-1, 1]:.4f}, y={dataset[-1, 2]:.4f}'}
-            Finally, please output only the formula string you inferred (e.g. z=x_0 * x_1), without any additional information.
-        '''
-        messages = [{"role": "user", "content": content}]
-
-        infer_formula  = _send_request(messages, mllm=mllm)
-        infer_formula  = llm_translate(infer_formula, mllm='gpt-4o') 
-        infer_formula  = clean_formula_string(infer_formula)
-        metrics = evaluate_formula_metrics(infer_formula, true_formula, x, y_true, n_var=n_var, mllm='gpt-4o')
-        
-        print(f'GT: {true_formula.ljust(40)} | Pred: {infer_formula.ljust(40)} | Score: {metrics["LLM_Score"]} | RMSE: {metrics["RMSE"]} | R2: {metrics["R2"]} | Match: {metrics["SymbolicMatch"]}')
-        result = result._append({
-            'Index': seed_idx,
-            'GT': true_formula,
-            'Pred': infer_formula,
-            'Score': metrics['LLM_Score'],
-            'RMSE': metrics['RMSE'],
-            'R2': metrics['R2'],
-            'SymbolicMatch': bool(metrics['SymbolicMatch'])
-        }, ignore_index=True)
-
-    result.to_csv(f'{n_var}d/Feynman_{n_var}d_s{sample_num}_{mllm}.csv', index=False)
-    if not result.empty:
-        symbolic_accuracy = result['SymbolicMatch'].sum() / len(result)
-        print(f'\model: {mllm},sample_nums: {sample_num},symbolic_accuracy: {symbolic_accuracy:.4f}')
-    else:
-        symbolic_accuracy = 0
-    csv_filepath = f'{n_var}d/Feynman_{n_var}d_s{sample_num}_{mllm}.csv'
-    result.to_csv(csv_filepath, index=False)
-
-    with open(csv_filepath, 'a', encoding='utf-8') as f:
-        f.write("symbolic_accuracy:"+f'{symbolic_accuracy:.4f}')
-        f.write(f"AverageR2,{average_r2:.4f}\n")
-
-    
--- a/opencompass/openicl/icl_prompt_template.py
+++ b/opencompass/openicl/icl_prompt_template.py
@ -50,7 +50,6 @@ class PromptTemplate:
                      for key in ('begin', 'round', 'end'))
            self.prompt_type = 'meta' if ctr == len(
                self.template.keys()) else 'origin'
-
            # Check if token exists in values of tp_dict
            for tp_dict_val in self.template.values():
                if not isinstance(tp_dict_val, (str, list, dict)):
--- a/opencompass/utils/datasets.py
+++ b/opencompass/utils/datasets.py
@ -7,7 +7,7 @@ USER_HOME = os.path.expanduser("~")
 DEFAULT_DATA_FOLDER = os.path.join(USER_HOME, '.cache/opencompass/')


-def get_data_path(dataset_id: str, local_mode: bool = False):
+def get_data_path(dataset_id: str, local_mode: bool = True):
    """return dataset id when getting data from ModelScope/HuggingFace repo, otherwise just
    return local path as is.

--- a/opencompass/utils/datasets_info.py
+++ b/opencompass/utils/datasets_info.py
@ -466,6 +466,11 @@ DATASETS_MAPPING = {
        "hf_id": "",
        "local": "./data/medbullets/medbullets.csv",
    },
+    "opencompass/srbench": {
+        "ms_id": "",
+        "hf_id": "",
+        "local": "url_to_srbench_dataset",
+    },

 }