mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
add 2
This commit is contained in:
parent
e59f0b59cb
commit
d4a69ba65f
58
opencompass/configs/datasets/srbench/srbench_gen.py
Normal file
58
opencompass/configs/datasets/srbench/srbench_gen.py
Normal file
@ -0,0 +1,58 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import (
|
||||
SRbenchDataset,SRbenchDatasetEvaluator
|
||||
)
|
||||
|
||||
from opencompass.evaluator import GenericLLMEvaluator
|
||||
|
||||
|
||||
|
||||
# Prompt sent to the model for every sample. ``{prompt1}`` and ``{prompt2}``
# are left as literal placeholders; their names match the reader's
# ``input_columns`` below (presumably PromptTemplate substitutes them --
# confirm against the template implementation).
# A plain string is used: the text has no real interpolation, so an f-string
# with doubled braces only obscured the placeholders.
INFER_TEMPLATE = '''
You will be provided with a set of input-output pairs. Based on these data, infer the mathematical relationship between y and multiple input variables. Please note that the possible mathematical operations include: +, -, *, /, exp, sqrt, sin, arcsin, and constant terms.
The input sample data are as follows:
{prompt1}
Based on the above data, please infer the possible formula. Ensure that your inference applies to all the provided data points, and consider both linear and nonlinear combinations.
Verify whether your formula applies to the following new data point and adjust it to ensure accuracy:
{prompt2}
Finally, please output only the formula string you inferred (e.g. y=x_0 * x_1), without any additional information.
'''

# Columns read from each dataset row: two prompt inputs and the reference
# formula used as the expected output.
srbench_reader_cfg = dict(
    input_columns=['prompt1', 'prompt2'],
    output_column='Formula',
)

# Zero-shot generation: a single HUMAN turn carrying the prompt above.
srbench_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt=INFER_TEMPLATE),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

srbench_eval_cfg = dict(
    evaluator=dict(type=SRbenchDatasetEvaluator),
    path='opencompass/srbench',
    pred_role='BOT',
)

# Dataset entries exported by this config module.
srbench_datasets = [
    dict(
        abbr='srbench',
        type=SRbenchDataset,
        path='opencompass/srbench',
        reader_cfg=srbench_reader_cfg,
        infer_cfg=srbench_infer_cfg,
        eval_cfg=srbench_eval_cfg,
    ),
]
|
||||
|
@ -92,7 +92,7 @@ from .longbenchv2 import * # noqa: F401, F403
|
||||
from .lveval import * # noqa: F401, F403
|
||||
from .mastermath2024v1 import * # noqa: F401, F403
|
||||
from .matbench import * # noqa: F401, F403
|
||||
from .math import * # noqa: F401, F403
|
||||
from .math2 import * # noqa: F401, F403
|
||||
from .math401 import * # noqa: F401, F403
|
||||
from .math_intern import * # noqa: F401, F403
|
||||
from .mathbench import * # noqa: F401, F403
|
||||
@ -144,6 +144,7 @@ from .simpleqa import * # noqa: F401, F403
|
||||
from .siqa import * # noqa: F401, F403
|
||||
from .smolinstruct import * # noqa: F401, F403
|
||||
from .squad20 import SQuAD20Dataset, SQuAD20Evaluator # noqa: F401, F403
|
||||
from .srbench import *
|
||||
from .storycloze import * # noqa: F401, F403
|
||||
from .strategyqa import * # noqa: F401, F403
|
||||
from .subjective import * # noqa: F401, F403
|
||||
@ -170,3 +171,4 @@ from .xcopa import * # noqa: F401, F403
|
||||
from .xiezhi import XiezhiDataset, XiezhiRetriever # noqa: F401, F403
|
||||
from .xlsum import * # noqa: F401, F403
|
||||
from .xsum import * # noqa: F401, F403
|
||||
|
||||
|
@ -71,7 +71,7 @@ class BigCodeBenchEvaluator(BaseEvaluator):
|
||||
self,
|
||||
release_version='v0.1.2',
|
||||
eval_type='instruct',
|
||||
remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/', # noqa
|
||||
remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/',
|
||||
dataset_version: str = 'full',
|
||||
local_mode: bool = False,
|
||||
path: str = 'opencompass/bigcodebench',
|
||||
|
@ -10,7 +10,7 @@ import mmengine
|
||||
import numpy as np
|
||||
from datasets import Dataset, load_dataset
|
||||
|
||||
from opencompass.datasets.math import MATHAgentEvaluator, math_postprocess_v2
|
||||
from opencompass.datasets.math2 import MATHAgentEvaluator, math_postprocess_v2
|
||||
from opencompass.models import OpenAISDK
|
||||
from opencompass.openicl.icl_evaluator import BaseEvaluator
|
||||
from opencompass.openicl.icl_inferencer.icl_base_inferencer import \
|
||||
|
@ -1,16 +0,0 @@
|
||||
Formula,Filename,n_variables
|
||||
y = exp(-(x1/x0)**2/2) / (sqrt(2*pi)*x0),I.6.2,2
|
||||
y = x0 * x1,I.12.1,2
|
||||
y = x0 * x1,I.12.5,2
|
||||
y = 1/2 * x0 * x1**2,I.14.4,2
|
||||
y = x0 / x1,I.25.13,2
|
||||
y = arcsin(x0 * sin(x1)),I.26.2,2
|
||||
y = x0 / x1,I.29.4,2
|
||||
y = (x1 / (2 * pi)) * x0,I.34.27,2
|
||||
y = (3/2) * x0 * x1,I.39.1,2
|
||||
y = x0 / (4 * pi * x1**2),II.3.24,2
|
||||
y = x0 * x1**2 / 2,II.8.31,2
|
||||
y = 1 + x0 * x1 / (1 - (x0 * x1 / 3)),II.11.28,2
|
||||
y = x0 * x1**2,II.27.18,2
|
||||
y = x0 / (2 * (1 + x1)),II.38.14,2
|
||||
y = x0 * (x1 / (2 * pi)),III.12.43,2
|
|
@ -1,4 +1,19 @@
|
||||
Formula,Filename,n_variables
|
||||
y = exp(-(x1/x0)**2/2) / (sqrt(2*pi)*x0),I.6.2,2
|
||||
y = x0 * x1,I.12.1,2
|
||||
y = x0 * x1,I.12.5,2
|
||||
y = 1/2 * x0 * x1**2,I.14.4,2
|
||||
y = x0 / x1,I.25.13,2
|
||||
y = arcsin(x0 * sin(x1)),I.26.2,2
|
||||
y = x0 / x1,I.29.4,2
|
||||
y = (x1 / (2 * pi)) * x0,I.34.27,2
|
||||
y = (3/2) * x0 * x1,I.39.1,2
|
||||
y = x0 / (4 * pi * x1**2),II.3.24,2
|
||||
y = x0 * x1**2 / 2,II.8.31,2
|
||||
y = 1 + x0 * x1 / (1 - (x0 * x1 / 3)),II.11.28,2
|
||||
y = x0 * x1**2,II.27.18,2
|
||||
y = x0 / (2 * (1 + x1)),II.38.14,2
|
||||
y = x0 * (x1 / (2 * pi)),III.12.43,2
|
||||
y = exp(-((x1 - x2) / x0)**2 / 2) / (sqrt(2 * pi) * x0),I.6.2b,3
|
||||
y = x0 / sqrt(1 - x1**2 / x2**2),I.10.7,3
|
||||
y = x0*x2/(4*pi*x1*x2**3),I.12.4,3
|
|
@ -1,131 +0,0 @@
|
||||
Filename,Number,Output,Formula,# variables,v1_name,v1_low,v1_high,v2_name,v2_low,v2_high,v3_name,v3_low,v3_high,v4_name,v4_low,v4_high,v5_name,v5_low,v5_high,v6_name,v6_low,v6_high,v7_name,v7_low,v7_high,v8_name,v8_low,v8_high,v9_name,v9_low,v9_high,v10_name,v10_low,v10_high
|
||||
I.6.2a,1,f,exp(-theta**2/2)/sqrt(2*pi),1,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.6.2,2,f,exp(-(theta/sigma)**2/2)/(sqrt(2*pi)*sigma),2,sigma,1,3,theta,1,3,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.6.2b,3,f,exp(-((theta-theta1)/sigma)**2/2)/(sqrt(2*pi)*sigma),3,sigma,1,3,theta,1,3,theta1,1,3,,,,,,,,,,,,,,,,,,,,,
|
||||
I.8.14,4,d,sqrt((x2-x1)**2+(y2-y1)**2),4,x1,1,5,x2,1,5,y1,1,5,y2,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.9.18,5,F,G*m1*m2/((x2-x1)**2+(y2-y1)**2+(z2-z1)**2),9,m1,1,2,m2,1,2,G,1,2,x1,3,4,x2,1,2,y1,3,4,y2,1,2,z1,3,4,z2,1,2,,,
|
||||
I.10.7,6,m,m_0/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
||||
I.11.19,7,A,x1*y1+x2*y2+x3*y3,6,x1,1,5,x2,1,5,x3,1,5,y1,1,5,y2,1,5,y3,1,5,,,,,,,,,,,,
|
||||
I.12.1,8,F,mu*Nn,2,mu,1,5,Nn,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.12.2,10,F,q1*q2*r/(4*pi*epsilon*r**3),4,q1,1,5,q2,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.12.4,11,Ef,q1*r/(4*pi*epsilon*r**3),3,q1,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.12.5,12,F,q2*Ef,2,q2,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.12.11,13,F,q*(Ef+B*v*sin(theta)),5,q,1,5,Ef,1,5,B,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,
|
||||
I.13.4,9,K,1/2*m*(v**2+u**2+w**2),4,m,1,5,v,1,5,u,1,5,w,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.13.12,14,U,G*m1*m2*(1/r2-1/r1),5,m1,1,5,m2,1,5,r1,1,5,r2,1,5,G,1,5,,,,,,,,,,,,,,,
|
||||
I.14.3,15,U,m*g*z,3,m,1,5,g,1,5,z,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.14.4,16,U,1/2*k_spring*x**2,2,k_spring,1,5,x,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.15.3x,17,x1,(x-u*t)/sqrt(1-u**2/c**2),4,x,5,10,u,1,2,c,3,20,t,1,2,,,,,,,,,,,,,,,,,,
|
||||
I.15.3t,18,t1,(t-u*x/c**2)/sqrt(1-u**2/c**2),4,x,1,5,c,3,10,u,1,2,t,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.15.1,19,p,m_0*v/sqrt(1-v**2/c**2),3,m_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
||||
I.16.6,20,v1,(u+v)/(1+u*v/c**2),3,c,1,5,v,1,5,u,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.18.4,21,r,(m1*r1+m2*r2)/(m1+m2),4,m1,1,5,m2,1,5,r1,1,5,r2,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.18.12,22,tau,r*F*sin(theta),2,r,1,5,F,1,5,theta,0,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.18.14,23,L,m*r*v*sin(theta),3,m,1,5,r,1,5,v,1,5,theta,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.24.6,24,E_n,1/2*m*(omega**2+omega_0**2)*1/2*x**2,4,m,1,3,omega,1,3,omega_0,1,3,x,1,3,,,,,,,,,,,,,,,,,,
|
||||
I.25.13,25,Volt,q/C,2,q,1,5,C,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.26.2,26,theta1,arcsin(n*sin(theta2)),2,n,0,1,theta2,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.27.6,27,foc,1/(1/d1+n/d2),3,d1,1,5,d2,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.29.4,28,k,omega/c,2,omega,1,10,c,1,10,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.29.16,29,x,sqrt(x1**2+x2**2-2*x1*x2*cos(theta1-theta2)),4,x1,1,5,x2,1,5,theta1,1,5,theta2,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.30.3,30,Int,Int_0*sin(n*theta/2)**2/sin(theta/2)**2,3,Int_0,1,5,theta,1,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.30.5,31,theta,arcsin(lambd/(n*d)),3,lambd,1,2,d,2,5,n,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.32.5,32,Pwr,q**2*a**2/(6*pi*epsilon*c**3),4,q,1,5,a,1,5,epsilon,1,5,c,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.32.17,33,Pwr,(1/2*epsilon*c*Ef**2)*(8*pi*r**2/3)*(omega**4/(omega**2-omega_0**2)**2),6,epsilon,1,2,c,1,2,Ef,1,2,r,1,2,omega,1,2,omega_0,3,5,,,,,,,,,,,,
|
||||
I.34.8,34,omega,q*v*B/p,4,q,1,5,v,1,5,B,1,5,p,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.34.1,35,omega,omega_0/(1-v/c),3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.34.14,36,omega,(1+v/c)/sqrt(1-v**2/c**2)*omega_0,3,c,3,10,v,1,2,omega_0,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.34.27,37,E_n,(h/(2*pi))*omega,2,omega,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.37.4,38,Int,I1+I2+2*sqrt(I1*I2)*cos(delta),3,I1,1,5,I2,1,5,delta,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.38.12,39,r,4*pi*epsilon*(h/(2*pi))**2/(m*q**2),3,m,1,5,q,1,5,h,1,5,epsilon,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.39.1,40,E_n,3/2*pr*V,2,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
I.39.11,41,E_n,1/(gamma-1)*pr*V,3,gamma,2,5,pr,1,5,V,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.39.22,42,pr,n*kb*T/V,4,n,1,5,T,1,5,V,1,5,kb,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.40.1,43,n,n_0*exp(-m*g*x/(kb*T)),6,n_0,1,5,m,1,5,x,1,5,T,1,5,g,1,5,kb,1,5,,,,,,,,,,,,
|
||||
I.41.16,44,L_rad,h/(2*pi)*omega**3/(pi**2*c**2*(exp((h/(2*pi))*omega/(kb*T))-1)),5,omega,1,5,T,1,5,h,1,5,kb,1,5,c,1,5,,,,,,,,,,,,,,,
|
||||
I.43.16,45,v,mu_drift*q*Volt/d,4,mu_drift,1,5,q,1,5,Volt,1,5,d,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.43.31,46,D,mob*kb*T,3,mob,1,5,T,1,5,kb,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.43.43,47,kappa,1/(gamma-1)*kb*v/A,4,gamma,2,5,kb,1,5,A,1,5,v,1,5,,,,,,,,,,,,,,,,,,
|
||||
I.44.4,48,E_n,n*kb*T*ln(V2/V1),5,n,1,5,kb,1,5,T,1,5,V1,1,5,V2,1,5,,,,,,,,,,,,,,,
|
||||
I.47.23,49,c,sqrt(gamma*pr/rho),3,gamma,1,5,pr,1,5,rho,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
I.48.2,50,E_n,m*c**2/sqrt(1-v**2/c**2),3,m,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
||||
I.50.26,51,x,x1*(cos(omega*t)+alpha*cos(omega*t)**2),4,x1,1,3,omega,1,3,t,1,3,alpha,1,3,,,,,,,,,,,,,,,,,,
|
||||
II.2.42,52,Pwr,kappa*(T2-T1)*A/d,5,kappa,1,5,T1,1,5,T2,1,5,A,1,5,d,1,5,,,,,,,,,,,,,,,
|
||||
II.3.24,53,flux,Pwr/(4*pi*r**2),2,Pwr,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
II.4.23,54,Volt,q/(4*pi*epsilon*r),3,q,1,5,epsilon,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.6.11,55,Volt,1/(4*pi*epsilon)*p_d*cos(theta)/r**2,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
|
||||
II.6.15a,56,Ef,p_d/(4*pi*epsilon)*3*z/r**5*sqrt(x**2+y**2),6,epsilon,1,3,p_d,1,3,r,1,3,x,1,3,y,1,3,z,1,3,,,,,,,,,,,,
|
||||
II.6.15b,57,Ef,p_d/(4*pi*epsilon)*3*cos(theta)*sin(theta)/r**3,4,epsilon,1,3,p_d,1,3,theta,1,3,r,1,3,,,,,,,,,,,,,,,,,,
|
||||
II.8.7,58,E_n,3/5*q**2/(4*pi*epsilon*d),3,q,1,5,epsilon,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.8.31,59,E_den,epsilon*Ef**2/2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
II.10.9,60,Ef,sigma_den/epsilon*1/(1+chi),3,sigma_den,1,5,epsilon,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.11.3,61,x,q*Ef/(m*(omega_0**2-omega**2)),5,q,1,3,Ef,1,3,m,1,3,omega_0,3,5,omega,1,2,,,,,,,,,,,,,,,
|
||||
II.11.17,62,n,n_0*(1+p_d*Ef*cos(theta)/(kb*T)),6,n_0,1,3,kb,1,3,T,1,3,theta,1,3,p_d,1,3,Ef,1,3,,,,,,,,,,,,
|
||||
II.11.20,63,Pol,n_rho*p_d**2*Ef/(3*kb*T),5,n_rho,1,5,p_d,1,5,Ef,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
|
||||
II.11.27,64,Pol,n*alpha/(1-(n*alpha/3))*epsilon*Ef,4,n,0,1,alpha,0,1,epsilon,1,2,Ef,1,2,,,,,,,,,,,,,,,,,,
|
||||
II.11.28,65,theta,1+n*alpha/(1-(n*alpha/3)),2,n,0,1,alpha,0,1,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
II.13.17,66,B,1/(4*pi*epsilon*c**2)*2*I/r,4,epsilon,1,5,c,1,5,I,1,5,r,1,5,,,,,,,,,,,,,,,,,,
|
||||
II.13.23,67,rho_c,rho_c_0/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
||||
II.13.34,68,j,rho_c_0*v/sqrt(1-v**2/c**2),3,rho_c_0,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,,,,,,,
|
||||
II.15.4,69,E_n,-mom*B*cos(theta),3,mom,1,5,B,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.15.5,70,E_n,-p_d*Ef*cos(theta),3,p_d,1,5,Ef,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.21.32,71,Volt,q/(4*pi*epsilon*r*(1-v/c)),5,q,1,5,epsilon,1,5,r,1,5,v,1,2,c,3,10,,,,,,,,,,,,,,,
|
||||
II.24.17,72,k,sqrt(omega**2/c**2-pi**2/d**2),3,omega,4,6,c,1,2,d,2,4,,,,,,,,,,,,,,,,,,,,,
|
||||
II.27.16,73,flux,epsilon*c*Ef**2,3,epsilon,1,5,c,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.27.18,74,E_den,epsilon*Ef**2,2,epsilon,1,5,Ef,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
II.34.2a,75,I,q*v/(2*pi*r),3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.34.2,76,mom,q*v*r/2,3,q,1,5,v,1,5,r,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.34.11,77,omega,g_*q*B/(2*m),4,g_,1,5,q,1,5,B,1,5,m,1,5,,,,,,,,,,,,,,,,,,
|
||||
II.34.29a,78,mom,q*h/(4*pi*m),3,q,1,5,h,1,5,m,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.34.29b,79,E_n,g_*mom*B*Jz/(h/(2*pi)),5,g_,1,5,h,1,5,Jz,1,5,mom,1,5,B,1,5,,,,,,,,,,,,,,,
|
||||
II.35.18,80,n,n_0/(exp(mom*B/(kb*T))+exp(-mom*B/(kb*T))),5,n_0,1,3,kb,1,3,T,1,3,mom,1,3,B,1,3,,,,,,,,,,,,,,,
|
||||
II.35.21,81,M,n_rho*mom*tanh(mom*B/(kb*T)),5,n_rho,1,5,mom,1,5,B,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,
|
||||
II.36.38,82,f,mom*H/(kb*T)+(mom*alpha)/(epsilon*c**2*kb*T)*M,8,mom,1,3,H,1,3,kb,1,3,T,1,3,alpha,1,3,epsilon,1,3,c,1,3,M,1,3,,,,,,
|
||||
II.37.1,83,E_n,mom*(1+chi)*B,6,mom,1,5,B,1,5,chi,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
II.38.3,84,F,Y*A*x/d,4,Y,1,5,A,1,5,d,1,5,x,1,5,,,,,,,,,,,,,,,,,,
|
||||
II.38.14,85,mu_S,Y/(2*(1+sigma)),2,Y,1,5,sigma,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
III.4.32,86,n,1/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
|
||||
III.4.33,87,E_n,(h/(2*pi))*omega/(exp((h/(2*pi))*omega/(kb*T))-1),4,h,1,5,omega,1,5,kb,1,5,T,1,5,,,,,,,,,,,,,,,,,,
|
||||
III.7.38,88,omega,2*mom*B/(h/(2*pi)),3,mom,1,5,B,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
III.8.54,89,prob,sin(E_n*t/(h/(2*pi)))**2,3,E_n,1,2,t,1,2,h,1,4,,,,,,,,,,,,,,,,,,,,,
|
||||
III.9.52,90,prob,(p_d*Ef*t/(h/(2*pi)))*sin((omega-omega_0)*t/2)**2/((omega-omega_0)*t/2)**2,6,p_d,1,3,Ef,1,3,t,1,3,h,1,3,omega,1,5,omega_0,1,5,,,,,,,,,,,,
|
||||
III.10.19,91,E_n,mom*sqrt(Bx**2+By**2+Bz**2),3,mom,1,5,Bx,1,5,By,1,5,Bz,1,5,,,,,,,,,,,,,,,,,,
|
||||
III.12.43,92,L,n*(h/(2*pi)),2,n,1,5,h,1,5,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
III.13.18,93,v,2*E_n*d**2*k/(h/(2*pi)),4,E_n,1,5,d,1,5,k,1,5,h,1,5,,,,,,,,,,,,,,,,,,
|
||||
III.14.14,94,I,I_0*(exp(q*Volt/(kb*T))-1),5,I_0,1,5,q,1,2,Volt,1,2,kb,1,2,T,1,2,,,,,,,,,,,,,,,
|
||||
III.15.12,95,E_n,2*U*(1-cos(k*d)),3,U,1,5,k,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
III.15.14,96,m,(h/(2*pi))**2/(2*E_n*d**2),3,h,1,5,E_n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
III.15.27,97,k,2*pi*alpha/(n*d),3,alpha,1,5,n,1,5,d,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
III.17.37,98,f,beta*(1+alpha*cos(theta)),3,beta,1,5,alpha,1,5,theta,1,5,,,,,,,,,,,,,,,,,,,,,
|
||||
III.19.51,99,E_n,-m*q**4/(2*(4*pi*epsilon)**2*(h/(2*pi))**2)*(1/n**2),4,m,1,5,q,1,5,h,1,5,n,1,5,epsilon,1,5,,,,,,,,,,,,,,,
|
||||
III.21.20,100,j,-rho_c_0*q*A_vec/m,4,rho_c_0,1,5,q,1,5,A_vec,1,5,m,1,5,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
||||
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
|
@ -1,330 +0,0 @@
|
||||
import os
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
import sympy as sp
|
||||
from sklearn.metrics import r2_score,root_mean_squared_error
|
||||
import torch
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
# Lookup tables mapping the short model aliases used in this script to the
# model identifier sent in the request payload. There is one table per API
# endpoint; `_send_request` picks the endpoint by finding the alias here.
MLLM_claudeshop = {
    'gpt-3.5': 'gpt-3.5-turbo',
    'gpt-4o': 'chatgpt-4o-latest',
    'gpt-4': 'gpt-4',
    'gpt-o3': 'o3-mini',
    'claude-3-7': 'claude-3-7-sonnet-20250219-thinking',
    'Qwen-72b': 'qwen-72b',
    'Qwen2.5': 'qwen2.5-32b-instruct',
    'Qwen-vl': 'qwen-vl-max',
    'Gemini-1.5p': 'gemini-1.5-pro-latest',
    'Gemini-2.0p': 'gemini-2.0-pro-exp-02-05',
    'Gemini-2.5p': 'gemini-2.5-pro-exp-03-25',
    'grok-2': 'grok-2',
    'grok-3': 'grok-3',
}

MLLM_siliconflow = {
    'deepseek-v3': 'deepseek-ai/DeepSeek-V3',
    'deepseek-r1': 'Pro/deepseek-ai/DeepSeek-R1',
    'QwQ-32b': 'Qwen/QwQ-32B',
    'Qwen2.5-vl-72b': 'Qwen/Qwen2.5-VL-72B-Instruct',
}

MLLM_intern = {
    'InternLM3-8B': 'internlm3-8b-instruct',
    # NOTE(review): alias says "InternVL3" but the identifier is 2.5 -- confirm.
    'InternVL3-78B': 'internvl2.5-78b',
}

MLLM_other = {
    'MOE': 'MOE',
}
|
||||
|
||||
def _resolve_endpoint(mllm):
    """Return ``(url, headers, model)`` for the model alias *mllm*.

    Raises:
        ValueError: if *mllm* is not a key of any ``MLLM_*`` table.
    """
    # One entry per provider: (alias table, URL, API key, extra headers).
    # The URL / key strings are placeholders to be replaced per provider.
    providers = (
        (
            MLLM_claudeshop,
            "your_url_here",  # Replace with the actual URL
            "your_api_key_here",  # Replace with the actual API key
            {
                'Accept': 'application/json',
                'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
            },
        ),
        (
            MLLM_siliconflow,
            "your_url_here",  # Replace with the actual URL
            "your_api_key_here",  # Replace with the actual API key
            {},
        ),
        (
            MLLM_intern,
            "your_url_here",  # Replace with the actual URL
            "your_api_key_here",  # Replace with the actual API key
            {},
        ),
        (
            MLLM_other,
            "your_url_here",  # Replace with the actual URL
            "your_api_key_here",  # Replace with the actual API key
            {},
        ),
    )
    for table, url, api_key, extra_headers in providers:
        if mllm in table:
            headers = {
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            }
            headers.update(extra_headers)
            return url, headers, table[mllm]
    raise ValueError(f'Unknown model alias: {mllm!r}')


def _send_request(messages, mllm='4o'):
    """POST a chat request and return the reply text.

    The request is retried up to 20 times while the response cannot be
    parsed into ``choices[0].message.content``.

    Args:
        messages: list of chat message dicts placed in the payload.
        mllm: model alias; must be a key of one of the ``MLLM_*`` tables.
            Note that the default ``'4o'`` is not such a key, so callers
            have to pass an alias explicitly.

    Returns:
        The reply content, or ``None`` if no attempt produced a parsable
        response.

    Raises:
        ValueError: if *mllm* is not a known alias (previously this surfaced
            as an UnboundLocalError further down).
    """
    url, headers, model = _resolve_endpoint(mllm)

    # The payload does not change between attempts, so build it once.
    payload = json.dumps({
        "model": model,
        "messages": messages,
        "temperature": 0.6,
        "max_tokens": 50
    })

    content = None
    # A single session, closed on exit, replaces one leaked session per try.
    with requests.Session() as session:
        session.keep_alive = False  # kept from the original code
        for _ in range(20):
            # Transport errors raised by post() propagate, as before.
            # NOTE(review): no timeout is set, so a stalled server blocks here.
            response = session.post(url, headers=headers, data=payload, verify=True)
            try:
                content = response.json()['choices'][0]['message']['content']
                break
            except (ValueError, KeyError, IndexError, TypeError):
                # Body was not JSON or lacked the expected structure: retry.
                content = None

    return content
|
||||
|
||||
|
||||
|
||||
|
||||
def llm_formula(formula, var_list, mllm='gpt-4o'):
    """Ask the LLM to rewrite *formula* in terms of ``x0, x1, ...`` and ``y``.

    Args:
        formula: formula text to be rewritten.
        var_list: variable names, joined with ``', '`` into the prompt.
        mllm: model alias forwarded to `_send_request`.

    Returns:
        Whatever `_send_request` returns for the prompt.
    """
    variable_names = ', '.join(var_list)
    prompt = f'''
You are provided with a mathematical formula involving multiple variables. Your task is to rewrite this formula in the form of y=f(x0,x1,...).
The formula is as follows:
{formula}
The variables in the formula are denoted as: {variable_names}.
Replace them in the order they appear with x0, x1, x2, ..., and replace the dependent variable with y.
Please output only the reformulated equation, in the form y=x0,x1,..., without any additional information.
'''
    return _send_request([{"role": "user", "content": prompt}], mllm=mllm)
|
||||
|
||||
|
||||
def clean_formula_string(formula_str):
    """Normalize raw LLM output into a plain single-line formula string.

    Args:
        formula_str: raw text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned first line of the text, or ``''`` when the input is
        ``None`` or empty.
    """
    # An upstream request can yield None; treat it like an empty reply
    # instead of failing on the first string method.
    if not formula_str:
        return ''

    # 1-2. Map typographic operators and quotes to ASCII, turn ^ into **,
    #      and drop Markdown backticks and $ delimiters, in a single pass.
    char_map = str.maketrans({
        '×': '*',
        '·': '*',
        '÷': '/',
        '−': '-',
        '^': '**',
        '“': '"',
        '”': '"',
        '’': "'",
        '`': None,
        '$': None,
    })
    formula_str = formula_str.translate(char_map).strip()

    # 3. Keep only the first line, in case explanatory lines follow.
    formula_str = formula_str.split('\n')[0].strip()

    # 4. Remove every character outside the basic expression alphabet
    #    (word characters, whitespace, + - * / ^ = . and parentheses).
    formula_str = re.sub(r'[^\w\s\+\-\*/\^\=\.\(\)]', '', formula_str)

    # 5. Trim whitespace left over at either end.
    return formula_str.strip()
|
||||
|
||||
def llm_evaluate(inferred_formula, true_formula, mllm='gpt-4o'):
    """Ask the LLM for a structural similarity score between two formulas.

    Args:
        inferred_formula: formula text produced by the model under test.
        true_formula: reference formula text.
        mllm: model alias forwarded to `_send_request`.

    Returns:
        The last four characters of the whitespace-stripped reply (a string
        such as ``'0.85'``), or ``None`` when the request produced no reply.
    """
    content = f'''
You are given two mathematical formulas. Your task is to evaluate how structurally similar they are, and return a similarity score between 0 and 1.

The score should reflect how closely the formulas match in terms of:
- Mathematical operations and structure (e.g., same use of +, *, sin, etc.)
- Term arrangement and complexity
- Overall symbolic expression and intent

A score of:
- 1 means the formulas are structurally identical or mathematically equivalent
- Around 0.8-0.9 means they are very similar but not identical
- Around 0.5 means moderately similar (e.g., same overall shape but different terms)
- Near 0 means structurally unrelated formulas

Do not consider numerical evaluation or specific input values — only the symbolic structure and mathematical form.

Formulas:
Inferred Formula: {inferred_formula}
True Formula: {true_formula}

ONLY RETURN [THE SIMILARITY SCORE]
'''
    messages = [{"role": "user", "content": content}]
    similarity_score = _send_request(messages, mllm=mllm)

    # The request helper returns None when every attempt failed; slicing
    # None would raise TypeError.
    if similarity_score is None:
        return None

    # Strip first so a trailing newline or space does not displace the
    # digits out of the four-character window.
    return similarity_score.strip()[-4:]
|
||||
|
||||
def llm_translate(dirty_formula, mllm='gpt-4o'):
    """Ask the LLM to extract a bare formula string from free-form text.

    Args:
        dirty_formula: text that contains a formula.
        mllm: model alias forwarded to `_send_request`.

    Returns:
        Whatever `_send_request` returns for the extraction prompt.
    """
    prompt = f'''
This is a language model's judgment on a mathematical formula. Please help me extract the mathematical formula from this judgment and return it:
{dirty_formula}
Please serve pi as pi and use x0, x1, x2,... to represent the variable names.
ONLY RETURN THE FORMULA STRING (Not LATEX).
'''
    return _send_request([{"role": "user", "content": prompt}], mllm=mllm)
|
||||
|
||||
def is_symbolically_equivalent(formula1, formula2, n_var=2):
    """Return True when the two formulas simplify to the same expression.

    For each formula only the text after the first ``=`` is compared (the
    whole string when there is no ``=``).

    Args:
        formula1: first formula string.
        formula2: second formula string.
        n_var: unused; kept so existing callers keep working.

    Returns:
        ``True`` if ``simplify(expr1 - expr2)`` equals 0; ``False`` otherwise,
        including when either string cannot be parsed.
    """
    def _right_hand_side(formula):
        # Split on the first '=' only, matching parse_formula.
        return formula.split('=', 1)[1] if '=' in formula else formula

    try:
        # NOTE(review): sympify evaluates its input string; these formulas
        # come from LLM output, so treat them as untrusted.
        expr1 = sp.sympify(_right_hand_side(formula1))
        expr2 = sp.sympify(_right_hand_side(formula2))
        return sp.simplify(expr1 - expr2) == 0
    except Exception:
        # Any parse or simplification failure counts as "not equivalent".
        return False
|
||||
|
||||
def parse_formula(formula_str, n_var=2):
    """Turn a formula string into a NumPy-backed callable.

    Args:
        formula_str: formula text; if it contains ``=``, only the part after
            the first ``=`` is parsed.
        n_var: number of positional arguments; they are bound to the symbols
            ``x0 .. x{n_var-1}``.

    Returns:
        The function built by ``sympy.lambdify``, or ``None`` if anything
        fails (the error is printed).
    """
    try:
        rhs = formula_str.split('=', 1)[1] if '=' in formula_str else formula_str
        symbols = [sp.Symbol(f'x{i}') for i in range(n_var)]
        # NOTE(review): sympify evaluates its input string; formula_str comes
        # from LLM output, so treat it as untrusted.
        return sp.lambdify(symbols, sp.sympify(rhs), modules='numpy')
    except Exception as e:
        print(f'[Parse Error] {formula_str}\n{e}')
        return None
|
||||
|
||||
def evaluate_formula_metrics(formula_str, true_formula, x, y_true, n_var=2, mllm='gpt-4o'):
    """Score a predicted formula against the reference formula and data.

    Args:
        formula_str: predicted formula string.
        true_formula: reference formula string.
        x: 2-D array indexed as ``x[:, i]`` for ``i`` in ``range(n_var)``.
        y_true: target values compared with the formula's predictions.
        n_var: number of input variables.
        mllm: model alias used for the LLM similarity score.

    Returns:
        dict with keys ``'LLM_Score'``, ``'RMSE'``, ``'SymbolicMatch'`` and
        ``'R2'``. ``'RMSE'`` stays ``None`` and ``'R2'`` stays ``-100000.0``
        when the formula cannot be parsed or evaluated.
    """
    metrics = {
        'LLM_Score': None,
        'RMSE': None,
        'SymbolicMatch': False,
        'R2': -100000.0
    }

    # Structural similarity, judged by an LLM.
    metrics['LLM_Score'] = llm_evaluate(formula_str, true_formula, mllm=mllm)

    # Numerical fit on the sampled data points.
    func = parse_formula(formula_str, n_var)
    if func is not None:
        try:
            x_vars = [x[:, i] for i in range(n_var)]
            y_pred = func(*x_vars)
            # A formula without variables evaluates to one scalar; broadcast
            # it so the metric functions receive matching shapes.
            if np.isscalar(y_pred):
                y_pred = np.full_like(y_true, y_pred)
            metrics['RMSE'] = root_mean_squared_error(y_true, y_pred)
            metrics['R2'] = r2_score(y_true, y_pred)
        except Exception as e:
            # Best effort: keep the default metrics, but report the failure
            # (same style as parse_formula) rather than hiding it.
            print(f'[Eval Error] {formula_str}\n{e}')

    # Symbolic equivalence check.
    metrics['SymbolicMatch'] = is_symbolically_equivalent(formula_str, true_formula, n_var)

    return metrics
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Experiment driver: build (or load) the x0/x1/... formula table, query the
# model for each formula's data, score the answers and save a results CSV.
# NOTE(review): the nesting below was reconstructed from a view of this script
# that had lost its indentation -- confirm against the real file.
# ---------------------------------------------------------------------------
mllm = 'gpt-4o'     # alias of the model under test (key of an MLLM_* table)
sample_num = 100    # data points sampled per formula
n_var = 2           # only formulas with this many variables are used

os.makedirs(f'{n_var}d/', exist_ok=True)
for seed_idx in [1]:
    formula_csv = f'{n_var}d/Feynman_{n_var}d.csv'
    try:
        formula_2d = pd.read_csv(formula_csv)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # No usable cached table: rebuild it from the master equation list.
        formula_rows = []
        collect = pd.read_csv('Feynman/FeynmanEquations.csv')
        try:
            for index, row in collect.iterrows():
                # Blank padding rows have no variable count; skip them
                # instead of aborting the loop on int(NaN).
                if pd.isna(row['# variables']):
                    continue
                n_variables = int(row['# variables'])
                if n_variables != n_var:
                    continue

                file_path = 'Feynman/Feynman_with_units/' + str(row['Filename'])
                formula = row['Formula']
                try:
                    dataset = np.loadtxt(file_path)
                except (OSError, ValueError):
                    # Missing or unreadable data file: skip this equation.
                    continue
                # Keep only data with exactly n_variables inputs + 1 output.
                if dataset.shape[1] != n_variables + 1:
                    continue

                var_list = [row[f'v{var_idx+1}_name'] for var_idx in range(n_variables)]
                new_formula = llm_formula(formula, var_list)
                print(index, formula, '——>', new_formula)
                formula_rows.append({'Formula': new_formula, 'Filename': row['Filename'], 'n_variables': n_variables})
        except Exception as e:
            # Best effort: whatever was collected so far is still saved.
            print(e)

        formula_2d = pd.DataFrame(formula_rows, columns=['Formula', 'Filename', 'n_variables'])
        formula_2d.to_csv(formula_csv, index=False)

    csv_filepath = f'{n_var}d/Feynman_{n_var}d_s{sample_num}_{mllm}.csv'
    try:
        result = pd.read_csv(csv_filepath)
        # Summary lines appended by an earlier run parse as rows with an
        # empty SymbolicMatch field; drop them and restore real booleans.
        result = result.dropna(subset=['SymbolicMatch'])
        result['SymbolicMatch'] = result['SymbolicMatch'].map(lambda v: str(v) == 'True')
    except (OSError, KeyError, pd.errors.EmptyDataError, pd.errors.ParserError):
        result = pd.DataFrame({
            'Index': pd.Series(dtype=int),
            'GT': pd.Series(dtype=str),
            'Pred': pd.Series(dtype=str),
            'Score': pd.Series(dtype=float),
            'RMSE': pd.Series(dtype=float),
            'R2': pd.Series(dtype=float),
            'SymbolicMatch': pd.Series(dtype=bool)
        })

    new_rows = []
    for index, row in formula_2d.iterrows():
        true_formula = row['Formula']
        file_path = 'Feynman/Feynman_with_units/' + str(row['Filename'])
        dataset = np.loadtxt(file_path)
        # Never request more rows than the file holds (replace=False).
        n_pick = min(sample_num, dataset.shape[0])
        rand_idx = np.random.choice(dataset.shape[0], n_pick, replace=False)
        dataset = dataset[rand_idx]
        x = dataset[:, :n_var]
        y_true = dataset[:, -1]

        def _format_point(point):
            # "x0=..., x1=..., y=..." for any n_var; the last column is y.
            parts = [f'x{i}={point[i]:.4f}' for i in range(n_var)]
            parts.append(f'y={point[-1]:.4f}')
            return ', '.join(parts)

        # All sampled points but the last go in as examples; the last one is
        # presented as the "new data point" to verify against.
        data_samples = '\n'.join(_format_point(point) for point in dataset[:-1])
        check_point = _format_point(dataset[-1])
        content = f'''
You will be provided with a set of input-output pairs. Based on these data, infer the mathematical relationship between y and multiple input variables. Please note that the possible mathematical operations include: +, -, *, /, exp, sqrt, sin, arcsin, and constant terms.
The input sample data are as follows:
{data_samples}
Based on the above data, please infer the possible formula. Ensure that your inference applies to all the provided data points, and consider both linear and nonlinear combinations.
Verify whether your formula applies to the following new data point and adjust it to ensure accuracy:
{check_point}
Finally, please output only the formula string you inferred (e.g. z=x_0 * x_1), without any additional information.
'''
        messages = [{"role": "user", "content": content}]

        infer_formula = _send_request(messages, mllm=mllm)
        infer_formula = llm_translate(infer_formula, mllm='gpt-4o')
        # A failed request yields None; clean an empty string in that case.
        infer_formula = clean_formula_string(infer_formula or '')
        metrics = evaluate_formula_metrics(infer_formula, true_formula, x, y_true, n_var=n_var, mllm='gpt-4o')

        print(f'GT: {true_formula.ljust(40)} | Pred: {infer_formula.ljust(40)} | Score: {metrics["LLM_Score"]} | RMSE: {metrics["RMSE"]} | R2: {metrics["R2"]} | Match: {metrics["SymbolicMatch"]}')
        new_rows.append({
            'Index': seed_idx,
            'GT': true_formula,
            'Pred': infer_formula,
            'Score': metrics['LLM_Score'],
            'RMSE': metrics['RMSE'],
            'R2': metrics['R2'],
            'SymbolicMatch': bool(metrics['SymbolicMatch'])
        })

    # Add this run's rows using the public API (DataFrame._append is private).
    if new_rows:
        fresh = pd.DataFrame(new_rows, columns=result.columns)
        result = fresh if result.empty else pd.concat([result, fresh], ignore_index=True)

    if not result.empty:
        symbolic_accuracy = result['SymbolicMatch'].sum() / len(result)
        average_r2 = result['R2'].mean()
        print(f'\nmodel: {mllm},sample_nums: {sample_num},symbolic_accuracy: {symbolic_accuracy:.4f}')
    else:
        symbolic_accuracy = 0
        average_r2 = 0.0

    result.to_csv(csv_filepath, index=False)

    # Append the run summary after the table, one newline-terminated line per
    # metric. average_r2 was previously never computed, so this step raised
    # NameError.
    with open(csv_filepath, 'a', encoding='utf-8') as f:
        f.write(f'symbolic_accuracy:{symbolic_accuracy:.4f}\n')
        f.write(f"AverageR2,{average_r2:.4f}\n")
|
||||
|
||||
|
@ -50,7 +50,6 @@ class PromptTemplate:
|
||||
for key in ('begin', 'round', 'end'))
|
||||
self.prompt_type = 'meta' if ctr == len(
|
||||
self.template.keys()) else 'origin'
|
||||
|
||||
# Check if token exists in values of tp_dict
|
||||
for tp_dict_val in self.template.values():
|
||||
if not isinstance(tp_dict_val, (str, list, dict)):
|
||||
|
@ -7,7 +7,7 @@ USER_HOME = os.path.expanduser("~")
|
||||
DEFAULT_DATA_FOLDER = os.path.join(USER_HOME, '.cache/opencompass/')
|
||||
|
||||
|
||||
def get_data_path(dataset_id: str, local_mode: bool = False):
|
||||
def get_data_path(dataset_id: str, local_mode: bool = True):
|
||||
"""return dataset id when getting data from ModelScope/HuggingFace repo, otherwise just
|
||||
return local path as is.
|
||||
|
||||
|
@ -466,6 +466,11 @@ DATASETS_MAPPING = {
|
||||
"hf_id": "",
|
||||
"local": "./data/medbullets/medbullets.csv",
|
||||
},
|
||||
"opencompass/srbench": {
|
||||
"ms_id": "",
|
||||
"hf_id": "",
|
||||
"local": "url_to_srebnch_dataset",
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user