Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feature] Support OpenAI O1 models (#1539)
* [Feature] Support OpenAI O1 models
* Update README.md

Co-authored-by: liushz <qq1791167085@163.com>
This commit is contained in: parent 2e9db77d57, commit be460fbb21
README.md

@@ -59,6 +59,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through

 ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>

+- **\[2024.09.05\]** We now support the OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`). Feel free to give them a try! 🔥🔥🔥
 - **\[2024.09.05\]** We now support answer extraction through model post-processing to provide a more accurate representation of the model's capabilities. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. For more detailed information, please refer to the [documentation](opencompass/utils/postprocessors/xfinder/README.md), and give it a try! 🔥🔥🔥
 - **\[2024.08.20\]** OpenCompass now supports [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
 - **\[2024.08.16\]** OpenCompass now supports the brand-new long-context language model evaluation benchmark [RULER](https://arxiv.org/pdf/2404.06654). Through flexible configurations, RULER evaluates long-context tasks including retrieval, multi-hop tracing, aggregation, and question answering. Check out the [RULER](configs/datasets/ruler/README.md) evaluation config now! 🔥🔥🔥

@@ -191,6 +192,8 @@ After ensuring that OpenCompass is installed correctly according to the above st

 # Python scripts
 opencompass ./configs/eval_api_demo.py

+# You can use o1_mini_2024_09_12 / o1_preview_2024_09_12 for the o1 models; max_completion_tokens=8192 is set by default.
 ```
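For orientation, the sketch below shows what a minimal end-to-end config using the new o1 model list might look like. This is illustrative, not part of the commit: the file name `eval_o1_demo.py` and the demo GSM8K import are placeholders; adjust the import paths to match your checkout.

```python
# eval_o1_demo.py -- hypothetical minimal eval config (names assumed).
from mmengine.config import read_base

with read_base():
    # Model list added by this commit (also available under configs/).
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models
    # Any dataset config works here; the demo GSM8K set is one option.
    from opencompass.configs.datasets.demo.demo_gsm8k_chat_gen import \
        gsm8k_datasets

datasets = gsm8k_datasets
```

Run it with `opencompass ./configs/eval_o1_demo.py` after exporting `OPENAI_API_KEY`.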
- Accelerated Evaluation
README_zh-CN.md

@@ -59,6 +59,7 @@

 ## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>

+- **\[2024.09.05\]** The OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`) are now supported. Feel free to give them a try! 🔥🔥🔥
 - **\[2024.09.05\]** OpenCompass now supports answer extraction through model post-processing to more accurately reflect model capabilities. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. See the [documentation](opencompass/utils/postprocessors/xfinder/README.md) for details, and give it a try! 🔥🔥🔥
 - **\[2024.08.20\]** OpenCompass now supports [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
 - **\[2024.08.16\]** OpenCompass now supports the brand-new long-context language model evaluation benchmark [RULER](https://arxiv.org/pdf/2404.06654). Through flexible configurations, RULER evaluates long-context tasks including retrieval, multi-hop tracing, aggregation, and question answering. Check out the [RULER](configs/datasets/ruler/README.md) config. 🔥🔥🔥

@@ -187,6 +188,9 @@ humaneval, triviaqa, commonsenseqa, tydiqa, strategyqa, cmmlu, lambada, piqa, ce

 # Python scripts
 opencompass ./configs/eval_api_demo.py

+# The o1_mini_2024_09_12 / o1_preview_2024_09_12 models are now supported; max_completion_tokens defaults to 8192.
 ```
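The shipped configs pin `max_completion_tokens=8192`. If that budget does not fit your workload, you can copy a config and change the value; a minimal sketch, with 4096 as an arbitrary example:

```python
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

models = [
    dict(
        abbr='o1-mini-2024-09-12',
        type=OpenAISDK,
        path='o1-mini-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=4096,  # example override of the 8192 default
    ),
]
```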
- ### Inference Backend
configs/models/openai/o1_mini_2024_09_12.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
], )

models = [
    dict(
        abbr='o1-mini-2024-09-12',
        type=OpenAISDK,
        path='o1-mini-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY; you can also write your key here
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=8192),  # raise for heavier reasoning loads; see https://platform.openai.com/docs/guides/reasoning
]
configs/models/openai/o1_preview_2024_09_12.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
], )

models = [
    dict(
        abbr='o1-preview-2024-09-12',
        type=OpenAISDK,
        path='o1-preview-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY; you can also write your key here
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=8192),  # raise for heavier reasoning loads; see https://platform.openai.com/docs/guides/reasoning
]
opencompass/configs/models/openai/o1_mini_2024_09_12.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
], )

models = [
    dict(
        abbr='o1-mini-2024-09-12',
        type=OpenAISDK,
        path='o1-mini-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY; you can also write your key here
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=8192),  # raise for heavier reasoning loads; see https://platform.openai.com/docs/guides/reasoning
]
opencompass/configs/models/openai/o1_preview_2024_09_12.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
], )

models = [
    dict(
        abbr='o1-preview-2024-09-12',
        type=OpenAISDK,
        path='o1-preview-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY; you can also write your key here
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=8192),  # raise for heavier reasoning loads; see https://platform.openai.com/docs/guides/reasoning
]
opencompass/models/openai_api.py

@@ -20,6 +20,13 @@ OPENAI_API_BASE = os.path.join(
     os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1/'),
     'chat/completions')

+O1_MODEL_LIST = [
+    'o1-preview-2024-09-12',
+    'o1-mini-2024-09-12',
+    'o1-preview',
+    'o1-mini',
+]

 @MODELS.register_module()
 class OpenAI(BaseAPIModel):

@@ -82,7 +89,8 @@ class OpenAI(BaseAPIModel):
                  top_logprobs: Optional[int] = None,
                  temperature: Optional[float] = None,
                  tokenizer_path: Optional[str] = None,
-                 extra_body: Optional[Dict] = None):
+                 extra_body: Optional[Dict] = None,
+                 max_completion_tokens: int = 16384):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,

@@ -131,6 +139,9 @@ class OpenAI(BaseAPIModel):
         self.proxy_url = openai_proxy_url

         self.path = path
+        self.max_completion_tokens = max_completion_tokens
+        self.logger.warning(
+            f'Max completion tokens for {path} is: {max_completion_tokens}')

     def generate(self,
                  inputs: List[PromptType],

@@ -255,16 +266,33 @@ class OpenAI(BaseAPIModel):
                 header['OpenAI-Organization'] = self.orgs[self.org_ctr]

             try:
-                data = dict(
-                    model=self.path,
-                    messages=messages,
-                    max_tokens=max_out_len,
-                    n=1,
-                    logprobs=self.logprobs,
-                    top_logprobs=self.top_logprobs,
-                    stop=None,
-                    temperature=temperature,
-                )
+                if self.path in O1_MODEL_LIST:
+                    self.logger.warning(
+                        f"'max_tokens' is unsupported for model {self.path}")
+                    self.logger.warning(
+                        f'Using max_completion_tokens='
+                        f'{self.max_completion_tokens} for this query')
+                    data = dict(
+                        model=self.path,
+                        messages=messages,
+                        max_completion_tokens=self.max_completion_tokens,
+                        n=1,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                        stop=None,
+                        temperature=temperature,
+                    )
+                else:
+                    data = dict(
+                        model=self.path,
+                        messages=messages,
+                        max_tokens=max_out_len,
+                        n=1,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                        stop=None,
+                        temperature=temperature,
+                    )
                 if self.extra_body:
                     data.update(self.extra_body)
                 if isinstance(self.url, list):
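The net effect of the branch above is easiest to see in isolation. The helper below is a hypothetical restatement, not code from the commit; `build_request_body` and its parameters are invented names:

```python
O1_MODEL_LIST = [
    'o1-preview-2024-09-12', 'o1-mini-2024-09-12', 'o1-preview', 'o1-mini'
]


def build_request_body(model: str, messages: list, max_out_len: int,
                       max_completion_tokens: int = 16384,
                       temperature: float = 1.0) -> dict:
    """Build a /chat/completions payload, switching the length cap for o1."""
    body = dict(model=model, messages=messages, n=1, stop=None,
                temperature=temperature)
    if model in O1_MODEL_LIST:
        # o1 models reject 'max_tokens'; visible output plus hidden reasoning
        # tokens are capped via 'max_completion_tokens' instead.
        body['max_completion_tokens'] = max_completion_tokens
    else:
        body['max_tokens'] = max_out_len
    return body
```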
@@ -429,11 +457,13 @@ class OpenAISDK(OpenAI):
                  top_logprobs: int | None = None,
                  temperature: float | None = None,
                  tokenizer_path: str | None = None,
-                 extra_body: Dict | None = None):
+                 extra_body: Dict | None = None,
+                 max_completion_tokens: int = 16384):
         super().__init__(path, max_seq_len, query_per_second, rpm_verbose,
                          retry, key, org, meta_template, openai_api_base,
                          openai_proxy_url, mode, logprobs, top_logprobs,
-                         temperature, tokenizer_path, extra_body)
+                         temperature, tokenizer_path, extra_body,
+                         max_completion_tokens)
         from openai import OpenAI

         if self.proxy_url is None:

@@ -497,8 +527,23 @@ class OpenAISDK(OpenAI):
         num_retries = 0
         while num_retries < self.retry:
             self.wait()
-            try:
-                responses = self.openai_client.chat.completions.create(
+
+            if self.path in O1_MODEL_LIST:
+                self.logger.warning(
+                    f"'max_tokens' is unsupported for model {self.path}")
+                self.logger.warning(
+                    f'Using max_completion_tokens='
+                    f'{self.max_completion_tokens} for this query')
+                query_data = dict(
+                    model=self.path,
+                    max_completion_tokens=self.max_completion_tokens,
+                    n=1,
+                    temperature=self.temperature,
+                    messages=messages,
+                    extra_body=self.extra_body,
+                )
+            else:
+                query_data = dict(
+                    model=self.path,
+                    max_tokens=max_out_len,
+                    n=1,

@@ -506,6 +551,10 @@ class OpenAISDK(OpenAI):
                     messages=messages,
                     extra_body=self.extra_body,
                 )
+
+            try:
+                responses = self.openai_client.chat.completions.create(
+                    **query_data)
                 return responses.choices[0].message.content
             except Exception as e:
                 self.logger.error(e)
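Outside of OpenCompass, the o1 path above reduces to a single SDK call. A standalone sketch, assuming a recent `openai` package (one that accepts `max_completion_tokens`) and a key in the environment; retries and prompt templating, which OpenCompass handles, are omitted:

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
response = client.chat.completions.create(
    model='o1-mini-2024-09-12',
    messages=[{'role': 'user', 'content': 'What is 12 * 34?'}],
    max_completion_tokens=8192,  # o1 models take this instead of max_tokens
    n=1,
    temperature=1,
)
print(response.choices[0].message.content)
```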