diff --git a/README.md b/README.md
index 2a143836..81169f42 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
## 🚀 What's New
+- **\[2024.09.05\]** We now support the OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`). Feel free to give them a try! 🔥🔥🔥
- **\[2024.09.05\]** We now support answer extraction through model post-processing to provide a more accurate representation of the model's capabilities. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. For more detailed information, please refer to the [documentation](opencompass/utils/postprocessors/xfinder/README.md), and give it a try! 🔥🔥🔥
- **\[2024.08.20\]** OpenCompass now supports [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
- **\[2024.08.16\]** OpenCompass now supports the brand-new long-context language model evaluation benchmark [RULER](https://arxiv.org/pdf/2404.06654). Through flexible configurations, RULER evaluates long-context capabilities including retrieval, multi-hop tracing, aggregation, and question answering. Check out the [RULER](configs/datasets/ruler/README.md) evaluation config now! 🔥🔥🔥
@@ -191,6 +192,8 @@ After ensuring that OpenCompass is installed correctly according to the above st
# Python scripts
opencompass ./configs/eval_api_demo.py
+
+  # Use o1_mini_2024_09_12/o1_preview_2024_09_12 for the o1 models; the bundled configs set max_completion_tokens=8192 by default.
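+  # For example (a hypothetical invocation; assumes $OPENAI_API_KEY is
+  # exported and the demo dataset config is available):
+  # opencompass --models o1_mini_2024_09_12 --datasets demo_gsm8k_chat_gen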
```
- Accelerated Evaluation
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 882964e9..66eb4ef8 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -59,6 +59,7 @@
## 🚀 What's New
+- **\[2024.09.05\]** We now support the OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`). Feel free to give them a try! 🔥🔥🔥
- **\[2024.09.05\]** OpenCompass now supports answer extraction through model post-processing to more accurately reflect the model's capabilities. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. For details, please refer to the [documentation](opencompass/utils/postprocessors/xfinder/README.md), and give it a try! 🔥🔥🔥
- **\[2024.08.20\]** OpenCompass now supports [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
- **\[2024.08.16\]** OpenCompass now supports the brand-new long-context language model evaluation benchmark [RULER](https://arxiv.org/pdf/2404.06654). Through flexible configurations, RULER evaluates long-context capabilities including retrieval, multi-hop tracing, aggregation, and question answering. Check out the [RULER](configs/datasets/ruler/README.md) evaluation config now! 🔥🔥🔥
@@ -187,6 +188,9 @@ humaneval, triviaqa, commonsenseqa, tydiqa, strategyqa, cmmlu, lambada, piqa, ce
# Python scripts
opencompass ./configs/eval_api_demo.py
+
+
+  # The o1_mini_2024_09_12/o1_preview_2024_09_12 models are now supported; the bundled configs set max_completion_tokens=8192 by default.
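+  # For example (a hypothetical invocation; assumes $OPENAI_API_KEY is
+  # exported and the demo dataset config is available):
+  # opencompass --models o1_mini_2024_09_12 --datasets demo_gsm8k_chat_gen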
```
- ### Inference Backend
diff --git a/configs/models/openai/o1_mini_2024_09_12.py b/configs/models/openai/o1_mini_2024_09_12.py
new file mode 100644
index 00000000..331ecf31
--- /dev/null
+++ b/configs/models/openai/o1_mini_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+ dict(role='HUMAN', api_role='HUMAN'),
+ dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+ dict(
+ abbr='o1-mini-2024-09-12',
+ type=OpenAISDK,
+ path='o1-mini-2024-09-12',
+        # 'ENV' reads the key from $OPENAI_API_KEY; you can also paste your key here directly
+        key='ENV',
+ meta_template=api_meta_template,
+ query_per_second=1,
+ batch_size=1,
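+        # Note: o1 models currently accept only the default temperature of 1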
+ temperature=1,
+        max_completion_tokens=8192),  # raise this if reasoning consumes more output tokens; see https://platform.openai.com/docs/guides/reasoning
+]
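+
+# A minimal usage sketch (not part of the shipped config): wire this model
+# into an eval config via read_base. The demo GSM8K dataset import below is
+# an assumption; substitute any dataset config you have.
+#
+#     from mmengine.config import read_base
+#     with read_base():
+#         from .datasets.demo.demo_gsm8k_chat_gen import \
+#             gsm8k_datasets as datasets
+#         from .models.openai.o1_mini_2024_09_12 import models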
diff --git a/configs/models/openai/o1_preview_2024_09_12.py b/configs/models/openai/o1_preview_2024_09_12.py
new file mode 100644
index 00000000..9dff1037
--- /dev/null
+++ b/configs/models/openai/o1_preview_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+ dict(role='HUMAN', api_role='HUMAN'),
+ dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+ dict(
+ abbr='o1-preview-2024-09-12',
+ type=OpenAISDK,
+ path='o1-preview-2024-09-12',
+        # 'ENV' reads the key from $OPENAI_API_KEY; you can also paste your key here directly
+        key='ENV',
+ meta_template=api_meta_template,
+ query_per_second=1,
+ batch_size=1,
+ temperature=1,
+        max_completion_tokens=8192),  # raise this if reasoning consumes more output tokens; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/opencompass/configs/models/openai/o1_mini_2024_09_12.py b/opencompass/configs/models/openai/o1_mini_2024_09_12.py
new file mode 100644
index 00000000..331ecf31
--- /dev/null
+++ b/opencompass/configs/models/openai/o1_mini_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+ dict(role='HUMAN', api_role='HUMAN'),
+ dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+ dict(
+ abbr='o1-mini-2024-09-12',
+ type=OpenAISDK,
+ path='o1-mini-2024-09-12',
+        # 'ENV' reads the key from $OPENAI_API_KEY; you can also paste your key here directly
+        key='ENV',
+ meta_template=api_meta_template,
+ query_per_second=1,
+ batch_size=1,
+ temperature=1,
+        max_completion_tokens=8192),  # raise this if reasoning consumes more output tokens; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/opencompass/configs/models/openai/o1_preview_2024_09_12.py b/opencompass/configs/models/openai/o1_preview_2024_09_12.py
new file mode 100644
index 00000000..9dff1037
--- /dev/null
+++ b/opencompass/configs/models/openai/o1_preview_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+ dict(role='HUMAN', api_role='HUMAN'),
+ dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+ dict(
+ abbr='o1-preview-2024-09-12',
+ type=OpenAISDK,
+ path='o1-preview-2024-09-12',
+        # 'ENV' reads the key from $OPENAI_API_KEY; you can also paste your key here directly
+        key='ENV',
+ meta_template=api_meta_template,
+ query_per_second=1,
+ batch_size=1,
+ temperature=1,
+        max_completion_tokens=8192),  # raise this if reasoning consumes more output tokens; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index f572a846..7f306e4e 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -20,6 +20,13 @@ OPENAI_API_BASE = os.path.join(
os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1/'),
'chat/completions')
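+# o1-series models reject the legacy `max_tokens` request field and require
+# `max_completion_tokens` instead; requests to these models are special-cased.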
+O1_MODEL_LIST = [
+ 'o1-preview-2024-09-12',
+ 'o1-mini-2024-09-12',
+ 'o1-preview',
+ 'o1-mini',
+]
+
@MODELS.register_module()
class OpenAI(BaseAPIModel):
@@ -82,7 +89,8 @@ class OpenAI(BaseAPIModel):
top_logprobs: Optional[int] = None,
temperature: Optional[float] = None,
tokenizer_path: Optional[str] = None,
- extra_body: Optional[Dict] = None):
+ extra_body: Optional[Dict] = None,
+ max_completion_tokens: int = 16384):
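+        # `max_completion_tokens` is used only for o1-series models, which
+        # reject the legacy `max_tokens` field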
super().__init__(path=path,
max_seq_len=max_seq_len,
@@ -131,6 +139,9 @@ class OpenAI(BaseAPIModel):
self.proxy_url = openai_proxy_url
self.path = path
+ self.max_completion_tokens = max_completion_tokens
+        self.logger.warning(
+            f'max_completion_tokens for {path} is {max_completion_tokens}')
def generate(self,
inputs: List[PromptType],
@@ -255,16 +266,33 @@ class OpenAI(BaseAPIModel):
header['OpenAI-Organization'] = self.orgs[self.org_ctr]
try:
- data = dict(
- model=self.path,
- messages=messages,
- max_tokens=max_out_len,
- n=1,
- logprobs=self.logprobs,
- top_logprobs=self.top_logprobs,
- stop=None,
- temperature=temperature,
- )
+ if self.path in O1_MODEL_LIST:
+                    self.logger.warning(
+                        f"'max_tokens' is unsupported for model {self.path}")
+                    self.logger.warning(
+                        f'Using max_completion_tokens='
+                        f'{self.max_completion_tokens} for this query')
+ data = dict(
+ model=self.path,
+ messages=messages,
+ max_completion_tokens=self.max_completion_tokens,
+ n=1,
+ logprobs=self.logprobs,
+ top_logprobs=self.top_logprobs,
+ stop=None,
+ temperature=temperature,
+ )
+ else:
+ data = dict(
+ model=self.path,
+ messages=messages,
+ max_tokens=max_out_len,
+ n=1,
+ logprobs=self.logprobs,
+ top_logprobs=self.top_logprobs,
+ stop=None,
+ temperature=temperature,
+ )
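+                # The two payloads differ only in the output-length field:
+                # o1 models take `max_completion_tokens`, all other models
+                # keep the legacy `max_tokens`.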
if self.extra_body:
data.update(self.extra_body)
if isinstance(self.url, list):
@@ -429,11 +457,13 @@ class OpenAISDK(OpenAI):
top_logprobs: int | None = None,
temperature: float | None = None,
tokenizer_path: str | None = None,
- extra_body: Dict | None = None):
+ extra_body: Dict | None = None,
+ max_completion_tokens: int = 16384):
super().__init__(path, max_seq_len, query_per_second, rpm_verbose,
retry, key, org, meta_template, openai_api_base,
openai_proxy_url, mode, logprobs, top_logprobs,
- temperature, tokenizer_path, extra_body)
+ temperature, tokenizer_path, extra_body,
+ max_completion_tokens)
from openai import OpenAI
if self.proxy_url is None:
@@ -497,8 +527,23 @@ class OpenAISDK(OpenAI):
num_retries = 0
while num_retries < self.retry:
self.wait()
- try:
- responses = self.openai_client.chat.completions.create(
+
+ if self.path in O1_MODEL_LIST:
+                self.logger.warning(
+                    f"'max_tokens' is unsupported for model {self.path}")
+                self.logger.warning(
+                    f'Using max_completion_tokens='
+                    f'{self.max_completion_tokens} for this query')
+ query_data = dict(
+ model=self.path,
+ max_completion_tokens=self.max_completion_tokens,
+ n=1,
+ temperature=self.temperature,
+ messages=messages,
+ extra_body=self.extra_body,
+ )
+ else:
+ query_data = dict(
model=self.path,
max_tokens=max_out_len,
n=1,
@@ -506,6 +551,10 @@ class OpenAISDK(OpenAI):
messages=messages,
extra_body=self.extra_body,
)
+
+ try:
+ responses = self.openai_client.chat.completions.create(
+ **query_data)
return responses.choices[0].message.content
except Exception as e:
self.logger.error(e)