diff --git a/README.md b/README.md
index 2a143836..81169f42 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through

 ## 🚀 What's New

+- **\[2024.09.05\]** We now support the OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`). Feel free to give them a try! 🔥🔥🔥
 - **\[2024.09.05\]** We now support answer extraction through model post-processing to provide a more accurate representation of the model's capabilities. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. For more detailed information, please refer to the [documentation](opencompass/utils/postprocessors/xfinder/README.md), and give it a try! 🔥🔥🔥
 - **\[2024.08.20\]** OpenCompass now supports the [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
 - **\[2024.08.16\]** OpenCompass now supports the brand new long-context language model evaluation benchmark — [RULER](https://arxiv.org/pdf/2404.06654). RULER provides an evaluation of long-context including retrieval, multi-hop tracing, aggregation, and question answering through flexible configurations. Check out the [RULER](configs/datasets/ruler/README.md) evaluation config now! 🔥🔥🔥
@@ -191,6 +192,8 @@ After ensuring that OpenCompass is installed correctly according to the above st
   # Python scripts
   opencompass ./configs/eval_api_demo.py
+
+  # Use o1_mini_2024_09_12 / o1_preview_2024_09_12 for the o1 models; max_completion_tokens defaults to 8192.
   ```

 - Accelerated Evaluation
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 882964e9..66eb4ef8 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -59,6 +59,7 @@

 ## 🚀 What's New

+- **\[2024.09.05\]** We now support the OpenAI o1 models (`o1-mini-2024-09-12` and `o1-preview-2024-09-12`). Feel free to give them a try! 🔥🔥🔥
 - **\[2024.09.05\]** OpenCompass now supports answer extraction through model post-processing to present model capabilities more accurately. As part of this update, we have integrated [XFinder](https://github.com/IAAR-Shanghai/xFinder) as our first post-processing model. See the [documentation](opencompass/utils/postprocessors/xfinder/README.md) for details, and give it a try! 🔥🔥🔥
 - **\[2024.08.20\]** OpenCompass now supports [SciCode](https://github.com/scicode-bench/SciCode): A Research Coding Benchmark Curated by Scientists. 🔥🔥🔥
 - **\[2024.08.16\]** OpenCompass now supports the brand-new long-context evaluation benchmark [RULER](https://arxiv.org/pdf/2404.06654). Through flexible configurations, RULER evaluates long-context tasks including retrieval, multi-hop tracing, aggregation, and question answering. See the [RULER](configs/datasets/ruler/README.md) config. 🔥🔥🔥
@@ -187,6 +188,9 @@ humaneval, triviaqa, commonsenseqa, tydiqa, strategyqa, cmmlu, lambada, piqa, ce
   # Python scripts
   opencompass ./configs/eval_api_demo.py
+
+
+  # The o1_mini_2024_09_12 / o1_preview_2024_09_12 models are now supported; max_completion_tokens defaults to 8192.
   ```

 - ### Inference Backends
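For reference, the model configs added below plug into a run like any other OpenCompass model config. Here is a minimal sketch of an evaluation config (the file name `eval_o1_demo.py` and the dataset import path are illustrative, not part of this patch; substitute whatever dataset config you use):

```python
# eval_o1_demo.py -- hypothetical example, not part of this patch
from mmengine.config import read_base

with read_base():
    # Model config added by this PR
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import \
        models as o1_mini_models
    # Illustrative dataset; any generation-style dataset config works
    from opencompass.configs.datasets.demo.demo_gsm8k_chat_gen import \
        gsm8k_datasets

models = o1_mini_models
datasets = gsm8k_datasets
```

With `OPENAI_API_KEY` exported, this would run via `opencompass eval_o1_demo.py`, mirroring the `eval_api_demo.py` invocation shown above.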
diff --git a/configs/models/openai/o1_mini_2024_09_12.py b/configs/models/openai/o1_mini_2024_09_12.py
new file mode 100644
index 00000000..331ecf31
--- /dev/null
+++ b/configs/models/openai/o1_mini_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+    dict(
+        abbr='o1-mini-2024-09-12',
+        type=OpenAISDK,
+        path='o1-mini-2024-09-12',
+        key=
+        'ENV',  # The key is read from $OPENAI_API_KEY; you can also write your key here
+        meta_template=api_meta_template,
+        query_per_second=1,
+        batch_size=1,
+        temperature=1,  # o1 models currently only support the default temperature of 1
+        max_completion_tokens=8192),  # Raise this for a larger reasoning budget; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/configs/models/openai/o1_preview_2024_09_12.py b/configs/models/openai/o1_preview_2024_09_12.py
new file mode 100644
index 00000000..9dff1037
--- /dev/null
+++ b/configs/models/openai/o1_preview_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+    dict(
+        abbr='o1-preview-2024-09-12',
+        type=OpenAISDK,
+        path='o1-preview-2024-09-12',
+        key=
+        'ENV',  # The key is read from $OPENAI_API_KEY; you can also write your key here
+        meta_template=api_meta_template,
+        query_per_second=1,
+        batch_size=1,
+        temperature=1,  # o1 models currently only support the default temperature of 1
+        max_completion_tokens=8192),  # Raise this for a larger reasoning budget; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/opencompass/configs/models/openai/o1_mini_2024_09_12.py b/opencompass/configs/models/openai/o1_mini_2024_09_12.py
new file mode 100644
index 00000000..331ecf31
--- /dev/null
+++ b/opencompass/configs/models/openai/o1_mini_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+    dict(
+        abbr='o1-mini-2024-09-12',
+        type=OpenAISDK,
+        path='o1-mini-2024-09-12',
+        key=
+        'ENV',  # The key is read from $OPENAI_API_KEY; you can also write your key here
+        meta_template=api_meta_template,
+        query_per_second=1,
+        batch_size=1,
+        temperature=1,  # o1 models currently only support the default temperature of 1
+        max_completion_tokens=8192),  # Raise this for a larger reasoning budget; see https://platform.openai.com/docs/guides/reasoning
+]
diff --git a/opencompass/configs/models/openai/o1_preview_2024_09_12.py b/opencompass/configs/models/openai/o1_preview_2024_09_12.py
new file mode 100644
index 00000000..9dff1037
--- /dev/null
+++ b/opencompass/configs/models/openai/o1_preview_2024_09_12.py
@@ -0,0 +1,20 @@
+from opencompass.models import OpenAISDK
+
+api_meta_template = dict(round=[
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+    dict(
+        abbr='o1-preview-2024-09-12',
+        type=OpenAISDK,
+        path='o1-preview-2024-09-12',
+        key=
+        'ENV',  # The key is read from $OPENAI_API_KEY; you can also write your key here
+        meta_template=api_meta_template,
+        query_per_second=1,
+        batch_size=1,
+        temperature=1,  # o1 models currently only support the default temperature of 1
+        max_completion_tokens=8192),  # Raise this for a larger reasoning budget; see https://platform.openai.com/docs/guides/reasoning
+]
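All four files are identical apart from the model name; the pair under `opencompass/configs/` mirrors the pair under `configs/` for installs that use the packaged configs. If the default 8192-token budget is too tight for a benchmark, a variant config can simply raise `max_completion_tokens`. A sketch, where the `-32k` abbreviation and the value are made up for illustration:

```python
from opencompass.models import OpenAISDK

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

models = [
    dict(
        abbr='o1-preview-2024-09-12-32k',  # hypothetical variant name
        type=OpenAISDK,
        path='o1-preview-2024-09-12',
        key='ENV',  # read from $OPENAI_API_KEY
        meta_template=api_meta_template,
        query_per_second=1,
        batch_size=1,
        temperature=1,
        max_completion_tokens=32768),  # larger reasoning budget
]
```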
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index f572a846..7f306e4e 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -20,6 +20,13 @@ OPENAI_API_BASE = os.path.join(
     os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1/'),
     'chat/completions')

+O1_MODEL_LIST = [
+    'o1-preview-2024-09-12',
+    'o1-mini-2024-09-12',
+    'o1-preview',
+    'o1-mini',
+]
+

 @MODELS.register_module()
 class OpenAI(BaseAPIModel):
@@ -82,7 +89,8 @@ class OpenAI(BaseAPIModel):
                  top_logprobs: Optional[int] = None,
                  temperature: Optional[float] = None,
                  tokenizer_path: Optional[str] = None,
-                 extra_body: Optional[Dict] = None):
+                 extra_body: Optional[Dict] = None,
+                 max_completion_tokens: int = 16384):

         super().__init__(path=path,
                          max_seq_len=max_seq_len,
@@ -131,6 +139,9 @@
             self.proxy_url = openai_proxy_url

         self.path = path
+        self.max_completion_tokens = max_completion_tokens
+        self.logger.warning(
+            f'max_completion_tokens for {path} is {max_completion_tokens}')

     def generate(self,
                  inputs: List[PromptType],
@@ -255,16 +266,33 @@
                 header['OpenAI-Organization'] = self.orgs[self.org_ctr]

             try:
-                data = dict(
-                    model=self.path,
-                    messages=messages,
-                    max_tokens=max_out_len,
-                    n=1,
-                    logprobs=self.logprobs,
-                    top_logprobs=self.top_logprobs,
-                    stop=None,
-                    temperature=temperature,
-                )
+                if self.path in O1_MODEL_LIST:
+                    self.logger.warning(
+                        f"'max_tokens' is unsupported for model {self.path}")
+                    self.logger.warning(
+                        'Using max_completion_tokens='
+                        f'{self.max_completion_tokens} for this query')
+                    data = dict(
+                        model=self.path,
+                        messages=messages,
+                        max_completion_tokens=self.max_completion_tokens,
+                        n=1,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                        stop=None,
+                        temperature=temperature,
+                    )
+                else:
+                    data = dict(
+                        model=self.path,
+                        messages=messages,
+                        max_tokens=max_out_len,
+                        n=1,
+                        logprobs=self.logprobs,
+                        top_logprobs=self.top_logprobs,
+                        stop=None,
+                        temperature=temperature,
+                    )
                 if self.extra_body:
                     data.update(self.extra_body)
                 if isinstance(self.url, list):
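The change to both request paths is the same dispatch: o1 models reject the `max_tokens` field, so the per-query `max_out_len` is ignored for them and the configured `max_completion_tokens` (which budgets the hidden reasoning tokens plus the visible answer together) is sent instead. A distilled, standalone sketch of that logic, not the patch itself:

```python
# Standalone sketch of the token-limit dispatch above.
O1_MODELS = [
    'o1-preview-2024-09-12',
    'o1-mini-2024-09-12',
    'o1-preview',
    'o1-mini',
]


def build_request_body(model: str, messages: list, max_out_len: int,
                       max_completion_tokens: int,
                       temperature: float) -> dict:
    """Return a Chat Completions payload with the right token-limit key."""
    body = dict(model=model, messages=messages, n=1, stop=None,
                temperature=temperature)
    if model in O1_MODELS:
        # o1 models reject 'max_tokens'; reasoning and answer tokens are
        # budgeted together via 'max_completion_tokens'.
        body['max_completion_tokens'] = max_completion_tokens
    else:
        body['max_tokens'] = max_out_len
    return body
```

The same `if self.path in O1_MODEL_LIST` branch is applied next in the SDK-based client, `OpenAISDK`.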
@@ -429,11 +457,13 @@
                  top_logprobs: int | None = None,
                  temperature: float | None = None,
                  tokenizer_path: str | None = None,
-                 extra_body: Dict | None = None):
+                 extra_body: Dict | None = None,
+                 max_completion_tokens: int = 16384):
         super().__init__(path, max_seq_len, query_per_second, rpm_verbose,
                          retry, key, org, meta_template, openai_api_base,
                          openai_proxy_url, mode, logprobs, top_logprobs,
-                         temperature, tokenizer_path, extra_body)
+                         temperature, tokenizer_path, extra_body,
+                         max_completion_tokens)
         from openai import OpenAI

         if self.proxy_url is None:
@@ -497,8 +527,23 @@
         num_retries = 0
         while num_retries < self.retry:
             self.wait()
-            try:
-                responses = self.openai_client.chat.completions.create(
+
+            if self.path in O1_MODEL_LIST:
+                self.logger.warning(
+                    f"'max_tokens' is unsupported for model {self.path}")
+                self.logger.warning(
+                    'Using max_completion_tokens='
+                    f'{self.max_completion_tokens} for this query')
+                query_data = dict(
+                    model=self.path,
+                    max_completion_tokens=self.max_completion_tokens,
+                    n=1,
+                    temperature=self.temperature,
+                    messages=messages,
+                    extra_body=self.extra_body,
+                )
+            else:
+                query_data = dict(
                     model=self.path,
                     max_tokens=max_out_len,
                     n=1,
@@ -506,6 +551,10 @@
                     temperature=self.temperature,
                     messages=messages,
                     extra_body=self.extra_body,
                 )
+
+            try:
+                responses = self.openai_client.chat.completions.create(
+                    **query_data)
                 return responses.choices[0].message.content
             except Exception as e:
                 self.logger.error(e)
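To smoke-test the new path without a full evaluation run, the model class can be driven directly. A minimal sketch, assuming `OPENAI_API_KEY` is exported and relying only on the constructor arguments visible in the signatures above (the prompt and token numbers are arbitrary):

```python
from opencompass.models import OpenAISDK

# key='ENV' makes the client read the key from $OPENAI_API_KEY.
model = OpenAISDK(
    path='o1-mini-2024-09-12',
    key='ENV',
    temperature=1,
    max_completion_tokens=8192,
)

# For o1 models, max_out_len is ignored in favor of max_completion_tokens.
print(model.generate(['What is 17 * 24?'], max_out_len=512)[0])
```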