add_qwen_api_qwq_32b

2025-05-30 16:03:24 +08:00 · 2025-03-13 14:02:36 +08:00 · 2025-03-13 14:02:36 +08:00 · a80dcd4df7
commit a80dcd4df7
parent 4b09860f4a
3 changed files with 80 additions and 29 deletions
--- a/examples/eval_OpenHuEval_HuLifeQA.py
+++ b/examples/eval_OpenHuEval_HuLifeQA.py
@ -19,6 +19,7 @@ with read_base():

    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
+    from opencompass.configs.models.qwq.qwq_32b import models as qwq_32b_model
    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
    from opencompass.configs.models.deepseek.deepseek_r1_distill_llama_8b_api_aliyun import models as deepseek_r1_distill_llama_8b_api_aliyun_model
    from opencompass.configs.models.deepseek.deepseek_r1_distill_qwen_7b_api_aliyun import models as deepseek_r1_distill_qwen_7b_api_aliyun_model
@ -58,6 +59,7 @@ models = [
    *gpt_4o_mini_20240718_model,
    *gpt_4o_20241120_model,
    *o1_mini_2024_09_12_model,
+    *qwq_32b_model,
    *deepseek_v3_api_aliyun_model,
    *deepseek_r1_api_aliyun_model,
    *deepseek_r1_distill_llama_8b_api_aliyun_model,
--- a/opencompass/configs/models/qwq/qwq_32b.py
+++ b/opencompass/configs/models/qwq/qwq_32b.py
@ -0,0 +1,19 @@
+from opencompass.models import Qwen
+
+api_meta_template = dict(round=[
+    dict(role='HUMAN', api_role='HUMAN'),
+    dict(role='BOT', api_role='BOT', generate=True),
+], )
+
+models = [
+    dict(
+        abbr='QwQ-32B',
+        type=Qwen,
+        path='qwq-32b',
+        key='ENV',  # The key will be obtained from $DASHSCOPE_API_KEY (comma-separated for multiple keys), but you can write down your key here as well
+        meta_template=api_meta_template,
+        query_per_second=1,
+        max_out_len=2048,
+        max_seq_len=4096,
+        batch_size=8),
+]
--- a/opencompass/models/qwen_api.py
+++ b/opencompass/models/qwen_api.py
@ -1,3 +1,5 @@
+import os
+import random
 import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
@ -43,7 +45,16 @@ class Qwen(BaseAPIModel):
                         retry=retry,
                         generation_kwargs=generation_kwargs)
        import dashscope
-        dashscope.api_key = key
+        if isinstance(key, str):
+            if key == 'ENV':
+                if 'DASHSCOPE_API_KEY' not in os.environ:
+                    raise ValueError('DASHSCOPE API key is not set.')
+                self.keys = os.getenv('DASHSCOPE_API_KEY').split(',')
+            else:
+                self.keys = [key]
+        else:
+            self.keys = key
+        self.path = path
        self.dashscope = dashscope

    def generate(
@ -131,7 +142,9 @@ class Qwen(BaseAPIModel):
            self.acquire()
            try:
                response = self.dashscope.Generation.call(
+                    api_key=random.choice(self.keys),
                    model=self.path,
+                    stream=True,
                    **data,
                )
            except Exception as err:
@ -148,34 +161,51 @@ class Qwen(BaseAPIModel):
                # to slow down the request
                self.wait()
                continue
+            #
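+            reasoning_content = ""  # accumulates the full reasoning (thinking) text
+            answer_content = ""     # accumulates the full answer text
+            is_answering = False   # set once reasoning has ended and the answer has started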
+            for chunk in response:
+                if (chunk.output.choices[0].message.content == "" and
+                        chunk.output.choices[0].message.reasoning_content == ""):
+                    pass
+                else:
+                    if (chunk.output.choices[0].message.reasoning_content != "" and
+                            chunk.output.choices[0].message.content == ""):
+                        reasoning_content += chunk.output.choices[0].message.reasoning_content
+                    elif chunk.output.choices[0].message.content != "":
+                        if not is_answering:
+                            is_answering = True
+                        answer_content += chunk.output.choices[0].message.content
+            reasoning_content = '<think>' + reasoning_content + '</think>'
+            return reasoning_content + answer_content
+            # if response.status_code == 200:
+            #     try:
+            #         msg = response.output.text
+            #         self.logger.debug(msg)
+            #         return msg
+            #     except KeyError:
+            #         print(response)
+            #         self.logger.error(str(response.status_code))
+            #         time.sleep(1)
+            #         continue
+            # if response.status_code == 429:
+            #     print(response)
+            #     time.sleep(2)
+            #     continue
+            # if response.status_code == 400:
+            #     print('=' * 128)
+            #     print(response)
+            #     msg = 'Output data may contain inappropriate content.'
+            #     return msg

-            if response.status_code == 200:
-                try:
-                    msg = response.output.text
-                    self.logger.debug(msg)
-                    return msg
-                except KeyError:
-                    print(response)
-                    self.logger.error(str(response.status_code))
-                    time.sleep(1)
-                    continue
-            if response.status_code == 429:
-                print(response)
-                time.sleep(2)
-                continue
-            if response.status_code == 400:
-                print('=' * 128)
-                print(response)
-                msg = 'Output data may contain inappropriate content.'
-                return msg
-
-            if ('Range of input length should be ' in response.message
-                    or  # input too long
-                    'Input data may contain inappropriate content.'
-                    in response.message):  # bad input
-                print(response.message)
-                return ''
-            print(response)
-            max_num_retries += 1
+            # if ('Range of input length should be ' in response.message
+            #         or  # input too long
+            #         'Input data may contain inappropriate content.'
+            #         in response.message):  # bad input
+            #     print(response.message)
+            #     return ''
+            # print(response)
+            # max_num_retries += 1

        raise RuntimeError(response.message)