Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)

[Sync] update github workflow (#1156)

parent aa2dd2b58c
commit 62dbf04708
.github/scripts/pr_oc_score_assert.py (vendored, 4 changed lines)

@@ -4,7 +4,7 @@ import os
 import pytest

 output_path = 'regression_result'
-model = 'internlm-chat-7b-hf'
+model = 'internlm2-chat-7b-hf'
 dataset = 'siqa'


@@ -22,7 +22,7 @@ class TestChatScore:

     def test_model_dataset_score(self, result_scores):
         result_score = result_scores.get(model).get(dataset)
-        assert_score(result_score, 73.59)
+        assert_score(result_score, 79.53)


 def assert_score(score, baseline):
.github/workflows/daily-run-test.yml (vendored, 5 changed lines)

@@ -14,6 +14,9 @@ env:
   PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
   USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
   HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HF_DATASETS_OFFLINE: 1
+  TRANSFORMERS_OFFLINE: 1
+  HF_HUB_OFFLINE: 1

 jobs:
   daily_run_test:
@@ -42,7 +45,7 @@ jobs:
           cp -r ${{env.USERSPACE_PREFIX}}/data .
           rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
           ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
-          export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
+          export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_HUB_OFFLINE=1;
       - name: Run test
         run: |
           eval "$(conda shell.bash hook)"
.github/workflows/pr-run-test.yml (vendored, 12 changed lines)

@@ -21,6 +21,9 @@ env:
   CONDA_ENV: opencompass_base
   USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
   HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HF_DATASETS_OFFLINE: 1
+  TRANSFORMERS_OFFLINE: 1
+  HF_HUB_OFFLINE: 1

 jobs:
   pr_run_test:
@@ -42,21 +45,20 @@ jobs:
           cp -r ${{env.USERSPACE_PREFIX}}/data .
           rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
           ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
-          export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
       - name: Run test
         run: |
           eval "$(conda shell.bash hook)"
           conda activate ${{env.CONDA_ENV}}
           conda info --envs
           rm -rf regression_result
-          python3 run.py --models hf_internlm_chat_7b --datasets siqa_gen --work-dir regression_result --debug
+          python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
       - name: Get result
         run: |
           score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
-          if (( ${score%.*} >= 70 && ${score%.*} <= 75 )); then
-            echo "score is $score between 70 and 75"
+          if (( ${score%.*} >= 75 && ${score%.*} <= 85 )); then
+            echo "score is $score between 75 and 85"
           else
-            echo "score is $score not between 70 and 75"
+            echo "score is $score not between 75 and 85"
             exit 1
           fi
           rm -rf regression_result
@@ -49,7 +49,7 @@ for _name in bbh_multiple_choice_sets:
             template=f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
         ),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=['Q:']))
     bbh_eval_cfg = dict(
         evaluator=dict(type=BBHEvaluator_mcq),
         pred_role='BOT',
@@ -66,6 +66,7 @@ for _name in bbh_multiple_choice_sets:
             infer_cfg=bbh_infer_cfg.copy(),
             eval_cfg=bbh_eval_cfg.copy()))

+
 for _name in bbh_free_form_sets:
     with open(os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{_name}.txt'), 'r') as f:
         _hint = f.read()
@@ -75,7 +76,7 @@ for _name in bbh_free_form_sets:
             template=f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
         ),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=['Q:']))
     bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role='BOT')

     bbh_datasets.append(
@@ -10,9 +10,9 @@ with read_base():
     from ..race.race_ppl_abed12 import race_datasets
     from ..winogrande.winogrande_5shot_ll_252f01 import winogrande_datasets
     from ..hellaswag.hellaswag_10shot_ppl_59c85e import hellaswag_datasets
-    from ..bbh.bbh_gen_0a5495 import bbh_datasets
+    from ..bbh.bbh_gen_98fba6 import bbh_datasets
     from ..gsm8k.gsm8k_gen_ee684f import gsm8k_datasets
-    from ..math.math_evaluatorv2_gen_9d2049 import math_datasets
+    from ..math.math_evaluatorv2_gen_2f4a71 import math_datasets
     from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
     from ..humaneval.humaneval_gen_d2537e import humaneval_datasets
     from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets
configs/datasets/gsm8k/gsm8k_gen_17d0dc.py (new file, 39 lines)
@ -0,0 +1,39 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
|
||||
|
||||
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
|
||||
|
||||
gsm8k_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt="Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?\nLet's think step by step\nAnswer:"),
|
||||
dict(role='BOT', prompt='Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.\nFor the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.\nAngelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.\nHowever, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.\nThey also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.\nAnd they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.\nSo Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.\nThey want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75\nThey will need to plan to study 4 days to allow for all the time they need.\nThe answer is 4\n'),
|
||||
dict(role='HUMAN', prompt="Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?\nLet's think step by step\nAnswer:"),
|
||||
dict(role='BOT', prompt="Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.\nHis team also scores 6 3 pointers, meaning they scored 8*3= 24 points in 3 pointers\nThey scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.\nAll together his team scored 50+24+10= 84 points\nMark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.\nHis opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.\nThey also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.\nAll together Mark's opponents scored 100+12+5=117 points\nThe total score for the game is both team's scores added together, so it is 84+117=201 points\nThe answer is 201\n"),
|
||||
dict(role='HUMAN', prompt="Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?\nLet's think step by step\nAnswer:"),
|
||||
dict(role='BOT', prompt="When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24\nThe total number of marbles she'll have is 60+24 = 84\nIf Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.\nIf Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.\nThe total number of frisbees she'll have will increase to 30+12 = 42\nBella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards\nIf she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.\nThe total number of deck cards she'll have is 10+4 = 14\nTogether, Bella will have a total of 14+42+84 = 140 items\nThe answer is 140\n"),
|
||||
dict(role='HUMAN', prompt="Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?\nLet's think step by step\nAnswer:"),
|
||||
dict(role='BOT', prompt='For the first three baskets, the number of apples and oranges in one basket is 9+15=24\nIn total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.\nSince there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.\nThe number of apples in the fourth basket is 9-2=7\nThere are also 15-2=13 oranges in the fourth basket\nThe combined number of oranges and apples in the fourth basket is 13+7=20\nThe fourth basket also contains 14-2=12 bananas.\nIn total, the fourth basket has 20+12=32 fruits.\nThe four baskets together have 32+114=146 fruits.\nThe answer is 146\n'),
|
||||
dict(role='HUMAN', prompt="Question: {question}\nLet's think step by step\nAnswer:"),
|
||||
],
|
||||
)),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=['Question']))
|
||||
|
||||
gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
|
||||
pred_postprocessor=dict(type=gsm8k_postprocess),
|
||||
dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))
|
||||
|
||||
gsm8k_datasets = [
|
||||
dict(
|
||||
abbr='gsm8k',
|
||||
type=GSM8KDataset,
|
||||
path='./data/gsm8k',
|
||||
reader_cfg=gsm8k_reader_cfg,
|
||||
infer_cfg=gsm8k_infer_cfg,
|
||||
eval_cfg=gsm8k_eval_cfg)
|
||||
]
|
configs/datasets/math/math_4shot_base_gen_db136b.py (new file, 30 lines)
@ -0,0 +1,30 @@
|
||||
from mmengine.config import read_base
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess_v2
|
||||
|
||||
with read_base():
|
||||
from .math_4shot_example_from_google_research import prompt
|
||||
|
||||
math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
|
||||
|
||||
math_infer_cfg = dict(
|
||||
prompt_template=dict(type=PromptTemplate, template=prompt + '\n\nProblem:\n{problem}\nSolution:'),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=1024, stopping_criteria=['Problem']))
|
||||
|
||||
# postprocess v2
|
||||
math_eval_cfg = dict(
|
||||
evaluator=dict(type=MATHEvaluator, version='v2'),
|
||||
pred_postprocessor=dict(type=math_postprocess_v2))
|
||||
|
||||
math_datasets = [
|
||||
dict(
|
||||
type=MATHDataset,
|
||||
abbr='math',
|
||||
path='./data/math/math.json',
|
||||
reader_cfg=math_reader_cfg,
|
||||
infer_cfg=math_infer_cfg,
|
||||
eval_cfg=math_eval_cfg)
|
||||
]
|
@ -0,0 +1,40 @@
|
||||
# Solving Quantitative Reasoning Problems with Language Models
|
||||
|
||||
prompt = '''
|
||||
Problem:
|
||||
Find the domain of the expression $\\frac{\\sqrt{x-2}}{\\sqrt{5-x}}$.
|
||||
|
||||
Solution:
|
||||
The expressions inside each square root must be non-negative. Therefore, $x-2 \\ge 0$, so $x\\ge2$, and $5 - x \\ge 0$, so $x \\le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{[2,5)}$.
|
||||
Final Answer: The final answer is $[2,5)$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If $\\det \\mathbf{A} = 2$ and $\\det \\mathbf{B} = 12,$ then find $\\det (\\mathbf{A} \\mathbf{B}).$
|
||||
|
||||
Solution:
|
||||
We have that $\\det (\\mathbf{A} \\mathbf{B}) = (\\det \\mathbf{A})(\\det \\mathbf{B}) = (2)(12) = \\boxed{24}.$
|
||||
Final Answer: The final answer is $24$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
|
||||
|
||||
Solution:
|
||||
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\\cdot 12\\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\\cdot15\\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$:
|
||||
\\begin{align*}
|
||||
30n&=480\\
|
||||
\\Rightarrow\\qquad n&=480/30=\\boxed{16}
|
||||
\\end{align*}
|
||||
Final Answer: The final answer is $16$. I hope it is correct.
|
||||
|
||||
Problem:
|
||||
If the system of equations
|
||||
\\begin{align*}
|
||||
6x-4y&=a,\\
|
||||
6y-9x &=b.
|
||||
\\end{align*}
|
||||
has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{a}{b},$ assuming $b$ is nonzero.
|
||||
|
||||
Solution:
|
||||
If we multiply the first equation by $-\\frac{3}{2}$, we obtain $$6y-9x=-\\frac{3}{2}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{3}{2}a=b\\Rightarrow\\frac{a}{b}=\\boxed{-\\frac{2}{3}}.$$
|
||||
Final Answer: The final answer is $-\\frac{2}{3}$. I hope it is correct.
|
||||
'''.strip()
|
@@ -38,7 +38,7 @@ Problem:
 Solution:"""
     ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=512))
+    inferencer=dict(type=GenInferencer, max_out_len=512, stopping_criteria=['Problem']))

 # postprocess v2
 math_eval_cfg = dict(
configs/datasets/mbpp/sanitized_mbpp_gen_742f0c.py (new file, 82 lines)
@ -0,0 +1,82 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import SanitizedMBPPDataset, MBPPEvaluator
|
||||
|
||||
sanitized_mbpp_reader_cfg = dict(input_columns=['text', 'test_list'], output_column='test_list_2')
|
||||
|
||||
prompt = '''
|
||||
You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:
|
||||
|
||||
assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)
|
||||
assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4)
|
||||
assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)
|
||||
|
||||
[BEGIN]
|
||||
'\
|
||||
def similar_elements(test_tup1, test_tup2):
|
||||
res = tuple(set(test_tup1) & set(test_tup2))
|
||||
return (res)\
|
||||
'
|
||||
[DONE]
|
||||
|
||||
|
||||
You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:
|
||||
|
||||
assert is_not_prime(2) == False
|
||||
assert is_not_prime(10) == True
|
||||
assert is_not_prime(35) == True
|
||||
|
||||
[BEGIN]
|
||||
'\
|
||||
import math
|
||||
def is_not_prime(n):
|
||||
result = False
|
||||
for i in range(2,int(math.sqrt(n)) + 1):
|
||||
if n % i == 0:
|
||||
result = True
|
||||
return result\
|
||||
'
|
||||
[DONE]
|
||||
|
||||
|
||||
You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:
|
||||
|
||||
assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65]
|
||||
assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75]
|
||||
assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]
|
||||
|
||||
[BEGIN]
|
||||
'\
|
||||
import heapq as hq
|
||||
def heap_queue_largest(nums,n):
|
||||
largest_nums = hq.nlargest(n, nums)
|
||||
return largest_nums\
|
||||
'
|
||||
[DONE]
|
||||
|
||||
|
||||
You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:
|
||||
|
||||
{test_list}
|
||||
|
||||
'''.strip()
|
||||
|
||||
sanitized_mbpp_infer_cfg = dict(
|
||||
prompt_template=dict(type=PromptTemplate, template=prompt),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512),
|
||||
)
|
||||
|
||||
sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role='BOT')
|
||||
|
||||
sanitized_mbpp_datasets = [
|
||||
dict(
|
||||
type=SanitizedMBPPDataset,
|
||||
abbr='sanitized_mbpp',
|
||||
path='./data/mbpp/sanitized-mbpp.jsonl',
|
||||
reader_cfg=sanitized_mbpp_reader_cfg,
|
||||
infer_cfg=sanitized_mbpp_infer_cfg,
|
||||
eval_cfg=sanitized_mbpp_eval_cfg,
|
||||
)
|
||||
]
|
configs/datasets/mbpp/sanitized_mbpp_gen_a0fc46.py (new file, 41 lines)
@ -0,0 +1,41 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import SanitizedMBPPDataset, MBPPEvaluator
|
||||
|
||||
sanitized_mbpp_reader_cfg = dict(input_columns=['text', 'test_list'], output_column='test_list_2')
|
||||
|
||||
sanitized_mbpp_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='You are an expert Python programmer, and here is your task:\nWrite a function to find the similar elements from the given two tuple lists.\nYour code should pass these tests:\n\nassert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4)\nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)\n',),
|
||||
dict(role='BOT', prompt="[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n res = tuple(set(test_tup1) & set(test_tup2))\n return (res)' \n[DONE]\n\n",),
|
||||
|
||||
dict(role='HUMAN', prompt='You are an expert Python programmer, and here is your task:\nWrite a python function to identify non-prime numbers.\nYour code should pass these tests:\n\nassert is_not_prime(2) == False\nassert is_not_prime(10) == True\nassert is_not_prime(35) == True\n',),
|
||||
dict(role='BOT', prompt="[BEGIN]\n 'import math\ndef is_not_prime(n):\n result = False\n for i in range(2,int(math.sqrt(n)) + 1):\n if n %% i == 0:\n result = True\n return result' \n[DONE]\n\n",),
|
||||
|
||||
dict(role='HUMAN', prompt='You are an expert Python programmer, and here is your task:\nWrite a function to find the largest integers from a given list of numbers using heap queue algorithm.\nYour code should pass these tests:\n\nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65]\nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75]\nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]\n',),
|
||||
dict(role='BOT', prompt="[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums,n):\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE]\n\n",),
|
||||
|
||||
dict(role='HUMAN', prompt='You are an expert Python programmer, and here is your task:\n{text}\nYour code should pass these tests:\n\n{test_list}\n',),
|
||||
],
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512),
|
||||
)
|
||||
|
||||
sanitized_mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role='BOT')
|
||||
|
||||
sanitized_mbpp_datasets = [
|
||||
dict(
|
||||
type=SanitizedMBPPDataset,
|
||||
abbr='sanitized_mbpp',
|
||||
path='./data/mbpp/sanitized-mbpp.jsonl',
|
||||
reader_cfg=sanitized_mbpp_reader_cfg,
|
||||
infer_cfg=sanitized_mbpp_infer_cfg,
|
||||
eval_cfg=sanitized_mbpp_eval_cfg,
|
||||
)
|
||||
]
|
configs/models/deepseek/hf_deepseek_v2.py (new file, 18 lines)
@ -0,0 +1,18 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='deepseek-v2-hf',
|
||||
path='deepseek-ai/DeepSeek-V2',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
model_kwargs=dict(
|
||||
device_map='sequential',
|
||||
torch_dtype='torch.bfloat16',
|
||||
max_memory={i: '75GB' for i in range(8)},
|
||||
attn_implementation='eager'
|
||||
),
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
configs/models/deepseek/hf_deepseek_v2_chat.py (new file, 18 lines)
@ -0,0 +1,18 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='deepseek-v2-chat-hf',
|
||||
path='deepseek-ai/DeepSeek-V2-Chat',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
model_kwargs=dict(
|
||||
device_map='sequential',
|
||||
torch_dtype='torch.bfloat16',
|
||||
max_memory={i: '75GB' for i in range(8)},
|
||||
attn_implementation='eager'
|
||||
),
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
configs/models/deepseek/lmdeploy_deepseek_series.py (new file, 23 lines)
@ -0,0 +1,23 @@
|
||||
from opencompass.models import LmdeployPytorchModel
|
||||
|
||||
settings = [
|
||||
('deepseek-7b-base-hf', 'deepseek-ai/deepseek-llm-7b-base', 1),
|
||||
('deepseek-67b-base-hf', 'deepseek-ai/deepseek-llm-67b-base', 4),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=LmdeployPytorchModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/hf_internlm/lmdeploy_internlm2_series.py (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
settings = [
|
||||
('internlm2-1.8b-turbomind', 'internlm/internlm2-1_8b', 1),
|
||||
('internlm2-7b-turbomind', 'internlm/internlm2-7b', 1),
|
||||
('internlm2-20b-turbomind', 'internlm/internlm2-20b', 2),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/hf_llama/lmdeploy_llama_series.py (new file, 30 lines)
@ -0,0 +1,30 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
settings = [
|
||||
('llama-7b-turbomind', 'huggyllama/llama-7b', 1),
|
||||
('llama-13b-turbomind', 'huggyllama/llama-13b', 1),
|
||||
('llama-30b-turbomind', 'huggyllama/llama-30b', 2),
|
||||
('llama-65b-turbomind', 'huggyllama/llama-65b', 4),
|
||||
('llama-2-7b-turbomind', 'meta-llama/Llama-2-7b-hf', 1),
|
||||
('llama-2-13b-turbomind', 'meta-llama/Llama-2-13b-hf', 1),
|
||||
('llama-2-70b-turbomind', 'meta-llama/Llama-2-70b-hf', 4),
|
||||
('llama-3-8b-turbomind', 'meta-llama/Meta-Llama-3-8B', 1),
|
||||
('llama-3-70b-turbomind', 'meta-llama/Meta-Llama-3-70B', 4),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/mistral/lmdeploy_mistral_series.py (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
from opencompass.models import LmdeployPytorchModel
|
||||
|
||||
settings = [
|
||||
('mistral-7b-v0.1-pytorch', 'mistralai/Mistral-7B-v0.1', 1),
|
||||
('mixtral-8x7b-v0.1-pytorch', 'mistralai/Mixtral-8x7B-v0.1', 2),
|
||||
('mixtral-8x22b-v0.1-pytorch', 'mistralai/Mixtral-8x22B-v0.1', 4),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=LmdeployPytorchModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/qwen/hf_qwen1_5_110b.py (new file, 12 lines)
@ -0,0 +1,12 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='qwen1.5-110b-hf',
|
||||
path='Qwen/Qwen1.5-110B',
|
||||
max_out_len=1024,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
configs/models/qwen/hf_qwen1_5_110b_chat.py (new file, 12 lines)
@ -0,0 +1,12 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='qwen1.5-110b-chat-hf',
|
||||
path='Qwen/Qwen1.5-110B-Chat',
|
||||
max_out_len=1024,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
configs/models/qwen/lmdeploy_qwen1_5_series.py (new file, 29 lines)
@ -0,0 +1,29 @@
|
||||
from opencompass.models import LmdeployPytorchModel
|
||||
|
||||
settings = [
|
||||
('qwen1.5-0.5b-pytorch', 'Qwen/Qwen1.5-0.5B', 1),
|
||||
('qwen1.5-1.8b-pytorch', 'Qwen/Qwen1.5-1.8B', 1),
|
||||
('qwen1.5-4b-pytorch', 'Qwen/Qwen1.5-4B', 1),
|
||||
('qwen1.5-7b-pytorch', 'Qwen/Qwen1.5-7B', 1),
|
||||
('qwen1.5-14b-pytorch', 'Qwen/Qwen1.5-14B', 1),
|
||||
('qwen1.5-32b-pytorch', 'Qwen/Qwen1.5-32B', 2),
|
||||
('qwen1.5-72b-pytorch', 'Qwen/Qwen1.5-72B', 4),
|
||||
('qwen1.5-moe-a2.7b-pytorch', 'Qwen/Qwen1.5-MoE-A2.7B', 1),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=LmdeployPytorchModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/qwen/lmdeploy_qwen_series.py (new file, 25 lines)
@ -0,0 +1,25 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
settings = [
|
||||
('qwen-1.8b-turbomind', 'Qwen/Qwen-1_8B', 1),
|
||||
('qwen-7b-turbomind', 'Qwen/Qwen-7B', 1),
|
||||
('qwen-14b-turbomind', 'Qwen/Qwen-14B', 1),
|
||||
('qwen-72b-turbomind', 'Qwen/Qwen-72B', 4),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
configs/models/yi/lmdeploy_yi_series.py (new file, 23 lines)
@ -0,0 +1,23 @@
|
||||
from opencompass.models import LmdeployPytorchModel
|
||||
|
||||
settings = [
|
||||
('yi-6b-pytorch', '01-ai/Yi-6B', 1),
|
||||
('yi-34b-pytorch', '01-ai/Yi-34B', 2),
|
||||
]
|
||||
|
||||
models = []
|
||||
for abbr, path, num_gpus in settings:
|
||||
models.append(
|
||||
dict(
|
||||
type=LmdeployPytorchModel,
|
||||
abbr=abbr,
|
||||
path=path,
|
||||
engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
|
||||
gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
|
||||
max_out_len=1024,
|
||||
max_seq_len=2048,
|
||||
batch_size=16,
|
||||
concurrency=16,
|
||||
run_cfg=dict(num_gpus=num_gpus),
|
||||
)
|
||||
)
|
@@ -2,7 +2,7 @@
 from mmengine.config import read_base

 with read_base():
-    from .groups.cibench import cibench_summary_groups
+    from .groups.legacy.cibench import cibench_summary_groups
     from .groups.plugineval import plugineval_summary_groups


configs/summarizers/groups/legacy/cibench.py (new file, 109 lines)
@ -0,0 +1,109 @@
|
||||
|
||||
_cibench = ['Pandas', 'Matplotlib', 'Opencv', 'SciPy', 'Seaborn', 'PyTorch']
|
||||
_cibench = ['cibench_' + i for i in _cibench]
|
||||
cibench_summary_groups = [{'name': 'cibench', 'subsets': _cibench}]
|
||||
|
||||
_cibench_template = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
|
||||
'scipy', 'seaborn', 'sklearn', 'tensorflow']
|
||||
_cibench_template = ['cibench_template/' + i for i in _cibench_template]
|
||||
# number of total exec questions in this module
|
||||
_cibench_template_weight = {
|
||||
'lightgbm': [30, 15, 0, 0],
|
||||
'matplotlib': [42, 0, 0, 36],
|
||||
'nltk': [70, 30, 20, 10],
|
||||
'opencv': [60, 10, 0, 40],
|
||||
'pandas': [60, 40, 0, 10],
|
||||
'pytorch': [28, 0, 0, 0],
|
||||
'scipy': [60, 40, 0, 0],
|
||||
'seaborn': [42, 0, 0, 35],
|
||||
'sklearn': [42, 6, 0, 18],
|
||||
'tensorflow': [36, 6, 0, 12],
|
||||
}
|
||||
cibench_summary_groups.extend([
|
||||
{
|
||||
'name': 'cibench_template:executable',
|
||||
'subsets': [[i, 'executable'] for i in _cibench_template],
|
||||
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template:numeric_correct',
|
||||
'subsets': [[i, 'numeric_correct'] for i in _cibench_template],
|
||||
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template:text_score',
|
||||
'subsets': [[i, 'text_score'] for i in _cibench_template],
|
||||
'weights': {'cibench_template/' + k : v[2] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template:vis_sim',
|
||||
'subsets': [[i, 'vis_sim'] for i in _cibench_template],
|
||||
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
])
|
||||
|
||||
|
||||
## chinese
|
||||
_cibench_template_cn = ['lightgbm', 'matplotlib', 'nltk', 'opencv', 'pandas', 'pytorch',
|
||||
'scipy', 'seaborn', 'sklearn', 'tensorflow']
|
||||
_cibench_template_cn = ['cibench_template_chinese/' + i for i in _cibench_template_cn]
|
||||
cibench_summary_groups.extend([
|
||||
{
|
||||
'name': 'cibench_template_cn:executable',
|
||||
'subsets': [[i, 'executable'] for i in _cibench_template_cn],
|
||||
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_cn:numeric_correct',
|
||||
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn],
|
||||
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_cn:text_score',
|
||||
'subsets': [[i, 'text_score'] for i in _cibench_template_cn],
|
||||
'weights': {'cibench_template_chinese/' + k : v[2] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_cn:vis_sim',
|
||||
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn],
|
||||
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items()},
|
||||
},
|
||||
])
|
||||
|
||||
|
||||
## add more without nltk
|
||||
cibench_summary_groups.extend([
|
||||
{
|
||||
'name': 'cibench_template_wo_nltk:executable',
|
||||
'subsets': [[i, 'executable'] for i in _cibench_template if 'nltk' not in i],
|
||||
'weights': {'cibench_template/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_wo_nltk:numeric_correct',
|
||||
'subsets': [[i, 'numeric_correct'] for i in _cibench_template if 'nltk' not in i],
|
||||
'weights': {'cibench_template/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_wo_nltk:vis_sim',
|
||||
'subsets': [[i, 'vis_sim'] for i in _cibench_template if 'nltk' not in i],
|
||||
'weights': {'cibench_template/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
])
|
||||
|
||||
cibench_summary_groups.extend([
|
||||
{
|
||||
'name': 'cibench_template_cn_wo_nltk:executable',
|
||||
'subsets': [[i, 'executable'] for i in _cibench_template_cn if 'nltk' not in i],
|
||||
'weights': {'cibench_template_chinese/' + k : v[0] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_cn_wo_nltk:numeric_correct',
|
||||
'subsets': [[i, 'numeric_correct'] for i in _cibench_template_cn if 'nltk' not in i],
|
||||
'weights': {'cibench_template_chinese/' + k : v[1] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
{
|
||||
'name': 'cibench_template_cn_wo_nltk:vis_sim',
|
||||
'subsets': [[i, 'vis_sim'] for i in _cibench_template_cn if 'nltk' not in i],
|
||||
'weights': {'cibench_template_chinese/' + k : v[3] for k,v in _cibench_template_weight.items() if 'nltk' not in k},
|
||||
},
|
||||
])
|
@@ -170,6 +170,8 @@ def parse_dlc_args(dlc_parser):
         type=str)


+
+
 def parse_hf_args(hf_parser):
     """These args are all for the quick construction of HuggingFace models."""
     hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat')
@@ -212,7 +214,7 @@ def main():
     if args.work_dir is not None:
         cfg['work_dir'] = args.work_dir
     else:
-        cfg.setdefault('work_dir', osp.join('outputs', 'default'))
+        cfg.setdefault('work_dir', os.path.join('outputs', 'default'))

     # cfg_time_str defaults to the current time
     cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
@@ -340,5 +342,6 @@ def main():
         summarizer.summarize(time_str=cfg_time_str)


+
 if __name__ == '__main__':
     main()
@@ -7,7 +7,8 @@ from .base import BaseModel, LMTemplateParser  # noqa: F401
 from .base_api import APITemplateParser, BaseAPIModel  # noqa: F401
 from .bytedance_api import ByteDance  # noqa: F401
 from .claude_api import Claude  # noqa: F401
-from .gemini_api import Gemini, GeminiAllesAPIN  # noqa: F401
+from .deepseek_api import DeepseekAPI  # noqa: F401
+from .gemini_api import Gemini  # noqa: F401
 from .glm import GLM130B  # noqa: F401
 from .huggingface import HuggingFace  # noqa: F401
 from .huggingface import HuggingFaceCausalLM  # noqa: F401
@@ -21,7 +22,7 @@ from .lightllm_api import LightllmAPI  # noqa: F401
 from .llama2 import Llama2, Llama2Chat  # noqa: F401
 from .lmdeploy_pytorch import LmdeployPytorchModel  # noqa: F401
 from .lmdeploy_tis import LmdeployTisModel  # noqa: F401
-from .minimax_api import MiniMax  # noqa: F401
+from .minimax_api import MiniMax, MiniMaxChatCompletionV2  # noqa: F401
 from .mistral_api import Mistral  # noqa: F401
 from .mixtral import Mixtral  # noqa: F401
 from .modelscope import ModelScope, ModelScopeCausalLM  # noqa: F401
@@ -31,11 +32,12 @@ from .openai_api import OpenAI  # noqa: F401
 from .pangu_api import PanGu  # noqa: F401
 from .qwen_api import Qwen  # noqa: F401
 from .sensetime_api import SenseTime  # noqa: F401
+from .stepfun_api import StepFun  # noqa: F401
 from .turbomind import TurboMindModel  # noqa: F401
 from .turbomind_tis import TurboMindTisModel  # noqa: F401
 from .unigpt_api import UniGPT  # noqa: F401
 from .vllm import VLLM  # noqa: F401
-from .xunfei_api import XunFei  # noqa: F401
+from .xunfei_api import XunFei, XunFeiSpark  # noqa: F401
 from .yayi_api import Yayi  # noqa: F401
 from .zhipuai_api import ZhiPuAI  # noqa: F401
 from .zhipuai_v2_api import ZhiPuV2AI  # noqa: F401
@@ -1,4 +1,3 @@
-import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union

@@ -141,29 +140,32 @@ class AI360GPT(BaseAPIModel):
                 self.wait()
                 continue
             if raw_response.status_code == 200:
-                try:
-                    msg = response['choices'][0]['message']['content'].strip()
-                    return msg
-
-                except KeyError:
-                    if 'error' in response:
-                        # tpm(token per minitue) limit
-                        if response['erro']['code'] == '1005':
-                            time.sleep(1)
-                            continue
-
-                        self.logger.error('Find error message in response: ',
-                                          str(response['error']))
+                msg = response['choices'][0]['message']['content'].strip()
+                self.logger.debug(f'Generated: {msg}')
+                return msg

             # sensitive content, prompt overlength, network error
             # or illegal prompt
-            if (raw_response.status_code == 400
-                    or raw_response.status_code == 401
-                    or raw_response.status_code == 402
-                    or raw_response.status_code == 429
-                    or raw_response.status_code == 500):
-                print(raw_response.text)
-                continue
+            if raw_response.status_code in [400, 401, 402, 429, 500]:
+                if 'error' not in response:
+                    print(raw_response.status_code)
+                    print(raw_response.text)
+                    continue
+                print(response)
+                # tpm(token per minitue) limit
+                if response['error']['code'] == '1005':
+                    self.logger.debug('tpm limit, ignoring')
+                    continue
+                elif response['error']['code'] == '1001':
+                    msg = '参数错误:messages参数过长或max_tokens参数值过大'
+                    self.logger.debug(f'Generated: {msg}')
+                    return msg
+                else:
+                    print(response)
+
+                self.logger.error('Find error message in response: ',
+                                  str(response['error']))

             print(raw_response)
             max_num_retries += 1

@@ -145,8 +145,8 @@ class BaiChuan(BaseAPIModel):
                 self.wait()
                 continue
             if raw_response.status_code == 200:
-
                 msg = response['choices'][0]['message']['content']
+                self.logger.debug(f'Generated: {msg}')
                 return msg

             if raw_response.status_code != 200:
@@ -53,6 +53,8 @@ class ERNIEBot(BaseAPIModel):
         self.headers = {'Content_Type': 'application/json'}
         self.secretkey = secretkey
         self.key = key
+        if not url.endswith('?access_token='):
+            url += '?access_token='
         self.url = url
         access_token, _ = self._generate_access_token()
         self.access_token = access_token
@@ -143,14 +145,25 @@ class ERNIEBot(BaseAPIModel):
             messages = [{'role': 'user', 'content': input}]
         else:
             messages = []
+            msg_buffer, last_role = [], None
             for item in input:
-                msg = {'content': item['prompt']}
-                if item['role'] == 'HUMAN':
-                    msg['role'] = 'user'
-                elif item['role'] == 'BOT':
-                    msg['role'] = 'assistant'
+                if item['role'] == 'BOT':
+                    role = 'assistant'
+                else:  # USER or SYSTEM
+                    role = 'user'
+                if role != last_role and last_role is not None:
+                    messages.append({
+                        'content': '\n'.join(msg_buffer),
+                        'role': last_role
+                    })
+                    msg_buffer = []
+                msg_buffer.append(item['prompt'])
+                last_role = role
+            messages.append({
+                'content': '\n'.join(msg_buffer),
+                'role': last_role
+            })

-                messages.append(msg)
         data = {'messages': messages}
         data.update(self.generation_kwargs)

@@ -181,6 +194,7 @@ class ERNIEBot(BaseAPIModel):
             if raw_response.status_code == 200:
                 try:
                     msg = response['result']
+                    self.logger.debug(msg)
                     return msg
                 except KeyError:
                     print(response)
@@ -188,9 +202,12 @@ class ERNIEBot(BaseAPIModel):
                 if response['error_code'] == 336007:
                     # exceed max length
                     return ''
-
-                time.sleep(1)
-                continue
+                elif response['error_code'] == 336103:
+                    # prompt tokens too long
+                    return ''
+                else:
+                    time.sleep(1)
+                    continue

                 if (response['error_code'] == 110 or response['error_code'] == 100
                         or response['error_code'] == 111
opencompass/models/deepseek_api.py (new file, 178 lines)
@ -0,0 +1,178 @@
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
from opencompass.utils.prompt import PromptList
|
||||
|
||||
from .base_api import BaseAPIModel
|
||||
|
||||
PromptType = Union[PromptList, str]
|
||||
|
||||
|
||||
class DeepseekAPI(BaseAPIModel):
|
||||
"""Model wrapper around DeepseekAPI.
|
||||
|
||||
Documentation:
|
||||
|
||||
Args:
|
||||
path (str): The name of DeepseekAPI model.
|
||||
e.g. `moonshot-v1-32k`
|
||||
key (str): Authorization key.
|
||||
query_per_second (int): The maximum queries allowed per second
|
||||
between two consecutive calls of the API. Defaults to 1.
|
||||
max_seq_len (int): Unused here.
|
||||
meta_template (Dict, optional): The model's meta prompt
|
||||
template if needed, in case the requirement of injecting or
|
||||
wrapping of any meta instructions.
|
||||
retry (int): Number of retires if the API call fails. Defaults to 2.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
key: str,
|
||||
url: str,
|
||||
query_per_second: int = 2,
|
||||
max_seq_len: int = 2048,
|
||||
meta_template: Optional[Dict] = None,
|
||||
retry: int = 2,
|
||||
system_prompt: str = '',
|
||||
):
|
||||
super().__init__(path=path,
|
||||
max_seq_len=max_seq_len,
|
||||
query_per_second=query_per_second,
|
||||
meta_template=meta_template,
|
||||
retry=retry)
|
||||
self.headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer ' + key,
|
||||
}
|
||||
self.url = url
|
||||
self.model = path
|
||||
self.system_prompt = system_prompt
|
||||
|
||||
def generate(
|
||||
self,
|
||||
inputs: List[PromptType],
|
||||
max_out_len: int = 512,
|
||||
) -> List[str]:
|
||||
"""Generate results given a list of inputs.
|
||||
|
||||
Args:
|
||||
inputs (List[PromptType]): A list of strings or PromptDicts.
|
||||
The PromptDict should be organized in OpenCompass'
|
||||
API format.
|
||||
max_out_len (int): The maximum length of the output.
|
||||
|
||||
Returns:
|
||||
List[str]: A list of generated strings.
|
||||
"""
|
||||
with ThreadPoolExecutor() as executor:
|
||||
results = list(
|
||||
executor.map(self._generate, inputs,
|
||||
[max_out_len] * len(inputs)))
|
||||
self.flush()
|
||||
return results
|
||||
|
||||
def _generate(
|
||||
self,
|
||||
input: PromptType,
|
||||
max_out_len: int = 512,
|
||||
) -> str:
|
||||
"""Generate results given an input.
|
||||
|
||||
Args:
|
||||
inputs (PromptType): A string or PromptDict.
|
||||
The PromptDict should be organized in OpenCompass'
|
||||
API format.
|
||||
max_out_len (int): The maximum length of the output.
|
||||
|
||||
Returns:
|
||||
str: The generated string.
|
||||
"""
|
||||
assert isinstance(input, (str, PromptList))
|
||||
|
||||
if isinstance(input, str):
|
||||
messages = [{'role': 'user', 'content': input}]
|
||||
else:
|
||||
messages = []
|
||||
msg_buffer, last_role = [], None
|
||||
for item in input:
|
||||
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
|
||||
if item['role'] != last_role and last_role is not None:
|
||||
messages.append({
|
||||
'content': '\n'.join(msg_buffer),
|
||||
'role': last_role
|
||||
})
|
||||
msg_buffer = []
|
||||
msg_buffer.append(item['prompt'])
|
||||
last_role = item['role']
|
||||
messages.append({
|
||||
'content': '\n'.join(msg_buffer),
|
||||
'role': last_role
|
||||
})
|
||||
|
||||
if self.system_prompt:
|
||||
system = {'role': 'system', 'content': self.system_prompt}
|
||||
messages.insert(0, system)
|
||||
|
||||
data = {'model': self.model, 'messages': messages}
|
||||
|
||||
max_num_retries = 0
|
||||
while max_num_retries < self.retry:
|
||||
self.acquire()
|
||||
try:
|
||||
raw_response = requests.request('POST',
|
||||
url=self.url,
|
||||
headers=self.headers,
|
||||
json=data)
|
||||
except Exception as err:
|
||||
print('Request Error:{}'.format(err))
|
||||
time.sleep(2)
|
||||
continue
|
||||
|
||||
try:
|
||||
response = raw_response.json()
|
||||
except Exception as err:
|
||||
print('Response Error:{}'.format(err))
|
||||
response = None
|
||||
self.release()
|
||||
|
||||
if response is None:
|
||||
print('Connection error, reconnect.')
|
||||
# if connect error, frequent requests will casuse
|
||||
# continuous unstable network, therefore wait here
|
||||
# to slow down the request
|
||||
self.wait()
|
||||
continue
|
||||
|
||||
if raw_response.status_code == 200:
|
||||
# msg = json.load(response.text)
|
||||
# response
|
||||
msg = response['choices'][0]['message']['content']
|
||||
self.logger.debug(f'Generated: {msg}')
|
||||
return msg
|
||||
|
||||
if raw_response.status_code == 401:
|
||||
print('请求被拒绝 api_key错误')
|
||||
continue
|
||||
elif raw_response.status_code == 400:
|
||||
print(messages, response)
|
||||
print('请求失败,状态码:', raw_response)
|
||||
msg = 'The request was rejected because high risk'
|
||||
return msg
|
||||
elif raw_response.status_code == 429:
|
||||
print(messages, response)
|
||||
print('请求失败,状态码:', raw_response)
|
||||
time.sleep(5)
|
||||
continue
|
||||
else:
|
||||
print(messages, response)
|
||||
print('请求失败,状态码:', raw_response)
|
||||
time.sleep(1)
|
||||
|
||||
max_num_retries += 1
|
||||
|
||||
raise RuntimeError(raw_response)
|
@ -186,66 +186,3 @@ class Gemini(BaseAPIModel):
|
||||
time.sleep(1)
|
||||
|
||||
raise RuntimeError('API call failed.')
|
||||
|
||||
|
||||
class GeminiAllesAPIN(Gemini):
|
||||
"""Model wrapper around Gemini models.
|
||||
|
||||
Documentation:
|
||||
|
||||
Args:
|
||||
path (str): The name of Gemini model.
|
||||
e.g. `gemini-pro`
|
||||
key (str): Authorization key.
|
||||
query_per_second (int): The maximum queries allowed per second
|
||||
between two consecutive calls of the API. Defaults to 1.
|
||||
max_seq_len (int): Unused here.
|
||||
meta_template (Dict, optional): The model's meta prompt
|
||||
template if needed, in case the requirement of injecting or
|
||||
wrapping of any meta instructions.
|
||||
retry (int): Number of retires if the API call fails. Defaults to 2.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
key: str,
|
||||
url: str,
|
||||
query_per_second: int = 2,
|
||||
max_seq_len: int = 2048,
|
||||
meta_template: Optional[Dict] = None,
|
||||
retry: int = 2,
|
||||
temperature: float = 1.0,
|
||||
top_p: float = 0.8,
|
||||
top_k: float = 10.0,
|
||||
):
|
||||
super().__init__(key=key,
|
||||
path=path,
|
||||
max_seq_len=max_seq_len,
|
||||
query_per_second=query_per_second,
|
||||
meta_template=meta_template,
|
||||
retry=retry)
|
||||
# Replace the url and headers into AllesApin
|
||||
self.url = url
|
||||
self.headers = {
|
||||
'alles-apin-token': key,
|
||||
'content-type': 'application/json',
|
||||
}
|
||||
|
||||
def generate(
|
||||
self,
|
||||
inputs: List[PromptType],
|
||||
max_out_len: int = 512,
|
||||
) -> List[str]:
|
||||
"""Generate results given a list of inputs.
|
||||
|
||||
Args:
|
||||
inputs (List[PromptType]): A list of strings or PromptDicts.
|
||||
The PromptDict should be organized in OpenCompass'
|
||||
API format.
|
||||
max_out_len (int): The maximum length of the output.
|
||||
|
||||
Returns:
|
||||
List[str]: A list of generated strings.
|
||||
"""
|
||||
return super().generate(inputs, max_out_len)
|
||||
|
@@ -289,13 +289,13 @@ class HuggingFace(BaseModel):
         tokens = self.tokenizer.batch_encode_plus(inputs,
                                                   padding=True,
                                                   truncation=True,
-                                                  max_length=self.max_seq_len -
-                                                  max_out_len)
+                                                  max_length=self.max_seq_len)
         tokens = {
             k: torch.tensor(np.array(tokens[k]), device=self.model.device)
             for k in tokens if k in ['input_ids', 'attention_mask']
         }

+        origin_stopping_criteria = stopping_criteria
         if stopping_criteria:
             # Construct huggingface stopping criteria
             if self.tokenizer.eos_token is not None:
@@ -332,6 +332,9 @@ class HuggingFace(BaseModel):

         if self.end_str:
             decodeds = [token.split(self.end_str)[0] for token in decodeds]
+        if origin_stopping_criteria:
+            for t in origin_stopping_criteria:
+                decodeds = [token.split(t)[0] for token in decodeds]
         return decodeds

     def _single_generate(self,
@@ -382,6 +385,7 @@ class HuggingFace(BaseModel):
                                          max_length=self.max_seq_len -
                                          max_out_len)['input_ids']
         input_ids = torch.tensor(input_ids, device=self.model.device)
+        origin_stopping_criteria = stopping_criteria
         if stopping_criteria:
             # Construct huggingface stopping criteria
             if self.tokenizer.eos_token is not None:
@@ -419,6 +423,9 @@ class HuggingFace(BaseModel):

         if self.end_str:
             decodeds = [token.split(self.end_str)[0] for token in decodeds]
+        if origin_stopping_criteria:
+            for t in origin_stopping_criteria:
+                decodeds = [token.split(t)[0] for token in decodeds]
         return decodeds

     def get_logits(self, inputs: List[str]):
@ -180,3 +180,173 @@ class MiniMax(BaseAPIModel):
|
||||
max_num_retries += 1
|
||||
|
||||
raise RuntimeError(response.text)
|
||||
|
||||
|
||||
class MiniMaxChatCompletionV2(BaseAPIModel):
|
||||
"""Model wrapper around MiniMax ChatCompletionV2.
|
||||
|
||||
Documentation:
|
||||
|
||||
Args:
|
||||
path (str): The name of MiniMax model.
|
||||
e.g. `moonshot-v1-32k`
|
||||
key (str): Authorization key.
|
||||
query_per_second (int): The maximum queries allowed per second
|
||||
between two consecutive calls of the API. Defaults to 1.
|
||||
max_seq_len (int): Unused here.
|
||||
meta_template (Dict, optional): The model's meta prompt
|
||||
template if needed, in case the requirement of injecting or
|
||||
wrapping of any meta instructions.
|
||||
retry (int): Number of retires if the API call fails. Defaults to 2.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
key: str,
|
||||
url: str,
|
||||
query_per_second: int = 2,
|
||||
max_seq_len: int = 2048,
|
||||
meta_template: Optional[Dict] = None,
|
||||
retry: int = 2,
|
||||
):
|
||||
super().__init__(path=path,
|
||||
max_seq_len=max_seq_len,
|
||||
query_per_second=query_per_second,
|
||||
meta_template=meta_template,
|
||||
retry=retry)
|
||||
self.headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer ' + key,
|
||||
}
|
||||
self.url = url
|
||||
self.model = path
|
||||
|
||||
def generate(
|
||||
self,
|
||||
inputs: List[PromptType],
|
||||
max_out_len: int = 512,
|
||||
) -> List[str]:
|
||||
"""Generate results given a list of inputs.
|
||||
|
||||
Args:
|
||||
inputs (List[PromptType]): A list of strings or PromptDicts.
|
||||
The PromptDict should be organized in OpenCompass'
|
||||
API format.
|
||||
max_out_len (int): The maximum length of the output.
|
||||
|
||||
Returns:
|
||||
List[str]: A list of generated strings.
|
||||
"""
|
||||
with ThreadPoolExecutor() as executor:
|
||||
results = list(
|
||||
executor.map(self._generate, inputs,
|
||||
[max_out_len] * len(inputs)))
|
||||
self.flush()
|
||||
return results
|
||||
|
||||
def _generate(
|
||||
self,
|
||||
input: PromptType,
|
||||
max_out_len: int = 512,
|
||||
) -> str:
|
||||
"""Generate results given an input.
|
||||
|
||||
Args:
|
||||
inputs (PromptType): A string or PromptDict.
|
||||
The PromptDict should be organized in OpenCompass'
|
||||
API format.
|
||||
max_out_len (int): The maximum length of the output.
|
||||
|
||||
Returns:
|
||||
str: The generated string.
|
||||
"""
|
||||
assert isinstance(input, (str, PromptList))
|
||||
|
||||
if isinstance(input, str):
|
||||
messages = [{'role': 'user', 'content': input}]
|
||||
else:
|
||||
messages = []
|
||||
msg_buffer, last_role = [], None
|
||||
for item in input:
|
||||
item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
|
||||
if item['role'] != last_role and last_role is not None:
|
||||
messages.append({
|
||||
'content': '\n'.join(msg_buffer),
|
||||
'role': last_role
|
||||
})
|
||||
msg_buffer = []
|
||||
msg_buffer.append(item['prompt'])
|
||||
last_role = item['role']
|
||||
messages.append({
|
||||
'content': '\n'.join(msg_buffer),
|
||||
'role': last_role
|
||||
})
|
||||
|
||||
        data = {
            'model': self.model,
            'messages': messages,
            'max_tokens': max_out_len
        }

        max_num_retries = 0
        while max_num_retries < self.retry:
            self.acquire()
            try:
                raw_response = requests.request('POST',
                                                url=self.url,
                                                headers=self.headers,
                                                json=data)
            except Exception as err:
                print('Request Error:{}'.format(err))
                time.sleep(2)
                continue

            response = raw_response.json()
            self.release()

            if response is None:
                print('Connection error, reconnect.')
                # If the connection errors out, frequent requests will cause
                # continuously unstable networking, so wait here to slow
                # down the requests.
                self.wait()
                continue

            if raw_response.status_code == 200:
                try:
                    msg = response['choices'][0]['message']['content']
                    self.logger.debug(f'Generated: {msg}')
                    return msg
                except Exception:
                    code = response.get('base_resp', {}).get('status_code')
                    if code == 1002:
                        # rate limit
                        time.sleep(1)
                        continue
                    elif code == 1027:
                        return 'The request was rejected because of high risk'
                    print(messages, response)
                    pass

            elif raw_response.status_code == 401:
                print('Request rejected: invalid api_key')
                continue
            elif raw_response.status_code == 400:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                msg = 'The request was rejected because of high risk'
                return msg
            elif raw_response.status_code == 429:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                time.sleep(5)
                continue
            else:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                time.sleep(1)

            max_num_retries += 1

        raise RuntimeError(raw_response)
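The `_generate` method above converts an OpenCompass PromptList into OpenAI-style chat messages by mapping BOT turns to `assistant` and merging consecutive turns from the same role. A minimal, self-contained sketch of that merging step; the helper name and the sample turns are illustrative, not taken from this patch:

from typing import Dict, List

def flatten_prompt_list(items: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Merge consecutive same-role turns, mapping BOT -> assistant (illustrative helper)."""
    messages, msg_buffer, last_role = [], [], None
    for item in items:
        role = 'assistant' if item['role'] == 'BOT' else 'user'
        if role != last_role and last_role is not None:
            messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
            msg_buffer = []
        msg_buffer.append(item['prompt'])
        last_role = role
    messages.append({'content': '\n'.join(msg_buffer), 'role': last_role})
    return messages

# Two consecutive HUMAN turns collapse into one user message:
print(flatten_prompt_list([
    {'role': 'HUMAN', 'prompt': 'Question 1'},
    {'role': 'HUMAN', 'prompt': 'Some context'},
    {'role': 'BOT', 'prompt': 'Answer 1'},
]))
# -> [{'content': 'Question 1\nSome context', 'role': 'user'},
#     {'content': 'Answer 1', 'role': 'assistant'}]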
@ -152,8 +152,7 @@ class Qwen(BaseAPIModel):
        if response.status_code == 200:
            try:
                msg = response.output.text
                print('=' * 128)
                print(msg)
                self.logger.debug(msg)
                return msg
            except KeyError:
                print(response)
182
opencompass/models/stepfun_api.py
Normal file
@ -0,0 +1,182 @@
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

import requests

from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]


class StepFun(BaseAPIModel):
    """Model wrapper around StepFun.

    Documentation:

    Args:
        path (str): The name of the StepFun model.
            e.g. `moonshot-v1-32k`
        key (str): Authorization key.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        max_seq_len (int): Unused here.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case meta instructions need to be
            injected or wrapped.
        retry (int): Number of retries if the API call fails. Defaults to 2.
    """

    def __init__(
        self,
        path: str,
        key: str,
        url: str,
        query_per_second: int = 2,
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        system_prompt: str = '',
    ):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         query_per_second=query_per_second,
                         meta_template=meta_template,
                         retry=retry)
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + key,
        }
        self.url = url
        self.model = path
        self.system_prompt = system_prompt

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs)))
        self.flush()
        return results

    def _generate(
        self,
        input: PromptType,
        max_out_len: int = 512,
    ) -> str:
        """Generate results given an input.

        Args:
            input (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            msg_buffer, last_role = [], None
            for item in input:
                item['role'] = 'assistant' if item['role'] == 'BOT' else 'user'
                if item['role'] != last_role and last_role is not None:
                    messages.append({
                        'content': '\n'.join(msg_buffer),
                        'role': last_role
                    })
                    msg_buffer = []
                msg_buffer.append(item['prompt'])
                last_role = item['role']
            messages.append({
                'content': '\n'.join(msg_buffer),
                'role': last_role
            })

        if self.system_prompt:
            system = {'role': 'system', 'content': self.system_prompt}
            messages.insert(0, system)

        data = {'model': self.model, 'messages': messages}

        max_num_retries = 0
        while max_num_retries < self.retry:
            self.acquire()
            try:
                raw_response = requests.request('POST',
                                                url=self.url,
                                                headers=self.headers,
                                                json=data)
            except Exception as err:
                print('Request Error:{}'.format(err))
                time.sleep(2)
                continue

            try:
                response = raw_response.json()
            except Exception:
                response = None
            self.release()

            if response is None:
                print('Connection error, reconnect.')
                # If the connection errors out, frequent requests will cause
                # continuously unstable networking, so wait here to slow
                # down the requests.
                self.wait()
                continue

            if raw_response.status_code == 200:
                # msg = json.load(response.text)
                # response
                msg = response['choices'][0]['message']['content']
                self.logger.debug(f'Generated: {msg}')
                return msg

            if raw_response.status_code == 400:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                msg = 'The context length exceeded'
                return msg
            elif raw_response.status_code == 403:
                print('Request rejected: invalid api_key')
                continue
            elif raw_response.status_code == 429:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                time.sleep(5)
                continue
            elif raw_response.status_code == 451:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                msg = 'The request was rejected because of high risk'
                return msg
            else:
                print(messages, response)
                print('Request failed, status code:', raw_response)
                time.sleep(1)

            max_num_retries += 1

        raise RuntimeError(raw_response)
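For context, a wrapper like this is normally referenced from an OpenCompass model config. The sketch below is an assumption-laden illustration only: the import path presumes the class is exported from opencompass.models, the model id and endpoint URL are placeholders, and abbr/max_out_len/batch_size are typical config fields rather than arguments consumed by StepFun itself.

import os

from opencompass.models import StepFun  # assumes StepFun is re-exported in models/__init__.py

models = [
    dict(
        abbr='stepfun-api',                    # display name, arbitrary
        type=StepFun,
        path='step-model-name',                # placeholder model id
        url='https://example.com/v1/chat/completions',  # placeholder endpoint
        key=os.environ.get('STEPFUN_API_KEY', ''),
        query_per_second=1,
        max_out_len=512,
        max_seq_len=2048,
        batch_size=8,
    ),
]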
@ -55,9 +55,6 @@ class TurboMindModel(BaseModel):
        if engine_config is not None:
            from lmdeploy.messages import TurbomindEngineConfig
            engine_config = TurbomindEngineConfig(**engine_config)
        if gen_config is not None:
            from lmdeploy.messages import EngineGenerationConfig
            gen_config = EngineGenerationConfig(**gen_config)
        self.logger = get_logger()
        tm_model = TurboMind.from_pretrained(path, engine_config=engine_config)
        self.tokenizer = tm_model.tokenizer
@ -106,6 +103,7 @@ class TurboMindModel(BaseModel):
                t = self.tokenizer.encode(t, add_bos=False)
                stop_words.append(t[0])
            gen_config['stop_words'] = list(set(stop_words))
        gen_config.setdefault('min_new_tokens', 1)

        from lmdeploy.messages import EngineGenerationConfig
        gen_config = EngineGenerationConfig(**gen_config)
@ -123,6 +121,9 @@ class TurboMindModel(BaseModel):
                [gen_config] * len(batch_input),
            ))
            results += _results
        if stopping_criteria:
            for s in stopping_criteria:
                results = [r.split(s)[0] for r in results]
        return results

    def get_token_len(self, prompt: str) -> int:
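The stopping_criteria handling added in the last hunk truncates each decoded string at the first occurrence of any stop string. A tiny standalone illustration of that post-processing; the sample outputs and stop word are invented:

results = ['Answer: B\nQ: next question', 'Answer: C']
stopping_criteria = ['\nQ:']  # hypothetical stop string

for s in stopping_criteria:
    results = [r.split(s)[0] for r in results]

print(results)  # ['Answer: B', 'Answer: C']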
@ -1,4 +1,6 @@
import json
import re
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

@ -221,3 +223,150 @@ class XunFei(BaseAPIModel):
            if err_code == 10013:
                return err_data['header']['message']
            raise RuntimeError(f'Code: {err_code}, data: {err_data}')


class XunFeiSpark(BaseAPIModel):
    """Model wrapper around XunFeiSpark.

    Documentation:

    Args:
        path (str): The name of the XunFeiSpark model.
            e.g. `moonshot-v1-32k`
        key (str): Authorization key.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        max_seq_len (int): Unused here.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case meta instructions need to be
            injected or wrapped.
        retry (int): Number of retries if the API call fails. Defaults to 2.
    """

    def __init__(
        self,
        path: str,
        url: str,
        app_id: str,
        api_key: str,
        api_secret: str,
        query_per_second: int = 2,
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
    ):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         query_per_second=query_per_second,
                         meta_template=meta_template,
                         retry=retry)
        try:
            from sparkai.llm.llm import ChatSparkLLM  # noqa: F401
        except ImportError:
            raise ImportError('run `pip install --upgrade spark_ai_python`')

        self.spark_domain = path
        self.url = url
        self.app_id = app_id
        self.api_key = api_key
        self.api_secret = api_secret

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
    ) -> List[str]:
        results = [self._generate(input, max_out_len) for input in inputs]
        return results

    def _generate(
        self,
        input: PromptType,
        max_out_len: int = 512,
    ) -> str:
        assert isinstance(input, (str, PromptList))

        from sparkai.core.messages import ChatMessage
        from sparkai.llm.llm import ChatSparkLLM

        if isinstance(input, str):
            messages = [ChatMessage(role='user', content=input)]
        else:
            messages = []
            msg_buffer, last_role = [], None
            for index, item in enumerate(input):
                if index == 0 and item['role'] == 'SYSTEM':
                    role = 'system'
                elif item['role'] == 'BOT':
                    role = 'assistant'
                else:
                    role = 'user'

                if role != last_role and last_role is not None:
                    content = '\n'.join(msg_buffer)
                    messages.append(
                        ChatMessage(role=last_role, content=content))
                    msg_buffer = []

                msg_buffer.append(item['prompt'])
                last_role = role

            content = '\n'.join(msg_buffer)
            messages.append(ChatMessage(role=last_role, content=content))

        spark = ChatSparkLLM(
            spark_api_url=self.url,
            spark_app_id=self.app_id,
            spark_api_key=self.api_key,
            spark_api_secret=self.api_secret,
            spark_llm_domain=self.spark_domain,
            streaming=False,
            max_tokens=max_out_len,
        )

        all_empty_response = True
        for _ in range(self.retry + 1):
            try:
                outputs = spark.generate([messages]).generations[0]
                if len(outputs) == 0:
                    self.logger.error('Empty response, retrying...')
                    continue
                msg = outputs[0].text
                self.logger.debug(f'Generated: {msg}')
                return msg
            except ConnectionError as e:
                match = re.match(r'Error Code: (\d+), Error: (.*)',
                                 e.args[0],
                                 flags=re.DOTALL)
                if match:
                    error_code = int(match.group(1))
                    msg = match.group(2)
                    if error_code == 10003:  # query data exceed limit
                        self.logger.error(f'Error {error_code}: {msg}')
                        return msg
                    elif error_code in [10013, 10014]:  # skip safety problem
                        self.logger.debug(f'Generated: {msg}')
                        return msg
                    elif error_code == 10020:  # plugin result is empty
                        self.logger.error(f'Error {error_code}: {msg}')
                        return msg
                    elif error_code == 11202:  # qps limit
                        time.sleep(1)
                    else:
                        self.logger.error(f'Error {error_code}: {msg}')
                        raise e
                raise e
            except TimeoutError:
                self.logger.error('TimeoutError, sleep 60, retrying...')
                time.sleep(60)
            except Exception as e:
                self.logger.error(str(e))
                pass

            all_empty_response = False

        if all_empty_response:
            self.logger.error('All empty response')
            return 'all empty response'

        raise RuntimeError('Failed to generate response')
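The ConnectionError handler above relies on Spark errors arriving as strings of the form `Error Code: <n>, Error: <message>`. A quick, illustrative check of that parsing; the sample error text is made up, and re.DOTALL is what lets the captured message span multiple lines:

import re

sample = 'Error Code: 11202, Error: qps limit exceeded\nplease retry later'
match = re.match(r'Error Code: (\d+), Error: (.*)', sample, flags=re.DOTALL)
if match:
    error_code = int(match.group(1))  # 11202
    msg = match.group(2)              # 'qps limit exceeded\nplease retry later'
    print(error_code, repr(msg))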
@ -141,7 +141,7 @@ class DLCRunner(BaseRunner):

            hf_offline = self.aliyun_cfg.get('hf_offline', True)
            if hf_offline:
                shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; '  # noqa: E501
                shell_cmd += 'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; export HF_HUB_OFFLINE=1; '  # noqa: E501

            http_proxy = self.aliyun_cfg.get('http_proxy')
            if http_proxy is not None:
@ -158,6 +158,7 @@ class DLCRunner(BaseRunner):
                shell_cmd += f'export {extra_env}; '

            shell_cmd += f'cd {pwd}; '
            shell_cmd += 'umask 0000; '
            shell_cmd += '{task_cmd}'

            tmpl = ('dlc create job'
@ -195,7 +196,10 @@ class DLCRunner(BaseRunner):
        index_to_start = 0
        while index_to_start < num_retry_to_start:
            index_to_start += 1
            output = subprocess.getoutput(cmd)
            try:
                output = subprocess.getoutput(cmd)
            except BlockingIOError:
                output = ''
            match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
            if match is None:
                stdout.write('Failed to get job id from output:')
@ -264,7 +268,10 @@ class DLCRunner(BaseRunner):
                    f" -c {self.aliyun_cfg['dlc_config_path']}"
                    f' --start_time {pri_time}'
                    f' --end_time {cur_time}')
                log_output = subprocess.getoutput(logs_cmd)
                try:
                    log_output = subprocess.getoutput(logs_cmd)
                except BlockingIOError:
                    log_output = '[WARN] No logs found for the pod'

                if '[WARN] No logs found for the pod' not in log_output:
                    pri_time = cur_time
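The last two hunks wrap subprocess.getoutput in a try/except so a transient BlockingIOError does not abort the runner; it simply falls back to an empty or warning string and the surrounding retry loop continues. A small standalone version of that pattern, with an invented helper name and example command:

import subprocess

def getoutput_safe(cmd: str, fallback: str = '') -> str:
    """Run a shell command; return `fallback` if a BlockingIOError is raised."""
    try:
        return subprocess.getoutput(cmd)
    except BlockingIOError:
        return fallback

print(getoutput_safe('echo hello'))  # -> 'hello'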
@ -46,17 +46,19 @@ class LocalRunner(BaseRunner):
        lark_bot_url (str): Lark bot url.
    """

    def __init__(
        self,
        task: ConfigDict,
        max_num_workers: int = 16,
        debug: bool = False,
        max_workers_per_gpu: int = 1,
        lark_bot_url: str = None,
    ):
    def __init__(self,
                 task: ConfigDict,
                 max_num_workers: int = 16,
                 debug: bool = False,
                 max_workers_per_gpu: int = 1,
                 lark_bot_url: str = None,
                 **kwargs):
        super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
        self.max_num_workers = max_num_workers
        self.max_workers_per_gpu = max_workers_per_gpu
        logger = get_logger()
        for k, v in kwargs.items():
            logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')

    def launch(self, tasks: List[Dict[str, Any]]) -> List[Tuple[str, int]]:
        """Launch multiple tasks.
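The new **kwargs parameter lets LocalRunner accept config keys it does not use and warn about them instead of raising a TypeError. A toy illustration of the same pattern outside OpenCompass; the class and argument names here are invented:

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

class Runner:
    def __init__(self, max_num_workers: int = 16, **kwargs):
        self.max_num_workers = max_num_workers
        # Unknown keyword arguments are logged and ignored rather than
        # raising a TypeError, so shared configs with extra keys still work.
        for k, v in kwargs.items():
            logger.warning(f'Ignored argument in {self.__module__}: {k}={v}')

Runner(max_num_workers=8, partition='gpu')  # logs: Ignored argument ... partition=gpu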
@ -94,11 +94,11 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
        f'答案是\s?(\S+)(?:。|$)',
        f'答案应该是\s?(\S+)(?:。|$)',
        f'答案为\s?(\S+)(?:。|$)',
        f'[Tt]he answer is \(?([{options}])\)?',
        f'[Tt]he answer is option \(?([{options}])\)?',
        f'[Tt]he correct answer is \(?([{options}])\)?',
        f'[Tt]he correct answer is option \(?([{options}])\)?',
        f'[Tt]he answer to the question is \(?([{options}])\)?',
        f'[Tt]he answer is:?\s+\(?([{options}])\)?',
        f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
        f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
        f'[Tt]he correct answer is option:?\s+\(?([{options}])\)?',
        f'[Tt]he answer to the question is:?\s+\(?([{options}])\)?',
        f'^选项\s?([{options}])',
        f'^([{options}])\s?选?项',
        f'(\s|^)[{options}][\s。,,::\.$]',
@ -116,7 +116,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
    if cushion:
        patterns.extend(cushion_patterns)
    for pattern in patterns:
        match = re.search(pattern, text)
        match = re.search(pattern, text, re.DOTALL)
        if match:
            outputs = match.group(0)
            for i in options:
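The second hunk adds re.DOTALL, whose only effect is to let `.` in a pattern match newlines, so dot-based patterns can still hit answers that span multiple lines of model output. A quick comparison on a made-up completion; the pattern below only illustrates the flag's effect and is not one of the file's actual patterns:

import re

text = 'I think the answer\nis B.'
pattern = r'answer.*([ABCD])'  # illustrative only

print(re.search(pattern, text))                      # None: '.' stops at the newline
print(re.search(pattern, text, re.DOTALL).group(1))  # 'B': '.' may cross the newline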