mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Support OpenAI ChatCompletion (#1389)
* [Feature] Support import configs/models/summarizers from whl * Update * Update openai sdk * Update * Update gemma
This commit is contained in:
parent
07c96ac659
commit
c09fc79ba8
@ -93,13 +93,16 @@ repos:
|
||||
files: ^configs/datasets
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: update-dataset-suffix
|
||||
name: dataset suffix updater
|
||||
- id: update-dataset-suffix-package
|
||||
name: dataset suffix updater(package)
|
||||
entry: ./tools/update_dataset_suffix.py
|
||||
language: script
|
||||
pass_filenames: true
|
||||
require_serial: true
|
||||
files: ^opencompass/configs/datasets
|
||||
args:
|
||||
- --root_folder
|
||||
- opencompass/configs/datasets
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: compare-configs-datasets
|
||||
@ -123,7 +126,7 @@ repos:
|
||||
- configs/models
|
||||
- opencompass/configs/models
|
||||
- --ignore
|
||||
- configs/models/llama
|
||||
- llama
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: compare-configs-summarizers
|
||||
|
@ -96,15 +96,16 @@ repos:
|
||||
files: ^configs/datasets
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: update-dataset-suffix-package
|
||||
- id: update-dataset-suffix-package
|
||||
name: dataset suffix updater(package)
|
||||
entry: ./tools/update_dataset_suffix.py
|
||||
language: script
|
||||
pass_filenames: false
|
||||
pass_filenames: true
|
||||
require_serial: true
|
||||
files: ^opencompass/configs/datasets
|
||||
args:
|
||||
- --root_folder
|
||||
- opencompass/configs/datasets
|
||||
files: \.py$
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: compare-configs-datasets
|
||||
@ -128,7 +129,7 @@ repos:
|
||||
- configs/models
|
||||
- opencompass/configs/models
|
||||
- --ignore
|
||||
- configs/models/llama
|
||||
- llama
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: compare-configs-summarizers
|
||||
|
@ -70,6 +70,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
|
||||
|
||||
## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
||||
|
||||
- **\[2024.08.01\]** We supported the [Gemma2](https://huggingface.co/collections/google/gemma-2-release-667d6600fd5220e7b967f315) models. Welcome to try! 🔥🔥🔥
|
||||
- **\[2024.07.23\]** We supported the [ModelScope](https://www.modelscope.cn) datasets, you can load them on demand without downloading all the data to your local disk. Welcome to try! 🔥🔥🔥
|
||||
- **\[2024.07.17\]** We have released the example data and configuration for the CompassBench-202408, welcome to [CompassBench](https://opencompass.readthedocs.io/zh-cn/latest/advanced_guides/compassbench_intro.html) for more details. 🔥🔥🔥
|
||||
- **\[2024.07.17\]** We are excited to announce the release of NeedleBench's [technical report](http://arxiv.org/abs/2407.11963). We invite you to visit our [support documentation](https://opencompass.readthedocs.io/en/latest/advanced_guides/needleinahaystack_eval.html) for detailed evaluation guidelines. 🔥🔥🔥
|
||||
|
@ -69,6 +69,7 @@
|
||||
|
||||
## 🚀 最新进展 <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
||||
|
||||
- **\[2024.08.01\]** 我们支持了[Gemma2](https://huggingface.co/collections/google/gemma-2-release-667d6600fd5220e7b967f315)模型,欢迎试用!🔥🔥🔥
|
||||
- **\[2024.07.23\]** 我们支持了[ModelScope](https://www.modelscope.cn)数据集,您可以按需加载,无需事先下载全部数据到本地,欢迎试用!🔥🔥🔥
|
||||
- **\[2024.07.17\]** 我们发布了CompassBench-202408榜单的示例数据和评测规则,敬请访问 [CompassBench](https://opencompass.readthedocs.io/zh-cn/latest/advanced_guides/compassbench_intro.html) 获取更多信息。 🔥🔥🔥
|
||||
- **\[2024.07.17\]** 我们正式发布 NeedleBench 的[技术报告](http://arxiv.org/abs/2407.11963)。诚邀您访问我们的[帮助文档](https://opencompass.readthedocs.io/zh-cn/latest/advanced_guides/needleinahaystack_eval.html)进行评估。🔥🔥🔥
|
||||
|
15
configs/models/gemma/hf_gemma2_27b.py
Normal file
15
configs/models/gemma/hf_gemma2_27b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-27b-hf',
|
||||
path='google/gemma-2-27b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
configs/models/gemma/hf_gemma2_27b_it.py
Normal file
16
configs/models/gemma/hf_gemma2_27b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-27b-it-hf',
|
||||
path='google/gemma-2-27b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=2,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
15
configs/models/gemma/hf_gemma2_2b.py
Normal file
15
configs/models/gemma/hf_gemma2_2b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-2b-hf',
|
||||
path='google/gemma-2-2b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
configs/models/gemma/hf_gemma2_2b_it.py
Normal file
16
configs/models/gemma/hf_gemma2_2b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-2b-it-hf',
|
||||
path='google/gemma-2-2b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
15
configs/models/gemma/hf_gemma2_9b.py
Normal file
15
configs/models/gemma/hf_gemma2_9b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-9b-hf',
|
||||
path='google/gemma-2-9b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
configs/models/gemma/hf_gemma2_9b_it.py
Normal file
16
configs/models/gemma/hf_gemma2_9b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-9b-it-hf',
|
||||
path='google/gemma-2-9b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=2,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
117
opencompass/configs/datasets/calm/README.md
Normal file
117
opencompass/configs/datasets/calm/README.md
Normal file
@ -0,0 +1,117 @@
|
||||
# CaLM Lite
|
||||
**CaLM Lite** is a lightweight version of CaLM.
|
||||
|
||||
**Ca**usal evaluation of **L**anguage **M**odels (CaLM), to the best of our knowledge, is the first comprehensive benchmark for evaluating the causal reasoning capabilities of language models. The CaLM framework establishes a foundational taxonomy consisting of four modules: causal target (i.e., what to evaluate), adaptation (i.e., how to obtain the results), metric (i.e., how to measure the results), and error (i.e., how to analyze the bad results).
|
||||
|
||||
<div align="center">
|
||||
|
||||
[🌐 Website](https://opencausalab.github.io/CaLM) |
|
||||
[📃 Report](https://arxiv.org/abs/2405.00622) | [🎆 GitHub](https://github.com/OpenCausaLab/CaLM) | 📧 Welcome to join us by email at causalai@pjlab.org.cn
|
||||
</div>
|
||||
|
||||
## Quick Start
|
||||
### Data Preparation
|
||||
Download dataset to data/ folder.
|
||||
```
|
||||
wget https://github.com/OpenCausaLab/CaLM/releases/download/v1.0.0.lite/calm.zip
|
||||
unzip calm.zip
|
||||
```
|
||||
### Run Model and Infer
|
||||
To obtain a concise output with only the average information for all tasks, use:
|
||||
|
||||
```
|
||||
python run.py --models YOUR_MODEL --datasets calm --summarizer calm
|
||||
```
|
||||
|
||||
If you want detailed information for each task, use:
|
||||
|
||||
```
|
||||
python run.py --models YOUR_MODEL --datasets calm
|
||||
```
|
||||
|
||||
The `--summarizer calm` flag in the first command is used to generate a summarized output, while omitting it in the second command will provide task-specific details.
|
||||
## Available Causal Tasks
|
||||
We provide 92 tasks for causal evaluation, stored in the `data/calm` folder. For more information about our causal tasks, refer to [tasks](https://github.com/OpenCausaLab/CaLM/blob/main/documents/tasks.md).
|
||||
The directory structure is:
|
||||
|
||||
```
|
||||
├── calm
|
||||
| ├── association
|
||||
| ├── causal_discovery # Rung of the causal ladder
|
||||
| │ ├── abstract_reasoning # Causal scenario
|
||||
| │ │ ├── AR-B_CaLM-AR_CN.json # Causal task
|
||||
| │ | └── AR-B_CaLM-AR_EN.json # Causal task
|
||||
| │ └── ...
|
||||
| └── ...
|
||||
└── ...
|
||||
```
|
||||
|
||||
## Dataset
|
||||
- **Dataset size**: CaLM Lite uses a lightweight dataset of **9,200** samples, while CaLM uses a significantly larger dataset of 126,334 samples. The table below details the English dataset composition, with the Chinese version structured identically.
|
||||
- **Dataset configuration**: We prioritize balance in our dataset for **binary classification** and **choice selection** questions. By ensuring an equal number of each GT label, we minimize the risk of introducing bias into the model's testing. For **probability calculation**, CaLM-Lite pays extra attention to balancing the number of problems across different causal reasoning processes. (For more details on how the causal reasoning process is defined, please refer to Section 9.1.6 of the [paper](https://arxiv.org/abs/2405.00622).)
|
||||
- **Efficient evaluation**: For enhanced evaluation efficiency, OpenCompass offers customizable methods. Refer to the [documentation](https://opencompass.org.cn/doc) for guidance on tailoring these methods to your needs.
|
||||
|
||||
| Causal ladder | Causal scenario | Subset | Question type | Mode | CaLM Lite | CaLM |
|
||||
|---------------|-----------------|--------|---------------|------|-----------|------|
|
||||
| Causal discovery | PCD | E-CARE | Binary classification | Natural | 100 | 2000 |
|
||||
| Causal discovery | PCD | E-CARE | Choice selection | Natural | 100 | 1000 |
|
||||
| Causal discovery | PCD | COPA | Binary classification | Natural | 100 | 2000 |
|
||||
| Causal discovery | PCD | COPA | Choice selection | Natural | 100 | 1000 |
|
||||
| Causal discovery | ECI | CTB | Binary classification | Natural | 100 | 596 |
|
||||
| Causal discovery | ECI | ESC | Binary classification | Natural | 100 | 1000 |
|
||||
| Causal discovery | ECI | MAVEN-ERE | Binary classification | Natural | 100 | 1000 |
|
||||
| Causal discovery | AR | CaLM-AR | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Causal discovery | CA | FP | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Causal discovery | CA | FA | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Association | CORR | correlation | Binary classification | Natural | 100 | 1476 |
|
||||
| Association | EAE | exp-away | Binary classification | Natural | 100 | 168 |
|
||||
| Intervention | CB | collider-bias | Binary classification | Natural | 100 | 163 |
|
||||
| Intervention | ATE | ATE-natural | Binary classification | Natural | 100 | 1600 |
|
||||
| Intervention | ATE | ATE-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Intervention | ATE | ATE-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Intervention | CDE | CDE-natural | Binary classification | Natural | 100 | 1600 |
|
||||
| Intervention | CDE | CDE-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Intervention | CDE | CDE-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Intervention | BAS | backadj | Binary classification | Natural | 100 | 227 |
|
||||
| Intervention | BAS | max-BAS | Choice selection | Symbolic | 100 | 1600 |
|
||||
| Intervention | BAS | min-BAS | Choice selection | Symbolic | 100 | 1600 |
|
||||
| Intervention | BAS | mix-BAS | Choice selection | Symbolic | 100 | 1600 |
|
||||
| Intervention | FAS | FAS | Choice selection | Symbolic | 100 | 1600 |
|
||||
| Intervention | IV | CaLM-IV | Choice selection | Symbolic | 100 | 1600 |
|
||||
| Intervention | CEI | 0.2-UC | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Intervention | CEI | 0.4-UC | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Intervention | CEI | 0.6-UC | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Intervention | CEI | 0.8-UC | Binary classification | Symbolic | 100 | 1600 |
|
||||
| Counterfactuals | ETT | ETT-natural | Binary classification | Natural | 100 | 1600 |
|
||||
| Counterfactuals | ETT | ETT-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | ETT | ETT-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | NDE | NDE-natural | Binary classification | Natural | 100 | 1600 |
|
||||
| Counterfactuals | NDE | NDE-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | NDE | NDE-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | NIE | NIE-natural | Binary classification | Natural | 100 | 1600 |
|
||||
| Counterfactuals | NIE | NIE-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | NIE | NIE-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | PN | PN-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | PN | PN-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | PS | PS-basic | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | PS | PS-hard | Probability calculation | Mathematical | 100 | 1600 |
|
||||
| Counterfactuals | AC | causal judgement | Binary classification | Natural | 100 | 187 |
|
||||
| Counterfactuals | CR | CRASS | Choice selection | Natural | 100 | 274 |
|
||||
| Counterfactuals | CR | det-counterfactual | Binary classification | Natural | 100 | 1476 |
|
||||
| Counterfactuals | CEG | E-CARE | Open-ended generation | Natural | 100 | 1000 |
|
||||
| **Total** | | | | | 4600 | 63167 |
|
||||
|
||||
## Available Prompt Styles (Adaptation)
|
||||
Basic Prompt is our default setting for efficient evaluation of CaLM Lite, but we provide flexibility for exploring additional prompts through CaLM. If you'd like to explore and compare a wider range of prompts, we encourage you to use CaLM. We provide a comprehensive and easy-to-follow guide to assist you in our [repository](https://github.com/OpenCausaLab/CaLM).
|
||||
|
||||
## Citation
|
||||
```
|
||||
@misc{chen2024causal,
|
||||
title={Causal Evaluation of Language Models},
|
||||
author={Sirui Chen and Bo Peng and Meiqi Chen and Ruiqi Wang and Mengying Xu and Xingyu Zeng and Rui Zhao and Shengjie Zhao and Yu Qiao and Chaochao Lu},
|
||||
year={2024},
|
||||
eprint={2405.00622},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
160
opencompass/configs/datasets/calm/calm.py
Normal file
160
opencompass/configs/datasets/calm/calm.py
Normal file
@ -0,0 +1,160 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import CaLMDataset, CaLMEvaluator
|
||||
|
||||
task_hiearchy_dict = {
|
||||
# association/
|
||||
# correlation/
|
||||
'CORR-B_correlation_CN':'association/correlation/',
|
||||
'CORR-B_correlation_EN':'association/correlation/',
|
||||
# explaining_away_effect/
|
||||
'EAE-B_exp-away_CN':'association/explaining_away_effect/',
|
||||
'EAE-B_exp-away_EN':'association/explaining_away_effect/',
|
||||
# causal_discovery/
|
||||
# abstract_reasoning/
|
||||
'AR-B_CaLM-AR_CN':'causal_discovery/abstract_reasoning/',
|
||||
'AR-B_CaLM-AR_EN':'causal_discovery/abstract_reasoning/',
|
||||
# causal_attribution/
|
||||
'CA-B_FA_CN':'causal_discovery/causal_attribution/',
|
||||
'CA-B_FA_EN':'causal_discovery/causal_attribution/',
|
||||
'CA-B_FP_CN':'causal_discovery/causal_attribution/',
|
||||
'CA-B_FP_EN':'causal_discovery/causal_attribution/',
|
||||
# event_causality_identification/
|
||||
'ECI-B_CTB_CN':'causal_discovery/event_causality_identification/',
|
||||
'ECI-B_CTB_EN':'causal_discovery/event_causality_identification/',
|
||||
'ECI-B_ESC_CN':'causal_discovery/event_causality_identification/',
|
||||
'ECI-B_ESC_EN':'causal_discovery/event_causality_identification/',
|
||||
'ECI-B_MAVEN-ERE_CN':'causal_discovery/event_causality_identification/',
|
||||
'ECI-B_MAVEN-ERE_EN':'causal_discovery/event_causality_identification/',
|
||||
# pairwise_causal_discovery/
|
||||
'PCD-B_COPA_CN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-B_COPA_EN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-B_E-CARE_CN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-B_E-CARE_EN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-C_COPA_CN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-C_COPA_EN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-C_E-CARE_CN':'causal_discovery/pairwise_causal_discovery/',
|
||||
'PCD-C_E-CARE_EN':'causal_discovery/pairwise_causal_discovery/',
|
||||
# counterfactual/
|
||||
# actual_causality/
|
||||
'AC-B_causal_judgement_CN':'counterfactual/actual_causality/',
|
||||
'AC-B_causal_judgement_EN':'counterfactual/actual_causality/',
|
||||
# causal_explanation_generation/
|
||||
'CEG-O_E-CARE_CN':'counterfactual/causal_explanation_generation/',
|
||||
'CEG-O_E-CARE_EN':'counterfactual/causal_explanation_generation/',
|
||||
# counterfactual_reasoning/
|
||||
'CR-B_det-counterfactual_CN':'counterfactual/counterfactual_reasoning/',
|
||||
'CR-B_det-counterfactual_EN':'counterfactual/counterfactual_reasoning/',
|
||||
'CR-C_CRASS_CN':'counterfactual/counterfactual_reasoning/',
|
||||
'CR-C_CRASS_EN':'counterfactual/counterfactual_reasoning/',
|
||||
# effect_of_the_treatment_on_the_treated/
|
||||
'ETT-B_ETT-natural_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
'ETT-B_ETT-natural_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
'ETT-P_ETT-basic_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
'ETT-P_ETT-basic_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
'ETT-P_ETT-hard_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
'ETT-P_ETT-hard_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
|
||||
# natural_direct_effect/
|
||||
'NDE-B_NDE-natural_CN':'counterfactual/natural_direct_effect/',
|
||||
'NDE-B_NDE-natural_EN':'counterfactual/natural_direct_effect/',
|
||||
'NDE-P_NDE-basic_CN':'counterfactual/natural_direct_effect/',
|
||||
'NDE-P_NDE-basic_EN':'counterfactual/natural_direct_effect/',
|
||||
'NDE-P_NDE-hard_CN':'counterfactual/natural_direct_effect/',
|
||||
'NDE-P_NDE-hard_EN':'counterfactual/natural_direct_effect/',
|
||||
# natural_indirect_effect/
|
||||
'NIE-B_NIE-natural_CN':'counterfactual/natural_indirect_effect/',
|
||||
'NIE-B_NIE-natural_EN':'counterfactual/natural_indirect_effect/',
|
||||
'NIE-P_NIE-basic_CN':'counterfactual/natural_indirect_effect/',
|
||||
'NIE-P_NIE-basic_EN':'counterfactual/natural_indirect_effect/',
|
||||
'NIE-P_NIE-hard_CN':'counterfactual/natural_indirect_effect/',
|
||||
'NIE-P_NIE-hard_EN':'counterfactual/natural_indirect_effect/',
|
||||
# probability_of_necessity/
|
||||
'PN-P_PN-basic_CN':'counterfactual/probability_of_necessity/',
|
||||
'PN-P_PN-basic_EN':'counterfactual/probability_of_necessity/',
|
||||
'PN-P_PN-hard_CN':'counterfactual/probability_of_necessity/',
|
||||
'PN-P_PN-hard_EN':'counterfactual/probability_of_necessity/',
|
||||
# probability_of_sufficiency/
|
||||
'PS-P_PS-basic_CN':'counterfactual/probability_of_sufficiency/',
|
||||
'PS-P_PS-basic_EN':'counterfactual/probability_of_sufficiency/',
|
||||
'PS-P_PS-hard_CN':'counterfactual/probability_of_sufficiency/',
|
||||
'PS-P_PS-hard_EN':'counterfactual/probability_of_sufficiency/',
|
||||
# intervention/
|
||||
# average_treatment_effect/
|
||||
'ATE-B_ATE-natural_CN':'intervention/average_treatment_effect/',
|
||||
'ATE-B_ATE-natural_EN':'intervention/average_treatment_effect/',
|
||||
'ATE-P_ATE-basic_CN':'intervention/average_treatment_effect/',
|
||||
'ATE-P_ATE-basic_EN':'intervention/average_treatment_effect/',
|
||||
'ATE-P_ATE-hard_CN':'intervention/average_treatment_effect/',
|
||||
'ATE-P_ATE-hard_EN':'intervention/average_treatment_effect/',
|
||||
# backdoor_adjustment_set/
|
||||
'BAS-B_backadj_CN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-B_backadj_EN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_max-BAS_CN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_max-BAS_EN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_min-BAS_CN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_min-BAS_EN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_mix-BAS_CN':'intervention/backdoor_adjustment_set/',
|
||||
'BAS-C_mix-BAS_EN':'intervention/backdoor_adjustment_set/',
|
||||
# causal_effect_identification/
|
||||
'CEI-B_0.2-UC_CN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.2-UC_EN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.4-UC_CN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.4-UC_EN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.6-UC_CN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.6-UC_EN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.8-UC_CN':'intervention/causal_effect_identification/',
|
||||
'CEI-B_0.8-UC_EN':'intervention/causal_effect_identification/',
|
||||
# collider_bias/
|
||||
'CB-B_collider-bias_CN':'intervention/collider_bias/',
|
||||
'CB-B_collider-bias_EN':'intervention/collider_bias/',
|
||||
# controlled_direct_effect/
|
||||
'CDE-B_CDE-natural_CN':'intervention/controlled_direct_effect/',
|
||||
'CDE-B_CDE-natural_EN':'intervention/controlled_direct_effect/',
|
||||
'CDE-P_CDE-basic_CN':'intervention/controlled_direct_effect/',
|
||||
'CDE-P_CDE-basic_EN':'intervention/controlled_direct_effect/',
|
||||
'CDE-P_CDE-hard_CN':'intervention/controlled_direct_effect/',
|
||||
'CDE-P_CDE-hard_EN':'intervention/controlled_direct_effect/',
|
||||
# frontdoor_adjustment_set/
|
||||
'FAS-C_FAS_CN':'intervention/frontdoor_adjustment_set/',
|
||||
'FAS-C_FAS_EN':'intervention/frontdoor_adjustment_set/',
|
||||
# instrumental_variable/
|
||||
'IV-C_CaLM-IV_CN':'intervention/instrumental_variable/',
|
||||
'IV-C_CaLM-IV_EN':'intervention/instrumental_variable/',}
|
||||
|
||||
calm_reader_cfg = dict(
|
||||
input_columns=['question'],
|
||||
output_column='gt_item')
|
||||
|
||||
# Unique task names with the trailing 3-character language suffix
# ('_CN' / '_EN') removed. Sorted so that the dataset list built from it has a
# stable order across runs: iterating a bare set of strings depends on
# per-process hash randomization and would reorder the datasets on every launch.
calm_all_sets = sorted({key[:-3] for key in task_hiearchy_dict})
|
||||
|
||||
calm_datasets = []
|
||||
for _name in calm_all_sets:
|
||||
for _prompt_style in ['basic','basic-CN']:
|
||||
_task_name = _name + ('_CN' if _prompt_style.endswith('-CN') else '_EN')
|
||||
_path = f'./data/calm/{task_hiearchy_dict[_task_name]}{_task_name}.json'
|
||||
|
||||
calm_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template='{question}'),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=500))
|
||||
|
||||
calm_eval_cfg = dict(evaluator=dict(
|
||||
type=CaLMEvaluator,
|
||||
core_metrics=True,
|
||||
error_analysis=True,
|
||||
prompt_style=_prompt_style,
|
||||
task=_task_name))
|
||||
calm_datasets.append(
|
||||
dict(
|
||||
abbr=f'calm_{_task_name}',
|
||||
type=CaLMDataset,
|
||||
path=_path,
|
||||
prompt_style=_prompt_style,
|
||||
reader_cfg=calm_reader_cfg,
|
||||
infer_cfg=calm_infer_cfg,
|
||||
eval_cfg=calm_eval_cfg)
|
||||
)
|
||||
del _prompt_style, _task_name, _path, _name
|
15
opencompass/configs/models/gemma/hf_gemma2_27b.py
Normal file
15
opencompass/configs/models/gemma/hf_gemma2_27b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-27b-hf',
|
||||
path='google/gemma-2-27b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
opencompass/configs/models/gemma/hf_gemma2_27b_it.py
Normal file
16
opencompass/configs/models/gemma/hf_gemma2_27b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-27b-it-hf',
|
||||
path='google/gemma-2-27b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=2,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
15
opencompass/configs/models/gemma/hf_gemma2_2b.py
Normal file
15
opencompass/configs/models/gemma/hf_gemma2_2b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-2b-hf',
|
||||
path='google/gemma-2-2b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
opencompass/configs/models/gemma/hf_gemma2_2b_it.py
Normal file
16
opencompass/configs/models/gemma/hf_gemma2_2b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-2b-it-hf',
|
||||
path='google/gemma-2-2b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
15
opencompass/configs/models/gemma/hf_gemma2_9b.py
Normal file
15
opencompass/configs/models/gemma/hf_gemma2_9b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='gemma2-9b-hf',
|
||||
path='google/gemma-2-9b',
|
||||
max_out_len=1024,
|
||||
batch_size=4,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
),
|
||||
)
|
||||
]
|
16
opencompass/configs/models/gemma/hf_gemma2_9b_it.py
Normal file
16
opencompass/configs/models/gemma/hf_gemma2_9b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import HuggingFacewithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='gemma2-9b-it-hf',
|
||||
path='google/gemma-2-9b-it',
|
||||
max_out_len=2048,
|
||||
batch_size=2,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<end_of_turn>'],
|
||||
model_kwargs=dict(
|
||||
torch_dtype='torch.bfloat16',
|
||||
)
|
||||
)
|
||||
]
|
169
opencompass/configs/summarizers/groups/calm.py
Normal file
169
opencompass/configs/summarizers/groups/calm.py
Normal file
@ -0,0 +1,169 @@
|
||||
# Maps every CaLM task key to its category path, written as
# '<top-level category>/<task type>/'. Keys come in '_CN' / '_EN' pairs; the
# code further down in this file relies on that suffix and builds dataset
# abbreviations as f'calm_{key}'.
# NOTE: the variable name is misspelled ('hiearchy'); it is kept as-is because
# it is a module-level name.
task_hiearchy_dict = {
    # association/
        # correlation/
    'CORR-B_correlation_CN':'association/correlation/',
    'CORR-B_correlation_EN':'association/correlation/',
        # explaining_away_effect/
    'EAE-B_exp-away_CN':'association/explaining_away_effect/',
    'EAE-B_exp-away_EN':'association/explaining_away_effect/',
    # causal_discovery/
        # abstract_reasoning/
    'AR-B_CaLM-AR_CN':'causal_discovery/abstract_reasoning/',
    'AR-B_CaLM-AR_EN':'causal_discovery/abstract_reasoning/',
        # causal_attribution/
    'CA-B_FA_CN':'causal_discovery/causal_attribution/',
    'CA-B_FA_EN':'causal_discovery/causal_attribution/',
    'CA-B_FP_CN':'causal_discovery/causal_attribution/',
    'CA-B_FP_EN':'causal_discovery/causal_attribution/',
        # event_causality_identification/
    'ECI-B_CTB_CN':'causal_discovery/event_causality_identification/',
    'ECI-B_CTB_EN':'causal_discovery/event_causality_identification/',
    'ECI-B_ESC_CN':'causal_discovery/event_causality_identification/',
    'ECI-B_ESC_EN':'causal_discovery/event_causality_identification/',
    'ECI-B_MAVEN-ERE_CN':'causal_discovery/event_causality_identification/',
    'ECI-B_MAVEN-ERE_EN':'causal_discovery/event_causality_identification/',
        # pairwise_causal_discovery/
    'PCD-B_COPA_CN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_COPA_EN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_E-CARE_CN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-B_E-CARE_EN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_COPA_CN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_COPA_EN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_E-CARE_CN':'causal_discovery/pairwise_causal_discovery/',
    'PCD-C_E-CARE_EN':'causal_discovery/pairwise_causal_discovery/',
    # counterfactual/
        # actual_causality/
    'AC-B_causal_judgement_CN':'counterfactual/actual_causality/',
    'AC-B_causal_judgement_EN':'counterfactual/actual_causality/',
        # causal_explanation_generation/
    'CEG-O_E-CARE_CN':'counterfactual/causal_explanation_generation/',
    'CEG-O_E-CARE_EN':'counterfactual/causal_explanation_generation/',
        # counterfactual_reasoning/
    'CR-B_det-counterfactual_CN':'counterfactual/counterfactual_reasoning/',
    'CR-B_det-counterfactual_EN':'counterfactual/counterfactual_reasoning/',
    'CR-C_CRASS_CN':'counterfactual/counterfactual_reasoning/',
    'CR-C_CRASS_EN':'counterfactual/counterfactual_reasoning/',
        # effect_of_the_treatment_on_the_treated/
    'ETT-B_ETT-natural_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-B_ETT-natural_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-basic_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-basic_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-hard_CN':'counterfactual/effect_of_the_treatment_on_the_treated/',
    'ETT-P_ETT-hard_EN':'counterfactual/effect_of_the_treatment_on_the_treated/',
        # natural_direct_effect/
    'NDE-B_NDE-natural_CN':'counterfactual/natural_direct_effect/',
    'NDE-B_NDE-natural_EN':'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-basic_CN':'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-basic_EN':'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-hard_CN':'counterfactual/natural_direct_effect/',
    'NDE-P_NDE-hard_EN':'counterfactual/natural_direct_effect/',
        # natural_indirect_effect/
    'NIE-B_NIE-natural_CN':'counterfactual/natural_indirect_effect/',
    'NIE-B_NIE-natural_EN':'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-basic_CN':'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-basic_EN':'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-hard_CN':'counterfactual/natural_indirect_effect/',
    'NIE-P_NIE-hard_EN':'counterfactual/natural_indirect_effect/',
        # probability_of_necessity/
    'PN-P_PN-basic_CN':'counterfactual/probability_of_necessity/',
    'PN-P_PN-basic_EN':'counterfactual/probability_of_necessity/',
    'PN-P_PN-hard_CN':'counterfactual/probability_of_necessity/',
    'PN-P_PN-hard_EN':'counterfactual/probability_of_necessity/',
        # probability_of_sufficiency/
    'PS-P_PS-basic_CN':'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-basic_EN':'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-hard_CN':'counterfactual/probability_of_sufficiency/',
    'PS-P_PS-hard_EN':'counterfactual/probability_of_sufficiency/',
    # intervention/
        # average_treatment_effect/
    'ATE-B_ATE-natural_CN':'intervention/average_treatment_effect/',
    'ATE-B_ATE-natural_EN':'intervention/average_treatment_effect/',
    'ATE-P_ATE-basic_CN':'intervention/average_treatment_effect/',
    'ATE-P_ATE-basic_EN':'intervention/average_treatment_effect/',
    'ATE-P_ATE-hard_CN':'intervention/average_treatment_effect/',
    'ATE-P_ATE-hard_EN':'intervention/average_treatment_effect/',
        # backdoor_adjustment_set/
    'BAS-B_backadj_CN':'intervention/backdoor_adjustment_set/',
    'BAS-B_backadj_EN':'intervention/backdoor_adjustment_set/',
    'BAS-C_max-BAS_CN':'intervention/backdoor_adjustment_set/',
    'BAS-C_max-BAS_EN':'intervention/backdoor_adjustment_set/',
    'BAS-C_min-BAS_CN':'intervention/backdoor_adjustment_set/',
    'BAS-C_min-BAS_EN':'intervention/backdoor_adjustment_set/',
    'BAS-C_mix-BAS_CN':'intervention/backdoor_adjustment_set/',
    'BAS-C_mix-BAS_EN':'intervention/backdoor_adjustment_set/',
        # causal_effect_identification/
    'CEI-B_0.2-UC_CN':'intervention/causal_effect_identification/',
    'CEI-B_0.2-UC_EN':'intervention/causal_effect_identification/',
    'CEI-B_0.4-UC_CN':'intervention/causal_effect_identification/',
    'CEI-B_0.4-UC_EN':'intervention/causal_effect_identification/',
    'CEI-B_0.6-UC_CN':'intervention/causal_effect_identification/',
    'CEI-B_0.6-UC_EN':'intervention/causal_effect_identification/',
    'CEI-B_0.8-UC_CN':'intervention/causal_effect_identification/',
    'CEI-B_0.8-UC_EN':'intervention/causal_effect_identification/',
        # collider_bias/
    'CB-B_collider-bias_CN':'intervention/collider_bias/',
    'CB-B_collider-bias_EN':'intervention/collider_bias/',
        # controlled_direct_effect/
    'CDE-B_CDE-natural_CN':'intervention/controlled_direct_effect/',
    'CDE-B_CDE-natural_EN':'intervention/controlled_direct_effect/',
    'CDE-P_CDE-basic_CN':'intervention/controlled_direct_effect/',
    'CDE-P_CDE-basic_EN':'intervention/controlled_direct_effect/',
    'CDE-P_CDE-hard_CN':'intervention/controlled_direct_effect/',
    'CDE-P_CDE-hard_EN':'intervention/controlled_direct_effect/',
        # frontdoor_adjustment_set/
    'FAS-C_FAS_CN':'intervention/frontdoor_adjustment_set/',
    'FAS-C_FAS_EN':'intervention/frontdoor_adjustment_set/',
        # instrumental_variable/
    'IV-C_CaLM-IV_CN':'intervention/instrumental_variable/',
    'IV-C_CaLM-IV_EN':'intervention/instrumental_variable/',}
|
||||
# All CaLM task keys, in the order they are declared in task_hiearchy_dict.
dict_keys = list(task_hiearchy_dict)

# One bucket per error category; each bucket collects
# [dataset abbreviation, metric name] pairs, where the metric name is the
# error category itself.
error_dict = {
    'Same response to all questions': [],
    'Language inconsistency': [],
    'Limitation of instruction-following': [],
    'Repetition': [],
    'Empty response': [],
}

# The CEG-O_E-CARE tasks are left out of every error bucket.
for error in error_dict:
    error_dict[error].extend(
        [f'calm_{key}', error]
        for key in dict_keys
        if 'CEG-O_E-CARE' not in key
    )

# Split the accuracy entries by language, based on the key suffix.
English_avg = []
Chinese_avg = []
for key in dict_keys:
    if not key.endswith('EN'):
        # Every key that is not English must be Chinese.
        assert key.endswith('CN')
        Chinese_avg.append([f'calm_{key}', 'Accuracy'])
        continue
    English_avg.append([f'calm_{key}', 'Accuracy'])

# Accuracy groups first (per language, then the average of the two), followed
# by one "<error> Average" group per error category.
calm_summary_groups = [
    {'name': 'English Average', 'subsets': English_avg},
    {'name': 'Chinese Average', 'subsets': Chinese_avg},
    {'name': 'Accuracy Average',
     'subsets': ['English Average', 'Chinese Average']},
] + [
    {'name': f'{error} Average', 'subsets': error_dict[error]}
    for error in error_dict
]

# Rows listed in dataset_abbrs, in order; the '######' strings are section
# headers.
summarizer = {
    'dataset_abbrs': [
        '###### CALM-Lite Accuracy ######',
        'Accuracy Average',
        'English Average',
        'Chinese Average',

        '###### CALM-Lite Errors ######',
        'Same response to all questions Average',
        'Language inconsistency Average',
        'Limitation of instruction-following Average',
        'Repetition Average',
        'Empty response Average',
    ],
    'summary_groups': calm_summary_groups,
}
|
@ -14,5 +14,4 @@ class HFDataset(BaseDataset):
|
||||
if 'data_files' in kwargs:
|
||||
kwargs['data_files'] = get_data_path(kwargs['data_files'],
|
||||
local_mode=True)
|
||||
|
||||
return load_dataset(**kwargs)
|
||||
|
@ -364,3 +364,88 @@ class OpenAI(BaseAPIModel):
|
||||
elif self.mode == 'rear':
|
||||
prompt = sep.join(words[:l])
|
||||
return prompt
|
||||
|
||||
|
||||
class OpenAISDK(OpenAI):
    """Chat model wrapper that sends requests through the ``openai`` SDK.

    Construction is delegated to :class:`OpenAI`. On top of that, an
    ``openai.OpenAI`` client is created, and :meth:`_generate` uses it to call
    ``chat.completions.create``.

    Args:
        path: Model name, sent as ``model`` with every request and also used
            to guess the context window.
        max_seq_len: Context window used when a plain-string prompt is
            trimmed (``mode`` other than ``'none'``).
        query_per_second: Forwarded unchanged to :class:`OpenAI`.
        rpm_verbose: Forwarded unchanged to :class:`OpenAI`.
        retry: Number of attempts made per request before giving up.
        key: API key, a list of keys, or ``'ENV'`` to take the key from the
            ``OPENAI_API_KEY`` environment variable.
        org: Forwarded unchanged to :class:`OpenAI`.
        meta_template: Forwarded unchanged to :class:`OpenAI`.
        openai_api_base: Base URL handed to the SDK client.
        mode: Trimming mode; ``'none'`` disables trimming of string prompts.
        logprobs: Forwarded unchanged to :class:`OpenAI`.
        top_logprobs: Forwarded unchanged to :class:`OpenAI`.
        temperature: Sampling temperature; when set, it overrides the
            per-call value given to :meth:`_generate`.
        tokenizer_path: Forwarded unchanged to :class:`OpenAI`.
        extra_body: Forwarded unchanged to :class:`OpenAI`.
    """

    # Prompt roles used by this project, mapped to Chat Completions roles.
    _ROLE_MAP = {'HUMAN': 'user', 'BOT': 'assistant', 'SYSTEM': 'system'}

    def __init__(self,
                 path: str = 'gpt-3.5-turbo',
                 max_seq_len: int = 4096,
                 query_per_second: int = 1,
                 rpm_verbose: bool = False,
                 retry: int = 2,
                 key: str | List[str] = 'ENV',
                 org: str | List[str] | None = None,
                 meta_template: Dict | None = None,
                 openai_api_base: str = OPENAI_API_BASE,
                 mode: str = 'none',
                 logprobs: bool | None = False,
                 top_logprobs: int | None = None,
                 temperature: float | None = None,
                 tokenizer_path: str | None = None,
                 extra_body: Dict | None = None):
        super().__init__(path, max_seq_len, query_per_second, rpm_verbose,
                         retry, key, org, meta_template, openai_api_base, mode,
                         logprobs, top_logprobs, temperature, tokenizer_path,
                         extra_body)
        # Imported under an alias so it cannot be confused with the parent
        # class of the same name.
        from openai import OpenAI as OpenAIClient

        self.openai_client = OpenAIClient(
            base_url=openai_api_base,
            api_key=self._resolve_api_key(key))
        # Backward-compatible alias for the original (misspelled) attribute.
        self.opeanai_cleint = self.openai_client

    @staticmethod
    def _resolve_api_key(key):
        """Turn the ``key`` constructor argument into one SDK API key.

        ``'ENV'`` is a placeholder, not a credential: the key is read from
        ``OPENAI_API_KEY`` and the placeholder is only kept when that variable
        is unset (the previous behaviour). The SDK client accepts a single
        key, so for a list the first entry is used and no rotation happens.
        """
        import os

        if isinstance(key, str):
            if key == 'ENV':
                # NOTE(review): if the parent class accepts several
                # comma-separated keys in this variable, pick one here.
                return os.environ.get('OPENAI_API_KEY', key)
            return key
        return key[0] if key else None

    def _generate(self, input: PromptList | str, max_out_len: int,
                  temperature: float) -> str:
        """Run one chat completion and return the text of the first choice.

        Args:
            input: A plain prompt string or a ``PromptList`` of role-tagged
                items.
            max_out_len: Upper bound on generated tokens; it is reduced when
                the prompt leaves less room in the context window.
            temperature: Sampling temperature for this call, used only when
                no temperature was configured on the instance.

        Returns:
            The generated message content, or ``''`` when no token budget is
            left for the output.

        Raises:
            RuntimeError: If every one of the ``retry`` attempts fails.
        """
        assert isinstance(input, (str, PromptList))

        # The context window is guessed from the model name; 4096 is the
        # fallback (max num token for gpt-3.5-turbo is 4097).
        if '32k' in self.path:
            context_window = 32768
        elif '16k' in self.path:
            context_window = 16384
        elif 'gpt-4' in self.path:
            context_window = 8192
        else:
            context_window = 4096

        # will leave 100 tokens as prompt buffer, triggered if input is str
        if isinstance(input, str) and self.mode != 'none':
            context_window = self.max_seq_len
            input = self.bin_trim(input, context_window - 100 - max_out_len)

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                # As before, an unrecognised role leaves 'role' unset.
                role = self._ROLE_MAP.get(item['role'])
                if role is not None:
                    msg['role'] = role
                messages.append(msg)

        # Hold out 100 tokens due to potential errors in tiktoken calculation
        try:
            max_out_len = min(
                max_out_len,
                context_window - self.get_token_len(str(input)) - 100)
        except KeyError:
            # Token counting failed; keep the requested length.
            pass
        if max_out_len <= 0:
            return ''

        # A temperature configured on the instance wins; otherwise honour the
        # per-call argument, which used to be silently ignored.
        if self.temperature is not None:
            sampling_temperature = self.temperature
        else:
            sampling_temperature = temperature

        for _ in range(self.retry):
            self.wait()
            try:
                responses = self.openai_client.chat.completions.create(
                    model=self.path,
                    max_tokens=max_out_len,
                    n=1,
                    temperature=sampling_temperature,
                    messages=messages)
                return responses.choices[0].message.content
            except Exception as e:
                # Deliberately broad: any failure is logged and retried.
                self.logger.error(e)
        raise RuntimeError('Calling OpenAI API failed after retrying for '
                           f'{self.retry} times. Check the logs for details.')
|
||||
|
@ -17,11 +17,18 @@ def get_files(folder, extensions, ignore_folder=[]):
|
||||
for folder in ignore_folder:
|
||||
ignore_folders.append(os.path.relpath(folder))
|
||||
# ignore the files starting with the folder in ignore_folder
|
||||
ignore_files = []
|
||||
for file in files:
|
||||
for folder in ignore_folders:
|
||||
if file.startswith(folder):
|
||||
files.remove(file)
|
||||
return files
|
||||
ignore_files.append(file)
|
||||
# files.remove(file)
|
||||
keep_files = []
|
||||
for file in files:
|
||||
if file not in ignore_files:
|
||||
keep_files.append(file)
|
||||
|
||||
return keep_files
|
||||
|
||||
|
||||
def compare_folders(folder1, folder2, extensions, ignore_folder):
|
||||
@ -36,7 +43,9 @@ def compare_folders(folder1, folder2, extensions, ignore_folder):
|
||||
common_files = files1 & files2
|
||||
|
||||
if only_in_folder1:
|
||||
print(f'Only in {folder1}: {only_in_folder1}')
|
||||
message = f'Only in {folder1}: {only_in_folder1}, '\
|
||||
'please copy files into {folder2}'
|
||||
raise ValueError(message)
|
||||
if only_in_folder2:
|
||||
print(f'Only in {folder2}: {only_in_folder2}')
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user