Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feature]: Add other public datasets config (#214)
* [Feature]: Add flickr30k
* [Feature]: Add GQA
* [Feature]: Add OCR VQA
* [Feature]: Add OK VQA
* [Feature]: Add text vqa
* [Feature]: Add other vqa
parent: 17ccaa5980
commit: ae3c1869da
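The six files added below are plain mmengine-style Python configs, so they can be loaded and inspected on their own. A minimal sketch (not part of this commit), assuming mmengine and opencompass are installed so that the imports inside the config resolve:

    from mmengine.config import Config

    # Inspect the GQA config added below; the printed values come straight from it.
    cfg = Config.fromfile('configs/multimodal/minigpt_4/minigpt_4_7b_gqa.py')
    print(cfg.minigpt_4_gqa_dataloader['dataset']['type'])  # 'mmpretrain.GQA'
    print(cfg.minigpt_4_gqa_model['llama_model'])           # '/path/to/vicuna-7b/'
    print(cfg.minigpt_4_gqa_evaluator)                      # [{'type': 'mmpretrain.GQAAcc'}]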
configs/multimodal/minigpt_4/minigpt_4_7b_flickr30k.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4COCOCaotionPromptConstructor,
    MiniGPT4COCOCaptionPostProcessor,
)

# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(384, 384),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(type='mmpretrain.PackInputs', algorithm_keys=['image_id'])
]

dataset = dict(type='mmpretrain.Flickr30kCaption',
               data_root='data/flickr30k',
               ann_file='annotations/dataset_flickr30k.json',
               data_prefix='images',
               split='val',
               pipeline=val_pipeline)

minigpt_4_flickr30k_dataloader = dict(
    batch_size=1,
    num_workers=4,
    dataset=dataset,
    collate_fn=dict(type='pseudo_collate'),
    sampler=dict(type='DefaultSampler', shuffle=False))

# model settings
minigpt_4_flickr30k_model = dict(
    type='minigpt-4',
    low_resource=False,
    img_size=384,
    llama_model='/path/to/vicuna-7b/',
    prompt_constructor=dict(type=MiniGPT4COCOCaotionPromptConstructor,
                            image_prompt='###Human: <Img><ImageHere></Img>',
                            reply_prompt='###Assistant:'),
    post_processor=dict(type=MiniGPT4COCOCaptionPostProcessor))

# evaluation settings
minigpt_4_flickr30k_evaluator = [
    dict(
        type='mmpretrain.COCOCaption',
        ann_file='data/coco/annotations/coco_karpathy_val_gt.json',
    )  # noqa
]

minigpt_4_flickr30k_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
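All of these configs share the same preprocessing recipe: the 'mmpretrain.torchvision/*' pipeline entries wrap standard torchvision transforms, interpolation=3 is PIL bicubic, and the mean/std values are the CLIP image-normalization constants. A rough standalone equivalent of the flickr30k pipeline above, as an illustrative sketch only (the image path is a hypothetical placeholder; OpenCompass drives this through the mmpretrain pipeline, not directly):

    from PIL import Image
    from torchvision import transforms

    preprocess = transforms.Compose([
        transforms.Resize((384, 384),
                          interpolation=transforms.InterpolationMode.BICUBIC),  # interpolation=3
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.48145466, 0.4578275, 0.40821073),
                             std=(0.26862954, 0.26130258, 0.27577711)),
    ])

    img = preprocess(Image.open('example.jpg').convert('RGB'))  # 3 x 384 x 384 float tensor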
configs/multimodal/minigpt_4/minigpt_4_7b_gqa.py (new file, 52 lines)
@@ -0,0 +1,52 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4VQAPromptConstructor,
    MiniGPT4VQAPostProcessor,
)

# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(224, 224),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(
        type='mmpretrain.PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    )
]

dataset = dict(type='mmpretrain.GQA',
               data_root='data/gqa',
               data_prefix='images',
               ann_file='annotations/testdev_balanced_questions.json',
               pipeline=val_pipeline)

minigpt_4_gqa_dataloader = dict(batch_size=1,
                                num_workers=4,
                                dataset=dataset,
                                collate_fn=dict(type='pseudo_collate'),
                                sampler=dict(type='DefaultSampler',
                                             shuffle=False))

# model settings
minigpt_4_gqa_model = dict(type='minigpt-4',
                           low_resource=False,
                           img_size=224,
                           max_length=10,
                           llama_model='/path/to/vicuna-7b/',
                           prompt_constructor=dict(
                               type=MiniGPT4VQAPromptConstructor,
                               image_prompt='###Human: <Img><ImageHere></Img>',
                               reply_prompt='###Assistant:'),
                           post_processor=dict(type=MiniGPT4VQAPostProcessor))

# evaluation settings
minigpt_4_gqa_evaluator = [dict(type='mmpretrain.GQAAcc')]

minigpt_4_gqa_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
configs/multimodal/minigpt_4/minigpt_4_7b_ocr-vqa.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4VQAPromptConstructor,
    MiniGPT4VQAPostProcessor,
)

# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(224, 224),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(
        type='mmpretrain.PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    )
]

dataset = dict(type='mmpretrain.OCRVQA',
               data_root='data/ocrvqa',
               ann_file='annotations/dataset.json',
               split='test',
               data_prefix='images',
               pipeline=val_pipeline)

minigpt_4_ocr_vqa_dataloader = dict(batch_size=1,
                                    num_workers=4,
                                    dataset=dataset,
                                    collate_fn=dict(type='pseudo_collate'),
                                    sampler=dict(type='DefaultSampler',
                                                 shuffle=False))

# model settings
minigpt_4_ocr_vqa_model = dict(
    type='minigpt-4',
    low_resource=False,
    img_size=224,
    max_length=10,
    llama_model='/path/to/vicuna-7b/',
    prompt_constructor=dict(type=MiniGPT4VQAPromptConstructor,
                            image_prompt='###Human: <Img><ImageHere></Img>',
                            reply_prompt='###Assistant:'),
    post_processor=dict(type=MiniGPT4VQAPostProcessor))

# evaluation settings
minigpt_4_ocr_vqa_evaluator = [dict(type='mmpretrain.VQAAcc')]

minigpt_4_ocr_vqa_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
configs/multimodal/minigpt_4/minigpt_4_7b_ok-vqa.py (new file, 55 lines)
@@ -0,0 +1,55 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4VQAPromptConstructor,
    MiniGPT4VQAPostProcessor,
)

# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(224, 224),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(
        type='mmpretrain.PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    )
]

dataset = dict(
    type='mmpretrain.COCOVQA',
    data_root='data/okvqa',
    question_file='annotations/OpenEnded_mscoco_val2014_questions.json',
    ann_file='annotations/mscoco_val2014_annotations.json',
    pipeline=val_pipeline,
    data_prefix='images/val2014',
)

minigpt_4_ok_vqa_dataloader = dict(batch_size=1,
                                   num_workers=4,
                                   dataset=dataset,
                                   collate_fn=dict(type='pseudo_collate'),
                                   sampler=dict(type='DefaultSampler',
                                                shuffle=False))

# model settings
minigpt_4_ok_vqa_model = dict(
    type='minigpt-4',
    low_resource=False,
    img_size=224,
    max_length=10,
    llama_model='/path/to/vicuna-7b/',
    prompt_constructor=dict(type=MiniGPT4VQAPromptConstructor,
                            image_prompt='###Human: <Img><ImageHere></Img>',
                            reply_prompt='###Assistant:'),
    post_processor=dict(type=MiniGPT4VQAPostProcessor))

# evaluation settings
minigpt_4_ok_vqa_evaluator = [dict(type='mmpretrain.VQAAcc')]

minigpt_4_ok_vqa_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
configs/multimodal/minigpt_4/minigpt_4_7b_textvqa.py (new file, 55 lines)
@@ -0,0 +1,55 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4VQAPromptConstructor,
    MiniGPT4VQAPostProcessor,
)


# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(224, 224),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(
        type='mmpretrain.PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    )
]

dataset = dict(
    type='mmpretrain.TextVQA',
    data_root='data/textvqa',
    ann_file='annotations/TextVQA_0.5.1_val.json',
    pipeline=val_pipeline,
    data_prefix='images/train_images',
)

minigpt_4_textvqa_dataloader = dict(batch_size=1,
                                    num_workers=4,
                                    dataset=dataset,
                                    collate_fn=dict(type='pseudo_collate'),
                                    sampler=dict(type='DefaultSampler',
                                                 shuffle=False))

# model settings
minigpt_4_textvqa_model = dict(
    type='minigpt-4',
    low_resource=False,
    img_size=224,
    max_length=10,
    llama_model='/path/to/vicuna-7b/',
    prompt_constructor=dict(type=MiniGPT4VQAPromptConstructor,
                            image_prompt='###Human: <Img><ImageHere></Img>',
                            reply_prompt='###Assistant:'),
    post_processor=dict(type=MiniGPT4VQAPostProcessor))

# evaluation settings
minigpt_4_textvqa_evaluator = [dict(type='mmpretrain.VQAAcc')]

minigpt_4_textvqa_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
configs/multimodal/minigpt_4/minigpt_4_7b_vizwiz.py (new file, 52 lines)
@@ -0,0 +1,52 @@
from opencompass.multimodal.models.minigpt_4 import (
    MiniGPT4VQAPromptConstructor,
    MiniGPT4VQAPostProcessor,
)

# dataloader settings
val_pipeline = [
    dict(type='mmpretrain.LoadImageFromFile'),
    dict(type='mmpretrain.ToPIL', to_rgb=True),
    dict(type='mmpretrain.torchvision/Resize',
         size=(224, 224),
         interpolation=3),
    dict(type='mmpretrain.torchvision/ToTensor'),
    dict(type='mmpretrain.torchvision/Normalize',
         mean=(0.48145466, 0.4578275, 0.40821073),
         std=(0.26862954, 0.26130258, 0.27577711)),
    dict(
        type='mmpretrain.PackInputs',
        algorithm_keys=['question', 'gt_answer', 'gt_answer_weight'],
        meta_keys=['question_id', 'image_id'],
    )
]

dataset = dict(type='mmpretrain.VizWiz',
               data_root='data/vizwiz/',
               data_prefix='Images/val',
               ann_file='Annotations/val.json',
               pipeline=val_pipeline)

minigpt_4_vizwiz_dataloader = dict(batch_size=1,
                                   num_workers=4,
                                   dataset=dataset,
                                   collate_fn=dict(type='pseudo_collate'),
                                   sampler=dict(type='DefaultSampler',
                                                shuffle=False))

# model settings
minigpt_4_vizwiz_model = dict(
    type='minigpt-4',
    low_resource=False,
    img_size=224,
    max_length=10,
    llama_model='/path/to/vicuna-7b/',
    prompt_constructor=dict(type=MiniGPT4VQAPromptConstructor,
                            image_prompt='###Human: <Img><ImageHere></Img>',
                            reply_prompt='###Assistant:'),
    post_processor=dict(type=MiniGPT4VQAPostProcessor))

# evaluation settings
minigpt_4_vizwiz_evaluator = [dict(type='mmpretrain.VQAAcc')]

minigpt_4_vizwiz_load_from = '/path/to/prerained_minigpt4_7b.pth'  # noqa
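A rough end-to-end sketch of how one of these dataloader configs could be materialized into a PyTorch DataLoader with mmengine. This surrounding workflow is an assumption rather than part of the commit; it presumes mmengine and mmpretrain are installed, the mmpretrain datasets and transforms are registered (the exact registration step can vary by version), and the VizWiz images and annotations are laid out under data/vizwiz/ as the config expects:

    import mmpretrain.datasets  # noqa: F401  (registers the mmpretrain dataset classes)
    from mmengine.config import Config
    from mmengine.runner import Runner

    cfg = Config.fromfile('configs/multimodal/minigpt_4/minigpt_4_7b_vizwiz.py')
    vizwiz_loader = Runner.build_dataloader(cfg.minigpt_4_vizwiz_dataloader)

    batch = next(iter(vizwiz_loader))          # one pseudo-collated sample (batch_size=1)
    print(batch['data_samples'][0].question)   # 'question' is packed via algorithm_keys above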