diff --git a/configs/multimodal/llama_adapter_v2_multimodal/llama_adapter_v2_mm_7b_mmbench.py b/configs/multimodal/llama_adapter_v2_multimodal/llama_adapter_v2_mm_7b_mmbench.py
index 44c42c60..2149e178 100644
--- a/configs/multimodal/llama_adapter_v2_multimodal/llama_adapter_v2_mm_7b_mmbench.py
+++ b/configs/multimodal/llama_adapter_v2_multimodal/llama_adapter_v2_mm_7b_mmbench.py
@@ -28,18 +28,21 @@ llama_adapter_mmbench_dataloader = dict(batch_size=1,
     sampler=dict(type='DefaultSampler', shuffle=False))
 
 # model settings
-llama_adapter_model = dict(
+llama_adapter_mmbench_model = dict(
     type='LLaMA-adapter-v2',
     llama_dir=  # noqa
     '/llama_adapter_v2_multimodal',
     prompt_constructor=dict(type=LlamaAadapterMMBenchPromptConstructor),
-    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor))
+    post_processor=dict(type=LlamaAadapterMMBenchPostProcessor)
+)
 
 # evaluation settings
-llama_adapter_evaluator = [
+llama_adapter_mmbench_evaluator = [
     dict(
         type='opencompass.DumpResults',
         save_path='work_dirs/llama-adapter-v2-multimodal-mmagibench-v0.1.0.xlsx'
     )
 ]
+
+llama_adapter_mmbench_load_from = None  # noqa
+
diff --git a/configs/multimodal/mplug_owl/mplug_owl-7b-mmbench.py b/configs/multimodal/mplug_owl/mplug_owl_7b_mmbench.py
similarity index 93%
rename from configs/multimodal/mplug_owl/mplug_owl-7b-mmbench.py
rename to configs/multimodal/mplug_owl/mplug_owl_7b_mmbench.py
index 322c041f..cecc1e24 100644
--- a/configs/multimodal/mplug_owl/mplug_owl-7b-mmbench.py
+++ b/configs/multimodal/mplug_owl/mplug_owl_7b_mmbench.py
@@ -35,8 +35,8 @@ mplug_owl_mmbench_dataloader = dict(
 
 # model settings
 mplug_owl_mmbench_model = dict(
-    type='mplug_owl-7b',
-    model_path='/mplug-owl-llama-7b-ft',
+    type='mplug_owl_7b',
+    model_path='/mplug-owl-llama-7b-ft/',
     prompt_constructor=dict(type=MplugOwlMMBenchPromptConstructor),
     post_processor=dict(type=MplugOwlMMBenchPostProcessor)
 )  # noqa
@@ -46,3 +46,5 @@ mplug_owl_mmbench_evaluator = [
     dict(type='opencompass.DumpResults',
          save_path='work_dirs/mplug_owl-7b-mmagibench-v0.1.0.xlsx')
 ]
+
+mplug_owl_mmbench_load_from = None
\ No newline at end of file
diff --git a/opencompass/multimodal/datasets/mmbench.py b/opencompass/multimodal/datasets/mmbench.py
index aa2fb5c3..a5384c4d 100644
--- a/opencompass/multimodal/datasets/mmbench.py
+++ b/opencompass/multimodal/datasets/mmbench.py
@@ -41,12 +41,11 @@ class MMBenchDataset(Dataset):
         return len(self.df)
 
     def __getitem__(self, idx: int) -> dict:
+        # Mandatory Fields Begin
         index = self.df.iloc[idx]['index']
         image = self.df.iloc[idx]['image']
         image = decode_base64_to_image(image)
         question = self.df.iloc[idx]['question']
-        catetory = self.df.iloc[idx]['category']
-        l2_catetory = self.df.iloc[idx]['l2-category']
 
         option_candidate = ['A', 'B', 'C', 'D', 'E']
         options = {
@@ -57,13 +56,19 @@ class MMBenchDataset(Dataset):
         options_prompt = f'{self.sys_prompt}\n'
         for key, item in options.items():
             options_prompt += f'{key}. {item}\n'
+        # Mandatory Fields End
+        # Optional Fields Begin
         hint = self.load_from_df(idx, 'hint')
+        category = self.load_from_df(idx, 'category')
+        l2_category = self.load_from_df(idx, 'l2-category')
+        # Optional Fields End
+
         data = {
             'img': image,
             'question': question,
             'options': options_prompt,
-            'category': catetory,
-            'l2-category': l2_catetory,
+            'category': category,
+            'l2-category': l2_category,
             'options_dict': options,
             'index': index,
             'context': hint,
diff --git a/opencompass/multimodal/models/llama_adapter_v2_multimodal/llama_adapter.py b/opencompass/multimodal/models/llama_adapter_v2_multimodal/llama_adapter.py
index b65d50f4..76bac324 100644
--- a/opencompass/multimodal/models/llama_adapter_v2_multimodal/llama_adapter.py
+++ b/opencompass/multimodal/models/llama_adapter_v2_multimodal/llama_adapter.py
@@ -1,20 +1,37 @@
 import json
 import os
+import os.path as osp
+import sys
 from pathlib import Path
 
 import clip
 import mmengine
 import torch
 import torch.nn as nn
-from llama_adapter_v2_multimodal7b.llama.llama import ModelArgs, Transformer
-from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
-from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
 from mmengine.device import get_device
 from timm.models.vision_transformer import Block
 
 from opencompass.registry import MM_MODELS
 
 
+def load_package():
+    """Load required packages from llama_adapter_v2_multimodal7b."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+
+    sys.path.append(os.path.join(current_folder_path, 'LLaMA-Adapter'))  # noqa
+    from llama_adapter_v2_multimodal7b.llama.llama import (ModelArgs,
+                                                           Transformer)
+    from llama_adapter_v2_multimodal7b.llama.tokenizer import Tokenizer
+    from llama_adapter_v2_multimodal7b.llama.utils import sample_top_p
+    sys.path.pop(-1)
+
+    return ModelArgs, Transformer, Tokenizer, sample_top_p
+
+
+ModelArgs, Transformer, Tokenizer, sample_top_p = load_package()
+
+
 class LLaMA_adapter(nn.Module):
 
     def __init__(self,
@@ -182,7 +199,6 @@ class LLaMA_adapter(nn.Module):
 
         data_sample = data_samples[0]
 
-        prompts = [prompts]
         imgs = image
 
         # import pdb;pdb.set_trace()
@@ -261,12 +277,14 @@ class LLaMA_adapter_v2(nn.Module):
                  llama_dir,
                  prompt_constructor: dict,
                  post_processor: dict,
+                 model_path: str = 'llama_adapter_v2_multimodal7b',
+                 name: str = 'LORA-BIAS-7B',
                  mode: str = 'generation',
                  device='cuda' if torch.cuda.is_available() else 'cpu',
                  download_root='ckpts'):
         super().__init__()
 
-        name = 'BIAS-7B'
+        assert name in ['LORA-BIAS-7B', 'BIAS-7B', 'CAPTION-7B']
         # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B
         llama_type = name.split('.')[0].split('-')[-1]
         llama_ckpt_dir = os.path.join(llama_dir, llama_type)
@@ -274,9 +292,22 @@
 
         # load llama_adapter weights and model_cfg
         print(f'Loading LLaMA-Adapter from {llama_dir}')
-        ckpt = torch.load(
-            f'{llama_dir}/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
-            map_location='cpu')
+
+        current_file_path = os.path.abspath(__file__)
+        current_folder_path = os.path.dirname(current_file_path)
+        model_path = osp.join(current_folder_path, 'LLaMA-Adapter', model_path)
+        ckpt_root = osp.join(model_path, download_root)
+        ckpt_map = {
+            'LORA-BIAS-7B':
+            '1bcbffc43484332672092e0024a8699a6eb5f558161aebf98a7c6b1db67224d1_LORA-BIAS-7B.pth',  # noqa: E501
+            'BIAS-7B':
+            '7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth',  # noqa: E501
+            'CAPTION-7B':
+            '5088aeb63a89746b90bcfd5cb819e1c7411b2771b267c6d131ce73e250a8abf0_CAPTION-7B.pth'  # noqa: E501
+        }
+        ckpt = torch.load(osp.join(ckpt_root, ckpt_map[name]),
+                          map_location='cpu')
+
         model_cfg = ckpt.get('config', {})
 
         self.model = LLaMA_adapter(
diff --git a/opencompass/multimodal/models/llama_adapter_v2_multimodal/post_processor.py b/opencompass/multimodal/models/llama_adapter_v2_multimodal/post_processor.py
index fd9073ab..60909a3d 100644
--- a/opencompass/multimodal/models/llama_adapter_v2_multimodal/post_processor.py
+++ b/opencompass/multimodal/models/llama_adapter_v2_multimodal/post_processor.py
@@ -7,7 +7,7 @@ class LlamaAadapterMMBenchPostProcessor:
     def __init__(self) -> None:
         pass
 
-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
 
         if len(output_token) >= 2:
             if output_token[1] == '.':
diff --git a/opencompass/multimodal/models/llama_adapter_v2_multimodal/prompt_constructor.py b/opencompass/multimodal/models/llama_adapter_v2_multimodal/prompt_constructor.py
index 2657447c..50b47b8f 100644
--- a/opencompass/multimodal/models/llama_adapter_v2_multimodal/prompt_constructor.py
+++ b/opencompass/multimodal/models/llama_adapter_v2_multimodal/prompt_constructor.py
@@ -49,8 +49,10 @@ class LlamaAadapterMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
-
-        prompts = context + ' ' + question + ' ' + options  # noqa
-
+            context = [''] * len(data_samples)
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa
         return prompts
diff --git a/opencompass/multimodal/models/mplug_owl/__init__.py b/opencompass/multimodal/models/mplug_owl/__init__.py
index a29cf270..d5ba4073 100644
--- a/opencompass/multimodal/models/mplug_owl/__init__.py
+++ b/opencompass/multimodal/models/mplug_owl/__init__.py
@@ -1,4 +1,4 @@
-from .mplug_owl import MplugOwl
+from .mplug_owl_7b import MplugOwl
 from .post_processor import MplugOwlMMBenchPostProcessor
 from .prompt_constructor import MplugOwlMMBenchPromptConstructor  # noqa
diff --git a/opencompass/multimodal/models/mplug_owl/mplug_owl.py b/opencompass/multimodal/models/mplug_owl/mplug_owl_7b.py
similarity index 74%
rename from opencompass/multimodal/models/mplug_owl/mplug_owl.py
rename to opencompass/multimodal/models/mplug_owl/mplug_owl_7b.py
index 49da472a..fe564fb7 100644
--- a/opencompass/multimodal/models/mplug_owl/mplug_owl.py
+++ b/opencompass/multimodal/models/mplug_owl/mplug_owl_7b.py
@@ -1,27 +1,45 @@
+import os
+import sys
+
 import mmengine
 import torch
 import torch.nn as nn
 from mmengine.device import get_device
-# Load via Huggingface Style
-from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
-from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
-                                            MplugOwlProcessor)
-from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
 
 from opencompass.registry import MM_MODELS
 
 
-@MM_MODELS.register_module('mplug_owl')
+def load_package():
+    """Load required packages from mPLUG-Owl."""
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+
+    sys.path.append(os.path.join(current_folder_path, 'mPLUG-Owl'))  # noqa
+    from mplug_owl.modeling_mplug_owl import MplugOwlForConditionalGeneration
+    from mplug_owl.processing_mplug_owl import (MplugOwlImageProcessor,
+                                                MplugOwlProcessor)
+    from mplug_owl.tokenization_mplug_owl import MplugOwlTokenizer
+    sys.path.pop(-1)
+
+    return MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer  # noqa
+
+
+MplugOwlForConditionalGeneration, MplugOwlImageProcessor, MplugOwlProcessor, MplugOwlTokenizer = load_package(  # noqa
+)  # noqa
+
+
+@MM_MODELS.register_module('mplug_owl_7b')
 class MplugOwl(nn.Module):
 
     def __init__(self,
                  prompt_constructor: dict,
                  post_processor: dict,
                  model_path='MAGAer13/mplug-owl-llama-7b',
-                 mode: str = 'generation') -> None:
+                 mode: str = 'generation'):
         super().__init__()
         pretrained_ckpt = model_path
         # import pdb;pdb.set_trace()
+        print(pretrained_ckpt)
         self.model = MplugOwlForConditionalGeneration.from_pretrained(
             pretrained_ckpt,
             torch_dtype=torch.bfloat16,
@@ -57,7 +75,7 @@ class MplugOwl(nn.Module):
         inputs = {'image': images, 'data_samples': data_samples}
         inputs = self.prompt_constructor(inputs)
         image = inputs['image']
-        prompt = inputs['prompt']
+        prompt = inputs['prompt'][0]
         data_samples = inputs['data_samples']
 
         data_sample = data_samples[0]
diff --git a/opencompass/multimodal/models/mplug_owl/post_processor.py b/opencompass/multimodal/models/mplug_owl/post_processor.py
index 2538349b..6339e1e6 100644
--- a/opencompass/multimodal/models/mplug_owl/post_processor.py
+++ b/opencompass/multimodal/models/mplug_owl/post_processor.py
@@ -9,7 +9,7 @@ class MplugOwlMMBenchPostProcessor:
     def __init__(self) -> None:
         pass
 
-    def __call__(self, output_token: torch.tensor, tokenizer) -> str:
+    def __call__(self, output_token: torch.tensor) -> str:
         pattern = re.compile(r'([A-Z]\.)')
         res = pattern.findall(output_token)
         if len(res) > 0:
diff --git a/opencompass/multimodal/models/mplug_owl/prompt_constructor.py b/opencompass/multimodal/models/mplug_owl/prompt_constructor.py
index b3998710..6e7bc17e 100644
--- a/opencompass/multimodal/models/mplug_owl/prompt_constructor.py
+++ b/opencompass/multimodal/models/mplug_owl/prompt_constructor.py
@@ -48,8 +48,11 @@ class MplugOwlMMBenchPromptConstructor:
                 data_sample.get('context') for data_sample in data_samples
             ]
         else:
-            context = ''
-
-        prompts = context + ' ' + question + ' ' + options  # noqa
+            context = [''] * len(data_samples)
+        prompts = []
+        for cur_context, cur_question, cur_options in zip(
+                context, question, options):
+            prompts.append(cur_context + ' ' + cur_question + ' ' +
+                           cur_options)  # noqa
 
         return prompts
diff --git a/opencompass/tasks/mm_infer.py b/opencompass/tasks/mm_infer.py
index f8ed1e45..d46c59b2 100644
--- a/opencompass/tasks/mm_infer.py
+++ b/opencompass/tasks/mm_infer.py
@@ -122,6 +122,7 @@ class MultimodalInferTask:
         dataloader = Runner.build_dataloader(self.dataloader)
         # build model
         model = build_model(self.cfg)
+        model.eval()
         # build evaluator
         evaluator = Evaluator(self.evaluator)
diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py
index 445c8451..73a45047 100644
--- a/opencompass/utils/run.py
+++ b/opencompass/utils/run.py
@@ -95,7 +95,7 @@ def get_config_from_arg(args) -> Config:
                      pad_token_id=args.pad_token_id,
                      run_cfg=dict(num_gpus=args.num_gpus))
         models.append(model)
-
+    summarizer = None
     if args.summarizer:
         s = match_cfg_file('configs/summarizers/', [args.summarizer])[0]
@@ -103,7 +103,8 @@ def get_config_from_arg(args) -> Config:
         cfg = Config.fromfile(s[1])
         summarizer = cfg['summarizer']
 
-    return Config(dict(models=models, datasets=datasets, summarizer=summarizer),
+    return Config(dict(models=models, datasets=datasets,
+                       summarizer=summarizer),
                   format_python_code=False)
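
A side note on the vendoring pattern this diff uses twice: each `load_package` helper temporarily appends the submodule checkout (`LLaMA-Adapter` or `mPLUG-Owl`) to `sys.path`, performs the imports, and pops the entry again so the extra path does not leak into unrelated imports. Below is a minimal, generic sketch of the same idea; the names `load_vendored` and `vendor_dir` are illustrative and not part of this diff:

```python
import importlib
import os
import sys


def load_vendored(module_name: str, vendor_dir: str):
    """Import `module_name` from a package checked out in `vendor_dir`,
    a directory sitting next to this file, restoring sys.path afterwards."""
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.join(here, vendor_dir))
    try:
        # import_module resolves dotted names such as
        # 'llama_adapter_v2_multimodal7b.llama.llama'.
        module = importlib.import_module(module_name)
    finally:
        sys.path.pop(-1)  # drop the vendored dir even if the import fails
    return module
```

Note that `sys.path.pop(-1)` assumes nothing else appended to `sys.path` in the meantime; the try/finally in the sketch additionally guards against import errors, which the bare `sys.path.pop(-1)` in the diff does not.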