diff --git a/configs/eval_subjective_alpacaeval.py b/configs/eval_subjective_alpacaeval_official.py
similarity index 99%
rename from configs/eval_subjective_alpacaeval.py
rename to configs/eval_subjective_alpacaeval_official.py
index 0d27b4c8..f3d2e2ec 100644
--- a/configs/eval_subjective_alpacaeval.py
+++ b/configs/eval_subjective_alpacaeval_official.py
@@ -74,4 +74,3 @@ eval = dict(
     )
 )
 work_dir = 'outputs/alpaca/'
-
diff --git a/opencompass/tasks/outer_eval/alpacaeval.py b/opencompass/tasks/outer_eval/alpacaeval.py
index 2701e870..ccfdfcae 100644
--- a/opencompass/tasks/outer_eval/alpacaeval.py
+++ b/opencompass/tasks/outer_eval/alpacaeval.py
@@ -28,12 +28,14 @@ class PredictionMerger:
             self.model_cfg, self.dataset_cfg,
             osp.join(self.work_dir, 'predictions'))
         root, ext = osp.splitext(filename)
+        alpaca_format_filename = root + '_alpaca' + ext
         partial_filename = root + '_0' + ext
 
-        if osp.exists(osp.realpath(filename)):
+        if osp.exists(osp.realpath(alpaca_format_filename)):
             return
 
-        if not osp.exists(osp.realpath(partial_filename)):
+        if not osp.exists(osp.realpath(partial_filename)) and not osp.exists(
+                osp.realpath(filename)):
             print(f'{filename} not found')
             return
 
@@ -67,8 +69,8 @@ class PredictionMerger:
             data_format[idx]['output'] = preds[str(idx)]['prediction']
             data_format[idx]['generator'] = self.model_cfg['abbr']
 
-        print(f'Merge {partial_filenames} to {filename}')
-        with open(filename, 'w', encoding='utf-8') as f:
+        print(f'Convert to {alpaca_format_filename}')
+        with open(alpaca_format_filename, 'w', encoding='utf-8') as f:
             json.dump(data_format, f, indent=4, ensure_ascii=False)
 
 
@@ -107,6 +109,7 @@ class AlpacaEvalTask(BaseTask):
         # script_path = __file__
         alpaca_cfg = self.judge_cfg.get('config', None)
         api_key = self.judge_cfg.get('key', None)
+        base_url = self.judge_cfg.get('base_url', None)
         assert alpaca_cfg is not None
         all_cfg = Config.fromfile(cfg_path)
         model_cfg = all_cfg['models']
@@ -120,7 +123,12 @@ class AlpacaEvalTask(BaseTask):
             }).run()
             filename = get_infer_output_path(m_cfg, dataset_cfg,
                                              osp.join(work_dir, 'predictions'))
+            root, ext = osp.splitext(filename)
+            alpaca_format_filename = root + '_alpaca' + ext
             output_path = osp.join(work_dir, 'results', m_cfg['abbr'])
+            if not osp.exists(output_path):
+                os.makedirs(output_path)
+            caching_path = osp.join(output_path, 'tmp_annotations.json')
             command = ''
             if api_key is not None:
                 command += f'export OPENAI_API_KEY={api_key}; '
@@ -128,7 +136,9 @@ class AlpacaEvalTask(BaseTask):
                 api_key = os.environ.get('OPENAI_API_KEY', '').split(',')[0]
                 if api_key:
                     command += f'export OPENAI_API_KEY={api_key}; '
-            command += f'alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
+            if base_url is not None:
+                command += f'export OPENAI_BASE_URL={base_url}; '
+            command += f'alpaca_eval --model_outputs {alpaca_format_filename} --annotators_config {alpaca_cfg} --output_path {output_path} --caching_path {caching_path};'
             return template.format(task_cmd=command)
 
     def run(self):