diff --git a/configs/eval_subjective_arena_hard.py b/configs/eval_subjective_arena_hard.py
index 08db8b29..4200fd04 100644
--- a/configs/eval_subjective_arena_hard.py
+++ b/configs/eval_subjective_arena_hard.py
@@ -72,8 +72,8 @@ judge_models = [dict(
     key='',
     meta_template=api_meta_template,
     query_per_second=1,
-    max_out_len=5120,
-    max_seq_len=9216,
+    max_out_len=4096,
+    max_seq_len=8192,
     batch_size=10,
     retry=10,
     temperature = 0,
diff --git a/opencompass/summarizers/subjective/arenahard.py b/opencompass/summarizers/subjective/arenahard.py
index 5958bed9..d82c800f 100644
--- a/opencompass/summarizers/subjective/arenahard.py
+++ b/opencompass/summarizers/subjective/arenahard.py
@@ -133,6 +133,27 @@ def get_win_rate_column(df, column, baseline='gpt4-0314'):
     return win_rate_table[baseline].fillna(0.5).apply(lambda x: round(x * 100, 2))
 
 
+def load_model_preds(filename):
+    root, ext = osp.splitext(filename)
+    partial_filename = root + '_0' + ext
+    if osp.exists(osp.realpath(filename)):
+        preds = mmengine.load(filename)
+        pred_strs = [
+            preds[str(i)]['prediction'] for i in range(len(preds))
+        ]
+    else:
+        filename = partial_filename
+        pred_strs = []
+        i = 1
+        while osp.exists(osp.realpath(filename)):
+            preds = mmengine.load(filename)
+            filename = root + f'_{i}' + ext
+            i += 1
+            pred_strs += [
+                preds[str(i)]['prediction'] for i in range(len(preds))
+            ]
+    return pred_strs
+
 def get_battles_from_judgment(dataset, subdir_path, post_process, WEIGHT=3):
     arena_hard_battles = pd.DataFrame()
     dataset_abbr = dataset_abbr_from_cfg(dataset)
@@ -274,12 +295,12 @@ class ArenaHardSummarizer:
             if model == 'gpt4-0314':
                 stats.at[i, 'avg_tokens'] = 423
             else:
-                with open(os.path.join(output_dir.split('summary')[0], 'predictions', model, dataset_abbr+'.json'), 'r') as f:
-                    model_preds = json.load(f)
-                pred_length = 0
-                for k, v in model_preds.items():
-                    pred_length += len(tiktoken.encoding_for_model('gpt-3.5-turbo').encode(v['prediction']))
-                pred_length /= len(model_preds)
+                file_name = os.path.join(output_dir.split('summary')[0], 'predictions', model, dataset_abbr+'.json')
+                model_preds = load_model_preds(file_name)
+                pred_length = 0
+                for model_pred in model_preds:
+                    pred_length += len(tiktoken.encoding_for_model('gpt-3.5-turbo').encode(model_pred, disallowed_special=()))
+                pred_length /= len(model_preds)
                 stats.at[i, 'avg_tokens'] = pred_length
                 stats.at[i, 'results'] = bootstrap_elo_lu[model].tolist()
         stats.sort_values(by='model', inplace=True)
diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py
index 0072060c..5eaa0271 100644
--- a/opencompass/utils/run.py
+++ b/opencompass/utils/run.py
@@ -92,7 +92,7 @@ def get_config_from_arg(args) -> Config:
             config['eval']['partitioner']['compare_models'] = change_accelerator(config['eval']['partitioner']['compare_models'], args.accelerator)
         if config.get('eval', {}).get('partitioner', {}).get('judge_models') is not None:
             config['eval']['partitioner']['judge_models'] = change_accelerator(config['eval']['partitioner']['judge_models'], args.accelerator)
-        if config.get('judge_models', {}) is not None:
+        if config.get('judge_models') is not None:
             config['judge_models'] = change_accelerator(config['judge_models'], args.accelerator)
 
     return config
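
For context, a minimal self-contained sketch of the fallback behaviour the new load_model_preds helper implements: read predictions from <abbr>.json when it exists, otherwise concatenate the sharded <abbr>_0.json, <abbr>_1.json, ... files written by partitioned inference. The sketch uses plain json instead of mmengine, and the file names, directory, and record layout below are hypothetical illustrations, not part of this diff.

# Illustrative sketch only (not the PR's code): same single-file-or-shards
# fallback as load_model_preds, using the standard library.
import json
import os.path as osp
import tempfile


def load_preds_sketch(filename):
    """Collect 'prediction' strings from filename or its _0, _1, ... shards."""
    root, ext = osp.splitext(filename)
    if osp.exists(filename):
        with open(filename) as f:
            preds = json.load(f)
        return [preds[str(i)]['prediction'] for i in range(len(preds))]
    pred_strs, shard = [], 0
    while osp.exists(root + f'_{shard}' + ext):
        with open(root + f'_{shard}' + ext) as f:
            preds = json.load(f)
        pred_strs += [preds[str(i)]['prediction'] for i in range(len(preds))]
        shard += 1
    return pred_strs


if __name__ == '__main__':
    # Hypothetical sharded prediction files in an {index: {'prediction': ...}} layout.
    tmpdir = tempfile.mkdtemp()
    for shard, answers in enumerate([['a0', 'a1'], ['b0']]):
        with open(osp.join(tmpdir, f'arenahard_{shard}.json'), 'w') as f:
            json.dump({str(i): {'prediction': p} for i, p in enumerate(answers)}, f)
    print(load_preds_sketch(osp.join(tmpdir, 'arenahard.json')))  # ['a0', 'a1', 'b0']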