[Minor] Smooth Evaluation

mrG7 · Dec 29, 2023 · 5b055a4 · 5b055a4
1 parent 613201a
commit 5b055a4
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 5 deletions.
diff --git a/run.py b/run.py
@@ -4,6 +4,7 @@
 from vlmeval.evaluate import COCO_eval, MME_eval, MMVet_eval, multiple_choice_eval, MME_rating, VQAEval
 from vlmeval.inference import infer_data_job, prefetch_acc
 from vlmeval.config import supported_VLM
+from vlmeval.utils import dataset_URLs, abbr2full
 
 def parse_args():
     parser = argparse.ArgumentParser()
@@ -33,6 +34,13 @@ def main():
         pred_root = model_name
 
         for i, dataset_name in enumerate(args.data):
+            if dataset_name not in dataset_URLs:
+                dataset_name = abbr2full(dataset_name)
+
+            if dataset_name not in dataset_URLs:
+                logger.error(f'Unknown dataset: {dataset_name}. ')
+                continue
+
             tmpl = f'{pred_root}/' + '{}' + f'{world_size}_{dataset_name}.pkl'
             out_file = tmpl.format(rank)
             result_file = f'{pred_root}/{model_name}_{dataset_name}.xlsx'

diff --git a/scripts/auto_run.py b/scripts/auto_run.py
@@ -17,6 +17,6 @@ def is_api(x):
     if '80b' in m:
         cmd = f'python run.py --data {dataset_str} --model {m}'
     else:
-        cmd = f'torchrun --nproc-per-node=8 run.py --data {dataset_str} --model {m}'
+        cmd = f'bash run.sh --data {dataset_str} --model {m}'
     print(cmd)
     os.system(cmd)
diff --git a/scripts/report_missing.py b/scripts/report_missing.py
@@ -6,11 +6,13 @@
 
 dataset = [
     'MME', 'SEEDBench_IMG', 'MMBench', 'CCBench', 'MMBench_CN',
-    'MMVet', 'OCRVQA_TESTCORE', 'TextVQA_VAL', 'COCO_VAL', 'MMMU_DEV_VAL'
+    'MMVet', 'OCRVQA_TESTCORE', 'TextVQA_VAL', 'COCO_VAL', 'MMMU_DEV_VAL',
+    'ChartQA_VALTEST_HUMAN'
 ]
 suffix = [
     'score.csv', 'acc.csv', 'acc.csv', 'acc.csv', 'acc.csv',
-    'gpt-4-turbo_score.csv', 'acc.csv', 'acc.csv', 'score.json', 'acc.csv'
+    'gpt-4-turbo_score.csv', 'acc.csv', 'acc.csv', 'score.json', 'acc.csv',
+    'acc.csv'
 ]
 
 N = len(dataset)

diff --git a/vlmeval/utils/__init__.py b/vlmeval/utils/__init__.py
@@ -1,12 +1,12 @@
 from .matching_util import can_infer, can_infer_option, can_infer_text
 from .mp_util import track_progress_rich
 from .custom_prompt import CustomPrompt
-from .dataset_config import dataset_URLs, img_root_map, DATASET_TYPE
+from .dataset_config import dataset_URLs, img_root_map, DATASET_TYPE, abbr2full
 from .dataset import TSVDataset, split_MMMU
 
 
 __all__ = [
     'can_infer', 'can_infer_option', 'can_infer_text', 'track_progress_rich', 
     'TSVDataset', 'dataset_URLs', 'img_root_map', 'DATASET_TYPE', 'CustomPrompt',
-    'split_MMMU'
+    'split_MMMU', 'abbr2full'
 ]
diff --git a/vlmeval/utils/dataset_config.py b/vlmeval/utils/dataset_config.py
@@ -72,3 +72,13 @@ def DATASET_TYPE(dataset):
     elif listinstr(['ocrvqa', 'textvqa', 'chartqa'], dataset.lower()):
         return 'VQA'
     return None
+
+def abbr2full(s):
+    """Resolve an abbreviated dataset name to its full name.
+
+    Returns the one entry of `img_root_map` for which `s in entry` holds,
+    or None when no entry or more than one entry satisfies it.
+    """
+    # Only an unambiguous (exactly one) hit is accepted.
+    matches = [name for name in img_root_map if s in name]
+    return matches[0] if len(matches) == 1 else None