Skip to content

Commit

Permalink
Cuixiaom/transformer mlperf ww45 (#200)
Browse files Browse the repository at this point in the history
* Updated the transformer bfloat16 inference accuracy mode and throughput mode output

* Updated the transformer mlperf model inference for better performance measurement

* minor changes for the transformer mlperf models
  • Loading branch information
cuixiaom committed Nov 5, 2021
1 parent bdfd36c commit eae27ab
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def input_fn():

duration = time.time() - start_time
num_sentences = len(sorted_inputs)
print('Total inferencing time:%s' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
print('Total number of sentences is %s ' %(num_sentences))

# Write translations in the order they appeared in the original file.
if output_file is not None:
Expand All @@ -177,8 +176,6 @@ def input_fn():
available_steps = len(sorted_inputs) // FLAGS.batch_size + 1
if FLAGS.warmup_steps > available_steps:
FLAGS.warmup_steps = available_steps
num_lines_added = FLAGS.warmup_steps * FLAGS.batch_size
sorted_inputs = sorted_inputs[0:num_lines_added] + sorted_inputs

elif FLAGS.test_mode == 'profile':
hooks = [UpdateGlobalStepHook(),
Expand All @@ -189,21 +186,24 @@ def input_fn():

if FLAGS.steps is 0:
hooks =[]

num_warmup_sentences = FLAGS.warmup_steps * FLAGS.batch_size
translations = []
start_time = time.time()
for i, prediction in enumerate(estimator.predict(input_fn, hooks=hooks)):
if i == FLAGS.warmup_steps * FLAGS.batch_size:
start_time = time.time()
translation = _trim_and_decode(prediction["outputs"], subtokenizer)
if i >= FLAGS.warmup_steps * FLAGS.batch_size:
if i >= num_warmup_sentences:
translations.append(translation)
if FLAGS.test_mode != 'benchmark' and i%10 == 9:
tf.compat.v1.logging.info('Number of examples processed: {}'.format(len(translations)))
#skip the time spent on the warmup steps
if FLAGS.warmup_steps > 0 and i == num_warmup_sentences-FLAGS.batch_size:
start_time = time.time()

duration = time.time() - start_time
num_sentences = len(translations)
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
if FLAGS.test_mode == 'benchmark':
num_sentences = len(translations)
print('The number of sentences translated is %s ' %(num_sentences))
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))

def translate_text(estimator, subtokenizer, txt):
"""Translate a single string."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def input_fn():

duration = time.time() - start_time
num_sentences = len(sorted_inputs)
print('Total inferencing time:%s' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
print('Total number of sentences is %s ' %(num_sentences))

# Write translations in the order they appeared in the original file.
if output_file is not None:
Expand All @@ -177,8 +176,6 @@ def input_fn():
available_steps = len(sorted_inputs) // FLAGS.batch_size + 1
if FLAGS.warmup_steps > available_steps:
FLAGS.warmup_steps = available_steps
num_lines_added = FLAGS.warmup_steps * FLAGS.batch_size
sorted_inputs = sorted_inputs[0:num_lines_added] + sorted_inputs

elif FLAGS.test_mode == 'profile':
hooks = [UpdateGlobalStepHook(),
Expand All @@ -189,21 +186,24 @@ def input_fn():

if FLAGS.steps is 0:
hooks =[]

num_warmup_sentences = FLAGS.warmup_steps * FLAGS.batch_size
translations = []
start_time = time.time()
for i, prediction in enumerate(estimator.predict(input_fn, hooks=hooks)):
if i == FLAGS.warmup_steps * FLAGS.batch_size:
start_time = time.time()
translation = _trim_and_decode(prediction["outputs"], subtokenizer)
if i >= FLAGS.warmup_steps * FLAGS.batch_size:
if i >= num_warmup_sentences:
translations.append(translation)
if FLAGS.test_mode != 'benchmark' and i%10 == 9:
tf.compat.v1.logging.info('Number of examples processed: {}'.format(len(translations)))
#skip the time spent on the warmup steps
if FLAGS.warmup_steps > 0 and i == num_warmup_sentences-FLAGS.batch_size:
start_time = time.time()

duration = time.time() - start_time
num_sentences = len(translations)
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
if FLAGS.test_mode == 'benchmark':
num_sentences = len(translations)
print('The number of sentences translated is %s ' %(num_sentences))
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))

def translate_text(estimator, subtokenizer, txt):
"""Translate a single string."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def input_fn():

duration = time.time() - start_time
num_sentences = len(sorted_inputs)
print('Total inferencing time:%s' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
print('Total number of sentences is %s ' %(num_sentences))

# Write translations in the order they appeared in the original file.
if output_file is not None:
Expand All @@ -177,8 +176,6 @@ def input_fn():
available_steps = len(sorted_inputs) // FLAGS.batch_size + 1
if FLAGS.warmup_steps > available_steps:
FLAGS.warmup_steps = available_steps
num_lines_added = FLAGS.warmup_steps * FLAGS.batch_size
sorted_inputs = sorted_inputs[0:num_lines_added] + sorted_inputs

elif FLAGS.test_mode == 'profile':
hooks = [UpdateGlobalStepHook(),
Expand All @@ -189,21 +186,24 @@ def input_fn():

if FLAGS.steps is 0:
hooks =[]

num_warmup_sentences = FLAGS.warmup_steps * FLAGS.batch_size
translations = []
start_time = time.time()
for i, prediction in enumerate(estimator.predict(input_fn, hooks=hooks)):
if i == FLAGS.warmup_steps * FLAGS.batch_size:
start_time = time.time()
translation = _trim_and_decode(prediction["outputs"], subtokenizer)
if i >= FLAGS.warmup_steps * FLAGS.batch_size:
if i >= num_warmup_sentences:
translations.append(translation)
if FLAGS.test_mode != 'benchmark' and i%10 == 9:
tf.compat.v1.logging.info('Number of examples processed: {}'.format(len(translations)))
#skip the time spent on the warmup steps
if FLAGS.warmup_steps > 0 and i == num_warmup_sentences-FLAGS.batch_size:
start_time = time.time()

duration = time.time() - start_time
num_sentences = len(translations)
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))
if FLAGS.test_mode == 'benchmark':
num_sentences = len(translations)
print('The number of sentences translated is %s ' %(num_sentences))
print('Total inferencing time:%s seconds' %(duration))
print('Throughput:{} sentences/second'.format(num_sentences/duration))

def translate_text(estimator, subtokenizer, txt):
"""Translate a single string."""
Expand Down

0 comments on commit eae27ab

Please sign in to comment.