Merge branch 'ashraf/transformer_mlperf_final' into 'develop'

Ashraf/transformer mlperf final See merge request intelai/models!70
ZhaoqiongZ · Apr 4, 2020 · 0c2fb04 · 0c2fb04
2 parents a10941d + 77b4138
commit 0c2fb04
Show file tree

Hide file tree

Showing 89 changed files with 81,396 additions and 0 deletions.
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -36,6 +36,7 @@ dependencies to be installed:
 | Language Translation   | TensorFlow    | [GNMT](https://arxiv.org/pdf/1609.08144.pdf)                | Inference | [FP32](language_translation/tensorflow/gnmt/README.md#fp32-inference-instructions) |
 | Language Translation   | TensorFlow    | [Transformer Language](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_language/README.md#fp32-inference-instructions) |
 | Language Translation   | TensorFlow    | [Transformer_LT_Official ](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_lt_official/README.md#fp32-inference-instructions) |
+| Language Translation   | TensorFlow    | [Transformer_LT_mlperf ](https://arxiv.org/pdf/1706.03762.pdf)| Training | [FP32 Training](language_translation/tensorflow/transformer_mlperf/training/fp32/README.md#fp32-training-instructions) [BFloat16 Training](language_translation/tensorflow/transformer_mlperf/training/bfloat16/README.md#bfloat16-training-instructions) |
 | Object Detection       | TensorFlow    | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf)               | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) |
 | Object Detection       | TensorFlow    | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf)        | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) |
 | Object Detection       | TensorFlow    | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf)       | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) |

diff --git a/benchmarks/common/tensorflow/mlperf_compliance/__init__.py b/benchmarks/common/tensorflow/mlperf_compliance/__init__.py
@@ -0,0 +1 @@
+from . import mlperf_log
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_gnmt_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_gnmt_tags.py
@@ -0,0 +1,51 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keys which only appear in GNMT RNN Translation.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+# Loss smoothing factor
+MODEL_HP_LOSS_SMOOTHING = "model_hp_loss_smoothing"
+
+# Number of layers in encoder and in decoder
+MODEL_HP_NUM_LAYERS = "model_hp_num_layers"
+
+# RNN hidden size
+MODEL_HP_HIDDEN_SIZE = "model_hp_hidden_size"
+
+# Dropout
+MODEL_HP_DROPOUT = "model_hp_dropout"
+
+# Beam size for beam search
+EVAL_HP_BEAM_SIZE = "eval_hp_beam_size"
+
+# Maximum sequence length for training
+TRAIN_HP_MAX_SEQ_LEN = "train_hp_max_sequence_length"
+
+# Maximum sequence length for evaluation
+EVAL_HP_MAX_SEQ_LEN = "eval_hp_max_sequence_length"
+
+# Length normalization constant for beam search
+EVAL_HP_LEN_NORM_CONST = "eval_hp_length_normalization_constant"
+
+# Length normalization factor for beam search
+EVAL_HP_LEN_NORM_FACTOR = "eval_hp_length_normalization_factor"
+
+# Coverage penalty factor for beam search
+EVAL_HP_COV_PENALTY_FACTOR = "eval_hp_coverage_penalty_factor"
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_maskrcnn_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_maskrcnn_tags.py
@@ -0,0 +1,53 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keys which only appear in MASKRCNN.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+# Anchor overlap threshold
+FG_IOU_THRESHOLD = "foreground_iou_threshold"
+BG_IOU_THRESHOLD = "background_iou_threshold"
+
+# Top ROIs to be selected before and after NMS
+RPN_PRE_NMS_TOP_N_TRAIN = "rpn_pre_nms_top_n_train"
+RPN_PRE_NMS_TOP_N_TEST = "rpn_pre_nms_top_n_test"
+RPN_POST_NMS_TOP_N_TRAIN = "rpn_post_nms_top_n_train"
+RPN_POST_NMS_TOP_N_TEST = "rpn_post_nms_top_n_test"
+
+# Global batch size during training
+GLOBAL_BATCH_SIZE = "global_batch_size"
+
+# Batch size during eval
+BATCH_SIZE_TEST = "batch_size_test"
+
+
+# Pretrained classifier model
+BACKBONE = "backbone"
+
+# Anchor aspect ratio
+ASPECT_RATIOS = "aspect_ratios"
+
+# Overlap threshold for NMS
+NMS_THRESHOLD = "nms_threshold"
+
+# data pipeline
+MIN_IMAGE_SIZE = "min_image_size"
+MAX_IMAGE_SIZE = "max_image_size"
+RANDOM_FLIP_PROBABILITY = "random_flip_probability"
+INPUT_NORMALIZATION_STD = "input_normalization_std"
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_ncf_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_ncf_tags.py
@@ -0,0 +1,60 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keys which only appear in NCF Recommendation.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# The minimum number of ratings for a user to be included.
+PREPROC_HP_MIN_RATINGS = "preproc_hp_min_ratings"
+
+# The number of false negatives to use during evaluation.
+PREPROC_HP_NUM_EVAL = "preproc_hp_num_eval"
+
+# Are evaluation negatives sampled with replacement?
+PREPROC_HP_SAMPLE_EVAL_REPLACEMENT = "preproc_hp_sample_eval_replacement"
+
+
+# The number of false negatives per positive generated during training.
+INPUT_HP_NUM_NEG = "input_hp_num_neg"
+
+# Are training negatives sampled with replacement?
+INPUT_HP_SAMPLE_TRAIN_REPLACEMENT = "input_hp_sample_train_replacement"
+
+# This tag should be emitted each time the submission begins construction of the
+# false negatives for a training epoch.
+INPUT_STEP_TRAIN_NEG_GEN = "input_step_train_neg_gen"
+
+# This tag should be emitted when the evaluation negatives are selected. This
+# should occur only once.
+INPUT_STEP_EVAL_NEG_GEN = "input_step_eval_neg_gen"
+
+# The number of users in the evaluation set. This should be the same as the
+# number of users in the training set.
+EVAL_HP_NUM_USERS = "eval_hp_num_users"
+
+# The number of false negatives per positive which actually appear during
+# evaluation. This should match PREPROC_HP_NUM_EVAL.
+EVAL_HP_NUM_NEG = "eval_hp_num_neg"
+
+
+# The dimensionality of the matrix factorization portion of the model.
+MODEL_HP_MF_DIM = "model_hp_mf_dim"
+
+# The sizes of the fully connected layers in the dense section of the model.
+MODEL_HP_MLP_LAYER_SIZES = "model_hp_mlp_layer_sizes"
+
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_resnet_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_resnet_tags.py
@@ -0,0 +1,47 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keys which only appear in ResNet.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+BOTTLENECK_BLOCK = "bottleneck_block"
+
+# The ResNet reference specifies that evaluation occurs once every four epochs.
+# This can result in a quantization penalty for batch sizes which converge on
+# certain epochs. For instance a batch size which tends to converge on epoch 81
+# or 82 would be unduly punished by evaluating at epochs 80 and 84. In order to
+# address this, submissions may select an offset between 0 and 3 for the first
+# evaluation. So in the example above, the submitter could select an offset of
+# 1. In that case the first evaluation would occur on epoch 2, with later
+# evaluations correspondingly offset. Because this would trigger an eval on
+# epoch 82, the submission in this example can exit at a natural time.
+EVAL_EPOCH_OFFSET = "eval_offset"
+
+# ==============================================================================
+# == Topology ==================================================================
+# ==============================================================================
+
+MODEL_HP_INITIAL_MAX_POOL = "model_hp_initial_max_pool"
+MODEL_HP_BEGIN_BLOCK = "model_hp_begin_block"
+MODEL_HP_END_BLOCK = "model_hp_end_block"
+MODEL_HP_BLOCK_TYPE = "model_hp_block_type"
+MODEL_HP_PROJECTION_SHORTCUT = "model_hp_projection_shortcut"
+MODEL_HP_SHORTCUT_ADD = "model_hp_shorcut_add"
+
+MODEL_HP_RESNET_TOPOLOGY = "model_hp_resnet_topology"
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_ssd_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_ssd_tags.py
@@ -0,0 +1,42 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Keys which only appear in SSD.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+# Pretrained classifier model
+BACKBONE = "backbone"
+
+FEATURE_SIZES = "feature_sizes"
+STEPS = "steps"
+SCALES = "scales"
+ASPECT_RATIOS = "aspect_ratios"
+NUM_DEFAULTS_PER_CELL = "num_defaults_per_cell"
+LOC_CONF_OUT_CHANNELS = "loc_conf_out_channels"
+NUM_DEFAULTS = "num_default_boxes"
+
+# Overlap threshold for NMS
+NMS_THRESHOLD = "nms_threshold"
+NMS_MAX_DETECTIONS = "nms_max_detections"
+
+# data pipeline
+NUM_CROPPING_ITERATIONS = "num_cropping_iterations"
+RANDOM_FLIP_PROBABILITY = "random_flip_probability"
+DATA_NORMALIZATION_MEAN = "data_normalization_mean"
+DATA_NORMALIZATION_STD = "data_normalization_std"
diff --git a/benchmarks/common/tensorflow/mlperf_compliance/_transformer_tags.py b/benchmarks/common/tensorflow/mlperf_compliance/_transformer_tags.py
@@ -0,0 +1,35 @@
+# Copyright 2018 MLBenchmark Group. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Keys which only appear in transformer.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+INPUT_MAX_LENGTH = "input_max_length"
+
+MODEL_HP_INITIALIZER_GAIN = "model_hp_initializer_gain"
+MODEL_HP_VOCAB_SIZE = "model_hp_vocab_size"
+MODEL_HP_NUM_HIDDEN_LAYERS = "model_hp_hidden_layers"
+MODEL_HP_EMBEDDING_SHARED_WEIGHTS = "model_hp_embedding_shared_weights"
+MODEL_HP_ATTENTION_DENSE = "model_hp_attention_dense"
+MODEL_HP_ATTENTION_DROPOUT = "model_hp_attention_dropout"
+MODEL_HP_FFN_OUTPUT_DENSE = "model_hp_ffn_output_dense"
+MODEL_HP_FFN_FILTER_DENSE = "model_hp_ffn_filter_dense"
+MODEL_HP_RELU_DROPOUT = "model_hp_relu_dropout"
+MODEL_HP_LAYER_POSTPROCESS_DROPOUT = "model_hp_layer_postprocess_dropout"
+MODEL_HP_NORM = "model_hp_norm"
+MODEL_HP_SEQ_BEAM_SEARCH = "model_hp_sequence_beam_search"